In [None]:
import os
import sys

def add_to_path(new_path: str):
    """Make ``new_path`` importable by registering it on ``sys.path``.

    The path is converted to an absolute path first. It is appended to the
    end of ``sys.path`` only when that absolute path is not already listed,
    so calling this repeatedly with the same location adds it once.

    Args:
        new_path: Directory to add; may be relative to the current
            working directory.
    """
    resolved = os.path.abspath(new_path)
    if resolved in sys.path:
        return
    sys.path.append(resolved)

is_colab = False

if is_colab:
    !git clone https://github.com/pdkary/Karys.git
    !cd Karys && git fetch && git pull
    !cd Karys && pip install -r requirements.txt --quiet
    add_to_path("Karys/")
    from google.colab import drive
    drive.mount("/content/drive")
    !cd Karys && pip install -r requirements.txt --quiet
else:
    add_to_path("../../")
    !cd ../../ && pip install -r requirements.txt --quiet

In [None]:
from data.configs.TextDataConfig import TextDataConfig
from data.wrappers.TextDataWrapper import TextDataWrapper

# Corpus location: the copy under the mounted Drive folder on Colab,
# otherwise the local test corpus next to this notebook.
file_input = (
    "drive/MyDrive/Colab/Language/seinfeld_corpus.txt"
    if is_colab
    else "./test_input/corpus.txt"
)

# Hyperparameters for the text data and the embedding.
# vocab_size and sentence_length configure TextDataConfig below;
# word_dimensionality is the Embedding output size used by the model cell.
vocab_size = 2000
sentence_length = 5
word_dimensionality = 50

# Build the data configuration, then load the corpus through it.
text_config = TextDataConfig(vocab_size, sentence_length)
text_data_wrapper = TextDataWrapper.load_from_file(file_input, text_config)

# Sanity check: show sentence number 135 from the loaded data.
text_data_wrapper.show_sentence_n(135)

In [None]:
import numpy as np
from models.ModelWrapper import ModelWrapper
from tensorflow.keras.layers import Dense, LeakyReLU, ReLU, Activation, LSTM, Embedding, Softmax
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.losses import MSE, MSLE, binary_crossentropy, categorical_crossentropy

# NOTE(review): text_feature_size and a are not referenced anywhere else in
# this cell — confirm whether other cells rely on them before removing.
text_feature_size = 100

a = 0.08

# Layer stack for the text generator: token embedding, two stacked LSTMs
# (the first returns the full sequence so the second can consume it), a ReLU
# dense layer, then a vocab_size-wide dense layer followed by a softmax.
glayers = [
    Embedding(vocab_size, word_dimensionality, input_length=sentence_length),
    LSTM(100, return_sequences=True),
    LSTM(100),
    Dense(100, activation='relu'),
    Dense(vocab_size),
    Softmax(),
]

# Training objective and optimizer for the generator.
gloss = categorical_crossentropy
goptimizer = RMSprop(learning_rate=5e-3)

# The wrapper is given the input and output shapes declared by the text
# config; the input is passed through without flattening.
text_generator_model = ModelWrapper(
    text_config.input_shape,
    text_config.output_shape,
    glayers,
    goptimizer,
    gloss,
    flatten_input=False,
)
text_generator_model.build()

In [None]:
from trainers.TextTrainer import TextTrainer
from plotting.TrainPlotter import TrainPlotter
import json

# Destination for the generated sample continuations (JSON).
if is_colab:
    file_output = "drive/MyDrive/Colab/Language/output/seinfeld.json"
else:
    file_output = "./test_output/output.json"

# Create the destination folder up front. Without this, a missing folder only
# surfaces as FileNotFoundError at the first evaluation, after several
# training rounds have already been spent.
output_dir = os.path.dirname(file_output)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# Two series are plotted per epoch, in this order.
train_columns = ["Train Loss","Test Loss"]
loss_plot = TrainPlotter(moving_average_size=100,labels=train_columns)

# Training schedule.
epochs=1000
batch_size = 5
trains_per_test=10       # evaluate and dump samples every this many epochs
batches_per_train = 10

text_trainer = TextTrainer(text_generator_model, text_data_wrapper)

# Prompts that are continued at every evaluation step.
test_phrases = [
    "jerry can i borrow your",
    "i can never catch a",
    "elaine and jerry are a",
    "kramer what are you doing",
    "george will never get a"
]

# NOTE(review): seed_phrase is not used in this cell — confirm whether
# another cell relies on it.
seed_phrase = "jerry can i borrow your"

# Until the first evaluation runs, the plotted test loss is this placeholder.
test_loss = 0
for i in range(epochs):
    loss_plot.start_epoch()
    train_loss = text_trainer.train(batch_size, batches_per_train)

    # Evaluate every `trains_per_test` epochs, skipping epoch 0.
    if i % trains_per_test == 0 and i != 0:
        # NOTE(review): the arguments presumably mirror train()'s
        # (batch_size, batches) — confirm against TextTrainer.test.
        test_loss = text_trainer.test(25, 1)

        # Continue each test phrase and keep the result keyed by its prompt.
        output = {s: text_trainer.propogate_from_phrase(s, 5) for s in test_phrases}

        # The file is overwritten on every evaluation, so it always holds
        # only the most recent samples.
        with open(file_output, 'w') as f:
            json.dump(output, f)

    # Between evaluations the most recent test loss is re-plotted.
    loss_plot.batch_update([train_loss, test_loss])
    loss_plot.log_epoch()