In [None]:
import os
import sys

def add_to_path(new_path: str):
    """Append the absolute form of ``new_path`` to ``sys.path`` once.

    Args:
        new_path: Directory to make importable. It is converted to an
            absolute path with ``os.path.abspath`` before being compared
            against the existing ``sys.path`` entries.

    Calling this again with the same path does not add a second entry.
    """
    resolved = os.path.abspath(new_path)
    if resolved in sys.path:
        return
    sys.path.append(resolved)

is_colab = False

if is_colab:
    !git clone https://github.com/pdkary/Karys.git
    !cd Karys && git fetch && git pull
    !cd Karys && pip install -r requirements.txt --quiet
    add_to_path("Karys/")
    from google.colab import drive
    drive.mount("/content/drive")
    !cd Karys && pip install -r requirements.txt --quiet
else:
    add_to_path("../../")
    !cd ../../ && pip install -r requirements.txt --quiet

In [None]:
from data.configs.TextDataConfig import TextDataConfig
from data.wrappers.TextDataWrapper import TextDataWrapper

# Corpus location: the Google Drive copy on Colab, a local relative path otherwise.
file_input = (
    "drive/MyDrive/Colab/Language/seinfeld_corpus.txt"
    if is_colab
    else "./test_input/corpus.txt"
)

# Values passed positionally to TextDataConfig below.
vocab_size, sentence_length, output_length = 2500, 10, 7

text_config = TextDataConfig(vocab_size, sentence_length, output_length)
text_data_wrapper = TextDataWrapper.load_from_file(file_input, text_config)

# Show sentence number 135 (presumably a quick sanity check of the loaded data).
text_data_wrapper.show_sentence_n(135)

In [None]:
import numpy as np
from models.ModelWrapper import ModelWrapper
from tensorflow.keras.layers import Dense, LeakyReLU, ReLU, Activation, LSTM, Embedding, Softmax
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.losses import MSE, MSLE, binary_crossentropy, categorical_crossentropy

# Learning rate shared by the generator and discriminator optimizers.
LR = 5e-4

# Layer sizes, named so they can be tuned in one place.
EMBEDDING_DIM = 128   # vector-space size of the vocabulary embedding
LSTM_UNITS = 100      # units in each generator LSTM layer
D_HIDDEN_UNITS = 256  # units in each hidden discriminator Dense layer

## Generator
# NOTE(review): `a` is not referenced in any visible cell (possibly a leftover
# LeakyReLU slope) - confirm nothing else uses it before removing.
a = 0.08
g_layers = [
    Embedding(text_config.vocab_size, EMBEDDING_DIM, input_length=sentence_length),
    # First LSTM returns the full sequence so the second LSTM can consume it.
    LSTM(LSTM_UNITS, return_sequences=True),
    LSTM(LSTM_UNITS),
    # One unit per vocabulary entry, normalised with softmax to pair with the
    # categorical cross-entropy loss below.
    Dense(text_config.vocab_size),
    Softmax(),
]

g_optimizer = RMSprop(learning_rate=LR)
g_loss = categorical_crossentropy
# ModelWrapper is given the input shape first and the output shape second.
text_generator_model = ModelWrapper(text_config.input_shape, text_config.output_shape, g_layers, g_optimizer, g_loss, flatten_input=False)
text_generator_model.build()

## Discriminator
# Three ReLU hidden layers followed by a single sigmoid unit, trained with
# binary cross-entropy.
d_layers = [
    Dense(D_HIDDEN_UNITS), Activation('relu'),
    Dense(D_HIDDEN_UNITS), Activation('relu'),
    Dense(D_HIDDEN_UNITS), Activation('relu'),
    Dense(1), Activation('sigmoid'),
]
# Alias for the previous, misspelled name in case another cell refers to it.
d_layes = d_layers

d_optimizer = Adam(learning_rate=LR)
d_loss = binary_crossentropy
text_discriminator_model = ModelWrapper(text_config.label_shape, [1], d_layers, d_optimizer, d_loss)
text_discriminator_model.build()

In [None]:
from trainers.TextTrainer import TextTrainer
from plotting.TrainPlotter import TrainPlotter
import json

# Destination for the JSON dump of the latest test inputs/outputs.
if is_colab:
    file_output = "drive/MyDrive/Colab/Language/output/seinfeld.json"
else:
    file_output = "./test_output/output.json"

# Create the destination directory up front; otherwise open() raises
# FileNotFoundError at the first test step when the directory is missing.
# (`os` is imported in the notebook's first cell.)
output_dir = os.path.dirname(file_output)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

train_columns = ["Train Loss", "Test Loss"]
loss_plot = TrainPlotter(moving_average_size=100, labels=train_columns)

epochs = 10
batch_size = 5
trains_per_test = 2
batches_per_train = 10

text_trainer = TextTrainer(text_generator_model, text_discriminator_model, text_data_wrapper)

# Reported as 0 until the first test step has run.
test_loss = 0
for i in range(epochs):
    loss_plot.start_epoch()
    train_loss = text_trainer.train(batch_size, batches_per_train)

    # Run a test step every `trains_per_test` epochs, skipping epoch 0.
    if i % trains_per_test == 0 and i != 0:
        # NOTE(review): arguments presumably mirror train() as
        # (batch_size, batches) - confirm against TextTrainer.test.
        test_loss = text_trainer.test(5, 1)
        ins = text_data_wrapper.translate_sentences(text_trainer.most_recent_inputs)
        outs = text_data_wrapper.translate_sentences(text_trainer.most_recent_outputs)

        # Pair each translated input with its translated output. The
        # comprehension uses its own names so the epoch index `i` is not shadowed.
        test_output_data = {
            sentence_in: sentence_out
            for sentence_in, sentence_out in zip(ins, outs)
        }
        # Overwrite the file with the latest results; it is only written
        # here, so plain write mode is enough.
        with open(file_output, 'w') as f:
            json.dump(test_output_data, f)

    loss_plot.batch_update([train_loss, test_loss])
    loss_plot.log_epoch()