In [None]:
from seq2seq import SimpleSeq2Seq, Seq2Seq, AttentionSeq2Seq
import numpy as np
import random
import csv
import time
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from IPython.display import clear_output

In [None]:
# Notebook-wide settings.
seed = 80085
weights_path = './model.hdf5'  # file the training cell saves model weights to

# Seed Python's and NumPy's global RNGs from the same value, for reproducibility.
random.seed(seed)
np.random.seed(seed)

# CSV dialect used when reading the data file: ';'-separated fields, '"' quotes.
csv.register_dialect(
    'eugene',
    delimiter=';',
    quotechar='"',
    quoting=csv.QUOTE_MINIMAL,
)

In [None]:
def seq_to_categorical(seq):
    """Replace every token id in ``seq`` with the row the tokenizer builds for it.

    ``seq`` is iterated as rows of token ids. Each id is wrapped as a batch
    holding a single one-token sequence, handed to the module-level
    ``tokenizer.sequences_to_matrix``, and the first (only) row of the result
    takes the place of the id.

    Returns:
        A NumPy array built from the nested list of those rows. With equally
        long input rows this is presumably
        ``(rows, tokens_per_row, matrix_width)``; the width is whatever the
        tokenizer returns.
    """
    encoded_rows = []
    for token_row in seq:
        row_vectors = []
        for token in token_row:
            single_token_batch = np.array([[token]])
            row_vectors.append(tokenizer.sequences_to_matrix(single_token_batch)[0])
        encoded_rows.append(row_vectors)
    return np.array(encoded_rows)

def make_sequences(path='../syntethic_data/data.csv'):
    """Read the paired corpus from a CSV file and tokenize both columns.

    The file is parsed with the ``'eugene'`` CSV dialect, which must already
    be registered. Column 0 of each row is collected as an input text and
    column 1 as its target text. One ``Tokenizer(lower=False)`` is fitted on
    inputs and targets together, so both sides share a single vocabulary.

    Args:
        path: CSV file to read. Defaults to the notebook's data file.

    Returns:
        ``(X, Y, tokenizer)``: the tokenizer's id sequences for the inputs,
        the id sequences for the targets, and the fitted tokenizer.

    Raises:
        IndexError: if a non-blank row has fewer than two fields.
    """
    inputs = []
    targets = []

    # The csv module asks for newline='' so that line endings inside quoted
    # fields reach the reader untouched.
    with open(path, 'r', newline='') as f:
        for row in csv.reader(f, 'eugene'):
            if not row:
                # csv.reader yields [] for a blank line; there is no pair to
                # collect, and indexing it would raise.
                continue
            inputs.append(row[0])
            targets.append(row[1])

    tokenizer = Tokenizer(lower=False)
    tokenizer.fit_on_texts(inputs + targets)
    X = tokenizer.texts_to_sequences(inputs)
    Y = tokenizer.texts_to_sequences(targets)
    return X, Y, tokenizer

# Load the corpus as token-id sequences together with the fitted tokenizer.
X, Y, tokenizer = make_sequences()

# Pad each side with pad_sequences, then expand every token id into the
# tokenizer's per-token row. `tokenizer` is already bound above, which
# seq_to_categorical relies on.
X, Y = (seq_to_categorical(pad_sequences(token_ids)) for token_ids in (X, Y))

def add_extra_dim(seqs):
    """Return a float64 copy of ``seqs`` with a new leading axis of length 1.

    Args:
        seqs: an array, or anything NumPy can convert to one (nested lists,
            scalars).

    Returns:
        A new ``float64`` array of shape ``(1,) + np.shape(seqs)``. The input
        is not modified and shares no memory with the result.
    """
    # np.array converts and copies in one step, so plain lists and 0-d inputs
    # are accepted as well as ndarrays.
    return np.expand_dims(np.array(seqs, dtype=np.float64), axis=0)
# Sanity check: show the shapes of the encoded input and target arrays.
print(X.shape, Y.shape)

In [None]:
def make_model():
    """Build and compile a SimpleSeq2Seq sized from the module-level X and Y.

    Axis 1 and axis 2 of ``X`` become the model's ``input_shape``; axis 1 and
    axis 2 of ``Y`` become ``output_length`` and ``output_dim``. The input
    shape is printed, the hidden size is fixed at 100, and the model is
    compiled with MSE loss and the SGD optimizer.

    Returns:
        The compiled model.
    """
    in_shape = (X.shape[1], X.shape[2])
    out_steps, out_features = Y.shape[1], Y.shape[2]
    print(*in_shape)

    seq2seq_net = SimpleSeq2Seq(
        output_dim=out_features,
        hidden_dim=100,
        output_length=out_steps,
        input_shape=in_shape,
    )
    seq2seq_net.compile(loss='mse', optimizer='sgd')
    return seq2seq_net

# Model used by the training and query cells: sized from the encoded dataset.
model = make_model()

# Dimensions for the synthetic smoke test below. They are deliberately tiny
# and independent of the dataset.
input_length = 5
input_dim = 3

output_length = 3
output_dim = 4

samples = 1000
hidden_dim = 50


def test_SimpleSeq2Seq():
    """Smoke-test SimpleSeq2Seq on random data without touching notebook state.

    Builds and compiles a small SimpleSeq2Seq from the module-level smoke-test
    dimensions and fits it for one epoch on ``samples`` random input/target
    pairs.

    The random arrays stay local. The global ``X`` / ``Y`` are deliberately
    left alone: they hold the encoded dataset, and the training and query
    cells read them.

    Returns:
        The compiled smoke-test model.
    """
    random_inputs = np.random.random((samples, input_length, input_dim))
    random_targets = np.random.random((samples, output_length, output_dim))

    smoke_model = SimpleSeq2Seq(output_dim=output_dim, hidden_dim=hidden_dim, output_length=output_length, input_shape=(input_length, input_dim))
    smoke_model.compile(loss='mse', optimizer='sgd')
    smoke_model.fit(random_inputs, random_targets, epochs=1, verbose=0)
    return smoke_model


# Bound to its own name so that `model` stays the dataset-sized model.
smoke_model = test_SimpleSeq2Seq()

In [None]:
# Open-ended training: fit in rounds and checkpoint the weights after each one.
epochs_per_round = 10
max_rounds = None  # None = keep training until the kernel is interrupted

rounds_done = 0
try:
    while max_rounds is None or rounds_done < max_rounds:
        clear_output()  # keep only the latest round's progress in the cell output
        model.fit(X, Y, epochs=epochs_per_round)
        model.save_weights(weights_path, overwrite=True)
        rounds_done += 1
        time.sleep(0.25)
except KeyboardInterrupt:
    # Interrupting the kernel is the normal way to stop open-ended training.
    # End the cell cleanly instead of with a traceback so the cells below can
    # still be run. The weights file holds the last *completed* round; the
    # in-memory model may include part of the interrupted one.
    print('Training stopped after {} completed round(s).'.format(rounds_done))

In [None]:
def sample(preds, temperature=1.0):
    """Sample an index from a probability array, reshaped by ``temperature``.

    The probabilities are re-weighted as ``softmax(log(preds) / temperature)``
    and one index is drawn from the result with NumPy's global RNG.

    Args:
        preds: array-like of non-negative probabilities (expected to be 1-D,
            as ``np.random.multinomial`` requires).
        temperature: values below 1 sharpen the distribution, values above 1
            flatten it. ``0`` means greedy decoding: the index of the largest
            probability is returned without sampling.

    Returns:
        The chosen index, as returned by ``np.argmax``.
    """
    preds = np.asarray(preds).astype('float64')
    if temperature == 0:
        # Limit of the softmax as temperature -> 0+. Dividing by zero would
        # only produce NaNs.
        return np.argmax(preds)

    # Zero probabilities are legitimate: log(0) = -inf becomes weight 0 below,
    # so the divide-by-zero warning is noise.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature

    # Shift by the maximum before exponentiating. The normalised result is
    # mathematically unchanged, but without the shift every term underflows to
    # 0 at low temperatures and the division below becomes 0/0 = NaN.
    weights = np.exp(logits - np.max(logits))
    probs = weights / np.sum(weights)
    draws = np.random.multinomial(1, probs, 1)
    return np.argmax(draws)

In [None]:
def query(natural):
    """Run the model on one natural-language string and return its prediction.

    The text is converted to token ids with the module-level ``tokenizer``,
    passed through ``pad_sequences`` with ``maxlen`` set to ``X.shape[1]``,
    expanded with ``seq_to_categorical`` and given to ``model.predict`` as a
    batch of one.

    Returns:
        Whatever ``model.predict`` returns for that batch.
    """
    token_ids = tokenizer.texts_to_sequences([natural])
    padded_ids = pad_sequences(token_ids, maxlen=X.shape[1])
    model_input = seq_to_categorical(padded_ids)
    return model.predict(model_input)

# Example query: print what model.predict returns for one request.
print(query("create 10 directories starting with Y29PE67"))