In [None]:
import numpy as np
import csv
import json
import pandas as pd
import math

from keras.models import Sequential
from keras.layers import Dense, Embedding
from keras.layers import Dropout, Flatten, Activation
from keras.layers import LSTM, Conv1D
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils
from keras.optimizers import Adam

from nested_lstm import NestedLSTM

# Runs on TF 1.5.0 and Keras 2.1.4, with Python 3.6

In [None]:
# Path of the text corpus that the character-level generator is trained on
# (read in full by the loading cell).
GENERATOR_DATASET = './dataset/generator_dataset.txt'

# NOTE(review): not referenced in the cells visible here — presumably a minimum
# length for generated samples; confirm against the code that uses it.
MIN_LENGTH_CPP = 6
# Fraction of the samples held out for validation; passed to fit() as
# validation_split.
VAL_SPLIT = 0.2

#### Loading and Featurization

In [None]:
# Read the whole training corpus. The context manager closes the file handle
# as soon as the read finishes instead of leaving it open for the lifetime of
# the kernel.
with open(GENERATOR_DATASET) as corpus_file:
    raw_text = corpus_file.read()

# Character vocabulary: every distinct character in the corpus, sorted so the
# character <-> integer mapping is deterministic for a given corpus.
chars = sorted(set(raw_text))
char_to_int = {c: i for i, c in enumerate(chars)}
int_to_char = dict(enumerate(chars))

n_vocab = len(chars)      # number of distinct characters
n_chars = len(raw_text)   # total corpus length in characters

In [None]:
seq_length = 5

# Slide a window of seq_length characters over the corpus. Each window is one
# input sample and the character immediately after it is the target.
# Windows whose target is a newline are skipped.
dataX, dataY = [], []
for i in range(len(raw_text) - seq_length):
    seq_in = raw_text[i:i + seq_length]
    seq_out = raw_text[i + seq_length]
    if seq_out == "\n":
        continue
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])
n_patterns = len(dataX)

# Shape the inputs as (samples, timesteps, 1 feature) and scale the integer
# character ids by the vocabulary size.
X = np.reshape(dataX, (n_patterns, seq_length, 1)) / float(n_vocab)

# One-hot encode the target character ids.
y = np_utils.to_categorical(dataY)

#### Training the generator

In [None]:
# Stacked recurrent generator. Layers are applied in list order; every
# recurrent layer except the last returns the full sequence so that the next
# recurrent layer receives one vector per timestep.
timesteps, n_features = X.shape[1], X.shape[2]

generator_layers = [
    LSTM(1024, input_shape=(timesteps, n_features), return_sequences=True),
    NestedLSTM(1024, depth=4, dropout=0.1, recurrent_dropout=0.0, return_sequences=True),
    LSTM(1024, return_sequences=True),
    Dropout(0.1),
    Activation('relu'),
    LSTM(512, return_sequences=True),
    Dropout(0.1),
    Activation('relu'),
    LSTM(512),
    Dropout(0.1),
    # One softmax output unit per column of the one-hot target matrix.
    Dense(y.shape[1], activation='softmax'),
]

model_lstm = Sequential(generator_layers)

In [None]:
# Targets are one-hot vectors, hence categorical cross-entropy; accuracy is
# tracked as an additional metric.
model_lstm.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Checkpoint file name template; Keras fills the placeholders with the epoch
# number and the metrics logged for that epoch.
filepath = "./model/generator/" + "epoch{epoch:02d}-loss{loss:.4f}-acc{acc:.4f}-val_loss{val_loss:.4f}-val_acc{val_acc:.4f}.hdf5"

# Save a checkpoint only when validation accuracy improves. Accuracy is a
# higher-is-better metric, so the comparison mode must be 'max'; with 'min'
# the callback would only save when val_acc dropped below its lowest value so
# far, i.e. it would keep the worst models instead of the best.
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
callbacks_list = [checkpoint]

# Train the generator, holding out VAL_SPLIT of the samples for validation.
model_lstm.fit(X, y, epochs=1000, batch_size=256, validation_split=VAL_SPLIT,
               callbacks=callbacks_list, verbose=True)