In [None]:
import nltk
import numpy as np
import pandas as pd
from pandas import DataFrame
from keras.models import Model, Sequential, load_model
from keras.preprocessing.sequence import TimeseriesGenerator, pad_sequences
from keras.layers.recurrent import LSTM
from keras.layers.convolutional import Conv1D, MaxPooling1D
from keras.layers.core import Activation, Dropout, Dense, Flatten
from keras.layers import TimeDistributed, Bidirectional, InputLayer, GlobalMaxPooling1D
from keras.optimizers import Adam
from keras.utils import to_categorical
from keras.callbacks import TensorBoard
from keras.metrics import categorical_accuracy
from IPython.display import clear_output
from more_itertools import flatten, intersperse
import random

import os
import urllib.request

In [None]:
%load_ext autoreload
%autoreload 2
from batcher import batch_from_generator
from train_data import load_conll2003, create_conll_encoded_shifted_generator
from mappings import get_all_mappings, gen_input_feature_to_class_map, gen_input_feature_to_int_map
from corpus import corpus_training_data_generator, create_all_corpus_train_pipeline, pad, encode_each_sentence
from model import create_model, compile_model

In [None]:
# --- Network shape: NUM_OF_UNITS and DROPOUT are forwarded to create_model ---
NUM_OF_UNITS = 200
DROPOUT = 0.5
OUTPUT_CLASSES = 2  # not referenced by the cells visible in this notebook

# --- Data shaping: TIME_SLICE_SIZE is passed to create_all_corpus_train_pipeline ---
TIME_SLICE_SIZE = 50
SAMPLING_RATE = 1  # not referenced by the cells visible in this notebook

# --- Reserved ids (presumably for padding / unknown symbols; confirm in the helper modules) ---
PADDING = 0
UNKNOWN = 1

# --- Training loop: both values are handed to model.fit ---
BATCH_SIZE = 512
EPOCHS = 100

# --- Files on disk: MODEL_SAVE_PATH is the checkpoint looked up before training ---
MODEL_SAVE_PATH = 'tc_model.h5'
LSTM_MODEL_SAVE_PATH = 'lstm_model.h5'  # not referenced by the cells visible in this notebook

In [None]:
# Build the lookup tables provided by the project's `mappings` module.
# Of these, only `input_feature_to_int_map` is used by the cells visible in this
# notebook (to encode the demo sentence); the other tables are not referenced here.
# NOTE(review): the contents of each table are defined in mappings.py — confirm there.
mapping, reverse_mapping, lower_mapping, lower_reverse_mapping = get_all_mappings()
input_feature_to_class_map = gen_input_feature_to_class_map()
input_feature_to_int_map = gen_input_feature_to_int_map()

In [None]:
# Build the network through the project helper, passing the unit count and
# dropout rate from the configuration cell.
# NOTE(review): the meaning of the positional literals 1 and 2 is defined by
# create_model in model.py; the 2 may correspond to OUTPUT_CLASSES — confirm.
model = create_model(1, 2, NUM_OF_UNITS, DROPOUT)

In [None]:
# Rebind `model` to whatever the project's compile step returns.
# NOTE(review): optimizer/loss/metric choices are presumably made inside
# compile_model (model.py); nothing about them is visible in this notebook.
model = compile_model(model)

In [None]:
# Load previously saved weights when the checkpoint file exists; otherwise the
# model is left exactly as built and compiled above.
# NOTE(review): the only save call visible in this notebook writes to
# "big_lstm.h5", not MODEL_SAVE_PATH, so no visible cell produces the file
# looked up here — confirm that is intended.
if os.path.isfile(MODEL_SAVE_PATH): 
    model.load_weights(MODEL_SAVE_PATH)

In [None]:
# TensorBoard logging callback used during training; graph export is disabled.
tensor_board = TensorBoard(
    batch_size=BATCH_SIZE,
    write_graph=False,
)

In [None]:
# Build the training arrays (pipeline called with its default second argument)
# and the arrays for the 'validation' split; TIME_SLICE_SIZE is passed to both calls.
# NOTE(review): array shapes are decided inside corpus.py. get_sample_weights
# reshapes the labels to rows of 50 entries, which presumes they line up with
# TIME_SLICE_SIZE — confirm.
X, Y = create_all_corpus_train_pipeline(TIME_SLICE_SIZE)
X_val, Y_val = create_all_corpus_train_pipeline(TIME_SLICE_SIZE, 'validation')

In [None]:
def get_sample_weights(Ys, positive_weight=5, time_slice_size=50):
    """Build per-timestep sample weights from a label array.

    Every label ``y`` is mapped to ``y * positive_weight + 1``: entries equal
    to 0 receive weight 1 and entries equal to 1 receive weight
    ``positive_weight + 1``. The weights are then laid out as rows of
    ``time_slice_size`` entries.

    Args:
        Ys: Array-like of numeric labels (a numpy array or anything
            ``np.asarray`` accepts, e.g. nested lists). Its total number of
            elements must be a multiple of ``time_slice_size``.
        positive_weight: Multiplier applied to each label before the base
            weight of 1 is added. Defaults to 5, the value that used to be
            hard-coded.
        time_slice_size: Number of columns in the returned array. Defaults to
            50, the value that used to be hard-coded; pass the notebook's
            ``TIME_SLICE_SIZE`` explicitly if that constant is changed.

    Returns:
        numpy.ndarray with shape ``(-1, time_slice_size)``. The input is not
        modified.

    Raises:
        ValueError: Propagated from ``reshape`` when the number of elements is
            not divisible by ``time_slice_size``.
    """
    # Coerce first so plain Python lists work; for an ndarray this is a no-op.
    labels = np.asarray(Ys)
    weights = labels * positive_weight + 1
    return weights.reshape((-1, time_slice_size))


In [None]:
# Derive weight arrays for the training labels and the validation labels;
# both are handed to model.fit in the training cell.
W = get_sample_weights(Y)
W_val = get_sample_weights(Y_val)

In [None]:

# Train on the in-memory arrays, weighting each timestep with W; the validation
# tuple carries its own weight array. (A disabled variant of this cell built a
# generator with corpus_training_data_generator('gutenberg', ...) instead.)
# NOTE(review): 2-D sample weights normally require the model to be compiled for
# temporal sample weighting — confirm compile_model does so.
model.fit(
    X,
    Y,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    sample_weight=W,
    validation_data=(X_val, Y_val, W_val),
    callbacks=[tensor_board],
)

In [None]:
# Demo input, lower-cased up front (presumably so the model has to restore the
# capitalisation on its own).
original_sentence = "Tim Smith works for Google in California. Towards summer the weather in London gets really warm".lower()
# Wrap the characters as a one-sentence batch and run them through the project's
# pad / encode helpers; the length passed to pad is the sentence's own length.
test_sentence = pad([list(original_sentence)], len(original_sentence))
test_sentence = encode_each_sentence(test_sentence, input_feature_to_int_map)

mapped_sentence = np.asarray(test_sentence)
# [0] keeps the prediction for the first (and only) sentence in the batch.
# NOTE(review): predict_classes is only available on older Keras Sequential
# models — confirm the installed Keras version still provides it.
predicted_result = model.predict_classes(mapped_sentence)[0]
print(predicted_result)
# Pair every padded character with its predicted label. `pad` is called a second
# time here rather than reusing the earlier result, and `predicted_result` is
# rebound from the model output to a list of (character, label) pairs.
predicted_result = list(zip(pad([list(original_sentence)], len(original_sentence))[0], predicted_result.tolist()))

def true_case(letter, label):
    """Return `letter` with the casing selected by `label`.

    Args:
        letter: A single-character string, or a value equal to 0 (presumably
            the padding placeholder), which is rendered as an empty string.
        label: 0 selects lower case, 1 selects upper case; any other value
            leaves the letter untouched.

    Returns:
        The re-cased character, or '' when `letter` equals 0.
    """
    # Placeholder entries contribute nothing to the output string.
    if letter == 0:
        return ''

    if label == 0:
        cased = letter.lower()
    elif label == 1:
        cased = letter.upper()
    else:
        # Unrecognised label: pass the character through unchanged.
        cased = letter
    return cased

# Re-case every (character, label) pair, then glue the characters back together;
# the joined string is this cell's displayed value.
predicted_result = [true_case(*pair) for pair in predicted_result]
''.join(predicted_result)



In [None]:
# Persist the trained model to disk.
# NOTE(review): this literal path matches neither MODEL_SAVE_PATH ('tc_model.h5',
# the file the checkpoint-loading cell looks for) nor LSTM_MODEL_SAVE_PATH
# ('lstm_model.h5') — confirm which file is intended.
model.save("big_lstm.h5")