In [1]:
from numpy.random import seed
seed(13)
from tensorflow import set_random_seed
set_random_seed(13)

from keras import Sequential
from keras.layers import Dense, Conv2D, Flatten, MaxPooling2D, MaxPooling3D, Dropout, Embedding, Bidirectional, SimpleRNN, LSTM, GRU
from keras import layers
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam
from keras.optimizers import RMSprop
from keras.utils import to_categorical
from keras.models import model_from_json 
from keras.models import load_model
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.externals import joblib
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics.pairwise import cosine_similarity
from tempfile import TemporaryFile
from operator import itemgetter
import numpy as np
import os
import pandas as pd
import random as rand
import os
import pickle

Using TensorFlow backend.


# Load Data

In [2]:
# %load conll_dictorizer.py
"""
CoNLL 2009 file readers and writers for the parts of speech.
Version with a class modeled as a vectorizer
"""
__author__ = "Pierre Nugues"

import regex as re


def save(file, corpus_dict, column_names):
    """
    Saves the corpus in a file
    :param file:
    :param corpus_dict:
    :param column_names:
    :return:
    """
    with open(file, 'w') as f_out:
        for sentence in corpus_dict:
            sentence_lst = []
            for row in sentence:
                items = map(lambda x: row.get(x, '_'), column_names)
                sentence_lst += '\t'.join(items) + '\n'
            sentence_lst += '\n'
            f_out.write(''.join(sentence_lst))


class Token(dict):
    pass


class CoNLLDictorizer:

    def __init__(self, column_names, sent_sep='\n\n', col_sep=' +'):
        self.column_names = column_names
        self.sent_sep = sent_sep
        self.col_sep = col_sep

    def fit(self):
        pass

    def transform(self, corpus):
        corpus = corpus.strip()
        sentences = re.split(self.sent_sep, corpus)
        return list(map(self._split_in_words, sentences))

    def fit_transform(self, corpus):
        return self.transform(corpus)

    def _split_in_words(self, sentence):
        rows = re.split('\n', sentence)
        return [Token(dict(zip(self.column_names,
                               re.split(self.col_sep, row))))
                for row in rows]


if __name__ == '__main__':
    
    BASE = os.getcwd()
    train_file = os.path.join(BASE, 'datasets/train.txt')

    column_names = ['id', 'form', 'lemma', 'cpos', 'pos', 'feats']
    train = open(train_file).read().strip()
    conll_dict = CoNLLDictorizer(column_names, col_sep='\t')
    train_dict = conll_dict.transform(train)

    print(train_dict[0])
    print(train_dict[0][0])
    print(type(train_dict[0][0]))
    #print(train_dict[0][0]['form'])
    print(train_dict[1])
    tok = Token({'id': '1', 'form': 'La', 'lemma': 'el', 'cpos': 'd', 'pos': 'da', 'feats': 'num=s|gen=f'})
    print(tok['form'])
    print('form' in tok)

    save('out', train_dict, column_names)

    tok_dict = {'id': '1', 'form': 'La', 'lemma': 'el', 'cpos': 'd', 'pos': 'da', 'feats': 'num=s|gen=f'}
    tok_dict2 = {'id': '1', 'form': 'La', 'lemma': 'el', 'cpos': 'd', 'pos': 'da', 'feats': 'num=s|gen=f'}

    tok_set = set(tok_dict)
    print(tok_set)

    tok_set = tok_set.union(tok_dict2)
    #print(tok_set)

    #print(tok.keys())

    # exit()
    word_set = set()
    word_set = set(tok_dict.values())
    #print(list(word_set))

    word_set = set()
    word_set = set(tok.values())
    #print(list(word_set))

    word_set = set()
    word_set.update(tok.values())
    #print(list(word_set))

    word_set = set()
    #print("Token value:", tok.values())
    word_set = word_set.union(set(tok.values()))
    #print(list(word_set))

[{'id': '-DOCSTART- -X- -X- O'}]
{'id': '-DOCSTART- -X- -X- O'}
<class '__main__.Token'>
[{'id': 'EU NNP B-NP B-ORG'}, {'id': 'rejects VBZ B-VP O'}, {'id': 'German JJ B-NP B-MISC'}, {'id': 'call NN I-NP O'}, {'id': 'to TO B-VP O'}, {'id': 'boycott VB I-VP O'}, {'id': 'British JJ B-NP B-MISC'}, {'id': 'lamb NN I-NP O'}, {'id': '. . O O'}]
La
True
{'cpos', 'form', 'feats', 'pos', 'id', 'lemma'}


In [3]:
# %load datasets.py
from conll_dictorizer import CoNLLDictorizer, Token
import os

def load_conll2009_pos():
    train_file = 'datasets\train.txt'
    dev_file = 'datasets\valid.txt'
    test_file = 'datasets\test.txt'
    test2_file = 'simple_pos_test.txt'

    column_names = ['id', 'form', 'lemma', 'plemma', 'pos', 'ppos']

    train_sentences = open(train_file).read().strip()
    dev_sentences = open(dev_file).read().strip()
    test_sentences = open(test_file).read().strip()
    test2_sentences = open(test2_file).read().strip()
    return train_sentences, dev_sentences, test_sentences, column_names

def load_conll2003_en():
    BASE_DIR = os.getcwd()
    train_file = BASE_DIR + '/datasets/train.txt'
    dev_file = BASE_DIR + '/datasets/valid.txt'
    test_file = BASE_DIR + '/datasets/test.txt'
    column_names = ['form', 'ppos', 'pchunk', 'ner']
    train_sentences = open(train_file).read().strip()
    dev_sentences = open(dev_file).read().strip()
    test_sentences = open(test_file).read().strip()
    return train_sentences, dev_sentences, test_sentences, column_names


if __name__ == '__main__':
    train_sentences, dev_sentences, test_sentences, column_names = load_conll2003_en()

    conll_dict = CoNLLDictorizer(column_names, col_sep=' +')
    train_dict = conll_dict.transform(train_sentences)
    val_dict = conll_dict.transform(dev_sentences)
    test_dict = conll_dict.transform(test_sentences)
    print(train_dict[0])
    print(train_dict[1])

[{'form': '-DOCSTART-', 'ppos': '-X-', 'pchunk': '-X-', 'ner': 'O'}]
[{'form': 'EU', 'ppos': 'NNP', 'pchunk': 'B-NP', 'ner': 'B-ORG'}, {'form': 'rejects', 'ppos': 'VBZ', 'pchunk': 'B-VP', 'ner': 'O'}, {'form': 'German', 'ppos': 'JJ', 'pchunk': 'B-NP', 'ner': 'B-MISC'}, {'form': 'call', 'ppos': 'NN', 'pchunk': 'I-NP', 'ner': 'O'}, {'form': 'to', 'ppos': 'TO', 'pchunk': 'B-VP', 'ner': 'O'}, {'form': 'boycott', 'ppos': 'VB', 'pchunk': 'I-VP', 'ner': 'O'}, {'form': 'British', 'ppos': 'JJ', 'pchunk': 'B-NP', 'ner': 'B-MISC'}, {'form': 'lamb', 'ppos': 'NN', 'pchunk': 'I-NP', 'ner': 'O'}, {'form': '.', 'ppos': '.', 'pchunk': 'O', 'ner': 'O'}]


In [4]:
def load_glove(file):
    embeddings_dict = {}
    glove = open(file, encoding='utf-8')
    
    for line in glove:
        line = line.strip().split()
        word = line[0]
        embedding_vec_word = np.array(line[1:], dtype='float32')
        embeddings_dict[word] = embedding_vec_word
        
    glove.close()
    return embeddings_dict

# Load and Save files

In [27]:
def load_file(file_name):
    with open('files/' + file_name + '.pkl', 'rb') as f:
        obj = pickle.load(f)
    return obj

In [28]:
def save_file(file_name, file):
    with open('files/' + file_name + '.pkl', 'wb') as f:
        pickle.dump(file, f)

In [29]:
embeddings_dict = load_file('embeddings_dict')

# Data preprocessing

In [30]:
# Train_dict is a list of lists of dictionaries
def extract_features(train_dict):
    X, y = [], []
    
    for sentence in train_dict:
        X_sentence = []
        y_sentence = []
        for word in sentence:
            w = word['form'].lower()
            n = word['ner']
            X_sentence.append(w)
            y_sentence.append(n)
    
        X.append(X_sentence)
        y.append(y_sentence)
    
    return X, y

#### Extract words and ner tags - X, Y

In [31]:
X, y = extract_features(train_dict)
print('Sentence words: ', X[1])
print('Sentence NER: ', y[1])

Sentence words:  ['eu', 'rejects', 'german', 'call', 'to', 'boycott', 'british', 'lamb', '.']
Sentence NER:  ['B-ORG', 'O', 'B-MISC', 'O', 'O', 'O', 'B-MISC', 'O', 'O']


#### Create vocabularies

In [32]:
def create_vocabulary(X, WORDS=True):
    X_vocabulary = set()
    
    if WORDS:
        X_vocabulary.add("UNKNOWN_WORD")
        
    for sentence in X:
        for word in sentence:
            X_vocabulary.add(word)
    
    return sorted(list(X_vocabulary))

In [33]:
X_vocabulary = create_vocabulary(X, WORDS=True)
print("Vocabulary size WORDS: ", len(X_vocabulary))

Vocabulary size WORDS:  21011


In [34]:
y_vocabulary = create_vocabulary(y, WORDS=False)
print("Vocabulary size NER: ", len(y_vocabulary))
nbr_of_classes = len(y_vocabulary) + 2

Vocabulary size NER:  9


In [35]:
y_vocabulary

['B-LOC', 'B-MISC', 'B-ORG', 'B-PER', 'I-LOC', 'I-MISC', 'I-ORG', 'I-PER', 'O']

#### Add words from GloVe

In [36]:
for word in embeddings_dict.keys():
    X_vocabulary.append(word)

X_vocabulary = sorted(list(set(X_vocabulary)))
total_word_count = len(X_vocabulary)
print('Words in the vocabulary total, X_vocabulary:', total_word_count)

Words in the vocabulary total, X_vocabulary: 402596


#### Create indices and inverted indices

In [37]:
def create_indices(X):
    return dict(enumerate(X), start=2) # 0 is padding, 1 is unknown

In [38]:
i = 2
X_indices_to_words = {}

for w in X_vocabulary:
    X_indices_to_words[i] = w
    i += 1

y_indices_to_len = {}
y_indices_to_len[0] = 'O' #PADDING
y_indices_to_len[1] = 'UNKNOWN_WORD'
i = 2

for l in y_vocabulary:
    if l != 'O':
        y_indices_to_len[i] = l
        i += 1
        
#X_indices_to_len = dict(enumerate(X_vocabulary), start=2)
#y_indices_to_len = dict(enumerate(y_vocabulary), start=2)

In [39]:
def create_inverted_indices(X):
    return {v: k for k, v in X.items()}

In [40]:
X_words_to_indices = create_inverted_indices(X_indices_to_words)
y_len_to_indices = create_inverted_indices(y_indices_to_len)

In [41]:
print('Word index:', list(X_words_to_indices.items())[:3])
print('LEN index:', list(y_len_to_indices.items())[:3])

Word index: [('!', 2), ('!!', 3), ('!!!', 4)]
LEN index: [('O', 0), ('UNKNOWN_WORD', 1), ('B-LOC', 2)]


#### Encode lists - Convert to indices

In [42]:
def encode_to_indices(X, X_words_to_indices, num_words=None):
    X_encoded = []
    for x in X:
        X_encoded_words = []
        if num_words:
            # We map the unknown words to the second first index of the matrix, for the test set
            X_encoded_words = list(map(lambda x: X_words_to_indices.get(x,1), x))
        else:
             X_encoded_words = list(map(X_words_to_indices.get, x))
            #for val in x:
            #    X_encoded_words.append(X_words_to_indices.get(val))
        X_encoded += [X_encoded_words]
    return X_encoded

In [43]:
X_only_indices = encode_to_indices(X, X_words_to_indices)
y_only_indices = encode_to_indices(y, y_len_to_indices)
print('First sentences, word indices', X_only_indices[4])
print("")
print('First sentences, LEN indices', y_only_indices[4])

First sentences, word indices [359699, 143138, 107474, 318005, 271940, 361488, 195554, 126463, 391264, 161837, 48420, 363369, 109493, 363369, 332754, 85853, 218261, 375629, 324031, 123767, 389005, 231734, 112304, 126757, 92049, 72526, 366525, 363369, 330316, 936]

First sentences, LEN indices [0, 4, 8, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


# Embeddings

In [44]:
def most_similar_embeddings(embeddings_dict, key_word, n):
    
    d = {}
    key_word_embedding = embeddings_dict[key_word]
    
    for word, embedding in embeddings_dict.items():
        d_val = cosine_similarity([key_word_embedding], [embedding])[0][0]
        d[word] = d_val
    
    d_sorted = sorted(d.items(), key=itemgetter(1), reverse=True)[1:n+1] # Do not return table
        
    return d_sorted

In [105]:
#table = most_similar_embeddings(embeddings_dict, 'table', 5)
#france = most_similar_embeddings(embeddings_dict, 'france', 5)
#sweden = most_similar_embeddings(embeddings_dict, 'sweden', 5)

In [106]:
print(table)
print(france)
print(sweden)

'''

[('tables', 0.8021162), ('place', 0.6582378), ('bottom', 0.65597194), ('room', 0.6543691), ('side', 0.6433667)]
[('belgium', 0.8076422), ('french', 0.80043775), ('britain', 0.79505277), ('spain', 0.75574636), ('paris', 0.7481586)]
[('denmark', 0.86244005), ('norway', 0.807325), ('finland', 0.79064953), ('netherlands', 0.74684644), ('austria', 0.7466836)]

'''

[('tables', 0.8021162), ('place', 0.6582378), ('bottom', 0.65597194), ('room', 0.6543691), ('side', 0.6433667)]
[('belgium', 0.8076422), ('french', 0.80043775), ('britain', 0.79505277), ('spain', 0.75574636), ('paris', 0.7481586)]
[('denmark', 0.86244005), ('norway', 0.807325), ('finland', 0.79064953), ('netherlands', 0.74684644), ('austria', 0.7466836)]


In [45]:
def fill_glove_matrix(X_vocabulary, embeddings_dict):
    for word in X_vocabulary:
        if word in embeddings_dict:
            i = X_words_to_indices[word]
            embedding = embeddings_dict[word]
            word_embedding_matrix[i] = embedding
    return word_embedding_matrix

In [46]:
word_embedding_matrix = np.random.random((len(X_vocabulary)+2, 100))
word_embedding_matrix = fill_glove_matrix(X_vocabulary, embeddings_dict)
print('Shape of embedding matrix:', word_embedding_matrix.shape)

Shape of embedding matrix: (402598, 100)


# Padding the sequences

#### Find the longest sequence in either train, val, test

In [47]:
max_seq_len_train = max(len(s) for s in X)
print("Maximum sentence length in train: ", max_seq_len_train)

X_val, _ = extract_features(val_dict)
max_seq_len_val = max(len(s) for s in X_val)
print("Maximum sentence length in val: ", max_seq_len_val)

X_test, _ = extract_features(test_dict)
max_seq_len_test = max(len(s) for s in X_test)
print("Maximum sentence length in test: ", max_seq_len_test)

max_seq_len = max(max_seq_len_train, max_seq_len_val, max_seq_len_test)

print("Maximum sentence length total: ", max_seq_len)

Maximum sentence length in train:  113
Maximum sentence length in val:  109
Maximum sentence length in test:  124
Maximum sentence length total:  124


In [48]:
X_train = pad_sequences(X_only_indices, maxlen=max_seq_len)
y_train = pad_sequences(y_only_indices, maxlen=max_seq_len)

# The number of classes and 0 (padding symbol)
y_train = to_categorical(y_train, num_classes=nbr_of_classes + 1)

# Load validation data

In [31]:
X_val, y_val = extract_features(val_dict)

X_only_indices_val = encode_to_indices(X_val, X_words_to_indices, num_words=total_word_count)
y_only_indices_val = encode_to_indices(y_val, y_len_to_indices)

X_val = pad_sequences(X_only_indices_val, maxlen=max_seq_len)
y_val = pad_sequences(y_only_indices_val, maxlen=max_seq_len)

y_val = to_categorical(y_val, num_classes=nbr_of_classes + 1)

[     0      0      0      0      0      0      0      0      0      0
      0      0      0      0      0      0      0      0      0      0
      0      0      0      0      0      0      0      0      0      0
      0      0      0      0      0      0      0      0      0      0
      0      0      0      0      0      0      0      0      0      0
      0      0      0      0      0      0      0      0      0      0
      0      0      0      0      0      0      0      0      0      0
      0      0      0      0      0      0      0      0      0      0
      0      0      0      0      0      0      0      0      0      0
      0      0      0      0      0      0      0      0      0      0
      0      0      0      0      0      0      0      0      0      0
      0      0      0 113352    679 221876 354361 275585  63472 364506
  49151 192164 381012    936]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 

# Params

In [52]:
epochs = 5
batch_size = 64

# Model

#### SIMPLE RNN

In [33]:
def build_simpleRNN():
    model = Sequential()
    model.add(Embedding(total_word_count+2,
                               100,
                               mask_zero=True,
                               input_length=max_seq_len))
    model.layers[0].set_weights([word_embedding_matrix])
    model.layers[0].trainable = True
    model.add(SimpleRNN(32, return_sequences=True))
    model.add(Dense(nbr_of_classes + 1, activation='softmax'))
    return model

#### LSTM 

In [34]:
def build_LSTM():
    model = Sequential()
    model.add(Embedding(total_word_count+2,
                               100,
                               mask_zero=True,
                               input_length=max_seq_len))
    model.layers[0].set_weights([word_embedding_matrix])
    model.layers[0].trainable = True
    model.add(LSTM(32, return_sequences=True))
    model.add(LSTM(64, return_sequences=True))
    model.add(LSTM(128, return_sequences=True))
    model.add(Dense(nbr_of_classes + 1, activation='softmax'))
    return model

#### LSTM BIDIRECTIONAL

In [35]:
def build_LSTM_BIDIRECTIONAL():
    model = Sequential()
    model.add(Embedding(total_word_count+2,
                               100,
                               mask_zero=True,
                               input_length=max_seq_len))
    model.layers[0].set_weights([word_embedding_matrix])
    model.layers[0].trainable = True
    model.add(SimpleRNN(512, dropout=0.2, recurrent_dropout=0.2, return_sequences=True))
    model.add(Bidirectional(LSTM(256, dropout=0.2, recurrent_dropout=0.2, return_sequences=True)))
    model.add(Dense(nbr_of_classes + 1, activation='softmax'))
    return model

In [36]:
model = build_LSTM_BIDIRECTIONAL()

In [7]:
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['acc'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 124, 100)          40259800  
_________________________________________________________________
simple_rnn_1 (SimpleRNN)     (None, 124, 512)          313856    
_________________________________________________________________
bidirectional_1 (Bidirection (None, 124, 512)          1574912   
_________________________________________________________________
dense_1 (Dense)              (None, 124, 12)           6156      
Total params: 42,154,724
Trainable params: 42,154,724
Non-trainable params: 0
_________________________________________________________________


In [53]:
# Set callback functions to early stop training and save the best model so far
callbacks = [EarlyStopping(monitor='val_acc', patience=2),
             ModelCheckpoint(filepath='best_model.h5', monitor='val_acc', save_best_only=True)]

In [76]:
model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=1, callbacks=callbacks, validation_data=(X_val, y_val))

Train on 14987 samples, validate on 3466 samples
Epoch 1/1


<keras.callbacks.History at 0x11f22f11550>

# Evaluate

#### Load test set

In [53]:
X_test, y_test = extract_features(test_dict)

# We create the parallel sequences of indexes
X_only_indices_test = encode_to_indices(X_test, X_words_to_indices, num_words=total_word_count)
y_only_indices_test = encode_to_indices(y_test, y_len_to_indices)

X_test_padded = pad_sequences(X_only_indices_test, maxlen=max_seq_len)
y_test_padded = pad_sequences(y_only_indices_test, maxlen=max_seq_len)

# The number of LEN classes and 0 (padding symbol)
y_test_vectorized = to_categorical(y_test_padded, num_classes=nbr_of_classes + 1)

In [54]:
print(len(X_test), len(y_test), len(X_test_padded), len(y_test_padded))

3684 3684 3684 3684


In [55]:
loss, acc = model.evaluate(X_test_padded, y_test_vectorized, batch_size=batch_size, verbose=1)



In [56]:
print("Loss: ", loss)
print("Accuracy: ", acc)

Loss:  0.15566251565268452
Accuracy:  0.9671301056714839


#### Predict test set

In [96]:
y_pred_probs = model.predict(X_test_padded)

#### Remove padding and extract pred with highest probability

In [97]:
# Remove padding
y_pred_probs_no_padd = []
for sent_nbr, sent_len_predictions in enumerate(y_pred_probs):
    y_pred_probs_no_padd += [sent_len_predictions[-len(X_test[sent_nbr]):]]
print(y_pred_probs_no_padd[0])

[[1.00000000e+00 1.02456221e-11 3.63981485e-08 1.38617118e-08
  1.21875043e-08 2.77253935e-08 6.79346357e-10 1.05724824e-08
  5.76844184e-10 7.01748548e-10 4.52988255e-12 4.47230274e-12]]


In [98]:
# Extract prediction with highest probability and convert indices to symbols
y_pred = []
for sentence in y_pred_probs_no_padd:
    len_idx = list(map(np.argmax, sentence))
    len_cat = list(map(y_indices_to_len.get, len_idx))
    y_pred += [len_cat]

print(y_pred[:3])
print("")
print(y_test[:3])

[['O'], ['O', 'O', 'B-LOC', 'O', 'O', 'O', 'O', 'B-LOC', 'O', 'O', 'O', 'O'], ['B-PER', 'I-PER']]

[['O'], ['O', 'O', 'B-LOC', 'O', 'O', 'O', 'O', 'B-PER', 'O', 'O', 'O', 'O'], ['B-PER', 'I-PER']]


#### Evaluate

In [103]:
total, correct, unknown, correct_unknown = 0, 0, 0, 0

for id_s, sentence in enumerate(X_test):
    for id_w, word in enumerate(sentence):
        if y_pred[id_s][id_w] == y_test[id_s][id_w]:
            correct += 1
            
        # The word is not in the dictionary
        if word not in X_words_to_indices:
            unknown += 1
            if y_pred[id_s][id_w] == y_test[id_s][id_w]:
                correct_unknown += 1

total = correct + correct_unknown + unknown

#confusion_matrix(y_test, y_pred)

print("Total; ", total)
print("Correct: ", correct)
print("Acc: ", correct / total)
print("Unknown: ", unknown)
print("Unknown acc: ", correct_unknown / unknown)

Total;  47274
Correct:  45155
Acc:  0.9551762067944325
Unknown:  1143
Unknown acc:  0.8538932633420823


#### Write result to file

In [100]:
f = open("test_f1.txt", "r")
f_res = open("test_f1_res_12.txt", "w")

content = f.readlines()
id_f = 0

for id_s, sentence in enumerate(X_test):   
    for id_w, word in enumerate(sentence):   
        
        word_line = str(content[id_f])
        
        if word_line == '\n':
            f_res.write("\n")
            id_f += 1
        
        word_line = str(content[id_f])
        
        # Check if word is in this content line
        if word.lower() in word_line.lower():
            pred = y_pred[id_s][id_w]
            to_file = word_line.strip() + " " + str(pred) + "\n"
            f_res.write(to_file)
            id_f += 1

f_res = open("test_f1_res_12.txt", "r")         
print(len(content))
print(len(f_res.readlines()))

f.close()
f_res.close()

50349
50349


# Load and save models

In [4]:
def save_model(model, name):
    # Serialize model to JSON
    model_json = model.to_json()
    with open("models/" + name + ".json", "w") as json_file:
        json_file.write(model_json)

    # Serialize weights to HDF5
    model.save_weights("models/" + name + ".h5")
    print("Saved model to disk")

In [5]:
def load_model(name):
    json_file = open("models/" + name + ".json", 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    loaded_model = model_from_json(loaded_model_json)
    loaded_model.load_weights("models/" + name + ".h5")
    model = loaded_model
    print("Loaded model from disk")
    return model

In [74]:
#save_model(model, 'x') # NODES, EPOCHS, BATCH_SIZE

Saved model to disk


In [6]:
#model = load_model('LSTM_F1_84')

Loaded model from disk
