In [1]:
# python packages pd
import numpy as np
import matplotlib.pyplot as plt
import sys
import os
import inspect

import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, SpatialDropout1D, Bidirectional, Activation
from keras.layers import CuDNNLSTM
from keras.utils.np_utils import to_categorical
# from keras.callbacks import EarlyStopping
from keras.layers import Dropout

from sklearn.model_selection import train_test_split
import importlib
import utilis

# custom
from keras import backend as K
from keras.layers import Layer
from keras import initializers, regularizers, constraints, Input
from keras.models import Model

Using TensorFlow backend.


In [2]:
sys.path.append("..")

In [3]:
# custom python scripts
from packages import generator

In [4]:
importlib.reload(generator)

<module 'packages.generator' from '../packages/generator.py'>

In [5]:
# # check version
# print(inspect.getsource(generator.Keras_DataGenerator))

# Bidirectional LSTM with Hypotheses

In [6]:
# Check that you are running GPU's
utilis.GPU_checker()





You are runnning an instance with 4 GPU's


In [7]:
utilis.aws_setup()

AWS SETUP SHOULD BE COMPLETE, we are on <botocore.client.S3 object at 0x7fe9f4122358>


# Config, generators and train

In [17]:
INPUT_TENSOR_NAME = "inputs_input"
SIGNATURE_NAME = "serving_default"
W_HYP = True
LEARNING_RATE = 0.001
BATCH_SIZE = 64

# constnats
VOCAB_SIZE = 1254
INPUT_LENGTH = 3000 if W_HYP else 1000
EMBEDDING_DIM = 512

print(INPUT_LENGTH)

3000


In [18]:
importlib.reload(generator)

# generators
training_generator = generator.Keras_DataGenerator(data_dir='',  dataset='train_new', w_hyp=W_HYP)
print()
validation_generator = generator.Keras_DataGenerator(data_dir='', dataset='valid_new', w_hyp=W_HYP)

# of batches:  5888.0
Generating examples from a set of 376832 examples 


# of batches:  1600.0
Generating examples from a set of 102400 examples 



In [50]:
# custom dot product function
def dot_product(x, kernel):
    if K.backend() == 'tensorflow':
        return K.squeeze(K.dot(x, K.expand_dims(kernel)), axis=-1)
    else:
        return K.dot(x, kernel)
    
# find a way to return attention weight vector a
class AttentionWithContext(Layer):
    def __init__(self,
                 W_regularizer=None, u_regularizer=None, b_regularizer=None,
                 W_constraint=None, u_constraint=None, b_constraint=None,
                 bias=True, **kwargs):

        self.supports_masking = True
        # initialization of all learnable params
        self.init = initializers.get('glorot_uniform')
        
        # regularizers for params, init as None
        self.W_regularizer = regularizers.get(W_regularizer)
        self.u_regularizer = regularizers.get(u_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)
        
        # constraints for params, init as None
        self.W_constraint = constraints.get(W_constraint)
        self.u_constraint = constraints.get(u_constraint)
        self.b_constraint = constraints.get(b_constraint)

        self.bias = bias
        super(AttentionWithContext, self).__init__(**kwargs)

    def build(self, input_shape):
#         assert len(input_shape) == 3
        
        # weight matrix
        self.W = self.add_weight((input_shape[-1], input_shape[-1],),
                                 initializer=self.init,
                                 name='{}_W'.format(self.name),
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)
        # bias term
        if self.bias:
            self.b = self.add_weight((input_shape[-1],),
                                     initializer='lecun_uniform',
                                     name='{}_b'.format(self.name),
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)
        
        # context vector
        self.u = self.add_weight((input_shape[-1],),
                                 initializer=self.init,
                                 name='{}_u'.format(self.name),
                                 regularizer=self.u_regularizer,
                                 constraint=self.u_constraint)

        super(AttentionWithContext, self).build(input_shape)
        
    def compute_mask(self, input, input_mask=None):
        # do not pass the mask to the next layers
        return None

    def call(self, x, mask=None):
        uit = dot_product(x, self.W)

        if self.bias:
            uit += self.b

        uit = K.tanh(uit)
        ait = dot_product(uit, self.u)

        a = K.exp(ait)
        
        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask, K.floatx())

        # in some cases especially in the early stages of training the sum may be almost zero
        # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
#         a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon() * 100, K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1)

    def compute_output_shape(self, input_shape):
        return input_shape[0], input_shape[-1]  

    
# model
def build_model(vocab_size, embedding_dim, input_length):
    sequence_input = Input(shape=(input_length,), dtype='int32')
    embedded_sequences = Embedding(vocab_size, embedding_dim, input_length=input_length)(sequence_input)
    output_1 = SpatialDropout1D(0.2)(embedded_sequences)
    output_2 = Bidirectional(CuDNNLSTM(512, return_sequences=True))(output_1)
    context_vec = AttentionWithContext()(output_2)
    predictions = Dense(41, activation='softmax')(context_vec)
    model = Model(inputs=sequence_input, outputs=predictions)
    return model

### testing generator

In [59]:
model = build_model(VOCAB_SIZE, EMBEDDING_DIM, INPUT_LENGTH)

model.compile(loss='categorical_crossentropy', optimizer='SGD', metrics=['accuracy'])
print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_10 (InputLayer)        (None, 3000)              0         
_________________________________________________________________
embedding_10 (Embedding)     (None, 3000, 512)         642048    
_________________________________________________________________
spatial_dropout1d_10 (Spatia (None, 3000, 512)         0         
_________________________________________________________________
bidirectional_10 (Bidirectio (None, 3000, 1024)        4202496   
_________________________________________________________________
attention_with_context_10 (A (None, 1024)              1050624   
_________________________________________________________________
dense_8 (Dense)              (None, 41)                42025     
Total params: 5,937,193
Trainable params: 5,937,193
Non-trainable params: 0
_________________________________________________________________


In [60]:
## ARE YOU LOADING A MODEL IF YES RUN TEH FOLLOWING LINES 
# from keras.models import model_from_json
# json_file = open('model.json', 'r')
# loaded_model_json = json_file.read()
# json_file.close()
# loaded_model = model_from_json(loaded_model_json)
# # load weights into new model
# loaded_model.load_weights("model.h5")
# print("Loaded model from disk")
# # REMEMEBER TO COMPILE 
# loaded_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [61]:
#overwriting model
# model = loaded_model

In [77]:
model.layers[3].get_weights()

[array([[-2.1365041e-02, -3.7061837e-02, -8.3468631e-03, ...,
          2.6247233e-05,  4.5909718e-02, -1.6048344e-02],
        [-4.6823993e-02, -3.2539304e-02,  1.6552424e-02, ...,
         -3.5134781e-02, -3.1592704e-02,  5.9929532e-03],
        [ 1.8258411e-02,  1.5940977e-02, -2.7254185e-02, ...,
          2.0595365e-03, -4.2403486e-02, -3.0106008e-02],
        ...,
        [ 3.1335138e-02,  2.5152473e-02,  2.1871361e-03, ...,
         -4.0942155e-02, -3.0095126e-02,  3.1416127e-03],
        [ 4.5849313e-03,  3.8136318e-02,  3.3987521e-03, ...,
          4.6139356e-02,  3.7941877e-02,  2.9278230e-03],
        [-3.8018864e-02,  2.6216814e-02, -2.6242482e-02, ...,
          1.0189861e-02, -1.8183669e-02, -1.9201607e-02]], dtype=float32),
 array([[-0.02332686,  0.02809873, -0.01245729, ...,  0.02741083,
         -0.01322309,  0.01913308],
        [-0.01871773, -0.01041946, -0.03736302, ...,  0.0449085 ,
         -0.02017113,  0.02510605],
        [ 0.03911094,  0.02257312, -0.0097738 

In [78]:
%%time
#try and make it run until 9 am GMT+1
n_epochs = 1
history = model.fit_generator(generator=training_generator,
                            validation_data=validation_generator,
                            verbose=1,
                            use_multiprocessing=True,
                            epochs=n_epochs)

Epoch 1/1
CPU times: user 14min 28s, sys: 4min 20s, total: 18min 48s
Wall time: 36min 43s


## Save modek

In [None]:
# FOR SAVING MODEL
model_json = model_GPU.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
model.save_weights("model.h5")
print("Saved model to disk")

In [79]:
#WARNING_DECIDE_HOW_TO_NAME_LOG
#descriptionofmodel_personwhostartsrun
#e.g. LSTM_128encoder_etc_tanc
LOSS_FILE_NAME = "forjeff1"

#WARNING NUMBER 2 - CURRENTLY EVERYTIME YOU RERUN THE CELLS BELOW THE FILES WITH THOSE NAMES GET WRITTEN OVER

In [80]:
# save history - WARNING FILE NAME 
utilis.history_saver_bad(history, LOSS_FILE_NAME)

FileNotFoundError: [Errno 2] No such file or directory: 'training_logs/forjeff1.csv'