In [27]:
import pandas as pd
import numpy as np
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, SpatialDropout1D, Bidirectional, Activation
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
from keras.callbacks import EarlyStopping
from keras.layers import Dropout

In [2]:
# load numpy array from csv file
from numpy import loadtxt
# load array
X_train = loadtxt('x_train_small.csv', delimiter=',')
X_train_hyp = loadtxt('x_train_hyp_small.csv', delimiter=',')
Y_train = loadtxt('y_train_small.csv', delimiter=',')
# print the array
X_train

array([[2., 6., 9., ..., 0., 0., 0.],
       [2., 6., 6., ..., 0., 0., 0.],
       [2., 6., 6., ..., 0., 0., 0.],
       ...,
       [2., 6., 6., ..., 0., 0., 0.],
       [2., 6., 6., ..., 0., 0., 0.],
       [2., 6., 6., ..., 0., 0., 0.]])

In [3]:
X_train_hyp

array([[2., 6., 9., ..., 0., 0., 0.],
       [2., 6., 6., ..., 0., 0., 0.],
       [2., 6., 6., ..., 0., 0., 0.],
       ...,
       [2., 6., 6., ..., 0., 0., 0.],
       [2., 6., 6., ..., 0., 0., 0.],
       [2., 6., 6., ..., 0., 0., 0.]])

In [4]:
Y_train

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [5]:
# only for testing
X_train_hyp = X_train_hyp[:, :100]

In [6]:
VOCAB_SIZE = 1254
INPUT_LENGTH = 100 #3000
EMBEDDING_DIM = 128

In [28]:
# model
def build_model(vocab_size, embedding_dim, input_length):
    model = Sequential()
    model.add(Embedding(vocab_size, embedding_dim, input_length=input_length))
    model.add(SpatialDropout1D(0.2))
    model.add(Bidirectional(LSTM(128, dropout=0.2, recurrent_dropout=0.2, return_sequences=True)))
    model.add((LSTM(41)))
    model.add(Activation('softmax'))
    return model

In [29]:
model = build_model(VOCAB_SIZE, EMBEDDING_DIM, INPUT_LENGTH)

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_8 (Embedding)      (None, 100, 128)          160512    
_________________________________________________________________
spatial_dropout1d_8 (Spatial (None, 100, 128)          0         
_________________________________________________________________
bidirectional_10 (Bidirectio (None, 100, 256)          263168    
_________________________________________________________________
lstm_15 (LSTM)               (None, 41)                48872     
_________________________________________________________________
activation_1 (Activation)    (None, 41)                0         
Total params: 472,552
Trainable params: 472,552
Non-trainable params: 0
_________________________________________________________________
None


In [30]:
epochs = 2
batch_size = 64

history = model.fit(X_train_hyp, Y_train, epochs=epochs, batch_size=batch_size,validation_split=0.1,callbacks=[EarlyStopping(monitor='val_loss', patience=3, min_delta=0.0001)])

Train on 180 samples, validate on 20 samples
Epoch 1/2
Epoch 2/2


In [31]:
example_x = X_train_hyp[0]
print(np.shape(example_x))
temp = model.predict(X_train_hyp)
print(np.sum(temp[0]))
# print(len(temp)), temp
print(temp[0])
for i in temp:
    print(np.argmax(i))


(100,)
0.99999994
[0.02374553 0.02314471 0.02317428 0.02721585 0.02391537 0.02711571
 0.02341498 0.02318092 0.02596592 0.02273191 0.02403271 0.02456891
 0.02497662 0.02458638 0.02415352 0.02605136 0.02547137 0.02339511
 0.02523265 0.02631804 0.02329337 0.02242875 0.02669866 0.02293356
 0.02578431 0.02323409 0.02555792 0.02288876 0.02473729 0.02544652
 0.02341451 0.02506028 0.02257454 0.02554857 0.02447547 0.02312406
 0.02253068 0.02763541 0.02311986 0.02458253 0.02253897]
37
3
37
37
37
3
37
37
37
37
37
37
5
37
3
37
37
37
37
37
37
3
37
3
37
37
37
37
37
37
3
5
37
37
37
5
37
37
37
3
5
37
37
3
37
37
3
3
37
37
37
37
37
37
37
3
3
37
3
37
37
37
37
3
37
37
5
37
37
3
37
37
37
37
3
37
3
37
37
3
37
37
37
3
37
37
37
3
37
3
37
37
5
37
5
37
37
37
3
37
37
3
37
3
37
37
37
37
37
3
37
3
37
37
37
37
37
37
37
37
37
37
37
37
37
37
37
37
37
37
37
37
37
3
37
37
37
37
37
37
37
37
37
37
37
37
5
37
37
37
37
3
37
3
5
3
37
37
37
37
37
37
37
5
5
3
3
37
37
3
37
37
37
5
3
5
37
3
3
37
3
37
37
3
37
5
37
37
37
37
37
3
