In [14]:
import pandas as pd
import numpy as np
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, SpatialDropout1D, Bidirectional
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
from keras.callbacks import EarlyStopping
from keras.layers import Dropout

In [15]:
# Read the three training arrays from their comma-separated files.
from numpy import loadtxt

# Files are read in the order X_train, X_train_hyp, Y_train.
X_train, X_train_hyp, Y_train = (
    loadtxt(csv_name, delimiter=',')
    for csv_name in ('x_train_small.csv', 'x_train_hyp_small.csv', 'y_train_small.csv')
)
# Echo X_train as the cell's output.
X_train

array([[2., 6., 9., ..., 0., 0., 0.],
       [2., 6., 6., ..., 0., 0., 0.],
       [2., 6., 6., ..., 0., 0., 0.],
       ...,
       [2., 6., 6., ..., 0., 0., 0.],
       [2., 6., 6., ..., 0., 0., 0.],
       [2., 6., 6., ..., 0., 0., 0.]])

In [16]:
# Display X_train_hyp (read from 'x_train_hyp_small.csv'); the repr elides most entries.
X_train_hyp

array([[2., 6., 9., ..., 0., 0., 0.],
       [2., 6., 6., ..., 0., 0., 0.],
       [2., 6., 6., ..., 0., 0., 0.],
       ...,
       [2., 6., 6., ..., 0., 0., 0.],
       [2., 6., 6., ..., 0., 0., 0.],
       [2., 6., 6., ..., 0., 0., 0.]])

In [17]:
# Display Y_train (read from 'y_train_small.csv'), later passed to model.fit as the targets.
Y_train

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [18]:
# only for testing: keep just the first 100 columns of every row, which equals
# the INPUT_LENGTH value (100) set in a later cell.
# NOTE(review): this rebinds X_train_hyp, so the full-width array can only be
# recovered by re-running the loading cell, and the literal 100 has to be kept
# in sync with INPUT_LENGTH by hand.
X_train_hyp = X_train_hyp[:, :100]

In [19]:
# Hyperparameters handed to build_model() when the model is created.
VOCAB_SIZE = 1254  # first argument of the Embedding layer; presumably max token id + 1 — TODO confirm
INPUT_LENGTH = 100  # sequence length given to the Embedding layer; earlier note read "3000", presumably the untruncated length — TODO confirm
EMBEDDING_DIM = 128  # second argument of the Embedding layer (size of each embedding vector)

In [20]:
# model
def build_model(vocab_size, embedding_dim, input_length,
                num_classes=41, lstm_units=128, dropout_rate=0.2, mask_zero=False):
    """Build an uncompiled bidirectional-LSTM sequence classifier.

    The stack is Embedding -> SpatialDropout1D -> Bidirectional(LSTM) ->
    Dense(softmax). With the default keyword arguments the architecture is
    exactly the one this function produced before the extra parameters were
    added, so existing three-argument calls are unaffected.

    Args:
        vocab_size: Number of rows in the embedding table (Embedding input_dim).
        embedding_dim: Size of each embedding vector.
        input_length: Number of time steps in every input sequence.
        num_classes: Width of the softmax output layer. Defaults to 41.
        lstm_units: Hidden units per LSTM direction. Defaults to 128.
        dropout_rate: Rate shared by the spatial dropout and by the LSTM's
            input and recurrent dropout. Defaults to 0.2.
        mask_zero: Forwarded to the Embedding layer. When True, token id 0 is
            treated as padding and masked in the downstream layers.
            Defaults to False.

    Returns:
        A keras Sequential model; the caller is responsible for compile().
    """
    model = Sequential()
    model.add(Embedding(vocab_size, embedding_dim,
                        input_length=input_length, mask_zero=mask_zero))
    model.add(SpatialDropout1D(dropout_rate))
    model.add(Bidirectional(LSTM(lstm_units,
                                 dropout=dropout_rate,
                                 recurrent_dropout=dropout_rate)))
    model.add(Dense(num_classes, activation='softmax'))
    return model

In [21]:
# Build the network from the notebook-level hyperparameters and compile it
# with categorical cross-entropy, the Adam optimizer and an accuracy metric.
model = build_model(VOCAB_SIZE, EMBEDDING_DIM, INPUT_LENGTH)

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the layer table itself and returns None, so wrapping it in
# print() only appended a stray "None" line after the table.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 100, 128)          160512    
_________________________________________________________________
spatial_dropout1d_2 (Spatial (None, 100, 128)          0         
_________________________________________________________________
bidirectional_2 (Bidirection (None, 256)               263168    
_________________________________________________________________
dense_2 (Dense)              (None, 41)                10537     
Total params: 434,217
Trainable params: 434,217
Non-trainable params: 0
_________________________________________________________________
None


In [24]:
# Training configuration for this run.
epochs, batch_size = 10, 64

# validation_split=0.1 holds out 10% of the samples for validation
# (20 of 200 in the recorded run); the returned History object is kept
# in `history`.
history = model.fit(
    X_train_hyp,
    Y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.1,
)
# Early stopping is currently disabled. To turn it on, pass:
# callbacks=[EarlyStopping(monitor='val_loss', patience=3, min_delta=0.0001)]

Train on 180 samples, validate on 20 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [13]:
# Sanity-check the model's outputs on the training inputs.
# NOTE(review): this cell's recorded execution count (13) is lower than the
# training cell's (24), so the saved output presumably predates that fit call
# — re-run after training before reading anything into it.
example_x = X_train_hyp[0]
print(example_x.shape)

temp = model.predict(X_train_hyp)
# The first row of class probabilities: its total, then the row itself.
print(temp[0].sum())
print(temp[0])
# Most probable class index for every sample, one per line.
for predicted_class in temp.argmax(axis=1):
    print(predicted_class)


(100,)
1.0
[0.02417845 0.02402054 0.0237584  0.02598362 0.02378608 0.02602911
 0.02391152 0.02382103 0.02518614 0.02401045 0.02397499 0.0240003
 0.02439923 0.02499531 0.0241197  0.02476963 0.02414231 0.0243187
 0.02397819 0.02415072 0.02444125 0.02474251 0.02457397 0.02408241
 0.02527606 0.02433513 0.02463834 0.02430664 0.02455241 0.02471431
 0.02430093 0.02434285 0.02396849 0.02455771 0.02435185 0.02411873
 0.02407417 0.02505759 0.02416527 0.02409506 0.02376997]
5
3
5
3
3
3
5
3
3
5
3
5
5
5
3
5
5
5
5
5
5
3
5
3
3
3
5
3
5
3
3
5
3
5
5
5
5
5
3
3
5
5
5
5
5
3
3
3
3
5
5
5
3
3
5
3
3
3
5
3
5
3
5
5
5
5
5
5
3
5
5
5
3
5
3
3
5
5
5
5
5
5
3
5
5
5
3
5
5
3
5
5
5
5
5
5
3
5
5
3
3
5
5
3
3
3
5
5
5
5
5
3
5
5
5
5
5
5
3
5
5
5
5
5
5
5
3
5
5
3
5
5
5
3
3
5
5
5
5
5
5
5
3
3
5
5
5
3
5
3
5
5
5
5
5
3
5
5
5
5
5
5
3
5
5
3
3
5
5
3
3
5
5
5
5
5
5
5
5
5
3
3
3
3
5
5
3
3
5
3
5
5
5
5
5
5
5
5
3
5
