In [12]:
# import all dependencies first
import numpy as np
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dropout, Dense, LSTM
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Generating the datasets

## Problem 1 dataset

In [85]:
# Problem 1 data: NUM_SEQUENCES random bit strings, each exactly MAX_BITS long.
MAX_BITS = 50
NUM_SEQUENCES = 100000
seq_1 = np.random.randint(2, size=(NUM_SEQUENCES, MAX_BITS)).astype('float32')

# The label of a sequence is its parity: 1 when it holds an odd number of ones,
# 0 when the count is even.
non_zeros = np.count_nonzero(seq_1, axis=1)
labels_1 = (non_zeros % 2).astype('float32')

# Persist inputs and labels as integer text, one sequence / one label per line.
# np.savetxt returns None, so in_file and out_file are both None afterwards.
in_file = np.savetxt('input1.txt', seq_1, fmt="%i")
out_file = np.savetxt('result1.txt', labels_1, fmt="%i")

## Problem 2 dataset

In [86]:
# Problem 2 data: NUM_SEQUENCES random bit strings whose lengths vary
# between 1 and MAX_BITS (inclusive).
#
# The sequences are ragged, so they are stored in a 1-D object array that is
# filled element by element. Calling np.asarray on a ragged list of arrays
# raises ValueError on recent NumPy releases.
seq_2 = np.empty(NUM_SEQUENCES, dtype=object)
labels_2 = np.empty(NUM_SEQUENCES, dtype='float32')
for i in range(NUM_SEQUENCES):
    bit_size = np.random.randint(1, MAX_BITS + 1)
    val = np.random.randint(2, size=bit_size).astype('float32')
    seq_2[i] = val
    # Parity label: 1 for an odd number of ones, 0 for an even number.
    labels_2[i] = val.sum() % 2

# One sequence per line; lines have different lengths, so each row is written
# separately to the same open file handle.
with open('input2.txt', 'w') as in_file1:
    for row in seq_2:
        np.savetxt(in_file1, [row], fmt="%i")
# np.savetxt returns None, so out_file1 is None afterwards.
out_file1 = np.savetxt('result2.txt', labels_2, fmt="%i")

# Training & Testing LSTM Model

## Problem 1

### Load dataset 1 and split 80/20 for train/test

In [89]:
# Load the Problem 1 inputs and labels back from the text files.
# np.loadtxt already returns ndarrays, so no extra np.array copy is needed.
seq_1 = np.loadtxt('input1.txt', dtype='float')
labels_1 = np.loadtxt('result1.txt', dtype='int32')

# Append a trailing axis of size 1 to both arrays; the LSTM model is declared
# with one feature per timestep.
seq_1 = np.expand_dims(seq_1, -1)
labels_1 = np.expand_dims(labels_1, -1)

# split_ratio is the index of the first test sample: the first 80% of the rows
# are used for training and everything from that index on for testing.
# (Slicing the test set from split_ratio + 1 would silently drop one sample.)
split_ratio = int(len(seq_1) * 0.8)
train_set_in_1 = seq_1[:split_ratio]
test_set_in_1 = seq_1[split_ratio:]
train_set_out_1 = labels_1[:split_ratio]
test_set_out_1 = labels_1[split_ratio:]

# Every sample must land in exactly one of the two sets.
assert len(train_set_in_1) + len(test_set_in_1) == len(seq_1)
assert len(train_set_out_1) + len(test_set_out_1) == len(labels_1)

### Define model

In [13]:
# Problem 1 classifier: a single 32-unit LSTM reads the MAX_BITS-step bit
# sequence (one feature per step), followed by dropout and a sigmoid unit that
# outputs the probability of label 1.
model_1 = Sequential()
model_1.add(LSTM(units=32, input_shape=(MAX_BITS, 1)))
model_1.add(Dropout(0.2))
model_1.add(Dense(1, activation='sigmoid'))
model_1.summary()

# `learning_rate` is the supported argument name; `lr` is a deprecated alias
# that newer Keras releases reject.
adam = Adam(learning_rate=0.001)
model_1.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 32)                4352      
_________________________________________________________________
dropout (Dropout)            (None, 32)                0         
_________________________________________________________________
dense (Dense)                (None, 1)                 33        
Total params: 4,385
Trainable params: 4,385
Non-trainable params: 0
_________________________________________________________________


### Training

In [94]:
# Train for 20 epochs in batches of 128 without shuffling between epochs;
# validation_split=0.2 sets aside a fifth of the training data for validation.
model_1.fit(
    x=train_set_in_1,
    y=train_set_out_1,
    batch_size=128,
    epochs=20,
    validation_split=0.2,
    shuffle=False,
)

Train on 64000 samples, validate on 16000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7f6e021e56d0>

### Testing

In [96]:
# Score the trained Problem 1 model on the held-out test split; the model was
# compiled with binary cross-entropy loss and an accuracy metric.
eval_model_1 = model_1.evaluate(
    x=test_set_in_1,
    y=test_set_out_1,
)



## Problem 2 (with pre-padding)

### Load dataset 2, pre-pad, and split 80/20 for train/test

In [91]:
# Load the variable-length sequences: one sequence per line, bits separated by
# whitespace. The file is opened in a `with` block so the handle is closed, and
# str.split() with no argument discards the trailing newline so every token is
# parsed as a clean number.
with open('input2.txt', 'r') as in_file2:
    seq_2 = [[float(bit) for bit in line.split()] for line in in_file2]

# Pre-pad every sequence to MAX_BITS entries. The ragged rows are passed as a
# plain list of lists; building an ndarray from them first fails on recent
# NumPy releases.
seq_2 = pad_sequences(seq_2, dtype='float32', maxlen=MAX_BITS, padding='pre')
labels_2 = np.loadtxt('result2.txt', dtype='int32')

# There must be exactly one label per sequence.
assert len(seq_2) == len(labels_2)

# Append a trailing axis of size 1 to both arrays; the LSTM model is declared
# with one feature per timestep.
seq_2 = np.expand_dims(seq_2, -1)
labels_2 = np.expand_dims(labels_2, -1)

# split_ratio is the index of the first test sample: the first 80% of the rows
# are used for training and everything from that index on for testing.
# (Slicing the test set from split_ratio + 1 would silently drop one sample.)
split_ratio = int(len(seq_2) * 0.8)
train_set_in_2 = seq_2[:split_ratio]
test_set_in_2 = seq_2[split_ratio:]
train_set_out_2 = labels_2[:split_ratio]
test_set_out_2 = labels_2[split_ratio:]

# Every sample must land in exactly one of the two sets.
assert len(train_set_in_2) + len(test_set_in_2) == len(seq_2)

### Define Model

In [92]:
# Problem 2 classifier: same architecture as for Problem 1. A single 32-unit
# LSTM reads the pre-padded MAX_BITS-step sequence (one feature per step),
# followed by dropout and a sigmoid unit that outputs the probability of
# label 1.
model_2 = Sequential()
model_2.add(LSTM(units=32, input_shape=(MAX_BITS, 1)))
model_2.add(Dropout(0.2))
model_2.add(Dense(1, activation='sigmoid'))
model_2.summary()

# `learning_rate` is the supported argument name; `lr` is a deprecated alias
# that newer Keras releases reject.
adam = Adam(learning_rate=0.001)
model_2.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_8 (LSTM)                (None, 32)                4352      
_________________________________________________________________
dropout_8 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_8 (Dense)              (None, 1)                 33        
Total params: 4,385
Trainable params: 4,385
Non-trainable params: 0
_________________________________________________________________


### Training

In [95]:
# Train for 20 epochs in batches of 128 without shuffling between epochs;
# validation_split=0.2 sets aside a fifth of the training data for validation.
model_2.fit(
    x=train_set_in_2,
    y=train_set_out_2,
    batch_size=128,
    epochs=20,
    validation_split=0.2,
    shuffle=False,
)

Train on 64000 samples, validate on 16000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7f6dca2b2490>

### Testing

In [97]:
# Score the trained Problem 2 model on the held-out test split; the model was
# compiled with binary cross-entropy loss and an accuracy metric.
eval_model_2 = model_2.evaluate(
    x=test_set_in_2,
    y=test_set_out_2,
)

