# Drawing Sequence Model

## Import modules

In [83]:
import numpy as np
import pandas as pd
from keras.callbacks import ReduceLROnPlateau
from keras.layers import Dense, Dropout, Activation, Flatten, GRU, Embedding
from keras.models import Sequential
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

from data import preprocess

## Preprocess

In [104]:
# Fixed seed so the train/test split is identical after every
# Restart Kernel -> Run All.
SEED = 42

sequences = pd.read_csv('data/draw-sequence.csv')

# Fill NaN with -1, then drop the first CSV column
# (presumably an index/id column -- confirm against the CSV).
data = sequences.fillna(-1).values[:, 1:]

# Reshape to 3D: (samples, 18 steps, 1 value per step), matching the
# input_shape=(n_features, 1) that the first GRU layer is built with.
data = data.reshape((-1, 18, 1))

label = preprocess.preprocess('data/raw/CD_PD.mat')
label = label['diagnosis'].astype('int32')

# Fail early, with a readable message, if the two sources are misaligned.
assert len(data) == len(label), (
    'sample/label count mismatch: %d sequences vs %d labels' % (len(data), len(label))
)

X_train, X_test, y_train, y_test = train_test_split(
    data, label, test_size=0.2, random_state=SEED
)

## Define Model

In [115]:
def get_model(n_features):
    """Build and compile a stacked-GRU binary classifier.

    The network is three GRU layers of 100 units each (all returning the full
    sequence, with 20% dropout after the first two), followed by a Flatten
    layer and a single sigmoid output unit. It is compiled with binary
    cross-entropy loss and the Adam optimizer.

    Args:
        n_features: Number of steps per input sequence; the model takes
            inputs of shape ``(n_features, 1)``.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()
    model.add(GRU(100, return_sequences=True, input_shape=(n_features, 1)))
    model.add(Dropout(0.2))
    model.add(GRU(100, return_sequences=True))
    model.add(Dropout(0.2))
    model.add(GRU(100, return_sequences=True))
    model.add(Flatten())
    model.add(Dense(1, activation='sigmoid'))

    # Callbacks such as ReduceLROnPlateau are an argument of `fit()`, not of
    # `compile()`: passed here they never adjust the learning rate, and newer
    # Keras releases reject unknown `compile()` keywords outright.
    model.compile(loss='binary_crossentropy', optimizer='adam')
    model.build()
    return model

# Print the layer-by-layer architecture for 18-step input sequences.
get_model(18).summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru_34 (GRU)                 (None, 18, 100)           30600     
_________________________________________________________________
dropout_2 (Dropout)          (None, 18, 100)           0         
_________________________________________________________________
gru_35 (GRU)                 (None, 18, 100)           60300     
_________________________________________________________________
dropout_3 (Dropout)          (None, 18, 100)           0         
_________________________________________________________________
gru_36 (GRU)                 (None, 18, 100)           60300     
_________________________________________________________________
flatten_10 (Flatten)         (None, 1800)              0         
_________________________________________________________________
dense_94 (Dense)             (None, 1)                 1801      
Total para

In [116]:
# Learning-rate scheduling is a training-time callback, so it is created here
# and handed to fit(): the rate is multiplied by 0.2 whenever the training
# loss has not improved for 5 epochs, down to a floor of 1e-5.
reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.2, patience=5, min_lr=1e-5)

# Axis 1 of X_train is the sequence length the model is built for.
model = get_model(X_train.shape[1])
model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=10,
    verbose=1,
    callbacks=[reduce_lr],
)

# Last expression of the cell: the loss on the held-out split.
model.evaluate(X_test, y_test, verbose=0)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


0.711036992073059

## Evaluation

In [117]:
# Scores strictly below this cut-off are labelled 0; everything else is labelled 1.
THRESHOLD = 0.5

# Raw sigmoid outputs for the held-out samples.
y_pred = model.predict(X_test, verbose=1, batch_size=64)

# Flatten to one score per sample, then binarise against the cut-off.
y_pred_bool = np.where(y_pred.reshape(-1) < THRESHOLD, 0, 1)

report = classification_report(y_test, y_pred_bool)
print(report)

              precision    recall  f1-score   support

           0       0.53      1.00      0.69        19
           1       1.00      0.19      0.32        21

   micro avg       0.57      0.57      0.57        40
   macro avg       0.76      0.60      0.51        40
weighted avg       0.78      0.57      0.50        40

