# TIME series model prototype

In [37]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, StratifiedKFold

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Masking, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import Model, regularizers, layers
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.utils import to_categorical

## Loading time series 

In [38]:
raw_data_prep_dir = '../raw_data/pads-parkinsons-disease-smartwatch-dataset-1.0.0/preprocessed/'

In [39]:
file_list = pd.read_csv(raw_data_prep_dir  + "file_list.csv")
file_list

Unnamed: 0,resource_type,id,study_id,condition,disease_comment,age_at_diagnosis,age,height,weight,gender,handedness,appearance_in_kinship,appearance_in_first_grade_kinship,effect_of_alcohol_on_tremor,label
0,patient,1,PADS,Healthy,-,56,56,173,78,male,right,True,True,Unknown,0
1,patient,2,PADS,Other Movement Disorders,Left-Sided resting tremor and hypokinesia with...,69,81,193,104,male,right,False,,No effect,2
2,patient,3,PADS,Healthy,-,45,45,170,78,female,right,False,,Unknown,0
3,patient,4,PADS,Parkinson's,IPS akinetic-rigid type,63,67,161,90,female,right,False,,No effect,1
4,patient,5,PADS,Parkinson's,IPS tremordominant type,65,75,172,86,male,left,False,,Unknown,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
464,patient,465,PADS,Parkinson's,IPS mixed type,62,65,175,80,male,right,True,False,No effect,1
465,patient,466,PADS,Healthy,-,84,84,172,74,female,right,True,True,No effect,0
466,patient,467,PADS,Parkinson's,"Essential Tremor, starting IPS tremordominant ...",55,57,190,100,male,right,False,,Improvement,1
467,patient,468,PADS,Parkinson's,IPS mixed type,73,76,198,118,male,right,False,,No effect,1


In [40]:
n_id = len(file_list['id'])
n_id

469

In [42]:
time_data = []
y = []
X_to_split = []
for idx, subject_idx in enumerate(file_list['id']):
    file_name = raw_data_prep_dir + f'movement/{subject_idx:03d}_ml.bin'
    time_idx_data = np.fromfile(file_name, dtype=np.float32).reshape((-1, 976))
    time_data.append(time_idx_data)
    y_idx = file_list[file_list['id']==subject_idx].label.values
    X_id = idx
    y.append(y_idx)
    X_to_split.append(X_id)

X_to_split = np.array(X_to_split)
y = np.array(y).flatten()
print(X_to_split.shape, y.shape)

(469,) (469,)


In [43]:
channels = []
for task in ["Relaxed1", "Relaxed2", "RelaxedTask1", "RelaxedTask2", "StretchHold", "HoldWeight",
                "DrinkGlas", "CrossArms", "TouchNose", "Entrainment1", "Entrainment2"]:
    for device_location in ["LeftWrist", "RightWrist"]:
        for sensor in ["Acceleration", "Rotation"]:
            for axis in ["X", "Y", "Z"]:
                channel = f"{task}_{sensor}_{device_location}_{axis}"
                channels.append(channel)
len(channels)

132

In [44]:
time_data_array = np.array(time_data)
print(time_data_array.shape)

(469, 132, 976)


In [45]:
X_train_idx, X_test_idx, y_train, y_test = train_test_split(X_to_split, y, test_size=0.20, random_state=42, stratify = y)

In [46]:
X_train = time_data_array[X_train_idx]
X_test = time_data_array[X_test_idx]
print(len(X_train), len(X_test))
print(len(y_train), len(y_test))

375 94
375 94


In [21]:
n_obs, n_chan, n_time =  X_train.shape
print(n_obs, n_chan, n_time)
X_train_nn = X_train.reshape(n_obs, n_time, n_chan)
X_train_nn.shape

n_obs_test, n_chan_test, n_time_test =  X_test.shape
print(n_obs_test, n_chan_test, n_time_test)
X_test_nn = X_test.reshape(n_obs_test, n_time_test, n_chan_test)
X_test_nn.shape

375 132 976
94 132 976


(94, 976, 132)

In [22]:
y_test.shape
y_train.shape

(375,)

In [33]:
y_train_cat = to_categorical(y_train)
y_test_cat  = to_categorical(y_test)

In [24]:
def initialize_model_lstm(input_shape:tuple, dropout_rate: float = 0.3) -> Model:
    model = Sequential()
    model.add(Input(shape=input_shape))

    #two lstm layers to better capture
    model.add(LSTM(units=64, activation='tanh',
                   return_sequences=True,
                   dropout=dropout_rate,
                   recurrent_dropout=dropout_rate))
    model.add(LSTM(units=64, activation='tanh',
                   dropout=dropout_rate,
                   recurrent_dropout=dropout_rate))

    model.add(layers.Dropout(dropout_rate))

    #dense layer with batch normalization
    model.add(layers.Dense(32, activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.Dropout(dropout_rate))

    model.add(layers.Dense(16, activation='relu'))
    model.add(layers.Dropout(dropout_rate))

    #output layer with 3 classification
    model.add(layers.Dense(3, activation='softmax'))

    return model

In [30]:
model_lstm = initialize_model_lstm(input_shape=X_train_nn.shape[1:])

model_lstm.compile(
    loss="categorical_crossentropy",
    optimizer=Adam(learning_rate=0.01),
    metrics=['accuracy', 'precision', 'recall'])

In [31]:
model_lstm.summary()

In [None]:
es = EarlyStopping(
    monitor="val_loss",
    patience=50,
    restore_best_weights=True,
    verbose=0
)

reduce_lr = ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.1,
        patience=10,
        verbose=1,
        min_lr=0
    )

history = model_lstm.fit(
    X_train_nn,
    y_train_cat,
    validation_split = 0.2,
    epochs=100,
    batch_size = 32,
    callbacks=[es, reduce_lr],
    verbose=1
)

Epoch 1/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 1s/step - accuracy: 0.4900 - loss: 1.1433 - precision: 0.5314 - recall: 0.3100 - val_accuracy: 0.6267 - val_loss: 0.9842 - val_precision: 1.0000 - val_recall: 0.0133 - learning_rate: 0.0100
Epoch 2/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 974ms/step - accuracy: 0.5567 - loss: 1.0125 - precision: 0.6083 - recall: 0.4400 - val_accuracy: 0.6267 - val_loss: 0.9693 - val_precision: 0.4000 - val_recall: 0.0267 - learning_rate: 0.0100
Epoch 3/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 976ms/step - accuracy: 0.5500 - loss: 1.0452 - precision: 0.5659 - recall: 0.3867 - val_accuracy: 0.6267 - val_loss: 0.9845 - val_precision: 1.0000 - val_recall: 0.0267 - learning_rate: 0.0100
Epoch 4/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 1s/step - accuracy: 0.5667 - loss: 1.0237 - precision: 0.5955 - recall: 0.3533 - val_accuracy: 0.6267 - val_loss: 0.

In [190]:
X_train_nn[1,:]
print(X_train_nn[1,:].shape)
print(X_train_nn.shape[1:])
y_train.shape
X_train_nn.shape

(976, 132)
(976, 132)


(375, 976, 132)

In [None]:
model_lstm.evaluate(X_test_nn, y_test_cat)

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 201ms/step - accuracy: 0.5851 - loss: 0.9507 - precision: 0.5955 - recall: 0.5638


[0.9506561756134033, 0.585106372833252, 0.5955055952072144, 0.563829779624939]

In [None]:
y_pred = model_lstm.predict(X_test_nn)

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 379ms/step


array([[0.21000591, 0.5680015 , 0.22199266],
       [0.18966551, 0.6101459 , 0.20018859],
       [0.18902658, 0.58213204, 0.22884136],
       [0.20419902, 0.5760465 , 0.21975443],
       [0.20982492, 0.56663114, 0.22354393],
       [0.20402202, 0.5782959 , 0.21768206],
       [0.17212388, 0.6465889 , 0.18128718],
       [0.20449007, 0.571807  , 0.22370294],
       [0.16366723, 0.6563604 , 0.17997234],
       [0.19758236, 0.5915597 , 0.2108579 ],
       [0.19867754, 0.57953584, 0.22178653],
       [0.21316162, 0.5386098 , 0.24822855],
       [0.18712825, 0.6072134 , 0.20565838],
       [0.22325048, 0.5202358 , 0.2565138 ],
       [0.16836359, 0.6537382 , 0.17789826],
       [0.14742973, 0.69792247, 0.15464774],
       [0.16521971, 0.6599914 , 0.17478892],
       [0.25403407, 0.43610638, 0.30985948],
       [0.18997173, 0.56569713, 0.24433114],
       [0.24091442, 0.447228  , 0.31185758],
       [0.19428438, 0.59969765, 0.20601794],
       [0.207583  , 0.53746504, 0.25495198],
       [0.