# TIME series model prototype

In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, StratifiedKFold

from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam, AdamW, schedules
from tensorflow.keras import Model, regularizers, layers
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.utils import to_categorical

from sklearn.metrics import balanced_accuracy_score

## Loading time series 

In [7]:
raw_data_prep_dir = '../raw_data/pads-parkinsons-disease-smartwatch-dataset-1.0.0/preprocessed/'

In [8]:
file_list = pd.read_csv(raw_data_prep_dir  + "file_list.csv")
file_list

Unnamed: 0,resource_type,id,study_id,condition,disease_comment,age_at_diagnosis,age,height,weight,gender,handedness,appearance_in_kinship,appearance_in_first_grade_kinship,effect_of_alcohol_on_tremor,label
0,patient,1,PADS,Healthy,-,56,56,173,78,male,right,True,True,Unknown,0
1,patient,2,PADS,Other Movement Disorders,Left-Sided resting tremor and hypokinesia with...,69,81,193,104,male,right,False,,No effect,2
2,patient,3,PADS,Healthy,-,45,45,170,78,female,right,False,,Unknown,0
3,patient,4,PADS,Parkinson's,IPS akinetic-rigid type,63,67,161,90,female,right,False,,No effect,1
4,patient,5,PADS,Parkinson's,IPS tremordominant type,65,75,172,86,male,left,False,,Unknown,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
464,patient,465,PADS,Parkinson's,IPS mixed type,62,65,175,80,male,right,True,False,No effect,1
465,patient,466,PADS,Healthy,-,84,84,172,74,female,right,True,True,No effect,0
466,patient,467,PADS,Parkinson's,"Essential Tremor, starting IPS tremordominant ...",55,57,190,100,male,right,False,,Improvement,1
467,patient,468,PADS,Parkinson's,IPS mixed type,73,76,198,118,male,right,False,,No effect,1


In [9]:
n_id = len(file_list['id'])
n_id

469

In [10]:
time_data = []
y = []
X_to_split = []
for idx, subject_idx in enumerate(file_list['id']):
    file_name = raw_data_prep_dir + f'movement/{subject_idx:03d}_ml.bin'
    time_idx_data = np.fromfile(file_name, dtype=np.float32).reshape((-1, 976))
    time_data.append(time_idx_data)
    y_idx = file_list[file_list['id']==subject_idx].label.values
    X_id = idx
    y.append(y_idx)
    X_to_split.append(X_id)

X_to_split = np.array(X_to_split)
y = np.array(y).flatten()
print(X_to_split.shape, y.shape)

(469,) (469,)


In [11]:
channels = []
for task in ["Relaxed1", "Relaxed2", "RelaxedTask1", "RelaxedTask2", "StretchHold", "HoldWeight",
                "DrinkGlas", "CrossArms", "TouchNose", "Entrainment1", "Entrainment2"]:
    for device_location in ["LeftWrist", "RightWrist"]:
        for sensor in ["Acceleration", "Rotation"]:
            for axis in ["X", "Y", "Z"]:
                channel = f"{task}_{sensor}_{device_location}_{axis}"
                channels.append(channel)
len(channels)

132

In [12]:
time_data_array = np.array(time_data)
print(time_data_array.shape)

(469, 132, 976)


In [13]:
X_train_idx, X_test_idx, y_train, y_test = train_test_split(X_to_split, y, test_size=0.20, random_state=23, stratify = y)

In [14]:
X_train = time_data_array[X_train_idx]
X_test = time_data_array[X_test_idx]
print(len(X_train), len(X_test))
print(len(y_train), len(y_test))

375 94
375 94


In [15]:
n_obs, n_chan, n_time =  X_train.shape

X_train_nn = X_train.reshape(n_obs, n_time, n_chan)
X_train_nn.shape

n_obs_test, n_chan_test, n_time_test =  X_test.shape

X_test_nn = X_test.reshape(n_obs_test, n_time_test, n_chan_test)
X_test_nn.shape
X_train_nn.shape

(375, 976, 132)

In [16]:
y_test.shape
y_train.shape

(375,)

In [17]:
y_train_cat = to_categorical(y_train)
y_test_cat  = to_categorical(y_test)

In [32]:
def initialize_model_lstm(input_shape:tuple, dropout_rate: float = 0.3) -> Model:
    model = Sequential()
    normalizer = layers.Normalization(axis = -1)
    normalizer.adapt(X_train_nn)

    model.add(layers.Input(shape=input_shape))
    model.add(normalizer)

    #two lstm layers to better capture
    model.add(layers.GRU(units=64, activation='tanh',
                   return_sequences=True))
    model.add(layers.GRU(units=32, activation='tanh',
                          ))
    model.add(layers.Flatten())
    #dense layer with batch normalization
    model.add(layers.Dense(32, activation = layers.LeakyReLU(), kernel_regularizer=regularizers.L2(0.0001)))
    model.add(layers.BatchNormalization())
    model.add(layers.Dense(16, activation = layers.LeakyReLU(), kernel_regularizer=regularizers.L2(0.0001)))
    model.add(layers.Dropout(dropout_rate))

    #output layer with 3 classification
    model.add(layers.Dense(3, activation='softmax'))

    return model

In [33]:
model_lstm = initialize_model_lstm(input_shape=X_train_nn.shape[1:], dropout_rate = 0.2)

In [34]:
init_lr = 0.001
#lr_schedule = schedules.ExponentialDecay(
#     init_lr,
#     decay_steps=100000,
#     decay_rate=0.96,
#     staircase=True)

opt = AdamW(init_lr)

model_lstm.compile(
    loss="categorical_focal_crossentropy",
    optimizer=opt,
    metrics=['accuracy', 'precision', 'recall'])

In [35]:
model_lstm.summary()

In [37]:
es = EarlyStopping(
    monitor="val_loss",
    patience=100,
    restore_best_weights=True,
    verbose=0
)

reduce_lr = ReduceLROnPlateau(
        monitor='val_loss'
    )

history = model_lstm.fit(
    X_train_nn,
    y_train_cat,
    validation_split = 0.2,
    epochs=200,
    batch_size = 124,
    callbacks=[es, reduce_lr],
    verbose=1
)

Epoch 1/200
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1s/step - accuracy: 0.7367 - loss: 0.0789 - precision: 0.8800 - recall: 0.3667 - val_accuracy: 0.5333 - val_loss: 0.1172 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - learning_rate: 1.0000e-13
Epoch 2/200
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1s/step - accuracy: 0.7000 - loss: 0.0821 - precision: 0.8467 - recall: 0.3867 - val_accuracy: 0.5333 - val_loss: 0.1172 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - learning_rate: 1.0000e-13
Epoch 3/200
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1s/step - accuracy: 0.7033 - loss: 0.0812 - precision: 0.8030 - recall: 0.3533 - val_accuracy: 0.5333 - val_loss: 0.1172 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - learning_rate: 1.0000e-13
Epoch 4/200
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 885ms/step - accuracy: 0.7133 - loss: 0.0795 - precision: 0.8450 - recall: 0.3633 - val_accuracy: 

In [24]:
model_lstm.evaluate(X_test_nn, y_test_cat)

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 147ms/step - accuracy: 0.5851 - loss: 0.9689 - precision: 0.6000 - recall: 0.5106


[0.9688957929611206, 0.585106372833252, 0.6000000238418579, 0.5106382966041565]

In [25]:
y_pred = model_lstm.predict(X_test_nn)
y_pred_cat = np.argmax(y_pred,axis =1)
y_pred_cat

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 217ms/step


array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 2, 1, 1])

In [26]:
balanced_accuracy_score(y_test, y_pred_cat)

print(y_test, y_pred_cat)
print(np.sum(y_test_cat, axis = 0))
print(np.unique(y_pred_cat, axis = 0,return_counts =True))

[0 1 0 0 2 1 0 1 2 0 1 1 1 2 1 1 1 2 1 2 1 1 0 1 2 1 1 1 1 1 1 2 2 2 2 1 1
 1 1 0 1 2 0 0 0 2 1 1 1 1 1 2 1 2 0 1 0 1 1 2 1 1 1 1 1 2 1 1 1 0 1 2 1 1
 1 0 1 0 2 1 1 1 1 1 1 2 2 1 2 2 1 2 1 0] [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1]
[16. 55. 23.]
(array([1, 2]), array([92,  2]))
