In [1]:
import pandas as pd
import numpy as np
import random
from sklearn.model_selection import train_test_split, GridSearchCV, PredefinedSplit
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler, RobustScaler
from collections import Counter
from Model_Utils import Sequence_Model, Classify_Model, Embedding_Model
from Dataset_Utils import *
from General_Utils import *
from torch.optim.lr_scheduler import ReduceLROnPlateau
import joblib

# ----- Dataset and held-out subject ------------------------------------------
# 'WESAD' selects the WESAD wrist files; any other value makes the loading
# code below fall back to the AffectiveROAD files.
NAME_DATASET = 'WESAD'

# Subject whose rows become the test split (all other subjects are train/val).
# Ids noted for earlier runs (used as 'S<n>'):
#   14, 6, 17, 3, 2, 13, 9, 10, 15, 8, 7, 11, 4, 5, 16
# Alternative ids kept from the original notes (presumably for the non-WESAD
# dataset, e.g. 'AD1' or 'SJ1' -- TODO confirm):
#   'GM1', 'EK1', 'NM1', 'RY1', 'KSG1', 'AD1', 'NM3', 'SJ1', 'BK1', 'RY2', 'GM2', 'MT1', 'NM2'
SUBJECT_ID_TEST = 'S6'

# ----- Model shape -----------------------------------------------------------
INPUT_FT = 60             # number of input features (the frame has columns f0..f59)
SEQ_DIM = 128             # presumably the sequence-model width -- TODO confirm in Model_Utils
EMBEDDING_HIDDEN = [60]   # hidden layer sizes, presumably for the embedding model -- TODO confirm
CLASSIFY_HIDDEN = [64]    # hidden layer sizes, presumably for the classifier head -- TODO confirm
DROPOUT = 0.2

# ----- Training --------------------------------------------------------------
MARGIN = 2                # Euclidean distance: the margin is usually set high
LR = 0.003
MAX_EPOCH = 50
LOOK_BEFORE = 3           # meaning is not visible in this section -- TODO document

# ----- Output ----------------------------------------------------------------
MODEL_NAME = 'EmbEuclid_2'
SAVE_MODEL_DIR = 'Output'

##### READ DATASET #####
# Loads three row-aligned arrays: group (subject) ids, ground-truth labels and
# the feature matrix. Any NAME_DATASET other than 'WESAD' is treated as
# AffectiveROAD, and NAME_DATASET is overwritten to that name.
# NOTE(review): DATA_DIR is an absolute path on the author's machine.
if NAME_DATASET != 'WESAD':
    NAME_DATASET = 'AffectiveROAD'
    DATA_DIR = '/home/nvtu/PhD_Work/StressDetection/DATA/MyDataset/AffectiveROAD_Data/Database'
    data_group = np.load(f'{DATA_DIR}/{NAME_DATASET}_groups_1.npy')
    data_gt = np.load(f'{DATA_DIR}/{NAME_DATASET}_ground_truth_1.npy')
    data_ft = np.load(f'{DATA_DIR}/{NAME_DATASET}_stats_feats_1.npy')
    # Keep only the rows whose ground-truth value is non-negative.
    indices = np.where(data_gt >= 0)[0]
    data_ft, data_group, data_gt = data_ft[indices], data_group[indices], data_gt[indices]
else:
    DATA_DIR = '/home/nvtu/PhD_Work/StressDetection/DATA/MyDataset/WESAD'
    data_group = np.load(f'{DATA_DIR}/{NAME_DATASET}_WRIST_groups_1.npy')
    data_gt = np.load(f'{DATA_DIR}/{NAME_DATASET}_WRIST_ground_truth_1.npy')
    data_ft = np.load(f'{DATA_DIR}/{NAME_DATASET}_WRIST_stats_feats_1.npy')

# Assemble a single frame: one column per feature (f0, f1, ...), followed by
# the subject id and the label of every row.
n_features = data_ft.shape[1]
column_values = [f'f{x}' for x in range(n_features)]
data_full = pd.DataFrame(data_ft, columns=column_values)
data_full['subject_id'] = data_group
data_full['label'] = data_gt

# Distinct subject ids present in the data (np.unique returns them sorted).
list_subject_id = np.unique(data_full.subject_id).tolist()

##### TRAIN / VAL / TEST #####
# Subject-wise split: SUBJECT_ID_TEST is held out for testing, one of the
# remaining subjects is picked (with a fixed seed) for validation, and all
# other subjects are used for training.
subject_id_test = SUBJECT_ID_TEST
data_train_val = data_full[data_full.subject_id != subject_id_test]
data_test = data_full[data_full.subject_id == subject_id_test]

# Fail fast: an id that is not present in the loaded dataset would otherwise
# produce an empty test split that only breaks later, when it is scaled.
if data_test.empty:
    raise ValueError(
        f'SUBJECT_ID_TEST={subject_id_test!r} not found in dataset; '
        f'available subjects: {sorted(set(data_full.subject_id))}'
    )

# Sort the candidate ids so the seeded draw below does not depend on set order.
list_id = sorted(set(data_train_val.subject_id))
# A private RNG with a fixed seed selects the same validation subject each run.
subject_id_validate = random.Random(1509).choices(list_id, k=1)[0]
# To pin the validation subject manually instead: subject_id_validate = 'S9'
data_train = data_train_val[data_train_val.subject_id != subject_id_validate]
data_validate = data_train_val[data_train_val.subject_id == subject_id_validate]

# NOTE: this is every column of the frame, i.e. it also contains the trailing
# 'subject_id' and 'label' columns, not only the feature columns.
ft_names = data_train.columns.tolist()

# Split every subset into X (all columns except the label) and y (the label),
# then standardise the features with statistics learned on the training set.
def _split_features_label(frame):
    """Return (all columns but the last, last column) of *frame* as NumPy arrays."""
    return frame.iloc[:, :-1].to_numpy(), frame.iloc[:, -1].to_numpy()


X_train, y_train = _split_features_label(data_train)
X_test, y_test = _split_features_label(data_test)
X_validate, y_validate = _split_features_label(data_validate)

# X still carries 'subject_id' as its last column, so only X[:, :-1] (the
# feature columns) is scaled; the id column is left untouched.
scaler = StandardScaler()
X_train[:, :-1] = scaler.fit_transform(X_train[:, :-1])
for _X in (X_validate, X_test):
    # Validation and test reuse the training statistics (transform only).
    _X[:, :-1] = scaler.transform(_X[:, :-1])

In [2]:
# Per-feature means learned by the StandardScaler when it was fitted on the
# training features (X_train[:, :-1]).
scaler.mean_

array([ 7.29350588e+01,  1.84427708e+01,  8.55780385e+02,  2.73200082e+02,
        5.80466749e+00,  1.02314788e+00,  9.04588628e+01,  6.51779548e+01,
        8.10105718e+01,  1.76843072e+03,  3.53044439e+02,  4.29629088e-02,
        7.38804364e-02,  3.35145800e+00,  3.65569703e-01,  5.11983497e-01,
        2.50663177e+02,  2.91061089e+02,  7.87466163e-01,  3.16223965e-04,
        3.13933798e-03,  3.39889683e-01,  5.80632424e-01,  2.07260727e+00,
       -1.27288503e-01,  1.83021140e+00,  6.91687883e-02,  2.32102286e-02,
       -2.61712439e-01,  5.67273777e+01,  1.11475966e+02,  1.00847271e+00,
        2.00931941e+00,  5.68886215e+01,  1.83019309e+00,  7.68090375e-02,
        1.66051406e+00,  2.00638956e+00,  3.45875495e-01, -6.87120175e-06,
        9.45897437e-02, -9.58791955e-02,  1.14760158e+01,  1.60075831e-01,
       -2.11762265e-07,  9.20321780e-03,  8.41413771e-07,  6.41853515e-03,
        3.12566170e-02,  9.45897437e-02, -9.43302875e-03,  2.66492354e-02,
       -1.31724452e-02,  

In [4]:
# Per-feature variances learned by the StandardScaler when it was fitted on the
# training features (X_train[:, :-1]).
scaler.var_

array([1.09943190e+02, 5.14741946e+01, 2.18883095e+04, 1.05059296e+05,
       1.83432520e+02, 2.92378026e+00, 6.95806452e+02, 3.87643737e+02,
       1.82590848e+02, 5.33039201e+06, 1.43445357e+05, 2.99825364e-04,
       1.44043042e-03, 1.33659821e+03, 3.79557109e-02, 2.56436559e-02,
       7.30677087e+04, 1.40080942e+05, 2.19675923e-02, 1.30678450e-05,
       2.48847839e-04, 1.56271778e-02, 4.70061265e-02, 2.25893961e+01,
       2.16067643e-01, 5.90948987e+00, 1.83456505e-02, 2.10852517e-03,
       5.64294518e-01, 1.82855211e+03, 4.51592610e+03, 3.95734791e+00,
       1.89084947e+01, 1.83724966e+03, 5.90946520e+00, 2.05697695e-02,
       4.81998477e+00, 6.81183910e+00, 3.41316350e-01, 1.27865391e-06,
       2.56900978e-02, 4.74773872e-02, 5.25892202e+02, 5.32193603e+00,
       3.12648400e-09, 3.53963729e-04, 9.79407567e-09, 1.83384306e-04,
       3.81346924e-03, 2.56900978e-02, 1.54511594e-03, 2.10634734e-03,
       7.81849183e-04, 5.70927486e-03, 3.07104350e-02, 3.45686267e-03,
      

In [5]:
# Preview the first rows of the training split: the feature columns followed
# by 'subject_id' and 'label'.
data_train.head()

Unnamed: 0,f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,...,f52,f53,f54,f55,f56,f57,f58,f59,subject_id,label
0,71.001875,11.594887,853.642086,154.521049,2.339071,0.333258,109.0,78.417266,94.244604,1125.0,...,-0.002344,0.002462,-0.018732,0.003601,21.886141,0.890333,1e-05,0.003222,S14,0
1,71.275764,11.99319,852.068345,156.208126,2.1947,0.313355,110.0,79.136691,94.964029,1125.0,...,-0.00187,0.002883,-0.009692,0.002393,21.88614,0.801697,6e-06,0.00237,S14,0
2,70.995376,11.993262,854.428957,158.174694,2.008055,0.29072,110.0,79.136691,94.964029,1125.0,...,-0.001849,0.002447,-0.009703,0.002347,21.886139,0.813464,6e-06,0.002419,S14,0
3,70.929045,12.084673,854.991007,159.511131,1.910564,0.295406,109.0,78.417266,94.244604,1125.0,...,-0.001749,0.003175,-0.009677,0.00232,21.88614,0.762883,5e-06,0.002215,S14,0
4,71.047971,12.048325,853.866906,159.098395,1.924567,0.293828,109.0,78.417266,94.964029,1125.0,...,-0.001919,0.001426,-0.009659,0.002218,21.886143,0.726163,5e-06,0.00213,S14,0


In [7]:
# Mean of feature f9 over the training split -- presumably a spot check
# against the matching entry of scaler.mean_.
data_train['f9'].mean()

1768.4307213451418

In [8]:
# Standard deviation of feature f9 over the training split.
# NOTE(review): pandas' Series.std() defaults to ddof=1 (sample estimate),
# whereas StandardScaler uses the population estimate (ddof=0), so this value
# differs slightly from sqrt(scaler.var_[9]).
data_train['f9'].std()

2308.8012175492113

In [13]:
# Stand-alone build of an embedding MLP for inspection: each entry of emb_size
# contributes a Linear -> BatchNorm1d -> ReLU -> Dropout group.
# `nn` is not imported by name in the import cell; import it explicitly here
# instead of relying on star imports or leftover kernel state.
from torch import nn

modules = []
emb_size = [128]
# Pair each layer's input width with its output width: the first layer takes
# the 60 input features (same value as INPUT_FT), every later layer takes the
# previous layer's output width.
for in_dim, out_dim in zip([60] + emb_size[:-1], emb_size):
    modules += [
        nn.Linear(in_dim, out_dim),
        nn.BatchNorm1d(num_features=out_dim),
        nn.ReLU(),
        nn.Dropout(0.2),  # same value as DROPOUT
    ]

# A residual variant (one more Linear/BatchNorm1d/ReLU/Dropout group projecting
# from emb_size[-1] back to the input size) was sketched here and left disabled.
emb = nn.Sequential(*modules)

In [17]:
# Weight matrix of the first module of the stack (the nn.Linear layer).
# No training step is visible before this cell, so these are presumably the
# freshly initialised values.
emb[0].weight

Parameter containing:
tensor([[ 0.0988, -0.0020, -0.0154,  ...,  0.0274,  0.1140, -0.0258],
        [-0.0066,  0.0913, -0.1289,  ..., -0.0730,  0.1078, -0.1040],
        [-0.0611, -0.0070, -0.1198,  ..., -0.0228, -0.0460, -0.0347],
        ...,
        [ 0.0742, -0.0556,  0.0104,  ...,  0.0198, -0.0373,  0.0250],
        [-0.0158,  0.0866, -0.0060,  ..., -0.1018,  0.0731,  0.0742],
        [ 0.0228, -0.0764, -0.0969,  ..., -0.0252, -0.0199,  0.0934]],
       requires_grad=True)