## Gait Video Study 
### Traditional ML algorithms on task generalization framework 1: W to WT to classify HOA/MS/PD strides and subjects 
#### Remember to add the original count of frames in a single stride (before down sampling via smoothing) for each stride as an additional artificial feature to add information about speed of the subject to the model

1. Save the optimal hyperparameters, confusion matrices and ROC curves for each algorithm.
2. Make sure to not use x, y, z, confidence = 0, 0, 0, 0 as points for the model since they are simply missing values and not data points, so make sure to treat them before inputting to model 
3. Make sure to normalize (mean subtract) the features before we feed them to the model.
4. We use summary statistics (range, CoV, and asymmetry between the right and left limbs) as the features for the traditional models, which require a fixed-size 1D input for each training/testing set sample.


In [None]:
# 33 subject in total (~10 per group) 
# 4500 strides - 2000 strides - 200 groups for 10 strides per group
# STRIDE - 20*36 - MEAN SUBTRACTION --- BATCH NORM. 
# 90 features - 36 CoV, 36 Range, 18 asymmetry, 18 DEN
# Z-SCORE 
# Default + Dimensionality reduction - 3D space
# Try top 10 features 
# Subject generalization is where the overfitting issue is tested - If we get good results, that means we are not 
# overfitting 

In [1]:
from imports import *

In [2]:
path = 'C:\\Users\\Rachneet Kaur\\Box\\Gait Video Project\\GaitVideoData\\video\\'
data_path = path+'traditional_methods_dataframe.csv'

data = pd.read_csv(data_path, index_col= 0)
display(data.head())

Unnamed: 0,key,cohort,trial,scenario,video,PID,stride_number,frame_count,label,right hip-x-CoV,...,ankle-z-asymmetry,heel-x-asymmetry,heel-y-asymmetry,heel-z-asymmetry,toe 1-x-asymmetry,toe 1-y-asymmetry,toe 1-z-asymmetry,toe 2-x-asymmetry,toe 2-y-asymmetry,toe 2-z-asymmetry
0,GVS_212_T_T1_1,HOA,BW,SLWT,GVS_212_T_T1,212,1,46,0,0.046077,...,14.426173,3.407379,10.662441,0.830365,0.50257,31.450487,8.644012,5.236678,31.182183,8.215725
1,GVS_212_T_T1_2,HOA,BW,SLWT,GVS_212_T_T1,212,2,39,0,0.021528,...,1.360847,5.155307,11.363806,4.333776,1.025647,28.2664,2.671081,6.678294,15.058825,4.903579
2,GVS_212_T_T1_3,HOA,BW,SLWT,GVS_212_T_T1,212,3,56,0,0.034394,...,1.341021,8.625363,7.159495,3.366152,1.759968,17.545787,5.921325,8.243491,9.578638,3.008162
3,GVS_212_T_T1_4,HOA,BW,SLWT,GVS_212_T_T1,212,4,53,0,0.028511,...,2.375934,6.728268,0.098235,0.999027,0.541911,7.843339,4.279617,0.748023,19.471731,5.086056
4,GVS_212_T_T1_5,HOA,BW,SLWT,GVS_212_T_T1,212,5,44,0,0.025213,...,8.525816,1.775282,0.03321,9.166863,1.354601,6.674183,8.47948,4.373622,0.315168,11.795593


### Utility functions 

In [3]:
def keep_subjects_common_across_train_test(trial_train, trial_test):
    '''
    Restrict the training and testing trials to the subjects present in both.

    Since we need to implement a pure task generalization framework, we must have the same subjects across both the
    training and testing trials. Hence, any subject (PID) that is present in only one of the two sets is removed from it.

    Input: data subsets (dataframes with a 'PID' column) for the training and testing trials
    Output: (training subset, testing subset) with common subjects only; in both returned dataframes 'PID' is moved
            to the first column and the row index is reset, because the rows are dropped via set_index/reset_index
    '''
    #Compute the unique PIDs once; the sets give constant-time membership tests inside the comprehensions below
    train_pids = trial_train['PID'].unique()
    test_pids = trial_test['PID'].unique()
    train_pid_set = set(train_pids)
    test_pid_set = set(test_pids)

    print ('Original number of subjects in training and test sets:', len(train_pids), len(test_pids))

    #Try to use same subjects in trials W and WT for testing on same subjects we train on
    print ('Subjects in test set, which are not in training set')
    #PIDs missing in training set (trial W) but are present in the test set (trial WT), in order of first appearance
    pids_missing_training = [pid for pid in test_pids if pid not in train_pid_set]
    print (pids_missing_training)

    #Deleting the subjects from the test set that are missing in the training set
    trial_test_reduced = trial_test.set_index('PID').drop(pids_missing_training).reset_index()

    print ('Subjects in training set, which are not in test set')
    #PIDs missing in test set (trial WT) but are present in the training set (trial W), in order of first appearance
    pids_missing_test = [pid for pid in train_pids if pid not in test_pid_set]
    print (pids_missing_test)

    #Deleting the subjects from the training set that are missing in the test set
    trial_train_reduced = trial_train.set_index('PID').drop(pids_missing_test).reset_index()

    print ('Number of subjects in training and test sets after reduction:', len(trial_train_reduced['PID'].unique()), \
           len(trial_test_reduced['PID'].unique()))
    #Returning the dataframes where the training and testing set have common subjects
    return trial_train_reduced, trial_test_reduced

In [4]:
#Standardize the data before ML methods 
#Take care that the testing set is not used while normalizing the training set, otherwise the train set indirectly contains 
#information about the test set
def normalize(dataframe, n_type): 
    '''
    Compute the per-column shift and scale used to normalize a dataframe.

    The caller applies them as (features - shift) / scale, so the statistics fitted on one dataframe
    (e.g. the training set) can be reused on another (e.g. the testing set).

    Input: dataframe, type of normalization ('z' for z-score; any other value selects min-max)
    Output: (shift, scale) as per-column Series
            z-score: (mean, standard deviation); min-max: (min, max - min)
    '''
    if (n_type == 'z'): #z-score normalization 
        shift = dataframe.mean()
        scale = dataframe.std()
    else: #min-max normalization
        shift = dataframe.min()
        scale = dataframe.max() - shift
    #A constant column has zero spread; dividing by it would produce NaN/inf features, so such a column is 
    #left unscaled (scale of 1), which maps it to all zeros after the shift
    scale = scale.replace(0, 1)
    return shift, scale

In [5]:
def models(trainX, trainY, testX, testY, model_name = 'random_forest'):
    '''
    Tune the requested classifier on the training set and evaluate it on the testing set.

    training set: trainX (features), trainY (dataframe that must contain a 'label' column)
    testing set: testX (features), testY (passed unchanged to evaluate(), which reads its 'PID' and 'label' columns)
    model: model_name, one of 'random_forest', 'adaboost', 'kernel_svm', 'gbm', 'xgboost', 'knn', 'decision_tree',
           'linear_svm', 'logistic_regression', 'mlp'
    Output: (person_wise_prob_for_roc, stride_person_metrics) exactly as returned by evaluate()
    Raises: ValueError if model_name is not one of the supported names

    Every model except logistic regression is tuned with a 5-fold cross-validated grid search scored on accuracy;
    logistic regression is fitted directly with its default hyperparameters.
    '''
    trainY1 = trainY['label'] #Dropping the PID

    #Each branch defines the base estimator and its hyperparameter grid (grid = None means no grid search)
    if model_name == 'random_forest': #Random Forest
        estimator = RandomForestClassifier(random_state=0)
        grid = {
            'n_estimators': [40, 45, 50],
            'max_depth': [15, 20, 25, None],
            'class_weight': [None, 'balanced'],
            #'auto' is intentionally not searched: for classifiers it was an alias of 'sqrt' (so it added only
            #duplicate candidates) and newer scikit-learn releases reject it
            'max_features': ['sqrt', 'log2', None],
            'min_samples_leaf': [1, 2, 0.1, 0.05]
        }
    elif model_name == 'adaboost': #Adaboost
        estimator = AdaBoostClassifier(random_state=0)
        grid = {
            'n_estimators': [50, 75, 100, 125, 150],
            'learning_rate': [0.01, .1, 1, 1.5, 2]
        }
    elif model_name == 'kernel_svm': #RBF SVM
        #probability=True so that predict_proba is available for the AUC computation in evaluate()
        estimator = SVC(kernel = 'rbf', probability=True, random_state=0)
        grid = {
            'gamma': [0.0001, 0.001, 0.1, 1, 10, ]
        }
    elif model_name == 'gbm': #GBM
        estimator = GradientBoostingClassifier(random_state=0)
        grid = {
            'learning_rate': [0.15, 0.1, 0.05],
            'n_estimators': [50, 100, 150],
            'max_depth': [2, 4, 7],
            'min_samples_split': [2, 4],
            'min_samples_leaf': [1, 3],
            'max_features': [4, 5, 6]
        }
    elif model_name == 'xgboost': #Xgboost
        estimator = xgboost.XGBClassifier(random_state=0)
        grid = {
            'min_child_weight': [1, 5],
            'gamma': [0.1, 0.5, 1, 1.5, 2],
            'subsample': [0.6, 0.8, 1.0],
            'colsample_bytree': [0.6, 0.8, 1.0],
            'max_depth': [5, 7, 8]
        }
    elif model_name == 'knn': #KNN
        estimator = KNeighborsClassifier()
        grid = {
            'n_neighbors': [1, 3, 4, 5, 10],
            'p': [1, 2, 3, 4, 5]
        }
    elif model_name == 'decision_tree': #Decision Tree
        estimator = DecisionTreeClassifier(random_state=0)
        grid = {
            'min_samples_split': range(2, 50),
        }
    elif model_name == 'linear_svm': #Linear SVM
        estimator = LinearSVC(random_state=0)
        grid = {
            'loss': ['hinge', 'squared_hinge'],
        }
    elif model_name == 'logistic_regression': #Logistic regression
        estimator = LogisticRegression(random_state=0)
        grid = None #No hyperparameter search for logistic regression
    elif model_name == 'mlp': #Multi-layer perceptron
        estimator = MLPClassifier(activation='relu', solver='adam', learning_rate = 'adaptive', learning_rate_init=0.001,
                                  shuffle=False, max_iter = 200, random_state = 0)
        grid = {
            'hidden_layer_sizes': [(128, 8, 8, 128, 32), (50, 50, 50, 50, 50, 50, 150, 100, 10), 
                                  (50, 50, 50, 50, 50, 60, 30, 20, 50), (50, 50, 50, 50, 50, 150, 10, 60, 150),
                                  (50, 50, 50, 50, 50, 5, 50, 10, 5), (50, 50, 50, 50, 50, 5, 50, 150, 150),
                                  (50, 50, 50, 50, 50, 5, 30, 50, 20), (50, 50, 50, 50, 10, 150, 20, 20, 30),
                                  (50, 50, 50, 50, 30, 150, 100, 20, 100), (50, 50, 50, 50, 30, 5, 100, 20, 100),
                                  (50, 50, 50, 50, 60, 50, 50, 60, 60), (50, 50, 50, 50, 20, 50, 60, 20, 20),
                                  (50, 50, 50, 10, 50, 10, 150, 60, 150), (50, 50, 50, 10, 50, 150, 30, 150, 5),
                                  (50, 50, 50, 10, 50, 20, 150, 5, 10), (50, 50, 50, 10, 150, 50, 20, 20, 100), 
                                  (50, 50, 50, 30, 100, 5, 30, 150, 30), (50, 50, 50, 50, 100, 150, 100, 200), 
                                  (50, 50, 50, 5, 5, 100, 100, 150), (50, 50, 5, 50, 200, 100, 150, 5), 
                                  (50, 50, 5, 5, 200, 100, 50, 30), (50, 50, 5, 10, 5, 200, 200, 10), 
                                  (50, 50, 5, 30, 5, 5, 50, 10), (50, 50, 5, 200, 50, 5, 5, 50), 
                                  (50, 50,50, 5, 5, 100, 100, 150), (5, 5, 5, 5, 5, 100, 50, 5, 50, 50), 
                                  (5, 5, 5, 5, 5, 100, 20, 100, 30, 30), (5, 5, 5, 5, 5, 20, 20, 5, 30, 100), 
                                  (5, 5, 5, 5, 5, 20, 20, 100, 10, 10), (5, 5, 5, 5, 10, 10, 30, 50, 10, 10), 
                                  (5, 5, 5, 5, 10, 100, 30, 30, 30, 10), (5, 5, 5, 5, 10, 100, 50, 10, 50, 10), 
                                  (5, 5, 5, 5, 10, 100, 20, 100, 30, 5), (5, 5, 5, 5, 30, 5, 20, 30, 100, 50), 
                                  (5, 5, 5, 5, 30, 100, 20, 50, 20, 30), (5, 5, 5, 5, 50, 30, 5, 50, 10, 100), 
                                  (21, 21, 7, 84, 21, 84, 84), (21, 21, 5, 42, 42, 7, 42), (21, 84, 7, 7, 7, 84, 5), 
                                  (21, 7, 84, 5, 5, 21, 120), (42, 5, 21, 21, 21, 5, 120), (42, 5, 42, 84, 7, 120, 84), 
                                  (50, 100, 10, 5, 100, 25), (10, 10, 25, 50, 25, 5), (50, 50, 50, 50, 50, 20, 30, 100, 60)]
        }
    else:
        #Previously an unknown name surfaced later as an unbound-variable error on grid_search
        raise ValueError('Unknown model_name: ' + repr(model_name))

    if grid is None:
        grid_search = estimator
    else:
        grid_search = GridSearchCV(estimator = estimator, param_grid = grid, scoring='accuracy', n_jobs = 1, cv = 5)

    grid_search.fit(trainX, trainY1) #Fitting on the training set to find the optimal hyperparameters 
#     print('best score: ', grid_search.best_score_)
#     print('best_params: ', grid_search.best_params_, grid_search.best_index_)
#     print('Mean cv accuracy on test set:', grid_search.cv_results_['mean_test_score'][grid_search.best_index_])
#     print('Standard deviation on test set:' , grid_search.cv_results_['std_test_score'][grid_search.best_index_])
#     print('Mean cv accuracy on train set:', grid_search.cv_results_['mean_train_score'][grid_search.best_index_])
#     print('Standard deviation on train set:', grid_search.cv_results_['std_train_score'][grid_search.best_index_])
#     print('Test set performance:\n')
    person_wise_prob_for_roc, stride_person_metrics = evaluate(grid_search, testX, testY)
    return person_wise_prob_for_roc, stride_person_metrics

In [16]:
def evaluate(model, test_features, trueY):
    '''
    Compute stride-wise and person-wise classification metrics for a fitted model.

    Input: model (fitted classifier or fitted grid search), test_features (features of the test strides),
           trueY (dataframe with 'PID' and 'label' columns for the same strides; labels are expected to be 0, 1, 2)
    Output: (person-wise proportions, metrics)
            person-wise proportions: dataframe indexed by PID with columns [0, 1, 2] holding the fraction of the
                                     subject's strides predicted as each class
            metrics: [acc, p, r, f1, auc, person_acc, person_p, person_r, person_f1, person_auc]
                     (precision, recall and F1 are macro-averaged; AUC is one-vs-one, macro-averaged)
    '''
    class_labels = [0, 1, 2]
    test_labels = trueY['label'] #Dropping the PID
    print ('Test labels', test_labels)
    predictions = model.predict(test_features)
    print ('Predictions', predictions)
    
    #Stride wise metrics 
    acc = accuracy_score(test_labels, predictions)
    p = precision_score(test_labels, predictions, average='macro')
    r = recall_score(test_labels, predictions, average = 'macro')
    f1 = f1_score(test_labels, predictions, average= 'macro')
    
    #Class probabilities are computed once and reused for printing and the AUC
    try:
        prediction_prob = model.predict_proba(test_features)
    except AttributeError:
        #For linear SVM, which has no predict_proba; only this specific failure triggers the fallback so that 
        #any other error is not silently masked
        prediction_prob = model.best_estimator_._predict_proba_lr(test_features)
    print ('Prediction Probability', prediction_prob)
    auc = roc_auc_score(test_labels, prediction_prob, multi_class = 'ovo', average= 'macro')
    print('Stride-based model performance: ', acc, p, r, f1, auc)
    
    #For computing person wise metrics 
    temp = trueY.copy() #True label for the stride 
    temp['pred'] = predictions #Predicted label for the stride 
       
    #Number of strides predicted as each class for every subject; reindexing guarantees a column for every class 
    #even when some class is never predicted (otherwise selecting [0, 1, 2] below raises a KeyError)
    x = temp.groupby('PID')['pred'].value_counts().unstack().reindex(columns = class_labels)
    #Input for subject wise AUC is probabilities at columns [0, 1, 2]
    proportion_strides_correct = x.divide(x.sum(axis = 1), axis = 0).fillna(0) 
    proportion_strides_correct['True Label'] = trueY.groupby('PID')['label'].first()
    #Input for precision, recall and F1 score: the class predicted for the largest share of the subject's strides
    proportion_strides_correct['Predicted Label'] = proportion_strides_correct[class_labels].idxmax(axis = 1) 
    
    #Only grid searches expose best_estimator_; plain estimators are skipped
    best_estimator = getattr(model, 'best_estimator_', None)
    if best_estimator is not None:
        print (best_estimator)
    #Person wise metrics 
    person_true = proportion_strides_correct['True Label']
    person_pred = proportion_strides_correct['Predicted Label']
    person_acc = accuracy_score(person_true, person_pred)
    person_p = precision_score(person_true, person_pred, average = 'macro')
    person_r = recall_score(person_true, person_pred, average = 'macro')
    person_f1 = f1_score(person_true, person_pred, average = 'macro')
    person_auc = roc_auc_score(person_true, proportion_strides_correct[class_labels], multi_class = 'ovo', average= 'macro')
    print('Person-based model performance: ', person_acc, person_p, person_r, person_f1, person_auc)
    return proportion_strides_correct[class_labels], [acc, p, r, f1, auc, person_acc, person_p, person_r, person_f1, person_auc] 

### main() 

In [7]:
#Split the strides by scenario: trial W is used for training and trial WT for testing
trialW = data.loc[data['scenario'] == 'W']
trialWT = data.loc[data['scenario'] == 'WT']

#Keep only the subjects that appear in both trials
trialW_reduced, trialWT_reduced = keep_subjects_common_across_train_test(trialW, trialWT)

#Identifier and target columns that must not be fed to the models as features
cols_to_drop = ['PID', 'key', 'cohort', 'trial', 'scenario', 'video', 'stride_number', 'label']

#Training strides, shuffled with a fixed seed
trialW_reduced = shuffle(trialW_reduced, random_state = 0)
trainY = trialW_reduced[['PID', 'label']]
trainX = trialW_reduced.drop(columns = cols_to_drop)
print ('Training shape', trainX.shape, trainY.shape)

#Testing strides, shuffled with a fixed seed; PID is kept next to the label to compute person based metrics later
trialWT_reduced = shuffle(trialWT_reduced, random_state = 0)
testY = trialWT_reduced[['PID', 'label']]
testX = trialWT_reduced.drop(columns = cols_to_drop)
print ('Testing shape', testX.shape, testY.shape)

#z-score standardization: the statistics come from the training set only and are applied to both sets
norm_mean, norm_sd = normalize(trainX, 'z')
trainX_norm = trainX.sub(norm_mean).div(norm_sd)
testX_norm = testX.sub(norm_mean).div(norm_sd)

#Total strides and per-cohort stride counts in the training set
train_cohort_counts = trialW_reduced['cohort'].value_counts()
print('Strides in training set: ', len(trialW_reduced))
print ('HOA, MS and PD strides in training set:\n', train_cohort_counts)

#Total strides, per-cohort stride counts and imbalance relative to the controls (HOA) in the test set
test_cohort_counts = trialWT_reduced['cohort'].value_counts()
print('\nStrides in test set: ', len(trialWT_reduced)) 
print ('HOA, MS and PD strides in test set:\n', test_cohort_counts)
print ('Imbalance ratio (controls:MS:PD)= 1:X:Y\n', test_cohort_counts/test_cohort_counts['HOA'])

Original number of subjects in training and test sets: 32 26
Subjects in test set, which are not in training set
[403]
Subjects in training set, which are not in test set
[312, 102, 112, 113, 115, 123, 124]
Number of subjects in training and test sets after reduction: 25 25
Training shape (1128, 91) (1128, 2)
Testing shape (1142, 91) (1142, 2)
Strides in training set:  1128
HOA, MS and PD strides in training set:
 PD     453
MS     341
HOA    334
Name: cohort, dtype: int64

Strides in test set:  1142
HOA, MS and PD strides in test set:
 PD     459
HOA    351
MS     332
Name: cohort, dtype: int64
Imbalance ratio (controls:MS:PD)= 1:X:Y
 PD     1.307692
HOA    1.000000
MS     0.945869
Name: cohort, dtype: float64


In [8]:
#Per-video count of non-null entries in every column of trial W (rows of the dataframe are presumably single strides, 
#so this is the number of strides per video - confirm)
trialW.groupby(['video']).count()

Unnamed: 0_level_0,key,cohort,trial,scenario,PID,stride_number,frame_count,label,right hip-x-CoV,right hip-y-CoV,...,ankle-z-asymmetry,heel-x-asymmetry,heel-y-asymmetry,heel-z-asymmetry,toe 1-x-asymmetry,toe 1-y-asymmetry,toe 1-z-asymmetry,toe 2-x-asymmetry,toe 2-y-asymmetry,toe 2-z-asymmetry
video,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
GVS_102_W_T1,90,90,90,90,90,90,90,90,90,90,...,90,90,90,90,90,90,90,90,90,90
GVS_112_W_T1,95,95,95,95,95,95,95,95,95,95,...,95,95,95,95,95,95,95,95,95,95
GVS_113_W_T1,57,57,57,57,57,57,57,57,57,57,...,57,57,57,57,57,57,57,57,57,57
GVS_115_W_T1,52,52,52,52,52,52,52,52,52,52,...,52,52,52,52,52,52,52,52,52,52
GVS_123_W_T1,118,118,118,118,118,118,118,118,118,118,...,118,118,118,118,118,118,118,118,118,118
GVS_124_W_T1,63,63,63,63,63,63,63,63,63,63,...,63,63,63,63,63,63,63,63,63,63
GVS_212_W_T2,44,44,44,44,44,44,44,44,44,44,...,44,44,44,44,44,44,44,44,44,44
GVS_213_W_T1,43,43,43,43,43,43,43,43,43,43,...,43,43,43,43,43,43,43,43,43,43
GVS_214_W_T1,38,38,38,38,38,38,38,38,38,38,...,38,38,38,38,38,38,38,38,38,38
GVS_215_W_T1,45,45,45,45,45,45,45,45,45,45,...,45,45,45,45,45,45,45,45,45,45


In [9]:
#Names of the classifiers to evaluate
ml_models = [
    'random_forest', 'adaboost', 'kernel_svm', 'gbm', 'xgboost', 'knn', 'decision_tree', 'linear_svm',
    'logistic_regression',
]
#Dataframe to store the metrics of each ML model for raw data (one column per model)
raw_metrics = pd.DataFrame(columns = ml_models)
#For storing the person-wise predicted proportions to show ROC curves: one column per (model, cohort) pair,
#ordered model by model with the cohorts in HOA, MS, PD order
cohort_suffixes = ['_HOA', '_MS', '_PD']
prob_columns = [name + suffix for name in ml_models for suffix in cohort_suffixes]
predicted_probs_person = pd.DataFrame(columns = prob_columns)



random_forest
Test labels 108     0
661     1
397     1
1006    2
240     0
458     1
14      0
366     1
224     0
436     1
302     0
978     2
52      0
267     0
263     0
351     1
317     0
268     0
1141    2
363     1
618     1
1096    2
8       0
1116    2
571     1
1075    2
914     2
989     2
1127    2
1039    2
       ..
423     1
910     2
659     1
797     2
1120    2
99      0
850     2
448     1
755     2
976     2
115     0
777     2
72      0
845     2
537     1
849     2
174     0
87      0
551     1
705     2
314     0
600     1
1094    2
599     1
277     0
1033    2
763     2
835     2
559     1
684     2
Name: label, Length: 1142, dtype: int64
Predictions [0 0 1 ... 2 1 2]
Prediction Probability [[0.9        0.05       0.05      ]
 [0.75       0.25       0.        ]
 [0.25       0.375      0.375     ]
 ...
 [0.075      0.275      0.65      ]
 [0.175      0.77455461 0.05044539]
 [0.025      0.         0.975     ]]
Stride-based model performance:  0.79159369527145



Test labels 108     0
661     1
397     1
1006    2
240     0
458     1
14      0
366     1
224     0
436     1
302     0
978     2
52      0
267     0
263     0
351     1
317     0
268     0
1141    2
363     1
618     1
1096    2
8       0
1116    2
571     1
1075    2
914     2
989     2
1127    2
1039    2
       ..
423     1
910     2
659     1
797     2
1120    2
99      0
850     2
448     1
755     2
976     2
115     0
777     2
72      0
845     2
537     1
849     2
174     0
87      0
551     1
705     2
314     0
600     1
1094    2
599     1
277     0
1033    2
763     2
835     2
559     1
684     2
Name: label, Length: 1142, dtype: int64
Predictions [0 1 2 ... 2 1 2]


AttributeError: 'LinearSVC' object has no attribute 'predict_proba'

In [11]:
#Models evaluated in this cell
ml_models = ['logistic_regression', 'mlp']

for ml_model in ml_models:
    print (ml_model)
    #Tune/fit on the normalized training strides and evaluate on the normalized testing strides
    predict_probs_person, stride_person_metrics = models(trainX_norm, trainY, testX_norm, testY, ml_model)
    raw_metrics[ml_model] = stride_person_metrics
    #Class labels 0, 1, 2 are stored under the HOA, MS and PD columns of this model respectively
    for class_label, cohort_suffix in ((0, '_HOA'), (1, '_MS'), (2, '_PD')):
        predicted_probs_person[ml_model+cohort_suffix] = predict_probs_person[class_label]
    print ('********************************')

# raw_metrics.index = ['stride_accuracy', 'stride_precision', 'stride_recall', 'stride_F1', 'stride_AUC', 'person_accuracy', 
#                      'person_precision', 'person_recall', 'person_F1', 'person_AUC']  
# raw_metrics.to_csv(path+'trial_generalize_results_raw_data.csv')
# predicted_probs_person_raw.to_csv(path+'trial_generalize_ROCresults_raw_data.csv')

logistic_regression
Test labels 108     0
661     1
397     1
1006    2
240     0
458     1
14      0
366     1
224     0
436     1
302     0
978     2
52      0
267     0
263     0
351     1
317     0
268     0
1141    2
363     1
618     1
1096    2
8       0
1116    2
571     1
1075    2
914     2
989     2
1127    2
1039    2
       ..
423     1
910     2
659     1
797     2
1120    2
99      0
850     2
448     1
755     2
976     2
115     0
777     2
72      0
845     2
537     1
849     2
174     0
87      0
551     1
705     2
314     0
600     1
1094    2
599     1
277     0
1033    2
763     2
835     2
559     1
684     2
Name: label, Length: 1142, dtype: int64
Predictions [0 1 2 ... 2 1 2]
Prediction Probability [[9.28267935e-01 1.58997888e-04 7.15730674e-02]
 [3.74181055e-01 6.10598341e-01 1.52206035e-02]
 [1.52762144e-04 4.67152722e-04 9.99380085e-01]
 ...
 [5.47352333e-03 2.74812058e-01 7.19714418e-01]
 [2.85974836e-01 7.04040886e-01 9.98427765e-03]
 [7.19743196e-03 1.7

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression



mlp




Test labels 108     0
661     1
397     1
1006    2
240     0
458     1
14      0
366     1
224     0
436     1
302     0
978     2
52      0
267     0
263     0
351     1
317     0
268     0
1141    2
363     1
618     1
1096    2
8       0
1116    2
571     1
1075    2
914     2
989     2
1127    2
1039    2
       ..
423     1
910     2
659     1
797     2
1120    2
99      0
850     2
448     1
755     2
976     2
115     0
777     2
72      0
845     2
537     1
849     2
174     0
87      0
551     1
705     2
314     0
600     1
1094    2
599     1
277     0
1033    2
763     2
835     2
559     1
684     2
Name: label, Length: 1142, dtype: int64
Predictions [0 0 2 ... 2 1 2]
Prediction Probability [[9.99999899e-01 3.13295393e-10 1.00727126e-07]
 [9.97322893e-01 2.65898379e-03 1.81232556e-05]
 [3.96922395e-08 2.80676297e-05 9.99971893e-01]
 ...
 [3.81582523e-07 6.83136458e-06 9.99992787e-01]
 [1.36934165e-05 9.99985391e-01 9.15691240e-07]
 [2.82737557e-05 4.93086891e-08 9.999716

In [17]:
#Model evaluated in this cell
ml_models = ['linear_svm']

for ml_model in ml_models:
    print (ml_model)
    #Tune/fit on the normalized training strides and evaluate on the normalized testing strides
    predict_probs_person, stride_person_metrics = models(trainX_norm, trainY, testX_norm, testY, ml_model)
    raw_metrics[ml_model] = stride_person_metrics
    #Class labels 0, 1, 2 are stored under the HOA, MS and PD columns of this model respectively
    for class_label, cohort_suffix in ((0, '_HOA'), (1, '_MS'), (2, '_PD')):
        predicted_probs_person[ml_model+cohort_suffix] = predict_probs_person[class_label]
    print ('********************************')

# raw_metrics.index = ['stride_accuracy', 'stride_precision', 'stride_recall', 'stride_F1', 'stride_AUC', 'person_accuracy', 
#                      'person_precision', 'person_recall', 'person_F1', 'person_AUC']  
# raw_metrics.to_csv(path+'trial_generalize_results_raw_data.csv')
# predicted_probs_person_raw.to_csv(path+'trial_generalize_ROCresults_raw_data.csv')

linear_svm




Test labels 108     0
661     1
397     1
1006    2
240     0
458     1
14      0
366     1
224     0
436     1
302     0
978     2
52      0
267     0
263     0
351     1
317     0
268     0
1141    2
363     1
618     1
1096    2
8       0
1116    2
571     1
1075    2
914     2
989     2
1127    2
1039    2
       ..
423     1
910     2
659     1
797     2
1120    2
99      0
850     2
448     1
755     2
976     2
115     0
777     2
72      0
845     2
537     1
849     2
174     0
87      0
551     1
705     2
314     0
600     1
1094    2
599     1
277     0
1033    2
763     2
835     2
559     1
684     2
Name: label, Length: 1142, dtype: int64
Predictions [0 1 2 ... 2 1 2]
Prediction Probability [[6.02426169e-01 8.99585386e-04 3.96674245e-01]
 [4.55500270e-01 5.11381262e-01 3.31184680e-02]
 [1.45114239e-02 9.73301371e-04 9.84515275e-01]
 ...
 [1.25357666e-02 3.74658355e-01 6.12805879e-01]
 [2.02914647e-01 7.74780376e-01 2.23049768e-02]
 [3.05284924e-02 6.49110210e-02 9.045604

In [None]:
#Display the person-wise predicted proportions collected so far (columns are named <model>_HOA, <model>_MS, <model>_PD)
predicted_probs_person

In [20]:
#Name the metric rows: the five stride-level metrics first, then the same five at the person level
metric_names = [level + '_' + metric
                for level in ('stride', 'person')
                for metric in ('accuracy', 'precision', 'recall', 'F1', 'AUC')]
raw_metrics.index = metric_names

raw_metrics

Unnamed: 0,random_forest,adaboost,kernel_svm,gbm,xgboost,knn,decision_tree,linear_svm,logistic_regression,mlp
stride_accuracy,0.791594,0.722417,0.75394,0.842382,0.837128,0.811734,0.704904,0.776708,0.789842,0.830123
stride_precision,0.784,0.708485,0.791816,0.839292,0.831954,0.813609,0.694516,0.771493,0.785146,0.825586
stride_recall,0.782592,0.707435,0.746962,0.832901,0.826811,0.800816,0.69308,0.769074,0.781024,0.821774
stride_F1,0.780488,0.707087,0.740777,0.832282,0.826587,0.798727,0.692917,0.767549,0.780241,0.821651
stride_AUC,0.925667,0.867162,0.923345,0.952206,0.946879,0.850612,0.80057,0.909222,0.919472,0.942795
person_accuracy,0.84,0.84,0.8,0.92,0.92,0.96,0.92,0.92,0.92,0.96
person_precision,0.867424,0.844048,0.871795,0.925926,0.925926,0.962963,0.925926,0.933333,0.933333,0.962963
person_recall,0.847222,0.842593,0.810185,0.925926,0.925926,0.962963,0.925926,0.921296,0.925926,0.962963
person_F1,0.839035,0.837473,0.803175,0.919118,0.919118,0.960784,0.919118,0.921133,0.921296,0.960784
person_AUC,0.997685,0.95978,0.981481,1.0,0.997685,1.0,0.988426,0.993056,1.0,1.0


In [None]:
#ROC curves for cohort prediction 
#NOTE(review): this cell has no execution count and reads predicted_probs_person_regressN and regressN_metrics, 
#which are not defined anywhere in the visible notebook (the cells above build predicted_probs_person and 
#raw_metrics) - presumably it was copied from another notebook; confirm which results it is meant to plot.
#Models whose ROC curves are drawn
ml_models = ['random_forest',  'kernel_svm',  'xgboost', 'gbm', 'mlp'] 
#, 'adaboost', 'linear_svm', 'decision_tree', 'logistic_regression',] 
#'knn', 
#Short display names used in the legend
ml_model_names = {'random_forest': 'RF', 'adaboost': 'Adaboost', 'kernel_svm': 'RBF SVM', 'gbm': 'GBM', \
                  'xgboost': 'Xgboost', 'knn': 'KNN', 'decision_tree': 'DT',  'linear_svm': 'LSVM', 
             'logistic_regression': 'LR', 'mlp': 'MLP'}
#First row of testY for every PID (testY holds the 'label' column besides 'PID')
person_true_labels = testY.groupby('PID').first()
neutral = [0 for _ in range(len(person_true_labels))] # ROC for majority class prediction all the time 

fig, axes = plt.subplots(1, 1, sharex=True, sharey = True, figsize=(5.2, 3.5))
sns.despine(offset=0)
#NOTE(review): roc_curve is documented for binary labels, while the labels in this notebook take the values 0/1/2 - 
#presumably a per-cohort (one-vs-rest) curve is needed here; verify before running.
neutral_fpr, neutral_tpr, _ = roc_curve(person_true_labels, neutral) #roc curves
#One line style and color per plotted model, indexed by the model's position in ml_models
linestyles = ['-', '-', '-', '-.', '--', '-', '--', '-', '--']
colors = ['b', 'magenta', 'cyan', 'g',  'red', 'violet', 'lime', 'grey', 'pink']

#Baseline curve
axes.plot(neutral_fpr, neutral_tpr, linestyle='--', label='Majority (AUC = 0.5)', linewidth = 3, color = 'k')
for idx, ml_model in enumerate(ml_models):
    #NOTE(review): the probability frame built earlier in this notebook names its columns <model>_HOA/_MS/_PD, 
    #so a lookup by the bare model name would not match that naming - confirm the expected column layout.
    model_probs = predicted_probs_person_regressN[ml_model] # person-based prediction probabilities
    fpr, tpr, _ = roc_curve(person_true_labels, model_probs)
    #Legend entry shows the person-level AUC of the model rounded to 3 decimals
    axes.plot(fpr, tpr, label=ml_model_names[ml_model]+' (AUC = '+ str(round(regressN_metrics.loc['person_AUC'][ml_model], 3))
                 +')', linewidth = 3, alpha = 0.8, linestyle = linestyles[idx], color = colors[idx])
axes.set_ylabel('True Positive Rate')
axes.set_title('Cross-task generalization: Regress-N data')
plt.legend()
# axes[1].legend(loc='upper center', bbox_to_anchor=(1.27, 1), ncol=1)

axes.set_xlabel('False Positive Rate')
plt.tight_layout()
#Save the figure next to the data before showing it
plt.savefig(path + 'ROC_trial_generalize_onlyregressN.png', dpi = 250)
plt.show()