In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns; sns.set_style('white')  # plot formatting
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedKFold


# Algorithms
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC, LinearSVC
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier

# Import data
data = pd.read_csv('Data/Filtered_Data.csv')

In [2]:
data.head()

Unnamed: 0,participant,open,closed,"label (0 for not depressed, 1 for depressed)",BDI
0,509,491.109212,408.343868,0,7
1,519,324.802986,963.380921,0,6
2,542,450.667603,451.559918,0,0
3,545,356.680814,790.636981,0,0
4,546,700.472825,270.410014,0,5


In [6]:
X = data[['open', 'closed']]
Y = data['label (0 for not depressed, 1 for depressed)']

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.20, random_state=42)

In [9]:
X_train.shape

(64, 2)

In [8]:
y_train.shape

(64,)

In [10]:
# pipe = Pipeline([('std', StandardScaler()),
#                  ('classifier1', LinearDiscriminantAnalysis()),
#                  ('classifier2', SVC())])
#                  #('classifier3', KNeighborsClassifier())])

# # Parameters to search
# search_space = [{'classifier1': [LinearDiscriminantAnalysis()],
#                  'classifier1__solver': ['svd', 'lsqr', 'eigen']},
#                 {'classifier2': [SVC()],
#                  'classifier2__kernel': ['linear'],
#                  'classifier2__C': np.logspace(-4, 4, 9)
#                 #{'classifier3': [KNeighborsClassifier()],
#                  #'classifier3__weights': ['uniform', 'distance']
#                  #'classifier3__n_neighbors': [i for i in range(1,11)]
#                 }]

# # Create grid search 
# clf1 = GridSearchCV(pipe, search_space, cv=StratifiedKFold(n_splits=5), 
#                    scoring='accuracy', refit=True,
#                    verbose=0)

# # Fit grid search
# best_model = clf1.fit(X_train, y_train)

In [11]:
pipe_LDA = Pipeline([('std', StandardScaler()),
                 ('classifier', LinearDiscriminantAnalysis())])
search_space1 = [{'classifier': [LinearDiscriminantAnalysis()],
                 'classifier__solver': ['svd', 'lsqr', 'eigen']}]
# Create grid search 
clf = GridSearchCV(pipe_LDA, search_space1, cv=StratifiedKFold(n_splits=5), 
                   scoring='accuracy', refit=True,
                   verbose=0)

# Fit grid search
best_model_LDA = clf.fit(X_train, y_train)



In [22]:
#best_model_LDA.cv_results_

In [20]:
best_model_LDA.cv_results_['params'][ np.argmin(best_model_LDA.cv_results_['rank_test_score']) ]

{'classifier': LinearDiscriminantAnalysis(n_components=None, priors=None, shrinkage=None,
                            solver='svd', store_covariance=False, tol=0.0001),
 'classifier__solver': 'svd'}

In [21]:
pipe_SVC = Pipeline([('std', StandardScaler()),
                     ('classifier', SVC())])
search_space2 = [{'classifier': [SVC()],
                 'classifier__kernel': ['linear'],
                 'classifier__C': np.logspace(-4, 4, 9)}]
# Create grid search 
clf2 = GridSearchCV(pipe_SVC, search_space2, cv=StratifiedKFold(n_splits=5), 
                   scoring='accuracy', refit=True,
                   verbose=0)

# Fit grid search
best_model_SVC = clf2.fit(X_train, y_train)



In [53]:
best_model_SVC.cv_results_

{'mean_fit_time': array([0.00515895, 0.00385575, 0.00340171, 0.00330753, 0.00388646,
        0.00519519, 0.00505233, 0.00875373, 0.06964278]),
 'std_fit_time': array([9.93606623e-04, 1.52632394e-04, 8.11611863e-05, 1.61825634e-05,
        1.11142485e-03, 4.94210842e-04, 7.15976019e-04, 4.37424048e-03,
        3.69073421e-02]),
 'mean_score_time': array([0.00171652, 0.00141439, 0.00125365, 0.00123158, 0.00144057,
        0.00184231, 0.00163298, 0.00126219, 0.00204172]),
 'std_score_time': array([1.58388452e-04, 8.91538343e-05, 2.09978425e-05, 6.91265880e-06,
        3.92765108e-04, 1.33439060e-04, 1.64097493e-04, 2.61361576e-05,
        1.27209343e-03]),
 'param_classifier': masked_array(data=[SVC(C=0.1, cache_size=200, class_weight=None, coef0=0.0,
     decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
     kernel='linear', max_iter=-1, probability=False, random_state=None,
     shrinking=True, tol=0.001, verbose=False),
                    SVC(C=0.1, cache_size=200, cl

In [24]:
best_model_SVC.cv_results_['params'][ np.argmin(best_model_SVC.cv_results_['rank_test_score']) ]

{'classifier': SVC(C=0.1, cache_size=200, class_weight=None, coef0=0.0,
     decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
     kernel='linear', max_iter=-1, probability=False, random_state=None,
     shrinking=True, tol=0.001, verbose=False),
 'classifier__C': 0.1,
 'classifier__kernel': 'linear'}

In [39]:
pipe_KNN = Pipeline([('std', StandardScaler()),
                     ('classifier', KNeighborsClassifier())])
search_space3 = [{'classifier': [KNeighborsClassifier()],
                 'classifier__weights': ['uniform', 'distance'],
                 'classifier__n_neighbors': [i for i in range(1,11)],
                 'classifier__p': [1, 2]
                }]
# Create grid search 
clf3 = GridSearchCV(pipe_KNN, search_space3, cv=StratifiedKFold(n_splits=5), 
                   scoring='accuracy', refit=True,
                   verbose=0)

# Fit grid search
best_model_KNN = clf3.fit(X_train, y_train)



In [43]:
best_model_KNN.cv_results_

{'mean_fit_time': array([0.00500073, 0.00373917, 0.00328741, 0.00324225, 0.00324359,
        0.00379734, 0.00454783, 0.00382109, 0.00332355, 0.00323782,
        0.00324626, 0.0032351 , 0.003233  , 0.00324707, 0.0032423 ,
        0.00323448, 0.00323558, 0.00323687, 0.00330243, 0.00324302,
        0.00325127, 0.00324068, 0.00323176, 0.00324216, 0.0032403 ,
        0.00245361, 0.00238748, 0.00236373, 0.00244484, 0.00241961,
        0.0023572 , 0.00234418, 0.00238099, 0.00234852, 0.00236015,
        0.0023777 , 0.00235958, 0.00239081, 0.00236287, 0.00236979]),
 'std_fit_time': array([8.66972984e-04, 1.70123670e-04, 7.03262228e-05, 1.50040886e-05,
        7.18206043e-06, 1.08889920e-03, 2.47280643e-04, 1.72806562e-04,
        7.11314149e-05, 5.10148909e-06, 1.41761049e-05, 7.02101224e-06,
        6.35178597e-06, 9.34333179e-06, 7.91664575e-06, 4.89449593e-06,
        1.21429745e-05, 1.15455659e-05, 1.11584235e-04, 3.96492319e-06,
        2.68732325e-05, 8.55733404e-06, 6.21719590e-06, 9.488

In [40]:
best_model_KNN.cv_results_['params'][ np.argmin(best_model_KNN.cv_results_['rank_test_score']) ]

{'classifier': KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                      metric_params=None, n_jobs=None, n_neighbors=9, p=1,
                      weights='uniform'),
 'classifier__n_neighbors': 9,
 'classifier__p': 1,
 'classifier__weights': 'uniform'}

## Train Best Models

### LDA 

In [30]:
best_LDA_model = LinearDiscriminantAnalysis(solver='svd')
best_LDA_model.fit(X_train, y_train)

LinearDiscriminantAnalysis(n_components=None, priors=None, shrinkage=None,
                           solver='svd', store_covariance=False, tol=0.0001)

In [32]:
y_pred_LDA = best_LDA_model.predict(X_test)

In [34]:
accuracy_LDA = accuracy_score(y_pred_LDA, y_test)

In [36]:
accuracy_LDA

0.5625

### SVM

In [37]:
best_SVM_model = SVC(C=0.1, kernel='linear', max_iter=-1, random_state=None,verbose=False)
best_SVM_model.fit(X_train, y_train)

y_pred_SVM = best_SVM_model.predict(X_test)

accuracy_SVM = accuracy_score(y_pred_SVM, y_test)

In [38]:
accuracy_SVM

0.5625

### KNN

In [49]:
best_KNN_model1 = KNeighborsClassifier(n_neighbors=4, p=2,weights='uniform')
best_KNN_model1.fit(X_train, y_train)

y_pred_KNN1 = best_KNN_model.predict(X_test)

accuracy_KNN1 = accuracy_score(y_pred_KNN1, y_test)

In [50]:
accuracy_KNN1

0.6875

In [52]:
best_KNN_model2 = KNeighborsClassifier(n_neighbors=9, p=1,weights='uniform')
best_KNN_model2.fit(X_train, y_train)

y_pred_KNN2 = best_KNN_model2.predict(X_test)

accuracy_KNN2 = accuracy_score(y_pred_KNN2, y_test)

accuracy_KNN2

0.375