In [1]:
# import required packages
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.colors import ListedColormap
from sklearn import neighbors, datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn import metrics
from sklearn.metrics import classification_report

In [2]:
# define # of nearest neighbors
n_neighbors = 15

In [3]:
# read in features csv file
df = pd.read_csv('chi/chi_pca_features.csv')
df

Unnamed: 0,PC1,PC2,PC3,PC4,label
0,4.127092,1.017250,1.710357,-1.310324,chi_pre
1,-1.047665,0.968566,1.815919,0.754062,chi_pre
2,0.202165,2.289469,3.323522,1.881058,chi_pre
3,1.298339,1.338633,1.269045,-1.095397,chi_pre
4,-1.148704,-0.171344,0.576492,-0.821199,chi_pre
...,...,...,...,...,...
187,-1.622496,1.410235,-2.179585,-0.396197,chi_med
188,-2.449131,-0.095308,-2.510813,0.199345,chi_med
189,-1.689727,0.454423,-2.617688,-0.181970,chi_med
190,-1.959745,1.467207,-2.073600,-0.028477,chi_med


In [4]:
# encode meditative/non-meditative labels
categories = {'chi_pre':0,'chi_med':1}
df['label'] = df['label'].replace(categories)
df

Unnamed: 0,PC1,PC2,PC3,PC4,label
0,4.127092,1.017250,1.710357,-1.310324,0
1,-1.047665,0.968566,1.815919,0.754062,0
2,0.202165,2.289469,3.323522,1.881058,0
3,1.298339,1.338633,1.269045,-1.095397,0
4,-1.148704,-0.171344,0.576492,-0.821199,0
...,...,...,...,...,...
187,-1.622496,1.410235,-2.179585,-0.396197,1
188,-2.449131,-0.095308,-2.510813,0.199345,1
189,-1.689727,0.454423,-2.617688,-0.181970,1
190,-1.959745,1.467207,-2.073600,-0.028477,1


In [5]:
# split dependent and independent variables
# target variable
Y = df['label'].values
# features
X = df.drop(labels = ['label'], axis = 1)
features_list = list(X.columns)

# Standardizing the features
X = StandardScaler().fit_transform(X)

In [6]:
# split training and testing dataset: 70% training, 30% testing
X_train,X_test,y_train,y_test = train_test_split(X,Y,test_size = 0.3,random_state = 42)

# K Nearest Neighbors

In [7]:
#create the classifier
clf = neighbors.KNeighborsClassifier(n_neighbors, weights="distance")

In [8]:
# train the classifier
clf.fit(X_train, y_train)

KNeighborsClassifier(n_neighbors=15, weights='distance')

In [9]:
# test classifier

y_pred = clf.predict(X_test)

In [10]:
# analyze test_data
# display results in confusion matrix
cm = confusion_matrix(y_test,y_pred)
cm

results = pd.crosstab(y_test, y_pred, colnames=['Predicted / Actual'])
results = results.rename(columns={0: 'Non-Meditative', 1: 'Meditative'})
results.index = ['Non-Meditative', 'Meditative']
results

Predicted / Actual,Non-Meditative,Meditative
Non-Meditative,21,7
Meditative,4,26


In [11]:
target_names = ['Non-Meditative', 'Meditative']
print(classification_report(y_test, y_pred, target_names=target_names))

                precision    recall  f1-score   support

Non-Meditative       0.84      0.75      0.79        28
    Meditative       0.79      0.87      0.83        30

      accuracy                           0.81        58
     macro avg       0.81      0.81      0.81        58
  weighted avg       0.81      0.81      0.81        58



In [12]:
# store metrics for comparision amongst other k-nearest-neighbor classifier outputs later
svm_rec = []
svm_acc = []
svm_pre = []
svm_mcc = []

# obtain metrics for model
recall = metrics.recall_score(y_test,y_pred)
accuracy = metrics.accuracy_score(y_test,y_pred)
precision = metrics.precision_score(y_test,y_pred)
mcc = metrics.matthews_corrcoef(y_test,y_pred)

svm_rec.append(recall)
svm_acc.append(accuracy)
svm_pre.append(precision)
svm_mcc.append(mcc)

# display metrics
print('K-means\n')
print('Sensitivity = %.3f' % recall)
print('Accuracy = %.3f' % accuracy)
print('Precision = %.3f' % precision)
print('Matthew Correlation Coefficient = %.3f' % mcc)

K-means

Sensitivity = 0.867
Accuracy = 0.810
Precision = 0.788
Matthew Correlation Coefficient = 0.622
