In [1]:
# import required packages
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.colors import ListedColormap
from sklearn import neighbors, datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn import metrics
from sklearn.metrics import classification_report

In [2]:
# k for the k-nearest-neighbors classifier: how many neighbours vote on each prediction
n_neighbors: int = 15

In [4]:
# Load the feature table (principal-component columns plus a `label` column)
# from the CSV on disk, then show it as the cell's output.
df = pd.read_csv(filepath_or_buffer='kundalini/k_pca_features.csv')
df

Unnamed: 0,PC1,PC2,PC3,PC4,label
0,1.919234,1.225710,-1.287233,0.285046,k_pre
1,1.629080,-0.495292,-0.689883,0.365053,k_pre
2,2.341778,-1.181016,-0.758231,0.174319,k_pre
3,1.229193,0.930177,-0.746822,0.327738,k_pre
4,1.881406,-0.417269,-0.858387,0.351611,k_pre
...,...,...,...,...,...
91,-0.584261,3.824609,-0.579344,-0.546390,k_med
92,-0.489981,2.892255,0.260431,-0.950496,k_med
93,-2.153267,2.608921,0.641948,-0.166193,k_med
94,-0.443943,3.236324,-0.394600,-0.524103,k_med


In [5]:
# Encode the class labels as integers:
#   'k_pre' (non-meditative) -> 0, 'k_med' (meditative) -> 1
categories = {'k_pre': 0, 'k_med': 1}

# Look every value up in the mapping and cast to int explicitly. Using
# Series.replace to turn a string column into an integer column relies on
# pandas' implicit downcasting, which recent pandas versions deprecate.
# Values that are already encoded fall through unchanged, so re-running this
# cell is safe; an unexpected label makes the int cast raise instead of
# silently leaving strings in the target column.
df['label'] = df['label'].map(lambda value: categories.get(value, value)).astype(int)
df

Unnamed: 0,PC1,PC2,PC3,PC4,label
0,1.919234,1.225710,-1.287233,0.285046,0
1,1.629080,-0.495292,-0.689883,0.365053,0
2,2.341778,-1.181016,-0.758231,0.174319,0
3,1.229193,0.930177,-0.746822,0.327738,0
4,1.881406,-0.417269,-0.858387,0.351611,0
...,...,...,...,...,...
91,-0.584261,3.824609,-0.579344,-0.546390,1
92,-0.489981,2.892255,0.260431,-0.950496,1
93,-2.153267,2.608921,0.641948,-0.166193,1
94,-0.443943,3.236324,-0.394600,-0.524103,1


In [6]:
# split dependent and independent variables
# target variable (0/1 class codes) as a NumPy array
Y = df['label'].values

# features: every column except the target.
# 'Unnamed: 0' is the name pandas gives to a header-less first CSV column
# (typically a DataFrame index that was written out with the data). If it is
# present it is just a row counter, not a measurement, and would let the
# classifier key on file order -- so drop it defensively. errors='ignore'
# makes this a no-op when the column does not exist.
X = df.drop(columns=['label', 'Unnamed: 0'], errors='ignore')
features_list = list(X.columns)

# Standardizing the features (zero mean, unit variance per column).
# NOTE(review): the scaler is fit on every row here, i.e. before the
# train/test split, so held-out rows contribute to the scaling statistics.
X = StandardScaler().fit_transform(X)

In [7]:
# split training and testing dataset: 70% training, 30% testing
# (fixed random_state so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.3, random_state=42
)

# X was standardised using all rows before this split, which lets the test
# rows influence the mean/std used for scaling. Re-fit the scaler on the
# training rows only and apply it to both partitions. Standardisation is
# unaffected by an earlier per-feature shift/scale, so the result equals
# scaling the raw features with training-only statistics.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# K Nearest Neighbors

In [8]:
# Build the k-nearest-neighbors classifier; with weights="distance" closer
# neighbours count more in the vote than farther ones.
clf = neighbors.KNeighborsClassifier(
    n_neighbors=n_neighbors,
    weights="distance",
)

In [9]:
# Fit the classifier on the training partition (the fitted estimator is the cell output)
clf.fit(X=X_train, y=y_train)

KNeighborsClassifier(n_neighbors=15, weights='distance')

In [10]:
# Predict class codes for the held-out test partition
y_pred = clf.predict(X=X_test)

In [11]:
# analyze test_data
# display results in a labelled confusion matrix
class_names = ['Non-Meditative', 'Meditative']  # class codes 0 and 1, in that order

# Pin labels=[0, 1] so the matrix is always 2x2 in a fixed order, even if one
# class happens to be missing from y_test or from the predictions.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])

# Rows are the actual classes (y_test); columns are the predicted classes (y_pred).
results = pd.DataFrame(
    cm,
    index=class_names,
    columns=pd.Index(class_names, name='Predicted / Actual'),
)
results

Predicted / Actual,Non-Meditative,Meditative
Non-Meditative,11,6
Meditative,0,12


In [12]:
# Per-class precision / recall / F1 on the test set, using readable class
# names in place of the 0/1 codes.
target_names = ['Non-Meditative', 'Meditative']
report = classification_report(y_test, y_pred, target_names=target_names)
print(report)

                precision    recall  f1-score   support

Non-Meditative       1.00      0.65      0.79        17
    Meditative       0.67      1.00      0.80        12

      accuracy                           0.79        29
     macro avg       0.83      0.82      0.79        29
  weighted avg       0.86      0.79      0.79        29



In [14]:
# store metrics for comparison amongst other k-nearest-neighbor classifier outputs later
# NOTE(review): these lists carry an `svm_` prefix although the model trained
# in this notebook is k-nearest-neighbors; the names are left unchanged in
# case other cells read them.
svm_rec = []
svm_acc = []
svm_pre = []
svm_mcc = []

# obtain metrics for model (recall/precision use scikit-learn's default
# positive class, label 1)
recall = metrics.recall_score(y_test, y_pred)
accuracy = metrics.accuracy_score(y_test, y_pred)
precision = metrics.precision_score(y_test, y_pred)
mcc = metrics.matthews_corrcoef(y_test, y_pred)

svm_rec.append(recall)
svm_acc.append(accuracy)
svm_pre.append(precision)
svm_mcc.append(mcc)

# display metrics
# Heading previously read "K-means", which is a clustering algorithm; the
# model evaluated here is the k-nearest-neighbors classifier.
print('K-Nearest Neighbors: Kundalini (post-pca)\n')
print(f'Sensitivity = {recall:.3f}')
print(f'Accuracy = {accuracy:.3f}')
print(f'Precision = {precision:.3f}')
print(f'Matthew Correlation Coefficient = {mcc:.3f}')

K-means: Kundalini (post-pca)

Sensitivity = 1.000
Accuracy = 0.793
Precision = 0.667
Matthew Correlation Coefficient = 0.657
