# Classification

In [13]:
import pandas as pd 
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib widget

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from sklearn import svm

import common

In [2]:
data = common.loadFile("CleanedData").drop(["RID", "VISCODE"], axis=1)

In [3]:
plt.figure()
ax = sns.countplot(x=data['DX'])
plt.title("Data Diagnosis")
plt.ylabel("Count")
plt.xlabel("Diagnosis")
plt.xticks(ticks=range(0,3), labels=['CN', 'MCI', 'Dementia'])

for p in ax.patches:
        ax.annotate(f'\n{p.get_height()}', (p.get_x()+p.get_width()/2, p.get_height()), ha='center', 
                    va='top', color='white', size=18)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Create Training and Test Sets and Apply Scaling

In [4]:
data = common.loadFile("ICAData")
label = "ICA Data"

In [5]:
X = data.drop("DX", axis=1).to_numpy().astype('float')
y = data.loc[:,['DX']].to_numpy().astype('float').flatten()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

## Models

In [6]:
svm = SVC(kernel='rbf').fit(X_train, y_train)

print(f"Accuracy of SVM classifier for {label}")
print('Training set: {:.2f}'.format(svm.score(X_train, y_train)))
print('Test set: {:.2f}'.format(svm.score(X_test, y_test)))

Accuracy of SVM classifier for ICA Data
Training set: 0.82
Test set: 0.76


In [7]:
clf = SVC(kernel='linear', C=1).fit(X_train, y_train)
clf.score(X_test, y_test)

0.7699805068226121

In [8]:
from sklearn.model_selection import cross_val_score
clf = SVC(kernel='linear', C=1, random_state=42)
scores = cross_val_score(clf, X, y, cv=5)
scores

array([0.56432749, 0.56432749, 0.56432749, 0.56140351, 0.56140351])

In [12]:
from sklearn import metrics
scores = cross_val_score(clf, X, y, cv=5, scoring='f1_macro')
scores


array([0.24049844, 0.24049844, 0.24049844, 0.23970037, 0.23970037])