# ICA Classification 

Testing algorithms score

In [1]:
Training = 0.3 # Using 30% of the data to train

In [2]:
import pandas as pd 
import numpy as np
import math
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib widget

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import MinMaxScaler

import common

# Loading Datasets
data = common.loadFile("ICAData")
                                           
# Classifiers
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC, LinearSVC, SVR, LinearSVR
from sklearn.ensemble import RandomForestClassifier

In [3]:
algorithms = [DecisionTreeClassifier, KNeighborsClassifier, LinearDiscriminantAnalysis, GaussianNB, SVC, LinearSVC, SVR, LinearSVR, RandomForestClassifier]
labels = ["Decision Tree", "K-Nearest Neighbors", "Linear Discriminant Analysis", "Naïve Bayes", "Support Vector Classification", "Linear SVC", "Linear Support Vector Regression", "Linear SVR", "Random Forest"]

In [4]:
# Optimizing testing for every model
def getModelScore(model, xTrain, xTest, yTrain, yTest):
    model.fit(xTrain, yTrain)
    return model.score(xTest, yTest)

In [5]:
X = data.drop("DX", axis=1).to_numpy().astype('float')
y = data.loc[:,['DX']].to_numpy().astype('float').flatten()

xtrain, xtest, ytrain, ytest= train_test_split(X, y, test_size=Training)

In [6]:
simpleScore = []
crossValidationScore = []

for i in algorithms:
    simpleScore.append(getModelScore(i(), xtrain, xtest, ytrain, ytest) * 100)
    
for i in algorithms:
    crossValidationScore.append(cross_val_score(i(), X, y, cv=10))

In [7]:
plt.figure(figsize=[18, 10])
df = pd.DataFrame({"Score": (round(x, 2) for x in simpleScore), "Algorithms": labels})
ax = sns.barplot(x="Algorithms", y="Score", data=df)
ax.bar_label(ax.containers[0], fmt="%.2f%%")
plt.title(f"Simple Score with {int(Training * 100)}% of Training")
plt.tight_layout(pad=2)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [8]:
crossValidationScore = []
for i in algorithms:
    crossValidationScore.append(cross_val_score(i(), X, y, cv=10))

In [9]:
plt.figure(figsize = [18, 10])
df = pd.DataFrame({"Score":(round((sum(x)/len(x))*100, 2) for x in crossValidationScore), "Algorithms": labels})
ax = sns.barplot(x="Algorithms", y="Score", data=df)
ax.bar_label(ax.containers[0], fmt="%.2f%%")
plt.title(f"Cross Validation Score with {int(Training * 100)}% of Training")
plt.tight_layout(pad=2)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [12]:
plt.figure(figsize=[18, 10])

df = pd.DataFrame(columns = ['Algorithm', 'Method', 'Score'])

for i, n in zip(labels, range(len(labels))):
    df = df.append({"Algorithm" : i, "Method" : "Simple Validation", "Score": round(simpleScore[n], 2)}, ignore_index=True)
    df = df.append({"Algorithm" : i, "Method" : "Cross Validation", "Score": round(sum(crossValidationScore[n])/len(crossValidationScore[n]) *100, 2)}, ignore_index=True)
    
ax = sns.barplot(x="Algorithm", y="Score", hue="Method", data=df)
for container in ax.containers:
    ax.bar_label(container, fmt="%.2f%%")

ax.set(xlabel='Algorithms', ylabel='Scores (%)', title=f"ICA Score with {int(Training * 100)}% of Training")
plt.tight_layout(pad=2)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [11]:
df

Unnamed: 0,Algorithm,Method,Score
0,Decision Tree,Simple,68.23
1,Decision Tree,Cross Validation,63.27
2,K-Nearest Neighbors,Simple,74.85
3,K-Nearest Neighbors,Cross Validation,66.55
4,Linear Discriminant Analysis,Simple,79.92
5,Linear Discriminant Analysis,Cross Validation,76.37
6,Naïve Bayes,Simple,69.98
7,Naïve Bayes,Cross Validation,68.25
8,Support Vector Classification,Simple,79.53
9,Support Vector Classification,Cross Validation,74.8
