# ICA Classification 

Testing algorithms score

In [1]:
Training = 0.3 # Using 30% of the data to train

In [2]:
import pandas as pd 
import numpy as np
import math
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib widget

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import MinMaxScaler

import common

# Loading Datasets
data = common.loadFile("ICAData")
                                           
# Classifiers
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC, LinearSVC, SVR, LinearSVR
from sklearn.ensemble import RandomForestClassifier


In [3]:
algorithms = [DecisionTreeClassifier, KNeighborsClassifier, LinearDiscriminantAnalysis, GaussianNB, SVC, LinearSVC, SVR, LinearSVR, RandomForestClassifier]
labels = ["Decision Tree", "K-Nearest Neighbors", "Linear Discriminant Analysis", "Naïve Bayes", "Support Vector Classification", "Linear SVC", "Linear Support Vector Regression", "Linear SVR", "Random Forest"]

In [4]:
# Optimizing testing for every model
def getModelScore(model, xTrain, xTest, yTrain, yTest):
    model.fit(xTrain, yTrain)
    return model.score(xTest, yTest)

In [5]:
X = data.drop("DX", axis=1).to_numpy().astype('float')
y = data.loc[:,['DX']].to_numpy().astype('float').flatten()

xtrain, xtest, ytrain, ytest= train_test_split(X, y, test_size=Training)

In [6]:
simpleScore = []
crossValidationScore = []

for i in algorithms:
    simpleScore.append(getModelScore(i(), xtrain, xtest, ytrain, ytest) * 100)
    
for i in algorithms:
    crossValidationScore.append(cross_val_score(i(), X, y, cv=10))

In [7]:
plt.figure(figsize=[18, 10])
df = pd.DataFrame({"Score": (round(x, 2) for x in simpleScore), "Algorithms": labels})
ax = sns.barplot(x="Algorithms", y="Score", data=df)
ax.bar_label(ax.containers[0], fmt="%.2f%%")
plt.title(f"Simple Score with {int(Training * 100)}% of Training")
plt.tight_layout(pad=2)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [8]:
crossValidationScore = []
for i in algorithms:
    crossValidationScore.append(cross_val_score(i(), X, y, cv=10))

In [9]:
plt.figure(figsize = [18, 10])
df = pd.DataFrame({"Score":(round((sum(x)/len(x))*100, 2) for x in crossValidationScore), "Algorithms": labels})
ax = sns.barplot(x="Algorithms", y="Score", data=df)
ax.bar_label(ax.containers[0], fmt="%.2f%%")
plt.title(f"Cross Validation Score with {int(Training * 100)}% of Training")
plt.tight_layout(pad=2)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [10]:
plt.figure(figsize=[18, 10])

df = pd.DataFrame(columns = ['Algorithm', 'Method', 'Score'])

for i, n in zip(labels, range(len(labels))):
    df = df.append({"Algorithm" : i, "Method" : "Simple Validation", "Score": round(simpleScore[n], 2)}, ignore_index=True)
    df = df.append({"Algorithm" : i, "Method" : "Cross Validation", "Score": round(sum(crossValidationScore[n])/len(crossValidationScore[n]) *100, 2)}, ignore_index=True)
    
ax = sns.barplot(x="Algorithm", y="Score", hue="Method", data=df)
for container in ax.containers:
    ax.bar_label(container, fmt="%.2f%%")

ax.set(xlabel='Algorithms', ylabel='Scores (%)', title=f"ICA Score with {int(Training * 100)}% of Training")
plt.tight_layout(pad=2)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

  df = df.append({"Algorithm" : i, "Method" : "Simple Validation", "Score": round(simpleScore[n], 2)}, ignore_index=True)
  df = df.append({"Algorithm" : i, "Method" : "Cross Validation", "Score": round(sum(crossValidationScore[n])/len(crossValidationScore[n]) *100, 2)}, ignore_index=True)
  df = df.append({"Algorithm" : i, "Method" : "Simple Validation", "Score": round(simpleScore[n], 2)}, ignore_index=True)
  df = df.append({"Algorithm" : i, "Method" : "Cross Validation", "Score": round(sum(crossValidationScore[n])/len(crossValidationScore[n]) *100, 2)}, ignore_index=True)
  df = df.append({"Algorithm" : i, "Method" : "Simple Validation", "Score": round(simpleScore[n], 2)}, ignore_index=True)
  df = df.append({"Algorithm" : i, "Method" : "Cross Validation", "Score": round(sum(crossValidationScore[n])/len(crossValidationScore[n]) *100, 2)}, ignore_index=True)
  df = df.append({"Algorithm" : i, "Method" : "Simple Validation", "Score": round(simpleScore[n], 2)}, ignore_index=True)
  df 

In [11]:
data

Unnamed: 0,IC1,IC2,IC3,IC4,IC5,IC6,IC7,IC8,IC9,IC10,DX
0,-0.004563,0.053338,-0.013619,-0.001153,-0.068614,-0.031754,0.009565,-0.045461,-0.008527,-0.003025,1
1,-0.012612,0.042117,0.017003,0.002385,-0.030882,0.008735,0.032264,-0.031358,-0.018602,-0.012969,1
2,-0.000506,0.015700,0.009852,0.003665,-0.036022,-0.009071,0.079807,-0.018471,0.015439,-0.029835,1
3,-0.017587,0.008918,0.012300,-0.033458,0.010938,-0.049676,-0.008352,-0.015894,-0.008206,0.005704,1
4,-0.036075,0.010869,0.028527,-0.037763,0.038536,-0.005521,-0.007436,-0.036291,-0.008239,-0.015538,1
...,...,...,...,...,...,...,...,...,...,...,...
1705,0.000996,-0.001272,-0.030139,-0.019873,0.024213,0.012539,0.009418,-0.000670,-0.002843,-0.037766,0
1706,0.030562,-0.025447,-0.025239,-0.024493,0.013733,0.029970,-0.013629,-0.041659,0.029084,0.032206,0
1707,0.007662,-0.008601,-0.013246,-0.034106,0.044446,-0.053993,-0.019101,0.010674,-0.008716,-0.005121,1
1708,0.013372,-0.002461,-0.012936,-0.035526,0.041069,-0.045988,-0.017614,-0.009831,-0.011393,0.015922,0
