In [5]:
from utils import load_grades_data, load_heart_data, generate_silhoutte_score_plot, generate_kmeans_SV_ICD_plots, generate_distance_bw_gmms_plots, generate_bic_plot
from sklearn.cluster import KMeans
from sklearn import metrics
from sklearn.mixture import GaussianMixture
from sklearn.decomposition import FastICA, PCA
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier

from scipy.stats import kurtosis
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn import random_projection
import numpy as np
import time
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.decomposition import TruncatedSVD
np.random.seed(5)

In [6]:
X, y = load_heart_data()

In [9]:
pca = PCA(n_components= 2).fit(X)
X_pca = pca.transform(X)

clf = MLPClassifier(max_iter= 1000, hidden_layer_sizes=(5,2), activation='logistic', verbose=False, learning_rate_init=0.001)
X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2)

cv_score = cross_val_score(clf, X_train, y_train, cv=20).mean()
print("Cross validation score: " + str(cv_score))

start = time.time()
clf.fit(X_train, y_train)
train_time = time.time() - start
print("Train time: " + str(train_time))

start = time.time()
y_pred = clf.predict(X_test)
query_time = time.time() - start
print("Query time: " + str(query_time))

score = accuracy_score(y_test, y_pred)
print("Test Accuracy: " + str(score))



Cross validation score: 0.8025641025641026
Train time: 0.5087099075317383
Query time: 0.00032782554626464844
Test Accuracy: 0.8852459016393442


In [10]:
#ICA

In [13]:
ica = FastICA(n_components= 2, max_iter=10000, tol=0.1).fit(X)
X_ica = ica.transform(X)

clf = MLPClassifier(max_iter=1000, hidden_layer_sizes=(5,2), activation='logistic', verbose=False, learning_rate_init=0.001)
X_train, X_test, y_train, y_test = train_test_split(X_ica, y, test_size=0.2)

cv_score = cross_val_score(clf, X_train, y_train, cv=20).mean()
print("Cross validation score: " + str(cv_score))

start = time.time()
clf.fit(X_train, y_train)
train_time = time.time() - start
print("Train time: " + str(train_time))

start = time.time()
y_pred = clf.predict(X_test)
query_time = time.time() - start
print("Query time: " + str(query_time))

score = accuracy_score(y_test, y_pred)
print("Test Accuracy: " + str(score))

Cross validation score: 0.5826923076923078
Train time: 0.011088848114013672
Query time: 0.0003631114959716797
Test Accuracy: 0.39344262295081966


In [14]:
#RP

In [16]:
rp = random_projection.SparseRandomProjection(n_components=15)
X_rp=rp.fit_transform(X)

clf = MLPClassifier(max_iter= 1000, hidden_layer_sizes=(5,2), activation='logistic', verbose=False, learning_rate_init=0.001)
X_train, X_test, y_train, y_test = train_test_split(X_rp, y, test_size=0.2)

cv_score = cross_val_score(clf, X_train, y_train, cv=20).mean()
print("Cross validation score: " + str(cv_score))

start = time.time()
clf.fit(X_train, y_train)
train_time = time.time() - start
print("Train time: " + str(train_time))

start = time.time()
y_pred = clf.predict(X_test)
query_time = time.time() - start
print("Query time: " + str(query_time))

score = accuracy_score(y_test, y_pred)
print("Test Accuracy: " + str(score))





Cross validation score: 0.7477564102564103
Train time: 0.5827493667602539
Query time: 0.00019598007202148438
Test Accuracy: 0.9016393442622951




In [17]:
#SVD

In [18]:

tsvd = TruncatedSVD(n_components=2)
X_tsvd = tsvd.fit_transform(X)

clf = MLPClassifier(max_iter=1000, hidden_layer_sizes=(5,2), activation='logistic', verbose=False, learning_rate_init=0.001)
X_train, X_test, y_train, y_test = train_test_split(X_tsvd, y, test_size=0.2)

cv_score = cross_val_score(clf, X_train, y_train, cv=20).mean()
print("Cross validation score: " + str(cv_score))

start = time.time()
clf.fit(X_train, y_train)
train_time = time.time() - start
print("Train time: " + str(train_time))

start = time.time()
y_pred = clf.predict(X_test)
query_time = time.time() - start
print("Query time: " + str(query_time))

score = accuracy_score(y_test, y_pred)
print("Test Accuracy: " + str(score))





Cross validation score: 0.8070512820512821
Train time: 0.46910715103149414
Query time: 0.0001900196075439453
Test Accuracy: 0.8524590163934426
