In [1]:
import numpy as np

from sklearn.neighbors import KNeighborsClassifier as KNN
from sklearn.ensemble import GradientBoostingClassifier as GBC
from sklearn.naive_bayes import GaussianNB as GNB
from sklearn.neural_network import MLPClassifier as MLP

from sklearn.preprocessing import MinMaxScaler

from sklearn.metrics import accuracy_score
from sklearn.metrics import top_k_accuracy_score

from matplotlib import pyplot as plt

In [27]:
X_train, y_train = np.load('data/X_train.npy'), np.load('data/y_train.npy')
X_test, y_test = np.load('data/X_test.npy'), np.load('data/y_test.npy')

scaler = MinMaxScaler()
scaler.fit(X_train)
scaler.fit(X_test)

X_train = scaler.transform(X_train)
X_test = scaler.transform (X_test)

0.46259552 0.09991747
0.46255434 0.09991565
0.4899469 0.13628608
0.48976305 0.13620056


## Naive Bayes

In [67]:
# Train
gnb = GNB().fit(X_train, y_train)

In [73]:
# Evaluate
gnb_scores = gnb.predict_proba(X_test)

gnb_top1_acc = top_k_accuracy_score(y_test, gnb_scores, k=1)
gnb_top5_acc = top_k_accuracy_score(y_test, gnb_scores, k=5)
gnb_top10_acc = top_k_accuracy_score(y_test, gnb_scores, k=10)

print('Naive Bayes')
print('Top 1 Accuracy: ', gnb_top1_acc)
print('Top 5 Accuracy: ', gnb_top5_acc)
print('Top 10 Accuracy: ', gnb_top10_acc)

Naive Bayes
Top 1 Accuracy:  0.3134
Top 5 Accuracy:  0.5844
Top 10 Accuracy:  0.7016


## K Nearest Neighbors

In [69]:
# Train
knn = KNN(n_neighbors=1000).fit(X_train, y_train)

In [72]:
# Evaluate
knn_scores = knn.predict_proba(X_test)

knn_top1_acc = top_k_accuracy_score(y_test, knn_scores, k=1)
knn_top5_acc = top_k_accuracy_score(y_test, knn_scores, k=5)
knn_top10_acc = top_k_accuracy_score(y_test, knn_scores, k=10)

print('K Nearest Neighbors')
print('Top 1 Accuracy: ', knn_top1_acc)
print('Top 5 Accuracy: ', knn_top5_acc)
print('Top 10 Accuracy: ', knn_top10_acc)

Naive Bayes
Top 1 Accuracy:  0.2502
Top 5 Accuracy:  0.5047
Top 10 Accuracy:  0.6293


# Gradient Boosting Machine (takes super long, don't recommend)

In [12]:
# Train
gbc = GBC(n_estimators=5, max_depth=10, max_features=20).fit(X_train[:10000], y_train[:10000])

In [13]:
# Evaluate
gbc_scores = gbc.predict_proba(X_test)

gbc_top1_acc = top_k_accuracy_score(y_test, gbc_scores, k=1)
gbc_top5_acc = top_k_accuracy_score(y_test, gbc_scores, k=5)
gbc_top10_acc = top_k_accuracy_score(y_test, gbc_scores, k=10)

print('Gradient Boosting Machine')
print('Top 1 Accuracy: ', gbc_top1_acc)
print('Top 5 Accuracy: ', gbc_top5_acc)
print('Top 10 Accuracy: ', gbc_top10_acc)

Gradient Boosting Machine
Top 1 Accuracy:  0.0187
Top 5 Accuracy:  0.0872
Top 10 Accuracy:  0.1631


# Single-Layer Neural Network

In [33]:
mlp = MLP(hidden_layer_sizes=(), activation='logistic', max_iter=800).fit(X_train[:], y_train[:])



In [35]:
# Evaluate
mlp_scores = mlp.predict_proba(X_test)

mlp_top1_acc = top_k_accuracy_score(y_test, mlp_scores, k=1)
mlp_top5_acc = top_k_accuracy_score(y_test, mlp_scores, k=5)
mlp_top10_acc = top_k_accuracy_score(y_test, mlp_scores, k=10)

print('Neural Network')
print('Top 1 Accuracy: ', mlp_top1_acc)
print('Top 5 Accuracy: ', mlp_top5_acc)
print('Top 10 Accuracy: ', mlp_top10_acc)

Neural Network
Top 1 Accuracy:  0.6352
Top 5 Accuracy:  0.88916
Top 10 Accuracy:  0.94508
