In [4]:
import numpy as np
import time
import matplotlib


In [2]:
# Paths of the CSV files holding the two splits.
train_file = "./data/sign_mnist_train.csv"
test_file = "./data/sign_mnist_test.csv"

# Parse each comma-separated file into an array, skipping its first line.
train_data, test_data = (
    np.loadtxt(csv_path, delimiter=",", skiprows=1)
    for csv_path in (train_file, test_file)
)

# Column 0 is used as the label; every remaining column is a feature.
# Features are divided by 255 (presumably 8-bit pixel intensities being
# scaled to [0, 1] -- confirm against the dataset description).
y_train, X_train = train_data[:, 0], train_data[:, 1:] / 255
y_test, X_test = test_data[:, 0], test_data[:, 1:] / 255

In [3]:
from sklearn.neighbors import KNeighborsClassifier

# The timer spans model construction, fitting and both score() calls below.
tic = time.perf_counter()

# Spell out the neighbour count instead of relying on the library default, so
# the configuration is visible in the notebook.
knn_model = KNeighborsClassifier(n_neighbors=5)

knn_model.fit(X_train, y_train)

# Read the neighbour count back from the model so the printed header can
# never disagree with what was actually trained.
print(f"KNN, n = {knn_model.n_neighbors}")
print("Training set score: {:.3f}".format(knn_model.score(X_train, y_train)))
print("Test set score: {:.3f}".format(knn_model.score(X_test, y_test)))

toc = time.perf_counter()
print(f"Time taken: {toc-tic:.3f} seconds")


KNN, n = 5
Training set score: 0.999
Test set score: 0.806


In [6]:
from sklearn.linear_model import LogisticRegression


In [7]:

# Start the clock; the elapsed time printed at the end covers construction,
# fitting and both score() calls.
tic = time.perf_counter()

# Build the one-vs-rest logistic regression (liblinear solver, C=10) and fit
# it on the training split in a single expression.
# NOTE(review): recent scikit-learn releases appear to deprecate the
# `multi_class` argument -- confirm against the installed version.
ovr_model = LogisticRegression(multi_class="ovr", solver="liblinear", C=10).fit(
    X_train, y_train
)

# Report the model's score() on the training and test splits.
print("OVR")
print(f"Training set score: {ovr_model.score(X_train, y_train):.3f}")
print(f"Test set score: {ovr_model.score(X_test, y_test):.3f}")

toc = time.perf_counter()
print("Time taken: {:.3f} seconds".format(toc - tic))


OVR
Training set score: 1.000
Test set score: 0.660
Time taken: 174.511


In [8]:
# Start the clock; the elapsed time printed at the end covers construction,
# fitting and both score() calls.
tic = time.perf_counter()

# Build the multinomial logistic regression (lbfgs solver, C=10) and fit it on
# the training split in a single expression.
# NOTE(review): recent scikit-learn releases appear to deprecate the
# `multi_class` argument -- confirm against the installed version.
softmax_model = LogisticRegression(
    multi_class="multinomial", solver="lbfgs", C=10
).fit(X_train, y_train)

# Report the model's score() on the training and test splits.
print("Softmax")
print(f"Training set score: {softmax_model.score(X_train, y_train):.3f}")
print(f"Test set score: {softmax_model.score(X_test, y_test):.3f}")

toc = time.perf_counter()
print("Time taken: {:.3f} seconds".format(toc - tic))

Softmax
Training set score: 0.957
Test set score: 0.679
Time taken: 11.915




In [9]:
from sklearn import svm

# Start the clock; the elapsed time printed at the end covers construction,
# fitting and both score() calls.
tic = time.perf_counter()

# Build a support vector classifier with a linear kernel and C=1.0, and fit it
# on the training split in a single expression.
svm_model = svm.SVC(kernel="linear", C=1.0).fit(X_train, y_train)

# Report the model's score() on the training and test splits.
print("SVM")
print(f"Training set score: {svm_model.score(X_train, y_train):.3f}")
print(f"Test set score: {svm_model.score(X_test, y_test):.3f}")

toc = time.perf_counter()
print("Time taken: {:.3f} seconds".format(toc - tic))

SVM
Training set score: 1.000
Test set score: 0.786
Time taken: 129.154 seconds


In [None]:
from sklearn.mixture import GaussianMixture

# The timer spans fitting, component assignment and the accuracy computation.
tic = time.perf_counter()

# GaussianMixture is an unsupervised density model: fit() ignores any labels
# passed to it and score() returns the mean per-sample log-likelihood, not an
# accuracy. To print numbers on the same 0-1 accuracy scale as the other
# models in this notebook, the mixture is fitted on the features only, each
# component is then labelled with the majority class of the training samples
# assigned to it, and the accuracy of that component -> class mapping is
# reported.
# random_state pins the otherwise random initialisation so re-runs agree.
# NOTE(review): 24 components presumably means one per class -- confirm.
# NOTE(review): the default covariance_type="full" can be very slow on
# high-dimensional inputs; consider "diag" if this cell does not finish.
gm_model = GaussianMixture(n_components=24, random_state=0)

gm_model.fit(X_train)

# Hard component assignment for every sample in both splits.
train_components = gm_model.predict(X_train)
test_components = gm_model.predict(X_test)

# Components that attract no training samples fall back to the overall
# majority class so the lookup table is always fully populated.
label_values, label_counts = np.unique(y_train, return_counts=True)
component_labels = np.full(
    gm_model.n_components,
    label_values[np.argmax(label_counts)],
    dtype=y_train.dtype,
)
for component in range(gm_model.n_components):
    member_labels = y_train[train_components == component]
    if member_labels.size:
        values, counts = np.unique(member_labels, return_counts=True)
        component_labels[component] = values[np.argmax(counts)]

# Translate component indices into class predictions.
y_gm_train_pred = component_labels[train_components]
y_gm_test_pred = component_labels[test_components]

print("GMM")
print("Training set score: {:.3f}".format(np.mean(y_gm_train_pred == y_train)))
print("Test set score: {:.3f}".format(np.mean(y_gm_test_pred == y_test)))

toc = time.perf_counter()
print(f"Time taken: {toc-tic:.3f} seconds")