In [3]:
import csv 
import numpy as np
import time 
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.utils import check_random_state
from sklearn.neural_network import MLPClassifier
import matplotlib.pyplot as plt

In [4]:
# download data 
!wget https://pjreddie.com/media/files/mnist_train.csv
!wget https://pjreddie.com/media/files/mnist_test.csv

--2020-12-21 08:10:30--  https://pjreddie.com/media/files/mnist_train.csv
Resolving pjreddie.com (pjreddie.com)... 128.208.4.108
Connecting to pjreddie.com (pjreddie.com)|128.208.4.108|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 109575994 (104M) [application/octet-stream]
Saving to: ‘mnist_train.csv’


2020-12-21 08:21:38 (160 KB/s) - ‘mnist_train.csv’ saved [109575994/109575994]

--2020-12-21 08:21:38--  https://pjreddie.com/media/files/mnist_test.csv
Resolving pjreddie.com (pjreddie.com)... 128.208.4.108
Connecting to pjreddie.com (pjreddie.com)|128.208.4.108|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 18289443 (17M) [application/octet-stream]
Saving to: ‘mnist_test.csv’


2020-12-21 08:23:27 (165 KB/s) - ‘mnist_test.csv’ saved [18289443/18289443]



In [5]:
# test_set is the list of the 10000 test digits, each represented as a row vector 
test_set = np.genfromtxt('mnist_test.csv',delimiter=',')

In [6]:
test_labels = np.array(list(map(lambda x: x[0], test_set)))
test_digits = np.array(list(map(lambda x: x[1:], test_set)))

In [7]:
def error_rate(pred):
  '''Takes list of predictions and returns ratio of
     wrong predictions to total.'''
  counter = 0
  assert(len(pred) == len(test_set))
  for i, l in enumerate(pred):
    if l != test_set[i][0]:
      counter += 1
  return counter / len(test_set)

In [8]:
train_set = np.genfromtxt('mnist_train.csv',delimiter=',')

In [9]:
training_labels = np.array(list(map(lambda x: x[0], train_set)))
training_digits = np.array(list(map(lambda x: x[1:], train_set)))

In [None]:
# n = 5 time to train
start = time.perf_counter()
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(training_digits,training_labels)
end = time.perf_counter()
print(end - start)

34.0572419199998


In [None]:
# n = 5 time to predict
start = time.perf_counter()
pred = knn.predict(test_digits)
end = time.perf_counter()
print(f'error rate: {error_rate(pred)}\ntime: {end - start}')

error rate: 0.0312
time: 1003.6046141819998


In [None]:
# n = 5, 500 training examples
training_digits_500 = training_digits[0:500]
training_labels_500 = training_labels[0:500]

start = time.perf_counter()
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(training_digits_500,training_labels_500)
end = time.perf_counter()
print(end - start)

start = time.perf_counter()
pred = knn.predict(test_digits)
end = time.perf_counter()
print(f'error rate: {error_rate(pred)}\ntime: {end - start}')

0.03062950300227385
error rate: 0.2603
time: 9.293689033998817


In [None]:
# n = 50 time to train
start = time.perf_counter()
knn50 = KNeighborsClassifier(n_neighbors=50)
knn50.fit(training_digits,training_labels)
end = time.perf_counter()
print(end - start)

26.862418410000828


In [None]:
# n = 50 time to predict
start = time.perf_counter()
pred = knn50.predict(test_digits)
end = time.perf_counter()
print(f'error rate: {error_rate(pred)}\ntime: {end - start}')

error rate: 0.0466
time: 996.9896169619997


In [None]:
# n = 50, 500 training examples
start = time.perf_counter()
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(training_digits_500,training_labels_500)
end = time.perf_counter()
print(end - start)

start = time.perf_counter()
pred = knn.predict(test_digits)
end = time.perf_counter()
print(f'error rate: {error_rate(pred)}\ntime: {end - start}')

0.030851954001263948
error rate: 0.2563
time: 9.181835622999643


In [None]:
# for cut and paste
def timer():
  start = time.perf_counter()
  end = time.perf_counter()
  print(end - start)
  print(f'error rate: {error_rate(pred)}\ntime: {end - start}')

In [None]:
# softmax training
start = time.perf_counter()
scaler = StandardScaler()
training_digits = scaler.fit_transform(training_digits)
test_digits = scaler.transform(test_digits)
sft = LogisticRegression(C=1e5,
                         multi_class='multinomial',
                         penalty='l2', solver='sag', tol=0.1)
sft.fit(training_digits, training_labels)
end = time.perf_counter()
print(end - start)

13.711362129999543


In [11]:
# softmax prediction
def time_pred(clf):
  lst1 = []
  lst2 = []
  for i in range(0,100):
    start = time.perf_counter()
    pred = clf.predict(test_digits)
    end = time.perf_counter()
    lst1.append(end-start)
    lst2.append(error_rate(pred))

  print(f'error rate avg: {sum(lst2)/len(lst2)}\ntime avg: {sum(lst1)/len(lst1)}')

In [None]:
time_pred(sft)

In [10]:
# visualizion of weights w_i
def viz(coef, s):
    plt.figure(figsize=(10, 5))
    scale = np.abs(coef).max()
    for i in range(10):
        l2_plot = plt.subplot(2, 5, i + 1)
        l2_plot.imshow(coef[i].reshape(28, 28), interpolation='nearest',
                      cmap=plt.cm.Greys, vmin=-scale, vmax=scale)
        l2_plot.set_xticks(())
        l2_plot.set_yticks(())
        l2_plot.set_xlabel('Class %i' % i)
    plt.suptitle(s)
    plt.show()

In [None]:
viz(sft.coef_.copy(), 'image form of weights vector $w_i$ for digit class $i$')

NameError: ignored

In [None]:
# 10 by 10
start = time.perf_counter()
scaler = StandardScaler()
scaler.fit(training_digits)
training_digits = scaler.transform(training_digits)
test_digits = scaler.transform(test_digits)
clf = MLPClassifier(solver='adam', alpha=1e-5,
                     hidden_layer_sizes=(10, 10), random_state=1)
clf.fit(training_digits, training_labels)
end = time.perf_counter()
print(f'{end - start}')

137.06309448699994




In [None]:
time_pred(clf)

error rate avg: 0.06419999999999987
time avg: 0.03349773879999816


In [None]:
# 500 by 300
start = time.perf_counter()
scaler = StandardScaler()
scaler.fit(training_digits)
training_digits = scaler.transform(training_digits)
test_digits = scaler.transform(test_digits)
clf = MLPClassifier(solver='adam', alpha=1e-5,
                     hidden_layer_sizes=(500, 300), random_state=1)
clf.fit(training_digits, training_labels)
end = time.perf_counter()
print(f'{end - start}')
time_pred(clf)

KeyboardInterrupt: ignored