In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# Recursively walk the Kaggle input directory and echo the full path of every file found.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for path in (os.path.join(folder, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Load data

In [None]:
# Read the four MNIST arrays from the .npz archive.
# The with-block closes the archive even if one of the key lookups raises,
# which the previous manual f.close() did not guarantee.
with np.load('/kaggle/input/mnist-numpy/mnist.npz') as f:
    x_train, y_train = f['x_train'], f['y_train']
    x_test, y_test = f['x_test'], f['y_test']

In [None]:
import matplotlib.pyplot as plt
def plot_example(X, y, n=5):
    """Plot the first ``n`` images and their labels in a single row.

    Args:
        X: NumPy array of images; the first ``n`` entries are reshaped to
            28x28 before being drawn.
        y: Sequence of labels aligned with ``X``; each label becomes the
            title of its image.
        n: Maximum number of examples to draw and number of columns in the
            row. Defaults to 5, which reproduces the previous fixed layout.

    Returns:
        None. The images are drawn on the current matplotlib figure.
    """
    # -1 lets NumPy infer the image count, so inputs with fewer than ``n``
    # samples (including none at all) no longer raise on reshape.
    images = X[:n].reshape(-1, 28, 28)
    for i, (img, label) in enumerate(zip(images, y[:n])):
        # One row, n columns; subplot positions are 1-based.
        plt.subplot(1, n, i + 1)
        plt.imshow(img)
        # Hide the axis ticks; they carry no information for image previews.
        plt.xticks([])
        plt.yticks([])
        plt.title(label)
# Preview the leading training images together with their labels.
plot_example(X=x_train, y=y_train)

In [None]:
# Flatten every image into a single feature vector and divide by 255
# (presumably 8-bit pixel intensities, giving values in [0, 1] - verify against the dataset).
flattened = []
for images in (x_train, x_test):
    nsamples, nx, ny = images.shape
    flattened.append(images.reshape((nsamples, nx * ny)) / 255)
x_train_2d, x_test_2d = flattened
print(x_train_2d.shape, x_test_2d.shape)

# Compare
|Model|Time cost (s)|Accuracy|
|:--:|:--:|:--:|
|Decision Tree||0.8656|
|Support Vector Machine||0.9184|
|Logistic Regression||0.9258|
|K-NearestNeighbor||0.9688|
|Random Forest||0.9723|
|Multi-Layer Perceptron||0.9791|
|Convolutional Neural Networks||0.9905|

# KNN

In [None]:
import time
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# k-nearest-neighbours classifier with scikit-learn's default settings.
clf = KNeighborsClassifier()

# perf_counter() is a monotonic clock intended for measuring intervals,
# so the timings cannot be skewed by system clock adjustments.
start = time.perf_counter()

print("Train model")
clf.fit(x_train_2d, y_train)
train_done = time.perf_counter()

print("Compute predictions")
predicted = clf.predict(x_test_2d)
stop = time.perf_counter()

print("Accuracy: ", accuracy_score(y_test, predicted))

# Report fitting and prediction separately: the previous single figure was
# labelled "Training time" but also included prediction and scoring.
print(f"Training time: {train_done - start}s")
print(f"Prediction time: {stop - train_done}s")
print(f"Total time: {stop - start}s")

# SVM

In [None]:
# Imported here as well so the cell does not depend on an earlier cell having imported `time`.
import time
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score

# Linear support vector classifier. random_state is fixed for reproducibility;
# it only has an effect when the solver shuffles the data.
clf = LinearSVC(max_iter=1000, random_state=1234)

# perf_counter() is a monotonic clock intended for measuring intervals.
start = time.perf_counter()

print("Train model")
clf.fit(x_train_2d, y_train)
train_done = time.perf_counter()

print("Compute predictions")
predicted = clf.predict(x_test_2d)
stop = time.perf_counter()

print("Accuracy: ", accuracy_score(y_test, predicted))

# Report fitting and prediction separately: the previous single figure was
# labelled "Training time" but also included prediction and scoring.
print(f"Training time: {train_done - start}s")
print(f"Prediction time: {stop - train_done}s")
print(f"Total time: {stop - start}s")

# CNN

In [None]:
import keras
from keras import layers

# Number of output classes and the per-sample input shape (height, width, channels).
num_classes = 10
input_shape = (28, 28, 1)

# Two convolution + max-pooling stages, then flatten, dropout and a softmax output layer.
cnn_layers = [
    keras.Input(shape=input_shape),
    layers.Conv2D(filters=32, kernel_size=(3, 3), activation="relu"),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(filters=64, kernel_size=(3, 3), activation="relu"),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Flatten(),
    layers.Dropout(rate=0.5),
    layers.Dense(units=num_classes, activation="softmax"),
]
model = keras.Sequential(cnn_layers)

model.summary()

In [None]:
# Imported here as well so the cell does not depend on an earlier cell having imported `time`.
import time

# Wall-clock start; the evaluation cell subtracts this from its own time.time() reading,
# so the same clock is kept here.
start = time.time()

batch_size = 128
epochs = 15

model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# Add the trailing channel axis expected by Conv2D and scale the pixels by 1/255,
# the same scaling the scikit-learn models receive through x_train_2d / x_test_2d.
# Previously the network was trained on the raw, unscaled pixel values.
x_train_cnn = np.expand_dims(x_train, -1).astype("float32") / 255
x_test_cnn = np.expand_dims(x_test, -1).astype("float32") / 255
# convert class vectors to binary class matrices
y_train_cnn = keras.utils.to_categorical(y_train, num_classes)
y_test_cnn = keras.utils.to_categorical(y_test, num_classes)

model.fit(x_train_cnn, y_train_cnn, batch_size=batch_size, epochs=epochs, validation_split=0.1)

In [None]:
# Evaluate on the held-out test set; score holds the loss followed by the compiled metric.
score = model.evaluate(x_test_cnn, y_test_cnn, verbose=0)
test_loss, test_accuracy = score[0], score[1]
print("Test loss:", test_loss)
print("Test accuracy:", test_accuracy)

# `start` was recorded in the training cell before compile/fit, so this
# interval spans compilation, fitting and the evaluation above.
stop = time.time()
elapsed = stop - start
print(f"Training time: {elapsed}s")

# Random Forest

In [None]:
# Imported here as well so the cell does not depend on an earlier cell having imported `time`.
import time
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Fixed seed so the forest, and therefore the reported accuracy, is
# reproducible between runs (the decision-tree cell uses the same seed value).
clf = RandomForestClassifier(n_estimators=100, random_state=1234)

# perf_counter() is a monotonic clock intended for measuring intervals.
start = time.perf_counter()

print("Train model")
clf.fit(x_train_2d, y_train)
train_done = time.perf_counter()

print("Compute predictions")
predicted = clf.predict(x_test_2d)
stop = time.perf_counter()

print("Accuracy: ", accuracy_score(y_test, predicted))

# Report fitting and prediction separately: the previous single figure was
# labelled "Training time" but also included prediction and scoring.
print(f"Training time: {train_done - start}s")
print(f"Prediction time: {stop - train_done}s")
print(f"Total time: {stop - start}s")

# Logistic Regression

In [None]:
# Imported here as well so the cell does not depend on an earlier cell having imported `time`.
import time
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# NOTE(review): the default max_iter=100 is commonly too low for the default
# solver on a dataset of this size and ends with a ConvergenceWarning; the
# limit is raised so the solver can converge. Confirm the runtime is acceptable.
clf = LogisticRegression(max_iter=1000)

# perf_counter() is a monotonic clock intended for measuring intervals.
start = time.perf_counter()

print("Train model")
clf.fit(x_train_2d, y_train)
train_done = time.perf_counter()

print("Compute predictions")
predicted = clf.predict(x_test_2d)
stop = time.perf_counter()

print("Accuracy: ", accuracy_score(y_test, predicted))

# Report fitting and prediction separately: the previous single figure was
# labelled "Training time" but also included prediction and scoring.
print(f"Training time: {train_done - start}s")
print(f"Prediction time: {stop - train_done}s")
print(f"Total time: {stop - start}s")

# Decision Tree

In [None]:
# Imported here as well so the cell does not depend on an earlier cell having imported `time`.
import time
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Seed for reproducibility
seed = 1234
# Depth-limited decision tree.
clf = DecisionTreeClassifier(max_depth=10, random_state=seed)

# perf_counter() is a monotonic clock intended for measuring intervals.
start = time.perf_counter()

print("Train model")
clf.fit(x_train_2d, y_train)
train_done = time.perf_counter()

print("Compute predictions")
predicted = clf.predict(x_test_2d)
stop = time.perf_counter()

print("Accuracy: ", accuracy_score(y_test, predicted))

# Report fitting and prediction separately: the previous single figure was
# labelled "Training time" but also included prediction and scoring.
print(f"Training time: {train_done - start}s")
print(f"Prediction time: {stop - train_done}s")
print(f"Total time: {stop - start}s")

# MLP

In [None]:
# Imported here as well so the cell does not depend on an earlier cell having imported `time`.
import time
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

# Multi-layer perceptron with a fixed seed and a cap of 300 training iterations.
clf = MLPClassifier(random_state=1, max_iter=300)

# perf_counter() is a monotonic clock intended for measuring intervals.
start = time.perf_counter()

print("Train model")
clf.fit(x_train_2d, y_train)
train_done = time.perf_counter()

print("Compute predictions")
predicted = clf.predict(x_test_2d)
stop = time.perf_counter()

print("Accuracy: ", accuracy_score(y_test, predicted))

# Report fitting and prediction separately: the previous single figure was
# labelled "Training time" but also included prediction and scoring.
print(f"Training time: {train_done - start}s")
print(f"Prediction time: {stop - train_done}s")
print(f"Total time: {stop - start}s")