In [1]:
import numpy as np
import pandas as pd
import os
from PIL import Image
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn import metrics
from sklearn.svm import SVC

from joblib import dump, load

In [2]:
# Fetch the "mnist_784" dataset (version 1) from OpenML as separate feature
# and label objects.
X_mnist, y_mnist = fetch_openml("mnist_784", version=1, return_X_y=True)

# Build an explicit float array before scaling.  Depending on the installed
# scikit-learn version the pixels can come back integer-typed, and an in-place
# `/=` on an integer array raises a casting error; an out-of-place division on
# a float array works in both cases.
X_mnist = np.asarray(X_mnist, dtype=np.float64) / 256
# Convert the labels to integers (they may arrive as strings/categoricals --
# NOTE(review): confirm against the installed scikit-learn version).
y_mnist = np.int_(y_mnist)

In [3]:
# Wrap the pixel matrix in a DataFrame whose columns are named 0..n-1 (one per
# pixel) and attach the labels as a trailing "y" column.
df = pd.DataFrame(X_mnist, columns=list(range(X_mnist.shape[1])))
df["y"] = y_mnist
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,775,776,777,778,779,780,781,782,783,y
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69995,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
69996,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3
69997,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4
69998,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5


In [4]:
# Remove every row labelled 0 from the frame (in place), then split what is
# left back into a label vector and a pixel matrix.
df.drop(index=df.index[df["y"] == 0], inplace=True)
y_mnist = df["y"].to_numpy()
X_mnist = df.drop(columns=["y"]).to_numpy()
y_mnist.shape, X_mnist.shape

((63097,), (63097, 784))

In [5]:
# Load every image in digits/0 as a flattened 784-value grayscale row and give
# each one the label 0.
folder = r"digits/0"
# os.listdir returns names in arbitrary order; sorting makes the row order --
# and therefore the later seeded train/validation split -- reproducible.
images = sorted(os.listdir(folder))
X_0 = []
y_0 = []
for img_id in images:
    # The context manager closes the underlying file as soon as the converted
    # copy has been made, instead of leaving one open handle per image.
    with Image.open(os.path.join(folder, img_id)) as img_file:
        image = img_file.convert("L")
    # reshape(784) assumes each image holds exactly 784 pixels (e.g. 28x28).
    X_0.append(np.array(image).reshape(784))
    y_0.append(0)
X_0 = np.array(X_0)/256
y_0 = np.int_(y_0)

In [6]:
# Sanity check: shape of the zero-digit label vector and of its pixel matrix.
(y_0.shape, X_0.shape)

((18,), (18, 784))

In [7]:
# Append the digits/0 images (rows) and their labels to the MNIST arrays.
# NOTE: this rebinds X_mnist / y_mnist to the grown arrays, so running the
# cell a second time appends the same images again.
X_mnist = np.concatenate([X_mnist, X_0], axis=0)
y_mnist = np.concatenate([y_mnist, y_0])

y_mnist.shape, X_mnist.shape

((63115,), (63115, 784))

In [8]:
# Hold out 15% of the MNIST-based pool for validation, stratified by label and
# seeded so the split is repeatable for a fixed input order.
X_train_mnist, X_val_mnist, y_train_mnist, y_val_mnist = train_test_split(
    X_mnist,
    y_mnist,
    test_size=0.15,
    random_state=42,
    stratify=y_mnist,
)
tuple(part.shape for part in (X_train_mnist, X_val_mnist, y_train_mnist, y_val_mnist))

((53647, 784), (9468, 784), (53647,), (9468,))

In [9]:
# Build a dataset from the images in every digit folder under "digits"; the
# folder name is used as the label.
#
# NOTE(review): an earlier cell also loads the "digits/0" folder and appends it
# to the MNIST pool.  Because the two pools are split independently, the same
# zero image can end up in one pool's training split and the other pool's
# validation split -- confirm this overlap is intended.
folder_path = r"digits"
# Keep only sub-directories whose name is all digits, in sorted order.  This
# skips stray entries such as ".ipynb_checkpoints" or ".DS_Store" that would
# otherwise fail (on listing, on image loading, or on the integer label
# conversion below), and makes the row order reproducible
# (os.listdir order is arbitrary).
labels = sorted(
    name
    for name in os.listdir(folder_path)
    if name.isdigit() and os.path.isdir(os.path.join(folder_path, name))
)
X_print = []
y_print = []
for label in labels:
    image_folder = os.path.join(folder_path, label)
    images = sorted(os.listdir(image_folder))
    for img_id in images:
        # Close each file as soon as the converted grayscale copy exists.
        with Image.open(os.path.join(image_folder, img_id)) as img_file:
            image = img_file.convert("L")
        # reshape(784) assumes each image holds exactly 784 pixels (e.g. 28x28).
        X_print.append(np.array(image).reshape(784))
        y_print.append(label)

X_print = np.array(X_print)/256
# Folder names are strings; convert them to integer class labels.
y_print = np.int_(y_print)

X_print.shape, y_print.shape

((3456, 784), (3456,))

In [10]:
# Hold out 15% of the "digits"-folder images for validation, stratified by
# label, using the same seed as the MNIST split.
X_train_print, X_val_print, y_train_print, y_val_print = train_test_split(
    X_print,
    y_print,
    test_size=0.15,
    random_state=42,
    stratify=y_print,
)
tuple(part.shape for part in (X_train_print, X_val_print, y_train_print, y_val_print))

((2937, 784), (519, 784), (2937,), (519,))

In [11]:
# Merge the two training splits into a single training set: MNIST-based rows
# first, then the "digits"-folder rows, with labels in the same order.
X_train = np.concatenate([X_train_mnist, X_train_print], axis=0)
y_train = np.concatenate([y_train_mnist, y_train_print])

In [12]:
# Train a multi-layer perceptron with two hidden layers (250 and 50 units)
# using SGD on the combined training set.  MLPClassifier is already imported
# in the notebook's first cell, so it is not re-imported here.
MLP_model = MLPClassifier(
    hidden_layer_sizes=(250, 50),
    solver='sgd',
    learning_rate_init=.1,
    alpha=1e-4,
    max_iter=50,
    # Same value as max_iter, so training is expected to run up to the
    # iteration cap rather than stop early on a loss plateau.
    n_iter_no_change=50,
    random_state=42,
    # Print the loss after every iteration (plain bool instead of 100).
    verbose=True,
)
MLP_model.fit(X_train, y_train)
print("train accuracy: ",MLP_model.score(X_train, y_train))

Iteration 1, loss = 0.36783053
Iteration 2, loss = 0.16202776
Iteration 3, loss = 0.11361967
Iteration 4, loss = 0.08403816
Iteration 5, loss = 0.06620139
Iteration 6, loss = 0.05184820
Iteration 7, loss = 0.03695070
Iteration 8, loss = 0.03109519
Iteration 9, loss = 0.02792062
Iteration 10, loss = 0.02289526
Iteration 11, loss = 0.01927938
Iteration 12, loss = 0.01627449
Iteration 13, loss = 0.01449091
Iteration 14, loss = 0.01169539
Iteration 15, loss = 0.00694160
Iteration 16, loss = 0.00359143
Iteration 17, loss = 0.00522076
Iteration 18, loss = 0.00382416
Iteration 19, loss = 0.00331904
Iteration 20, loss = 0.00303273
Iteration 21, loss = 0.00290178
Iteration 22, loss = 0.00559683
Iteration 23, loss = 0.01262798
Iteration 24, loss = 0.01041655
Iteration 25, loss = 0.01812080
Iteration 26, loss = 0.02185560
Iteration 27, loss = 0.01533755
Iteration 28, loss = 0.00965672
Iteration 29, loss = 0.00356698
Iteration 30, loss = 0.00186574
Iteration 31, loss = 0.00404898
Iteration 32, los



train accuracy:  1.0


In [13]:
# Predict the MNIST validation split once and compute accuracy from those
# predictions, rather than letting score() run the model over the same data a
# second time.  y_pred_mnist is reused by the classification report below.
y_pred_mnist = MLP_model.predict(X_val_mnist)
print("mnist vaidation accuracy: ",metrics.accuracy_score(y_val_mnist, y_pred_mnist))

mnist vaidation accuracy:  0.9834178284748627


In [14]:
# Persist the fitted model with joblib.  Create the target directory first so
# the dump does not fail on a fresh checkout where "model_dumps" is missing.
os.makedirs("model_dumps", exist_ok=True)
dump(MLP_model, "model_dumps/MLP_onMNIST.joblib")

['model_dumps/MLP_onMNIST.joblib']

In [15]:
# Per-class precision / recall / F1 for the MNIST validation predictions.
print(metrics.classification_report(y_true=y_val_mnist, y_pred=y_pred_mnist))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       0.99      0.99      0.99      1182
           2       0.99      0.98      0.99      1048
           3       0.98      0.98      0.98      1071
           4       0.99      0.98      0.98      1024
           5       0.98      0.98      0.98       947
           6       0.99      0.99      0.99      1031
           7       0.98      0.99      0.98      1094
           8       0.98      0.98      0.98      1024
           9       0.98      0.98      0.98      1044

    accuracy                           0.98      9468
   macro avg       0.99      0.98      0.98      9468
weighted avg       0.98      0.98      0.98      9468



In [16]:
# Predict the "print" validation split (images from the digits folder) once
# and compute accuracy from those predictions.  The message names the print
# set: the previous text was copied from the MNIST cell and mislabelled this
# number as MNIST accuracy.
y_pred_print = MLP_model.predict(X_val_print)
print("print validation accuracy: ",metrics.accuracy_score(y_val_print, y_pred_print))

mnist vaidation accuracy:  0.8420038535645472


In [17]:
# Per-class precision / recall / F1 for the "print" validation predictions.
print(metrics.classification_report(y_true=y_val_print, y_pred=y_pred_print))

              precision    recall  f1-score   support

           0       0.60      1.00      0.75         3
           1       0.93      0.95      0.94        56
           2       0.91      0.81      0.85        62
           3       0.82      0.88      0.85        58
           4       0.88      0.81      0.85        54
           5       0.84      0.81      0.82        58
           6       0.77      0.79      0.78        52
           7       0.95      0.92      0.93        62
           8       0.71      0.76      0.74        55
           9       0.79      0.83      0.81        59

    accuracy                           0.84       519
   macro avg       0.82      0.86      0.83       519
weighted avg       0.85      0.84      0.84       519

