In [None]:
#importing all necessary libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

import keras
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Conv2D, Dense, MaxPool2D, Flatten, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import warnings
warnings.filterwarnings('ignore')

In [2]:
import io
from google.colab import files

In [None]:
#uplaoding train and test datasets

In [3]:
uploaded=files.upload()
df_train=pd.read_csv(io.BytesIO(uploaded['train.csv']))
df_train.head()

Saving train.csv to train.csv


Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
uploaded=files.upload()
df_test=pd.read_csv(io.BytesIO(uploaded['test.csv']))
df_test.head()

Saving test.csv to test.csv


Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
#preprocessing the train and test datasets(scaling to (0,1) and splitting the train dataset into train and validation sets)

In [6]:
def preprocessing(df_train, df_test):
    X_train = df_train.drop('label', axis=1)
    X_train = np.array(X_train)
    X_train = X_train / 255.0

    X_test = np.array(df_test)
    X_test = X_test / 255.0

    y_train = np.array(df_train['label'])

    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.33, shuffle=True, random_state=42)

    return X_train, X_val, X_test, y_train, y_val

X_train, X_val, X_test, y_train, y_val = preprocessing(df_train, df_test)

In [7]:
X_train

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [8]:
X_train.shape

(28140, 784)

In [9]:
y_train

array([8, 6, 3, ..., 2, 6, 0])

In [10]:
y_train.shape

(28140,)

In [None]:
#reshaping to give the data a single color channel

In [11]:
X_train = X_train.reshape(X_train.shape[0], 28, 28, 1)
X_val = X_val.reshape(X_val.shape[0], 28, 28, 1)
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1)

In [12]:
X_train.shape

(28140, 28, 28, 1)

In [None]:
#one hot encoding to make the network understand what digit the image actually is

In [14]:
from tensorflow.keras.utils import to_categorical
y_cat_train = to_categorical(y_train, num_classes=10)
y_cat_val = to_categorical(y_val, num_classes=10)

In [15]:
y_cat_train

array([[0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.]])

In [None]:
# data augmentation to prevent the model from overfitting and learning noise

In [16]:
train_datagen = ImageDataGenerator(rotation_range=10, width_shift_range=0.1, height_shift_range=0.1, zoom_range=0.1)
train_datagen.fit(X_train)
train_generator = train_datagen.flow(X_train, y_cat_train, batch_size=32)

In [None]:
#building the model

In [17]:
from keras.models import Sequential
from keras.layers import Conv2D, Dense, MaxPool2D, Flatten, Dropout

In [18]:
model=Sequential()
model.add(Conv2D(filters=32, kernel_size=(4,4), padding='same', input_shape=(28,28,1), activation='relu'))
model.add(MaxPool2D(pool_size=(2,2),strides=2,padding="valid"))
model.add(Conv2D(filters=32, kernel_size=(5,5), padding='valid', activation = 'relu'))
model.add(MaxPool2D(pool_size=(2, 2), strides=2, padding="valid"))
model.add(Flatten())
model.add(Dense(units=128, activation='relu'))
model.add(Dense(units=10, activation='softmax'))

In [20]:
model.summary()

In [22]:
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

In [None]:
#training the model

In [23]:
history = model.fit(x=train_generator, epochs=20, verbose=2, validation_data=(X_val, y_cat_val))

Epoch 1/20
880/880 - 46s - 52ms/step - accuracy: 0.8849 - loss: 0.3626 - val_accuracy: 0.9719 - val_loss: 0.0901
Epoch 2/20
880/880 - 80s - 91ms/step - accuracy: 0.9632 - loss: 0.1177 - val_accuracy: 0.9835 - val_loss: 0.0519
Epoch 3/20
880/880 - 82s - 93ms/step - accuracy: 0.9740 - loss: 0.0868 - val_accuracy: 0.9876 - val_loss: 0.0412
Epoch 4/20
880/880 - 80s - 91ms/step - accuracy: 0.9768 - loss: 0.0733 - val_accuracy: 0.9872 - val_loss: 0.0398
Epoch 5/20
880/880 - 41s - 46ms/step - accuracy: 0.9811 - loss: 0.0607 - val_accuracy: 0.9891 - val_loss: 0.0346
Epoch 6/20
880/880 - 40s - 46ms/step - accuracy: 0.9830 - loss: 0.0554 - val_accuracy: 0.9892 - val_loss: 0.0342
Epoch 7/20
880/880 - 41s - 47ms/step - accuracy: 0.9838 - loss: 0.0532 - val_accuracy: 0.9903 - val_loss: 0.0307
Epoch 8/20
880/880 - 40s - 46ms/step - accuracy: 0.9845 - loss: 0.0480 - val_accuracy: 0.9900 - val_loss: 0.0295
Epoch 9/20
880/880 - 41s - 47ms/step - accuracy: 0.9865 - loss: 0.0427 - val_accuracy: 0.9905 - 

In [None]:
#making predictions

In [43]:
preds=model.predict(X_val)

[1m434/434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 14ms/step


In [44]:
preds=np.argmax(preds,axis=1)

In [45]:
preds

array([8, 1, 9, ..., 6, 3, 2])

In [46]:
preds.size

13860

In [47]:
from sklearn.metrics import accuracy_score

In [48]:
accuracy_score(y_val, preds)

0.9874458874458875

In [49]:
from sklearn.metrics import classification_report, confusion_matrix

In [50]:
print(classification_report(y_val, preds))

              precision    recall  f1-score   support

           0       1.00      0.98      0.99      1333
           1       0.99      0.99      0.99      1520
           2       1.00      0.99      0.99      1414
           3       0.99      0.99      0.99      1471
           4       0.98      1.00      0.99      1358
           5       0.99      0.98      0.99      1205
           6       0.98      0.99      0.99      1397
           7       0.99      0.99      0.99      1480
           8       0.95      0.99      0.97      1334
           9       1.00      0.96      0.98      1348

    accuracy                           0.99     13860
   macro avg       0.99      0.99      0.99     13860
weighted avg       0.99      0.99      0.99     13860



In [None]:
#optimization using early stopping and dropout

In [58]:
callback = keras.callbacks.EarlyStopping(monitor='val_accuracy',mode='max', patience=5, verbose=1)

In [53]:
model_new=Sequential()
model_new.add(Conv2D(filters=32, kernel_size=(4,4), padding='same', input_shape=(28,28,1), activation='relu'))
model_new.add(Dropout(0.25))
model_new.add(Conv2D(filters=32, kernel_size=(4,4), padding='same', activation='relu'))
model_new.add(MaxPool2D(pool_size=(2,2),strides=2,padding="valid"))
model_new.add(Conv2D(filters=32, kernel_size=(5,5), padding='valid', activation = 'relu'))
model_new.add(Dropout(0.5))
model_new.add(Conv2D(filters=32, kernel_size=(5,5), padding='valid', activation = 'relu'))
model_new.add(MaxPool2D(pool_size=(2, 2), strides=2, padding="valid"))
model_new.add(Flatten())
model_new.add(Dense(units=128, activation='relu'))
model_new.add(Dropout(0.36))
model_new.add(Dense(units=10, activation='softmax'))

In [55]:
model_new.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy']
)

In [56]:
model_new.summary()

In [59]:
history_new = model_new.fit(x=train_generator, epochs=10, verbose=1,callbacks=[callback], validation_data=(X_val, y_cat_val))

Epoch 1/10
[1m880/880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m133s[0m 152ms/step - categorical_accuracy: 0.9468 - loss: 0.1804 - val_categorical_accuracy: 0.9847 - val_loss: 0.0453
Epoch 2/10
[1m880/880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 154ms/step - categorical_accuracy: 0.9625 - loss: 0.1264 - val_categorical_accuracy: 0.9885 - val_loss: 0.0371
Epoch 3/10
[1m880/880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 150ms/step - categorical_accuracy: 0.9714 - loss: 0.0967 - val_categorical_accuracy: 0.9871 - val_loss: 0.0486
Epoch 4/10
[1m880/880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m132s[0m 150ms/step - categorical_accuracy: 0.9742 - loss: 0.0886 - val_categorical_accuracy: 0.9902 - val_loss: 0.0311
Epoch 5/10
[1m880/880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 150ms/step - categorical_accuracy: 0.9779 - loss: 0.0833 - val_categorical_accuracy: 0.9903 - val_loss: 0.0341
Epoch 6/10
[1m880/880[0m [32m━━━━━━━━━━━━━━━━━━

In [60]:
preds_new=model.predict(X_val)

[1m434/434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 21ms/step


In [62]:
preds_new

array([[1.9124480e-11, 1.3187421e-14, 1.4651563e-07, ..., 5.7526920e-15,
        9.9999982e-01, 1.5603135e-08],
       [2.6712874e-09, 9.9987626e-01, 6.4291301e-09, ..., 3.7655695e-08,
        1.3827252e-05, 2.1318496e-09],
       [5.2009559e-15, 1.1818062e-12, 2.0222950e-13, ..., 1.4943146e-07,
        2.5774931e-09, 9.9999684e-01],
       ...,
       [7.4791475e-12, 5.9802588e-22, 1.4647440e-13, ..., 5.5264485e-22,
        1.2800576e-10, 1.9243525e-17],
       [1.6766607e-12, 1.2750644e-08, 1.6995751e-09, ..., 9.8142862e-11,
        5.1922204e-09, 8.1710916e-10],
       [9.1759683e-16, 3.5660203e-10, 9.9998999e-01, ..., 2.1345304e-08,
        9.9399367e-06, 3.9073752e-17]], dtype=float32)

In [63]:
preds_new=np.argmax(preds_new,axis=1)

In [64]:
preds_new

array([8, 1, 9, ..., 6, 3, 2])

In [65]:
accuracy_score(y_val, preds_new)

0.9874458874458875

In [66]:
print(classification_report(y_val, preds_new))

              precision    recall  f1-score   support

           0       1.00      0.98      0.99      1333
           1       0.99      0.99      0.99      1520
           2       1.00      0.99      0.99      1414
           3       0.99      0.99      0.99      1471
           4       0.98      1.00      0.99      1358
           5       0.99      0.98      0.99      1205
           6       0.98      0.99      0.99      1397
           7       0.99      0.99      0.99      1480
           8       0.95      0.99      0.97      1334
           9       1.00      0.96      0.98      1348

    accuracy                           0.99     13860
   macro avg       0.99      0.99      0.99     13860
weighted avg       0.99      0.99      0.99     13860

