# Digit Recognizer

Kaggle competition: [Digit Recognizer](https://www.kaggle.com/competitions/digit-recognizer/)

In [60]:
# imports

import pandas as pd
import numpy as np

import tensorflow as tf

import keras
from keras import layers, optimizers, metrics, callbacks

In [75]:
# Read the training and test CSVs in one pass (train first, then test).
train_df, test_df = map(
    pd.read_csv,
    ("./data/input/train.csv", "./data/input/test.csv"),
)

In [65]:
label = 'label'
# Every column other than the label column is treated as a pixel feature
pixels = train_df.columns.drop(label).tolist()

X = train_df[pixels].to_numpy()
y = train_df[label]

# Reshape each flat row of pixels into a 28 x 28 single-channel image
X = X.reshape(-1, 28, 28, 1)

# Scale pixel values by 1/255
# (assumes 8-bit intensities in 0-255, giving a 0-1 range; confirm against the data)
X = X / 255

# The one-hot depth has to cover the largest label value, not merely the number
# of distinct labels present: if a class were missing from this file, a depth of
# `nunique` would make tf.one_hot emit all-zero rows for the highest labels.
# Assumes labels are non-negative integers.
n_labels = int(y.max()) + 1

# One-hot encode the labels into an array of shape (n_samples, n_labels)
y = tf.one_hot(y, n_labels).numpy()

In [77]:
# Give the unlabelled test pixels the same treatment as the training pixels:
# select the pixel columns, reshape to 28 x 28 x 1 images, then divide by 255.
X_test_unseen = test_df[pixels].to_numpy().reshape(-1, 28, 28, 1) / 255

In [66]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the labelled samples for validation; the fixed random_state
# makes the split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [87]:
# NOTE(review): in the visible notebook `model` is first assigned in the cell
# after this one, so on a fresh kernel (Restart & Run All) this call raises
# NameError. It should run after the model has been built.
model.summary()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_14 (Conv2D)          (None, 26, 26, 32)        320       
                                                                 
 max_pooling2d_14 (MaxPoolin  (None, 13, 13, 32)       0         
 g2D)                                                            
                                                                 
 conv2d_15 (Conv2D)          (None, 11, 11, 16)        4624      
                                                                 
 max_pooling2d_15 (MaxPoolin  (None, 5, 5, 16)         0         
 g2D)                                                            
                                                                 
 flatten_7 (Flatten)         (None, 400)               0         
                                                                 
 dense_14 (Dense)            (None, 128)              

In [88]:
# Seed TensorFlow's global random generator so that weight initialisation is
# repeatable between runs (hardware-level nondeterminism may still remain).
tf.random.set_seed(42)

model = keras.Sequential(
    [
        layers.InputLayer(input_shape=(28, 28, 1)),
        # Conv2D applies no activation unless one is given; ReLU supplies the
        # non-linearity after each convolution.
        layers.Conv2D(filters=32, kernel_size=3, activation='relu'),
        layers.MaxPooling2D(),
        layers.Conv2D(filters=16, kernel_size=3, activation='relu'),
        layers.MaxPooling2D(),
        layers.Flatten(),
        layers.Dense(128, activation='sigmoid'),
        # One output unit per one-hot label column, so the head always matches
        # the encoded targets instead of relying on a hard-coded 10.
        layers.Dense(y_train.shape[1], activation='softmax')
    ]
)


# Labels are one-hot encoded, hence categorical cross-entropy and categorical accuracy
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['categorical_accuracy'])

# Train for 10 epochs, validating on the held-out split after each epoch.
# The History object is kept so the per-epoch metrics can be inspected later.
history = model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2b079f14b20>

In [86]:
from datetime import datetime
from pathlib import Path


# Predict per-class probabilities (softmax outputs) for the unseen test images
unseen_probabilities = model.predict(X_test_unseen)

# Convert each row of probabilities to the index of its largest entry
y_unseen_predicted = tf.argmax(unseen_probabilities, axis = 1).numpy()

# The timestamp in the file name keeps earlier submissions from being overwritten
now = datetime.now().strftime("%Y%m%d_%H%M%S")

# Reuse the sample submission's index and layout, replacing only the Label column
output = pd.read_csv("./data/input/sample_submission.csv", index_col=0)
output['Label'] = y_unseen_predicted

# DataFrame.to_csv does not create missing directories, so make sure the
# output folder exists before writing.
output_dir = Path("./data/output")
output_dir.mkdir(parents=True, exist_ok=True)

output.to_csv(output_dir / f"digit_recognizer_{now}.csv", index=True)

