<a href="https://www.kaggle.com/code/sitaberete/handwritten-digit-recognizer?scriptVersionId=163196816" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

### Table of Contents

[1. Data Loading and Preprocessing](#dataprep)<br>
[2. Neural Network With Fully Connected Layers (96% validation accuracy)](#dense)<br>
[3. Convolutional Neural Network (99% validation accuracy)](#cnn)<br>
[4. Transfert Learning With MobileNet and ImageNet (100% validation accuracy)](#mobilenet)<br>
[5. Submit Result From Best Model](#submit)<br>






## Data Loading and Preprocessing <a id="dataprep"></a>

In [179]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
import tensorflow.keras.layers as layers

tf.random.set_seed(0)

train_data_path = '/kaggle/input/digit-recognizer/train.csv'
test_data_path = '/kaggle/input/digit-recognizer/test.csv'

train_data = pd.read_csv(train_data_path)
test_data = pd.read_csv(test_data_path)

y = train_data['label']
X = train_data.drop('label', axis=1)

n_classes = y.unique().size

In [37]:
display(X.head())
f"Size = {len(X):0,}"

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


'Size = 42,000'

In [38]:
y.head()

0    1
1    0
2    1
3    4
4    0
Name: label, dtype: int64

In [39]:
len(X) * 0.9

37800.0

In [40]:
from sklearn.model_selection import train_test_split

X_train, X_val, y_train, y_val = train_test_split(X, y, train_size=0.9)

X_train = tf.convert_to_tensor(X_train),
X_val   = tf.convert_to_tensor(X_val)

X_train = X_train / 255
X_val   = X_val / 255

y_train = tf.one_hot(y_train, depth=n_classes)
y_val   = tf.one_hot(y_val, depth=n_classes)

print("X_train shape: ",X_train.shape)
print("y_train shape: ",y_train.shape)
print("X_val shape: ",  X_val.shape)
print("y_val shape: ",  y_val.shape)

X_train shape:  (37800, 784)
Available GPUs: []


In [None]:
gpus = tf.config.list_physical_devices('GPU')
print("Available GPUs:")
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu,True)
    print(gpu)

## 2. Model with fully connected (Dense) layers <a id="dense"></a>

In [None]:
def train_model(X,y, iterations=60, learning_rate=0.001, _lambda = 0.0004):
    model = Sequential(
        name= f'lbda-{_lambda}__lr-{learning_rate}__iter-{iterations}',
        layers = [
            layers.Dense(units=30, activation='relu'), # kernel_regularizer=tf.keras.regularizers.L2(_lambda),
            layers.Dense(units=20, activation='relu'), #kernel_regularizer=tf.keras.regularizers.L2(_lambda)),
            layers.Dense(units=15, activation='relu'), #kernel_regularizer=tf.keras.regularizers.L2(_lambda)),
            layers.Dropout(0.2),
            layers.Dense(units=10),# kernel_regularizer=tf.keras.regularizers.L2(_lambda))
        ], 
    )

    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
        decay_rate=0.9,
        decay_steps=300_000,
        initial_learning_rate=learning_rate,
    )
    
    model.compile(
        loss= tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        optimizer= tf.keras.optimizers.Adam(learning_rate=lr_schedule)
    )

    model.fit(X,y, epochs=iterations)
    return model

models = [
   train_model(X_train, y_train, _lambda=0, learning_rate=learning_rate, iterations=iterations)
#   for iterations, learning_rate in zip([60] * 3,[0.00001, 0.00003, 0.00005])
#   for iterations, learning_rate in zip([60] * 3,[0.00007, 0.0001])
#   for iterations, learning_rate in zip([60] * 2,[0.0002, 0.0004])
#   for iterations, learning_rate in zip([30] * 2,[0.0006, 0.001])
   for iterations, learning_rate in zip([50] ,[0.001])
]

#models = [
#    train_model(X_train, y_train, _lambda=_lambda)
#    for _lambda in [0.001, 0.005, 0.01, 0.03, 0.05, 0.1]
#    for _lambda in [0.0005]
#    for _lambda in [0.0004]
#]

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50

In [54]:
def get_model_accuracy(model, input, label):
    lin_predictions = model.predict(input)
    predictions = [np.argmax(pred) for pred in lin_predictions]
    return np.sum(predictions == label) / len(label)


for index, model in enumerate(models):
    training_accuracy = get_model_accuracy(model, X_train, y_train)
    validation_accuracy = get_model_accuracy(model, X_val, y_val)
    print(f'{model.name}:')
    print(f'Training accuracy = {100*training_accuracy:.2f}%')
    print(f'Validation accuracy = {100*validation_accuracy:.2f}%')
    print('-------------------------------------------------------------')

lbda-0__lr-0.001__iter-50:
Training accuracy = 99.20%
Validation accuracy = 95.69%
-------------------------------------------------------------
lbda-0__lr-0.0007__iter-50:
Training accuracy = 99.50%
Validation accuracy = 95.93%
-------------------------------------------------------------


## 3. Convolutional Neural Network Model  <a id="cnn"></a>

In [None]:
# The images are flattened, let's convert them into 2D for the Convnet
X_train = tf.reshape(X_train, [X_train.shape[0], 28, 28, 1])
X_val   = tf.reshape(X_val, [X_val.shape[0], 28, 28, 1])

In [None]:
def train_model_conv(X, y, iterations=20, learning_rate=0.001):
    model = Sequential([
        layers.Input(shape=(28,28,1)),
        layers.Conv2D(filters=6, kernel_size=4, activation='relu'),
        layers.MaxPooling2D(pool_size=2, strides=2),
        layers.Conv2D(filters=16, kernel_size=4, activation='relu'),
        layers.MaxPooling2D(pool_size=2, strides=2),
        layers.Flatten(),
        layers.Dense(units=120, activation='relu'),
        layers.Dense(units=40,  activation='relu'),
        layers.Dense(units=n_classes)
    ])
    
    model.compile(
        loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
        optimizer=tf.keras.optimizers.Adam(learning_rate),
        metrics=['accuracy']
    )
    
    metrics = model.fit(X,y, epochs=iterations)
    
    return model, metrics

model, metrics = train_model_conv(X_train, y_train)

In [None]:
model.evaluate(X_val, y_val)

plt.plot(metrics.history['loss'])

## 5. Submit result from best model  <a id="submit"></a>

In [None]:
test_data.head()

In [None]:
X_final = np.concatenate((X_train, X_val))
y_final = np.concatenate((y_train, y_val))
model = train_model(X_final, y_final, initial_learning_rate=0.001, _lambda = 0.0005, iterations=20)

In [91]:
X_test = tf.convert_to_tensor(test_data)
test_predictions = model.predict(X_test)
test_predictions = [np.argmax(pred) for pred in test_predictions]
submission_df = pd.DataFrame({'ImageId': test_data.index + 1, 'Label': test_predictions})
display(submission_df)
submission_df.to_csv('submission.csv', index=False)



Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,3
3,4,9
4,5,3
...,...,...
27995,27996,9
27996,27997,7
27997,27998,3
27998,27999,9
