<a href="https://colab.research.google.com/github/lee-thien-tuyen/Digit-Recognizer-/blob/main/digit_recognizer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# download data file from github
! git clone https://github.com/lee-thien-tuyen/Digit-Recognizer-.git 

In [None]:
import pandas as pd
import numpy as np 
import tensorflow as tf 
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline 

In [None]:
# Read the training and test CSVs from the cloned repository directory.
train = pd.read_csv(filepath_or_buffer="Digit-Recognizer-/train.csv")
test = pd.read_csv(filepath_or_buffer="Digit-Recognizer-/test.csv")

# Preview the first ten training rows as the cell output.
train.head(n=10)

In [None]:
# Print a concise summary of the training frame (index, column dtypes, memory usage).
train.info()

In [None]:
# Show descriptive statistics (count, mean, std, min, quartiles, max) for the training columns.
train.describe()

In [None]:
# Report the (rows, columns) of the training and test frames, one per line.
print(train.shape, test.shape, sep="\n")

In [None]:
# Split the training frame into labels and pixel features.
y_train = train['label'].astype('float32')

# Use the same float32 dtype for train and test features (the training
# features were previously int32 while the test features were float32) and
# scale the pixel intensities to [0, 1] so the network trains on small inputs.
# NOTE(review): the division assumes raw pixel values lie in 0-255 — confirm
# against the max row of train.describe().
x_train = train.drop(['label'], axis=1).astype('float32') / 255.0

# The test frame has no label column, so every column is a pixel feature.
# It must receive exactly the same scaling as the training features.
x_test = test.astype('float32') / 255.0

print("x_train have shape:",x_train.shape)
print("y_train have shape:",y_train.shape)
print("x_test have shape:",x_test.shape)

In [None]:
# Reshape each flat pixel row into a 28x28 image with an explicit trailing
# channel axis, matching the (28, 28, 1) input shape the model declares,
# instead of relying on Keras to add the missing axis implicitly.
# np.asarray accepts both a DataFrame and an ndarray, so re-running this
# cell does not fail the way `.values` does on an already-converted array.
x_train = np.asarray(x_train).reshape(-1, 28, 28, 1)
x_test = np.asarray(x_test).reshape(-1, 28, 28, 1)
print("x_train after reshape:",x_train.shape)
print("x_test after reshape:",x_test.shape)

In [None]:
# One-hot encode the labels into 10 classes.
y_train = tf.keras.utils.to_categorical(y_train, num_classes=10)
print("y_train shape:", y_train.shape)

# Display the first five encoded rows as a quick sanity check.
print(y_train[:5])

In [None]:
# Hold out 20% of the labelled samples for validation; the fixed random_state
# makes the split repeatable.
from sklearn.model_selection import train_test_split

x_train, x_val, y_train, y_val = train_test_split(
    x_train,
    y_train,
    test_size=0.2,
    random_state=42,
)
print("x_train shape:", x_train.shape)
print("y_train shape:", y_train.shape)

In [None]:
# Callback that stops training once the target training accuracy is reached.
class mycallback(tf.keras.callbacks.Callback):
  """Stop training as soon as the epoch's 'accuracy' metric exceeds 0.999."""

  def on_epoch_end(self, epoch, logs=None):
    """Check the metrics reported at the end of an epoch.

    Args:
      epoch: Index of the epoch that just finished (unused).
      logs: Dict of metric results for the epoch, or None. A None default
        replaces the previous mutable `{}` default, which would be shared
        between calls.
    """
    # Look the metric up once; it is absent when 'accuracy' is not tracked.
    accuracy = (logs or {}).get('accuracy')
    if accuracy is not None and accuracy > 0.999:
      print("\n Reached 99.9% accuracy so cancelling training!!!")
      self.model.stop_training = True


In [None]:
# Build the convolutional network: three blocks of two 3x3 convolutions
# followed by 2x2 max pooling, then a dense classifier head.
model = tf.keras.models.Sequential([
    # Block 1: 32 filters, no padding argument; declares the 28x28x1 input.
    tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu', input_shape=(28, 28, 1)),
    tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=2, strides=2),
    # Block 2: 64 filters with 'same' padding.
    tf.keras.layers.Conv2D(filters=64, kernel_size=3, activation='relu', padding='same'),
    tf.keras.layers.Conv2D(filters=64, kernel_size=3, activation='relu', padding='same'),
    tf.keras.layers.MaxPooling2D(pool_size=2, strides=2),
    # Block 3: 64 filters with 'same' padding.
    tf.keras.layers.Conv2D(filters=64, kernel_size=3, activation='relu', padding='same'),
    tf.keras.layers.Conv2D(filters=64, kernel_size=3, activation='relu', padding='same'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    tf.keras.layers.Dropout(rate=0.25),
    # Classifier head: flatten, two 256-unit dense layers, dropout, and a
    # 10-way softmax output.
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=256, activation='relu'),
    tf.keras.layers.Dense(units=256, activation='relu'),
    tf.keras.layers.Dropout(rate=0.5),
    tf.keras.layers.Dense(units=10, activation='softmax'),
])
model.summary()

In [None]:
# Configure the Adam optimizer with explicitly spelled-out hyperparameters.
optimizer = tf.keras.optimizers.Adam(
    learning_rate=0.001,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-07,
    name='Adam',
)
# Callback instance that halts training once the accuracy target is hit.
callbacks = mycallback()

# Categorical cross-entropy pairs with the one-hot encoded labels.
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
# Train for up to 50 epochs in batches of 50, evaluating on the validation
# split after each epoch.
model.fit(
    x_train,
    y_train,
    batch_size=50,
    epochs=50,
    validation_data=(x_val, y_val),
    callbacks=[callbacks],
)


In [None]:
# Predict per-class scores for the test images, pick the index with the
# maximum probability in each row, and wrap the result as a Series named
# "Label".
results = pd.Series(
    np.argmax(model.predict(x_test), axis=1),
    name="Label",
)

In [None]:
# Build the submission table: a 1-based ImageId column beside the predicted
# labels. The id range is derived from the number of predictions instead of
# a hard-coded 28000, so the two columns always have the same length.
image_ids = pd.Series(range(1, len(results) + 1), name="ImageId")
submission = pd.concat([image_ids, results], axis=1)

# Write the table without the DataFrame index column.
submission.to_csv("submission.csv", index=False)