## Setting up the data from Kaggle

In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [2]:
import os
os.environ["KAGGLE_CONFIG_DIR"] = '/content/gdrive/MyDrive/Kaggle'

In [3]:
%cd gdrive/MyDrive/Kaggle

/content/gdrive/MyDrive/Kaggle


In [4]:
!kaggle competitions download -c digit-recognizer

Downloading digit-recognizer.zip to /content/gdrive/MyDrive/Kaggle
  0% 0.00/15.3M [00:00<?, ?B/s] 85% 13.0M/15.3M [00:00<00:00, 135MB/s]
100% 15.3M/15.3M [00:00<00:00, 138MB/s]


In [5]:
!unzip digit-recognizer.zip -d digit-recognizer

Archive:  digit-recognizer.zip
  inflating: digit-recognizer/sample_submission.csv  
  inflating: digit-recognizer/test.csv  
  inflating: digit-recognizer/train.csv  


In [6]:
%cd digit-recognizer

/content/gdrive/MyDrive/Kaggle/digit-recognizer


## Importing libraries

In [7]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [8]:
tf.__version__

'2.11.0'

## Importing the datasets

In [9]:
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

In [10]:
print(train.shape)
print(test.shape)

(42000, 785)
(28000, 784)


In [11]:
train.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [12]:
test.head()

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Checking if the datasets are already clean

In [13]:
train.isnull().sum()

label       0
pixel0      0
pixel1      0
pixel2      0
pixel3      0
           ..
pixel779    0
pixel780    0
pixel781    0
pixel782    0
pixel783    0
Length: 785, dtype: int64

In [14]:
test.isnull().sum()

pixel0      0
pixel1      0
pixel2      0
pixel3      0
pixel4      0
           ..
pixel779    0
pixel780    0
pixel781    0
pixel782    0
pixel783    0
Length: 784, dtype: int64

## The independent variables and the dependent variables

In [15]:
X = train.iloc[:, 1:].values
y = train.iloc[:, 0].values
X_test = test.iloc[:, :].values

In [16]:
print(X.shape)
print(X_test.shape)
print(y.shape)

(42000, 784)
(28000, 784)
(42000,)


## Spliting the training set into the training set and the cross validation set

In [17]:
from sklearn.model_selection import train_test_split
X_train, X_cv, y_train, y_cv = train_test_split(X, y, test_size=0.2, random_state=0)

In [18]:
print(X_train.shape)
print(X_cv.shape)
print(y_train.shape)
print(y_cv.shape)

(33600, 784)
(8400, 784)
(33600,)
(8400,)


## Feature Scaling (without scaling the dependent variable)

In [19]:
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_cv = sc_X.transform(X_cv)
X_test = sc_X.transform(X_test)

In [20]:
print(X_train.shape)
print(X_cv.shape)
print(X_test.shape)
print(y_train.shape)
print(y_cv.shape)

(33600, 784)
(8400, 784)
(28000, 784)
(33600,)
(8400,)


#ANN

## Builiding and Training the ANN on the training set

In [21]:
ann = tf.keras.models.Sequential()
ann.add(tf.keras.layers.Dense(units=50, activation='relu'))
ann.add(tf.keras.layers.Dense(units=50, activation='relu'))
ann.add(tf.keras.layers.Dense(units=50, activation='relu'))
ann.add(tf.keras.layers.Dense(units=10, activation='linear'))

In [22]:
from tensorflow.keras.losses import SparseCategoricalCrossentropy
ann.compile(optimizer='adam', loss=SparseCategoricalCrossentropy(from_logits=True), metrics=['accuracy'])

In [23]:
#epochs=100, units=20,20
#1050/1050 [==============================] - 3s 2ms/step - loss: 0.0077 - accuracy: 0.9978
ann.fit(X_train, y_train, batch_size=32, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f00305cddf0>

## Testing the ANN on the cross validation set

In [24]:
logits = ann(X_cv)
y_pred = tf.nn.softmax(logits)
y_pred = y_pred.numpy()

In [25]:
y_pred = np.argmax(y_pred, axis=1)

In [26]:
print(y_pred.shape)
print(y_cv.shape)

(8400,)
(8400,)


In [27]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_cv, y_pred)
print(cm)
score = accuracy_score(y_cv, y_pred)
print(score)

[[795   0   1   0   3   2   3   2   5   2]
 [  0 946   5   4   0   1   1   1   3   0]
 [  2   1 837   4   0   0   1  10   5   0]
 [  0   2   8 809   1  19   0   5  11   8]
 [  1   3   6   0 782   1   8   3   4  19]
 [  2   0   1   4   0 734   6   1   8   0]
 [  3   0   1   0   2   8 827   0   0   0]
 [  1   3   7   1   2   0   0 875   1   9]
 [  2   7   2   8   0  16   6   1 720   6]
 [  4   0   0   4   8   4   1  15   7 769]]
0.9635714285714285


In [28]:
print(sum(y_pred - y_cv))

106


In [29]:
# This code evaluate the predictions automatically
loss, accuracy = ann.evaluate(X_cv, y_cv)

