In [None]:
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily enumerate every file beneath the Kaggle input directory and echo its full path.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for file_path in input_file_paths:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import csv
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Read the competition CSV files into DataFrames.
train = pd.read_csv('../input/digit-recognizer/train.csv')
test = pd.read_csv('../input/digit-recognizer/test.csv')

# Preview the first rows of each frame, separated by a dashed rule.
separator = '---' * 20
print(train.head(), separator, test.head(), sep='\n')


In [None]:
# Split the training frame: first column holds the labels, the remaining columns the pixels.
training_labels = train.iloc[:, 0].values.astype('int32')
training_images = train.iloc[:, 1:].values.astype('float32')

# Every column of the test frame is used as pixel data.
testing_images = test.values.astype('float32')

# Turn each flat pixel row into a 28x28 single-channel image: (N, 28, 28, 1).
training_images = training_images.reshape(len(training_images), 28, 28, 1)
X_test = testing_images.reshape(len(testing_images), 28, 28, 1)

In [None]:
# Report the shape of each prepared array.
for caption, array in (
    ("Total Training images", training_images),
    ("Total Training labels", training_labels),
    ("Total Testing images", X_test),
):
    print(caption, array.shape)

In [None]:
# Show the first 24 training images in a 4x6 grid, each titled with its label.
fig, axes = plt.subplots(
    nrows=4,
    ncols=6,
    figsize=(8, 8),
    subplot_kw={'xticks': [], 'yticks': []},  # hide tick marks on every panel
)

for idx, axis in enumerate(axes.flat):
    # Drop the trailing channel dimension so imshow receives a 2-D array.
    axis.imshow(training_images[idx].reshape(28, 28), cmap='gray')
    axis.set_title(training_labels[idx], fontsize=15)

plt.tight_layout(pad=0.5)
plt.show()

In [None]:
# Display the number of training images for each digit

# value_counts() orders its result by frequency, not by label. Sort by index so
# that the x positions (labels) and the bar heights (counts) come from the same
# rows; pairing sorted labels with frequency-ordered counts mislabels the bars.
vc = pd.Series(training_labels).value_counts().sort_index()
plt.figure(figsize=(20,5))
sns.barplot(x = vc.index, y = vc.values, palette = "rocket")
plt.title("Number of pictures of each category", fontsize = 15)
plt.xlabel("Digit", fontsize = 15)
plt.ylabel("Count", fontsize = 15)
plt.xticks(fontsize = 15)
plt.show()

In [None]:
# Hold out 20% of the labelled images for validation; the fixed seed keeps the split repeatable.
X, y = training_images, training_labels

X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=1,
)

for split in (X_train, X_val):
    print(split.shape)

In [None]:
# Build the batch generators that feed the model.

# Training batches: pixels scaled by 1/255 plus small random shifts, zoom,
# shear and rotation. Horizontal flipping stays off.
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255.0,
    rotation_range=10,
    shear_range=0.1,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    fill_mode='nearest',
    horizontal_flip=False,
)

# Validation batches: only the 1/255 rescale, no augmentation.
validation_datagen = ImageDataGenerator(rescale=1.0 / 255)

# Both generators yield (images, labels) batches of 128 samples.
train_datagenerator = train_datagen.flow(X_train, y_train, batch_size=128)
validation_datagenerator = validation_datagen.flow(X_val, y_val, batch_size=128)

In [None]:
# Define a Callback class that stops training once accuracy exceeds 99.0%

class myCallback(tf.keras.callbacks.Callback):
  """Stop training as soon as the epoch's training accuracy is above 0.99."""

  def on_epoch_end(self, epoch, logs=None):
    """Check the logged 'accuracy' at the end of an epoch and stop if it is > 0.99.

    Args:
      epoch: Index of the epoch that just finished (unused).
      logs: Metric dictionary for the epoch; may be None or lack 'accuracy'.
    """
    # Guard against a missing dict or metric: comparing None with a float
    # would raise TypeError and abort training.
    accuracy = (logs or {}).get('accuracy')
    if accuracy is not None and accuracy > 0.99:
      print("\nReached 99.0% accuracy so cancelling training!")
      self.model.stop_training = True

In [None]:
# CNN for 28x28x1 inputs: a random-contrast layer, three Conv2D + max-pool
# stages (32 -> 256 -> 512 filters), then two 512-unit dense layers with
# 20% dropout and a 10-way softmax output.
keras_layers = tf.keras.layers

model = tf.keras.models.Sequential([
    keras_layers.InputLayer(input_shape=[28, 28, 1]),
    # NOTE(review): the `experimental.preprocessing` namespace may be absent in
    # newer TensorFlow releases -- confirm against the installed version.
    keras_layers.experimental.preprocessing.RandomContrast(0.2),

    # Convolutional feature extractor.
    keras_layers.Conv2D(32, (3, 3), activation='relu'),
    keras_layers.MaxPool2D(2, 2),
    keras_layers.Conv2D(256, (3, 3), activation='relu'),
    keras_layers.MaxPool2D(2, 2),
    keras_layers.Conv2D(512, (3, 3), activation='relu'),
    keras_layers.MaxPool2D(2, 2),

    # Classifier head.
    keras_layers.Flatten(),
    keras_layers.Dense(512, activation='relu'),
    keras_layers.Dropout(0.2),
    keras_layers.Dense(512, activation='relu'),
    keras_layers.Dropout(0.2),
    keras_layers.Dense(10, activation='softmax'),
])

In [None]:
# Print a summary of the layers in the model defined above.
model.summary()

In [None]:
# Configure training: Adam with its default settings, sparse categorical
# cross-entropy (labels are integer class ids, not one-hot), accuracy metric.
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Multiply the learning rate by 0.25 when validation accuracy has not improved
# for 2 epochs, never going below 1e-4.
learning_rate_reduction = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.25,
    patience=2,
    min_lr=0.0001,
    verbose=1,
)

In [None]:
# Fit the model on the augmented training batches for at most 50 epochs.
# The step counts are the number of full 128-sample batches in each split.
callbacks = myCallback()

history = model.fit(
    train_datagenerator,
    epochs=50,
    steps_per_epoch=len(X_train) // 128,
    validation_data=validation_datagenerator,
    validation_steps=len(X_val) // 128,
    callbacks=[callbacks, learning_rate_reduction],
)

In [None]:
# Plot the chart for accuracy and loss on both training and validation

acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc))

# Create each figure explicitly at 16x9 inches. Resizing a `fig` variable left
# over from an earlier cell would not affect these plots and fails when that
# cell has not been run.
plt.figure(figsize=(16, 9))
plt.plot(epochs, acc, 'r', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.legend()

plt.figure(figsize=(16, 9))
plt.plot(epochs, loss, 'r', label='Training Loss')
plt.plot(epochs, val_loss, 'b', label='Validation Loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()

In [None]:
from sklearn.metrics import confusion_matrix,accuracy_score, classification_report
# Predict the labels of the held-out validation images.
# The generators used for training rescale pixels by 1/255, so the raw
# 0-255 arrays must be scaled the same way before being passed to predict().
pred = model.predict(X_val / 255.0)
pred = np.argmax(pred,axis=1)

# Get the accuracy score
acc = accuracy_score(y_val,pred)

# Display the results (these are validation images, not the Kaggle test set)
print(f'## {acc*100:.2f}% accuracy on the validation set')

In [None]:
# Print the text report comparing the validation labels with the predicted labels.
print(classification_report(y_val, pred)) 

In [None]:
# Heatmap of the confusion matrix, normalised over the true classes (rows).
cf_matrix = confusion_matrix(y_val, pred, normalize='true')
class_names = sorted(set(y_val))  # tick labels: the distinct validation labels

plt.figure(figsize=(20, 15))
sns.heatmap(
    cf_matrix,
    annot=True,
    cbar=False,
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.title('Normalized Confusion Matrix\n', fontsize=23)
plt.xlabel("Predicted Classes", fontsize=15)
plt.ylabel("True Classes", fontsize=15)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15, rotation=0)
plt.show()

In [None]:
# Indices of validation samples whose predicted label equals the true label.
correct = np.nonzero(pred == y_val)[0]

# Draw the first nine of them in a 3x3 grid.
plt.figure(figsize=(8, 8))
for slot, sample in enumerate(correct[:9], start=1):
    plt.subplot(3, 3, slot)
    plt.imshow(X_val[sample].reshape(28, 28), cmap="gray", interpolation='none')
    plt.title("Predicted:{}, Actual:{}".format(pred[sample], y_val[sample]))
    plt.tight_layout()

In [None]:
# Final submission

# The model was trained on pixels rescaled by 1/255 (see the data generators),
# so the raw 0-255 test images need the same scaling before prediction.
predictions = model.predict(X_test / 255.0, verbose=0)
# Pick the class with the highest softmax probability for each image.
pred = predictions.argmax(axis=1)
print(pred)

In [None]:
# Assemble the submission table: 1-based image ids alongside the predicted labels.
image_ids = range(1, len(pred) + 1)
submission = pd.DataFrame({"ImageId": image_ids, "Label": pred})

# Write it with a header row and without the DataFrame index.
submission.to_csv("submission.csv", index=False, header=True)

print("Finished submission file to csv")