In [None]:
import numpy as np
import pandas as pd
import keras 
import tensorflow.compat.v1 as tf
import matplotlib.pyplot as plt
%matplotlib inline
from keras.models import Sequential
from keras import initializers
import keras.utils
from keras.optimizers import RMSprop, Adam
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.image as mpimg
from keras.utils.np_utils import to_categorical
from sklearn.model_selection import train_test_split
import itertools
import warnings
warnings.filterwarnings('ignore')
import os
# List every file available under the Kaggle input directory so the
# dataset paths used below can be checked against what is mounted.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Seed both random number generators used by this notebook. Seeding NumPy
# alone leaves TensorFlow's own random ops (e.g. dropout masks) unseeded,
# so repeated runs would still differ.
np.random.seed(7)
tf.set_random_seed(7)

In [None]:
# Load the labelled training set and the unlabelled test set of the
# Kaggle "digit-recognizer" competition.
train_data = pd.read_csv('/kaggle/input/digit-recognizer/train.csv')
test_data = pd.read_csv('/kaggle/input/digit-recognizer/test.csv')

In [None]:
# Column count, dtypes and memory footprint of the training frame.
train_data.info()

In [None]:
# Column count, dtypes and memory footprint of the test frame.
test_data.info()

In [None]:
# Per-column summary statistics of the training frame.
train_data.describe()

In [None]:
# Per-column summary statistics of the test frame.
test_data.describe()

In [None]:
# Peek at the first three test rows.
test_data.head(n=3)

In [None]:
# Peek at the last three training rows.
train_data.tail(n=3)

In [None]:
# (rows, columns) of the training and the test frame, in that order.
(train_data.shape, test_data.shape)

In [None]:
# Number of missing values in each training column.
train_data.isna().sum(axis=0)

In [None]:
# Number of missing values in each test column.
test_data.isna().sum(axis=0)

There are no null values in either dataset, so we can proceed without any imputation.

In [None]:
# How many training samples there are for each digit label
sns.countplot(data=train_data, x='label')

In [None]:
# Separate the target column from the pixel columns
X_train = train_data.drop(columns='label')
y_train = train_data['label']

In [None]:
# Divide every pixel value by 255.0 in both the training features and the test set.
# NOTE: this rebinds the same names, so re-running the cell divides by 255 again.
X_train, test_data = X_train / 255.0, test_data / 255.0

In [None]:
 #Reshaping the data
# Turn each row of pixel columns into a 28 x 28 x 1 array; -1 lets NumPy
# infer the number of images.
X_train = X_train.values.reshape((-1, 28, 28, 1))
test_data = test_data.values.reshape((-1, 28, 28, 1))

In [None]:
# Number of training samples per label value.
y_train.value_counts()

In [None]:
# One-hot encode the labels into vectors of length 10 (one slot per class),
# the target format used with the categorical cross-entropy loss below.
y_train = tf.keras.utils.to_categorical(
    y_train,
    num_classes=10,
)
y_train

In [None]:
# Hold out 10% of the training samples as a validation set; the fixed
# random_state makes the split repeatable.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train,
    y_train,
    test_size=0.1,
    random_state=2,
)

In [None]:
# Display the training image at index 12 (its single channel, as a 2-D array).
g = plt.imshow(X_train[12, :, :, 0])

Defining Model

In [None]:
# CNN classifier assembled from a single layer list. Every trainable layer
# gets its own Glorot-uniform initializer created with seed=0.
model = Sequential([
    # Convolutional block 1: two 5x5 convolutions (32 filters) -> 2x2 max-pool -> dropout
    Conv2D(filters=32, kernel_size=(5, 5), padding='Same', activation='relu',
           kernel_initializer=tf.keras.initializers.glorot_uniform(seed=0),
           input_shape=(28, 28, 1)),
    Conv2D(filters=32, kernel_size=(5, 5), padding='Same', activation='relu',
           kernel_initializer=tf.keras.initializers.glorot_uniform(seed=0)),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.50),

    # Convolutional block 2: two 3x3 convolutions (64 filters) -> 2x2 max-pool -> dropout
    Conv2D(filters=64, kernel_size=(3, 3), padding='Same', activation='relu',
           kernel_initializer=tf.keras.initializers.glorot_uniform(seed=0)),
    Conv2D(filters=64, kernel_size=(3, 3), padding='Same', activation='relu',
           kernel_initializer=tf.keras.initializers.glorot_uniform(seed=0)),
    MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
    Dropout(0.50),

    # Classifier head: flatten -> 256-unit dense layer -> dropout -> 10-way softmax
    Flatten(),
    Dense(256, activation="relu",
          kernel_initializer=tf.keras.initializers.glorot_uniform(seed=0)),
    Dropout(0.50),
    Dense(10, activation="softmax",
          kernel_initializer=tf.keras.initializers.glorot_uniform(seed=0)),
])

In [None]:
# Print the layer-by-layer summary of the model defined above.
model.summary()

In [None]:
# Adam optimizer with an initial learning rate of 0.001.
# The former `decay=0.0` argument was a no-op (0.0 disables decay) and is
# rejected by newer Keras optimizer implementations, so it is omitted.
optimizer = Adam(learning_rate=0.001, beta_1=0.9, epsilon=1e-08)
optimizer

In [None]:
# Categorical cross-entropy matches the one-hot encoded labels; accuracy is
# reported alongside the loss during training.
model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [None]:
# Callback that halves the learning rate (factor=0.5) once val_loss has not
# improved for 3 epochs, never going below min_lr.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    min_lr=0.0001,
    verbose=1,
)
learning_rate_reduction

Fitting model without data augmentation

In [None]:
# Baseline run: train directly on the (non-augmented) training images and
# evaluate on the held-out validation split after every epoch.
epochs = 5
batch_size = 100
history = model.fit(
    X_train,
    y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(X_valid, y_valid),
    verbose=2,
)

Fitting the model with data augmentation for a better accuracy score

In [None]:
# Augmentation settings, grouped by purpose and passed on as keyword arguments.
augmentation_options = dict(
    # Dataset-level / sample-level normalisation: all switched off
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
    # Random geometric perturbations
    rotation_range=20,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    # No flips
    horizontal_flip=False,
    vertical_flip=False,
)
datagenerator = ImageDataGenerator(**augmentation_options)

In [None]:
# NOTE(review): per the Keras documentation, fit() only computes the statistics
# needed for featurewise_center, featurewise_std_normalization and
# zca_whitening. All three are disabled in the generator above, so this call
# should not affect the augmented batches - confirm before removing it.
datagenerator.fit(X_train)

In [None]:
# Continue training the same `model` on randomly augmented batches.
# `epochs` and `batch_size` are the settings defined for the baseline run, so
# both runs stay comparable when those values are changed (the epoch count
# was previously repeated here as a literal 5).
# steps_per_epoch uses floor division, so each epoch draws only full batches.
# NOTE: `history` is overwritten; only this run's curves remain available.
history = model.fit(
    datagenerator.flow(X_train, y_train, batch_size=batch_size),
    epochs=epochs,
    validation_data=(X_valid, y_valid),
    verbose=2,
    steps_per_epoch=X_train.shape[0] // batch_size,
    callbacks=[learning_rate_reduction],
)

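The run with data augmentation reaches a higher val_accuracy. Note, however, that it continues training the model already fitted above, so part of the improvement comes from the additional epochs rather than from augmentation alone.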

Training and validation curves 

In [None]:
# Loss and accuracy curves of the most recent `model.fit` run.
# A single figure with two stacked axes is created; the figure size is passed
# to plt.subplots directly. Separate plt.figure(figsize=...) calls would only
# open extra, empty figures and leave the subplot figure at its default size.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 6))

# Top panel: loss per epoch
ax1.plot(history.history['loss'], color='green', label='training_loss')
ax1.plot(history.history['val_loss'], color='darkred', label='validation_loss')
ax1.set_ylabel('loss')
ax1.legend(loc='best', shadow=True)

# Bottom panel: accuracy per epoch
ax2.plot(history.history['accuracy'], color='cyan', label='training_accuracy')
ax2.plot(history.history['val_accuracy'], color='black', label='validation_accuracy')
ax2.set_xlabel('epoch')
ax2.set_ylabel('accuracy')
ax2.legend(loc='best', shadow=True)

In [None]:
#Predict the values from the validation dataset
# Model output for every validation image (one score per class)
y_pred = model.predict(X_valid)
# Predicted class index = position of the highest score in each row
y_pred_classes = y_pred.argmax(axis=1)
# True class index recovered from the one-hot encoded validation labels
y_true = y_valid.argmax(axis=1)

In [None]:
# Classification report and confusion matrix
from sklearn.metrics import confusion_matrix, classification_report

# Per-class precision / recall / F1, followed by the raw confusion matrix
# (rows: true labels, columns: predicted labels).
validation_report = classification_report(y_true, y_pred_classes)
validation_confusion = confusion_matrix(y_true, y_pred_classes)
print(validation_report)
print(validation_confusion)

In [None]:
#Display some error results 
#Errors are difference between predicted labels and true labels
# Boolean mask that is True wherever the predicted class differs from the true class
errors = y_pred_classes != y_true

# Keep only the misclassified validation samples
X_valid_errors = X_valid[errors]
y_true_errors = y_true[errors]
y_pred_errors = y_pred[errors]
y_pred_classes_errors = y_pred_classes[errors]

def show_errors(errors_index,img_errors,pred_errors, obs_errors):
    """Plot misclassified images in a 2 x 3 grid with predicted and true labels.

    Parameters
    ----------
    errors_index : sequence of int
        Positions, into the three arrays below, of the errors to display.
        Only the first six entries are used. If fewer than six are given,
        the remaining panels are hidden instead of raising an IndexError.
    img_errors : array-like
        Images; each selected entry is reshaped to 28 x 28 for display.
    pred_errors : array-like
        Predicted label for each image.
    obs_errors : array-like
        Observed (true) label for each image.
    """
    nrows = 2
    ncols = 3
    fig, ax = plt.subplots(nrows,ncols,sharex=False,sharey=False)
    plt.tight_layout(pad=2.3)

    # ax.flat walks the grid row by row, the same order as nested row/col loops.
    panels = list(ax.flat)
    selected = list(errors_index[:nrows * ncols])

    for panel, error in zip(panels, selected):
        panel.imshow((img_errors[error]).reshape((28,28)))
        panel.set_title("Predicted label :{}\nTrue label :{}".format(pred_errors[error],obs_errors[error]))

    # Hide panels that have no error to show.
    for panel in panels[len(selected):]:
        panel.axis('off')

# Score the model gave to its (wrong) predicted class, per misclassified sample
y_pred_errors_prob = np.max(y_pred_errors,axis = 1)

# Score the model gave to the TRUE class of each misclassified sample.
# Direct (row, column) indexing picks exactly one value per row; the earlier
# np.take(...) + np.diagonal(...) form built a full n x n matrix to get the
# same values.
true_prob_errors = y_pred_errors[np.arange(len(y_true_errors)), y_true_errors]

# Gap between the score of the predicted label and that of the true label
delta_pred_true_errors = y_pred_errors_prob - true_prob_errors

# np.argsort sorts in ascending order, so the largest gaps are at the END
sorted_errors = np.argsort(delta_pred_true_errors)

# Top 6 errors: the six samples with the largest gap. Slicing from the start
# ([:6]) would select the six smallest gaps instead.
frequent_errors = sorted_errors[-6:]

show_errors(frequent_errors, X_valid_errors, y_pred_classes_errors, y_true_errors)

In [None]:

# Model output for every test image (one score per class)
test_scores = model.predict(test_data)

# Index of the highest score per row, stored as a Series named "Label"
results = pd.Series(np.argmax(test_scores, axis=1), name="Label")
results

In [None]:
# Kaggle's Digit Recognizer expects two columns: a 1-based ImageId and the
# predicted Label. Writing `results` on its own produced a Label-only file.
submission = pd.DataFrame({
    "ImageId": np.arange(1, len(results) + 1),
    "Label": results,
})
submission.to_csv('adriankasito.csv', index=False)