In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import os
# Walk the Kaggle input directory tree and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    full_paths = [os.path.join(dirname, filename) for filename in filenames]
    for full_path in full_paths:
        print(full_path)

# Any results you write to the current directory are saved as output.

In [None]:
import warnings
warnings.filterwarnings('ignore')

Loading the data

In [None]:
# Read the training and test tables from the Kaggle input folder.
train_data = pd.read_csv('/kaggle/input/Kannada-MNIST/train.csv')
test_data = pd.read_csv('/kaggle/input/Kannada-MNIST/test.csv')

# Separate the training table into the feature columns and the target column.
X = train_data.drop(columns=['label'])
y_train = train_data['label']

# The combined frame is not needed once features and labels are split out.
del train_data

In [None]:
# Keep the test-set ids aside; they are written next to the predictions in
# the submission file at the end of the notebook.
Id = test_data['id']

In [None]:
# Remove the id column so that only the remaining feature columns are left in the test table.
test_data = test_data.drop(columns=['id'])

Checking for null values

In [None]:
# Flag every column that contains at least one missing value and show the
# distinct flags; a result of only `False` means no nulls anywhere.
# (`.all()` would only flag columns that are *entirely* null and would
# silently miss partially missing data.)
X.isnull().any().unique()

In [None]:
# True if at least one training label is missing.
y_train.isna().any()

In [None]:
# Flag every test column that contains at least one missing value and show
# the distinct flags; a result of only `False` means no nulls anywhere.
# (`.all()` would only flag columns that are *entirely* null and would
# silently miss partially missing data.)
test_data.isnull().any().unique()

# Visualizations

In [None]:
# Count how many training samples carry each label and draw the counts as bars.
label_val = y_train.value_counts()
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(x=label_val.index, y=label_val.values, ax=ax)

Right now each image is stored as a flat row of 784 pixel values, so we have to reshape it to 28×28 before we can display it.

In [None]:
# Temporary image-shaped copy of the features (one 28x28 array per row), used only for plotting.
X_temp = np.reshape(X.values, (len(X), 28, 28))

In [None]:
# Draw a grid of randomly chosen training images: one column per class and
# `samples_per_class` rows.
classes = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
num_classes = len(classes)
samples_per_class = 6
plt.figure(0, figsize=(16, 10))
for col, class_name in enumerate(classes):
    # Positions of all training rows whose label equals this class index ...
    candidates = np.flatnonzero(y_train == col)
    # ... from which a few are drawn without repetition.
    chosen = np.random.choice(candidates, samples_per_class, replace=False)
    for row, sample_idx in enumerate(chosen):
        # Subplot positions are 1-based and numbered row by row.
        grid_pos = row * num_classes + col + 1
        plt.subplot(samples_per_class, num_classes, grid_pos)
        plt.imshow(X_temp[sample_idx])
        plt.axis('off')
        # Only the top image of each column carries the class name.
        if row == 0:
            plt.title(class_name)
plt.show()

Keras needs an additional dimension because it also takes the number of color channels as input.
Since we only have greyscale images here, we just need to add a final channel dimension of size 1.

In [None]:
# Reshape every row into a 28x28 image with a trailing channel axis of size 1.
# np.asarray accepts both the original DataFrame and an already-reshaped
# ndarray, so re-running this cell does not fail on a missing `.values`.
X = np.asarray(X).reshape(X.shape[0], 28, 28, 1)
test_data = np.asarray(test_data).reshape(test_data.shape[0], 28, 28, 1)

# Adding data Augmentation

In [None]:
from keras.preprocessing.image import ImageDataGenerator

In [None]:
# Random augmentation applied on the fly to training batches: small rotations,
# zooms and horizontal/vertical shifts.
datagen = ImageDataGenerator(
        rotation_range=8,
        zoom_range=0.12,
        width_shift_range=0.1,
        height_shift_range=0.1)
# No `datagen.fit(...)` call is needed: `fit` only computes data-dependent
# statistics for featurewise_center, featurewise_std_normalization and
# zca_whitening, none of which are enabled here. Calling it would only make
# extra float copies of the whole dataset.

# Model

In [None]:
from keras.models import Sequential
from keras.layers import Conv2D, Dense, Dropout, BatchNormalization, Flatten, MaxPool2D
from keras.utils.np_utils import to_categorical

In [None]:
from keras.callbacks import ModelCheckpoint

# One-hot encode the integer labels into 10 columns.
y_train = to_categorical(y_train, num_classes=10)

# Save to 'BWeight.md5' only when the monitored validation loss improves.
# The same file name is read back later with `model.load_weights`, so the
# two must stay in sync.
checkpoint = ModelCheckpoint(
    filepath='BWeight.md5',
    monitor='val_loss',
    save_best_only=True,
)

Designing the model

In [None]:
# Two convolutional stages followed by a small dense classifier head.
model = Sequential([
    # Stage 1: 32 filters; the strided 5x5 convolution downsamples further
    # after the pooling layer.
    Conv2D(32, kernel_size=3, activation='relu', input_shape=(28, 28, 1)),
    Conv2D(32, kernel_size=3, activation='relu'),
    MaxPool2D(),
    Conv2D(32, kernel_size=5, strides=2, padding='same', activation='relu'),
    Dropout(0.4),

    # Stage 2: same layout with 64 filters.
    Conv2D(64, kernel_size=3, activation='relu'),
    Conv2D(64, kernel_size=3, activation='relu'),
    MaxPool2D(),
    Conv2D(64, kernel_size=5, strides=2, padding='same', activation='relu'),
    Dropout(0.4),

    # Classifier head: 10 softmax outputs, one per class.
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.4),
    Dense(10, activation='softmax'),
])

Compiling the model

In [None]:
# Categorical cross-entropy matches the one-hot labels and the softmax output;
# accuracy is tracked alongside the loss.
model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

Splitting data

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 5% of the training data for validation; the fixed random_state
# makes the split repeatable.
# Note: this rebinds `y_train` to the training part of the labels only.
X_train, X_val1, y_train, y_val1 = train_test_split(
    X,
    y_train,
    random_state=42,
    test_size=0.05,
)

# Training

In [None]:
# Mini-batch size used by the augmentation generator; it also determines the
# number of steps per epoch in the training call.
size_batch = 64

In [None]:
# Stream augmented mini-batches from the training split.
train_batches = datagen.flow(X_train, y_train, batch_size=size_batch)
# Number of full batches contained in the training split.
batches_per_epoch = X_train.shape[0] // size_batch

# Train for 60 epochs, validating on the held-out split after each epoch and
# letting the checkpoint callback save on improvement.
history = model.fit_generator(
    train_batches,
    steps_per_epoch=batches_per_epoch,
    epochs=60,
    validation_data=(X_val1, y_val1),
    callbacks=[checkpoint],
    verbose=2,
)

In [None]:
# Plot the curves recorded during training: loss on top, accuracy below.
fig, ax = plt.subplots(2, 1)
ax[0].plot(history.history['loss'], color='b', label="Training loss")
ax[0].plot(history.history['val_loss'], color='r', label="validation loss")
legend = ax[0].legend(loc='best', shadow=True)

# Keras releases differ in how the accuracy metric is named in the history
# ('acc' vs. 'accuracy'); use whichever key this run actually recorded so the
# cell does not fail with a KeyError.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'
ax[1].plot(history.history[acc_key], color='b', label="Training accuracy")
ax[1].plot(history.history['val_' + acc_key], color='r', label="Validation accuracy")
legend = ax[1].legend(loc='best', shadow=True)

# Extra validation

Loading best weights

In [None]:
# Restore the weights from the file written by the ModelCheckpoint callback
# during training (same 'BWeight.md5' path).
model.load_weights('BWeight.md5')

Testing on extra validation data

In [None]:
# Load the Dig-MNIST table from the Kaggle input folder; it is used below as
# an additional labelled set to evaluate the trained model on.
extra_validation = pd.read_csv('/kaggle/input/Kannada-MNIST/Dig-MNIST.csv')

In [None]:
# Separate the extra validation table into feature columns and target column.
X_extra_validate = extra_validation.drop(columns=['label'])
y_extra_validate = extra_validation['label']

In [None]:
# Reshape every row into a 28x28 image with a trailing channel axis of size 1,
# the input shape the model was built with.
# np.asarray accepts both the DataFrame and an already-reshaped ndarray, so
# re-running this cell does not fail on a missing `.values`.
X_extra_validate = np.asarray(X_extra_validate).reshape(X_extra_validate.shape[0], 28, 28, 1)

In [None]:
from sklearn.metrics import classification_report

In [None]:
# `Sequential.predict_classes` is not available in newer Keras releases.
# Taking the argmax over the predicted class probabilities yields the same
# labels and works on old and new versions alike.
extra_val_preds = np.argmax(model.predict(X_extra_validate), axis=-1)
print(classification_report(y_extra_validate, extra_val_preds))

The model's performance is good.
Therefore, we make the final predictions with it.

# Final Predictions

In [None]:
# Predicted label for every test image: the index of the highest softmax
# probability. This replaces `Sequential.predict_classes`, which is not
# available in newer Keras releases, and gives the same result.
FINAL_PREDS = np.argmax(model.predict(test_data), axis=-1)

In [None]:
# Pair each saved test id with its predicted label and write the table to the
# working directory, leaving out the DataFrame index.
submission_columns = {'id': Id, 'label': FINAL_PREDS}
submission = pd.DataFrame(submission_columns)
submission.to_csv("Kannada_MNIST_KERAS.csv", index=False)