# Intro 
Welcome to the [Kannada-MNIST](https://www.kaggle.com/c/Kannada-MNIST) Competition
![](https://storage.googleapis.com/kaggle-media/competitions/Kannada-MNIST/kannada.png)
This notebook is beginner-friendly starter code that is easy to understand. A simple CNN is used to predict the test data.

You can find some helpful information on image classification with an MNIST dataset here:

https://towardsdatascience.com/image-classification-in-10-minutes-with-mnist-dataset-54c35b77a38d



<span style="color: royalblue;">Please upvote the notebook if it helps you. Thank you.</span>

# Load Libraries

In [None]:
import numpy as np
import pandas as pd
import scipy.special
import matplotlib.pyplot as plt
import os
# List the competition input directory so the available files show up in the output.
input_files = os.listdir("../input/Kannada-MNIST")
print(input_files)

In [None]:
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from keras.optimizers import RMSprop,Adam
from keras.preprocessing.image import ImageDataGenerator

In [None]:
from sklearn.model_selection import train_test_split

# Load Data
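We load the Dig-MNIST file as an additional labelled validation set. Note that the validation data actually used during training is split off from the training data further below.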

In [None]:
# Directory holding the competition CSV files. The trailing slash matters
# because file names are appended with plain string concatenation.
path_in = "../input/Kannada-MNIST/"

In [None]:
# Read every competition CSV into its own DataFrame (same order as the names).
train_data, val_data, test_data, samp_subm = (
    pd.read_csv(path_in + csv_name)
    for csv_name in (
        'train.csv',
        'Dig-MNIST.csv',
        'test.csv',
        'sample_submission.csv',
    )
)

# A Look at the Labels
The labels are equally distributed, so we do not need class weights for the CNN.

In [None]:
# Bar chart of how many training samples each digit class has.
# The bar positions are taken from the label values themselves (the index of
# the sorted counts) instead of an assumed 0..9 range, so a class that is
# missing from the data cannot silently shift counts onto the wrong labels.
label_counts = train_data['label'].value_counts().sort_index()
names = label_counts.index.tolist()
values = label_counts.tolist()
plt.bar(names, values)
plt.grid()
plt.show()

In [None]:
# Bar chart of how many Dig-MNIST (validation file) samples each class has.
# The bar positions are taken from the label values themselves (the index of
# the sorted counts) instead of an assumed 0..9 range, so a class that is
# missing from the data cannot silently shift counts onto the wrong labels.
label_counts = val_data['label'].value_counts().sort_index()
names = label_counts.index.tolist()
values = label_counts.tolist()
plt.bar(names, values)
plt.grid()
plt.show()

# Overview

In [None]:
# Report the number of rows in each loaded data set.
print('# train samples:', train_data.shape[0])
print('# val samples:', val_data.shape[0])
print('# test samples:', test_data.shape[0])

# Define train, val and test set

In [None]:
# Separate the features from the targets; the integer labels are converted
# to one-hot vectors with 10 classes.
X_train = train_data.drop(columns='label')
y_train = to_categorical(train_data['label'], num_classes=10)

X_val = val_data.drop(columns='label')
y_val = to_categorical(val_data['label'], num_classes=10)

# The test frame has an 'id' column that must not be fed to the model.
X_test = test_data.drop(columns='id')

In [None]:
# Turn every flat row into a 28x28 single-channel image; -1 lets NumPy infer
# the number of samples.
image_shape = (-1, 28, 28, 1)
X_train = X_train.values.reshape(image_shape)
X_val = X_val.values.reshape(image_shape)
X_test = X_test.values.reshape(image_shape)

# Scale data

In [None]:
# Cast to float32 and divide by 255 (presumably mapping 8-bit pixel values
# into [0, 1] — confirm against the data).
X_train, X_val, X_test = (
    pixels.astype('float32') / 255
    for pixels in (X_train, X_val, X_test)
)

# Split train data to get val data

In [None]:
# Hold out 10% of the training samples for validation, with a fixed seed.
# NOTE: this rebinds X_val / y_val, replacing the Dig-MNIST arrays that were
# prepared in the earlier cells.
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.1,
    random_state=2020,
)

# Some Examples

In [None]:
# Show one example image for each of the ten digit classes.
# Decode the one-hot training labels once so samples can be looked up by class.
train_labels = y_train.argmax(axis=1)

fig, axs = plt.subplots(2, 5, figsize=(15, 6))
fig.subplots_adjust(hspace=.5, wspace=.5)
axs = axs.ravel()
for i in range(10):
    # Find the sample inside the split arrays themselves: row positions taken
    # from train_data no longer line up with X_train / y_train once
    # train_test_split has shuffled and shrunk them.
    idx = np.flatnonzero(train_labels == i)[0]
    axs[i].imshow(X_train[idx][:, :, 0], cmap='gray')
    axs[i].set_title(train_labels[idx])
    # Hide the tick labels; pixel coordinates add nothing to the picture.
    axs[i].set_xticklabels([])
    axs[i].set_yticklabels([])

# Define Model
We use a simple CNN model.

In [None]:
# Simple CNN: three stages of two 3x3 'same'-padded ReLU convolutions, each
# followed by 2x2 max-pooling, then dropout and a dense classifier head.
model = Sequential([
    # Stage 1 (the first layer fixes the input to 28x28 single-channel images).
    Conv2D(filters=64, kernel_size=3, padding='same', activation='relu',
           input_shape=(28, 28, 1)),
    Conv2D(filters=128, kernel_size=3, padding='same', activation='relu'),
    MaxPool2D(pool_size=2),

    # Stage 2
    Conv2D(filters=128, kernel_size=3, padding='same', activation='relu'),
    Conv2D(filters=256, kernel_size=3, padding='same', activation='relu'),
    MaxPool2D(pool_size=2, padding='same'),

    # Stage 3
    Conv2D(filters=256, kernel_size=3, padding='same', activation='relu'),
    Conv2D(filters=512, kernel_size=3, padding='same', activation='relu'),
    MaxPool2D(pool_size=2, padding='same'),

    # Dropout is applied to the feature maps before they are flattened.
    Dropout(0.5),

    # Classifier head: one hidden dense layer, then a 10-way softmax.
    Flatten(),
    Dense(512, activation='relu'),
    Dense(10, activation='softmax'),
])

In [None]:
# RMSprop optimizer with a learning rate of 0.001.
# `learning_rate` is the current argument name; the older `lr` spelling is
# deprecated and rejected by recent Keras releases.
optimizer = RMSprop(learning_rate=0.001)

In [None]:

# Categorical cross-entropy matches the one-hot targets. The metric is named
# 'acc' because the result plots read the 'acc' / 'val_acc' history keys.
model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizer,
    metrics=['acc'],
)

In [None]:
# Print the layer-by-layer overview of the model.
model.summary()

In [None]:
# Training hyperparameters: number of epochs and mini-batch size.
epochs, batch_size = 50, 512

# Define the ImageDataGenerator

In [None]:
# Augmentation pipeline for the training images: small random rotations,
# zooms and shifts. Flips stay disabled (presumably because a mirrored digit
# is no longer a valid glyph — confirm).
datagen = ImageDataGenerator(
    # Data-dependent normalisation / whitening: all switched off.
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
    # Geometric augmentations.
    rotation_range=10,
    zoom_range=0.10,
    width_shift_range=0.15,
    height_shift_range=0.15,
    horizontal_flip=False,
    vertical_flip=False,
)
# No datagen.fit(X_train) call: fit() only computes the data statistics used
# by featurewise centering / std normalisation / ZCA whitening. All of these
# are disabled above, so the call would only copy the training array.

# Train data

In [None]:
# Train on augmented batches; validation uses the un-augmented hold-out arrays.
train_batches = datagen.flow(X_train, y_train, batch_size=batch_size)
# Whole batches per epoch (floor division ignores a trailing partial batch).
batches_per_epoch = X_train.shape[0] // batch_size
history = model.fit(
    train_batches,
    epochs=epochs,
    validation_data=(X_val, y_val),
    steps_per_epoch=batches_per_epoch,
)

# Predict Test data

In [None]:
# Run the trained model on the test images; the output ends in the 10-unit
# softmax layer, so each row holds one score per class.
y_test = model.predict(X_test)

In [None]:
# Pick, for every test sample, the class index with the highest score.
y_test_classes = y_test.argmax(axis=1)

# Write Output For Submission

In [None]:
# Build the submission table from the ids of the sample submission and the
# predicted classes, then write it without the DataFrame index.
output = samp_subm[['id']].copy()
output['label'] = y_test_classes
output.to_csv('submission.csv', index=False)

# Analyse the results

In [None]:
# Plot training and validation loss per epoch.
loss = history.history['loss']
loss_val = history.history['val_loss']
# Use a dedicated name for the x-axis values: rebinding `epochs` here would
# overwrite the training hyperparameter and break a re-run of the training cell.
epoch_axis = range(1, len(loss) + 1)
plt.plot(epoch_axis, loss, 'bo', label='loss_train')
plt.plot(epoch_axis, loss_val, 'b', label='loss_val')
plt.title('value of the loss function')
plt.xlabel('epochs')
plt.ylabel('value of the loss function')
plt.legend()
plt.grid()
plt.show()

In [None]:
# Plot training and validation accuracy per epoch.
acc = history.history['acc']
acc_val = history.history['val_acc']
# The x-axis length comes from this cell's own data, and it gets a dedicated
# name: rebinding `epochs` here would overwrite the training hyperparameter
# and break a re-run of the training cell.
epoch_axis = range(1, len(acc) + 1)
plt.plot(epoch_axis, acc, 'bo', label='accuracy_train')
plt.plot(epoch_axis, acc_val, 'b', label='accuracy_val')
plt.title('accuracy')
plt.xlabel('epochs')
plt.ylabel('value of accuracy')
plt.legend()
plt.grid()
plt.show()

In [None]:
# Remove the `model` name from the namespace (presumably so the trained
# network can be garbage-collected once the notebook is finished — confirm).
del model