In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

In [None]:
# Read the two labelled CSV files; they are concatenated into a single frame in the next cell.
origin_train = pd.read_csv('../input/Kannada-MNIST/train.csv')
validate = pd.read_csv('../input/Kannada-MNIST/Dig-MNIST.csv')

In [None]:
# Stack both labelled sets into one frame. ignore_index=True assigns a fresh
# 0..n-1 row index; without it each source keeps its own 0-based labels, so
# labels repeat and any label-based lookup or filter matches rows from both files.
train = pd.concat([origin_train, validate], ignore_index=True)

In [None]:
# Shapes of the two source frames followed by the concatenated frame.
for frame in (origin_train, validate, train):
    print(frame.shape)

In [None]:
# Preview the first rows of the combined frame.
train.head()

In [None]:
# Render the first sample: skip column 0 and fold the remaining values of the
# row into a 28x28 image. A plain ndarray is used instead of np.matrix, which
# NumPy no longer recommends.
a = train.iloc[0, 1:].values.reshape(28, 28)
plt.imshow(a)

In [None]:
# Draw the first i*j samples in an i-by-j grid, titled with the value in column 0.
i, j = 4, 10
fig, ax = plt.subplots(i, j)
fig.set_figheight(6)
fig.set_figwidth(6)

image_total_count = i * j

# Slice by position so only the rows that are drawn get visited, and so the grid
# slot comes from the running position rather than the frame's index labels
# (after the concat those labels are not unique, which made later rows overwrite
# earlier axes).
for index, row in enumerate(train.iloc[:image_total_count].values):
    cell = ax[index // j, index % j]
    cell.set_title(row[0])
    cell.imshow(row[1:785].reshape(28, 28))

In [None]:
# Column 0 becomes the target vector; every remaining column is a feature.
y_train = train.iloc[:, 0]
X_train = train.iloc[:, 1:]
print(X_train.shape, y_train.shape)

In [None]:
from keras.layers import Dense,Dropout,Conv2D,MaxPool2D,Flatten
from sklearn.model_selection import train_test_split
from sklearn.metrics import *
from keras.utils import np_utils, to_categorical
from keras.callbacks import ReduceLROnPlateau
from keras.models import Sequential  
from keras import backend as K

# Hold out 20% of the samples as a test split; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_train.values,
    y_train.values,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Sanity-check the shapes produced by the train/hold-out split.
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

In [None]:
def func_prep(data_prep, image_shape=(28, 28)):
    """Reshape flat pixel rows into a stack of 2-D images.

    Parameters
    ----------
    data_prep : array-like, shape (n_samples, n_pixels)
        One flattened image per row (ndarray or DataFrame).
    image_shape : tuple of int, optional
        Shape of a single image; its product must equal ``n_pixels``.
        Defaults to ``(28, 28)``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_samples, *image_shape)``. It is a copy, so the
        caller's input is never aliased.

    Raises
    ------
    ValueError
        If a row cannot be reshaped to ``image_shape``.
    """
    # np.array copies, matching the original behaviour of returning a new array.
    flat = np.array(data_prep)
    # Use the explicit sample count rather than -1 so that an empty input
    # reshapes cleanly to (0, *image_shape).
    return flat.reshape((flat.shape[0],) + tuple(image_shape))

# Only the training features go through func_prep here. X_test is kept as a flat
# 2-D array; both arrays are reshaped to (n, 28, 28, 1) a few cells further down.
X_train=func_prep(X_train)
X_test=np.array(X_test)

In [None]:
# Show four training samples (indices 3, 8, 2, 20) in a 2x2 grayscale grid.
import matplotlib.pyplot as plt
fig = plt.figure()
ax1, ax2, ax3, ax4 = (fig.add_subplot(2, 2, slot) for slot in (1, 2, 3, 4))
for axis, sample in zip((ax1, ax2, ax3, ax4), (3, 8, 2, 20)):
    axis.imshow(X_train[sample], cmap='gray')

In [None]:
# Bring both feature arrays to (samples, rows, cols, channels) with a single channel.
img_rows, img_cols = 28, 28
input_shape = (img_rows, img_cols, 1)
X_train = X_train.reshape((X_train.shape[0],) + input_shape)
X_test = X_test.reshape((X_test.shape[0],) + input_shape)

In [None]:
# Cast to float32 and divide by 255 in a single step per array.
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255

# Report the resulting sizes.
print('X_train shape:', X_train.shape)
print(len(X_train), 'train samples')
print(len(X_test), 'test samples')

In [None]:
# Convert the integer class labels of both splits into one-hot rows of width num_classes.
num_classes = 10
y_train, y_test = (
    to_categorical(labels, num_classes) for labels in (y_train, y_test)
)

In [None]:
# CNN classifier: two conv/conv/pool/dropout stages followed by a dense head.
# input_shape and num_classes are the values set in the preprocessing cells
# ((img_rows, img_cols, 1) and 10 respectively).
model = Sequential([
    # Stage 1: two 5x5 convolutions with 32 filters, then 2x2 max-pooling.
    Conv2D(32, (5, 5), padding='Same', activation='relu', input_shape=input_shape),
    Conv2D(32, (5, 5), padding='Same', activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Stage 2: two 3x3 convolutions with 64 filters, then 2x2 max-pooling with stride 2.
    Conv2D(64, (3, 3), padding='Same', activation='relu'),
    Conv2D(64, (3, 3), padding='Same', activation='relu'),
    MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
    Dropout(0.25),

    # Classification head: one softmax unit per class.
    Flatten(),
    Dense(256, activation="relu"),
    Dropout(0.5),
    Dense(num_classes, activation="softmax"),
])

In [None]:
# Print the layer-by-layer summary of the network defined above.
model.summary()

In [None]:
from keras.optimizers import RMSprop
from keras.preprocessing.image import ImageDataGenerator

# Single source of truth for the batch size used by the generator and by steps_per_epoch.
batch_size = 128

optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)
model.compile(optimizer=optimizer, loss="categorical_crossentropy", metrics=["accuracy"])

# Multiply the learning rate by 0.2 (never going below 1e-5) once val_loss has
# not improved for 3 consecutive epochs.
learning_rate_reduction = ReduceLROnPlateau(monitor='val_loss', patience=3, factor=0.2, min_lr=0.00001)

# Data augmentation: small random rotations, zooms and shifts of the training images.
datagen = ImageDataGenerator(rotation_range=10, zoom_range=0.1, width_shift_range=0.1, height_shift_range=0.1)

# Train on augmented batches. validation_data is required here: without it Keras
# never computes val_loss, so the ReduceLROnPlateau callback above would never fire.
# NOTE(review): the hold-out split doubles as the validation set, so the accuracy
# reported by model.evaluate on the same split is mildly optimistic.
hist = model.fit(
    x=datagen.flow(X_train, y_train, batch_size=batch_size),
    epochs=20,
    verbose=2,
    steps_per_epoch=X_train.shape[0] // batch_size,
    validation_data=(X_test, y_test),
    callbacks=[learning_rate_reduction],
)

In [None]:
# Score the trained model on the hold-out split; evaluate() returns the loss
# followed by the accuracy metric the model was compiled with.
score = model.evaluate(X_test, y_test, verbose=0)
for caption, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(caption, value)

In [None]:
# Load the test CSV that the final predictions are made on.
test = pd.read_csv('../input/Kannada-MNIST/test.csv')

In [None]:
# Row/column count of the test frame.
test.shape

In [None]:
# Preprocess the test frame the same way as the training data: drop column 0,
# fold each row into an image, add the channel axis, cast to float32 and divide by 255.
# Note that this rebinds X_test, which previously held the hold-out split.
X_test = func_prep(test.values[:, 1:])
X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1).astype('float32') / 255


In [None]:
# Shape of a single preprocessed test sample (rows, cols, channels).
print(X_test[0].shape)

In [None]:
import time

# perf_counter() measures elapsed (wall-clock) time. process_time() only counts
# CPU time of this process, which excludes time spent waiting and sums across
# threads, so it does not reflect how long the prediction actually took.
t = time.perf_counter()

# Predicted class = index of the largest softmax output for each sample.
preds = np.argmax(model.predict(X_test), axis=-1)

elapsed_time = time.perf_counter() - t

print('elapsed_time: ', elapsed_time)

In [None]:
# One predicted class index per test row.
print(preds.shape)

In [None]:
# Manual spot check: print the predicted class for one test sample and display its image.
# A few samples checked this way all matched; the final score will come from the submission.
idx = 3
print(preds[idx])

# Select the single channel directly to get the 28x28 image.
plt.imshow(X_test[idx, :, :, 0])

In [None]:
# Build the submission table (sequential ids starting at 0, one predicted label each)
# and write it out without the frame's row index.
df = pd.DataFrame({'id': list(range(len(preds))), 'label': preds})

df.to_csv('submission.csv', index=False)