In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file found.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for path in (os.path.join(folder, name) for name in names):
        print(path)

# Any results you write to the current directory are saved as output.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, BatchNormalization
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import LearningRateScheduler

In [None]:
# Read the training and test CSV files into DataFrames (same order: train, then test).
train_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/Kannada-MNIST/train.csv",
        "/kaggle/input/Kannada-MNIST/test.csv",
    )
)


In [None]:
# Preview the first rows of the raw training frame.
train_data.head()

In [None]:
# Pull the target column ("label") out of the training frame and preview it.
Y_train = train_data.loc[:, "label"]
Y_train.head()

In [None]:
# Everything except the "label" column is treated as model input.
X_train = train_data.drop(columns=["label"])
X_train.head()

In [None]:
# Divide every feature column by 255.0 (presumably mapping 8-bit pixel
# intensities into [0, 1] -- confirm against the dataset description).
# The value is recomputed from `train_data` instead of from `X_train` itself so
# that re-running this cell can never apply the scaling a second time.
X_train = train_data.drop(columns=["label"]) / 255.0
X_train.head()

In [None]:
# Apply the same 1/255 scaling to the test frame and preview its last rows.
# Note: this divides every column of `test_data`, including "id".
X_test = test_data.div(255.0)
X_test.tail()

In [None]:
# Reshape the flat feature table into a 4-D array of 28x28x1 images
# (first axis inferred) and show how many samples it holds.
X_train_2 = np.reshape(X_train.to_numpy(), (-1, 28, 28, 1))
X_train_2.shape[0]

In [None]:
# Preview the first rows of the scaled test frame.
X_test.head()

In [None]:
# Test features = every column of `test_data` except "id", scaled by 1/255.
# Built directly from `test_data` so the cell is idempotent: dropping "id" from
# an already-stripped `X_test` on a second run would raise KeyError.
X_test = test_data.drop(columns=["id"]) / 255.0

In [None]:
# Reshape the test features into 28x28x1 images and display the second sample.
X_test_2 = np.reshape(X_test.to_numpy(), (-1, 28, 28, 1))
X_test_2[1]

In [None]:
# One-hot encode the integer class labels into 10 columns.
# The labels are read from `train_data["label"]` rather than from `Y_train`,
# because `Y_train` is rebound to an already one-hot array by the train/validation
# split further down; re-running this cell after that point would otherwise
# encode the encoded labels again without raising an error.
Y_train_2 = to_categorical(train_data["label"], num_classes=10)
Y_train_2[11]

In [None]:
# Show the first 12 one-hot encoded label rows.
Y_train_2[:12]

In [None]:
# Random augmentation settings: rotation, zoom and horizontal/vertical shifts.
augmentation_options = dict(
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
)
datagen = ImageDataGenerator(**augmentation_options)

# NOTE(review): per the Keras docs, fit() only matters for the featurewise / ZCA
# options, none of which are enabled here -- confirm before removing.
datagen.fit(X_train_2)

In [None]:
# Hold out 10% of the training images for validation.
# A fixed random_state makes the split (and every metric computed on it below)
# reproducible across kernel restarts.
X_train, X_val, Y_train, Y_val = train_test_split(
    X_train_2, Y_train_2, test_size=0.1, random_state=42
)

In [None]:
# Create an empty Sequential model; layers are appended to it with model.add().
model = Sequential() 

In [None]:
# Layer glossary
#   Conv2D             - convolution step: learns the feature maps.
#   MaxPool2D          - pooling step: shrinks the feature maps so fewer nodes
#                        reach the following layers.
#   BatchNormalization - normalizes the activations coming out of a block.
#   Dropout            - randomly drops activations during training.
#   Flatten            - turns the pooled feature maps into one long feature
#                        vector that feeds the fully connected layers.
#   Dense              - fully connected layers of a classic neural network.
#   relu               - rectifier activation, provides the non-linearity.

cnn_layers = [
    # Block 1: two 5x5 convolutions with 32 filters on the 28x28x1 input.
    Conv2D(32, kernel_size=5, input_shape=(28, 28, 1), activation='relu'),
    Conv2D(32, kernel_size=5, activation='relu'),
    MaxPool2D(2, 2),
    BatchNormalization(),
    Dropout(0.4),

    # Block 2: two 3x3 convolutions with 64 filters.
    Conv2D(64, kernel_size=3, activation='relu'),
    Conv2D(64, kernel_size=3, activation='relu'),
    MaxPool2D(2, 2),
    BatchNormalization(),
    Dropout(0.4),

    # Block 3: a single 3x3 convolution with 128 filters.
    Conv2D(128, kernel_size=3, activation='relu'),
    BatchNormalization(),

    # Classifier head: flatten, then two fully connected layers with dropout.
    Flatten(),
    Dense(256, activation="relu"),
    Dropout(0.5),
    Dense(2048, activation="relu"),
    Dropout(0.4),

    # There are 10 classes, hence a 10-way softmax output.
    Dense(10, activation="softmax"),
]

# Append the layers in the order listed above.
for layer in cnn_layers:
    model.add(layer)

In [None]:
# Training hyper-parameters, named once so that the optimizer, the learning-rate
# schedule and the batch generator cannot drift apart.
initial_lr = 1e-3
batch_size = 128

# `learning_rate` replaces the deprecated `lr` keyword of the Keras optimizers.
optimizer = Adam(learning_rate=initial_lr)
model.compile(optimizer=optimizer, loss="categorical_crossentropy", metrics=["accuracy"])

model.summary()

# Exponential decay: epoch e trains with initial_lr * 0.95 ** e.
annealer = LearningRateScheduler(lambda epoch: initial_lr * 0.95 ** epoch)

# Derive the number of batches per epoch from the actual training split instead
# of hard-coding it, so it stays correct if the split ratio or batch size changes.
steps_per_epoch = max(1, len(X_train) // batch_size)

# Train on augmented batches; the untouched validation split is scored each epoch.
model_try = model.fit_generator(
    datagen.flow(X_train, Y_train, batch_size=batch_size),
    epochs=110,
    validation_data=(X_val, Y_val),
    verbose=1,
    steps_per_epoch=steps_per_epoch,
    callbacks=[annealer],
)


In [None]:
# Shape of the reshaped test image array.
X_test_2.shape

In [None]:
# Shape of the raw test frame as loaded from CSV.
test_data.shape

In [None]:
# Shape of the test feature frame.
X_test.shape

In [None]:
# Predict class probabilities for the test images, take the most likely class
# per row, and wrap the result in a Series named "label".
class_probabilities = model.predict(X_test_2)
predictions = pd.Series(class_probabilities.argmax(axis=1), name="label")

In [None]:
# Score the trained model on the held-out validation split.
# The message says "Validation" because X_val / Y_val come from the training
# data; the real test set has no labels to score against in this notebook.
score = model.evaluate(X_val, Y_val)
print('Validation accuracy: ', score[1])

# Saving Model for future API
model.save('Digits-1.3.0.h5')
print("Saved model to disk")

In [None]:
# Plot training vs. validation accuracy per epoch.
# The second curve comes from the validation_data passed to training, so it is
# labelled "validation" rather than "test".
plt.plot(model_try.history['accuracy'])
plt.plot(model_try.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='lower right')
plt.show()

In [None]:
# Plot training vs. validation loss per epoch.
# The second curve comes from the validation_data passed to training, so it is
# labelled "validation" rather than "test".
plt.plot(model_try.history['loss'])
plt.plot(model_try.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper right')
plt.show()

In [None]:
# Report the validation loss/accuracy and build the confusion matrix.
print('Base model scores:')
valid_loss, valid_acc = model.evaluate(X_val, Y_val, verbose=0)
# The header promises scores, so print the values that were computed.
print('  validation loss:', valid_loss)
print('  validation accuracy:', valid_acc)

# Convert predicted probabilities and one-hot targets back to class indices.
valid_p = np.argmax(model.predict(X_val), axis=1)
target = np.argmax(Y_val, axis=1)

# confusion_matrix(y_true, y_pred): rows are true classes, columns are predictions.
cm = confusion_matrix(target, valid_p)
print(cm)

In [None]:
# Draw the confusion matrix as an annotated heatmap (integer counts, no colour bar).
heatmap_ax = sns.heatmap(cm, annot=True, cbar=False, fmt='d')
heatmap_ax.set_ylabel('True Label')
heatmap_ax.set_xlabel('Predicted Label')
heatmap_ax.set_title('Confusion Matrix')


In [None]:
# Per-class metrics from the confusion matrix:
#   recall    = diagonal / row sums
#   precision = diagonal / column sums
correct_per_class = np.diag(cm)
recall = correct_per_class / cm.sum(axis=1)
precision = correct_per_class / cm.sum(axis=0)

In [None]:
# Per-class recall values.
recall

In [None]:
# Per-class precision values.
precision

In [None]:
# Unweighted average of the per-class recall values.
recall.mean()

In [None]:
# Unweighted average of the per-class precision values.
precision.mean()

In [None]:
from sklearn.metrics import classification_report

# Text report comparing the true validation classes (target) with the predicted ones (valid_p).
print(classification_report(target, valid_p))

In [None]:
# Assemble the submission table: the test ids next to their predicted labels.
submission_columns = {'id': test_data['id'], 'label': predictions}
submit = pd.DataFrame(submission_columns)

In [None]:
# Shape of the submission frame.
submit.shape

In [None]:
# Shape of the raw test frame, shown for comparison with the submission frame.
test_data.shape

In [None]:
# Write the submission to submission.csv in the current directory, without the index column.
submit.to_csv("submission.csv",index=False)

In [None]:
# Preview the last rows of the submission.
submit.tail()

In [None]:
# Preview the first rows of the submission.
submit.head()