In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file below the Kaggle input directory,
# then print the paths one per line in the order os.walk yields them.
input_file_paths = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
)
for input_file_path in input_file_paths:
    print(input_file_path)

# Any results you write to the current directory are saved as output.

In [None]:
# Read the training and test CSV files of the Kannada-MNIST dataset.
train_csv_path = '/kaggle/input/Kannada-MNIST/train.csv'
test_csv_path = '/kaggle/input/Kannada-MNIST/test.csv'

df = pd.read_csv(train_csv_path)
test = pd.read_csv(test_csv_path)

In [None]:
# Preview the first rows of the training frame.
df.head()

Label: the target for classification.

Pixel0 to Pixel783: the grayscale pixel values of the image. Each row contains 28 * 28 = 784 pixel values (indexed 0-783).

In [None]:
# Count how many training rows carry each label value.
df['label'].value_counts()

We can see that each digit has the same number of observations, which tells us that the dataset is balanced.

# LABEL AND FEATURES 



In [None]:
# Separate the training frame into the feature columns and the target column.
X = df.drop(columns='label')
y = df['label']

# Drop the 'id' column from the test frame so it is not treated as a feature.
test = test.drop(columns='id')

# Normalize pixels

This can be achieved by dividing all pixel values by the largest possible pixel value, that is, 255. This is performed across all channels, regardless of the actual range of pixel values that are present in the image.

For most image data, the pixel values are integers with values between 0 and 255.

Neural networks process inputs using small weight values, and inputs with large integer values can disrupt or slow down the learning process. As such it is good practice to normalize the pixel values so that each pixel value has a value between 0 and 1.



In [None]:
# Divide both feature sets by 255, the largest pixel value named in the
# surrounding text, so that the inputs are scaled down before training.
max_pixel_value = 255

X = X / max_pixel_value

test = test / max_pixel_value

# Reshape data to 28 x 28 x 1 image tensors

In [None]:
# Turn each flat row into a 28 x 28 image with a single channel; -1 lets numpy
# infer the number of images from the data.
image_shape = (28, 28, 1)

X = X.values.reshape((-1, *image_shape))
test = test.values.reshape((-1, *image_shape))

# Encoding target

In [None]:
# Show the height and width of the reshaped training images.
X.shape[1:3]

In [None]:
# Display the target labels before they are encoded.
y

In [None]:
# `keras.utils.np_utils` is a private module path that newer Keras releases no
# longer provide; `to_categorical` is exposed publicly from `keras.utils`.
from keras.utils import to_categorical

# One-hot encode the integer labels. num_classes is pinned to 10 so the encoded
# width always matches the 10-unit softmax output layer of the model defined
# later in this notebook, instead of being inferred from the largest label seen.
y = to_categorical(y, num_classes=10)
y

# TRAIN TEST SPLIT FOR VALIDATION 



In [None]:
from sklearn.model_selection import train_test_split
# Hold out 15% of the samples for validation; the fixed random_state makes the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.15,
    random_state=42,
)



In [None]:
X_train[0, :, :, 0]  # 2-D pixel matrix of the first training image (channel axis removed)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns


plt.imshow(X_train[0, :, :, 0])  # render the first training image's pixel matrix as a picture



# DATA AUGMENTATION 

Handwritten digits vary in many ways: for example, the digit may not be centered, the scale may differ (some people write large digits, others small ones), or the image may be rotated.

Approaches that alter the training data in ways that change the array representation while keeping the label the same are known as data augmentation techniques. Some popular augmentations people use are grayscales, horizontal flips, vertical flips, random crops, color jitters, translations, rotations, and much more.

By applying just a couple of these transformations to our training data, we can easily double or triple the number of training examples and create a very robust model.



For the data augmentation, I chose to:

    Randomly rotate some training images by up to 10 degrees
    Randomly zoom some training images by up to 10%
    Randomly shift images horizontally by up to 10% of the width
    Randomly shift images vertically by up to 10% of the height

I did not apply vertical_flip or horizontal_flip, since flipping could lead to misclassifying symmetrical digits such as 6 and 9.




In [None]:
from keras.preprocessing.image import ImageDataGenerator

# Centering, standardisation and whitening are all switched off explicitly;
# only geometric augmentations are used.
normalisation_settings = dict(
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
)

# Small random rotations, zooms and shifts; flips stay disabled.
geometric_settings = dict(
    rotation_range=10,       # rotation range in degrees
    zoom_range=0.1,          # random zoom
    width_shift_range=0.1,   # horizontal shift, as a fraction of total width
    height_shift_range=0.1,  # vertical shift, as a fraction of total height
    horizontal_flip=False,
    vertical_flip=False,
)

datagen = ImageDataGenerator(**normalisation_settings, **geometric_settings)

# The generator is fitted on the training split only.
# NOTE(review): per the Keras docs, fit() only computes statistics for the
# featurewise/ZCA options, which are all disabled here - confirm it is needed.
datagen.fit(X_train)

# BUILDING CNN

In [None]:
from keras.models import Sequential

from keras.layers import Dense,Conv2D,Flatten,MaxPool2D,Dropout,BatchNormalization


def _add_conv_block(net, filters, kernel_size, **first_conv_kwargs):
    """Append one convolutional stage to ``net`` in place.

    A stage is two 'Same'-padded ReLU convolutions, followed by batch
    normalization, 2x2 max pooling and 25% dropout.

    Args:
        net: The Sequential model to extend.
        filters: Number of filters used by both convolutions.
        kernel_size: Kernel size used by both convolutions.
        **first_conv_kwargs: Extra keyword arguments for the first convolution
            only (used to pass ``input_shape`` to the very first layer).
    """
    conv_kwargs = dict(filters=filters, kernel_size=kernel_size,
                       padding='Same', activation='relu')
    net.add(Conv2D(**conv_kwargs, **first_conv_kwargs))
    net.add(Conv2D(**conv_kwargs))
    net.add(BatchNormalization(momentum=0.15))
    # The pooling stride is left at its default, which equals pool_size (2, 2).
    net.add(MaxPool2D(pool_size=(2, 2)))
    net.add(Dropout(0.25))


model = Sequential()

# Stage 1: 32 filters of 5x5. Only the first layer of a Sequential model
# declares the input shape (28 x 28 pixels, one channel).
_add_conv_block(model, filters=32, kernel_size=(5, 5), input_shape=(28, 28, 1))

# Stage 2: same structure with 64 filters of 3x3.
_add_conv_block(model, filters=64, kernel_size=(3, 3))

# Stage 3: back to 32 filters of 5x5. No input_shape here: this is not the
# first layer, so its input shape comes from the previous stage.
_add_conv_block(model, filters=32, kernel_size=(5, 5))

# Flatten the feature maps so they can be fed into the dense classifier.
model.add(Flatten())

model.add(Dense(256, activation="relu"))
model.add(Dropout(0.4))

# Output layer: one softmax probability per class (10 classes).
model.add(Dense(10, activation="softmax"))

model.summary()



#### structure of our model 


# OPTIMIZER

## we need to choose a good optimizer so that our model can learn effectively 

# loss function - categorical_crossentropy

In [None]:
from keras.optimizers import Adam #,RMSprop
# Adam optimizer with an explicit learning rate and moment-decay coefficients.
optimizer = Adam(
    learning_rate=0.001,
    beta_1=0.9,
    beta_2=0.999,
)

# Compile with categorical cross-entropy as the loss and track accuracy.
model.compile(
    loss=['categorical_crossentropy'],
    optimizer=optimizer,
    metrics=['accuracy'],
)





The LR (learning rate) is the step size with which the optimizer walks through the 'loss landscape'. The higher the LR, the bigger the steps and the quicker the convergence. However, the sampling is very poor with a high LR, and the optimizer could fall into a local minimum.

It's better to have a decreasing learning rate during training to efficiently reach the global minimum of the loss function.

To keep the advantage of the fast computation time with a high LR, I decrease the LR dynamically every X steps (epochs), but only when necessary (when accuracy has not improved).

With the ReduceLROnPlateau callback from keras.callbacks, I chose to halve the LR if the validation accuracy has not improved after 3 epochs.

In [None]:
from keras.callbacks import ReduceLROnPlateau
# Halve the learning rate whenever validation accuracy has not improved for
# 3 consecutive epochs, never going below 1e-5.
# The monitored key must match the metric name Keras records: the model is
# compiled with metrics=['accuracy'] and the history is read back under
# 'val_accuracy', so 'val_acc' would never be found and the callback would
# never reduce the learning rate.
learning_rate_reduction = ReduceLROnPlateau(monitor='val_accuracy',
                                            mode='max',  # accuracy: higher is better
                                            patience=3,
                                            verbose=1,
                                            factor=0.5,
                                            min_lr=0.00001)

# FITTING/TRAINING OUR MODEL 

In [None]:
epochs = 5  # change this to 30 if you need to get better score
batch_size = 64

# Stream augmented mini-batches from the training split and run as many
# steps per epoch as there are full batches in it.
train_batches = datagen.flow(X_train, y_train, batch_size=batch_size)
steps_per_epoch = X_train.shape[0] // batch_size

# Fit the model; the held-out split is passed as validation data and the
# learning-rate callback is applied during training.
# NOTE(review): fit_generator is deprecated in recent Keras/TensorFlow
# releases in favour of fit - confirm against the installed version.
history = model.fit_generator(
    train_batches,
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
    validation_data=(X_test, y_test),
    callbacks=[learning_rate_reduction],
    verbose=2,
)


In [None]:
# Plot training vs. validation loss (top) and accuracy (bottom) per epoch.
fig, ax = plt.subplots(2, 1)

# Derive the x-axis from the recorded history rather than from `epochs`, so the
# plot still works if `epochs` was edited after training finished.
x = range(1, 1 + len(history.history['loss']))

ax[0].plot(x, history.history['loss'], color='red')
ax[0].plot(x, history.history['val_loss'], color='blue')
ax[0].legend(['training loss', 'validation loss'])
ax[0].set_ylabel('loss')

ax[1].plot(x, history.history['accuracy'], color='red')
ax[1].plot(x, history.history['val_accuracy'], color='blue')
ax[1].legend(['training acc', 'validation acc'])
ax[1].set_xlabel('Number of epochs')
ax[1].set_ylabel('accuracy')

# Keep the axis labels of the two stacked panels from overlapping.
fig.tight_layout()

In [None]:
# Training accuracy recorded for the last epoch.
history.history['accuracy'][-1]

In [None]:
# Reload the test set from disk and repeat the same preprocessing as the
# training features, so this cell yields a correctly shaped `test` array no
# matter what earlier cells did to that variable.
# The absolute path matches the one used when the data was first loaded; the
# previous '../input/...' form only resolved from one working directory.
test = pd.read_csv('/kaggle/input/Kannada-MNIST/test.csv')

test = test.drop('id', axis=1)             # 'id' is not a pixel feature
test = test / 255                          # same scaling as the training pixels
test = test.values.reshape(-1, 28, 28, 1)  # one 28 x 28 x 1 tensor per image



In [None]:
# Display the preprocessed test array.
test

In [None]:
# Render the first test image; the channel axis is dropped for plotting.
plt.imshow(test[0, :, :, 0])

In [None]:
# Indexing with a one-element list keeps the leading batch axis.
test[[0]].shape

# PREDICTIONS 

In [None]:
# Shape of a single test sample (every axis after the leading sample axis).
test.shape[1:]

In [None]:
# Run the trained model on the whole preprocessed test array.
preds = model.predict(test)


# Inspect the model output for the first test image.
preds[0]

In [None]:


# Reduce each row of model outputs to the index of its largest entry,
# i.e. the predicted class label.
preds = preds.argmax(axis=1)
preds

In [None]:
# Display the preprocessed test array again.
test

In [None]:
# A one-element list index keeps the batch axis, giving a batch of one image.
test[[55]].shape

# PREDICTING WITH A SINGLE IMAGE 

In [None]:
# Build a batch holding only test image 55 (the list index keeps the batch
# axis), predict on it and take the index of the largest output as the label.
single_image_batch = test[[55]]
preds = model.predict(single_image_batch).argmax(axis=1)
preds

# SAVE MODEL AS H5


In [None]:
# Save the trained model to 'my_model.h5' in the current working directory.
model.save('my_model.h5')
