In [None]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
import matplotlib.pyplot as plt
%matplotlib inline
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential,load_model
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras import utils
from tensorflow.keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D, GlobalAveragePooling2D
from tensorflow.keras.layers import LeakyReLU 
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf

# Seed every random number generator this notebook relies on. NumPy's global
# generator alone does not control TensorFlow, so without the second call the
# network's weight initialisation and dropout masks differ on every run.
# NOTE(review): some GPU kernels may still be non-deterministic - confirm if
# bit-exact reproducibility is required.
np.random.seed(25)
tf.random.set_seed(25)

In [None]:
# Read the Kannada-MNIST training CSV into a DataFrame.
df = pd.read_csv(filepath_or_buffer='../input/Kannada-MNIST/train.csv')

In [None]:
# Peek at the first five rows to check the column layout.
df.head(n=5)

In [None]:
# Dimensions of the loaded table as (rows, columns).
df.shape

In [None]:
# Class balance check. A notebook cell only echoes its *last* expression, so
# the counts are printed explicitly; as a bare first statement the result was
# computed and then silently thrown away.
print(df.label.value_counts())

# Preview one sample: row 1, every column after the first, viewed as a 28x28
# grayscale image.
# NOTE(review): assumes the first column is `label` and the rest are pixels.
image = df.iloc[1, 1:].values.reshape(28, 28)
plt.imshow(image, cmap='gray');

In [None]:
from sklearn.model_selection import train_test_split

# Separate the feature columns from the target column, then hold out 33% of
# the rows for testing. The fixed random_state keeps the split repeatable.
X = df.drop(columns=['label']).values
y = df['label'].values
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [None]:
# Report the shape of each split before any reshaping takes place.
shape_report = [
    ("X_train original shape", X_train),
    ("y_train original shape", y_train),
    ("X_test original shape", X_test),
    ("y_test original shape", y_test),
]
for caption, array in shape_report:
    print(caption, array.shape)

In [None]:
def _as_unit_images(pixels):
    """Reshape rows to (n, 28, 28, 1) float32 images and divide by 255."""
    images = pixels.reshape(pixels.shape[0], 28, 28, 1).astype('float32')
    images /= 255
    return images


# Same preprocessing for both splits, so the network sees identically scaled
# inputs during training and evaluation.
X_train = _as_unit_images(X_train)
X_test = _as_unit_images(X_test)

X_train.shape

In [None]:
# One-hot encode the integer labels to match the 10-unit softmax output that
# the model is trained against with categorical cross-entropy.
number_of_classes = 10
Y_train, Y_test = (
    utils.to_categorical(labels, number_of_classes)
    for labels in (y_train, y_test)
)

# Show one label next to its one-hot encoding as a sanity check.
(y_train[0], Y_train[0])

In [None]:
# Network layout
# --------------
# A convolutional stage repeats three steps:
#   1. Convolution
#   2. Activation
#   3. Pooling
# Repeating the stage adds more hidden layers. A fully connected head on top
# of the flattened feature maps then gives the CNN the ability to classify
# the samples.
#
# Every BatchNormalization layer is passed to model.add(). Instantiating the
# layer as a bare expression creates it and immediately discards it, leaving
# the network without any normalisation.
model = Sequential()

# Conv stage 1: two 3x3 convolutions with 32 filters, then 2x2 max pooling.
model.add(Conv2D(32, (3, 3), input_shape=(28, 28, 1)))
model.add(Activation('relu'))
model.add(BatchNormalization(axis=-1))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(BatchNormalization(axis=-1))

# Conv stage 2: two 3x3 convolutions with 64 filters, then 2x2 max pooling.
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(BatchNormalization(axis=-1))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())

# Fully connected head.
# NOTE: the feature-extraction cell further down takes model.layers[-6] as the
# cut point, and that must remain the Flatten layer. The head is therefore
# kept at exactly five layers by fusing each activation into its Dense layer
# (Dense(..., activation=...) is equivalent to Dense(...) + Activation(...)).
model.add(BatchNormalization())
model.add(Dense(512, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.2))
model.add(Dense(10, activation='softmax'))

In [None]:
# Print the layer-by-layer summary of the network defined above.
model.summary()

In [None]:
# Configure training: Adam with its default settings, categorical
# cross-entropy against the one-hot targets, accuracy as the reported metric.
model.compile(
    optimizer=Adam(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Random augmentation applied to training batches only: small rotations,
# shifts, shear and zoom.
augmentation = dict(
    rotation_range=8,
    width_shift_range=0.08,
    height_shift_range=0.08,
    shear_range=0.3,
    zoom_range=0.08,
)
gen = ImageDataGenerator(**augmentation)

# The held-out data is passed through unchanged (no augmentation options).
test_gen = ImageDataGenerator()

In [None]:
# Show both shapes as one tuple. Only the last expression of a cell is echoed,
# so two separate bare expressions would display the second one only.
X_train.shape, X_test.shape

In [None]:
# Wrap each split in a batch iterator; both use batches of 64 samples.
batch = 64
train_generator = gen.flow(X_train, Y_train, batch_size=batch)
test_generator = test_gen.flow(X_test, Y_test, batch_size=batch)

In [None]:
# Train for 5 epochs on augmented batches, validating on the held-out split.
# Step counts are derived from the iterators themselves (number of samples
# floor-divided by batch size) instead of being hard-coded, so they stay
# correct if the dataset, the split ratio or the batch size changes.
# The returned History object is kept so the loss/accuracy curves can be
# inspected later.
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.n // train_generator.batch_size,
    epochs=5,
    validation_data=test_generator,
    validation_steps=test_generator.n // test_generator.batch_size,
)

In [None]:
# Score the trained network on the full held-out split. Index 1 of the result
# is the accuracy metric requested at compile time.
score = model.evaluate(X_test, Y_test)
test_accuracy = score[1]
print('Test accuracy: ', test_accuracy)

In [None]:
from sklearn.metrics import confusion_matrix, classification_report

# plot_confusion_matrix was removed from scikit-learn in release 1.2, and an
# unconditional import makes this cell fail on current versions. Nothing in
# this notebook calls it, so the import is guarded and the name is kept only
# where the installed scikit-learn still provides it.
try:
    from sklearn.metrics import plot_confusion_matrix
except ImportError:
    plot_confusion_matrix = None  # use ConfusionMatrixDisplay on scikit-learn >= 1.2

In [None]:
# Predicted class = index of the largest softmax output for each sample.
y_pred = np.argmax(model.predict(X_test), axis=1)

# classification_report takes (y_true, y_pred) in that order. Passing the
# predictions first swaps the reported precision and recall of every class.
print(classification_report(y_test, y_pred))

In [None]:
import seaborn as sns

# Confusion matrix of the CNN on the held-out split: rows are the actual
# digits, columns the predicted ones. The colour scale is capped at 100.
categories = [str(digit) for digit in range(10)]
CMatrix = pd.DataFrame(
    confusion_matrix(y_test, y_pred),
    index=categories,
    columns=categories,
)

fig, ax = plt.subplots(figsize=(6, 6))
sns.heatmap(CMatrix, annot=True, fmt='g', vmin=0, vmax=100, cmap='Blues', ax=ax)
ax.set_xlabel('Predicted', fontsize=14, weight='bold')
ax.set_ylabel('Actual', fontsize=14, weight='bold')
ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
ax.set_title('Confusion Matrix - Test Set', fontsize=16, weight='bold', pad=20);

In [None]:
# Persist the trained network to disk, then read it straight back so the
# following cells work from the saved artefact.
checkpoint_path = 'model.h5'
model.save(checkpoint_path)
model = load_model(checkpoint_path)

In [None]:
# Remove the fully connected head from the trained CNN so that it outputs the
# flattened convolutional features instead of class probabilities.
#
# Model is imported from tensorflow.keras, the same package the network was
# built with; mixing in the standalone `keras` package can fail or be missing.
from tensorflow.keras.models import Model

# Locate the cut point by layer type rather than by a positional index. The
# lookup still finds the Flatten layer if the head changes or if this cell is
# executed a second time on the already truncated model.
flatten_layer = next(layer for layer in model.layers if isinstance(layer, Flatten))
model = Model(model.input, flatten_layer.output)
model.summary()


In [None]:
from sklearn.tree import DecisionTreeClassifier

# Feature extraction: run both splits through the truncated CNN.
X_test_ = model.predict(X_test)
X_train_ = model.predict(X_train)

# Fit a decision tree on the extracted features. random_state is fixed (as it
# is for the random forest in this notebook) because scikit-learn permutes
# the candidate features at every split, so an unseeded tree can differ
# between runs.
dtree_model = DecisionTreeClassifier(
    max_depth=10000,
    min_samples_split=10,
    random_state=0,
).fit(X_train_, y_train)
dtree_predictions = dtree_model.predict(X_test_)

In [None]:
# classification_report takes (y_true, y_pred) in that order. Passing the
# predictions first swaps the reported precision and recall of every class.
print(classification_report(y_test, dtree_predictions))

In [None]:
# Confusion matrix of the decision tree on the held-out split: rows are the
# actual digits, columns the predicted ones.
dtree_counts = confusion_matrix(y_test, dtree_predictions)
CMatrix = pd.DataFrame(dtree_counts, index=categories, columns=categories)

fig, ax = plt.subplots(figsize=(6, 6))
heatmap_style = dict(annot=True, fmt='g', vmin=0, vmax=100, cmap='Blues')
sns.heatmap(CMatrix, ax=ax, **heatmap_style)
ax.set_xlabel('Predicted', fontsize=14, weight='bold')
ax.set_ylabel('Actual', fontsize=14, weight='bold')
ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
ax.set_title('Confusion Matrix - Test Set', fontsize=16, weight='bold', pad=20);

In [None]:
# Fit a random forest on the CNN features (fit() returns the estimator, so the
# calls are chained) and predict the held-out split.
randomForest = RandomForestClassifier(
    max_depth=10000,
    random_state=0,
).fit(X_train_, y_train)
randomForest_predictions = randomForest.predict(X_test_)

In [None]:
# classification_report takes (y_true, y_pred) in that order. Passing the
# predictions first swaps the reported precision and recall of every class.
print(classification_report(y_test, randomForest_predictions))

In [None]:
# Confusion matrix of the random forest on the held-out split: rows are the
# actual digits, columns the predicted ones.
forest_counts = confusion_matrix(y_test, randomForest_predictions)
CMatrix = pd.DataFrame(forest_counts, index=categories, columns=categories)

fig, ax = plt.subplots(figsize=(6, 6))
sns.heatmap(
    CMatrix,
    annot=True,
    fmt='g',
    vmin=0,
    vmax=100,
    cmap='Blues',
    ax=ax,
)
ax.set_xlabel('Predicted', fontsize=14, weight='bold')
ax.set_ylabel('Actual', fontsize=14, weight='bold')
ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
ax.set_title('Confusion Matrix - Test Set', fontsize=16, weight='bold', pad=20);