##  Task 2
This notebook defines a simple model that classifies handwritten alphanumeric characters into 47 categories.
A class-wise classification report is generated using sklearn.

The following sources were referenced:
1. https://stackoverflow.com/questions/16992713/translate-every-element-in-numpy-array-according-to-key
2. https://stackoverflow.com/questions/45930750/how-to-output-per-class-accuracy-in-keras 

In [2]:
import tensorflow as tf
from tensorflow.keras.optimizers import RMSprop

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report


In [6]:
## TODO : Function to read a dataframe and return the generated labels and images arrays

def images_labels(data_df):
    """Split a raw dataframe into a label vector and a normalised image tensor.

    Column 0 of ``data_df`` is taken as the labels. Every remaining column is
    taken as one flattened pixel value; each row is reshaped into a
    28 x 28 x 1 image and the pixel values are divided by 255.0.

    Parameters
    ----------
    data_df : pandas.DataFrame
        One row per sample: label first, then the 784 pixel columns.

    Returns
    -------
    tuple
        ``(labels, images)`` where ``images`` has shape ``(len(data_df), 28, 28, 1)``.
    """
    sample_count = len(data_df)
    label_column = data_df.iloc[:, 0]
    pixel_columns = data_df.iloc[:, 1:]

    # order='F' (column-major): the sample axis is unchanged, while each
    # 784-long pixel vector fills its 28 x 28 grid column by column.
    image_stack = np.reshape(
        pixel_columns.values, (sample_count, 28, 28, 1), order='F'
    )
    return label_column.values, image_stack / 255.0

In [7]:
## Get the test, train and validation sets

# Forward slashes are accepted by Python on Windows as well as on Linux/macOS,
# so the notebook no longer depends on a Windows-only backslash path. The
# trailing separator is kept because file names are appended with plain `+`.
DATASET_PATH = './Character + Digits data/'

# Fixed seed so the train/validation split is identical on every run.
SPLIT_SEED = 42

test_data = pd.read_csv(DATASET_PATH+'characters-digits-test.csv',header=None)
test_labels,test_images = images_labels(test_data)

train_dataSet = pd.read_csv(DATASET_PATH+'characters-digits-train.csv',header=None)

# Hold out 20% of the training file for validation. Stratifying on the label
# column (column 0) keeps every class equally represented in both subsets.
train_df, val_df = train_test_split(
    train_dataSet,
    test_size=0.2,
    random_state=SPLIT_SEED,
    stratify=train_dataSet.iloc[:, 0],
)

train_labels, train_images = images_labels(train_df)
val_labels, val_images = images_labels(val_df)


In [8]:
## checking the test, train and validation arrays

# Each printed line shows (labels shape, images shape) for one split.
split_shapes = [
    ('train', train_labels.shape, train_images.shape),
    ('validation', val_labels.shape, val_images.shape),
]
for split_name, label_shape, image_shape in split_shapes:
    print(f'{split_name} : {(label_shape, image_shape)}')

# The test line ends with a space and an extra newline.
print(f'test : {(test_labels.shape, test_images.shape)} \n')

train : ((90240,), (90240, 28, 28, 1))
validation : ((22560,), (22560, 28, 28, 1))
test : ((18800,), (18800, 28, 28, 1)) 



In [97]:
## Early-stopping callback used when the model is trained

# Watch the validation accuracy (higher is better, hence mode='max') and stop
# once it has failed to improve for 5 consecutive epochs; the weights from the
# best epoch are then restored.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    patience=5,
    restore_best_weights=True,
    verbose=1,
)

In [99]:
# Define, compile and train the CNN model

# Hyper-parameters gathered in one place instead of being scattered as magic
# numbers through the layer list and the fit() call.
NUM_CLASSES = 47      # size of the softmax output layer
EPOCHS = 25           # upper bound; early stopping may end training sooner
BATCH_SIZE = 128
TRAINING_SEED = 42

# Seed NumPy and TensorFlow so runs are as repeatable as possible
# (GPU kernels may still introduce small run-to-run differences).
np.random.seed(TRAINING_SEED)
tf.random.set_seed(TRAINING_SEED)

# Two conv + max-pool stages, then a dense classifier head with dropout.
model = tf.keras.models.Sequential([
  tf.keras.layers.Conv2D(64, (3,3), activation='relu', input_shape=(28, 28, 1)),
  tf.keras.layers.MaxPooling2D(2, 2),
  tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
  tf.keras.layers.MaxPooling2D(2,2),
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(0.5),
  tf.keras.layers.Dense(NUM_CLASSES, activation='softmax')
])

# Labels are integer class ids, hence the *sparse* categorical cross-entropy.
model.compile(optimizer=tf.keras.optimizers.RMSprop(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Keep the returned History so the per-epoch loss/accuracy can be inspected
# or plotted later instead of being discarded.
history = model.fit(train_images, train_labels,
                    epochs=EPOCHS,
                    batch_size=BATCH_SIZE,
                    callbacks=[early_stopping],
                    validation_data=(val_images, val_labels))

Train on 90240 samples, validate on 22560 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 00018: early stopping


<tensorflow.python.keras.callbacks.History at 0x1cf31684f48>

In [272]:
## Save the model.
# Writes the trained model to "task2_v1.h5" in the current working directory.
model.save(filepath="task2_v1.h5")

In [104]:
# The model is compiled with metrics=['accuracy'], so evaluate() returns a
# two-element list [loss, accuracy] rather than the loss alone.
test_loss = model.evaluate(x=test_images, y=test_labels)
print(test_loss)

[0.38805885689055664, 0.8728191]


In [11]:
## TODO : generate the classwise accuracy
# Build a lookup from class index to the character it represents.
# The mapping file is read as space-separated columns: column 0 is the class
# index and column 1 is the character code that chr() turns into a symbol.
# https://stackoverflow.com/questions/16992713/translate-every-element-in-numpy-array-according-to-key

mapping_df = pd.read_csv(
    DATASET_PATH + 'characters-digits-mapping.txt',
    sep=' ',
    header=None,
)
mapping_df['class_name'] = mapping_df[1].map(chr)
mapping_dict = {
    class_index: class_name
    for class_index, class_name in zip(mapping_df[0], mapping_df['class_name'])
}


In [13]:
## https://stackoverflow.com/questions/45930750/how-to-output-per-class-accuracy-in-keras

# Sequential.predict_classes() is deprecated and has been removed from newer
# TensorFlow releases. Taking the argmax over the softmax outputs yields the
# same class indices and works on both old and new versions.
pred_labels = np.argmax(model.predict(test_images), axis=-1)

# One row per class plus the summary rows (accuracy / macro avg / weighted avg).
report_dict = classification_report(test_labels, pred_labels, output_dict=True)
report_df = pd.DataFrame(report_dict).transpose().reset_index()

# Per-class rows carry a numeric label in 'index'; select them by content
# rather than by a hard-coded 0:46 slice so the cell adapts to the number of
# classes. Summary rows are not selected and keep NaN in 'class_label'.
is_class_row = report_df['index'].str.isdigit()
report_df['class_label'] = (
    report_df.loc[is_class_row, 'index'].astype('int').map(mapping_dict)
)
report_df

Unnamed: 0,index,precision,recall,f1-score,support,class_label
0,0,0.63956,0.7275,0.680702,400.0,0
1,1,0.504039,0.78,0.612365,400.0,1
2,2,0.91906,0.88,0.899106,400.0,2
3,3,0.987374,0.9775,0.982412,400.0,3
4,4,0.885442,0.9275,0.905983,400.0,4
5,5,0.93883,0.8825,0.909794,400.0,5
6,6,0.950649,0.915,0.932484,400.0,6
7,7,0.956098,0.98,0.967901,400.0,7
8,8,0.919118,0.9375,0.928218,400.0,8
9,9,0.641618,0.8325,0.724701,400.0,9
