In [None]:
import pandas as pd
import numpy as np
import cv2
import os
import matplotlib.pyplot as plt
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import StratifiedKFold
from keras.preprocessing import image
from keras.applications import vgg16
from keras.models import Sequential,Model
from keras.layers import Flatten,Dense,Dropout
from sklearn.metrics import precision_recall_fscore_support
from pathlib import Path

In [None]:
# Read every image of the balanced dataset as a flattened grayscale vector.
# The label of an image is the first character of its class directory name.
DATASET_DIR = '/content/Balanced_Dataset/'
imgarr=[]
labelarr=[]
unreadable=0
# os.listdir returns entries in arbitrary order; sorting keeps the sample order
# (and therefore the un-shuffled fold split built from it) reproducible.
for class_dir in sorted(os.listdir(DATASET_DIR)):
  class_path = os.path.join(DATASET_DIR, class_dir)
  # ignore notebook checkpoints and any stray entry that is not a directory
  if class_dir == '.ipynb_checkpoints' or not os.path.isdir(class_path):
    continue
  for file_name in sorted(os.listdir(class_path)):
    arr = cv2.imread(os.path.join(class_path, file_name), cv2.IMREAD_GRAYSCALE)
    # cv2.imread reports an unreadable file by returning None, not by raising
    if arr is None:
      unreadable += 1
      continue
    imgarr.append(arr.ravel())
    labelarr.append(class_dir[0])
if unreadable:
  print("Skipped {} unreadable file(s) in {}".format(unreadable, DATASET_DIR))

In [None]:
# Build the train/test split. Only a small share of the data is labelled by
# hand for training, so the data is cut into 8 stratified folds (equal class
# distribution in every fold) and only the first split is used.
# StratifiedKFold yields (train, test) index pairs; unpacking them in the
# opposite order makes the single held-out fold (1/8 of the samples) the
# training set and the remaining seven folds the test set.
X = np.array(imgarr)
y = np.array(labelarr)
skf = StratifiedKFold(n_splits=8)
test_index, train_index = next(iter(skf.split(X, y)))
print("TRAIN:", train_index, "TEST:", test_index)
X_train = X[train_index]
X_test = X[test_index]
y_train = y[train_index]
y_test = y[test_index]

TRAIN: [    0     1     2 ... 28137 28138 28139] TEST: [  140   141   142 ... 29117 29118 29119]


In [None]:
# Sanity check: label shapes are printed (test first, then train) and the
# feature-matrix shapes (train first, then test) are shown as the cell output.
print(y_test.shape, y_train.shape)
(X_train.shape, X_test.shape)

(25480,) (3640,)


((3640, 784), (25480, 784))

In [None]:
# Index -> letter lookup used for label encoding (0 -> 'A', ..., 25 -> 'Z')
key_val = dict(enumerate('ABCDEFGHIJKLMNOPQRSTUVWXYZ'))

In [None]:
# Invert key_val into a letter -> index table, then replace every letter label
# in both splits by its integer class index.
label_encoder = {letter: index for index, letter in key_val.items()}
y_test = np.array([label_encoder[label] for label in y_test])
y_train = np.array([label_encoder[label] for label in y_train])

In [None]:
# One-hot encode the integer labels. The width is pinned to the size of the
# label table: without num_classes, to_categorical infers it from the largest
# label present, so a split missing the last letters would produce targets
# narrower than the classifier's 26-way output.
num_classes = len(label_encoder)
train_Y_one_hot = to_categorical(y_train, num_classes=num_classes)
test_Y_one_hot = to_categorical(y_test, num_classes=num_classes)

In [None]:
# Shapes of the flattened feature matrices (train, test) before any reshaping
(X_train.shape, X_test.shape)

((3640, 784), (25480, 784))

In [None]:
# Replicate the single grayscale channel three times along the depth axis,
# since the network below is built with a 3-channel input shape.
X_train = np.dstack((X_train, X_train, X_train))
X_test = np.dstack((X_test, X_test, X_test))
(X_train.shape, X_test.shape)

((3640, 784, 3), (25480, 784, 3))

In [None]:
# Restore the 28x28 spatial layout of every flattened sample, keeping the
# three channels as the last axis.
image_shape = (-1, 28, 28, 3)
X_train = X_train.reshape(image_shape)
X_test = X_test.reshape(image_shape)

In [None]:
from keras.preprocessing.image import img_to_array, array_to_img


def _resize_to_32(im):
  """Round-trip one image array through an image object to resize it to 32x32."""
  resized = array_to_img(im, scale=False).resize((32,32))
  return img_to_array(resized)


# Resize every sample to the 32x32 input size the network is built with
train_X = np.asarray([_resize_to_32(im) for im in X_train])
test_X = np.asarray([_resize_to_32(im) for im in X_test])

In [None]:
# Shapes of the resized image tensors (train, test)
(train_X.shape, test_X.shape)

((3640, 32, 32, 3), (25480, 32, 32, 3))

In [None]:
#defining the metrics
def accuracy_metric(predicted_prob,test_Y_one_hot):
  """Return the percentage of samples whose predicted class equals the true class.

  Args:
    predicted_prob: 2-D array-like of per-class scores, one row per sample.
      The predicted class of a row is the index of its largest score.
    test_Y_one_hot: 2-D array-like of one-hot labels, one row per sample.
      The true class of a row is the index of its largest entry.

  Returns:
    Accuracy as a Python float between 0.0 and 100.0.

  Raises:
    ValueError: if the inputs are empty or contain different numbers of rows.
  """
  predicted_prob = np.asarray(predicted_prob)
  test_Y_one_hot = np.asarray(test_Y_one_hot)
  if len(predicted_prob) != len(test_Y_one_hot):
    # a silent truncation here would report accuracy over the wrong samples
    raise ValueError(
        "predictions and labels differ in length: {} vs {}".format(
            len(predicted_prob), len(test_Y_one_hot)))
  if len(test_Y_one_hot) == 0:
    raise ValueError("cannot compute accuracy on an empty set of samples")

  predicted = np.argmax(predicted_prob, axis=1)
  actual = np.argmax(test_Y_one_hot, axis=1)
  correct = int(np.count_nonzero(predicted == actual))
  return correct / float(len(actual)) * 100.0



In [None]:
#defining the average precision , recall and Fscore
def prf_metric(predicted_prob,test_Y_one_hot):
  """Print the weighted-average precision, recall and F-score of the predictions.

  Args:
    predicted_prob: iterable of per-class score rows; the predicted class of a
      row is the index of its largest score.
    test_Y_one_hot: iterable of one-hot label rows; the true class of a row is
      the index of its largest entry.

  Returns:
    None. The three scores are only printed.
  """
  predicted = [np.argmax(scores) for scores in predicted_prob]
  actual = [np.argmax(one_hot) for one_hot in test_Y_one_hot]
  # the fourth element of the result (support) is not reported
  precision, recall, fscore = precision_recall_fscore_support(
      actual, predicted, average='weighted')[:3]
  print("Avg precision :{} Avg recall : {} Avg fcore: {}".format(precision, recall, fscore))

In [None]:
def build_model(train_X,train_Y_one_hot,test_X,test_Y_one_hot):
  """Train a classifier on frozen VGG16 features, enlarging the training set until it is confident.

  Each round builds a fresh model (ImageNet VGG16 with all layers frozen,
  followed by a 128-unit ReLU layer and a 26-way softmax), trains it for 10
  epochs and predicts the test set. Test samples whose highest predicted
  probability is below 0.6 count as low-confidence. If more than 10% of the
  test samples are low-confidence, those samples (with their labels) are moved
  from the test set to the training set and a new round starts. Otherwise the
  model architecture is written to "model_structure.json" and its weights to
  "model-detection.h5".

  Args:
    train_X: training images, shape (n, 32, 32, 3).
    train_Y_one_hot: one-hot training labels with 26 columns.
    test_X: test images, shape (m, 32, 32, 3).
    test_Y_one_hot: one-hot test labels, one row per test image.

  Returns:
    The string "Accuracy is <acc>", where <acc> is the test accuracy (in
    percent) of the model that was saved, measured on the test samples that
    remained in its round.
  """
  train_X = np.asarray(train_X)
  train_Y_one_hot = np.asarray(train_Y_one_hot)
  test_X = np.asarray(test_X)
  test_Y_one_hot = np.asarray(test_Y_one_hot)

  while True:
    vgg16_model = vgg16.VGG16(pooling='avg', weights='imagenet', include_top=False, input_shape=(32,32,3))
    # keep the pretrained convolutional weights fixed; only the new head trains
    for layer in vgg16_model.layers:
      layer.trainable = False
    last_output = vgg16_model.layers[-1].output
    vgg_x = Flatten()(last_output)
    vgg_x = Dense(128, activation = 'relu')(vgg_x)
    vgg_x = Dense(26, activation = 'softmax')(vgg_x)
    vgg16_final_model = Model(vgg16_model.input, vgg_x)
    vgg16_final_model.compile(loss = 'categorical_crossentropy', optimizer= 'adam', metrics=['acc'])
    vgg16_final_model.fit(train_X,train_Y_one_hot,epochs=10,shuffle=True)

    predicted_prob = np.asarray(vgg16_final_model.predict(test_X))
    acc = accuracy_metric(predicted_prob,test_Y_one_hot)
    prf_metric(predicted_prob,test_Y_one_hot)

    # One boolean per test sample, computed against the untouched test set.
    # Selecting with a mask (instead of deleting rows one by one while
    # iterating) guarantees that exactly the low-confidence samples are moved.
    low_confidence = np.max(predicted_prob, axis=1) < 0.6
    thres = np.count_nonzero(low_confidence) / len(predicted_prob)
    print("Percentage of classification with confidence score less than 0.6 is {}".format(thres))

    if thres <= 0.10:
      break
    if low_confidence.all():
      # moving everything would leave no sample to evaluate the next round on
      print("Every remaining test sample is low-confidence; keeping the current model")
      break

    train_X = np.concatenate([train_X, test_X[low_confidence]], axis=0)
    train_Y_one_hot = np.concatenate([train_Y_one_hot, test_Y_one_hot[low_confidence]], axis=0)
    confident = ~low_confidence
    test_X = test_X[confident]
    test_Y_one_hot = test_Y_one_hot[confident]

  # persist the architecture and the weights of the final model
  model_structure=vgg16_final_model.to_json()
  f=Path("model_structure.json")
  f.write_text(model_structure)
  vgg16_final_model.save_weights("model-detection.h5")

  return "Accuracy is {}".format(acc)

# Train on the small labelled split and evaluate on the large split; the
# returned accuracy string is shown as the cell output.
build_model(train_X, train_Y_one_hot, test_X, test_Y_one_hot)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Avg precision :0.8431917963333797 Avg recall : 0.8346546310832025 Avg fcore: 0.8356868532325372
Percentage of false classification with confidence score less than 0.6 is 0.09211145996860283


'Accuracy is 83.46546310832025'

In [None]:
import tensorflow as tf

# Rebuild the architecture from the JSON description written during training
loaded_model_json = Path('model_structure.json').read_text()
loaded_model = tf.keras.models.model_from_json(loaded_model_json)
# Restore the trained weights into the rebuilt model
loaded_model.load_weights("model-detection.h5")

In [None]:
#reading the testdata
# Every image is read as a flattened grayscale vector; its label is the first
# character of the class directory name.
TEST_DIR = '/content/test/'
imgarr=[]
labelarr=[]
unreadable=0
for class_dir in sorted(os.listdir(TEST_DIR)):
  class_path = os.path.join(TEST_DIR, class_dir)
  # ignore notebook checkpoints and any stray entry that is not a directory
  if class_dir == '.ipynb_checkpoints' or not os.path.isdir(class_path):
    continue
  for file_name in sorted(os.listdir(class_path)):
    arr = cv2.imread(os.path.join(class_path, file_name), cv2.IMREAD_GRAYSCALE)
    # cv2.imread reports an unreadable file by returning None, not by raising
    if arr is None:
      unreadable += 1
      continue
    imgarr.append(arr.ravel())
    labelarr.append(class_dir[0])
if unreadable:
  print("Skipped {} unreadable file(s) in {}".format(unreadable, TEST_DIR))

#defining key value pair for label encoding
key_val = {0:'A',1:'B',2:'C',3:'D',4:'E',5:'F',6:'G',7:'H',8:'I',9:'J',10:'K',11:'L',12:'M',13:'N',14:'O',15:'P',16:'Q',17:'R',18:'S',19:'T',20:'U',21:'V',22:'W',23:'X',24:'Y',25:'Z'}
# letter -> class index
label_encoder = {letter: index for index, letter in key_val.items()}
X_test=np.array(imgarr)
y_test=np.array([label_encoder[label] for label in labelarr])
# pin the one-hot width to the label table so it does not depend on which
# letters happen to occur in the test directory
test_Y_one_hot = to_categorical(y_test, num_classes=len(label_encoder))
# same preprocessing as the training images: 3 channels, 28x28, resized to 32x32
X_test=np.dstack([X_test]*3)
X_test= X_test.reshape (-1,28,28,3)
test_X = np.asarray([img_to_array(array_to_img(im, scale=False).resize((32,32))) for im in X_test])

In [None]:
# Predict the test images with the reloaded model; the accuracy (in percent)
# is shown as the cell output.
predicted_prob = loaded_model.predict(test_X)
accuracy_metric(predicted_prob, test_Y_one_hot)

82.99637535239629