# Image processing

In [None]:
import cv2
import pandas
import numpy as np
import matplotlib.pyplot as plt
import os
from tensorflow.keras.utils import img_to_array
from PIL import Image

In [None]:
# Size (in pixels) that every character image is resized to.
RESIZED_IMAGE_HEIGHT = 30
RESIZED_IMAGE_WIDTH = 20

# Dataset root: the "dataset" folder inside the current working directory.
DATASET_PATH = f"{os.getcwd()}/dataset/"

In [None]:
# Accumulators for the dataset: X collects the images, y the matching labels.
X, y = [], []

In [None]:
# Read the dataset: every single-character sub-folder of DATASET_PATH holds the
# PNG samples of that character.  Samples are collected in fresh local lists so
# the cell can be re-run safely (X and y are rebound to arrays at the end).
images = []                     # resized grayscale character images
labels = []                     # matching labels (code point of the character)
for char in os.listdir(DATASET_PATH):
    print(char)
    char_dir = os.path.join(DATASET_PATH, char)
    # a name longer than 1 character is not a character class; stray files
    # that are not folders cannot be listed and are skipped as well
    if len(char) > 1 or not os.path.isdir(char_dir):
        continue
    for img in os.listdir(char_dir):
        if not img.endswith('.png'):            # only png files are samples
            continue
        img_file = os.path.join(char_dir, img)
        image = cv2.imread(img_file, cv2.IMREAD_GRAYSCALE)
        if image is None:                       # cv2.imread returns None instead of raising
            print('Skipping unreadable image: ' + img_file)
            continue
        imageROI = cv2.resize(image, (RESIZED_IMAGE_WIDTH, RESIZED_IMAGE_HEIGHT))
        images.append(img_to_array(Image.fromarray(imageROI)))
        labels.append(ord(char))
X = np.array(images)            # stacked image arrays
y = np.array(labels)            # stacked integer labels

In [None]:
# Shapes of the image array and the label array, one per line.
print(X.shape, y.shape, sep='\n')

# Initial Analysis and Data Wrangling

### Scale between 0 and 1

In [None]:
# In-place division of the whole image array by 255 (presumably mapping 0-255
# pixel values to 0-1 -- confirm the input range).  NOTE: because the update is
# in place, re-running this cell divides the data by 255 again.
X /= 255.0                              # normalize the data

Print frequency of each character in the list

In [None]:
# Report how many samples carry each distinct label.
temp = set(y)                   # distinct label codes
for code in temp:
    occurrences = np.count_nonzero(y == code)
    print(f'Occurance count of {chr(code)} : {occurrences}')

In [None]:
import seaborn

In [None]:
# Table of distinct label codes and their sample counts, used for plotting.
label_codes = list(temp)
temp_df = pandas.DataFrame({
    'labels': label_codes,
    'Count': [int((y == code).sum()) for code in label_codes],
})

In [None]:
# Bar chart of the number of samples per label.
plt.figure(figsize=(20, 7))
seaborn.barplot(data=temp_df, x='labels', y='Count', palette='Blues_d')
plt.title('Label distribution in CAPTCHAS', fontsize=20)

### One hot encoding

In [None]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Map the raw label codes to consecutive class indices (0 .. n_classes - 1).
y_combine = LabelEncoder().fit_transform(y)
# One-hot encode the class indices.  The encoder's default sparse result is
# densified with .toarray() instead of passing `sparse=False`: that keyword was
# renamed to `sparse_output` in scikit-learn 1.2 and later removed, so this
# form works on both old and new releases.
y_one_hot = OneHotEncoder().fit_transform(y_combine.reshape(-1, 1)).toarray()

In [None]:
# Inspect the sample at index 1: raw label, encoded class index, and how many
# samples share that class.
print(f'letter n : {y[1]}')
print(f'label : {y_combine[1]}')
print(f'Count : {np.count_nonzero(y_combine == y_combine[1])}')

In [None]:
# Lookup table: encoded class index -> original label code.
info = dict(zip(y_combine, y))

In [None]:
# Show the class-index -> label-code mapping built above.
print(info)

# Train test split

In [48]:
# Shapes of the images and of the labels in one-hot encoded form, one per line.
print(X.shape, y_one_hot.shape, sep='\n')

(10000, 30, 20, 1)
(10000, 34)


In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the samples for evaluation; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_one_hot,
    test_size=0.2,
    random_state=1,
)

# Model Creation

In [None]:
from keras.models import Sequential 
from keras.layers import Dense
from keras.layers import Conv2D
from keras.layers import Flatten
from keras.layers import MaxPooling2D
from keras.layers import BatchNormalization

from keras.layers import Dropout
from keras.layers import Input

In [None]:
# Shapes of the training images and training labels, one per line.
print(X_train.shape, y_train.shape, sep='\n')

In [None]:
# Shapes of the held-out images and labels, one per line.
print(X_test.shape, y_test.shape, sep='\n')

In [None]:
def conv_layer(filterx):
    """Build one convolutional block as a Sequential sub-model.

    The block is Conv2D (3x3 kernel, 'same' padding, ReLU) followed by
    BatchNormalization, Dropout(0.2) and a 2x2 MaxPooling2D with 'same'
    padding.

    filterx: number of filters of the Conv2D layer.
    Returns the Sequential model holding these four layers.
    """
    return Sequential([
        Conv2D(filterx, (3, 3), padding='same', activation='relu'),
        BatchNormalization(),
        Dropout(0.2),
        MaxPooling2D(pool_size=(2, 2), padding='same'),
    ])

In [None]:
def dens_layer(hiddenx):
    """Build one fully connected block as a Sequential sub-model.

    The block is Dense (ReLU) followed by BatchNormalization and Dropout(0.2).

    hiddenx: number of units of the Dense layer.
    Returns the Sequential model holding these three layers.
    """
    return Sequential([
        Dense(hiddenx, activation='relu'),
        BatchNormalization(),
        Dropout(0.2),
    ])

In [None]:
def cnn(filter1, filter2, filter3, hidden1, hidden2, input_shape=None, num_classes=None):
    """Build and compile the character-classification CNN.

    Architecture: three conv_layer blocks, Flatten, two dens_layer blocks and
    a softmax output layer.  The model is compiled with categorical
    cross-entropy loss, the Adam optimizer and the accuracy metric.

    filter1, filter2, filter3: filter counts of the three convolutional blocks.
    hidden1, hidden2: unit counts of the two dense blocks.
    input_shape: shape of one input sample; defaults to X.shape[1:] so existing
        calls keep working unchanged.
    num_classes: size of the softmax output; defaults to y_one_hot.shape[1].

    Returns the compiled Sequential model.
    """
    # Fall back to the notebook-level data only when the caller gives no shape,
    # so the builder can also be used for other datasets.
    if input_shape is None:
        input_shape = X.shape[1:]
    if num_classes is None:
        num_classes = y_one_hot.shape[1]

    model = Sequential()
    model.add(Input(input_shape))

    for filters in (filter1, filter2, filter3):
        model.add(conv_layer(filters))

    model.add(Flatten())
    for units in (hidden1, hidden2):
        model.add(dens_layer(units))

    model.add(Dense(num_classes, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# Data augmentation and Oversampling

Oversampled images

In [None]:
# Show a 5x5 grid of randomly picked training images with their labels.
plt.figure(figsize=(30, 20))

lo = 5000                       # lowest index that may be drawn (inclusive)
hi = 7800                       # upper bound of the drawn index (exclusive)

for i in range(25):
    plt.subplot(5, 5, i + 1)
    x = np.random.randint(lo, hi)
    plt.imshow(X_train[x], cmap='gray')
    label_code = info[np.argmax(y_train[x])]    # one-hot row -> class index -> label code
    plt.title(f'Label is {label_code}')
plt.show()

### ImageDataGenerator

In [None]:
from keras.preprocessing.image import ImageDataGenerator

In [None]:
# Augmentation settings: rotation_range of 5 and width shifts taken from the
# list [-2, 2] (see the Keras ImageDataGenerator docs for how a list is used).
traingen = ImageDataGenerator(
    rotation_range=5,
    width_shift_range=[-2, 2],
)
traingen.fit(X_train)

In [None]:
# Iterator yielding augmented (images, labels) batches from the training data.
train_set = traingen.flow(X_train, y_train)

In [None]:
# Pull a single augmented batch (images and labels) from the iterator.
trainX, trainy = next(train_set)

# Model Training

In [None]:
# Build the network: 128/32/16 convolution filters, two dense blocks of 32 units.
model = cnn(filter1=128, filter2=32, filter3=16, hidden1=32, hidden2=32)
model.summary()

### ModelCheckpoint and ReduceLROnPlateau
Used ModelCheckpoint to retain the best performing model (in terms of validation loss), and ReduceLROnPlateau to reduce the learning rate in case the model stops improving.

In [None]:
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

# Checkpoint callback: writes the model to ./result_model.h5, keeping only the
# best one as measured by val_loss.
checkp = ModelCheckpoint(
    './result_model.h5',
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
)

In [None]:
# Learning-rate callback watching val_loss with a patience of 20 epochs.
reduce = ReduceLROnPlateau(
    monitor='val_loss',
    patience=20,
    verbose=1,
)

In [None]:
# Shapes of the training images and labels right before fitting, one per line.
print(X_train.shape, y_train.shape, sep='\n')

In [None]:
# Train on augmented batches and validate on the held-out split.
# - steps_per_epoch has to be a whole number of batches, so it is rounded up
#   to an int instead of being passed as the float produced by `/`.
# - the ReduceLROnPlateau callback (`reduce`) is registered next to the
#   checkpoint; before, it was created but never handed to fit(), so the
#   learning rate was never reduced.
batch_size = 32
history = model.fit(
    traingen.flow(X_train, y_train, batch_size=batch_size),
    validation_data=(X_test, y_test),
    epochs=50,
    steps_per_epoch=int(np.ceil(len(X_train) / batch_size)),
    callbacks=[checkp, reduce],
)

In [None]:
def _plot_curves(position, train_key, val_key, ylabel, legend, title):
    """Draw one training/validation curve pair in row `position` of a 2x1 grid."""
    plt.subplot(2, 1, position)
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.xlabel('Epochs')
    plt.ylabel(ylabel)
    plt.legend(legend)
    return plt.title(title)


# Top panel: loss curves; bottom panel: accuracy curves.
plt.figure(figsize=(20, 10))
_plot_curves(1, 'loss', 'val_loss', 'Losses',
             ['train loss', 'val loss'], 'Loss function wrt epochs')
_plot_curves(2, 'accuracy', 'val_accuracy', 'Accuracy',
             ['train acc', 'val acc'], 'Model accuracy wrt Epoch')

# Prediction

In [None]:
from keras.models import load_model

In [None]:
# Reload the model file written by the ModelCheckpoint callback, replacing the
# in-memory model from training.
model = load_model('./result_model.h5')

In [None]:
# Model outputs for every held-out image (reduced to class indices below).
pred = model.predict(X_test)

Performed numpy argmax to obtain, for each sample, the class with the highest predicted probability.

In [None]:
# Reduce the model outputs and the one-hot targets to class indices.
pred = pred.argmax(axis=1)
yres = y_test.argmax(axis=1)

In [None]:
from sklearn.metrics import accuracy_score, classification_report

In [None]:
# Label codes ordered by their encoded class index.
target_name = [info[class_index] for class_index in sorted(info)]

In [None]:
# Display the ordered list of label codes.
target_name

In [None]:
# Fraction of held-out samples whose predicted class matches the true class.
print(f'Accuracy : {accuracy_score(yres, pred)}')

# Testing on samples

In [None]:
def get_demo(img_path):
    """Predict and print the five characters of one CAPTCHA image.

    The image is displayed, binarised, cleaned up, inverted, cut into five
    fixed 36-pixel-wide cells and every cell is classified with the
    notebook-level `model`.  Each predicted character is printed, followed by
    the last nine characters of `img_path`.

    Parameters
    ----------
    img_path : str
        Path of the CAPTCHA image file.

    Raises
    ------
    FileNotFoundError
        If the image cannot be read.
    """
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising
        raise FileNotFoundError('Could not read image: ' + img_path)

    plt.imshow(img, 'gray')
    plt.axis('off')
    plt.show()

    # get binary image
    thresh = cv2.adaptiveThreshold(
        img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 0)
    # remove noise from image
    close = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE,
                             np.ones((3, 3), np.uint8))
    # get dilated image
    dilate = cv2.dilate(close, np.ones((2, 2), np.uint8), iterations=1)
    # invert image colors
    image = cv2.bitwise_not(dilate)

    # get separate characters from captcha: rows 5..39, five 36-pixel-wide cells
    image_list = [image[5:40, i:i+36] for i in range(0, 180, 36)]

    # append characters to list as array
    Xdemo = []
    for i in range(5):
        image_list[i] = cv2.resize(image_list[i], (RESIZED_IMAGE_WIDTH, RESIZED_IMAGE_HEIGHT))
        # show each character cell; waits for a key press before continuing
        cv2.imshow('image', image_list[i])
        cv2.waitKey(0)
        cv2.destroyAllWindows()
        Xdemo.append(img_to_array(Image.fromarray(image_list[i])))
    # Scale by 1/255 exactly like the training data; without this the model is
    # fed inputs on a different scale from the one it was trained on.
    Xdemo = np.array(Xdemo) / 255.0

    # predict characters
    ydemo = model.predict(Xdemo)
    ydemo = np.argmax(ydemo, axis=1)

    for res in ydemo:
        print(chr(info[res]))       # class index -> label code -> character
    print(img_path[-9:])
    cv2.destroyAllWindows()

In [None]:
# Run the demo on one sample CAPTCHA image.
get_demo('./test_images/6M45U.png')