<a href="https://colab.research.google.com/github/mlokendra/Navia_life_care_Assignment1/blob/main/Num_Captcha.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Libraries required**

In [184]:
import os
import cv2
import numpy as np
import pandas as pd
from random import random, randint, choices
import keras
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D, Input
import matplotlib.pyplot as plt
from google.colab.patches import cv2_imshow

# **Data Loading And Pre-processing**`

In [53]:
def load_data(path, test_split=0.1):
    print ('loading dataset...')
    y_train = []
    y_test = []
    x_train = []
    x_test = []

    # r=root, d=directories, f = files
    counter = 0
    for r, d, f in os.walk(path):
        for fl in f[:100]:
            if '.png' in fl:
                flr = fl.split('_')[0]
                counter += 1
                label = np.zeros((NUM_OF_LETTERS, 10))
                for i in range(NUM_OF_LETTERS):
                    label[i, int(flr[i])] = 1

                img = cv2.imread(os.path.join(r, fl))
                img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                img = cv2.resize(img, (int(135/2), int(50/2)), interpolation=cv2.INTER_AREA)
                img = np.reshape(img, (img.shape[0], img.shape[1], 1))

                if random() < test_split:
                    y_test.append(label)
                    x_test.append(img)
                else:
                    y_train.append(label)
                    x_train.append(img)


    print('dataset size:', counter, '(train=%d, test=%d)' % (len(y_train), len(y_test)))
    return np.array(x_train), np.array(y_train), np.array(x_test), np.array(y_test)

In [25]:
NUM_OF_LETTERS=5
x_train, y_train, x_test, y_test = load_data('./electoral-tagged')
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255

loading dataset...
dataset size: 938 (train=858, test=80)


In [26]:

print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

(858, 25, 67, 1)
(858, 5, 10)
(80, 25, 67, 1)
(80, 5, 10)


# **MODEL**

In [160]:
input_layer = Input((25, 67, 1))
x = Conv2D(filters=16, kernel_size=(5, 5), padding='same', activation='relu')(input_layer)
x = MaxPooling2D(pool_size=(2, 2))(x)

x = Conv2D(filters=32, kernel_size=(5, 5), padding='same', activation='relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)

x = Conv2D(filters=64, kernel_size=(5, 5), padding='same', activation='relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)

x = Conv2D(filters=128, kernel_size=(5, 5), padding='same', activation='relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)

x = Dropout(0.3)(x)
x = Flatten()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.3)(x)

out = [Dense(10, name='digit%d' % i, activation='softmax')(x) for i in range(NUM_OF_LETTERS)]
model = Model(inputs=input_layer, outputs=out)
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])


model.summary()

Model: "model_5"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_6 (InputLayer)            [(None, 25, 67, 1)]  0                                            
__________________________________________________________________________________________________
conv2d_17 (Conv2D)              (None, 25, 67, 16)   416         input_6[0][0]                    
__________________________________________________________________________________________________
max_pooling2d_17 (MaxPooling2D) (None, 12, 33, 16)   0           conv2d_17[0][0]                  
__________________________________________________________________________________________________
conv2d_18 (Conv2D)              (None, 12, 33, 32)   12832       max_pooling2d_17[0][0]           
____________________________________________________________________________________________

In [153]:
s_train = []
s_test = []
for i in range(NUM_OF_LETTERS):
    s_train.append(y_train[:, i, :])
    s_test.append(y_test[:, i, :])

In [178]:
# train in multiple steps 
history = model.fit(x_train, s_train,
                    batch_size=32,
                    epochs=120,
                    verbose=1,
                    validation_data=(x_test, s_test)
                   )

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


# **Prediction**

In [186]:
def transform_data(path):
    print ('loading dataset...')
    x = []
    y=[]
    # r=root, d=directories, f = files
    counter = 0
    for r, d, f in os.walk(path):
        for fl in f:
            if '.png' in fl:
                flr = fl.split('_')[0]
                counter += 1
                y.append(flr)

                img = cv2.imread(os.path.join(r, fl))
                #cv2_imshow(img)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                img = cv2.resize(img, (int(135/2), int(50/2)), interpolation=cv2.INTER_AREA)
                img = np.reshape(img, (img.shape[0], img.shape[1], 1))
                x.append(img)


    print('dataset size:', counter)
    return np.array(x),y

In [187]:
NUM_OF_LETTERS=5
x,k = transform_data('./electoral-captchas')
x = x.astype('float32')

loading dataset...
dataset size: 1500


In [188]:
y=model.predict(x)

In [189]:
y=np.argmax(y, axis=2)

In [190]:
l=[]
for i in range(y.shape[1]):
  num=y[0][i]*10000+y[1][i]*1000+y[2][i]*100+y[3][i]*10+y[4][i]
  l.append(num)

In [191]:
l

[72125,
 13539,
 36677,
 98473,
 97324,
 88419,
 73191,
 65692,
 69562,
 44697,
 99999,
 51415,
 88784,
 44177,
 81567,
 74191,
 64598,
 14461,
 41352,
 16892,
 27591,
 27366,
 47667,
 41123,
 72989,
 94867,
 52883,
 27481,
 54195,
 27763,
 15315,
 99892,
 18878,
 97687,
 78732,
 17598,
 38981,
 61835,
 97613,
 41379,
 76247,
 38624,
 22988,
 88278,
 11575,
 42379,
 91588,
 14762,
 54715,
 65384,
 81152,
 14634,
 12495,
 54255,
 78572,
 34459,
 51282,
 28225,
 46625,
 28275,
 22434,
 83283,
 55574,
 96395,
 38486,
 16373,
 54944,
 66531,
 49823,
 66244,
 36298,
 29439,
 96792,
 96865,
 38544,
 73883,
 58887,
 79575,
 24757,
 89162,
 31936,
 79598,
 66766,
 24758,
 22788,
 88293,
 74196,
 87498,
 86592,
 47793,
 84662,
 56662,
 75687,
 65645,
 19595,
 11425,
 97395,
 48946,
 16911,
 95366,
 51544,
 22115,
 33858,
 37364,
 48975,
 38227,
 51196,
 79526,
 76565,
 99283,
 39742,
 85299,
 51894,
 87894,
 55442,
 59617,
 14599,
 97396,
 67449,
 73739,
 59991,
 22421,
 96973,
 23743,
 38556,


In [192]:
df = pd.DataFrame({"image_name" : k, "Number" : l})
df.to_csv("submission.csv", index=False)