In [None]:
#!pip install claptcha

In [None]:
#!rm -R data/training
#!rm -R data/validation

In [None]:
#!rm -R data

In [None]:
#!rmdir /Q /S "data/validation"
#!rmdir /Q /S "data/training"

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import random
import os
import captcha_params
from claptcha import Claptcha
from scipy.sparse import csr_matrix
import tensorflow as tf
import os
from __future__ import print_function
import numpy as np
import cv2

# Seed every RNG the notebook draws from, for reproducibility: NumPy (used to
# shuffle batches) and the stdlib `random` module (used by random.choice when
# drawing captcha texts). Seeding NumPy alone leaves the generated dataset
# different on every run.
np.random.seed(1337)
random.seed(1337)

from keras.utils import np_utils
from keras import backend as K
import keras
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D, Input
import h5py
from keras.callbacks import ModelCheckpoint

In [None]:
# Total number of captcha images generated for training + validation.
N_DATASET = 200000
# Number of leading samples assigned to the training split (80%).
# Stored as an integer count; ceil() makes `i < N_TRAINING` select exactly the
# same indices as the previous float threshold did.
N_TRAINING = int(np.ceil(N_DATASET * 0.8))

# NOTE(review): these two paths are not referenced by the cells visible here.
path_training = "data/training"
path_validation = "data/validation"

# Image size handed to Claptcha when rendering, and the default `dim` of the
# data generator.
height_p = 46
width_p = 180

# input image dimensions (as reported by captcha_params; used for the model input)
img_rows, img_cols = captcha_params.get_height(), captcha_params.get_width()
batch_size = 256
# Captcha texts that have already been produced, so that the text generator
# never hands out the same text twice.
created_labels = {}

In [None]:
# generate the captcha text randomly from the configured character set
def random_captcha_text(
    char_set= captcha_params.get_char_set(),
    captcha_size= captcha_params.get_captcha_size(),
):
    """Draw a random captcha text that has not been produced before.

    Characters are picked independently with ``random.choice``. The joined
    text is looked up in the module-level ``created_labels`` dict; candidates
    that were already handed out are discarded and a new one is drawn.

    Args:
        char_set: sequence of characters to draw from.
        captcha_size: number of characters in the captcha.

    Returns:
        list of ``captcha_size`` single characters.

    Note:
        The loop only ends when an unused text is found, so it never returns
        once every possible text is already recorded in ``created_labels``.
    """
    while True:
        captcha_text = [random.choice(char_set) for _ in range(captcha_size)]
        lab = "".join(captcha_text)
        if lab not in created_labels:
            # Record the text so later calls cannot return it again.
            created_labels[lab] = True
            return captcha_text

# generate the captcha text and image and save the image
def gen_captcha_text_and_image(i,path='data'):
    """Render one fresh captcha and write it to ``<path>/<i>_<text>.png``.

    Args:
        i: index of the sample; becomes the file name prefix.
        path: output directory, created (including parents) when missing.

    Returns:
        The text returned by ``Claptcha.write`` for the rendered image.
    """
    captcha_text = "".join(random_captcha_text())
    # makedirs(exist_ok=True) replaces the check-then-mkdir pair: it has no
    # race between the check and the creation and also handles nested paths.
    os.makedirs(path, exist_ok=True)
    renderer = Claptcha(captcha_text, "font.ttf", (width_p,height_p), margin=(25,5))
    text, _ = renderer.write(path + '/' + str(i) + "_" + captcha_text + ".png")
    return text

In [None]:
# Generate N_DATASET captchas under 'data/'. The first N_TRAINING indices go to
# the training split, the remainder to validation.
#   partition: split name -> list of captcha texts (used as sample IDs)
#   labels:    captcha text -> sample index (the file name prefix)
partition = {'train': [], 'validation': []}
labels = {}

for i in range(N_DATASET):
    # Progress marker every 10000 images.
    if i > 0 and i % 10000 == 0:
        print(i)
    split_name = 'train' if i < N_TRAINING else 'validation'
    text = label = gen_captcha_text_and_image(i)
    partition[split_name].append(label)
    labels[label] = i

In [None]:
# Generate 10000 additional captchas under 'test/'.
#   partition_test: 'test' -> list of captcha texts (used as sample IDs)
#   labels_test:    captcha text -> sample index (the file name prefix)
partition_test = {'test': []}
labels_test = {}
for i in range(10000):
    text = label = gen_captcha_text_and_image(i, 'test')
    partition_test['test'].append(label)
    labels_test[label] = i

In [None]:
#tf.config.list_physical_devices('GPU')

In [None]:
# Captcha geometry, all read from captcha_params:
#   CHAR_SET      - the characters a captcha may contain
#   CHAR_SET_LEN  - the number of elements in the char set
#   MAX_CAPTCHA   - the length of the captcha text
CHAR_SET = captcha_params.get_char_set()
CHAR_SET_LEN = captcha_params.get_char_set_len()
MAX_CAPTCHA = captcha_params.get_captcha_size()

In [None]:
#tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
#print('Running on TPU ', tpu.cluster_spec().as_dict()['worker'])
#
#tf.config.experimental_connect_to_cluster(tpu)
#tf.tpu.experimental.initialize_tpu_system(tpu)
#strategy = tf.distribute.experimental.TPUStrategy(tpu)
#
#print("REPLICAS: ", strategy.num_replicas_in_sync)

In [None]:
class DataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that loads captcha images from disk batch by batch.

    Every sample ID is the captcha text itself; ``labels[ID]`` is the numeric
    index that prefixes the file name, so an image lives at
    ``data/<index>_<text>.png`` (or ``test/<index>_<text>.png`` when
    ``test=True``).

    Args:
        list_IDs: captcha texts that make up this split.
        labels: dict mapping captcha text -> sample index.
        batch_size: number of samples per batch.
        dim: (height, width) of the stored images; batches are produced at
            half this resolution.
        n_channels: size of the trailing channel axis of the image batch.
        n_classes: stored on the instance; not used by the generator itself.
        shuffle: reshuffle the sample order at the end of every epoch.
        test: read images from ``test/`` instead of ``data/``.
    """
    def __init__(self, list_IDs, labels, batch_size=batch_size, dim=(height_p,width_p), n_channels=1,
                 n_classes= CHAR_SET_LEN * MAX_CAPTCHA, shuffle=True,test=False):
        # Harmless where the base class defines no __init__; newer Keras
        # versions expect it to be called by subclasses.
        super().__init__()
        self.dim = dim
        self.batch_size = batch_size
        self.labels = labels
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.test = test
        # Build the initial index order only once every attribute is set.
        self.on_epoch_end()

    def __len__(self):
        """Number of full batches per epoch (a trailing partial batch is dropped)."""
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        'Generate one batch of data'
        # Positions (into list_IDs) belonging to this batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        # Resolve positions to sample IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]
        # Load the images and encode the targets
        X, y = self.__data_generation(list_IDs_temp)
        return X, y

    def on_epoch_end(self):
        """Reset the sample order, shuffling it when ``shuffle`` is enabled."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """Load the images for the given IDs and one-hot encode their texts.

        Returns:
            X: float32 array (n, dim[0] // 2, dim[1] // 2, n_channels) with
               pixel values divided by 255.
            y: list of MAX_CAPTCHA uint32 arrays, one per character position,
               each of shape (n, CHAR_SET_LEN).

        Raises:
            IOError: if an image file cannot be read.
        """
        half_h = int(self.dim[0] / 2)
        half_w = int(self.dim[1] / 2)
        # Size the buffers from the IDs actually received so no uninitialised
        # rows are ever returned.
        n_samples = len(list_IDs_temp)
        X = np.empty((n_samples, half_h, half_w, self.n_channels), dtype="float32")
        y = np.empty((n_samples, MAX_CAPTCHA, CHAR_SET_LEN), dtype="uint32")
        folder = 'test' if self.test else 'data'

        for i, ID in enumerate(list_IDs_temp):
            img_path = '{}/{}_{}.png'.format(folder, self.labels[ID], ID)
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread signals failure by returning None; fail with the
                # offending path instead of an opaque OpenCV error later on.
                raise IOError('Could not read captcha image: {}'.format(img_path))
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            # cv2.resize takes the target size as (width, height)
            img = cv2.resize(img, (half_w, half_h), interpolation=cv2.INTER_AREA)
            X[i] = img[:, :, np.newaxis]

            # One-hot encode each character of the (lower-cased) captcha text
            label = np.zeros((MAX_CAPTCHA, CHAR_SET_LEN))
            for j in range(MAX_CAPTCHA):
                label[j, CHAR_SET.index(ID.lower()[j])] = 1
            y[i] = label

        # X already has its final shape, derived from this instance's own
        # settings; the previous extra reshape relied on module-level globals.
        X /= 255
        # The model has one output head per character position
        y_final = [y[:, pos, :] for pos in range(MAX_CAPTCHA)]
        return X,y_final

In [None]:
# Network input: a single-channel image at half of the captcha_params resolution.
input_layer = Input((int(img_rows / 2), int(img_cols / 2), 1))

# Three conv + max-pool stages with a growing number of 5x5 filters.
x = input_layer
for n_filters in (32, 48, 64):
    x = Conv2D(filters=n_filters, kernel_size=(5, 5), padding='same', activation='relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

# Dense trunk shared by all output heads.
x = Dropout(0.3)(x)
x = Flatten()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.3)(x)

# One softmax head per character position, named digit0 .. digit<MAX_CAPTCHA-1>.
out = [
    Dense(CHAR_SET_LEN, name='digit%d' % i, activation='softmax')(x)
    for i in range(MAX_CAPTCHA)
]

model = Model(inputs=input_layer, outputs=out)

In [None]:
import numpy as np
from keras.models import Sequential

# Generator settings shared by the training and validation splits
params = {'dim': (height_p,width_p),
          'batch_size': batch_size,
          'n_classes': MAX_CAPTCHA * CHAR_SET_LEN,
          'n_channels': 1,
          'shuffle': True,
          'test':False}

# Generators
training_generator = DataGenerator(partition['train'], labels, **params)
validation_generator = DataGenerator(partition['validation'], labels, **params)

#checkpoint_filepath = 'checkpoint/200k_model.ckpt'
#model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
#    filepath=checkpoint_filepath,
#    save_weights_only=True,
#    monitor='val_accuracy',
#    mode='max',
#    save_best_only=True)

# Design model
#with strategy.scope():
#input_shape = (img_rows, img_cols, 1)
#model = load_model.get_model(input_shape)
#optimizer = keras.optimizers.Adadelta(learning_rate=0.)

# Every output head is a softmax over the character set and its target is a
# one-hot row, i.e. a single-label multi-class problem, so categorical
# cross-entropy is the matching loss. With binary cross-entropy each of the
# CHAR_SET_LEN entries is scored as an independent yes/no decision, and -
# depending on the Keras version - "accuracy" can resolve to binary accuracy,
# which is inflated because most one-hot entries are 0.
model.compile(loss="categorical_crossentropy", optimizer='adam',metrics=["accuracy"])
model.summary()

In [None]:
# Train model on dataset
# The checkpoint callback is defined here so the cell runs on a fresh kernel
# (it was previously only defined in commented-out code -> NameError).
# The '.weights.h5' suffix is used because HDF5 weight files are accepted
# across Keras versions.
checkpoint_filepath = 'checkpoint/200k_model.weights.h5'
os.makedirs(os.path.dirname(checkpoint_filepath), exist_ok=True)
model_checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    # The model has one output per character, so metrics are reported per head
    # and there is no single 'val_accuracy'; the total validation loss is
    # always available.
    monitor='val_loss',
    mode='min',
    save_best_only=True)

model.fit(training_generator, validation_data=validation_generator, epochs=65,
          callbacks=[model_checkpoint_callback])

In [None]:
# Persist the trained model under the path "model".
# NOTE(review): the on-disk format chosen for an extension-less path depends on
# the installed Keras version - confirm it is the one you intend to load later.
model.save("model")

In [None]:
#from google.colab import drive
#drive.mount('/content/drive')

In [None]:
# Same generator settings as for training, except that images are read from
# the 'test/' folder (test=True).
params = dict(
    dim=(height_p, width_p),
    batch_size=batch_size,
    n_classes=MAX_CAPTCHA * CHAR_SET_LEN,
    n_channels=1,
    shuffle=True,
    test=True,
)

# Generator over the held-out test captchas
test_generator = DataGenerator(partition_test['test'], labels_test, **params)

In [None]:
# Evaluate the trained model on the test generator without progress output;
# `score` is indexed in parallel with model.metrics_names by the next cell.
score = model.evaluate(test_generator, verbose=0)

In [None]:
# Print every metric name next to its evaluated value.
for i, metric_name in enumerate(model.metrics_names):
    print('{}: {}'.format(metric_name, score[i]))

In [None]:
# Exact-match accuracy on the test set: a captcha only counts as correct when
# every character position is predicted correctly.
acc = 0
tot = 0
# Index the Sequence explicitly over its declared length: implicit iteration
# relies on the base class providing __iter__, and without it Python falls back
# to calling __getitem__ until an IndexError that this generator never raises.
for batch_index in range(len(test_generator)):
    X_test, y_test = test_generator[batch_index]
    predict = model.predict(X_test, batch_size=batch_size, verbose=0)
    # y_test / predict hold one (batch, CHAR_SET_LEN) array per character
    # position; use the real batch length rather than the global batch_size.
    for i in range(len(X_test)):
        tot += 1
        true = [CHAR_SET[np.argmax(y_test[j][i])] for j in range(MAX_CAPTCHA)]
        pred = [CHAR_SET[np.argmax(predict[j][i])] for j in range(MAX_CAPTCHA)]
        # Show every 100th sample as a spot check
        if tot % 100 == 0:
            print('TRUE: {}'.format(true))
            print('PREDICTED: {}'.format(pred))
            print('='*75)
        if true == pred:
            acc += 1
# Guard against an empty test generator (fewer samples than one batch)
print('ACCURACY: {}'.format(acc / tot if tot else float('nan')))