In [1]:
import logging, os
logging.disable(logging.WARNING)
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
import tensorflow as tf
import numpy as np
from tensorflow import keras
import matplotlib.pyplot as plt

In [2]:
ENCODING_DICT = {
            'a': 0,
            'f': 1,
            'e': 2,
            'c': 3,
            'b': 4,
            'h': 5,
            'v': 6,
            'z': 7,
            '2': 8,
            'x': 9,
            'g': 10,
            'm': 11,
            'r': 12,
            'u': 13,
            'p': 14,
            's': 15,
            'd': 16,
            'n': 17,
            '6': 18,
            'k': 19,
            't': 20
            }

DECODING_DICT = {
            0: 'a',
            1: 'f',
            2: 'e',
            3: 'c',
            4: 'b',
            5: 'h',
            6: 'v',
            7: 'z',
            8: '2',
            9: 'x',
            10: 'g',
            11: 'm',
            12: 'r',
            13: 'u',
            14: 'p',
            15: 's',
            16: 'd',
            17: 'n',
            18: '6',
            19: 'k',
            20: 't'
            }

In [3]:
def imagepath_to_captcha(imagepath):
    return imagepath.split("_")[1].split(".")[0]

def to_onehot(captcha):
    label = np.zeros((len(captcha), len(ENCODING_DICT)))
    for index, char in enumerate(captcha):
        label[index, ENCODING_DICT[char]] = 1
    return tf.convert_to_tensor(label, tf.float32)
    # return tf.reshape(tf.convert_to_tensor(label, tf.float32), [-1])

def captcha_from_onehot(tensor):
    label = []
    for index in tf.argmax(tensor, 1).numpy():
        label.append(DECODING_DICT[index])
    return "".join(label)

# Instead of one hot, if we directly use the labels, the label becomes of size 6.
def to_labels(captcha):
    label = np.zeros(len(captcha))
    for index, char in enumerate(captcha):
        label[index] = ENCODING_DICT[char]
    return tf.convert_to_tensor(label, tf.float32)

def load_dataset(filepath):
    filenames = os.listdir(filepath)
    x, y = [], []
    for file in filenames:
        imagepath = filepath + "/" + file
        image = tf.io.read_file(imagepath)
        image = tf.image.decode_image(image)
        pixels = tf.image.convert_image_dtype(image, tf.float32)
        # To show images: 
        # plt.imshow(tf.reshape(pixels, image.shape))
        # plt.title('Image as Pixels')
        # plt.show()
        x.append(pixels)
        captcha = imagepath_to_captcha(file).lower()
        y.append(to_onehot(captcha))
    return (tf.convert_to_tensor(x, tf.float32), tf.convert_to_tensor(y, tf.float32))


# Read input data

In [4]:
train_x, train_y = load_dataset("dataset/train")
test_x, test_y = load_dataset("dataset/test")

input_shape = train_x[0].shape
output_shape = train_y[0].shape

print("Training input shape:", train_x.shape)
print("Training output shape:", train_y.shape)
print("Testing input shape:", test_x.shape)
print("Testing output shape:", test_y.shape)

Training input shape: (8501, 50, 250, 3)
Training output shape: (8501, 6, 21)
Testing input shape: (1500, 50, 250, 3)
Testing output shape: (1500, 6, 21)


In [5]:

def get_feature_layers(inputs: keras.layers.Input):
    inputs = keras.applications.resnet.preprocess_input(inputs)
    feature_extractor = keras.applications.resnet.ResNet50(
        include_top=False,
        weights='imagenet',
        input_shape=input_shape,
    )(inputs)
    return feature_extractor

def get_classification_layers(inputs: keras.layers.Input):
    x = keras.layers.GlobalAveragePooling2D()(inputs)
    x = keras.layers.Flatten()(x)
    x = keras.layers.Dense(1024, activation='relu')(x)
    x = keras.layers.Dense(512, activation='relu')(x)
    x = keras.layers.Dense(126, activation='softmax')(x)
    x = keras.layers.Reshape(output_shape)(x)
    return x


def final_model(inputs):
    feature_extractor = get_feature_layers(inputs)
    classification_layers = get_classification_layers(feature_extractor)
    return classification_layers
    

In [6]:
inputs = keras.layers.Input(shape=input_shape)
output = final_model(inputs)

model = keras.Model(inputs=inputs, outputs=output)

model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 50, 250, 3)]      0         
                                                                 
 tf.__operators__.getitem (  (None, 50, 250, 3)        0         
 SlicingOpLambda)                                                
                                                                 
 tf.nn.bias_add (TFOpLambda  (None, 50, 250, 3)        0         
 )                                                               
                                                                 
 resnet50 (Functional)       (None, 2, 8, 2048)        23587712  
                                                                 
 global_average_pooling2d (  (None, 2048)              0         
 GlobalAveragePooling2D)                                         
                                                             

In [7]:
model.compile(
    optimizer = "adam",
    loss = 'categorical_crossentropy',
    metrics=['accuracy']
)

# graphwiz doesn't install on gLinux ffs.
# tf.keras.utils.plot_model(model, show_shapes=True)


In [8]:
callbacks = [keras.callbacks.EarlyStopping(monitor = 'loss', 
                                           patience = 1, 
                                           restore_best_weights = True)]

history = model.fit(train_x, train_y,
                    epochs = 50,
                    callbacks = callbacks)
print(history.history)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
{'loss': [2.9806342124938965, 2.447199583053589, 1.5181081295013428, 0.7359229326248169, 0.38669854402542114, 0.240092471241951, 0.1505930870771408, 0.11962655186653137, 0.1025000512599945, 0.08418072760105133, 0.07753480225801468, 0.0735083520412445, 0.07856258004903793], 'accuracy': [0.066972516477108, 0.16609810292720795, 0.43547818064689636, 0.7294828295707703, 0.8743284940719604, 0.9370270371437073, 0.9656510949134827, 0.9729443788528442, 0.9750421643257141, 0.9795122146606445, 0.9798455238342285, 0.9812766909599304, 0.9800611734390259]}


In [9]:
model.evaluate(test_x, test_y)



[6.113442897796631, 0.047333333641290665]