In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt

from pathlib import Path
from collections import Counter

import pandas as pd
import tensorflow as tf
from tensorflow import keras
#from tensorflow.keras.utils.vis_utils import plot_model
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping

%reload_ext autoreload
%autoreload 2

In [None]:
data_dir = Path("./data/raw/")

# Get list of all the images (as sorted path strings, so the listing does not
# depend on the order in which glob yields the files).
images = sorted(map(str, data_dir.glob("*.png")))
# The label of each captcha is its file name up to the ".png" extension.
labels = [img.split(os.path.sep)[-1].split(".png")[0] for img in images]
characters = set(char for label in labels for char in label)

print("Number of images found: ", len(images))
print("Number of labels found: ", len(labels))
print("Number of unique characters: ", len(characters))
print("Characters present: ", characters)

# Batch size for training and validation
batch_size = 16

# Desired image dimensions
image_width = 200
image_height = 50

# Factor by which the image is going to be downsampled
# by the convolutional blocks. We will be using two
# convolution blocks, and each block will have
# a pooling layer which downsamples the features by a factor of 2.
# Hence the total downsampling factor is 4.
downsample_factor = 4

# Maximum length of any captcha in the dataset.
# With an empty data directory a bare max() would raise an opaque
# "max() arg is an empty sequence"; warn explicitly and fall back to 0 instead.
if not labels:
    print("WARNING: no .png images found in", data_dir, "- max_length falls back to 0")
max_length = max((len(label) for label in labels), default=0)

In [None]:
# Sorted array of the distinct characters across all labels. This replaces the
# unordered set computed earlier, giving the vocabulary a deterministic order.
characters = np.unique(list("".join(labels)))

In [None]:
# Inverse lookup layer: converts integer indices back into the characters of
# the vocabulary built from the labels.
num_to_char = layers.experimental.preprocessing.StringLookup(
    invert=True,
    mask_token=None,
    vocabulary=list(characters),
)
# Bare expression so the cell displays the layer's repr.
num_to_char

In [None]:

from src.stages.split import split_data_from_config
# Split the data into training and validation sets, driven by ./params.yaml.
# NOTE(review): the stage lives in src/stages/split.py and is not visible here;
# presumably it persists the split for the later stages - confirm.
# The trailing semicolon suppresses the cell's output display.
split_data_from_config('./params.yaml');


In [None]:
from src.stages.datasets import create_datasets_from_config

# Run the dataset-creation stage; the trailing semicolon hides the cell output.
create_datasets_from_config('./params.yaml');

# Both saved datasets share one element structure: batches of single-channel
# float images laid out as (width, height, 1) and batches of int64 labels.
element_spec = {
    'image': tf.TensorSpec(shape=(None, image_width, image_height, 1), dtype=tf.float32),
    'label': tf.TensorSpec(shape=(None, None), dtype=tf.int64),
}

train_dataset = tf.data.experimental.load('./data/datasets/train_dataset', element_spec)

validation_dataset = tf.data.experimental.load('./data/datasets/validation_dataset', element_spec)

In [None]:
# Preview one training batch: each image is shown with its decoded label.
_, ax = plt.subplots(4, 4, figsize=(10, 5))
for batch in train_dataset.take(1):
    # Dedicated names on purpose: binding the batch tensors to `images` /
    # `labels` would overwrite the file-path and label lists built at the top
    # of the notebook and break any re-run of the cells that read them.
    sample_images = batch["image"]
    sample_labels = batch["label"]
    # Never index past the batch (it may hold fewer than 16 items) or the grid.
    n_shown = min(int(sample_images.shape[0]), ax.size)
    for i in range(n_shown):
        img = (sample_images[i] * 255).numpy().astype("uint8")
        label = tf.strings.reduce_join(num_to_char(sample_labels[i])).numpy().decode("utf-8")
        # Images are stored as (width, height, 1); transpose for display.
        ax[i // 4, i % 4].imshow(img[:, :, 0].T, cmap="gray")
        ax[i // 4, i % 4].set_title(label)
    # Hide the frames of every panel, including any left without an image.
    for axis in ax.flat:
        axis.axis("off")
plt.show()

In [None]:
from src.stages.model_setup import create_model_from_config

# Run the model-setup stage, driven by ./params.yaml.
# NOTE(review): presumably this stage writes ./models/untrained_model.h5, which
# is loaded right below - confirm in src/stages/model_setup.py.
create_model_from_config('./params.yaml')

# Load the model from its HDF5 file (untrained, going by the file name).
model = tf.keras.models.load_model('./models/untrained_model.h5')

In [None]:
#tf.keras.utils.plot_model(model)

In [None]:
from src.stages.training import train_from_config

# Run the training stage, driven by ./params.yaml. Both returned values are
# used by later cells: `history` (its .epoch and .history attributes) for the
# loss curves, and `trained_model` to build the prediction model.
history, trained_model = train_from_config('./params.yaml')

In [None]:
# Training vs. validation loss, on a linear (left) and a logarithmic (right)
# y-axis.
fig, ax = plt.subplots(ncols=2)

# The first recorded epoch is left out of both panels.
epochs = history.epoch[1:]
for history_key, curve_label in (("loss", "train"), ("val_loss", "validation")):
    values = history.history[history_key][1:]
    ax[0].plot(epochs, values, label=curve_label)
    ax[1].semilogy(epochs, values, label=curve_label)

# Titles, axis labels and a legend so the figure can be read on its own.
ax[0].set_title("Loss (linear scale)")
ax[1].set_title("Loss (log scale)")
for axis in ax:
    axis.set_xlabel("epoch")
    axis.set_ylabel("loss")
    axis.legend()

fig.tight_layout()

In [None]:
# Inference model: runs from the "image" input layer of the trained network to
# the output of its "dense2" layer.
prediction_model = keras.models.Model(
    inputs=trained_model.get_layer(name="image").input,
    outputs=trained_model.get_layer(name="dense2").output,
)
prediction_model.summary()


In [None]:
# A utility function to decode the output of the network
def decode_batch_predictions(pred):
    """Turn a batch of network outputs into a list of predicted strings.

    Args:
        pred: Array of network outputs with at least two dimensions; the first
            is the batch size and the second the number of time steps
            (presumably (batch, time_steps, num_classes), the layout expected
            by ``ctc_decode`` - confirm against the model's output layer).

    Returns:
        A list with one decoded ``str`` per batch item, each at most
        ``max_length`` characters long.
    """
    # Every sequence in the batch spans the full number of time steps.
    input_len = np.ones(pred.shape[0]) * pred.shape[1]
    # Use greedy search. For complex tasks, you can use beam search
    results = keras.backend.ctc_decode(pred, input_length=input_len, greedy=True)[0][0][
        :, :max_length
    ]
    # Iterate over the results and get back the text
    output_text = []
    for res in results:
        # ctc_decode pads sequences shorter than the longest one with -1.
        # Drop those entries, otherwise the inverted lookup renders each of
        # them as the out-of-vocabulary token "[UNK]" in the decoded text.
        res = tf.boolean_mask(res, tf.not_equal(res, -1))
        res = tf.strings.reduce_join(num_to_char(res)).numpy().decode("utf-8")
        output_text.append(res)
    return output_text

# Counters read by the score cell below. Both are initialised here so they
# exist (and are reset on every re-run) even if the loop body never executes.
acc_score = 0  # predictions that match their label exactly
m = 0          # validation samples evaluated

#  Let's check results on some validation samples
for batch in validation_dataset.take(1):
    batch_images = batch["image"]
    batch_labels = batch["label"]

    preds = prediction_model.predict(batch_images)
    pred_texts = decode_batch_predictions(preds)
    # Accumulate, so the denominator stays in step with acc_score if more
    # than one batch is ever taken.
    m += len(pred_texts)

    orig_texts = [
        tf.strings.reduce_join(num_to_char(label)).numpy().decode("utf-8")
        for label in batch_labels
    ]

    # Size the grid from the batch (4 columns; a batch of 16 gives 4x4) and
    # keep `ax` two-dimensional even when a single row is enough.
    n_cols = 4
    n_rows = max(1, -(-len(pred_texts) // n_cols))
    _, ax = plt.subplots(n_rows, n_cols, figsize=(15, 5), squeeze=False)

    for i, (pred_text, orig_text) in enumerate(zip(pred_texts, orig_texts)):
        img = (batch_images[i, :, :, 0] * 255).numpy().astype(np.uint8)
        # Images are stored as (width, height); transpose for display.
        img = img.T
        title = f"Prediction: {pred_text}"
        if str(pred_text) == orig_text:
            acc_score += 1
        ax[i // n_cols, i % n_cols].imshow(img, cmap="gray")
        ax[i // n_cols, i % n_cols].set_title(title)

    # Hide the frames of every panel, including unused trailing ones.
    for axis in ax.flat:
        axis.axis("off")
plt.show()

In [None]:
# Exact-match accuracy, in percent, over the evaluated validation samples.
# Guard the division so an empty evaluation reports that instead of raising
# ZeroDivisionError.
if m:
    print("Validation Score:" + str(acc_score/m * 100))
else:
    print("Validation Score: n/a (no validation samples were evaluated)")