In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm 
import cv2

In [2]:
df=pd.read_csv('/kaggle/input/handwriting-recognitionocr/CSV/written_name_train.csv')

In [3]:
df


Unnamed: 0,FILENAME,IDENTITY
0,TRAIN_00001.jpg,BALTHAZAR
1,TRAIN_00002.jpg,SIMON
2,TRAIN_00003.jpg,BENES
3,TRAIN_00004.jpg,LA LOVE
4,TRAIN_00005.jpg,DAPHNE
...,...,...
330956,TRAIN_330957.jpg,LENNY
330957,TRAIN_330958.jpg,TIFFANY
330958,TRAIN_330959.jpg,COUTINHO DESA
330959,TRAIN_330960.jpg,MOURAD


In [11]:
import os
MAIN_FOLDER_PATH='/kaggle/input/handwriting-recognitionocr'
image_list = []
def load_images(main_folder):
    for root,dirs,files in os.walk(main_folder):
        for file in files:
            if file.endswith('.png') or file.endswith('.jpg') or file.endswith('.jpeg'):
                file_path=os.path.join(root,file)
                img=cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
                image_list.append(img)
    return image_list


images = load_images(MAIN_FOLDER_PATH)

KeyboardInterrupt: 

In [8]:
def preprocess(img,target_width=128, target_height=32):
    resized_img = cv2.resize(img, (target_width, target_height))
    normalized_img = resized_img.astype(np.float32) / 255.0
    return normalized_img

In [12]:
train_imgs_path = '/kaggle/input/handwriting-recognitionocr/train_v2/train/'
test_imgs_path = '/kaggle/input/handwriting-recognitionocr/test_v2/test/'
val_imgs_path = '/kaggle/input/handwriting-recognitionocr/validation_v2/validation/'

In [13]:
def preprocess_image(image_path, target_size=(32, 256)):
    
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    img = cv2.resize(img, target_size)
    img = img.astype('float32') / 255.0
    img = np.expand_dims(img, axis=-1)
    
    return img

In [14]:
train_path = "/kaggle/input/handwriting-recognitionocr/CSV/written_name_train.csv"
test_path = "/kaggle/input/handwriting-recognitionocr/CSV/written_name_test.csv"
val_path = "/kaggle/input/handwriting-recognitionocr/CSV/written_name_validation.csv"

In [16]:
def read_data(csv_path, images_path, number_of_samples):
    labels = []
    imgs = []
    
    csv = pd.read_csv(csv_path)[:number_of_samples]
    
    for index, row in tqdm(csv.iterrows(), total=csv.shape[0], desc="Processing Images"):
        labels.append(row["IDENTITY"])
        img_path = images_path + row["FILENAME"]
        img = preprocess_image(img_path)
        imgs.append(img)
    return imgs, labels

In [17]:
train_size = 16000
val_size = 8000
test_size = 1600

In [18]:
train_imgs, train_labels = read_data(train_path, train_imgs_path, train_size)

Processing Images: 100%|██████████| 16000/16000 [02:17<00:00, 116.45it/s]


In [19]:
print(f"Number of images: {len(train_imgs)}\nNumber of labels {len(train_labels)}")

Number of images: 16000
Number of labels 16000


In [20]:
print("Test\n")
test_imgs, test_labels = read_data(test_path, test_imgs_path, test_size)
print(f"Number of images: {len(test_imgs)}\nNumber of labels {len(test_labels)}")
print("Validation\n")
val_imgs, val_labels = read_data(val_path, val_imgs_path, val_size)
print(f"Number of images: {len(val_imgs)}\nNumber of labels {len(train_labels)}")

Test



Processing Images: 100%|██████████| 1600/1600 [00:02<00:00, 685.18it/s]


Number of images: 1600
Number of labels 1600
Validation



Processing Images: 100%|██████████| 8000/8000 [00:29<00:00, 266.99it/s]

Number of images: 8000
Number of labels 16000





In [21]:
train_labels = [str(label) for label in train_labels]
test_labels = [str(label) for label in test_labels]
val_labels = [str(label) for label in val_labels]

In [22]:
train_imgs[0].shape

(256, 32, 1)

In [23]:
unique_chars = set(char for word in train_labels for char in word)
n_classes = len(unique_chars)

print(f"Total number of unique characters : {n_classes}")
print(f"Unique Characters : \n{unique_chars}")

Total number of unique characters : 41
Unique Characters : 
{'F', 'p', 'L', 'R', 'W', 'y', 'Y', 'C', 'D', 'U', ' ', 'X', 'r', 'J', "'", 'a', 'B', 'K', 'n', 'E', 'H', 'I', 'u', 'Q', 'N', 'O', 'G', 'P', '-', 's', 'e', 'Z', 'c', 'T', 'V', 'o', 'S', 'A', 't', 'l', 'M'}


In [24]:
char_to_num = {char: idx for idx, char in enumerate(unique_chars)}
num_to_char = {idx: char for char, idx in char_to_num.items()}

In [31]:
def encode_labels(labels):
    encoded = [np.array([char_to_num[char] for char in label]) for label in labels]
    return encoded

In [26]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Reshape, Dense, Bidirectional, LSTM, Lambda
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.backend import ctc_batch_cost
from tensorflow.keras.preprocessing.sequence import pad_sequences
import tensorflow.keras.backend as K

In [27]:
def ocr(input_shape, output_dim):
    input_img = Input(shape=input_shape, name='image_input')
    
    x = Conv2D(32, (3, 3), activation='relu', padding='same')(input_img)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Conv2D(128, (3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    
    new_shape = ((input_shape[0] // 8), (input_shape[1] // 8) * 128)
    x = Reshape(target_shape=new_shape)(x)
    
    x = Bidirectional(LSTM(256, return_sequences=True))(x)
    x = Bidirectional(LSTM(256, return_sequences=True))(x)
    
    output = Dense(output_dim, activation='softmax', name='dense_output')(x)
    labels = Input(shape=(None,), name='labels', dtype='float32')
    input_length = Input(shape=(1,), name='input_length', dtype='int64')
    label_length = Input(shape=(1,), name='label_length', dtype='int64')
    
    def ctc_loss_lambda(inputs):
        y_true, y_pred, input_length, label_length = inputs
        return K.ctc_batch_cost(y_true, y_pred, input_length, label_length)
    loss_out = Lambda(ctc_loss_lambda, output_shape=(1,))([labels, output, input_length, label_length])
    
    model = Model(inputs=[input_img, labels, input_length, label_length], outputs=loss_out)
    model.compile(optimizer='adam', loss=lambda y_true, y_pred: y_pred)
    return model


In [33]:
train_labels_encoded = [[char_to_num[char] for char in label] for label in train_labels]
val_labels_encoded = [[char_to_num[char] for char in label] for label in val_labels]
train_imgs = np.array(train_imgs)
val_imgs = np.array(val_imgs) 
train_input_length = np.full((len(train_imgs), 1), train_imgs.shape[1] // 8, dtype=int)
val_input_length = np.full((len(val_imgs), 1), val_imgs.shape[1] // 8, dtype=int)
train_label_length = np.array([len(label) for label in train_labels_encoded], dtype=int).reshape(-1, 1)
val_label_length = np.array([len(label) for label in val_labels_encoded], dtype=int).reshape(-1, 1)

In [34]:
input_shape = train_imgs[0].shape
output_dim = n_classes + 1
model = ocr(input_shape, output_dim)
model.summary()

In [35]:
train_labels_encoded = encode_labels(train_labels)
test_labels_encoded = encode_labels(test_labels)
val_labels_encoded = encode_labels(val_labels)

In [36]:
print(f"Number of training images: {len(train_imgs)}")
print(f"Number of training labels: {len(train_labels_encoded)}")

Number of training images: 16000
Number of training labels: 16000


In [37]:
train_imgs = np.array(train_imgs)
val_imgs = np.array(val_imgs)

print(f"Shape of training images: {train_imgs.shape}")
print(f"Shape of validation images: {val_imgs.shape}")
print(f"Shape of a single image: {train_imgs[0].shape}")

Shape of training images: (16000, 256, 32, 1)
Shape of validation images: (8000, 256, 32, 1)
Shape of a single image: (256, 32, 1)


In [38]:
max_label_length = max(len(label) for label in train_labels_encoded)
max_label_length

24

In [39]:
train_labels_encoded_padded = pad_sequences(train_labels_encoded, maxlen=max_label_length, padding='post')
val_labels_encoded_padded = pad_sequences(val_labels_encoded, maxlen=max_label_length, padding='post')

In [40]:
print(f"Shape of padded training labels: {train_labels_encoded_padded.shape}")
print(f"First few padded training labels: {train_labels_encoded_padded[:5]}")
print(f"Shape of padded validation labels: {val_labels_encoded_padded.shape}")
print(f"First few padded validation labels: {val_labels_encoded_padded[:5]}")

Shape of padded training labels: (16000, 24)
First few padded training labels: [[16 37  2 33 20 37 31 37  3  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [36 21 40 25 24  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [16 19 24 19 36  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 2 37 10  2 25 34 19  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 8 37 27 20 24 19  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]]
Shape of padded validation labels: (8000, 24)
First few padded validation labels: [[16 21  2 19  2  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 2 37  9 40 21 25 24 21 19  3  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 2 19 37  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [13 19 37 24 28  3 25  7 20  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 3  9 27 27  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]]


In [41]:
history = model.fit(
    x={'image_input': train_imgs, 
       'labels': train_labels_encoded_padded, 
       'input_length': train_input_length, 
       'label_length': train_label_length},
    y=np.zeros(len(train_imgs)),
    epochs=20,
    batch_size=64,
    validation_data=(
        {'image_input': val_imgs, 
         'labels': val_labels_encoded_padded, 
         'input_length': val_input_length, 
         'label_length': val_label_length},
        np.zeros(len(val_imgs))
    )
)

Epoch 1/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 80ms/step - loss: 24.1455 - val_loss: 19.9829
Epoch 2/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 75ms/step - loss: 19.7310 - val_loss: 19.0629
Epoch 3/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 75ms/step - loss: 18.9218 - val_loss: 18.6801
Epoch 4/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 76ms/step - loss: 18.5989 - val_loss: 18.5072
Epoch 5/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 75ms/step - loss: 18.3148 - val_loss: 18.4038
Epoch 6/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 76ms/step - loss: 18.2386 - val_loss: 18.1974
Epoch 7/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 75ms/step - loss: 17.9749 - val_loss: 18.0007
Epoch 8/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 75ms/step - loss: 17.8276 - val_loss: 17.8562
Epoch 9/