In [2]:
import os
import cv2
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import img_to_array

In [8]:
# Load image paths and labels
def load_data(image_dir, label_file):
    data = []
    labels = pd.read_csv(label_file)
    
    for index, row in labels.iterrows():
        img_path = os.path.join(image_dir, row['FILENAME'])
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # Convert to grayscale
        img = cv2.resize(img, (128, 32))  # Resize to a fixed size
        img = img_to_array(img)
        data.append(img)
    
    data = np.array(data, dtype="float") / 255.0
    return data, labels['text'].values

In [5]:
%pwd

'd:\\Developments\\Python\\OCR Implementation'

In [9]:
# Load train, validation, and test data
train_images, train_labels = load_data('dataset/train_v2/train', 'dataset/CSV/written_name_train.csv')

KeyboardInterrupt: 

In [None]:
val_images, val_labels = load_data('dataset/validation_v2', 'dataset/CSV/written_name_validation.csv')

In [None]:
test_images, test_labels = load_data('dataset/test_v2', 'dataset/CSV/written_name_test.csv')

In [None]:

val_images, val_labels = load_data('dataset/validation_v2', 'dataset/CSV/validation_labels.csv')
test_images, test_labels = load_data('dataset/test_v2', 'dataset/CSV/test_labels.csv')

# Convert labels to categorical (for a simple OCR problem; this might change depending on your actual labels)
char_list = 'abcdefghijklmnopqrstuvwxyz0123456789'
char_to_num = {char: i for i, char in enumerate(char_list)}
num_to_char = {i: char for i, char in enumerate(char_list)}

def encode_labels(labels):
    encoded = []
    for label in labels:
        encoded.append([char_to_num[char] for char in label])
    return np.array(encoded)

train_labels_encoded = encode_labels(train_labels)
val_labels_encoded = encode_labels(val_labels)
test_labels_encoded = encode_labels(test_labels)


In [10]:
!pip install tensorflow opencv-python pandas matplotlib joblib

Collecting joblib
  Using cached joblib-1.4.2-py3-none-any.whl (301 kB)
Installing collected packages: joblib
Successfully installed joblib-1.4.2



[notice] A new release of pip available: 22.3.1 -> 24.1.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [53]:
import os
import cv2
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.sequence import pad_sequences
from joblib import Parallel, delayed
import multiprocessing


In [12]:
# Function to preprocess a single image
def preprocess_image(image_path):
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)  # Convert to grayscale
    img = cv2.resize(img, (128, 32))  # Resize to a fixed size
    img = img_to_array(img) / 255.0  # Normalize
    return img

In [47]:
# Load image paths and labels with parallel processing
def load_data(image_dir, label_file):
    labels = pd.read_csv(label_file)
    labels['IDENTITY'] = labels['IDENTITY'].astype(str)
    image_paths = [os.path.join(image_dir, row['FILENAME']) for index, row in labels.iterrows()]
    
    # Use parallel processing to preprocess images
    num_cores = multiprocessing.cpu_count()
    data = Parallel(n_jobs=num_cores)(delayed(preprocess_image)(image_path) for image_path in image_paths)
    
    data = np.array(data, dtype="float")
    return data, labels['IDENTITY'].values

In [48]:
# Load train, validation, and test data
train_images, train_labels = load_data('dataset/train_v2/train', 'dataset/CSV/written_name_train.csv')


In [49]:
val_images, val_labels = load_data('dataset/validation_v2/validation', 'dataset/CSV/written_name_validation.csv')

In [50]:
test_images, test_labels = load_data('dataset/test_v2/test', 'dataset/CSV/written_name_test.csv')

In [39]:
# Convert labels to categorical (for a simple OCR problem; this might change depending on your actual labels)
import string

# Combine all characters into one variable
char_list = string.ascii_lowercase + string.ascii_uppercase + string.digits + string.punctuation +' '
# char_list = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 -~`!@#$%^&*()_+=[]{}|;:,.<>?/'
char_to_num = {char: i for i, char in enumerate(char_list)}
num_to_char = {i: char for i, char in enumerate(char_list)}

In [55]:

def encode_labels(labels):
    encoded = []
    for label in labels:
        try:
            encoded.append([char_to_num[char] for char in label])
        except KeyError:
            # Handle any invalid characters
            encoded.append([char_to_num[char] for char in label if char in char_to_num])
    return encoded

In [56]:
train_labels_encoded = encode_labels(train_labels)
val_labels_encoded = encode_labels(val_labels)
test_labels_encoded = encode_labels(test_labels)

In [57]:
# Pad sequences to ensure uniform length
max_length = max(max(len(label) for label in train_labels_encoded),
                 max(len(label) for label in val_labels_encoded),
                 max(len(label) for label in test_labels_encoded))


In [58]:
train_labels_padded = pad_sequences(train_labels_encoded, maxlen=max_length, padding='post')
val_labels_padded = pad_sequences(val_labels_encoded, maxlen=max_length, padding='post')
test_labels_padded = pad_sequences(test_labels_encoded, maxlen=max_length, padding='post')

In [83]:
# Model definition
def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)


In [101]:
test_images.shape[1:]

(32, 128, 1)

In [103]:

# input_shape = (32, 128, 1)  # (height, width, channels)
input_shape = test_images.shape[1:]
inputs = Input(shape=input_shape)


In [155]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Reshape, Dense, LSTM, Bidirectional, TimeDistributed, Lambda, BatchNormalization, Activation
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import EarlyStopping

def build_model(input_shape, char_list):
    inputs = Input(shape=input_shape)

    # Convolutional layers with Batch Normalization and Activation
    conv_1 = Conv2D(32, (3, 3), padding='same')(inputs)
    batch_norm_1 = BatchNormalization()(conv_1)
    activation_1 = Activation('relu')(batch_norm_1)
    pool_1 = MaxPooling2D(pool_size=(2, 1))(activation_1)  # Adjusted pooling size to reduce downsampling

    conv_2 = Conv2D(64, (3, 3), padding='same')(pool_1)
    batch_norm_2 = BatchNormalization()(conv_2)
    activation_2 = Activation('relu')(batch_norm_2)
    pool_2 = MaxPooling2D(pool_size=(2, 1))(activation_2)  # Adjusted pooling size to reduce downsampling

    conv_3 = Conv2D(128, (3, 3), padding='same')(pool_2)
    batch_norm_3 = BatchNormalization()(conv_3)
    activation_3 = Activation('relu')(batch_norm_3)
    pool_3 = MaxPooling2D(pool_size=(2, 1))(activation_3)  # Adjusted pooling size to reduce downsampling

    # Reshape for RNN layers
    reshaped = Reshape(target_shape=((32 // 2), (128 // 2) * 128))(pool_3)

    # RNN layers
    rnn_1 = Bidirectional(LSTM(128, return_sequences=True))(reshaped)
    rnn_2 = Bidirectional(LSTM(128, return_sequences=True))(rnn_1)

    dense = TimeDistributed(Dense(len(char_list) + 1, activation='softmax'))(rnn_2)

    # Define the model
    model = Model(inputs, dense)
    return model



model = build_model((32, 128, 1), char_list)
model.summary()

ValueError: The total size of the tensor must be unchanged. Received: input_shape=(4, 128, 128), target_shape=(16, 8192)

In [148]:
# Calculate input length
# Calculate input length for the CTC loss function
# Calculate input length for the CTC loss function
input_length_train = np.ones((len(train_images), 1)) * (32 // 2)  # Adjusted to ensure sufficient length
input_length_val = np.ones((len(val_images), 1)) * (32 // 2)      # Adjusted to ensure sufficient length
input_length_test = np.ones((len(test_images), 1)) * (32 // 2)    # Adjusted to ensure sufficient length


label_length_train = np.ones((len(train_images), 1)) * max_length
label_length_val = np.ones((len(val_images), 1)) * max_length
label_length_test = np.ones((len(test_images), 1)) * max_length


In [149]:
# # CTC Loss
# labels = Input(name='the_labels', shape=[max_length], dtype='float32')
# input_length = Input(name='input_length', shape=[1], dtype='int64')
# label_length = Input(name='label_length', shape=[1], dtype='int64')
# ctc_loss = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([dense, labels, input_length, label_length])

# CTC Loss
labels = Input(name='the_labels', shape=[max_length], dtype='float32')
input_length = Input(name='input_length', shape=[1], dtype='int64')
label_length = Input(name='label_length', shape=[1], dtype='int64')
ctc_loss = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([model.output, labels, input_length, label_length])



In [150]:

# Model to be used at training time
model_train = Model(inputs=[model.input, labels, input_length, label_length], outputs=ctc_loss)
model_train.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=Adam())


In [151]:
input_length_train.shape

(330961, 1)

In [152]:
print("Train images shape:", train_images.shape)
print("Train labels padded shape:", train_labels_padded.shape)
print("Input length train shape:", input_length_train.shape)
print("Label length train shape:", label_length_train.shape)

print("Validation images shape:", val_images.shape)
print("Validation labels padded shape:", val_labels_padded.shape)
print("Input length val shape:", input_length_val.shape)
print("Label length val shape:", label_length_val.shape)

Train images shape: (330961, 32, 128, 1)
Train labels padded shape: (330961, 34)
Input length train shape: (330961, 1)
Label length train shape: (330961, 1)
Validation images shape: (41370, 32, 128, 1)
Validation labels padded shape: (41370, 34)
Input length val shape: (41370, 1)
Label length val shape: (41370, 1)


In [153]:
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

In [154]:
model_train.fit(
    x=[train_images, train_labels_padded, input_length_train, label_length_train],
    y=np.zeros(len(train_images)),
    validation_data=([val_images, val_labels_padded, input_length_val, label_length_val], np.zeros(len(val_images))),
    epochs=10,
    batch_size=32,
    callbacks=[early_stopping]
)

Epoch 1/10


InvalidArgumentError: Graph execution error:

Detected at node functional_22_1/ctc_1/CTCLoss defined at (most recent call last):
  File "C:\Program Files\Python310\lib\runpy.py", line 196, in _run_module_as_main

  File "C:\Program Files\Python310\lib\runpy.py", line 86, in _run_code

  File "d:\Developments\Python\OCR Implementation\ocr\lib\site-packages\ipykernel_launcher.py", line 18, in <module>

  File "d:\Developments\Python\OCR Implementation\ocr\lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance

  File "d:\Developments\Python\OCR Implementation\ocr\lib\site-packages\ipykernel\kernelapp.py", line 739, in start

  File "d:\Developments\Python\OCR Implementation\ocr\lib\site-packages\tornado\platform\asyncio.py", line 205, in start

  File "C:\Program Files\Python310\lib\asyncio\base_events.py", line 603, in run_forever

  File "C:\Program Files\Python310\lib\asyncio\base_events.py", line 1909, in _run_once

  File "C:\Program Files\Python310\lib\asyncio\events.py", line 80, in _run

  File "d:\Developments\Python\OCR Implementation\ocr\lib\site-packages\ipykernel\kernelbase.py", line 545, in dispatch_queue

  File "d:\Developments\Python\OCR Implementation\ocr\lib\site-packages\ipykernel\kernelbase.py", line 534, in process_one

  File "d:\Developments\Python\OCR Implementation\ocr\lib\site-packages\ipykernel\kernelbase.py", line 437, in dispatch_shell

  File "d:\Developments\Python\OCR Implementation\ocr\lib\site-packages\ipykernel\ipkernel.py", line 362, in execute_request

  File "d:\Developments\Python\OCR Implementation\ocr\lib\site-packages\ipykernel\kernelbase.py", line 778, in execute_request

  File "d:\Developments\Python\OCR Implementation\ocr\lib\site-packages\ipykernel\ipkernel.py", line 449, in do_execute

  File "d:\Developments\Python\OCR Implementation\ocr\lib\site-packages\ipykernel\zmqshell.py", line 549, in run_cell

  File "d:\Developments\Python\OCR Implementation\ocr\lib\site-packages\IPython\core\interactiveshell.py", line 3075, in run_cell

  File "d:\Developments\Python\OCR Implementation\ocr\lib\site-packages\IPython\core\interactiveshell.py", line 3130, in _run_cell

  File "d:\Developments\Python\OCR Implementation\ocr\lib\site-packages\IPython\core\async_helpers.py", line 128, in _pseudo_sync_runner

  File "d:\Developments\Python\OCR Implementation\ocr\lib\site-packages\IPython\core\interactiveshell.py", line 3334, in run_cell_async

  File "d:\Developments\Python\OCR Implementation\ocr\lib\site-packages\IPython\core\interactiveshell.py", line 3517, in run_ast_nodes

  File "d:\Developments\Python\OCR Implementation\ocr\lib\site-packages\IPython\core\interactiveshell.py", line 3577, in run_code

  File "C:\Users\Lakshitha\AppData\Local\Temp\ipykernel_41692\3767926917.py", line 1, in <module>

  File "d:\Developments\Python\OCR Implementation\ocr\lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "d:\Developments\Python\OCR Implementation\ocr\lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 318, in fit

  File "d:\Developments\Python\OCR Implementation\ocr\lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 121, in one_step_on_iterator

  File "d:\Developments\Python\OCR Implementation\ocr\lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 108, in one_step_on_data

  File "d:\Developments\Python\OCR Implementation\ocr\lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 51, in train_step

  File "d:\Developments\Python\OCR Implementation\ocr\lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "d:\Developments\Python\OCR Implementation\ocr\lib\site-packages\keras\src\layers\layer.py", line 882, in __call__

  File "d:\Developments\Python\OCR Implementation\ocr\lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "d:\Developments\Python\OCR Implementation\ocr\lib\site-packages\keras\src\ops\operation.py", line 46, in __call__

  File "d:\Developments\Python\OCR Implementation\ocr\lib\site-packages\keras\src\utils\traceback_utils.py", line 156, in error_handler

  File "d:\Developments\Python\OCR Implementation\ocr\lib\site-packages\keras\src\models\functional.py", line 175, in call

  File "d:\Developments\Python\OCR Implementation\ocr\lib\site-packages\keras\src\ops\function.py", line 171, in _run_through_graph

  File "d:\Developments\Python\OCR Implementation\ocr\lib\site-packages\keras\src\models\functional.py", line 556, in call

  File "d:\Developments\Python\OCR Implementation\ocr\lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "d:\Developments\Python\OCR Implementation\ocr\lib\site-packages\keras\src\layers\layer.py", line 882, in __call__

  File "d:\Developments\Python\OCR Implementation\ocr\lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "d:\Developments\Python\OCR Implementation\ocr\lib\site-packages\keras\src\ops\operation.py", line 46, in __call__

  File "d:\Developments\Python\OCR Implementation\ocr\lib\site-packages\keras\src\utils\traceback_utils.py", line 156, in error_handler

  File "d:\Developments\Python\OCR Implementation\ocr\lib\site-packages\keras\src\layers\core\lambda_layer.py", line 120, in call

  File "C:\Users\Lakshitha\AppData\Local\Temp\ipykernel_41692\2972813071.py", line 9, in ctc_lambda_func

  File "d:\Developments\Python\OCR Implementation\ocr\lib\site-packages\keras\src\legacy\backend.py", line 666, in ctc_batch_cost

sequence_length(0) <= 4
	 [[{{node functional_22_1/ctc_1/CTCLoss}}]] [Op:__inference_one_step_on_iterator_35704]

In [156]:
# Common
import numpy as np
import pandas as pd
import tensorflow as tf
from IPython.display import clear_output as cls

# Data
from glob import glob
from tqdm import tqdm
import tensorflow.data as tfd

# Data Visualization
import matplotlib.pyplot as plt

# Model
from tensorflow import keras
from tensorflow.keras import callbacks
from tensorflow.keras import layers

In [160]:
# Image Size
IMG_WIDTH = 200
IMG_HEIGHT = 50
IMAGE_SIZE = (IMG_WIDTH, IMG_HEIGHT)

# Batch Size
BATCH_SIZE = 16

# EPOCHS
EPOCHS = 100

# Model Name
MODEL_NAME = 'Handwritten-OCR'

# Learning Rate
LEARNING_RATE = 1e-3

# Random Seed
np.random.seed(2569)
tf.random.set_seed(2569)

# File Paths
train_csv_path = 'dataset/CSV/written_name_train.csv'
valid_csv_path = 'dataset/CSV/written_name_validation.csv'
test_csv_path = 'dataset/CSV/written_name_test.csv'

train_image_dir = 'dataset/train_v2/train'
valid_image_dir = 'dataset/validation_v2/validation'
test_image_dir = 'dataset/test_v2/test'

# Data Size
TRAIN_SIZE = BATCH_SIZE * 10000
VALID_SIZE = BATCH_SIZE * 5000
TEST_SIZE  = BATCH_SIZE * 1000

# AUTOTUNE
AUTOTUNE = tfd.AUTOTUNE

In [161]:
# Train CSV
train_csv = pd.read_csv(train_csv_path)[:TRAIN_SIZE]

# Validation CSV
valid_csv = pd.read_csv(valid_csv_path)[:VALID_SIZE]

# Test CSV
test_csv = pd.read_csv(test_csv_path)[:TEST_SIZE]

In [162]:
train_csv.shape

(160000, 2)

In [163]:
train_labels = [str(word) for word in train_csv['IDENTITY'].to_numpy()]