In [None]:
import pandas as pd
import numpy as np
import os
import cv2
import string
import glob
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import pad_sequences
from src.data_processing import batch_extract_true_labels

In [None]:
# Commented-out one-off cleaning step that writes 'labels_.csv': it strips
# every non-alphanumeric character from each label, then drops the first two
# remaining characters.
# df = pd.read_csv('labels.csv')
# df['label'] = df['label'].str.replace(r'[^A-Za-z0-9]', '', regex=True).str[2:]
# df.to_csv('labels_.csv', index=False)

df = pd.read_csv('labels_.csv')

# Vocabulary: uppercase letters followed by digits, with lookup tables in both
# directions (character -> class index, class index -> character).
characters = string.ascii_uppercase + string.digits
char_to_num = dict(zip(characters, range(len(characters))))
num_to_char = dict(enumerate(characters))
num_classes = len(characters)

# Length of the longest label in the CSV.
max_label_len = df['label'].str.len().max()

# Width and height that every image is resized to.
img_width, img_height = 128, 64

def preprocess_image(img_path):
    """Load an image file and turn it into a model-ready array.

    The image is read as grayscale, resized to (img_width, img_height),
    converted to float32, divided by 255 and given a trailing channel axis.

    Args:
        img_path: Path of the image file to load.

    Returns:
        float32 array of shape (img_height, img_width, 1).

    Raises:
        ValueError: If OpenCV cannot read or decode the file.
    """
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread reports failure by returning None instead of raising. Fail
    # here with the offending path rather than with an opaque error from
    # cv2.resize further down.
    if img is None:
        raise ValueError(f"Could not read image: {img_path}")
    # cv2.resize takes the target size as (width, height).
    img = cv2.resize(img, (img_width, img_height))
    img = img.astype('float32') / 255.0
    # Append the single channel axis expected by the model input.
    return np.expand_dims(img, axis=-1)

def encode_label(label):
    """Translate a label string into a list of integer class indices.

    Each character is looked up in char_to_num; a character that is missing
    from that mapping raises KeyError.
    """
    return list(map(char_to_num.__getitem__, label))

# Build the training set: one preprocessed image and one encoded label for
# every CSV row whose image file is present on disk.
images = []
labels = []
for filename, label_text in zip(df['filename'], df['label']):
    img_path = os.path.join('cropped_images_processed/train/', filename)
    if not os.path.exists(img_path):
        print(f"Image {img_path} not found.")
        continue
    images.append(preprocess_image(img_path))
    labels.append(encode_label(label_text))

# Right-pad every label to max_label_len, using num_classes (one past the last
# real character index) as the filler value.
labels_padded = pad_sequences(labels, maxlen=max_label_len, padding='post', value=num_classes)

# One one-hot target array per character position; the extra class is the
# padding value.
labels_categorical = [
    to_categorical(labels_padded[:, position], num_classes=num_classes+1)
    for position in range(max_label_len)
]

X = np.array(images)

# Network: three conv blocks -> dense trunk -> one softmax head per character
# position of the label.
inputs = Input(shape=(img_height, img_width, 1))

# Conv blocks 1-3 are identical except for the filter count:
# Conv2D -> BatchNormalization -> MaxPooling2D -> Dropout.
x = inputs
for n_filters in (32, 64, 128):
    x = Conv2D(n_filters, (3,3), activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2,2))(x)
    x = Dropout(0.25)(x)

# Flatten and fully connected trunk shared by all heads.
x = Flatten()(x)
x = Dense(256, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)

# One classifier per character position; the extra class is the padding value.
outputs = [
    Dense(num_classes+1, activation='softmax')(x)
    for _ in range(max_label_len)
]

model = Model(inputs=inputs, outputs=outputs)
model.compile(loss='categorical_crossentropy', optimizer=Adam())
model.fit(X, labels_categorical, batch_size=32, epochs=100, validation_split=0.1)

model.save('ocr_model.h5')

Epoch 1/100
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 145ms/step - loss: 27.1533 - val_loss: 85.8972
Epoch 2/100
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 154ms/step - loss: 19.7148 - val_loss: 97.3096
Epoch 3/100
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 151ms/step - loss: 13.6448 - val_loss: 25.7486
Epoch 4/100
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 156ms/step - loss: 9.0283 - val_loss: 14.0622
Epoch 5/100
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 153ms/step - loss: 6.8791 - val_loss: 12.9554
Epoch 6/100
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 152ms/step - loss: 5.7213 - val_loss: 12.0015
Epoch 7/100
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 154ms/step - loss: 4.8752 - val_loss: 10.8087
Epoch 8/100
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 157ms/step - loss: 4.3374 - val_loss: 9.9185
Epoch 9/100
[1m44/44



In [2]:
def get_original_filename(cropped_image_path):
    """Map a cropped-image path back to the file name of its source image.

    The directory part is dropped. If the remaining file name contains
    '_crop', everything from its first occurrence onward is replaced by
    '.jpg'; otherwise the file name is returned unchanged.
    """
    name = os.path.basename(cropped_image_path)
    stem, marker, _ = name.partition('_crop')
    # partition() yields an empty separator when '_crop' is absent.
    return stem + '.jpg' if marker else name

In [22]:
def evaluate(split):
    """Run the global `model` on one dataset split and report OCR accuracy.

    Every '*.jpg' under 'cropped_images_processed/<split>' is preprocessed and
    passed through the model. The per-position predictions are decoded into
    strings, mapped back to their source-image file names and inner-joined on
    'filename' with the frame returned by
    batch_extract_true_labels('data/<split>'), whose 'true_lp_text' column is
    compared against the decoded text.

    Args:
        split: Name of the split sub-directory, e.g. 'val' or 'test'.

    Returns:
        Number of exactly matching rows divided by the number of images that
        were predicted on.

    Raises:
        ValueError: If no image could be loaded for the split.
    """
    # glob returns paths in arbitrary order; sort so runs are reproducible.
    candidate_paths = sorted(
        glob.glob(os.path.join(f'cropped_images_processed/{split}', '*.jpg'))
    )

    # Keep paths and images in lock-step: a skipped file must not leave its
    # path behind, or the prediction frame below would get columns of
    # different lengths.
    test_image_paths = []
    test_images = []
    for img_path in candidate_paths:
        if os.path.exists(img_path):
            test_images.append(preprocess_image(img_path))
            test_image_paths.append(img_path)
        else:
            print(f"Image {img_path} not found.")

    if not test_images:
        raise ValueError(f"No images found for split '{split}'.")

    X_test = np.array(test_images)
    predictions = model.predict(X_test)

    # A multi-head model returns one array per character position; stack them
    # into (num_samples, num_positions, num_classes+1). A single-head model
    # returns a bare array, which only needs the position axis inserted.
    if isinstance(predictions, (list, tuple)):
        predictions = np.stack(predictions, axis=1)
    else:
        predictions = np.expand_dims(predictions, axis=1)

    # Decode: take the most probable class at each position and drop the
    # padding class (index num_classes).
    decoded_labels = []
    for sample_probs in predictions:
        char_indices = np.argmax(sample_probs, axis=-1)
        decoded_labels.append(
            ''.join(num_to_char[int(i)] for i in char_indices if i < num_classes)
        )

    predictions_df = pd.DataFrame({'filename': test_image_paths, 'label': decoded_labels})
    predictions_df['filename'] = predictions_df['filename'].apply(get_original_filename)

    json_directory = f'data/{split}'
    true_labels = batch_extract_true_labels(json_directory)

    merged_df = pd.merge(predictions_df, true_labels, on='filename', how='inner')

    # Compute OCR accuracy.
    # NOTE(review): the denominator is the number of predictions, not the
    # number of merged rows, so predictions without a ground-truth match count
    # as wrong. Confirm this is intended, and that true_labels has at most one
    # row per filename.
    merged_df['correct'] = merged_df['label'] == merged_df['true_lp_text']
    accuracy = merged_df['correct'].sum() / len(decoded_labels)

    print(f"OCR Accuracy for '{split}': {accuracy:.2%}")

    return accuracy

In [23]:
# Report OCR accuracy on the 'val' split.
evaluate('val')

[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
OCR Accuracy for 'val': 26.44%


0.2643979057591623

In [24]:
# Report OCR accuracy on the 'test' split.
evaluate('test')

[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
OCR Accuracy for 'test': 23.29%


0.2329059829059829