In [1]:
!pip install pandas numpy matplotlib opencv-python kaggle



In [2]:
!pip install tensorflow



In [3]:
import os
import zipfile

# Location of the downloaded archive
dataset_path = "archive.zip"  # Update the path

# Unpack every member of the archive into the local "dataset" folder
with zipfile.ZipFile(dataset_path) as archive:
    archive.extractall(path="dataset")

print("Dataset extracted successfully!")


Dataset extracted successfully!


In [4]:
import pandas as pd
import pathlib

# Folder the archive was unpacked into
data_dir = pathlib.Path("dataset")

# The CSV pairs each image file name (FILENAME) with its transcription (IDENTITY)
labels_file = str(data_dir / "written_name_test_v2.csv")
df = pd.read_csv(labels_file)

# Preview the first five rows
print(df.head(5))

        FILENAME  IDENTITY
0  TEST_0001.jpg     KEVIN
1  TEST_0002.jpg  CLOTAIRE
2  TEST_0003.jpg      LENA
3  TEST_0004.jpg     JULES
4  TEST_0005.jpg   CHERPIN


In [5]:
import cv2
import os
import numpy as np
import pandas as pd

In [6]:
# Target (width, height) in pixels that every image is resized to
# (adjust based on your model requirements)
IMG_WIDTH, IMG_HEIGHT = 128, 32

In [7]:
# Function to preprocess an image
def process_image(image_path, img_size=(IMG_WIDTH, IMG_HEIGHT)):
    """Load an image from disk and turn it into a normalized model input.

    The image is read as single-channel grayscale, resized, scaled to the
    range [0, 1] and given a trailing channel axis.

    Args:
        image_path: Path of the image file to load.
        img_size: Target size handed to ``cv2.resize``, given as
            ``(width, height)``.

    Returns:
        A ``float32`` array of shape ``(height, width, 1)``.

    Raises:
        FileNotFoundError: If ``image_path`` does not exist.
        ValueError: If the file exists but could not be loaded as an image.
    """
    if not os.path.exists(image_path):  # Check if the file exists
        raise FileNotFoundError(f"❌ Image not found: {image_path}")

    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)  # Read as grayscale
    if img is None:  # imread signals failure by returning None
        raise ValueError(f"⚠️ Error loading image: {image_path}")

    img = cv2.resize(img, img_size)  # Resize image

    # Normalize to [0, 1] in float32. A plain `img / 255.0` promotes the pixel
    # array to float64, doubling the memory of every stored image.
    img = img.astype(np.float32) / 255.0

    # Add a channel dimension for CNNs
    return np.expand_dims(img, axis=-1)

In [8]:
# ✅ Smoke test: run the preprocessing on a single training image
sample_image_path = "dataset/train_v2/train/TRAIN_00001.jpg"  # Replace with an actual image path
processed_img = process_image(image_path=sample_image_path)

# The reported shape is (height, width, channels)
print("✅ Image processed successfully! Shape:", processed_img.shape)

✅ Image processed successfully! Shape: (32, 128, 1)


In [9]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm  # Shows a progress bar

# Define dataset paths
dataset_dir = "dataset"  # Replace with actual dataset path
train_images_path = os.path.join(dataset_dir, "train_v2/train")
train_csv_path = os.path.join(dataset_dir, "written_name_train_v2.csv")

# Load the label CSV and drop rows with a missing file name or label
train_df = pd.read_csv(train_csv_path).dropna()

# Accumulators for the preprocessed images and their text labels
X_train, y_train = [], []

# ✅ Process all images in the dataset
# Zipping the two columns avoids the per-row Series that iterrows() builds.
for filename, label in tqdm(
    zip(train_df["FILENAME"], train_df["IDENTITY"]), total=len(train_df)
):
    image_path = os.path.join(train_images_path, filename)

    try:
        img = process_image(image_path)  # Apply preprocessing
    except FileNotFoundError:
        print(f"⚠️ Missing file: {image_path}")  # Debugging message
        continue
    except ValueError as err:
        # The file exists but could not be decoded. Skip it instead of
        # aborting the whole (long-running) load on one bad image.
        print(err)
        continue

    X_train.append(img)
    y_train.append(label)  # Keep the label (handwritten text)

# Convert lists to NumPy arrays. float32 is requested explicitly so the image
# tensor is never stored as float64, which would double its memory footprint.
X_train = np.asarray(X_train, dtype=np.float32)
y_train = np.array(y_train)

print(f"✅ Dataset Loaded: {X_train.shape}, Labels: {len(y_train)}")


100%|██████████| 330396/330396 [45:55<00:00, 119.89it/s]


✅ Dataset Loaded: (330396, 32, 128, 1), Labels: 330396


In [17]:
# --- Label Encoding ---
# Vocabulary: every distinct character occurring in the training labels, sorted
char_list = sorted({ch for text in y_train for ch in text})

# Character codes start at 1; code 0 is what encode_labels writes for padding
# and for characters that are not in the vocabulary.
char_to_num = {ch: code for code, ch in enumerate(char_list, start=1)}
num_to_char = {code: ch for ch, code in char_to_num.items()}

# Length of every label, and the longest one (width of the padded label matrix)
label_len = [len(text) for text in y_train]
max_label_len = max(label_len)

def encode_labels(labels, max_len, char_to_num):
    """Turn text labels into a zero-padded matrix of character codes.

    Args:
        labels: Iterable of strings with a known length (``len(labels)``).
        max_len: Number of columns of the result; each label must fit in it.
        char_to_num: Mapping from character to its numeric code. Characters
            missing from the mapping are encoded as 0.

    Returns:
        A ``float32`` array of shape ``(len(labels), max_len)`` where row ``i``
        holds the codes of ``labels[i]`` followed by zeros.
    """
    padded = np.zeros((len(labels), max_len), dtype=np.float32)
    # Iterating a 2-D array yields row views, so writes land in `padded`.
    for row, text in zip(padded, labels):
        codes = [char_to_num.get(ch, 0) for ch in text]
        row[:len(codes)] = codes
    return padded

# Padded matrix of character codes, one row per training label
encoded_y_train = encode_labels(
    labels=y_train,
    max_len=max_label_len,
    char_to_num=char_to_num,
)


In [18]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
import cv2
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Reshape, Bidirectional, LSTM, Dense, Activation, Lambda
from tensorflow.keras.optimizers import Adam
import tensorflow.keras.backend as K

In [22]:
# --- Model Building ---
# CRNN: a convolutional feature extractor, two bidirectional LSTMs and a
# per-time-step softmax over the character classes.
input_shape = (IMG_HEIGHT, IMG_WIDTH, 1)
input_data = Input(shape=input_shape)

# Two conv + 2x2 pooling stages: height and width are both halved twice.
conv_1 = Conv2D(64, (3, 3), activation='relu', padding='same')(input_data)
pool_1 = MaxPooling2D(pool_size=(2, 2))(conv_1)

conv_2 = Conv2D(128, (3, 3), activation='relu', padding='same')(pool_1)
pool_2 = MaxPooling2D(pool_size=(2, 2))(conv_2)

# From here on pooling is (2, 1): only the height shrinks, so the width
# (which becomes the sequence axis for the RNN) keeps its resolution.
conv_3 = Conv2D(256, (3, 3), activation='relu', padding='same')(pool_2)
conv_4 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv_3)
pool_4 = MaxPooling2D(pool_size=(2, 1))(conv_4)

conv_5 = Conv2D(512, (3, 3), activation='relu', padding='same')(pool_4)
batch_norm_5 = tf.keras.layers.BatchNormalization()(conv_5)

conv_6 = Conv2D(512, (3, 3), activation='relu', padding='same')(batch_norm_5)
batch_norm_6 = tf.keras.layers.BatchNormalization()(conv_6)
pool_6 = MaxPooling2D(pool_size=(2, 1))(batch_norm_6)

# 2x2 convolution without padding: both spatial dimensions shrink by one.
# With IMG_HEIGHT = 32 the height is 2 before this layer and 1 after it,
# which is what the squeeze below requires.
conv_7 = Conv2D(512, (2, 2), activation='relu')(pool_6)

# Reshape for RNN: drop the height axis (axis 1) -> (batch, time, features)
squeezed = Lambda(lambda x: K.squeeze(x, 1))(conv_7)

# RNN
blstm_1 = Bidirectional(LSTM(256, return_sequences=True, dropout=0.2))(squeezed)
blstm_2 = Bidirectional(LSTM(256, return_sequences=True, dropout=0.2))(blstm_1)

# Output layer
# Class layout: 0 = padding / unknown character, 1..len(char_list) = characters,
# last index = CTC blank. K.ctc_batch_cost reserves the LAST class for the
# blank, so the layer needs one unit beyond the largest character code;
# with only len(char_list) + 1 units the last character would collide with it.
num_classes = len(char_list) + 2
outputs = Dense(num_classes, activation='softmax')(blstm_2)

# Prediction model (image -> per-time-step class probabilities). It is left
# uncompiled on purpose: training goes through the CTC model, and a
# categorical cross-entropy loss does not match the CTC label format.
model = Model(inputs=input_data, outputs=outputs)

# --- CTC Loss ---
def ctc_lambda_func(args):
    """Evaluate the CTC loss for use inside a Lambda layer.

    Args:
        args: Sequence of four tensors in the order
            ``(y_pred, labels, input_length, label_length)``.

    Returns:
        The result of ``K.ctc_batch_cost`` for those tensors.
    """
    predictions, targets, prediction_lengths, target_lengths = args
    # ctc_batch_cost expects the ground truth first and the predictions second
    return K.ctc_batch_cost(targets, predictions, prediction_lengths, target_lengths)

# Additional inputs that are only needed to compute the CTC loss during training
labels = Input(shape=[max_label_len], dtype='float32', name='the_labels')
input_length = Input(shape=[1], dtype='int64', name='input_length')
label_length = Input(shape=[1], dtype='int64', name='label_length')

# The loss is computed inside the graph by a Lambda layer named 'ctc'
ctc_loss = Lambda(ctc_lambda_func, name='ctc', output_shape=(1,))(
    [outputs, labels, input_length, label_length]
)

# Training model: image + label tensors in, CTC loss out
ctc_model = Model(
    inputs=[input_data, labels, input_length, label_length],
    outputs=ctc_loss,
)

# The 'ctc' output already is the loss value, so the compiled loss simply
# passes y_pred through and ignores y_true.
ctc_model.compile(
    loss={'ctc': lambda y_true, y_pred: y_pred},
    optimizer=Adam(learning_rate=0.001),
)

In [23]:
# --- Prepare input for CTC ---
# Number of time steps the network emits per image. It is read from the model
# so it cannot drift from the architecture: two 2x2 poolings and the final
# unpadded 2x2 convolution give IMG_WIDTH // 4 - 1 steps, one more than the
# previously hard-coded IMG_WIDTH // 4 - 2.
time_steps = model.output.shape[1]
if time_steps is None:  # static shape unavailable; use the architecture's value
    time_steps = IMG_WIDTH // 4 - 1
time_steps = int(time_steps)

# One entry per sample; int64 matches the dtype declared on the Input layers
input_length_train = np.full(len(X_train), time_steps, dtype=np.int64)
label_length_train = np.array([len(label) for label in y_train], dtype=np.int64)

In [None]:
# --- Training ---
# Inputs in the order the CTC model declares them
train_inputs = [X_train, encoded_y_train, input_length_train, label_length_train]

# The loss is produced inside the model, so the targets are placeholders
dummy_targets = np.zeros(len(X_train))

ctc_model.fit(
    train_inputs,
    dummy_targets,
    validation_split=0.2,
    batch_size=64,  # Adjust batch_size as needed
    epochs=10,  # Adjust epochs as needed
)

Epoch 1/10

[1m 157/4130[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:52:47[0m 2s/step - loss: 24.9025