In [2]:
import os
import cv2
import numpy as np
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from tensorflow.keras.layers import LSTM, TimeDistributed, Dense, Flatten, Input, Dropout, Bidirectional
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras

class DataLoader:
    def __init__(self, dataset_dir, img_height, img_width, use_text_line_segmentation=True):
        self.dataset_dir = dataset_dir
        self.image_dir = os.path.join(dataset_dir, 'images')
        self.transcriptions_file = os.path.join(dataset_dir, 'text', 'transcriptions.txt')
        self.train_file = os.path.join(dataset_dir, 'partitions', 'train.txt')
        self.validation_file = os.path.join(dataset_dir, 'partitions', 'validation.txt')
        self.test_file = os.path.join(dataset_dir, 'partitions', 'test.txt')
        self.img_height = img_height
        self.img_width = img_width

    def load_dataset(self):
        transcriptions = self.load_transcriptions()
        train_set = self.load_set(self.train_file)
        validation_set = self.load_set(self.validation_file)
        test_set = self.load_set(self.test_file)

        return transcriptions, train_set, validation_set, test_set

    def load_transcriptions(self):
        transcriptions = {}
        with open(self.transcriptions_file, 'r', encoding='utf-8') as file:
            for line in file:
                image_name, transcription = line.strip().split(" ", 1)
                transcriptions[image_name] = transcription
        return transcriptions

    def load_set(self, set_file):
        set_list = []
        with open(set_file, 'r') as file:
            for line in file:
                image_name = line.strip()
                image_path = os.path.join(self.image_dir, f'{image_name}.png')
                set_list.append(image_path)
        return set_list

    def load_images_and_labels(self, set_list, transcriptions):
        images = []
        labels = []
        for image_path in set_list:
            img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

            # Binarization with adaptive thresholding
            thresh = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 11, 2)

            # Noise reduction (optional)
            denoised = cv2.medianBlur(thresh, 3)

            # Since not using text line segmentation, resize the entire image
            img = cv2.resize(img, (self.img_width, self.img_height))
            images.append(img)

            if images:  # Check if images list is not empty
                images[-1] = images[-1] / 255.0  # Normalize pixel values

            image_name = os.path.basename(image_path).split('.')[0]
            transcription = transcriptions[image_name]
            labels.append(transcription)

        images = np.array(images).reshape(-1, self.img_height, self.img_width, 1)
        return images, labels

2024-04-07 07:38:33.555769: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-07 07:38:33.555885: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-07 07:38:33.807659: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [3]:
!pip install keras-tuner



In [4]:
def build_crnn_model(hp):
    img_height = 32  # Fixed image height
    img_width = 128  # Fixed image width
    num_classes = 111  # Fixed number of classes

    # Define search space for hyperparameters
    conv_filters = hp.Int('conv_filters', min_value=32, max_value=128, step=16)
    lstm_units = hp.Int('lstm_units', min_value=64, max_value=256, step=16)
    dropout_rate = hp.Float('dropout_rate', min_value=0.0, max_value=0.5)
    kernel_size = hp.Choice('kernel_size', values = [3,5])

    model = keras.Sequential()

    # Convolutional layers with HP for filters
    model.add(Conv2D(conv_filters, kernel_size, activation='relu', input_shape=(img_height, img_width, 1)))
    #model.add(Dropout(dropout_rate))
    model.add(MaxPooling2D((2, 2)))
    #model.add(BatchNormalization())

    model.add(Conv2D(conv_filters, kernel_size, activation='relu'))
    #model.add(Dropout(dropout_rate))
    model.add(MaxPooling2D((2, 2)))
    #model.add(BatchNormalization())

    #model.add(Conv2D(conv_filters, kernel_size, activation='relu'))
    #model.add(Dropout(dropout_rate))
    #model.add(MaxPooling2D((2, 2)))
    #model.add(BatchNormalization())

    # Recurrent layers
    model.add(TimeDistributed(Flatten()))
    model.add(Bidirectional(LSTM(lstm_units, return_sequences=True, dropout=dropout_rate)))
    model.add(Bidirectional(LSTM(lstm_units, return_sequences=True, dropout=dropout_rate)))
    model.add(Bidirectional(LSTM(lstm_units, return_sequences=True, dropout=dropout_rate)))
    #model.add(Bidirectional(LSTM(lstm_units // 2)))  # Reduce units in final LSTM

    model.add(Flatten())
    model.add(Dense(max_len, activation='softmax'))

    model.compile(optimizer=keras.optimizers.Adam(hp.Choice('learning_rate', values=[1e-0, 1e-2])),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

    return model


In [5]:
# Set the path to your dataset
dataset_dir = '/kaggle/input/general-dataset'
img_height = 32
img_width = 128

# Initialize data loader
data_loader = DataLoader(dataset_dir, img_height, img_width)

# Load transcriptions and datasets
transcriptions, train_set, validation_set, test_set = data_loader.load_dataset()

# Load actual images and labels
train_images, train_labels = data_loader.load_images_and_labels(train_set, transcriptions)
validation_images, validation_labels = data_loader.load_images_and_labels(validation_set, transcriptions)

print(train_images.shape)

(9000, 32, 128, 1)


In [6]:
# Convert labels to numerical representatio
char_to_index = {}
index = 0
for text in transcriptions.values():
    for char in text:
        if char not in char_to_index:
            char_to_index[char] = index
            index += 1
            
def label_to_index(label):
    return [char_to_index[char] if char in char_to_index else char_to_index['<unknown>'] for char in label]
num_classes = len(char_to_index)
print(num_classes)

'''train_labels_encoded = []
for label in train_labels:
    encoded_label = [char_to_index[char] for char in label]
    encoded_label = to_categorical(encoded_label, num_classes=num_classes)
    # Pad or truncate the encoded label to the maximum sequence length
    encoded_label = pad_sequences([encoded_label], maxlen=max_len, padding='post')[0]
    train_labels_encoded.append(encoded_label)
train_labels_encoded = np.array(train_labels_encoded)

validation_labels_encoded = []
for label in validation_labels:
    encoded_label = [char_to_index[char] for char in label]
    encoded_label = to_categorical(encoded_label, num_classes=num_classes)
    # Pad or truncate the encoded label to the maximum sequence length
    encoded_label = pad_sequences([encoded_label], maxlen=max_len, padding='post')[0]
    validation_labels_encoded.append(encoded_label)
validation_labels_encoded = np.array(validation_labels_encoded)'''

train_labels_encoded = [label_to_index(label) for label in train_labels]
#train_labels_encoded = np.array(train_labels_encoded)
validation_labels_encoded = [label_to_index(label) for label in validation_labels]
#validation_labels_encoded = np.array(validation_labels_encoded)

# Pad sequences with zeros (adding zeros at the end)
train_labels_padded = pad_sequences(train_labels_encoded, padding='post')
validation_labels_padded = pad_sequences(validation_labels_encoded, padding='post')

# Convert to NumPy arrays
train_labels_array = np.array(train_labels_padded)
validation_labels_array = np.array(validation_labels_padded)

# Calculate the maximum sequence length
#max_len = max(train_labels_array.shape[1], validation_labels_array.shape[1])
max_len = max(len(label) for label in train_labels + validation_labels)


# Print shapes for verification
print("Train Images Shape:", train_images.shape)
print("Train Labels Shape:", train_labels_array.shape)
print("Validation Images Shape:", validation_images.shape)
print("Validation Labels Shape:", validation_labels_array.shape)
print(train_labels_padded.shape)
print(max_len)

111
Train Images Shape: (9000, 32, 128, 1)
Train Labels Shape: (9000, 74)
Validation Images Shape: (1000, 32, 128, 1)
Validation Labels Shape: (1000, 71)
(9000, 74)
74


In [12]:
from kerastuner import RandomSearch
from kerastuner.engine.hyperparameters import HyperParameters
from kerastuner import HyperModel

tuner_search = RandomSearch(build_crnn_model, objective='val_accuracy', max_trials=5)

#tuner_search.fit(train_images, train_labels_padded, epochs=10, validation_split=0.1)

tuner_search.search(train_images,train_labels_padded,epochs=3,validation_split=0.1)

Reloading Tuner from ./untitled_project/tuner0.json


In [13]:
model=tuner_search.get_best_models(num_models=1)[0]
model.summary()

In [14]:
model.fit(train_images, train_labels_padded, epochs=10, validation_split=0.1, initial_epoch=3)

Epoch 4/10
[1m254/254[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 52ms/step - accuracy: 0.0178 - loss: 4008647.5000 - val_accuracy: 0.0256 - val_loss: 5526091.0000
Epoch 5/10
[1m254/254[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 46ms/step - accuracy: 0.0275 - loss: 6943677.5000 - val_accuracy: 0.0256 - val_loss: 6283909.0000
Epoch 6/10
[1m254/254[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 46ms/step - accuracy: 0.0170 - loss: 7249979.0000 - val_accuracy: 0.0256 - val_loss: 6403379.0000
Epoch 7/10
[1m254/254[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 48ms/step - accuracy: 0.0224 - loss: 8257271.0000 - val_accuracy: 0.0256 - val_loss: 7641455.5000
Epoch 8/10
[1m254/254[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 47ms/step - accuracy: 0.0191 - loss: 9429448.0000 - val_accuracy: 0.0689 - val_loss: 8673196.0000
Epoch 9/10
[1m254/254[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 48ms/step - accuracy: 0.0233 - loss: 10349

<keras.src.callbacks.history.History at 0x7c350ccaead0>