In [2]:
import tensorflow as tf
import numpy as np
import cv2
import os

def load_image(image_path):
    """ Load an image and convert to grayscale. """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        raise ValueError("Image not found")
    return image / 255.0  # Normalize to [0, 1]

def parse_annotation(annotation_str):
    """ Parse the annotation string into structured data. """
    words = annotation_str.split(' ## ')
    structured_annotations = []
    for word in words:
        parts = word.split(' # ')
        word_annotations = []
        for part in parts:
            letters = part.split('\\')
            word_annotations.append(letters)
        structured_annotations.append(word_annotations)
    return structured_annotations

def preprocess_data(image_path, annotation_str):
    """ Process image and annotation data. """
    image = load_image(image_path)
    annotations = parse_annotation(annotation_str)
    return image, annotations

def process_directory(image_dir, annotation_dir):
    """ Process all images and annotations in given directories. """
    data = []
    for filename in os.listdir(image_dir):
        if filename.endswith(".png"):
            image_path = os.path.join(image_dir, filename)
            annotation_path = os.path.join(annotation_dir, filename.replace('.png', '.txt'))
            
            with open(annotation_path, 'r', encoding='utf-8') as file:
                annotation_str = file.read().strip()
            
            processed_image, processed_annotation = preprocess_data(image_path, annotation_str)
            data.append((processed_image, processed_annotation))
    return data

# Define your paths
image_dir = "Sentence_Dataset/images"
annotation_dir = "Sentence_Dataset/annotations"

# Process the entire directory
dataset = process_directory(image_dir, annotation_dir)

# Now dataset contains all the preprocessed images and their annotations


In [3]:
dataset

[(array([[0.        , 0.        , 0.        , ..., 0.        , 0.        ,
          0.        ],
         [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
          0.        ],
         [0.        , 0.01960784, 0.14117647, ..., 0.        , 0.        ,
          0.        ],
         ...,
         [0.        , 0.00392157, 0.13333333, ..., 0.        , 0.        ,
          0.        ],
         [0.        , 0.        , 0.00392157, ..., 0.        , 0.        ,
          0.        ],
         [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
          0.        ]]),
  [[['-', '-', '-', '-', '-', '-', '-', '- #### ५', 'ख', 'ढ', '-']]]),
 (array([[0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         ...,
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.]]),
  [[['-', 'न', 'ट', '-', '४', '-', 'न', '९'], ['-', 'ढ #### ञ', 'ड']]

In [4]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Reshape, Dense, LSTM, Bidirectional

def create_crnn_model(input_shape, num_classes):
    inputs = Input(shape=input_shape)

    # Convolutional layers
    x = Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
    x = MaxPooling2D((2, 2))(x)
    x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D((2, 2))(x)

    # Prepare output for RNN layers
    # We have to know the shape of the output from the conv layers to reshape it properly
    new_shape = (-1, x.shape[2] * x.shape[3])
    x = Reshape(target_shape=new_shape)(x)

    # RNN layers
    x = Bidirectional(LSTM(128, return_sequences=True))(x)
    x = Bidirectional(LSTM(64, return_sequences=True))(x)

    # Output layer
    output = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=output)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    
    return model

# Assuming 1632x32 input images and 128 possible characters/classes including the CTC blank label
model = create_crnn_model(input_shape=(32, 1632, 1), num_classes=128)
model.summary()


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 32, 1632, 1)]     0         
                                                                 
 conv2d (Conv2D)             (None, 32, 1632, 32)      320       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 16, 816, 32)      0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 16, 816, 64)       18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 8, 408, 64)       0         
 2D)                                                             
                                                                 
 reshape (Reshape)           (None, 8, 26112)          0     

In [5]:
# Assuming `dataset` is a list of (image, label) tuples
from sklearn.model_selection import train_test_split

# Convert dataset to suitable format
images = [img for img, _ in dataset]
labels = [label for _, label in dataset]

# Split data
train_images, val_images, train_labels, val_labels = train_test_split(images, labels, test_size=0.2)

# Training the model
history = model.fit(x=train_images, y=train_labels, validation_data=(val_images, val_labels), epochs=10, batch_size=16)


ValueError: Failed to find data adapter that can handle input: (<class 'list'> containing values of types {"<class 'numpy.ndarray'>"}), (<class 'list'> containing values of types {'(<class \'list\'> containing values of types {\'(<class \\\'list\\\'> containing values of types {\\\'(<class \\\\\\\'list\\\\\\\'> containing values of types {"<class \\\\\\\'str\\\\\\\'>"})\\\'})\'})'})

In [1]:
import tensorflow as tf
import numpy as np
import cv2
import os

def load_and_normalize_image(image_path):
    """ Load an image, convert to grayscale, and normalize. """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        raise ValueError("Image not found")
    return image / 255.0  # Normalize to [0, 1]

def parse_annotations(annotation_str):
    """ Parse and clean annotations, removing backslashes. """
    clean_annotation = annotation_str.replace('\\', '')
    return clean_annotation.split(' ')

def load_dataset(image_dir, annotation_dir):
    """ Load dataset from directories. """
    images = []
    labels = []
    for filename in os.listdir(image_dir):
        if filename.endswith('.png'):
            image_path = os.path.join(image_dir, filename)
            annotation_path = os.path.join(annotation_dir, filename.replace('.png', '.txt'))
            
            if not os.path.exists(annotation_path):
                continue
            
            with open(annotation_path, 'r') as file:
                annotation = file.read().strip()
            
            image = load_and_normalize_image(image_path)
            processed_annotation = parse_annotations(annotation)
            
            images.append(image)
            labels.append(processed_annotation)
    
    return images, labels

# Setup paths to your image and annotation directories
image_dir = "Sentence_Dataset/images"
annotation_dir = "Sentence_Dataset/annotations"

images, annotations = load_dataset(image_dir, annotation_dir)

# Now you can convert this data into a TensorFlow Dataset if needed.
dataset = tf.data.Dataset.from_tensor_slices((images, annotations))


: 