In [1]:
pip install opencv-python

Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install numpy==1.20.0

Note: you may need to restart the kernel to use updated packages.


In [None]:

import os
import numpy as np
import pandas as pd
import random
import string
from itertools import groupby
import re
import cv2  
import matplotlib.pyplot as plt


import tensorflow as tf
from tensorflow.keras.utils import pad_sequences
from keras.layers import Dense, LSTM, Reshape, BatchNormalization, Input, Conv2D, MaxPool2D, Lambda, Bidirectional, Dropout
from tensorflow.keras.optimizers import Adam
import keras.backend as K
from keras.models import Model
from tensorflow.keras import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.model_selection import train_test_split

In [None]:
# Read the training split from Train.csv (path relative to the working directory)
# and print its first rows as a quick sanity check.
train = pd.read_csv('Train.csv')
print(train.head())

In [None]:
# Load the two remaining splits alongside the already-loaded `train` frame.
val = pd.read_csv('Validation.csv')
test = pd.read_csv('Test.csv')

# Keep each split's (rows, columns) tuple under its own module-level name.
shapetrain, shapeval, shapetest = train.shape, val.shape, test.shape

# One report line per split.
for split_label, split_shape in (
    ("Train", shapetrain),
    ("Val", shapeval),
    ("Test", shapetest),
):
    print(f"{split_label} DataFrame shape:", split_shape)

In [None]:
# Give all three splits the same two column labels.
for split_frame in (train, val, test):
    split_frame.columns = ['Image Path', 'Word']

print(val.head())

In [None]:

# Boolean frame marking every missing cell of the training split.
null_values = train.isna()

# Keep only the rows that contain at least one missing cell.
rows_with_null = train.loc[null_values.any(axis=1)]

print("Rows with null values:", rows_with_null, sep="\n")

In [None]:
# Look up the path and transcription stored under index label 159.
sample_label = 159
image_path = train.loc[sample_label, 'Image Path']
word_of_image = train.loc[sample_label, 'Word']

image = cv2.imread(image_path)

# Swap the channel order from BGR to RGB before handing the array to matplotlib.
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
plt.imshow(image_rgb)
plt.axis('off')  # no axis ticks or labels around the picture
plt.show()
word_of_image

In [None]:

def process_dataset_inplace(dataset):
    """Fill the 'Word' of rows containing nulls from digits in the file name.

    Prints whether the frame contains any null value. Then, for every row
    that has at least one null cell, looks for four digits immediately
    before a trailing ``.jpeg`` in 'Image Path' and, when found, stores those
    digits (as a string) in the row's 'Word' cell. Rows whose path does not
    match are left unchanged.

    Args:
        dataset: DataFrame with 'Image Path' and 'Word' columns. It is
            modified in place; nothing is returned.
    """
    null_values = dataset.isnull()
    null = null_values.any().any()
    print("Are there any null values in the DataFrame?", null)

    pattern = re.compile(r'(\d{4})\.jpeg$')

    # Select affected rows by *label*. The row mask is converted to a plain
    # array so it is applied positionally to the index, which keeps this
    # correct for frames whose index is not 0..n-1 (filtered, shuffled, ...).
    rows_with_null = dataset.index[null_values.any(axis=1).to_numpy()]

    for index in rows_with_null:
        image_path = dataset.at[index, 'Image Path']
        # The null cell may be the path itself; a regex search on NaN/None
        # would raise TypeError, so such rows are skipped.
        if not isinstance(image_path, str):
            continue
        match = pattern.search(image_path)
        if match:
            dataset.at[index, 'Word'] = match.group(1)

# Repair missing labels in the training split; `train` itself is modified.
process_dataset_inplace(train)

In [None]:
# Show sample 159 again, now that the training labels have been processed.
sample_label = 159
image_path = train.loc[sample_label, 'Image Path']
word_of_image = train.loc[sample_label, 'Word']

image = cv2.imread(image_path)

# Swap the channel order from BGR to RGB before handing the array to matplotlib.
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
plt.imshow(image_rgb)
plt.axis('off')
plt.show()
word_of_image

In [None]:
# Apply the same missing-label repair to the validation and test splits.
for split_frame in (val, test):
    process_dataset_inplace(split_frame)

In [None]:
def display_random_images(train, path, num_images=5):
    """Plot randomly chosen images of a split in one row, titled by label.

    Args:
        train: DataFrame with 'Image Path' and 'Word' columns. Despite the
            name, any split can be passed.
        path: Not used by this function; kept so existing calls keep working.
        num_images: Number of images requested. It is clamped to the number
            of rows so a small frame no longer makes ``random.sample`` raise.
    """
    sample_size = max(0, min(num_images, len(train)))
    # random.sample over range(len(...)) yields *positions*, so rows are read
    # with .iloc below; label-based access would break on a non-default index.
    random_positions = random.sample(range(len(train)), sample_size)

    plt.figure(figsize=(15, 5))
    for slot, position in enumerate(random_positions, start=1):
        image_path = train['Image Path'].iloc[position]
        word_of_image = train['Word'].iloc[position]
        image = cv2.imread(image_path)

        # One subplot per sampled image, laid out on a single row.
        plt.subplot(1, sample_size, slot)
        if image is None:
            # cv2.imread signals a missing/unreadable file by returning None;
            # label the empty panel instead of crashing inside cvtColor.
            plt.title(f'Unreadable: {image_path}')
        else:
            plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            plt.title(f'Word: {word_of_image}')
        plt.axis('off')

    plt.show()


# Preview five random samples from the test split. The "test/" argument is
# accepted by the function but its body never reads it.
display_random_images(test,"test/", num_images=5)


In [None]:

# Characters the label encoding supports: ASCII letters (both cases), digits,
# then the literal " ' -" (space, apostrophe, space, hyphen).
# NOTE(review): the space occurs twice in that literal, so `alphabets` holds a
# duplicate entry and str.find only ever returns the first one — confirm intended.
alphabets = string.ascii_letters + string.digits + " ' -"

max_str_len = 24  # max length of input labels
num_of_characters = len(alphabets) + 1  # presumably +1 reserves a class for the CTC blank — TODO confirm
num_of_timestamps = 64  # max length of predicted labels: 64
batch_size = 512  # read by create_tf_dataset; assigned the same value again in a later cell

def encode_and_pad_strings(text):
    """Turn ``text`` into a fixed-length sequence of alphabet indices.

    Every character of ``str(text)`` is mapped to its first position in
    ``alphabets``; characters that are not in ``alphabets`` are encoded as
    the position of '-'. The index list is handed to ``pad_sequences`` with
    ``maxlen=max_str_len``, ``padding='post'`` and ``value=-1``, and the
    single resulting row is returned.

    NOTE(review): no ``truncating`` argument is passed, so inputs longer than
    ``max_str_len`` are cut according to the library default — confirm that
    is the intended side to drop.
    """
    fallback_index = alphabets.find('-')
    positions = (alphabets.find(char) for char in str(text))
    encoded = [fallback_index if position == -1 else position for position in positions]

    padded_batch = pad_sequences([encoded], maxlen=max_str_len, padding='post', value=-1)
    return padded_batch[0]

# Example usage: each character becomes its position in `alphabets` and the
# remainder of the fixed-length output is filled with -1.
input_string = "A9ello world"
result = encode_and_pad_strings(input_string)
print(result)

In [None]:
def decode_padded_sequence(padded_sequence):
    """Rebuild the text encoded in ``padded_sequence``.

    Values are read in order and looked up in ``alphabets``. Reading stops at
    the first -1, which is the fill value ``encode_and_pad_strings`` pads
    with; anything after it is ignored.
    """
    characters = []
    for code in padded_sequence:
        if code == -1:  # padding reached: the label ends here
            break
        characters.append(alphabets[code])

    return ''.join(characters)

# Example usage: round-trip a string through the encoder and the decoder.
input_string = "Hello world"
encoded_result = encode_and_pad_strings(input_string)
print("Encoded Sequence:", encoded_result)

# Decoding stops at the first -1, so the padding is dropped again.
decoded_result = decode_padded_sequence(encoded_result)
print("Decoded String:", decoded_result)

In [None]:
import tensorflow as tf

@tf.function
def preprocess_image(image_path, label, label_len):
    """Load one image file and rescale it to a height of 256 pixels.

    The file is read, decoded with ``tf.image.decode_png`` into a single
    channel, converted to float32 and resized so its height is 256 while the
    width follows the original aspect ratio (truncated to an integer).

    ``label`` and ``label_len`` are not touched; they are returned as
    received so the function can be mapped over (path, label, length) tuples.

    NOTE(review): ``target_width`` is assigned but never used, so the output
    width differs between images with different aspect ratios — confirm this
    is compatible with the batching done downstream.
    NOTE(review): paths elsewhere in this notebook end in ``.jpeg`` while the
    decoder called here is ``decode_png`` — confirm it handles those files.
    """
    target_height = 256
    target_width = 64  # not read anywhere below

    raw_bytes = tf.io.read_file(image_path)
    image = tf.image.convert_image_dtype(
        tf.image.decode_png(raw_bytes, channels=1), tf.float32
    )

    # Width that preserves the source aspect ratio at the target height.
    source_shape = tf.shape(image)
    source_height = tf.cast(source_shape[0], tf.float32)
    source_width = tf.cast(source_shape[1], tf.float32)
    scaled_width = tf.cast(target_height, tf.float32) * (source_width / source_height)

    image = tf.image.resize(image, [target_height, tf.cast(scaled_width, tf.int32)])

    return image, label, label_len


# Preview the preprocessing on one hard-coded training image.
image_path = "train/0001-1-1-6-Hexagons.jpeg"

# preprocess_image hands `label` and `label_len` back unchanged, but no earlier
# cell defines them, so reading them here raised NameError on a fresh kernel
# (Restart & Run All). Only the image matters for this preview, so explicit
# placeholder values are passed instead.
label = -1
label_len = 0
imagex, label, label_len = preprocess_image(image_path, label, label_len)
num_of_timestamps = 64  # same value as the earlier definition

# Drop the single channel axis so matplotlib receives a 2-D grayscale array.
plt.imshow(imagex.numpy().squeeze(), cmap='gray')
plt.axis('off')
plt.show()

In [None]:
def create_data_list(dataset, img_dir):
    """Collect image paths, encoded labels and label lengths from a split.

    Only rows whose 'Word' value is a ``str`` are kept; every other row is
    skipped without notice. For each kept row the path is taken from
    'Image Path', the label is ``encode_and_pad_strings(word)`` and the
    length is ``len(word)``.

    Args:
        dataset: DataFrame with 'Image Path' and 'Word' columns.
        img_dir: Not read by this function.

    Returns:
        Three parallel lists: ``(data_x, data_y, label_len)``.
    """
    kept_rows = [
        (path, word)
        for path, word in zip(dataset['Image Path'], dataset['Word'])
        if isinstance(word, str)
    ]

    data_x = [path for path, _ in kept_rows]
    data_y = [encode_and_pad_strings(word) for _, word in kept_rows]
    label_len = [len(word) for _, word in kept_rows]

    return data_x, data_y, label_len

In [None]:
# Directory names passed to create_data_list as `img_dir` (that function does
# not read the argument).
train_img_dir = "train"
val_img_dir = "val"
batch_size = 512  # same value as the earlier definition; read by create_tf_dataset


# Passed as num_parallel_calls and as the prefetch buffer size in create_tf_dataset.
AUTOTUNE = tf.data.experimental.AUTOTUNE

def create_tf_dataset(data_x, data_y, label_len):
    """Build a batched ``tf.data`` pipeline from paths, labels and lengths.

    Each element is loaded through ``preprocess_image``, grouped into batches
    of the module-level ``batch_size`` and regrouped as
    ``((image, label), label_len)``.

    Args:
        data_x: Sequence of image paths (converted to a string tensor).
        data_y: Sequence of encoded labels (converted to int64).
        label_len: Sequence of label lengths (converted to int64).

    Returns:
        A prefetched ``tf.data.Dataset`` yielding ``((images, labels), lengths)``.

    NOTE(review): ``preprocess_image`` derives the output width from each
    image's aspect ratio, so batching presumably requires all images to share
    one aspect ratio — confirm against the data.
    """
    paths = tf.constant(data_x, dtype=tf.string)
    labels = tf.constant(data_y, dtype=tf.int64)
    lengths = tf.constant(label_len, dtype=tf.int64)

    def _split_inputs_and_target(image, label, length):
        # Everything except the last component forms the first tuple member.
        return (image, label), length

    dataset = tf.data.Dataset.from_tensor_slices((paths, labels, lengths))
    dataset = dataset.map(preprocess_image, num_parallel_calls=AUTOTUNE)
    dataset = dataset.batch(batch_size)
    dataset = dataset.map(_split_inputs_and_target)
    # A single prefetch is enough; the original chained two identical ones,
    # which only added a second buffer stage without changing the elements.
    return dataset.prefetch(AUTOTUNE)


# Training split: collect (paths, encoded labels, label lengths), then build its tf.data pipeline.
train_x, train_y, train_label_len = create_data_list(train,train_img_dir)
train_dataset = create_tf_dataset(train_x, train_y, train_label_len)

# Validation split: same two steps.
val_x, val_y, val_label_len = create_data_list(val,val_img_dir)
val_dataset = create_tf_dataset(val_x, val_y, val_label_len)

# Samples kept per split; rows whose 'Word' is not a str were skipped by create_data_list.
print(len(train_y), len(val_y))