# Architecture

### CNN to extract the features of the image

### RNN to interpret the sequential data extracted by CNNs, LSTM (Long Short-Term Memory) units are often used due to their efficiency in handling sequences.

### CTC Loss To handle the alignment between input sequences (the image features) and the target sequences (the transcribed text).

In [37]:
import pandas as pd
import cv2 
import os
import numpy as np
from keras import backend as K
from keras.models import Model
from keras.layers import Input, Conv2D, MaxPooling2D, Reshape, Bidirectional, LSTM, Dense, Lambda, Activation, BatchNormalization, Dropout
from keras.optimizers import Adam
import matplotlib.pyplot as plt


In [38]:
with open(r'bressay-competition\bressay\sets\test.txt', 'r') as file:
    test_content = file.readlines()

with open(r'bressay-competition\bressay\sets\training.txt', 'r') as file:
    training_content = file.readlines()

with open(r'bressay-competition\bressay\sets\validation.txt', 'r') as file:
    validation_content = file.readlines()



In [39]:
# counter_rasura = 0
# if "--xxx---" in data_text:
#     counter_rasura += 1

# Extracting line dataset

In [40]:
lines_directory = r'bressay-competition\bressay\data\lines'
test = {}
train = {}
val = {}

def count_unique_characters(*data_structures):
    unique_characters = set()

    for data_structure in data_structures:
        for data in data_structure.values():
            if 'txt' in data:
                for text_entry in data['txt']:
                    unique_characters.update(text_entry)

    sorted_characters = sorted(unique_characters)
    characters_string = ''.join(sorted_characters)
    return characters_string


def find_max_text_length(*data_structures):
    max_length = 0
    for data_structure in data_structures:
        len_line = 0
        for data in data_structure.values():
            if 'txt' in data:
                for line in data["txt"]:
                    if len(line) > len_line:
                        len_line = len(line)
                if len_line > max_length:
                    max_length = len_line

    return max_length

def preprocess(img):
    (h, w) = img.shape
    
    final_img = np.ones([64, 256])*255 # blank white image
    
    # crop
    if w > 256:
        img = img[:, :256]
        
    if h > 64:
        img = img[:64, :]
    
    
    final_img[:h, :w] = img
    return cv2.rotate(final_img, cv2.ROTATE_90_CLOCKWISE)

def collect_file_data_to_dataframe(dir_name, base_path):
    data = []
    full_path = os.path.join(base_path, dir_name)
    
    if os.path.isdir(full_path):
        for file in os.listdir(full_path):
            if file.endswith('.png'):
                img_file_path = os.path.join(full_path, file)
                txt_file_name = file.replace('.png', '.txt')
                txt_file_path = os.path.join(full_path, txt_file_name)
                
                if os.path.exists(txt_file_path):
                    with open(txt_file_path, 'r', encoding='utf-8') as txt_file:
                        identity = txt_file.read()
                        data.append([img_file_path, identity])
    
    return pd.DataFrame(data, columns=['FILENAME', 'IDENTITY'])



    

In [41]:
test_dfs = []
training_dfs = []
val_dfs = []

for dir_name in test_content:
    dir_name = dir_name.strip()
    test_dfs.append(collect_file_data_to_dataframe(dir_name, lines_directory))

for dir_name in training_content:
    dir_name = dir_name.strip()
    training_dfs.append(collect_file_data_to_dataframe(dir_name, lines_directory))

# for dir_name in val_content:
#     dir_name = dir_name.strip()
#     val_dfs.append(collect_file_data_to_dataframe(dir_name, lines_directory))

# Concatenate all DataFrames in the lists
test = pd.concat(test_dfs, ignore_index=True)
train = pd.concat(training_dfs, ignore_index=True)
# val_df = pd.concat(val_dfs, ignore_index=True)


In [42]:
train_size = 19628
test_size = 56

In [43]:
train_x = []
for i in range(train_size):
    
    relative_path = train.loc[i, 'FILENAME'].replace('\\\\', '\\')
    img_dir = fr"{relative_path}"

    image = cv2.imread(img_dir, cv2.IMREAD_GRAYSCALE)
    
    if image is not None:
        image = preprocess(image)
        image = image / 255.0  # Normalize
        train_x.append(image)
    else:
        print(f"Image not found or unable to load: {img_dir}")

train_x = np.array(train_x)

In [44]:
test_x = []
for i in range(test_size):
    
    relative_path = test.loc[i, 'FILENAME'].replace('\\\\', '\\')
    img_dir = fr"{relative_path}"

    image = cv2.imread(img_dir, cv2.IMREAD_GRAYSCALE)
    
    if image is not None:
        image = preprocess(image)
        image = image / 255.0  # Normalize
        test_x.append(image)
    else:
        print(f"Image not found or unable to load: {img_dir}")

test_x = np.array(test_x)

In [45]:
train_x = np.array(train_x).reshape(-1, 256, 64, 1)
test_x = np.array(test_x).reshape(-1, 256, 64, 1)

### Vectorizing txt (preparing for CTC loss)

In [46]:
def preprocess_text(text):
    special_token_mappings = {
        '--text--': '=',
        '--xxx--': '+',
        '##text##': '*',
        '$$text$$': '^',
        '@@???@@': '&',
        '##--xxx--##': '%',
        '$$--xxx--$$': '`',
        '##@@???@@##': '{',
        '$$@@???@@$$': '}'
    }
    for token, unique_char in special_token_mappings.items():
        text = text.replace(token, unique_char)
    return text

train['IDENTITY'] = train['IDENTITY'].apply(preprocess_text)
test['IDENTITY'] = test['IDENTITY'].apply(preprocess_text)


In [47]:
train['IDENTITY']

0        Ao longo do desenvolvimento de sociedades comp...
1        zes de explicar e intervir positivamente no fu...
2        a psiquiatria --real-- surgiram para tratar e ...
3        na sociedade brasileira, a manutenção de um es...
4        causada por uma questão histórica e por uma fa...
                               ...                        
19623    pulação e o ambiente cinematográfico, através ...
19624    se utilizam de espaços públicos, como escolas ...
19625    em cartaz nos cinemas, a fim de proporcionar à...
19626    democrático + e permitir que esses indivíduos ...
19627     ra crítica por meio dos & metragens em exibição.
Name: IDENTITY, Length: 19628, dtype: object

In [71]:
alphabets = ' =+^&*!"%`\'(),}-.{/0123456789:;=?ABCDEFGHIJKLMNOPQRSTUVWXYZ\\]`abcdefghijklmnopqrstuvwxyz|ª°´ºÀÁÈÉÍÓÚàáâãçèéêíóôõúü'
max_str_len = 250
num_of_characters = len(alphabets) + 1
num_of_timestamps = 64


def label_to_num(label):
    label_num = []
    for ch in label:
        label_num.append(alphabets.find(ch))
        
    return np.array(label_num)

def num_to_label(num):
    ret = ""
    for ch in num:
        if ch == -1:  # CTC Blank
            break
        else:
            ret+=alphabets[ch]
    return ret

In [49]:
name = 'ra crítica por meio dos & metragens em exibição'
print(name, '\n',label_to_num(name))

ra crítica por meio dos & metragens em exibição 
 [ 79  62   0  64  79 108  81  70  64  62   0  77  76  79   0  74  66  70
  76   0  65  76  80   0   4   0  74  66  81  79  62  68  66  75  80   0
  66  74   0  66  85  70  63  70 104 103  76]


In [50]:
train_size

19628

In [72]:
train_y = np.ones([train_size, max_str_len]) * -1
train_label_len = np.zeros([train_size, 1])
train_input_len = np.ones([train_size, 1]) * (num_of_timestamps-2)
train_output = np.zeros([train_size])

for i in range(train_size):
    train_label_len[i] = len(train.loc[i, 'IDENTITY'])
    train_y[i, 0:len(train.loc[i, 'IDENTITY'])]= label_to_num(train.loc[i, 'IDENTITY']) 

In [73]:
test_y = np.ones([test_size, max_str_len]) * -1
test_label_len = np.zeros([test_size, 1])
test_input_len = np.ones([test_size, 1]) * (num_of_timestamps-2)
test_output = np.zeros([test_size])

for i in range(test_size):
    test_label_len[i] = len(test.loc[i, 'IDENTITY'])
    test_y[i, 0:len(test.loc[i, 'IDENTITY'])]= label_to_num(test.loc[i, 'IDENTITY'])  

In [53]:
train

Unnamed: 0,FILENAME,IDENTITY
0,bressay-competition\bressay\data\lines\0651-04...,Ao longo do desenvolvimento de sociedades comp...
1,bressay-competition\bressay\data\lines\0651-04...,zes de explicar e intervir positivamente no fu...
2,bressay-competition\bressay\data\lines\0651-04...,a psiquiatria --real-- surgiram para tratar e ...
3,bressay-competition\bressay\data\lines\0651-04...,"na sociedade brasileira, a manutenção de um es..."
4,bressay-competition\bressay\data\lines\0651-04...,causada por uma questão histórica e por uma fa...
...,...,...
19623,bressay-competition\bressay\data\lines\4775-02...,"pulação e o ambiente cinematográfico, através ..."
19624,bressay-competition\bressay\data\lines\4775-02...,"se utilizam de espaços públicos, como escolas ..."
19625,bressay-competition\bressay\data\lines\4775-02...,"em cartaz nos cinemas, a fim de proporcionar à..."
19626,bressay-competition\bressay\data\lines\4775-02...,democrático + e permitir que esses indivíduos ...


In [74]:
print('True label : ',train.loc[100, 'IDENTITY'] , '\ntrain_y : ',train_y[100],'\ntrain_label_len : ',train_label_len[100], 
      '\ntrain_input_len : ', train_input_len[100])

True label :  plo, foi responsável por retratar essa falsa felicidade das famílias tradicionais estaduni- 
train_y :  [ 77.  73.  76.  13.   0.  67.  76.  70.   0.  79.  66.  80.  77.  76.
  75.  80. 101.  83.  66.  73.   0.  77.  76.  79.   0.  79.  66.  81.
  79.  62.  81.  62.  79.   0.  66.  80.  80.  62.   0.  67.  62.  73.
  80.  62.   0.  67.  66.  73.  70.  64.  70.  65.  62.  65.  66.   0.
  65.  62.  80.   0.  67.  62.  74. 108.  73.  70.  62.  80.   0.  81.
  79.  62.  65.  70.  64.  70.  76.  75.  62.  70.  80.   0.  66.  80.
  81.  62.  65.  82.  75.  70.  15.  -1.  -1.  -1.  -1.  -1.  -1.  -1.
  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.
  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.
  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.
  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.
  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.
  -1.  -1.  -1.  -1.  -1.  -1.

In [75]:
print('True label : ',test.loc[10, 'IDENTITY'] , '\ntrain_y : ',test_y[10],'\ntrain_label_len : ',test_label_len[10], 
      '\ntrain_input_len : ', test_input_len[10])

True label :  ciado às doenças mentais e, por isso, uma resolução urge. 
train_y :  [ 64.  70.  62.  65.  76.   0. 100.  80.   0.  65.  76.  66.  75. 104.
  62.  80.   0.  74.  66.  75.  81.  62.  70.  80.   0.  66.  13.   0.
  77.  76.  79.   0.  70.  80.  80.  76.  13.   0.  82.  74.  62.   0.
  79.  66.  80.  76.  73.  82. 104. 103.  76.   0.  82.  79.  68.  66.
  16.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.
  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.
  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.
  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.
  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.
  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.
  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.
  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.
  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1.  -1

# Model

In [76]:
input_data = Input(shape=(256, 64, 1), name='input')

inner = Conv2D(32, (3, 3), padding='same', name='conv1', kernel_initializer='he_normal')(input_data)  
inner = BatchNormalization()(inner)
inner = Activation('relu')(inner)
inner = MaxPooling2D(pool_size=(2, 2), name='max1')(inner)

inner = Conv2D(64, (3, 3), padding='same', name='conv2', kernel_initializer='he_normal')(inner)
inner = BatchNormalization()(inner)
inner = Activation('relu')(inner)
inner = MaxPooling2D(pool_size=(2, 2), name='max2')(inner)
inner = Dropout(0.3)(inner)

inner = Conv2D(128, (3, 3), padding='same', name='conv3', kernel_initializer='he_normal')(inner)
inner = BatchNormalization()(inner)
inner = Activation('relu')(inner)
inner = MaxPooling2D(pool_size=(1, 2), name='max3')(inner)
inner = Dropout(0.3)(inner)

# CNN to RNN
inner = Reshape(target_shape=((64, 1024)), name='reshape')(inner)
inner = Dense(64, activation='relu', kernel_initializer='he_normal', name='dense1')(inner)

## RNN
inner = Bidirectional(LSTM(256, return_sequences=True), name = 'lstm1')(inner)
inner = Bidirectional(LSTM(256, return_sequences=True), name = 'lstm2')(inner)

## OUTPUT
inner = Dense(num_of_characters, kernel_initializer='he_normal',name='dense2')(inner)
y_pred = Activation('softmax', name='softmax')(inner)

model = Model(inputs=input_data, outputs=y_pred)
model.summary()

Model: "model_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input (InputLayer)          [(None, 256, 64, 1)]      0         
                                                                 
 conv1 (Conv2D)              (None, 256, 64, 32)       320       
                                                                 
 batch_normalization_9 (Bat  (None, 256, 64, 32)       128       
 chNormalization)                                                
                                                                 
 activation_9 (Activation)   (None, 256, 64, 32)       0         
                                                                 
 max1 (MaxPooling2D)         (None, 128, 32, 32)       0         
                                                                 
 conv2 (Conv2D)              (None, 128, 32, 64)       18496     
                                                           

In [77]:
def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    # the 2 is critical here since the first couple outputs of the RNN
    # tend to be garbage
    y_pred = y_pred[:, 2:, :]
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)

In [78]:
labels = Input(name='gtruth_labels', shape=[max_str_len], dtype='float32')
input_length = Input(name='input_length', shape=[1], dtype='int64')
label_length = Input(name='label_length', shape=[1], dtype='int64')

ctc_loss = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([y_pred, labels, input_length, label_length])
model_final = Model(inputs=[input_data, labels, input_length, label_length], outputs=ctc_loss)

# TRAINNNN!!

In [79]:
model_final.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=Adam(lr = 0.0001))

model_final.fit(x=[train_x, train_y, train_input_len, train_label_len], y=train_output, 
                validation_data=([test_x, test_y, test_input_len, test_label_len], test_output),
                epochs=60, batch_size=128)



Epoch 1/60


InvalidArgumentError: Graph execution error:

Detected at node model_7/ctc/CTCLoss defined at (most recent call last):
  File "c:\Users\Samsung\anaconda3\envs\ML\lib\runpy.py", line 197, in _run_module_as_main

  File "c:\Users\Samsung\anaconda3\envs\ML\lib\runpy.py", line 87, in _run_code

  File "c:\Users\Samsung\anaconda3\envs\ML\lib\site-packages\ipykernel_launcher.py", line 17, in <module>

  File "c:\Users\Samsung\anaconda3\envs\ML\lib\site-packages\traitlets\config\application.py", line 992, in launch_instance

  File "c:\Users\Samsung\anaconda3\envs\ML\lib\site-packages\ipykernel\kernelapp.py", line 712, in start

  File "c:\Users\Samsung\anaconda3\envs\ML\lib\site-packages\tornado\platform\asyncio.py", line 195, in start

  File "c:\Users\Samsung\anaconda3\envs\ML\lib\asyncio\base_events.py", line 601, in run_forever

  File "c:\Users\Samsung\anaconda3\envs\ML\lib\asyncio\base_events.py", line 1905, in _run_once

  File "c:\Users\Samsung\anaconda3\envs\ML\lib\asyncio\events.py", line 80, in _run

  File "c:\Users\Samsung\anaconda3\envs\ML\lib\site-packages\ipykernel\kernelbase.py", line 510, in dispatch_queue

  File "c:\Users\Samsung\anaconda3\envs\ML\lib\site-packages\ipykernel\kernelbase.py", line 499, in process_one

  File "c:\Users\Samsung\anaconda3\envs\ML\lib\site-packages\ipykernel\kernelbase.py", line 406, in dispatch_shell

  File "c:\Users\Samsung\anaconda3\envs\ML\lib\site-packages\ipykernel\kernelbase.py", line 730, in execute_request

  File "c:\Users\Samsung\anaconda3\envs\ML\lib\site-packages\ipykernel\ipkernel.py", line 383, in do_execute

  File "c:\Users\Samsung\anaconda3\envs\ML\lib\site-packages\ipykernel\zmqshell.py", line 528, in run_cell

  File "c:\Users\Samsung\anaconda3\envs\ML\lib\site-packages\IPython\core\interactiveshell.py", line 3006, in run_cell

  File "c:\Users\Samsung\anaconda3\envs\ML\lib\site-packages\IPython\core\interactiveshell.py", line 3061, in _run_cell

  File "c:\Users\Samsung\anaconda3\envs\ML\lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner

  File "c:\Users\Samsung\anaconda3\envs\ML\lib\site-packages\IPython\core\interactiveshell.py", line 3266, in run_cell_async

  File "c:\Users\Samsung\anaconda3\envs\ML\lib\site-packages\IPython\core\interactiveshell.py", line 3445, in run_ast_nodes

  File "c:\Users\Samsung\anaconda3\envs\ML\lib\site-packages\IPython\core\interactiveshell.py", line 3505, in run_code

  File "C:\Users\Samsung\AppData\Local\Temp\ipykernel_12896\3621477213.py", line 3, in <module>

  File "c:\Users\Samsung\anaconda3\envs\ML\lib\site-packages\keras\src\utils\traceback_utils.py", line 65, in error_handler

  File "c:\Users\Samsung\anaconda3\envs\ML\lib\site-packages\keras\src\engine\training.py", line 1783, in fit

  File "c:\Users\Samsung\anaconda3\envs\ML\lib\site-packages\keras\src\engine\training.py", line 1377, in train_function

  File "c:\Users\Samsung\anaconda3\envs\ML\lib\site-packages\keras\src\engine\training.py", line 1360, in step_function

  File "c:\Users\Samsung\anaconda3\envs\ML\lib\site-packages\keras\src\engine\training.py", line 1349, in run_step

  File "c:\Users\Samsung\anaconda3\envs\ML\lib\site-packages\keras\src\engine\training.py", line 1126, in train_step

  File "c:\Users\Samsung\anaconda3\envs\ML\lib\site-packages\keras\src\utils\traceback_utils.py", line 65, in error_handler

  File "c:\Users\Samsung\anaconda3\envs\ML\lib\site-packages\keras\src\engine\training.py", line 589, in __call__

  File "c:\Users\Samsung\anaconda3\envs\ML\lib\site-packages\keras\src\utils\traceback_utils.py", line 65, in error_handler

  File "c:\Users\Samsung\anaconda3\envs\ML\lib\site-packages\keras\src\engine\base_layer.py", line 1149, in __call__

  File "c:\Users\Samsung\anaconda3\envs\ML\lib\site-packages\keras\src\utils\traceback_utils.py", line 96, in error_handler

  File "c:\Users\Samsung\anaconda3\envs\ML\lib\site-packages\keras\src\engine\functional.py", line 515, in call

  File "c:\Users\Samsung\anaconda3\envs\ML\lib\site-packages\keras\src\engine\functional.py", line 672, in _run_internal_graph

  File "c:\Users\Samsung\anaconda3\envs\ML\lib\site-packages\keras\src\utils\traceback_utils.py", line 65, in error_handler

  File "c:\Users\Samsung\anaconda3\envs\ML\lib\site-packages\keras\src\engine\base_layer.py", line 1149, in __call__

  File "c:\Users\Samsung\anaconda3\envs\ML\lib\site-packages\keras\src\utils\traceback_utils.py", line 96, in error_handler

  File "c:\Users\Samsung\anaconda3\envs\ML\lib\site-packages\keras\src\layers\core\lambda_layer.py", line 212, in call

  File "C:\Users\Samsung\AppData\Local\Temp\ipykernel_12896\3353683713.py", line 6, in ctc_lambda_func

  File "c:\Users\Samsung\anaconda3\envs\ML\lib\site-packages\keras\src\backend.py", line 7171, in ctc_batch_cost

All labels must be nonnegative integers, batch: 0 labels: 77,66,75,80,62,79,0,64,76,75,80,64,70,66,75,81,66,0,15,15,88,64,76,74,76,88,15,15,0,-1,-1,66,-1,-1,0,73,66,83,66,74,0,77,62,79,62,0,62,0,64,76,74,82,75,70,65,62,65,66,0,62,0,79,66,67,73,66,85,103,76,0,80,76,63,79,66,0,76,0,81,66,74,62,13,0,79,66,65,82,87,70,75,65,76,0,76,0,77,79,66,64,76,75,64,66,70,81,76,16,0,96
	 [[{{node model_7/ctc/CTCLoss}}]] [Op:__inference_train_function_55457]