# Introduction 
This notebook was made for quickly testing new CNN models and parameters over time.
Each section is explained by its own heading and comments.

In [7]:
import tensorflow as tf
from tensorflow.keras.layers import (
    Conv2D, MaxPooling2D, Flatten, Bidirectional, LSTM, Dense, Input, Reshape
)
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Attention

# Creating Model

In [8]:
def build_crnn_with_attention(input_shape, num_classes, timesteps=5):
    """Build a CNN + bidirectional LSTM + self-attention sequence classifier.

    Architecture: two Conv2D/MaxPooling2D stages, a reshape of the feature map
    into `timesteps` equal chunks, a bidirectional LSTM (128 units per
    direction), dot-product self-attention, and a per-timestep softmax.

    Args:
        input_shape: Shape of one input sample, (height, width, channels).
        num_classes: Number of classes predicted at every timestep.
        timesteps: Number of sequence positions to predict (for example the
            number of characters per captcha). Defaults to 5.

    Returns:
        An uncompiled `Model` whose output has shape
        (batch, timesteps, num_classes).

    Raises:
        ValueError: If `timesteps` is not positive, or if the pooled feature
            map cannot be divided evenly into `timesteps` chunks.
    """
    if timesteps < 1:
        raise ValueError(f"timesteps must be >= 1, got {timesteps}")

    # Fail early with a readable message instead of a Reshape error deep inside
    # Keras. Each 2x2 max-pool (default 'valid' padding) floor-halves the two
    # spatial dimensions; the last convolution emits 64 channels.
    last_conv_filters = 64
    height, width = input_shape[0], input_shape[1]
    if height is not None and width is not None:
        flat_features = (height // 2 // 2) * (width // 2 // 2) * last_conv_filters
        if flat_features == 0 or flat_features % timesteps != 0:
            raise ValueError(
                f"Feature map of {flat_features} values (from input_shape="
                f"{tuple(input_shape)}) cannot be split into {timesteps} timesteps"
            )

    inputs = Input(shape=input_shape, name="input_layer")

    # Convolutional feature extractor.
    x = Conv2D(32, (3, 3), activation="relu", padding="same")(inputs)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Conv2D(last_conv_filters, (3, 3), activation="relu", padding="same")(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

    # Flatten the feature map and cut it into `timesteps` equal chunks, in
    # row-major order, to form the RNN input sequence.
    x = Reshape((timesteps, -1))(x)

    # Sequence model; return_sequences=True keeps one output per timestep.
    x = Bidirectional(LSTM(128, return_sequences=True))(x)

    # Self-attention: the LSTM output is used as both query and value.
    attention_output = Attention()([x, x])

    # Per-timestep class distribution.
    x = Dense(num_classes, activation="softmax", name="output_layer")(attention_output)

    # Pins the output shape to (timesteps, num_classes) explicitly.
    outputs = Reshape((timesteps, num_classes))(x)

    return Model(inputs, outputs)


# Creating Data for training and Testing

In [9]:
import os
import numpy as np
import cv2
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import LeaveOneOut, KFold


In [12]:
import cv2
import numpy as np
from PIL import Image
import io
import base64

def cv_image_processing(image_path, gaussian_kernel, sigma, median_kernel, closing_k, dilation_k, method):
    """Load an image as grayscale, upscale, binarize and optionally clean it up.

    Steps: read as grayscale, resize by a factor of 1.8, apply a fixed binary
    threshold at 127, optionally median-blur and/or Gaussian-blur, then apply
    the morphological operation selected by `method`.

    Args:
        image_path: Path of the image file to read.
        gaussian_kernel: Side length of the square Gaussian kernel, or None to
            skip the Gaussian blur.
        sigma: Gaussian sigma (only used when `gaussian_kernel` is given).
        median_kernel: Aperture size for the median blur, or None to skip it.
        closing_k: Kernel shape (rows, cols) for morphological closing, or None.
        dilation_k: Kernel shape (rows, cols) for dilation, or None.
        method: 'dilation' or 'closing'. Any other value, or a None kernel for
            the selected method, returns the thresholded image as is.

    Returns:
        The processed image as a 2-D uint8 array.

    Raises:
        FileNotFoundError: If the image cannot be read from `image_path`.
    """
    img = cv2.imread(image_path, 0)  # flag 0 -> load as grayscale
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")

    height, width = img.shape[:2]
    img = cv2.resize(img, (int(width * 1.8), int(height * 1.8)))
    _, thresh = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)

    if median_kernel is not None:
        thresh = cv2.medianBlur(thresh, median_kernel)

    if gaussian_kernel is not None:
        thresh = cv2.GaussianBlur(thresh, (gaussian_kernel, gaussian_kernel), sigma)

    # Structuring elements are built only for the branch that uses them, so a
    # None kernel size for the unused method is harmless.
    if method == 'dilation' and dilation_k is not None:
        return cv2.dilate(thresh, np.ones(dilation_k, np.uint8), iterations=1)

    if method == 'closing' and closing_k is not None:
        return cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, np.ones(closing_k, np.uint8))

    return thresh

## Creating Images and Labels

In [None]:
def load_images_and_labels(data_dir, img_shape=(50, 200), method="closing"):
    """Load, preprocess and normalize every .png/.jpg image in a directory.

    Each image is run through `cv_image_processing`, resized, and scaled to
    [0, 1]. The label of an image is its file name without the extension.

    Args:
        data_dir: Directory containing the image files.
        img_shape: Target size handed to `cv2.resize` as its `dsize` argument,
            i.e. (width, height). The resulting arrays therefore have shape
            (img_shape[1], img_shape[0]).
        method: Morphological step forwarded to `cv_image_processing`
            ('closing' or 'dilation'). Defaults to 'closing'.

    Returns:
        A tuple `(images, labels)`: `images` is a float32 array of shape
        (n_images, img_shape[1], img_shape[0]) with values in [0, 1], and
        `labels` is a 1-D array of label strings in the same order.
    """
    images = []
    labels = []

    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order, and therefore every seeded split downstream, reproducible.
    for file_name in sorted(os.listdir(data_dir)):
        if not file_name.endswith(('.png', '.jpg')):
            continue

        img_path = os.path.join(data_dir, file_name)
        img = cv_image_processing(
            img_path,
            gaussian_kernel=None, sigma=0.5,
            median_kernel=None,
            closing_k=(5, 5),
            dilation_k=(3, 5),
            method=method,
        )
        images.append(cv2.resize(img, img_shape))

        # The label is the file name without its extension.
        labels.append(os.path.splitext(file_name)[0])

    # Convert to numpy arrays and normalize pixel values from 0-255 to 0-1.
    images = np.array(images, dtype=np.float32) / 255.0
    labels = np.array(labels)

    return images, labels

# Creating One-hot encoding for test

In [14]:
def encode_labels(labels, max_length=5, encoder=None):
    """One-hot encode label strings character by character.

    Args:
        labels: Iterable of label strings (one per sample).
        max_length: Fixed number of character positions per label. Labels
            shorter than this are padded with all-zero rows.
        encoder: Optional already-fitted `OneHotEncoder` to reuse, so that
            several label sets share the same class-to-column mapping. When
            None, a new encoder is fitted on the characters found in `labels`.

    Returns:
        A tuple `(encoded_labels, encoder)`: `encoded_labels` has shape
        (n_labels, max_length, n_classes), where n_classes is the number of
        categories known to the encoder; `encoder` is the encoder used.

    Raises:
        ValueError: If a label is longer than `max_length`, or if a new encoder
            has to be fitted but `labels` contains no characters.
    """
    label_strings = [str(label) for label in labels]
    for label in label_strings:
        if len(label) > max_length:
            raise ValueError(
                f"Label {label!r} has {len(label)} characters, more than max_length={max_length}"
            )

    # Flatten all labels into a single column of characters.
    chars = np.array([char for label in label_strings for char in label]).reshape(-1, 1)

    if encoder is None:
        if chars.size == 0:
            raise ValueError("Cannot fit an encoder: `labels` contains no characters.")
        encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
        encoder.fit(chars)

    # Take the class count from the encoder instead of hard-coding it.
    num_classes = len(encoder.categories_[0])
    encoded_labels = np.zeros((len(label_strings), max_length, num_classes))

    if chars.size:
        # One transform call for every character, then scatter the rows back
        # into their (label, position) slots.
        one_hot = encoder.transform(chars)
        if hasattr(one_hot, "toarray"):
            # A caller-supplied encoder may be configured for sparse output.
            one_hot = one_hot.toarray()

        offset = 0
        for row, label in enumerate(label_strings):
            length = len(label)
            encoded_labels[row, :length] = one_hot[offset:offset + length]
            offset += length

    return encoded_labels, encoder


# Setting up k-fold cross-validation (leave-one-out is not used)

In [None]:
def split_leave_one_out(images, labels, splits):
    """Create the cross-validation splitter used by the pipeline.

    Despite the name, this returns a shuffled K-fold splitter, not a
    leave-one-out splitter.

    Args:
        images: Unused; kept so existing calls keep working.
        labels: Unused; kept so existing calls keep working.
        splits: Number of folds.

    Returns:
        A `KFold` instance with `n_splits=splits`, shuffling enabled and a
        fixed `random_state` of 42. Call `.split(data)` on it to obtain the
        (train_indices, test_indices) pairs.
    """
    return KFold(n_splits=splits, shuffle=True, random_state=42)

# Pipeline

In [None]:
# Directory containing the captcha images; each file name (minus its
# extension) is used as the image's label.
data_dir = '../data/samples'

# 1. Load images and labels. `img_shape` is forwarded to cv2.resize, which
#    takes (width, height).
images, labels = load_images_and_labels(data_dir, img_shape=(200, 50))

# Fail fast on a wrong or empty directory instead of failing later inside the
# split or training cells with a less obvious error.
assert len(images) > 0, f"No .png/.jpg images found in {data_dir!r}"
assert len(images) == len(labels), "Every image must have exactly one label"

In [None]:
from sklearn.model_selection import train_test_split

# Encode every label with ONE encoder fitted on the full label set, then split.
# Fitting separate encoders on the train and test subsets can map the same
# character to different one-hot columns whenever a subset is missing a class.
encoded_all_labels, train_encoder = encode_labels(labels)
test_encoder = train_encoder  # same object; the name is kept for existing references

# Conv2D needs an explicit trailing channel axis: (N, H, W) -> (N, H, W, 1).
images_with_channel = images[..., np.newaxis]

train_images, test_images, train_labels, test_labels = train_test_split(
    images_with_channel, encoded_all_labels, test_size=0.2, random_state=42
)

# Derive the model's input shape and class count from the data. The loaded
# arrays are (height, width) = (50, 200) because cv2.resize takes
# (width, height), so a hard-coded (200, 50, 1) does not match them.
model = build_crnn_with_attention(
    input_shape=train_images.shape[1:],
    num_classes=train_labels.shape[-1],
)

# Compile the model.
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)
# model.summary()

# Train the model; the held-out split doubles as validation data.
history = model.fit(
    train_images, train_labels,
    validation_data=(test_images, test_labels),
    batch_size=32,
    epochs=5,  # adjust as needed
    verbose=1
)



In [None]:
# Evaluate the trained model on the held-out split. `accuracy` is the metric
# requested in model.compile.
# NOTE(review): with a (timesteps, classes) output this is presumably
# per-character accuracy rather than whole-captcha accuracy - confirm.
loss, accuracy = model.evaluate(test_images, test_labels, verbose=0)
print(f"Acurácia no conjunto de teste: {accuracy:.4f}")

In [None]:
# One-hot encode every label with an encoder fitted on the full label set;
# the cross-validation cells index into `encoded_labels`.
encoded_labels, encoder = encode_labels(labels)

In [None]:
# 3. Configure cross-validation. Despite the helper's name, it returns a
#    shuffled KFold splitter with `n_splits` folds, not leave-one-out.
n_splits = 5
folds = split_leave_one_out(images, encoded_labels, splits=n_splits)

In [None]:
accuracy_per_fold = []

# Conv2D needs an explicit trailing channel axis: (N, H, W) -> (N, H, W, 1).
fold_inputs = images[..., np.newaxis]

# Iterate over the folds.
for fold, (train_idx, test_idx) in enumerate(folds.split(fold_inputs)):
    print(f"Treinando Fold {fold + 1}/{n_splits}...")

    # Select the data for the current fold.
    train_images, test_images = fold_inputs[train_idx], fold_inputs[test_idx]
    train_labels, test_labels = encoded_labels[train_idx], encoded_labels[test_idx]

    # Drop the previous fold's graph and layer state so memory does not grow
    # with every fold.
    tf.keras.backend.clear_session()

    # Build a fresh model for every fold so no weights are carried over. The
    # input shape and class count come from the data: the loaded arrays are
    # (height, width) = (50, 200), which a hard-coded (200, 50, 1) does not match.
    model = build_crnn_with_attention(
        input_shape=fold_inputs.shape[1:],
        num_classes=encoded_labels.shape[-1],
    )

    # Compile the model.
    model.compile(
        optimizer="adam",
        loss="categorical_crossentropy",
        metrics=["accuracy"]
    )
    # model.summary()

    # Train the model; the fold's test split doubles as validation data.
    history = model.fit(
        train_images, train_labels,
        validation_data=(test_images, test_labels),
        batch_size=32,
        epochs=10,  # adjust as needed
        verbose=1
    )

    # Evaluate on the fold's test split.
    loss, accuracy = model.evaluate(test_images, test_labels, verbose=0)
    print(f"Fold {fold + 1} - Loss: {loss:.4f}, Accuracy: {accuracy:.4f}")

    # Store this fold's accuracy.
    accuracy_per_fold.append(accuracy)

In [None]:
# Aggregate results: mean and standard deviation of the accuracy across folds.
print("\nResultados finais:")
print(f"Acurácia média: {np.mean(accuracy_per_fold):.4f}")
print(f"Desvio padrão da acurácia: {np.std(accuracy_per_fold):.4f}")

# Resultados 
## 5-fold 10 Epochs
### Dilation preprocessing:
- Acurácia média: 0.4105
- Desvio padrão da acurácia: 0.0174
### Closing preprocessing
- Acurácia média: 0.4054
- Desvio padrão da acurácia: 0.0394

## 0.2 hold-out split (`train_test_split`, `test_size=0.2`, not stratified)
### 10 Epochs
#### Closing
- Acurácia média: 0.3916
#### Dilation: 
- Acurácia média: 0.3308

### 30 Epochs
#### Closing
- Acurácia média: 0.5430
#### Dilation: 
- Acurácia média: 0.5355

### 50 Epochs
#### Closing
- Acurácia média: 0.5551
#### Dilation: 
- Acurácia média: 0.5888
