In [15]:
import os
import sys
# Put the parent directory on the import path (only once), presumably so the
# project-local packages imported in the next cell can be resolved.
module_path = os.path.abspath('..')
if module_path not in sys.path:
    sys.path.append(module_path)

In [16]:
# Import libraries
import os
import tarfile
from tqdm import tqdm
from io import BytesIO
from zipfile import ZipFile
from urllib.request import urlopen
import pandas as pd

import torch
import torch.optim as optim
from torchsummaryX import summary

from mltu.torch.model import Model
from mltu.torch.losses import CTCLoss
from mltu.torch.dataProvider import DataProvider
from mltu.torch.metrics import CERMetric, WERMetric
from mltu.torch.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard, Model2onnx, ReduceLROnPlateau

from mltu.preprocessors import ImageReader
from mltu.transformers import ImageResizer, LabelIndexer, LabelPadding, ImageShowCV2
from mltu.augmentors import RandomBrightness, RandomRotate, RandomErodeDilate, RandomSharpen
from mltu.annotations.images import CVImage

from handwriting_recognition_torch.model import Network
from handwriting_recognition_torch.configs import ModelConfigs

In [17]:
# Load the label table; the next cell reads its 'ID' and 'Label' columns.
df = pd.read_csv(filepath_or_buffer='labels_BR.csv')

In [18]:
# Collect [image_path, label] pairs and, in the same pass, the set of characters
# used by the labels and the length of the longest label.
dataset, vocab, max_len = [], set(), 0
# Iterate over the two needed columns directly instead of DataFrame.iterrows(),
# which builds a Series for every row and is considerably slower.
for path_image, label in tqdm(zip(df['ID'], df['Label']), total=len(df)):
    dataset.append([path_image, label])
    vocab.update(label)
    max_len = max(max_len, len(label))

# Store the dataset-derived settings on the model configuration, then call save().
configs = ModelConfigs()
configs.model_path = "./Models/"
configs.train_epochs = 100
configs.vocab = "".join(sorted(vocab))  # deterministic character order
configs.max_text_length = max_len
configs.save()

100%|██████████| 11372/11372 [00:00<00:00, 27760.79it/s]


In [19]:
# Display the sorted character set collected from the labels.
configs.vocab

" '(),-.012345679:ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzÁÂÃÇÉÊÍÓÔÕÚáãçéêó"

In [20]:
# Preview only the first few [image_path, label] pairs: displaying the whole list
# floods the notebook and stores every transcription in the saved output.
dataset[:5]

[['../data/data_20230622/14881.jpeg', 'Natureza'],
 ['../data/data_20230622/14882.jpeg', 'Planeta'],
 ['../data/data_20230622/14883.jpeg', 'Terra'],
 ['../data/data_20230622/14884.jpeg', 'Flor'],
 ['../data/data_20230622/14885.jpeg', 'O nosso planeta é muito bonito']]

In [21]:
# Build the data provider: images are read with ImageReader and then passed
# through the resize / label-indexing / label-padding transformers below.
data_provider = DataProvider(
    dataset=dataset,
    batch_size=configs.batch_size,
    skip_validation=True,
    use_cache=True,
    data_preprocessors=[ImageReader(CVImage)],
    transformers=[
        # ImageShowCV2(),  # uncomment to show images when iterating over the data provider
        ImageResizer(configs.width, configs.height, keep_aspect_ratio=False),
        LabelIndexer(configs.vocab),
        # Labels are padded with len(configs.vocab), the same index the loss
        # cell passes to CTCLoss as the blank.
        LabelPadding(
            padding_value=len(configs.vocab),
            max_word_length=configs.max_text_length,
        ),
    ],
)

2023-06-23 11:48:18,205 INFO DataProvider: Skipping Dataset validation...


In [22]:
# Split the dataset into training and test providers (90% / 10%).
train_dataProvider, test_dataProvider = data_provider.split(split=0.9)

In [23]:
# Augmentors are assigned to the training provider only; the test provider is
# not given any in this notebook.
train_dataProvider.augmentors = [
    RandomBrightness(),
    RandomErodeDilate(),
    RandomSharpen(),
    RandomRotate(angle=10),
]

In [24]:
# Recognition network sized by the vocabulary length.
network = Network(
    len(configs.vocab),
    dropout=0.3,
    activation="leaky_relu",
)
# The CTC blank index is len(configs.vocab), the same value used as the label
# padding value when the data provider was built.
loss = CTCLoss(blank=len(configs.vocab))
optimizer = optim.Adam(params=network.parameters(), lr=configs.learning_rate)

In [25]:
# Training callbacks. The metric-driven ones all monitor "val_CER" in "min" mode.
# Paths are built with os.path.join so that a trailing slash in
# configs.model_path does not produce "Models//model.pt".
earlyStopping = EarlyStopping(monitor="val_CER", patience=20, mode="min", verbose=1)
modelCheckpoint = ModelCheckpoint(
    os.path.join(configs.model_path, "model.pt"),
    monitor="val_CER",
    mode="min",
    save_best_only=True,
    verbose=1,
)
tb_callback = TensorBoard(os.path.join(configs.model_path, "logs"))
reduce_lr = ReduceLROnPlateau(monitor="val_CER", factor=0.9, patience=10, verbose=1, mode="min", min_lr=1e-6)
# ONNX exporter pointing at the checkpoint written by modelCheckpoint.
# opset_version matches the exporter instance handed to model.fit.
model2onnx = Model2onnx(
    saved_model_path=os.path.join(configs.model_path, "model.pt"),
    input_shape=(1, configs.height, configs.width, 3),
    verbose=1,
    metadata={"vocab": configs.vocab},
    opset_version=10,
)

In [26]:
# Wrap the network, optimizer and loss in the training Model and track the
# character (CER) and word (WER) error-rate metrics.
model = Model(network, optimizer, loss, metrics=[CERMetric(configs.vocab), WERMetric(configs.vocab)])
# Train on the training provider and evaluate on the test provider.
# fit() is the last expression so its returned value is shown as the cell output.
model.fit(
    train_dataProvider,
    test_dataProvider,
    epochs=configs.train_epochs,
    callbacks=[
        earlyStopping,
        modelCheckpoint,
        tb_callback,
        reduce_lr,
        Model2onnx(
            # os.path.join avoids the "Models//model.pt" double slash that plain
            # string concatenation produces when model_path ends with "/".
            saved_model_path=os.path.join(configs.model_path, "model.pt"),
            input_shape=(1, configs.height, configs.width, 3),
            verbose=1,
            metadata={"vocab": configs.vocab},
            opset_version=10,
        ),
    ],
)

Epoch 1 - loss: 5.1343 - CER: 1.0283 - WER: 1.0000: 100%|██████████| 160/160 [01:50<00:00,  1.45it/s]
          val_loss: 3.5807 - val_CER: 1.0000 - val_WER: 1.0000: 100%|██████████| 18/18 [00:04<00:00,  4.13it/s]
2023-06-23 11:50:13,834 INFO ModelCheckpoint: Epoch 1: val_CER improved from inf to 1.00000, saving model to ./Models//model.pt
Epoch 2 - loss: 3.4310 - CER: 0.9645 - WER: 0.9997: 100%|██████████| 160/160 [02:01<00:00,  1.31it/s]
          val_loss: 3.3142 - val_CER: 0.9208 - val_WER: 0.9991: 100%|██████████| 18/18 [00:03<00:00,  4.71it/s]
2023-06-23 11:52:19,651 INFO ModelCheckpoint: Epoch 2: val_CER improved from 1.00000 to 0.92079, saving model to ./Models//model.pt
Epoch 3 - loss: 3.2563 - CER: 0.9239 - WER: 0.9997: 100%|██████████| 160/160 [01:58<00:00,  1.35it/s]
          val_loss: 3.1567 - val_CER: 0.9039 - val_WER: 0.9991: 100%|██████████| 18/18 [00:03<00:00,  4.68it/s]
2023-06-23 11:54:22,244 INFO ModelCheckpoint: Epoch 3: val_CER improved from 0.92079 to 0.90388, s

{'loss': 1.8289907164871693,
 'CER': 0.45859772010751865,
 'WER': 0.7919888200431034,
 'val_loss': 1.7052457398838468,
 'val_CER': 0.3969233360132618,
 'val_WER': 0.7010763888888889}

In [27]:
# Save the training and validation splits as CSV files next to the model
# artifacts. The directory comes from configs.model_path instead of a duplicated
# literal, so the exports follow the configured model directory.
train_dataProvider.to_csv(os.path.join(configs.model_path, "train_BR.csv"))
test_dataProvider.to_csv(os.path.join(configs.model_path, "val_BR.csv"))

In [28]:
# Test with a single image (disabled).
# NOTE(review): this snippet loads a Keras "model.hdf5", whereas the training
# cells above save a PyTorch checkpoint to "model.pt" — it looks stale; confirm
# and port it before re-enabling.
# import cv2
# import numpy as np
# from keras.models import load_model

# # Load the trained model
# model = load_model(configs.model_path + "/model.hdf5")

# # Load the image
# image = cv2.imread("../data/iam_data/words/a01/a01-000u/a01-000u-00-00.png")

# # Preprocess the image
# image = cv2.resize(image, (configs.image_size, configs.image_size))
# image = image.astype("float") / 255.0
# image = np.expand_dims(image, axis=0)

# # Run the prediction on the image
# prediction = model.predict(image)

# # Print the prediction
# print(prediction)