In [16]:
import os
import sys
# Put the parent directory on sys.path (only once) so modules located there
# can be imported from this notebook.
parent_dir = os.path.join('..')
module_path = os.path.abspath(parent_dir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [17]:
# Imports: standard library, third-party packages, mltu utilities, and project-local modules
import os
import tarfile
from tqdm import tqdm
from io import BytesIO
from zipfile import ZipFile
from urllib.request import urlopen
import pandas as pd

import torch
import torch.optim as optim
from torchsummaryX import summary

from mltu.torch.model import Model
from mltu.torch.losses import CTCLoss
from mltu.torch.dataProvider import DataProvider
from mltu.torch.metrics import CERMetric, WERMetric
from mltu.torch.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard, Model2onnx, ReduceLROnPlateau

from mltu.preprocessors import ImageReader
from mltu.transformers import ImageResizer, LabelIndexer, LabelPadding, ImageShowCV2
from mltu.augmentors import RandomBrightness, RandomRotate, RandomErodeDilate, RandomSharpen
from mltu.annotations.images import CVImage

from handwriting_recognition_torch.model import Network
from handwriting_recognition_torch.configs import ModelConfigs

In [18]:
# Load the labels table; later cells read its 'ID' and 'Label' columns.
df = pd.read_csv(filepath_or_buffer='labels.csv')

In [19]:
# Collect (image path, label text) pairs from the labels table.
dataset = [
    (row['ID'], row['Label'])
    for _, row in tqdm(df.iterrows(), total=len(df))
]

# Derive the set of characters used by the labels and the longest label length.
vocab = set()
for _, text in dataset:
    vocab.update(text)
max_len = max((len(text) for _, text in dataset), default=0)

# Fill in the data-dependent configuration fields and persist the configs.
configs = ModelConfigs()
configs.model_path = os.path.join("./Models/")
configs.train_epochs = 10
configs.vocab = "".join(sorted(vocab))
configs.max_text_length = max_len
configs.save()

  0%|          | 0/26253 [00:00<?, ?it/s]

100%|██████████| 26253/26253 [00:00<00:00, 27373.97it/s]


In [20]:
# Display the vocabulary string built from the sorted set of label characters.
configs.vocab

" '()+,-.0123456789:<=>ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzÁÂÃÇÉÊÍÓÔÕÚáãäçéêíóöüćčšе‘√✓"

In [21]:
# Build the data provider over the (image path, label) pairs.
image_resizer = ImageResizer(configs.width, configs.height, keep_aspect_ratio=False)
label_indexer = LabelIndexer(configs.vocab)
# Labels are padded with len(vocab), an index just past the last vocabulary
# position; the same value is used as the CTC blank index in the loss cell.
label_padding = LabelPadding(
    max_word_length=configs.max_text_length,
    padding_value=len(configs.vocab),
)

data_provider = DataProvider(
    dataset=dataset,
    skip_validation=True,
    batch_size=configs.batch_size,
    data_preprocessors=[ImageReader(CVImage)],
    # Put ImageShowCV2() at the front of this list to show images when
    # iterating over the data provider.
    transformers=[image_resizer, label_indexer, label_padding],
    use_cache=True,
)

2023-06-22 15:21:01,757 INFO DataProvider: Skipping Dataset validation...


In [22]:
# Split the dataset into training and test parts (90% and 10%).
split_providers = data_provider.split(split=0.9)
train_dataProvider, test_dataProvider = split_providers

In [23]:
# Attach random augmentations to the training provider only; the test
# provider is left without augmentors.
train_augmentors = [RandomBrightness(), RandomErodeDilate(), RandomSharpen()]
train_augmentors.append(RandomRotate(angle=10))
train_dataProvider.augmentors = train_augmentors

In [24]:
# Network, loss and optimizer. The CTC blank index is len(vocab), matching the
# padding value used for the labels in the data provider.
num_chars = len(configs.vocab)
network = Network(num_chars, activation="leaky_relu", dropout=0.3)
loss = CTCLoss(blank=num_chars)
optimizer = optim.Adam(params=network.parameters(), lr=configs.learning_rate)

In [25]:
# Training callbacks; each monitoring callback watches the "val_CER" metric.
# Paths are built with os.path.join so that the trailing separator in
# configs.model_path ("./Models/") does not yield "//" in the file names.
model_file = os.path.join(configs.model_path, "model.pt")
logs_dir = os.path.join(configs.model_path, "logs")

# NOTE(review): patience=20 here and patience=10 for ReduceLROnPlateau are not
# below the 10 configured training epochs, so these callbacks presumably never
# trigger in this run -- confirm the intended values.
earlyStopping = EarlyStopping(monitor="val_CER", patience=20, mode="min", verbose=1)
modelCheckpoint = ModelCheckpoint(model_file, monitor="val_CER", mode="min", save_best_only=True, verbose=1)
tb_callback = TensorBoard(logs_dir)
reduce_lr = ReduceLROnPlateau(monitor="val_CER", factor=0.9, patience=10, verbose=1, mode="min", min_lr=1e-6)

# NOTE(review): the training cell passes its own Model2onnx instance (with
# opset_version=10) to fit; this instance is not part of that callback list.
model2onnx = Model2onnx(
    saved_model_path=model_file,
    input_shape=(1, configs.height, configs.width, 3),
    verbose=1,
    metadata={"vocab": configs.vocab},
)

In [26]:
# Wrap the network, optimizer and loss in a Model and run training.
model = Model(network, optimizer, loss, metrics=[CERMetric(configs.vocab), WERMetric(configs.vocab)])
model.fit(
    train_dataProvider,
    test_dataProvider,
    # Use the epoch count stored in the configs instead of repeating the literal.
    epochs=configs.train_epochs,
    callbacks=[earlyStopping, modelCheckpoint, tb_callback, reduce_lr, Model2onnx(
        # os.path.join avoids the "//" that string concatenation produces when
        # configs.model_path already ends with a separator.
        saved_model_path=os.path.join(configs.model_path, "model.pt"),
        input_shape=(1, configs.height, configs.width, 3),
        verbose=1,
        metadata={"vocab": configs.vocab},
        opset_version=10
    )]
)

  0%|          | 0/370 [00:00<?, ?it/s]


AttributeError: 'list' object has no attribute 'split'

In [None]:
# Save the training and validation splits as CSV files in the working directory.
for provider, csv_name in ((train_dataProvider, "train.csv"), (test_dataProvider, "val.csv")):
    provider.to_csv(os.path.join("./", csv_name))

In [None]:
# Test with a single image
# NOTE: stale, disabled snippet -- it loads a Keras "model.hdf5", whereas this
# notebook saves a PyTorch checkpoint named "model.pt".
# import cv2
# import numpy as np
# from keras.models import load_model

# # Load the trained model
# model = load_model(configs.model_path + "/model.hdf5")

# # Load the image
# image = cv2.imread("../data/iam_data/words/a01/a01-000u/a01-000u-00-00.png")

# # Preprocess the image
# image = cv2.resize(image, (configs.image_size, configs.image_size))
# image = image.astype("float") / 255.0
# image = np.expand_dims(image, axis=0)

# # Run the prediction on the image
# prediction = model.predict(image)

# # Print the prediction
# print(prediction)