In [21]:
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

In [22]:
# importação das bibliotecas
import os
import tarfile
from tqdm import tqdm
from io import BytesIO
from zipfile import ZipFile
from urllib.request import urlopen
import pandas as pd

import torch
import torch.optim as optim
from torchsummaryX import summary

from mltu.torch.model import Model
from mltu.torch.losses import CTCLoss
from mltu.torch.dataProvider import DataProvider
from mltu.torch.metrics import CERMetric, WERMetric
from mltu.torch.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard, Model2onnx, ReduceLROnPlateau

from mltu.preprocessors import ImageReader
from mltu.transformers import ImageResizer, LabelIndexer, LabelPadding, ImageShowCV2
from mltu.augmentors import RandomBrightness, RandomRotate, RandomErodeDilate, RandomSharpen
from mltu.annotations.images import CVImage

from handwriting_recognition_torch.model import Network
from handwriting_recognition_torch.configs import ModelConfigs

### Carregar o dataset

In [23]:
df = pd.read_csv('../data/data_20230622/data.csv', sep=';')

In [24]:
df.head()

Unnamed: 0,ID,Label,UserId,UserName,UserRole,Request,SheetId,SheetName,Date,Parent1Name,Parent2Name,Parent3Name
0,0,47,9b705e6cd6,Luis,PARTICIPANT,5f6ae87ce45a8d000156814a,5f6a1c5aa7b11b0001722ac7,100er-Zahlenband II,Sat May 20 10:46:58 UTC 2023,2. Klasse,Niklas,
1,1,26,0f1a3956d7,Matilde,PARTICIPANT,5f6ae88be45a8d000156814c,5f6a1c5aa7b11b0001722ac7,100er-Zahlenband II,Sat May 20 10:46:58 UTC 2023,2. Klasse,Niklas,
2,2,40,ea5dee75ac,Lorenzo,PARTICIPANT,5f6ae8a3e45a8d000156814e,5f6a1c5aa7b11b0001722ac7,100er-Zahlenband II,Sat May 20 10:46:58 UTC 2023,2. Klasse,Niklas,
3,3,48,ea5dee75ac,Lorenzo,PARTICIPANT,5f6ae8a3e45a8d000156814e,5f6a1c5aa7b11b0001722ac7,100er-Zahlenband II,Sat May 20 10:46:58 UTC 2023,2. Klasse,Niklas,
4,4,41,ea5dee75ac,Lorenzo,PARTICIPANT,5f6ae8a3e45a8d000156814e,5f6a1c5aa7b11b0001722ac7,100er-Zahlenband II,Sat May 20 10:46:58 UTC 2023,2. Klasse,Niklas,


In [25]:
df = df[df['UserRole'] == 'PARTICIPANTBR']
df.head()

Unnamed: 0,ID,Label,UserId,UserName,UserRole,Request,SheetId,SheetName,Date,Parent1Name,Parent2Name,Parent3Name
14881,14881,Natureza,448905d42b34dbd,JOSE GABRIEL LIMA COSTA,PARTICIPANTBR,637d057802ae3800010957af,63276c6a8b87bc0001383e01,Avaliação da Escrita 2 Ano,Tue May 23 11:26:46 UTC 2023,2ºC,Emeb Myriam Penteado Rodrigues Alckmin,Ferraz de Vasconcelos
14882,14882,Planeta,448905d42b34dbd,JOSE GABRIEL LIMA COSTA,PARTICIPANTBR,637d057802ae3800010957af,63276c6a8b87bc0001383e01,Avaliação da Escrita 2 Ano,Tue May 23 11:26:46 UTC 2023,2ºC,Emeb Myriam Penteado Rodrigues Alckmin,Ferraz de Vasconcelos
14883,14883,Terra,448905d42b34dbd,JOSE GABRIEL LIMA COSTA,PARTICIPANTBR,637d057802ae3800010957af,63276c6a8b87bc0001383e01,Avaliação da Escrita 2 Ano,Tue May 23 11:26:46 UTC 2023,2ºC,Emeb Myriam Penteado Rodrigues Alckmin,Ferraz de Vasconcelos
14884,14884,Flor,448905d42b34dbd,JOSE GABRIEL LIMA COSTA,PARTICIPANTBR,637d057802ae3800010957af,63276c6a8b87bc0001383e01,Avaliação da Escrita 2 Ano,Tue May 23 11:26:46 UTC 2023,2ºC,Emeb Myriam Penteado Rodrigues Alckmin,Ferraz de Vasconcelos
14885,14885,O nosso planeta é muito bonito,448905d42b34dbd,JOSE GABRIEL LIMA COSTA,PARTICIPANTBR,637d057802ae3800010957af,63276c6a8b87bc0001383e01,Avaliação da Escrita 2 Ano,Tue May 23 11:26:46 UTC 2023,2ºC,Emeb Myriam Penteado Rodrigues Alckmin,Ferraz de Vasconcelos


### Tratamento do dataset

In [26]:
# criar um novo dataframe com as colunas id e label
df = df[['ID', 'Label']]

In [27]:
# transformar a coluna id em string
df['ID'] = df['ID'].astype(str)

In [28]:
# adicionar uma string '../data/data_20230622/' no início da coluna id
df['ID'] = '../data/data_20230622/' + df['ID'] + '.jpeg'

In [29]:
# Salvar novo dataframe em um arquivo csv
name_path = 'labels_BR.csv'
if not os.path.exists(name_path):    
    df.to_csv(name_path, index=False)
    print(f'Arquivo {name_path} criado com sucesso!')
else :
    print(f'Arquivo {name_path} já existe')

Arquivo labels_BR.csv já existe


In [30]:
df.head()

Unnamed: 0,ID,Label
14881,../data/data_20230622/14881.jpeg,Natureza
14882,../data/data_20230622/14882.jpeg,Planeta
14883,../data/data_20230622/14883.jpeg,Terra
14884,../data/data_20230622/14884.jpeg,Flor
14885,../data/data_20230622/14885.jpeg,O nosso planeta é muito bonito


In [31]:
df = pd.read_csv('labels_BR.csv')

In [32]:
dataset, vocab, max_len = [], set(), 0
for _, row in tqdm(df.iterrows(), total=len(df)):
    # image = CVImage(row['image'])
    label = row['Label']
    path_image = row['ID']
    dataset.append([path_image, label])
    vocab.update(list(label))
    max_len = max(max_len, len(label))
    
configs = ModelConfigs()
configs.model_path = os.path.join( "./Models/")
configs.train_epochs = 200
configs.vocab = "".join(sorted(vocab))
configs.max_text_length = max_len    
configs.save()

  0%|          | 0/11372 [00:00<?, ?it/s]

100%|██████████| 11372/11372 [00:00<00:00, 29243.11it/s]


In [33]:
configs.vocab

" '(),-.012345679:ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzÁÂÃÇÉÊÍÓÔÕÚáãçéêó"

In [34]:
dataset

[['../data/data_20230622/14881.jpeg', 'Natureza'],
 ['../data/data_20230622/14882.jpeg', 'Planeta'],
 ['../data/data_20230622/14883.jpeg', 'Terra'],
 ['../data/data_20230622/14884.jpeg', 'Flor'],
 ['../data/data_20230622/14885.jpeg', 'O nosso planeta é muito bonito'],
 ['../data/data_20230622/14886.jpeg', 'Natureza'],
 ['../data/data_20230622/14887.jpeg', 'Planeta'],
 ['../data/data_20230622/14888.jpeg', 'Terra'],
 ['../data/data_20230622/14889.jpeg', 'Flor'],
 ['../data/data_20230622/14890.jpeg',
  'Kauane Sofia Alves: O nosso planeta é muito bonito'],
 ['../data/data_20230622/14891.jpeg', 'NATUREVA'],
 ['../data/data_20230622/14892.jpeg', 'PANETA'],
 ['../data/data_20230622/14893.jpeg', 'TERA'],
 ['../data/data_20230622/14894.jpeg', 'FORE'],
 ['../data/data_20230622/14895.jpeg', 'O NOSE PANETA E BONITO'],
 ['../data/data_20230622/14896.jpeg', 'NATUNEZA'],
 ['../data/data_20230622/14897.jpeg', 'PANETA'],
 ['../data/data_20230622/14898.jpeg', 'PERA'],
 ['../data/data_20230622/14899.jpe

In [35]:
# Criação do modelo e definição dos hiperparâmetros
data_provider = DataProvider(
    dataset=dataset,
    skip_validation=True,
    batch_size=configs.batch_size,
    data_preprocessors=[ImageReader(CVImage)],
    transformers=[
        # ImageShowCV2(),  # uncomment to show images when iterating over the data provider
        ImageResizer(configs.width, configs.height, keep_aspect_ratio=False),
        LabelIndexer(configs.vocab),
        LabelPadding(max_word_length=configs.max_text_length,
                     padding_value=len(configs.vocab))
    ],
    use_cache=True,
)

2023-06-26 14:43:52,327 INFO DataProvider: Skipping Dataset validation...


In [36]:
# Divisão do dataset em treino e teste (90% e 10%)
train_dataProvider, test_dataProvider = data_provider.split(split = 0.9)

In [37]:
# Aumento de dados 
train_dataProvider.augmentors = [
    RandomBrightness(), 
    RandomErodeDilate(),
    RandomSharpen(),
    ]

In [38]:
network = Network(len(configs.vocab), activation="leaky_relu", dropout=0.2)
loss = CTCLoss(blank=len(configs.vocab))
optimizer = optim.Adam(network.parameters(), lr=0.001, weight_decay=0.0001)

In [39]:
# Criação do callback para o treinamento
earlyStopping = EarlyStopping(monitor="val_CER", patience=20, mode="min", verbose=1)
modelCheckpoint = ModelCheckpoint(configs.model_path + "/model.pt", monitor="val_CER", mode="min", save_best_only=True, verbose=1)
tb_callback = TensorBoard(configs.model_path + "/logs")
reduce_lr = ReduceLROnPlateau(monitor="val_CER", factor=0.9, patience=10, verbose=1, mode="min", min_lr=1e-6)
model2onnx = Model2onnx(
    saved_model_path=configs.model_path + "/model.pt",
    input_shape=(1, configs.height, configs.width, 3), 
    verbose=1,
    metadata={"vocab": configs.vocab}
    )

In [40]:
# Criação do modelo para o treinamento
model = Model(network, optimizer, loss, metrics=[CERMetric(configs.vocab), WERMetric(configs.vocab)])
model.fit(
    train_dataProvider, 
    test_dataProvider, 
    epochs=configs.train_epochs, 
    callbacks=[earlyStopping, modelCheckpoint, tb_callback, reduce_lr, Model2onnx(
        saved_model_path=configs.model_path + "/model.pt",
        input_shape=(1, configs.height, configs.width, 3), 
        verbose=1,
        metadata={"vocab": configs.vocab},
        opset_version=10
    )]
)

Epoch 1 - loss: 6.1934 - CER: 1.0532 - WER: 1.0000: 100%|██████████| 160/160 [01:55<00:00,  1.38it/s]
          val_loss: 3.6228 - val_CER: 1.0000 - val_WER: 1.0000: 100%|██████████| 18/18 [00:04<00:00,  4.38it/s]
2023-06-26 14:45:52,425 INFO ModelCheckpoint: Epoch 1: val_CER improved from inf to 1.00000, saving model to ./Models//model.pt
Epoch 2 - loss: 3.5288 - CER: 0.9899 - WER: 1.0000: 100%|██████████| 160/160 [01:59<00:00,  1.34it/s]
          val_loss: 3.3567 - val_CER: 0.9508 - val_WER: 0.9991: 100%|██████████| 18/18 [00:03<00:00,  4.89it/s]
2023-06-26 14:47:55,634 INFO ModelCheckpoint: Epoch 2: val_CER improved from 1.00000 to 0.95084, saving model to ./Models//model.pt
Epoch 3 - loss: 3.2986 - CER: 0.9423 - WER: 0.9997: 100%|██████████| 160/160 [02:06<00:00,  1.26it/s]
          val_loss: 3.2217 - val_CER: 0.9361 - val_WER: 1.0000: 100%|██████████| 18/18 [00:03<00:00,  4.77it/s]
2023-06-26 14:50:06,461 INFO ModelCheckpoint: Epoch 3: val_CER improved from 0.95084 to 0.93612, s

{'loss': 1.4494459733366967,
 'CER': 0.3539602530724223,
 'WER': 0.7062735721982759,
 'val_loss': 1.5139315459463332,
 'val_CER': 0.339415041418443,
 'val_WER': 0.6784027777777778}

In [41]:
# Save training and validation datasets as csv files
train_dataProvider.to_csv(os.path.join("./Models/", "train_BR.csv"))
test_dataProvider.to_csv(os.path.join("./Models/", "val_BR.csv"))