In [5]:
import os
# Select which GPUs this process may see and how they are numbered.
# This runs in the first cell, ahead of the torch import in the next cell.
# NOTE(review): the device list has spaces after the commas; confirm the
# CUDA runtime parses it as the four intended devices.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "1, 2, 4, 5",
})

In [6]:
import torch.backends.cudnn as cudnn
import yaml
from train import train
from utils import AttrDict
import pandas as pd
from datetime import date

In [7]:
def get_config(file_path):
    """Load a YAML training config and attach the character set to it.

    The file is parsed with ``yaml.safe_load`` and wrapped in ``AttrDict``.
    ``opt.character`` is then filled in one of two ways:

    * ``lang_char`` is the string ``'None'`` (or a YAML null): the character
      set is every distinct character found in the ``words`` column of
      ``<train_data>/<name>/labels.csv`` for each ``name`` in the
      ``-``-separated ``select_data`` value, in sorted order.
    * otherwise: ``opt.number + opt.symbol + opt.lang_char``.

    Side effects: prints the number of characters and creates
    ``./saved_models/<experiment_name>`` if it does not exist yet.

    Args:
        file_path: Path to the YAML config file (read as UTF-8).

    Returns:
        The ``AttrDict`` config with ``character`` set.
    """
    with open(file_path, 'r', encoding="utf8") as stream:
        opt = AttrDict(yaml.safe_load(stream))

    # A YAML null is treated like the 'None' sentinel; it would otherwise fall
    # through to the concatenation below and fail with a TypeError.
    if opt.lang_char is None or opt.lang_char == 'None':
        seen_chars = set()
        for data in opt['select_data'].split('-'):
            csv_path = os.path.join(opt['train_data'], data, 'labels.csv')
            # The anchored regex separator matches only the first field and its
            # comma, so commas inside a label stay part of `words`.
            # dtype=str disables type inference: purely numeric labels stay
            # strings (leading zeros kept) instead of becoming numbers that
            # ''.join() cannot handle.
            df = pd.read_csv(
                csv_path,
                sep='^([^,]+),',
                engine='python',
                usecols=['filename', 'words'],
                keep_default_na=False,
                dtype=str,
            )
            seen_chars.update(''.join(df['words']))
        opt.character = ''.join(sorted(seen_chars))
    else:
        opt.character = opt.number + opt.symbol + opt.lang_char

    print(f"Количество символов: {len(opt.character)}")
    os.makedirs(f'./saved_models/{opt.experiment_name}', exist_ok=True)
    return opt

In [None]:
# Build the run options from the YAML config, then launch training
# (passing amp=False).
CONFIG_PATH = "config_files/ru_filtered_config.yaml"
opt = get_config(CONFIG_PATH)
train(opt, amp=False)

Количество символов: 34
Filtering the images containing characters which are not in opt.character
Filtering the images whose label is longer than opt.batch_max_length
--------------------------------------------------------------------------------
dataset_root: all_data
opt.select_data: ['train']
opt.batch_ratio: ['1']
--------------------------------------------------------------------------------
dataset_root:    all_data	 dataset: train
all_data/train
sub-directory:	/train	 num samples: 500000
num total samples of train: 500000 x 1.0 (total_data_usage_ratio) = 500000
num samples of train per batch: 1536 x 1.0 (batch_ratio) = 1536
--------------------------------------------------------------------------------
Total_batch_size: 1536 = 1536
--------------------------------------------------------------------------------
dataset_root:    all_data/val	 dataset: /
all_data/val/
sub-directory:	/.	 num samples: 50000
-------------------------------------------------------------------------

training time:  151.70438766479492
[50/50000] Train loss: 0.02225, Valid loss: 0.07211, Elapsed_time: 151.70464
Current_accuracy : 77.228, Current_norm_ED  : 0.9819
Best_accuracy    : 77.228, Best_norm_ED     : 0.9819
--------------------------------------------------------------------------------
Ground Truth              | Prediction                | Confidence Score & T/F
--------------------------------------------------------------------------------
тигельной аэролифтом недослышанными | тигельной аэролифтом недослышанными | 0.0576	True
четырёхэтажным сложноподчинённый контрактам пикратам перепруживаемые | четырепотемьта сложнопоочинённый крстреннам паоратая переприжираемый | 0.0000	False
--------------------------------------------------------------------------------
validation time:  429.9054605960846
training time:  116.3916187286377
[100/50000] Train loss: 0.02239, Valid loss: 0.07286, Elapsed_time: 698.00195
Current_accuracy : 77.392, Current_norm_ED  : 0.9819
Best_accuracy   

training time:  116.28256797790527
[600/50000] Train loss: 0.02133, Valid loss: 0.08164, Elapsed_time: 6115.52716
Current_accuracy : 75.960, Current_norm_ED  : 0.9807
Best_accuracy    : 77.560, Best_norm_ED     : 0.9820
--------------------------------------------------------------------------------
Ground Truth              | Prediction                | Confidence Score & T/F
--------------------------------------------------------------------------------
загромождающая пожаперекалявшихинд земледельческих подготавливаемыми | загромождающая пожаперекалявшихинд земледельческих подготавливаемыми | 0.2816	True
неубранным предреченый лексикографического фальсификаторов схоронила | неубранным предреченый лексикографического фальсификаторов схоронила | 0.8000	True
--------------------------------------------------------------------------------
validation time:  476.9168846607208
training time:  115.67104935646057
[650/50000] Train loss: 0.02356, Valid loss: 0.07620, Elapsed_time: 6708.11578


training time:  116.67344951629639
[1100/50000] Train loss: 0.01711, Valid loss: 0.08513, Elapsed_time: 12286.85295
Current_accuracy : 76.746, Current_norm_ED  : 0.9814
Best_accuracy    : 77.560, Best_norm_ED     : 0.9821
--------------------------------------------------------------------------------
Ground Truth              | Prediction                | Confidence Score & T/F
--------------------------------------------------------------------------------
жалостливого визжавшее варнаки | жалостливого визжавшее варнаки | 0.1844	True
веслахст уязвимой орденским эстетству погорельская | веслахст уязвимой орденским эстетству погорельская | 0.0707	True
--------------------------------------------------------------------------------
validation time:  440.3336372375488
training time:  116.6563835144043
[1150/50000] Train loss: 0.01758, Valid loss: 0.08049, Elapsed_time: 12843.84340
Current_accuracy : 77.100, Current_norm_ED  : 0.9818
Best_accuracy    : 77.560, Best_norm_ED     : 0.9821
---