In [1]:
import json
import re
import sys
from math import factorial

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from datasets import Dataset
from eval_utils import (
    Format,
    PrintCallback,
    get_metrics_computer,
    get_to_string_processor,
)
from torch import nn
from transformers import (
    AutoConfig,
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    DataCollatorForSeq2Seq,
    Seq2SeqTrainer,
    Seq2SeqTrainingArguments,
)



In [2]:
def get_preprocessor(tokenizer, format, max_length=256):
    """Build a batched preprocessing function suitable for `Dataset.map`.

    Args:
        tokenizer: Callable tokenizer. It is invoked as
            `tokenizer(inputs, text_target=targets, max_length=..., truncation=True)`.
        format: Target serialization format, forwarded unchanged to
            `get_to_string_processor`.
        max_length: Truncation length handed to the tokenizer. Defaults to 256,
            the value that used to be hard-coded here.

    Returns:
        A function that takes a batch with "Text" and "bundles" columns and
        returns whatever the tokenizer returns for that batch.
    """

    def preprocess_function(examples):
        # Line breaks are replaced by the literal " <NL> " marker before tokenizing.
        inputs = [text.replace("\n", " <NL> ") for text in examples["Text"]]
        # Resolve the serializer once per batch instead of once per example.
        to_string = get_to_string_processor(format)
        targets = [to_string(bundles) for bundles in examples["bundles"]]
        return tokenizer(
            inputs, text_target=targets, max_length=max_length, truncation=True
        )

    return preprocess_function

In [3]:
# Serialization format used for the targets throughout the notebook.
format = Format.SpecTokens

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"
print(device)

cuda:0


In [4]:
# Load the training ads, keep rows with at most five bundles and renumber the
# rows 0..n-1. The chain builds a fresh frame instead of re-indexing a filtered
# one in place.
train_data = (
    pd.read_csv("~/work/resources/data/ads_train.csv")
    .loc[lambda frame: frame["n_bundles"] <= 5]
    .reset_index(drop=True)
)
print(f"We have train datset of size {len(train_data)}")
train_data.head()

We have train datset of size 8632


Unnamed: 0,Text,bundles,n_bundles
0,–ü—Ä–æ–¥–∞–º —Ç–µ–ª–µ—Ñ–æ–Ω POCO F4 128GB —Ç–µ–ª–µ—Ñ–æ–Ω—É 8 –º–µ—Å—è—Ü–µ...,"[{""Title"": ""\u0442\u0435\u043b\u0435\u0444\u04...",1
1,"–ø—Ä–æ–¥–∞–º sup –±–æ—Ä–¥ 10'6 - 320—Ö76—Ö15 —Å–º, –ø–æ–ª—å–∑–æ–≤–∞...","[{""Title"": ""sup \u0431\u043e\u0440\u0434 \u043...",1
2,–ò–∑–º–µ–Ω–∏–ª–∏—Å—å –ø–ª–∞–Ω—ã . –û—Ç–¥–∞–º –∑–∞ 2000,[],0
3,–ü—Ä–æ–¥–∞–º —Ä–∞—Ü–∏—é MegaJet MJ-333 –≤ —Ö–æ—Ä–æ—à–µ–º —Ä–∞–±–æ—á–µ–º ...,"[{""Title"": ""\u0440\u0430\u0434\u0438\u043e\u04...",1
4,–ü—Ä–æ–¥–∞–º PlayStation 3\n3 –¥–∂–æ–π—Å—Ç–∏–∫ –∏ –≤—Å–µ –¥–∏—Å–∫–∏\n...,"[{""Title"": ""PlayStation 3 \u0441 3 \u0434\u043...",1


In [5]:
# Load the evaluation ads and keep only the first 200 rows, renumbered 0..n-1.
# The chain builds a fresh frame instead of re-indexing a slice in place.
eval_data = (
    pd.read_csv("~/work/resources/data/ads_eval.csv")
    .iloc[:200]
    .reset_index(drop=True)
)
print(f"We have eval datset of size {len(eval_data)}")
eval_data.head()

We have eval datset of size 200


Unnamed: 0,Text,bundles,n_bundles
0,–†–∞—Å–ø—Ä–æ–¥–∞–∂–∞ \n–ü–ª–∞—Ç—å—è \n–¶–µ–Ω–∞: 300—Ä\n–†–∞–∑–º–µ—Ä: 44-4...,"[{""Title"": ""\u041f\u043b\u0430\u0442\u044c\u04...",1
1,"–≤–µ–≥–µ—Ç–∞—Ä–∏–∞–Ω—Å–∫–∞—è —ç–Ω—Ü–∏–∫–ª–æ–ø–µ–¥–∏—è –≤–∫—É—Å–æ–≤, —á—Ç–æ —Å —á–µ–º ...","[{""Title"": ""\u0432\u0435\u0433\u0435\u0442\u04...",1
2,"–ø—Ä–æ–¥–∞–º —â–µ—Ç–∫—É –¥–ª—è —Å—É—Ö–æ–≥–æ –º–∞—Å—Å–∞–∂–∞, –Ω–æ–≤–∞—è (—Ç–∞–∫ –∏ ...","[{""Title"": ""\u0429\u0435\u0442\u043a\u0430 \u0...",1
3,"–ó–∏–º–Ω–∏–π —Ç—ë–ø–ª—ã–π –∫–æ–º–±–∏–Ω–µ–∑–æ–Ω, –ø—É—Ö, Crockid, p.74...","[{""Title"": ""\u0417\u0438\u043c\u043d\u0438\u04...",1
4,"—É–¥–ª–∏–Ω–∏—Ç–µ–ª—å –∏–∫–µ–∞ 1,5 –º –Ω–∞ 6 —Ä–æ–∑–µ—Ç–æ–∫\n25 –ª–∞—Ä–∏\nüìç...","[{""Title"": ""\u0443\u0434\u043b\u0438\u043d\u04...",1


In [6]:
# Pretrained checkpoint that both the tokenizer and the model are loaded from.
model_checkpoint = "ai-forever/ruT5-base"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)

# Tokens that are always added, regardless of the target format.
tokenizer.add_tokens(["‚Çæ", "$", "‚Ç¨", "<NL>"], special_tokens=False)

# Extra tokens that depend on the chosen target format.
if format == Format.SpecTokens:
    format_tokens = [
        "<BOB>",
        "<EOB>",
        "<BOT>",
        "<EOT>",
        "<BOP>",
        "<EOP>",
        "<BOC1>",
        "<EOC1>",
        "<BOC2>",
        "<EOC2>",
    ]
elif format == Format.JustJson:
    format_tokens = ["{", "}"]
else:
    format_tokens = []

if format_tokens:
    tokenizer.add_tokens(format_tokens, special_tokens=False)

# Resize the embedding matrix to the enlarged vocabulary; as the last
# expression of the cell, the resulting embedding is also displayed.
model.resize_token_embeddings(len(tokenizer))

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
  return self.fget.__get__(instance, owner)()


Embedding(32113, 768)

In [7]:
# Columns handed to the preprocessor, and the settings shared by both `map` calls.
ad_columns = ["Text", "bundles"]
preprocess = get_preprocessor(tokenizer, format)
map_options = {"batched": True, "num_proc": 4}

train_dataset = Dataset.from_pandas(train_data[ad_columns])
eval_dataset = Dataset.from_pandas(eval_data[ad_columns])

# Tokenize each split, dropping the source columns so only tokenizer output remains.
train_ads = train_dataset.map(
    preprocess, remove_columns=train_dataset.column_names, **map_options
).flatten()
eval_ads = eval_dataset.map(
    preprocess, remove_columns=eval_dataset.column_names, **map_options
).flatten()

Map (num_proc=4): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8632/8632 [00:01<00:00, 4856.32 examples/s]
Map (num_proc=4): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:00<00:00, 444.43 examples/s]


In [8]:
# Batch collator for seq2seq training, built around the extended tokenizer with
# every other option left at the library default.
data_collator = DataCollatorForSeq2Seq(
    tokenizer=tokenizer,
)

In [9]:
n_epochs = 10

# Hyper-parameters of the fine-tuning run, grouped by concern. The values after
# "note:" are the author's original inline remarks (presumably earlier or
# alternative settings — confirm before relying on them).
training_args = Seq2SeqTrainingArguments(
    # Output and checkpointing
    output_dir="ruT5",
    overwrite_output_dir=True,
    save_total_limit=1,
    # Optimisation schedule
    num_train_epochs=n_epochs,
    learning_rate=5e-5,  # note: 3e-5
    weight_decay=0.5,  # note: 0.01
    lr_scheduler_type="cosine",
    warmup_steps=200,  # note: 200
    # Batching
    per_device_train_batch_size=8,  # note: 8
    per_device_eval_batch_size=8,  # note: 8
    group_by_length=False,
    # Evaluation: run once per epoch, scoring generated sequences
    evaluation_strategy="epoch",
    predict_with_generate=True,
    generation_max_length=256,
)

# Project helpers: metric computation and a callback that prints sample predictions.
metrics_computer = get_metrics_computer(tokenizer, format)
print_callback = PrintCallback(format, show=5, device=device)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=train_ads,
    eval_dataset=eval_ads,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=metrics_computer,
    callbacks=[print_callback],
)

In [10]:
# Start fine-tuning with the configured trainer. The call is left as a bare
# expression so its return value is displayed as the cell output.
trainer.train()

  5%|‚ñç         | 500/10790 [01:48<33:25,  5.13it/s]

{'loss': 2.0693, 'grad_norm': 6.453489303588867, 'learning_rate': 4.9901059596372036e-05, 'epoch': 0.46}


  9%|‚ñâ         | 1000/10790 [04:28<35:20,  4.62it/s]  

{'loss': 0.4481, 'grad_norm': 2.4874117374420166, 'learning_rate': 4.9299257516933094e-05, 'epoch': 0.93}


 10%|‚ñà         | 1079/10790 [05:37<33:08,  4.88it/s]   
  0%|          | 0/25 [00:00<?, ?it/s][A
  8%|‚ñä         | 2/25 [00:02<00:32,  1.40s/it][A
 12%|‚ñà‚ñè        | 3/25 [00:04<00:29,  1.34s/it][A
 16%|‚ñà‚ñå        | 4/25 [00:06<00:40,  1.93s/it][A
 20%|‚ñà‚ñà        | 5/25 [00:08<00:36,  1.85s/it][A
 24%|‚ñà‚ñà‚ñç       | 6/25 [00:13<00:51,  2.70s/it][A
 28%|‚ñà‚ñà‚ñä       | 7/25 [00:15<00:44,  2.48s/it][A
 32%|‚ñà‚ñà‚ñà‚ñè      | 8/25 [00:16<00:37,  2.20s/it][A
 36%|‚ñà‚ñà‚ñà‚ñå      | 9/25 [00:23<00:56,  3.52s/it][A
 40%|‚ñà‚ñà‚ñà‚ñà      | 10/25 [00:25<00:47,  3.20s/it][A
 44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 11/25 [00:27<00:37,  2.69s/it][A
 48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 12/25 [00:30<00:35,  2.74s/it][A
 52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 13/25 [00:33<00:34,  2.84s/it][A
 56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 14/25 [00:37<00:36,  3.29s/it][A
 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 15/25 [00:43<00:42,  4.23s/it][A
 64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 16/25 [00:46<00:34,  3.79s/it][A
 68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà

{'eval_loss': 0.33411353826522827, 'eval_Global BLEU': 77.58223397179312, 'eval_valid_answer_structure_precent': 99.48717948717949, 'eval_valid_bundles_precent': 100.0, 'eval_n_bundles_mae': 0.23195876288659795, 'eval_accidental_nulls': 0, 'eval_title_bleu_1_bundle': 37.18929084318527, 'eval_title_chrf_1_bundle': 69.57719813715752, 'eval_price_match_precent_1_bundle': 94.90445859872611, 'eval_currency_match_precent_1_bundle': 93.63057324840764, 'eval_count_match_precent_1_bundle': 94.26751592356688, 'eval_title_bleu_multi_bundle': 19.52296356198625, 'eval_title_chrf_multi_bundle': 50.242983623100756, 'eval_price_match_precent_multi_bundle': 39.6969696969697, 'eval_currency_match_precent_multi_bundle': 77.27272727272727, 'eval_count_match_precent_multi_bundle': 82.8030303030303, 'eval_runtime': 333.1562, 'eval_samples_per_second': 0.6, 'eval_steps_per_second': 0.075, 'epoch': 1.0}



                                               [A


ORIGINAL TEXT
	RAW
	 –†–∞—Å–ø—Ä–æ–¥–∞–∂–∞ 
 –ü–ª–∞—Ç—å—è 
 –¶–µ–Ω–∞: 300—Ä 
 –†–∞–∑–º–µ—Ä: 44-46-48-50 
 –ë–µ–∑ –≤—ã–±–æ—Ä–∞ —Ü–≤–µ—Ç–∞ 
 –ê—Ä—Ç: 2–ì-14 –∫–æ—Ä–ø—É—Å –ë</s>

TARGET
	RAW
	 <BOB> <BOT> –ü–ª–∞—Ç—å—è —Ä–∞–∑–º–µ—Ä 44-46-48-50 <EOT> <BOP> 300 <EOP> <BOC1> RUB <EOC1> <BOC2> 1 <EOC2> <EOB></s>
	DECODED:
	 [{'title': '–ü–ª–∞—Ç—å—è —Ä–∞–∑–º–µ—Ä 44-46-48-50', 'price': '300', 'currency': 'RUB', 'count': '1'}]

PREDICTED
	RAW
	 <BOB> <BOT> –ü–ª–∞—Ç—å—è <EOT> <BOP> 300 <EOP> <BOC1> RUB <EOC1> <BOC2> 1 <EOC2> <EOB></s>
	DECODED:
	 [{'title': '–ü–ª–∞—Ç—å—è', 'price': '300', 'currency': 'RUB', 'count': '1'}]
--------------------------------------------------


ORIGINAL TEXT
	RAW
	 –≤–µ–≥–µ—Ç–∞—Ä–∏–∞–Ω—Å–∫–∞—è —ç–Ω—Ü–∏–∫–ª–æ–ø–µ–¥–∏—è –≤–∫—É—Å–æ–≤, —á—Ç–æ —Å —á–µ–º —Å–æ—á–µ—Ç–∞–µ—Ç—Å—è, —Å–ø—Ä–∞–≤–æ—á–Ω–∏–∫ –æ—Ç —à–µ—Ñ –ø–æ–≤–∞—Ä–æ–≤. 15 eur. –ª–∏–º–∞—Å—Å–æ–ª.</s>

TARGET
	RAW
	 <BOB> <BOT> –≤–µ–≥–µ—Ç–∞—Ä–∏–∞–Ω—Å–∫–∞—è —ç–Ω—Ü–∏–∫–ª–æ–ø–µ–¥–∏—è –≤–∫—É—Å–æ–≤, —á—Ç–æ —Å —á–µ–º —Å–æ—á–µ—Ç–

 14%|‚ñà‚ñç        | 1500/10790 [12:40<32:30,  4.76it/s]     

{'loss': 0.3447, 'grad_norm': 1.8054348230361938, 'learning_rate': 4.81638202747802e-05, 'epoch': 1.39}


 19%|‚ñà‚ñä        | 2000/10790 [15:20<29:32,  4.96it/s]   

{'loss': 0.3248, 'grad_norm': 1.2828145027160645, 'learning_rate': 4.65196831683292e-05, 'epoch': 1.85}


 20%|‚ñà‚ñà        | 2158/10790 [16:50<31:17,  4.60it/s]   
  0%|          | 0/25 [00:00<?, ?it/s][A
  8%|‚ñä         | 2/25 [00:03<00:40,  1.78s/it][A
 12%|‚ñà‚ñè        | 3/25 [00:04<00:34,  1.55s/it][A
 16%|‚ñà‚ñå        | 4/25 [00:11<01:10,  3.37s/it][A
 20%|‚ñà‚ñà        | 5/25 [00:15<01:11,  3.56s/it][A
 24%|‚ñà‚ñà‚ñç       | 6/25 [00:21<01:25,  4.50s/it][A
 28%|‚ñà‚ñà‚ñä       | 7/25 [00:23<01:06,  3.70s/it][A
 32%|‚ñà‚ñà‚ñà‚ñè      | 8/25 [00:26<00:58,  3.47s/it][A
 36%|‚ñà‚ñà‚ñà‚ñå      | 9/25 [00:31<01:01,  3.84s/it][A
 40%|‚ñà‚ñà‚ñà‚ñà      | 10/25 [00:34<00:56,  3.79s/it][A
 44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 11/25 [00:36<00:43,  3.08s/it][A
 48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 12/25 [00:39<00:39,  3.00s/it][A
 52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 13/25 [00:45<00:48,  4.01s/it][A
 56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 14/25 [00:50<00:47,  4.33s/it][A
 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 15/25 [00:56<00:49,  4.95s/it][A
 64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 16/25 [01:00<00:39,  4.42s/it][A
 68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚

{'eval_loss': 0.29016411304473877, 'eval_Global BLEU': 74.1543630156271, 'eval_valid_answer_structure_precent': 96.41025641025641, 'eval_valid_bundles_precent': 100.0, 'eval_n_bundles_mae': 0.2074468085106383, 'eval_accidental_nulls': 0, 'eval_title_bleu_1_bundle': 37.448142430641866, 'eval_title_chrf_1_bundle': 69.2539722495503, 'eval_price_match_precent_1_bundle': 96.05263157894737, 'eval_currency_match_precent_1_bundle': 92.76315789473685, 'eval_count_match_precent_1_bundle': 94.73684210526315, 'eval_title_bleu_multi_bundle': 26.24948662257288, 'eval_title_chrf_multi_bundle': 65.35314048260699, 'eval_price_match_precent_multi_bundle': 55.7936507936508, 'eval_currency_match_precent_multi_bundle': 85.71428571428571, 'eval_count_match_precent_multi_bundle': 80.3968253968254, 'eval_runtime': 339.394, 'eval_samples_per_second': 0.589, 'eval_steps_per_second': 0.074, 'epoch': 2.0}



                                               [A


ORIGINAL TEXT
	RAW
	 –†–∞—Å–ø—Ä–æ–¥–∞–∂–∞ 
 –ü–ª–∞—Ç—å—è 
 –¶–µ–Ω–∞: 300—Ä 
 –†–∞–∑–º–µ—Ä: 44-46-48-50 
 –ë–µ–∑ –≤—ã–±–æ—Ä–∞ —Ü–≤–µ—Ç–∞ 
 –ê—Ä—Ç: 2–ì-14 –∫–æ—Ä–ø—É—Å –ë</s>

TARGET
	RAW
	 <BOB> <BOT> –ü–ª–∞—Ç—å—è —Ä–∞–∑–º–µ—Ä 44-46-48-50 <EOT> <BOP> 300 <EOP> <BOC1> RUB <EOC1> <BOC2> 1 <EOC2> <EOB></s>
	DECODED:
	 [{'title': '–ü–ª–∞—Ç—å—è —Ä–∞–∑–º–µ—Ä 44-46-48-50', 'price': '300', 'currency': 'RUB', 'count': '1'}]

PREDICTED
	RAW
	 <BOB> <BOT> –ü–ª–∞—Ç—å—è <EOT> <BOP> 300 <EOP> <BOC1> RUB <EOC1> <BOC2> 1 <EOC2> <EOB></s>
	DECODED:
	 [{'title': '–ü–ª–∞—Ç—å—è', 'price': '300', 'currency': 'RUB', 'count': '1'}]
--------------------------------------------------


ORIGINAL TEXT
	RAW
	 –≤–µ–≥–µ—Ç–∞—Ä–∏–∞–Ω—Å–∫–∞—è —ç–Ω—Ü–∏–∫–ª–æ–ø–µ–¥–∏—è –≤–∫—É—Å–æ–≤, —á—Ç–æ —Å —á–µ–º —Å–æ—á–µ—Ç–∞–µ—Ç—Å—è, —Å–ø—Ä–∞–≤–æ—á–Ω–∏–∫ –æ—Ç —à–µ—Ñ –ø–æ–≤–∞—Ä–æ–≤. 15 eur. –ª–∏–º–∞—Å—Å–æ–ª.</s>

TARGET
	RAW
	 <BOB> <BOT> –≤–µ–≥–µ—Ç–∞—Ä–∏–∞–Ω—Å–∫–∞—è —ç–Ω—Ü–∏–∫–ª–æ–ø–µ–¥–∏—è –≤–∫—É—Å–æ–≤, —á—Ç–æ —Å —á–µ–º —Å–æ—á–µ—Ç–

 23%|‚ñà‚ñà‚ñé       | 2500/10790 [23:44<27:12,  5.08it/s]     

{'loss': 0.2797, 'grad_norm': 1.8456127643585205, 'learning_rate': 4.440295303630605e-05, 'epoch': 2.32}


 28%|‚ñà‚ñà‚ñä       | 3000/10790 [26:25<26:10,  4.96it/s]   

{'loss': 0.2601, 'grad_norm': 2.0044031143188477, 'learning_rate': 4.1860115316697356e-05, 'epoch': 2.78}


 30%|‚ñà‚ñà‚ñà       | 3237/10790 [28:11<27:31,  4.57it/s]   
  0%|          | 0/25 [00:00<?, ?it/s][A
  8%|‚ñä         | 2/25 [00:03<00:41,  1.79s/it][A
 12%|‚ñà‚ñè        | 3/25 [00:04<00:34,  1.58s/it][A
 16%|‚ñà‚ñå        | 4/25 [00:07<00:42,  2.03s/it][A
 20%|‚ñà‚ñà        | 5/25 [00:10<00:48,  2.41s/it][A
 24%|‚ñà‚ñà‚ñç       | 6/25 [00:13<00:45,  2.38s/it][A
 28%|‚ñà‚ñà‚ñä       | 7/25 [00:15<00:40,  2.23s/it][A
 32%|‚ñà‚ñà‚ñà‚ñè      | 8/25 [00:17<00:38,  2.25s/it][A
 36%|‚ñà‚ñà‚ñà‚ñå      | 9/25 [00:21<00:44,  2.79s/it][A
 40%|‚ñà‚ñà‚ñà‚ñà      | 10/25 [00:25<00:46,  3.09s/it][A
 44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 11/25 [00:26<00:35,  2.51s/it][A
 48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 12/25 [00:29<00:34,  2.63s/it][A
 52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 13/25 [00:32<00:34,  2.87s/it][A
 56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 14/25 [00:36<00:35,  3.27s/it][A
 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 15/25 [00:39<00:31,  3.19s/it][A
 64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 16/25 [00:41<00:25,  2.84s/it][A
 68%|‚ñà‚ñà‚ñà‚ñà‚ñ

{'eval_loss': 0.286516010761261, 'eval_Global BLEU': 79.77928273502508, 'eval_valid_answer_structure_precent': 99.48717948717949, 'eval_valid_bundles_precent': 100.0, 'eval_n_bundles_mae': 0.20618556701030927, 'eval_accidental_nulls': 0, 'eval_title_bleu_1_bundle': 40.91806743151628, 'eval_title_chrf_1_bundle': 72.62147256113701, 'eval_price_match_precent_1_bundle': 94.11764705882352, 'eval_currency_match_precent_1_bundle': 94.77124183006535, 'eval_count_match_precent_1_bundle': 94.77124183006535, 'eval_title_bleu_multi_bundle': 23.522926622566146, 'eval_title_chrf_multi_bundle': 70.97579610075802, 'eval_price_match_precent_multi_bundle': 78.3974358974359, 'eval_currency_match_precent_multi_bundle': 80.76923076923077, 'eval_count_match_precent_multi_bundle': 89.2948717948718, 'eval_runtime': 324.0956, 'eval_samples_per_second': 0.617, 'eval_steps_per_second': 0.077, 'epoch': 3.0}



                                               [A


ORIGINAL TEXT
	RAW
	 –†–∞—Å–ø—Ä–æ–¥–∞–∂–∞ 
 –ü–ª–∞—Ç—å—è 
 –¶–µ–Ω–∞: 300—Ä 
 –†–∞–∑–º–µ—Ä: 44-46-48-50 
 –ë–µ–∑ –≤—ã–±–æ—Ä–∞ —Ü–≤–µ—Ç–∞ 
 –ê—Ä—Ç: 2–ì-14 –∫–æ—Ä–ø—É—Å –ë</s>

TARGET
	RAW
	 <BOB> <BOT> –ü–ª–∞—Ç—å—è —Ä–∞–∑–º–µ—Ä 44-46-48-50 <EOT> <BOP> 300 <EOP> <BOC1> RUB <EOC1> <BOC2> 1 <EOC2> <EOB></s>
	DECODED:
	 [{'title': '–ü–ª–∞—Ç—å—è —Ä–∞–∑–º–µ—Ä 44-46-48-50', 'price': '300', 'currency': 'RUB', 'count': '1'}]

PREDICTED
	RAW
	 <BOB> <BOT> –ü–ª–∞—Ç—å—è <EOT> <BOP> 300 <EOP> <BOC1> RUB <EOC1> <BOC2> 1 <EOC2> <EOB></s>
	DECODED:
	 [{'title': '–ü–ª–∞—Ç—å—è', 'price': '300', 'currency': 'RUB', 'count': '1'}]
--------------------------------------------------


ORIGINAL TEXT
	RAW
	 –≤–µ–≥–µ—Ç–∞—Ä–∏–∞–Ω—Å–∫–∞—è —ç–Ω—Ü–∏–∫–ª–æ–ø–µ–¥–∏—è –≤–∫—É—Å–æ–≤, —á—Ç–æ —Å —á–µ–º —Å–æ—á–µ—Ç–∞–µ—Ç—Å—è, —Å–ø—Ä–∞–≤–æ—á–Ω–∏–∫ –æ—Ç —à–µ—Ñ –ø–æ–≤–∞—Ä–æ–≤. 15 eur. –ª–∏–º–∞—Å—Å–æ–ª.</s>

TARGET
	RAW
	 <BOB> <BOT> –≤–µ–≥–µ—Ç–∞—Ä–∏–∞–Ω—Å–∫–∞—è —ç–Ω—Ü–∏–∫–ª–æ–ø–µ–¥–∏—è –≤–∫—É—Å–æ–≤, —á—Ç–æ —Å —á–µ–º —Å–æ—á–µ—Ç–

 32%|‚ñà‚ñà‚ñà‚ñè      | 3500/10790 [34:32<26:41,  4.55it/s]    

{'loss': 0.2314, 'grad_norm': 1.678099274635315, 'learning_rate': 3.8947013181637624e-05, 'epoch': 3.24}


 37%|‚ñà‚ñà‚ñà‚ñã      | 4000/10790 [37:13<21:18,  5.31it/s]   

{'loss': 0.2192, 'grad_norm': 0.9023706316947937, 'learning_rate': 3.572762116741717e-05, 'epoch': 3.71}


 40%|‚ñà‚ñà‚ñà‚ñà      | 4316/10790 [39:14<24:15,  4.45it/s]   
  0%|          | 0/25 [00:00<?, ?it/s][A
  8%|‚ñä         | 2/25 [00:02<00:32,  1.40s/it][A
 12%|‚ñà‚ñè        | 3/25 [00:04<00:29,  1.36s/it][A
 16%|‚ñà‚ñå        | 4/25 [00:06<00:37,  1.77s/it][A
 20%|‚ñà‚ñà        | 5/25 [00:09<00:45,  2.26s/it][A
 24%|‚ñà‚ñà‚ñç       | 6/25 [00:12<00:47,  2.49s/it][A
 28%|‚ñà‚ñà‚ñä       | 7/25 [00:14<00:41,  2.31s/it][A
 32%|‚ñà‚ñà‚ñà‚ñè      | 8/25 [00:16<00:38,  2.29s/it][A
 36%|‚ñà‚ñà‚ñà‚ñå      | 9/25 [00:20<00:45,  2.84s/it][A
 40%|‚ñà‚ñà‚ñà‚ñà      | 10/25 [00:23<00:40,  2.73s/it][A
 44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 11/25 [00:24<00:32,  2.34s/it][A
 48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 12/25 [00:27<00:32,  2.48s/it][A
 52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 13/25 [00:30<00:32,  2.74s/it][A
 56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 14/25 [00:34<00:31,  2.89s/it][A
 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 15/25 [00:37<00:29,  2.93s/it][A
 64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 16/25 [00:39<00:24,  2.67s/it][A
 68%|‚ñà‚ñà‚ñà‚ñà

{'eval_loss': 0.27199411392211914, 'eval_Global BLEU': 80.01128633175354, 'eval_valid_answer_structure_precent': 100.0, 'eval_valid_bundles_precent': 100.0, 'eval_n_bundles_mae': 0.14358974358974358, 'eval_accidental_nulls': 0, 'eval_title_bleu_1_bundle': 40.550227885146825, 'eval_title_chrf_1_bundle': 74.09771463423294, 'eval_price_match_precent_1_bundle': 95.48387096774194, 'eval_currency_match_precent_1_bundle': 94.19354838709677, 'eval_count_match_precent_1_bundle': 94.19354838709677, 'eval_title_bleu_multi_bundle': 26.403786474047045, 'eval_title_chrf_multi_bundle': 71.78520067255481, 'eval_price_match_precent_multi_bundle': 75.46666666666668, 'eval_currency_match_precent_multi_bundle': 85.33333333333333, 'eval_count_match_precent_multi_bundle': 92.33333333333333, 'eval_runtime': 315.5856, 'eval_samples_per_second': 0.634, 'eval_steps_per_second': 0.079, 'epoch': 4.0}



                                               [A


ORIGINAL TEXT
	RAW
	 –†–∞—Å–ø—Ä–æ–¥–∞–∂–∞ 
 –ü–ª–∞—Ç—å—è 
 –¶–µ–Ω–∞: 300—Ä 
 –†–∞–∑–º–µ—Ä: 44-46-48-50 
 –ë–µ–∑ –≤—ã–±–æ—Ä–∞ —Ü–≤–µ—Ç–∞ 
 –ê—Ä—Ç: 2–ì-14 –∫–æ—Ä–ø—É—Å –ë</s>

TARGET
	RAW
	 <BOB> <BOT> –ü–ª–∞—Ç—å—è —Ä–∞–∑–º–µ—Ä 44-46-48-50 <EOT> <BOP> 300 <EOP> <BOC1> RUB <EOC1> <BOC2> 1 <EOC2> <EOB></s>
	DECODED:
	 [{'title': '–ü–ª–∞—Ç—å—è —Ä–∞–∑–º–µ—Ä 44-46-48-50', 'price': '300', 'currency': 'RUB', 'count': '1'}]

PREDICTED
	RAW
	 <BOB> <BOT> –ü–ª–∞—Ç—å—è <EOT> <BOP> 300 <EOP> <BOC1> RUB <EOC1> <BOC2> 1 <EOC2> <EOB></s>
	DECODED:
	 [{'title': '–ü–ª–∞—Ç—å—è', 'price': '300', 'currency': 'RUB', 'count': '1'}]
--------------------------------------------------


ORIGINAL TEXT
	RAW
	 –≤–µ–≥–µ—Ç–∞—Ä–∏–∞–Ω—Å–∫–∞—è —ç–Ω—Ü–∏–∫–ª–æ–ø–µ–¥–∏—è –≤–∫—É—Å–æ–≤, —á—Ç–æ —Å —á–µ–º —Å–æ—á–µ—Ç–∞–µ—Ç—Å—è, —Å–ø—Ä–∞–≤–æ—á–Ω–∏–∫ –æ—Ç —à–µ—Ñ –ø–æ–≤–∞—Ä–æ–≤. 15 eur. –ª–∏–º–∞—Å—Å–æ–ª.</s>

TARGET
	RAW
	 <BOB> <BOT> –≤–µ–≥–µ—Ç–∞—Ä–∏–∞–Ω—Å–∫–∞—è —ç–Ω—Ü–∏–∫–ª–æ–ø–µ–¥–∏—è –≤–∫—É—Å–æ–≤, —á—Ç–æ —Å —á–µ–º —Å–æ—á–µ—Ç–

 42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 4500/10790 [45:10<19:11,  5.46it/s]    

{'loss': 0.1965, 'grad_norm': 2.3314578533172607, 'learning_rate': 3.227264023188011e-05, 'epoch': 4.17}


IOStream.flush timed out
 46%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 5000/10790 [47:51<20:51,  4.63it/s]   

{'loss': 0.1799, 'grad_norm': 1.1777002811431885, 'learning_rate': 2.8657945093108886e-05, 'epoch': 4.63}


 50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 5395/10790 [50:11<19:15,  4.67it/s]   
  0%|          | 0/25 [00:00<?, ?it/s][A
  8%|‚ñä         | 2/25 [00:02<00:32,  1.42s/it][A
 12%|‚ñà‚ñè        | 3/25 [00:04<00:29,  1.36s/it][A
 16%|‚ñà‚ñå        | 4/25 [00:06<00:38,  1.81s/it][A
 20%|‚ñà‚ñà        | 5/25 [00:09<00:44,  2.25s/it][A
 24%|‚ñà‚ñà‚ñç       | 6/25 [00:13<00:52,  2.78s/it][A
 28%|‚ñà‚ñà‚ñä       | 7/25 [00:15<00:44,  2.49s/it][A
 32%|‚ñà‚ñà‚ñà‚ñè      | 8/25 [00:17<00:41,  2.45s/it][A
 36%|‚ñà‚ñà‚ñà‚ñå      | 9/25 [00:22<00:48,  3.01s/it][A
 40%|‚ñà‚ñà‚ñà‚ñà      | 10/25 [00:24<00:43,  2.88s/it][A
 44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 11/25 [00:25<00:33,  2.40s/it][A
 48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 12/25 [00:28<00:32,  2.54s/it][A
 52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 13/25 [00:32<00:36,  3.02s/it][A
 56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 14/25 [00:37<00:37,  3.37s/it][A
 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 15/25 [00:41<00:36,  3.60s/it][A
 64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 16/25 [00:43<00:28,  3.12s/it][A
 68%|‚ñà‚ñà‚ñà‚

{'eval_loss': 0.27742743492126465, 'eval_Global BLEU': 81.35892169903879, 'eval_valid_answer_structure_precent': 99.48717948717949, 'eval_valid_bundles_precent': 100.0, 'eval_n_bundles_mae': 0.15463917525773196, 'eval_accidental_nulls': 1, 'eval_title_bleu_1_bundle': 38.68624488555682, 'eval_title_chrf_1_bundle': 71.75562598404966, 'eval_price_match_precent_1_bundle': 95.36423841059603, 'eval_currency_match_precent_1_bundle': 94.03973509933775, 'eval_count_match_precent_1_bundle': 94.03973509933775, 'eval_title_bleu_multi_bundle': 28.145558136896824, 'eval_title_chrf_multi_bundle': 73.0117309728352, 'eval_price_match_precent_multi_bundle': 77.65432098765433, 'eval_currency_match_precent_multi_bundle': 82.71604938271605, 'eval_count_match_precent_multi_bundle': 92.1604938271605, 'eval_runtime': 326.8036, 'eval_samples_per_second': 0.612, 'eval_steps_per_second': 0.076, 'epoch': 5.0}



                                               [A


ORIGINAL TEXT
	RAW
	 –†–∞—Å–ø—Ä–æ–¥–∞–∂–∞ 
 –ü–ª–∞—Ç—å—è 
 –¶–µ–Ω–∞: 300—Ä 
 –†–∞–∑–º–µ—Ä: 44-46-48-50 
 –ë–µ–∑ –≤—ã–±–æ—Ä–∞ —Ü–≤–µ—Ç–∞ 
 –ê—Ä—Ç: 2–ì-14 –∫–æ—Ä–ø—É—Å –ë</s>

TARGET
	RAW
	 <BOB> <BOT> –ü–ª–∞—Ç—å—è —Ä–∞–∑–º–µ—Ä 44-46-48-50 <EOT> <BOP> 300 <EOP> <BOC1> RUB <EOC1> <BOC2> 1 <EOC2> <EOB></s>
	DECODED:
	 [{'title': '–ü–ª–∞—Ç—å—è —Ä–∞–∑–º–µ—Ä 44-46-48-50', 'price': '300', 'currency': 'RUB', 'count': '1'}]

PREDICTED
	RAW
	 <BOB> <BOT> –ü–ª–∞—Ç—å—è <EOT> <BOP> 300 <EOP> <BOC1> RUB <EOC1> <BOC2> 1 <EOC2> <EOB></s>
	DECODED:
	 [{'title': '–ü–ª–∞—Ç—å—è', 'price': '300', 'currency': 'RUB', 'count': '1'}]
--------------------------------------------------


ORIGINAL TEXT
	RAW
	 –≤–µ–≥–µ—Ç–∞—Ä–∏–∞–Ω—Å–∫–∞—è —ç–Ω—Ü–∏–∫–ª–æ–ø–µ–¥–∏—è –≤–∫—É—Å–æ–≤, —á—Ç–æ —Å —á–µ–º —Å–æ—á–µ—Ç–∞–µ—Ç—Å—è, —Å–ø—Ä–∞–≤–æ—á–Ω–∏–∫ –æ—Ç —à–µ—Ñ –ø–æ–≤–∞—Ä–æ–≤. 15 eur. –ª–∏–º–∞—Å—Å–æ–ª.</s>

TARGET
	RAW
	 <BOB> <BOT> –≤–µ–≥–µ—Ç–∞—Ä–∏–∞–Ω—Å–∫–∞—è —ç–Ω—Ü–∏–∫–ª–æ–ø–µ–¥–∏—è –≤–∫—É—Å–æ–≤, —á—Ç–æ —Å —á–µ–º —Å–æ—á–µ—Ç–

 51%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 5500/10790 [56:01<17:14,  5.11it/s]    

{'loss': 0.1743, 'grad_norm': 1.0153436660766602, 'learning_rate': 2.4962917947337045e-05, 'epoch': 5.1}


 56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 6000/10790 [58:41<19:39,  4.06it/s]   

{'loss': 0.1508, 'grad_norm': 1.8940749168395996, 'learning_rate': 2.126870515925464e-05, 'epoch': 5.56}


KeyboardInterrupt: 

In [None]:
# Persist the model and its extended tokenizer side by side in one directory
# (model first, then tokenizer).
output_dir = "good_checkpoints/ruT5-base"
for artifact in (model, tokenizer):
    artifact.save_pretrained(output_dir)