In [None]:
import numpy as np
import pandas as pd
from datasets import load_dataset
from sklearn.metrics import f1_score
from src.data.ru_go_emotions import get_dataloaders
from src.data.russian_sentiment import *
from src.model.models import get_model
from src.trainer.eval import eval
from transformers import pipeline

# Limit displayed cell text to 80 characters so tables with long texts stay compact.
pd.set_option("display.max_colwidth", 80)

In [None]:
# Sentiment classifier evaluated in this notebook; device=0 places it on the first GPU.
pipe = pipeline(model="seara/rubert-tiny2-russian-sentiment", device=0)

# Alternative checkpoint that can be evaluated instead (swap the model id above):
#     sismetanin/rubert-ru-sentiment-liniscrowd

In [None]:
# Only the third returned value is kept (named as the test split); the first two
# return values (presumably the train and validation splits) are discarded.
_, _, russian_sentiment_test = get_russian_sentiment_all()

INFO: Pandarallel will run on 8 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=7393), Label(value='0 / 7393'))), …

Loaded Linis: train 4694, val 586, test 586
Loaded RuReviews: train 72000, val 9000, test 9000
Loaded RuSentiment: train 18969, val 2095, test 2214
Loaded Kaggle: train 6611, val 826, test 826
Loaded all datasets: train 102274, val 12507, test 12626


In [None]:
def get_answers(dataset, classifier=None, batch_size=64, max_length=512):
    """Classify every text in ``dataset`` and return predictions with the true labels.

    Args:
        dataset: Table-like object with a ``"text"`` column supporting
            ``.to_list()`` and a ``"label"`` column (e.g. a pandas DataFrame).
        classifier: Callable invoked as
            ``classifier(texts, truncation=True, max_length=..., batch_size=...)``
            that returns one mapping with a ``"label"`` key per input text.
            Defaults to the notebook-level ``pipe``.
        batch_size: Forwarded to the classifier as ``batch_size``.
        max_length: Forwarded to the classifier as ``max_length``; inputs are
            always passed with ``truncation=True``.

    Returns:
        A ``(processed_answers, true_answers)`` tuple: the list of predicted
        labels and the unmodified ``dataset["label"]`` column.
    """
    if classifier is None:
        # Looked up at call time so that re-creating `pipe` in another cell
        # is picked up without redefining this function.
        classifier = pipe
    answers = classifier(
        dataset["text"].to_list(),
        truncation=True,
        max_length=max_length,
        batch_size=batch_size,
    )
    processed_answers = [item["label"] for item in answers]
    true_answers = dataset["label"]
    return processed_answers, true_answers

In [None]:
# Run the classifier over russian_sentiment_test; keeps the predicted labels and the
# true labels for the error analysis in the following cells.
processed_answers, true_answers = get_answers(russian_sentiment_test)

In [None]:
# Table for error analysis: one row per test text with the predicted label
# ("Предсказание"), the true label ("Факт") and the text length in characters.
errors = pd.DataFrame(
    {
        "text": russian_sentiment_test["text"].to_list(),
        "Предсказание": processed_answers,
        "Факт": true_answers,
    }
).assign(length=lambda frame: frame["text"].map(len))

In [None]:
# Show a few misclassified texts longer than 70 characters.
long_errors = errors[(errors["Факт"] != errors["Предсказание"]) & (errors["length"] > 70)]
# A fixed seed keeps the displayed rows reproducible across re-runs; capping n
# avoids a ValueError when fewer than 5 rows match the filter.
long_errors.sample(n=min(5, len(long_errors)), random_state=42)

Unnamed: 0,text,Предсказание,Факт,length
330,"разделены, агащазз! аж полтора метра пространства. Во-первых, разделение за...",neutral,negative,700
344,которые тот в одном из ресторанов вручил оперативникам 39-го отдела Управлен...,negative,neutral,783
7678,"Товар пришёл за месяц, цвет немного отличается, более к оранжевому что ли вн...",negative,neutral,274
3754,"Носки я так и не получила, идут с апреля, заказала 2 пары, открыла спор, вер...",neutral,negative,159
10250,"Ключ от моего сердца Ты уже не получишь. Ты для меня бывший, Я не хочу больш...",neutral,negative,152


In [None]:
# Load each corpus separately, keeping only the third returned value (named as the
# test split) so the classifier can be scored per dataset in the cells below.
_, _, rusent_test = get_rusentiment()
_, _, kaggle_test = get_kaggle_russian_news()
_, _, ru_reviews_test = get_rureviews()
_, _, linis_crowd_test = get_linis_crowd()

Loaded RuSentiment: train 18969, val 2095, test 2214
Loaded Kaggle: train 6611, val 826, test 826
Loaded RuReviews: train 72000, val 9000, test 9000
INFO: Pandarallel will run on 8 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=7393), Label(value='0 / 7393'))), …

Loaded Linis: train 4694, val 586, test 586


In [None]:
# F1 on rusent_test with micro, macro and weighted averaging (printed in that order).
processed_answers, true_answers = get_answers(rusent_test)
print(
    [
        f1_score(true_answers, processed_answers, average=averaging)
        for averaging in ("micro", "macro", "weighted")
    ]
)
# Reference noted earlier for rubert base, max length 512:
# [0.7854561878952122, 0.7421749036549401, 0.7899501191412032]

[0.7443541102077688, 0.6757648775672119, 0.7460020442809914]


In [None]:
# F1 on kaggle_test with micro, macro and weighted averaging (printed in that order).
processed_answers, true_answers = get_answers(kaggle_test)
print(
    [
        f1_score(true_answers, processed_answers, average=averaging)
        for averaging in ("micro", "macro", "weighted")
    ]
)
# Reference noted earlier for rubert base, max length 512:
# [0.6828087167070218, 0.6855471137727359, 0.682538032864983]
# Reference noted earlier for rubert base, max length 256:
# [0.6888619854721549, 0.6903670682028832, 0.6884098926374688]

[0.6694915254237288, 0.6634355597666822, 0.6694778038003001]


In [None]:
# F1 on ru_reviews_test with micro, macro and weighted averaging (printed in that order).
processed_answers, true_answers = get_answers(ru_reviews_test)
print(
    [
        f1_score(true_answers, processed_answers, average=averaging)
        for averaging in ("micro", "macro", "weighted")
    ]
)
# Reference noted earlier for rubert base, max length 512:
# [0.7765555555555556, 0.7791678279730907, 0.7785926751117154]
# Reference noted earlier for rubert base, max length 256:
# [0.7765555555555556, 0.7791678279730907, 0.7785926751117154]

[0.7672222222222224, 0.7697909701964688, 0.7692519416443958]


In [None]:
# F1 on linis_crowd_test with micro, macro and weighted averaging (printed in that order).
processed_answers, true_answers = get_answers(linis_crowd_test)
print(
    [
        f1_score(true_answers, processed_answers, average=averaging)
        for averaging in ("micro", "macro", "weighted")
    ]
)
# Reference noted earlier for rubert base, max length 512:
# [0.6245733788395904, 0.48631446965682823, 0.6139836927631701]
# Reference noted earlier for rubert base, max length 256:
# [0.621160409556314, 0.48313649126484587, 0.6107666718246818]

[0.6467576791808873, 0.5346158182823235, 0.6426316575413891]


In [None]:
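# Disabled helper: translates generic LABEL_n predictions into sentiment names.
# Uncomment when the evaluated model emits LABEL_0/LABEL_1/LABEL_2 instead of names.
# mapping = {"LABEL_2": "positive", "LABEL_1": "neutral", "LABEL_0": "negative"}
# processed_answers = [mapping[x] for x in processed_answers]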