In [None]:
import pandas as pd
import random
import numpy as np
import time
import torch
from torch import nn
from torch.optim import AdamW
from sklearn.model_selection import train_test_split
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from transformers import  BertTokenizer, BertForSequenceClassification
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

from transformers import TrainingArguments
from transformers import Trainer
from torch.optim.lr_scheduler import CosineAnnealingLR

import torchvision.models as models
from transformers import EvalPrediction

from tqdm.notebook import tqdm
import warnings
warnings.filterwarnings('ignore')

In [None]:
def set_random_seed(seed):
    """Seed every RNG used in this notebook and request deterministic cuDNN.

    Args:
        seed: Integer seed applied to Python's ``random``, NumPy and PyTorch
            (CPU and all CUDA devices).
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one (a no-op without CUDA).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Autotuned kernel selection can differ between runs, so switch it off
    # whenever deterministic behaviour is requested.
    torch.backends.cudnn.benchmark = False

In [None]:
# Load the tab-separated train / test splits from the working directory.
df_train = pd.read_csv('train_30k.csv', delimiter='\t')
df_test = pd.read_csv('test.csv', delimiter='\t')

In [None]:
# Preview the first rows to check the available columns and label encoding.
df_train.head()

Unnamed: 0,text,is_joke,clean_text,lemmas
0,Да все давно пора с этими дорожниками... В оди...,0,да все давно пора с этими дорожниками в один к...,давно пора этот дорожник котёл сборная россия ...
1,-лада калина теперь с музыкальной стереосистем...,0,лада калина теперь с музыкальной стереосистемо...,лада калина музыкальный стереосистема помирать...
2,"А вы разве программист? Вам до программиста, к...",0,а вы разве программист вам до программиста как...,программист программист луна рак учиться учиться
3,Отборочный матч чемпионата Европы-2012. Сборна...,0,отборочный матч чемпионата европы сборная росс...,отборочный матч чемпионат европа сборная росси...
4,"В центре коррупционного скандала - ФГУП ""Сверд...",0,в центре коррупционного скандала фгуп свердлов...,центр коррупционный скандал фгуп свердловскавт...


#### DeepPavlov/rubert-base-cased
**Описание**:
- **Архитектура**: 12 слоёв, размер скрытого состояния 768, 12 голов внимания, 180 миллионов параметров.
- **Тренировочные данные**: Русская Википедия и новостные данные.
- **Инициализация**: Многоязычная версия BERT‑base.
- **Цель**: Создание общей языковой модели для русского языка.

**Источники данных**:
- **Википедия**: Энциклопедические статьи.
- **Новости**: Современные новостные статьи.

Итог: универсальная модель для широкого круга задач обработки естественного языка (NLP), таких как классификация текста, анализ тональности и другие.

In [None]:
# Seed first: the classification head is newly initialised (see the warning
# printed below), so its weights depend on the RNG state.
set_random_seed(42)
model_name_base = "DeepPavlov/rubert-base-cased"
tokenizer_base = BertTokenizer.from_pretrained(model_name_base)
# Two output labels: the `is_joke` column is used as a binary target.
model_base = BertForSequenceClassification.from_pretrained(model_name_base, num_labels=2)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at DeepPavlov/rubert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
class JokeDatasetBert(Dataset):
    """Torch dataset that tokenizes raw texts for BERT sequence classification.

    Args:
        data: Mapping-like object (e.g. a DataFrame) with a ``'text'`` column of
            raw strings and an ``'is_joke'`` column of binary labels.
        tokenizer: Tokenizer exposing ``encode_plus``.
        max_token_len: Every sample is padded / truncated to this many tokens.
    """

    def __init__(self, data, tokenizer, max_token_len=128):
        # Materialise the columns as plain lists so that __getitem__ is purely
        # positional. Indexing a pandas Series with an int is label-based and
        # breaks as soon as the frame has a non-default index (after
        # filtering, shuffling or train_test_split).
        self.sentences = list(data['text'])
        self.target = list(data['is_joke'])
        self.tokenizer = tokenizer
        self.max_token_len = max_token_len

    def __len__(self):
        return len(self.sentences)

    def __getitem__(self, index: int):
        """Return ``input_ids``, ``attention_mask`` and ``labels`` for one sample."""
        sentences_index = self.sentences[index]
        target_index = self.target[index]

        encoding = self.tokenizer.encode_plus(
            sentences_index,
            add_special_tokens=True,
            max_length=self.max_token_len,
            return_token_type_ids=False,
            padding="max_length",
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )

        return dict(
            # return_tensors='pt' adds a leading batch axis; flatten drops it.
            input_ids=encoding["input_ids"].flatten(),
            attention_mask=encoding["attention_mask"].flatten(),
            # Kept as float for backward compatibility; the trainer's
            # compute_loss casts labels to int64 before the loss.
            labels=torch.tensor(target_index, dtype=torch.float))

In [None]:
# Datasets for the base RuBERT model (128-token inputs).
# NOTE(review): the test split built here is later passed as `eval_dataset`
# and therefore also drives best-checkpoint selection.
set_random_seed(42)
train_dataset_base = JokeDatasetBert(data=df_train,
                                    tokenizer=tokenizer_base,
                                    max_token_len=128)

test_dataset_base = JokeDatasetBert(data=df_test,
                                   tokenizer=tokenizer_base,
                                   max_token_len=128)

In [None]:
# Metrics
def compute_metrics(p: EvalPrediction):
    """Compute binary classification metrics from a Trainer evaluation result.

    Args:
        p: Evaluation output; ``p.predictions`` holds per-class scores and
            ``p.label_ids`` the reference labels.

    Returns:
        Dict with ``precision``, ``recall``, ``f1`` and ``accuracy``.
    """
    y_true = p.label_ids
    # The predicted class is the index of the highest score in each row.
    y_pred = np.argmax(p.predictions, axis=1)
    scorers = {
        "precision": precision_score,
        "recall": recall_score,
        "f1": f1_score,
        "accuracy": accuracy_score,
    }
    return {name: scorer(y_true, y_pred) for name, scorer in scorers.items()}

In [None]:
set_random_seed(42)
class CustomTrainer(Trainer):
    """Trainer that applies cross-entropy to the logits and reports wall-clock training time."""

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Compute cross-entropy between the model logits and integer labels.

        Args:
            model: Model being trained; its output must expose ``.logits``.
            inputs: Batch dict; ``"labels"`` is removed from it and cast to int64
                (the dataset emits float labels).
            return_outputs: When true, return ``(loss, outputs)`` instead of the loss.
            num_items_in_batch: Accepted for compatibility with newer ``Trainer``
                versions, which pass it as a keyword; unused here.
        """
        labels = inputs.pop("labels").to(torch.int64)
        outputs = model(**inputs)
        logits = outputs.logits
        loss_fct = nn.CrossEntropyLoss()
        loss = loss_fct(logits, labels)
        return (loss, outputs) if return_outputs else loss

    def train(self, *args, **kwargs):
        """Run the standard training loop and print the elapsed time in seconds."""
        start_time = time.time()
        result = super().train(*args, **kwargs)
        end_time = time.time()
        training_time = end_time - start_time
        print(f"Training time: {training_time:.2f} seconds")
        return result


# NOTE(review): newer transformers releases rename `evaluation_strategy` to
# `eval_strategy`; keep the spelling that matches the installed version.
training_args = TrainingArguments(
    output_dir='./base',             # Where checkpoints and results are written
    num_train_epochs=5,              # Number of epochs
    per_device_train_batch_size=16,  # Training batch size
    per_device_eval_batch_size=16,   # Evaluation batch size
    warmup_steps=500,                # Warm-up steps for the LR scheduler
    weight_decay=0.001,              # Weight decay coefficient
    logging_dir='./logs_base',       # Logging directory
    save_strategy="epoch",           # Save a checkpoint after every epoch
    evaluation_strategy="epoch",     # Evaluate after every epoch
    logging_strategy="epoch",
    logging_steps=100,               # Not used while logging_strategy="epoch"
    learning_rate=5e-5,              # Peak learning rate
    optim="adamw_torch",
    fp16=True,
    # The string "none" disables experiment trackers; passing None does not
    # (in transformers v4 it falls back to every installed integration).
    report_to="none",
    load_best_model_at_end=True,     # Reload the best checkpoint when training ends
    metric_for_best_model="f1",      # Metric used to pick the best checkpoint
    greater_is_better=True,          # Higher F1 is better
    save_total_limit=1
)


# Trainer for the base RuBERT model.
# NOTE(review): `eval_dataset` is the test split, so the per-epoch metrics and
# best-checkpoint selection are both computed on test data.
trainer_base = CustomTrainer(
    model=model_base,
    args=training_args,
    train_dataset=train_dataset_base,
    eval_dataset=test_dataset_base,
    compute_metrics=compute_metrics,
)

In [None]:
# Reseed immediately before training, then fine-tune the base RuBERT model.
set_random_seed(42)
trainer_base.train()

Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,0.414,0.308007,0.833974,0.9256,0.877401,0.870459
2,0.2312,0.400361,0.811438,0.942133,0.871915,0.861378
3,0.1129,0.497565,0.862148,0.9056,0.88334,0.880208
4,0.0521,0.638509,0.889698,0.879733,0.884688,0.88515
5,0.0212,0.711836,0.872027,0.899467,0.885534,0.883547


Training time: 701.66 seconds


TrainOutput(global_step=9375, training_loss=0.16627488037109375, metrics={'train_runtime': 701.4118, 'train_samples_per_second': 213.854, 'train_steps_per_second': 13.366, 'total_flos': 9866664576000000.0, 'train_loss': 0.16627488037109375, 'epoch': 5.0})

#### DeepPavlov/rubert-base-cased-conversational
**Описание**:
- **Архитектура**: 12 слоёв, размер скрытого состояния 768, 12 голов внимания, 180 миллионов параметров.
- **Тренировочные данные**: OpenSubtitles, Dirty, Pikabu, и социальные медиа данные из корпуса Taiga.
- **Инициализация**: Инициализация моделью RuBERT.
- **Цель**: Создание модели, оптимизированной для разговорного русского языка.

**Источники данных**:
- **OpenSubtitles**: Субтитры к фильмам и сериалам, содержащие диалоги и разговорные выражения.
- **Dirty**: Платформа с пользовательским контентом.
- **Pikabu**: Российский социальный новостной сайт.
- **Taiga**: Корпус, включающий данные из социальных медиа.

Итог: модель оптимизирована для задач, связанных с диалогами и взаимодействием с пользователями: чат-боты, вопросно-ответные системы и другие разговорные системы.

In [None]:
# Seed first: the classification head is newly initialised (see the warning
# printed below), so its weights depend on the RNG state.
set_random_seed(42)
model_name_conversational = "DeepPavlov/rubert-base-cased-conversational"
tokenizer_conversational = BertTokenizer.from_pretrained(model_name_conversational)
# Two output labels: the `is_joke` column is used as a binary target.
model_conversational = BertForSequenceClassification.from_pretrained(model_name_conversational, num_labels=2)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at DeepPavlov/rubert-base-cased-conversational and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Same data as for the base model, tokenized with the conversational
# model's own tokenizer.
train_dataset_conversational = JokeDatasetBert(data=df_train,
                                    tokenizer=tokenizer_conversational,
                                    max_token_len=128)

test_dataset_conversational = JokeDatasetBert(data=df_test,
                                   tokenizer=tokenizer_conversational,
                                   max_token_len=128)

In [None]:
set_random_seed(42)
# Same hyperparameters as for the base model so the two runs are comparable.
# NOTE(review): newer transformers releases rename `evaluation_strategy` to
# `eval_strategy`; keep the spelling that matches the installed version.
training_conv_args = TrainingArguments(
    output_dir='./conversational',   # Where checkpoints and results are written
    num_train_epochs=5,              # Number of epochs
    per_device_train_batch_size=16,  # Training batch size
    per_device_eval_batch_size=16,   # Evaluation batch size
    warmup_steps=500,                # Warm-up steps for the LR scheduler
    weight_decay=0.001,              # Weight decay coefficient
    logging_dir='./logs',            # Logging directory
    save_strategy="epoch",           # Save a checkpoint after every epoch
    evaluation_strategy="epoch",     # Evaluate after every epoch
    logging_strategy="epoch",
    logging_steps=100,               # Not used while logging_strategy="epoch"
    learning_rate=5e-5,              # Peak learning rate
    optim="adamw_torch",
    fp16=True,
    # The string "none" disables experiment trackers; passing None does not
    # (in transformers v4 it falls back to every installed integration).
    report_to="none",
    load_best_model_at_end=True,     # Reload the best checkpoint when training ends
    metric_for_best_model="f1",      # Metric used to pick the best checkpoint
    greater_is_better=True,          # Higher F1 is better
    save_total_limit=1
)


# Trainer for the conversational RuBERT model.
# NOTE(review): `eval_dataset` is the test split, so the per-epoch metrics and
# best-checkpoint selection are both computed on test data.
trainer_conversational = CustomTrainer(
    model=model_conversational,
    args=training_conv_args,
    train_dataset=train_dataset_conversational,
    eval_dataset=test_dataset_conversational,
    compute_metrics=compute_metrics,
)

In [None]:
# Reseed right before training, exactly as the base-model run does, so both
# fine-tuning runs start from the same RNG state.
set_random_seed(42)
trainer_conversational.train()

Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,0.3693,0.288258,0.837056,0.949333,0.889666,0.882078
2,0.1737,0.450888,0.834738,0.950933,0.889055,0.881143
3,0.0665,0.540329,0.896968,0.891467,0.894209,0.894364
4,0.022,0.712705,0.903836,0.892267,0.898014,0.898504
5,0.0057,0.77307,0.887676,0.9104,0.898894,0.897436


Training time: 703.26 seconds


TrainOutput(global_step=9375, training_loss=0.1274432480875651, metrics={'train_runtime': 703.0531, 'train_samples_per_second': 213.355, 'train_steps_per_second': 13.335, 'total_flos': 9866664576000000.0, 'train_loss': 0.1274432480875651, 'epoch': 5.0})

####  Bi-LSTM

In [None]:
from collections import Counter
import spacy
import os

# Load the large Russian spaCy pipeline; its vocab is used further down as
# the source of pretrained word vectors for the BiLSTM embedding layer.
nlp = spacy.load("ru_core_news_lg")

In [None]:
# Build the vocabulary from the lemmatised training texts.
counter = Counter()
# An empty lemma string is read back from CSV as NaN, and NaN has no
# .split(); normalise to '' so such rows simply contribute no tokens.
for text in df_train['lemmas'].fillna('').astype(str):
    counter.update(text.split())

# Word -> index mapping in first-seen order. Indices start at 2 because
# 0 and 1 are reserved for <pad> and <unk>.
vocab = {word: idx for idx, word in enumerate(counter, start=2)}
vocab['<pad>'] = 0
vocab['<unk>'] = 1
pad_idx = vocab['<pad>']
unk_idx = vocab['<unk>']

In [None]:
# Example: preview only the first few entries instead of dumping the whole
# vocabulary into the cell output.
dict(list(vocab.items())[:10])

{'давно': 2,
 'пора': 3,
 'этот': 4,
 'дорожник': 5,
 'котёл': 6,
 'сборная': 7,
 'россия': 8,
 'футбол': 9,
 'лада': 10,
 'калина': 11,
 'музыкальный': 12,
 'стереосистема': 13,
 'помирать': 14,
 'музыка': 15,
 'программист': 16,
 'луна': 17,
 'рак': 18,
 'учиться': 19,
 'отборочный': 20,
 'матч': 21,
 'чемпионат': 22,
 'европа': 23,
 'андорры': 24,
 'центр': 25,
 'коррупционный': 26,
 'скандал': 27,
 'фгуп': 28,
 'свердловскавтодор': 29,
 'выезжать': 30,
 'стать': 31,
 'город': 32,
 'съёмный': 33,
 'почасовой': 34,
 'квартира': 35,
 'продавать': 36,
 'свой': 37,
 'помёт': 38,
 'всетаки': 39,
 'приходить': 40,
 'мнение': 41,
 'напрасно': 42,
 'американы': 43,
 'выбрать': 44,
 'папуас': 45,
 'юсе': 46,
 'алло': 47,
 'дорогой': 48,
 'плохо': 49,
 'слышно': 50,
 'улететь': 51,
 'ещё': 52,
 'кувейт': 53,
 'чиновник': 54,
 'говно': 55,
 'вроде': 56,
 'утонуть': 57,
 'время': 58,
 'гдето': 59,
 'всплывать': 60,
 'извинить': 61,
 'ради': 62,
 'бог': 63,
 'знать': 64,
 'плач': 65,
 'геморрой'

In [None]:
class TextDataset(Dataset):
    """Dataset that maps whitespace-tokenized texts to fixed-length index tensors.

    Args:
        texts: Sequence of whitespace-separated token strings.
        labels: Sequence of labels aligned with ``texts``.
        vocab: Token -> index mapping; ``'<pad>'`` and ``'<unk>'`` entries are
            used when present, otherwise indices 0 and 1 are assumed.
        max_len: Every sample is padded / truncated to this many tokens.
    """

    def __init__(self, texts, labels, vocab, max_len=128):
        self.texts = texts
        self.labels = labels
        self.vocab = vocab
        self.max_len = max_len
        # Resolve the special indices from this dataset's own vocabulary
        # instead of module-level globals, so the class works on its own.
        self.pad_idx = vocab.get('<pad>', 0)
        self.unk_idx = vocab.get('<unk>', 1)

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        """Return ``{"text": LongTensor[max_len], "label": float tensor}`` for one sample."""
        text = self.texts[idx]
        # Missing text (e.g. NaN read from CSV) is treated as an empty sequence.
        tokens = text.split() if isinstance(text, str) else []
        indices = [self.vocab.get(token, self.unk_idx) for token in tokens][:self.max_len]
        indices += [self.pad_idx] * (self.max_len - len(indices))
        label = self.labels[idx]
        return {"text": torch.tensor(indices, dtype=torch.long),
                "label": torch.tensor(label, dtype=torch.float)}


# Datasets over the lemmatised texts; the test split reuses the vocabulary
# built from the training lemmas.
train_dataset = TextDataset(df_train['lemmas'].tolist(), df_train['is_joke'].tolist(), vocab)
test_dataset = TextDataset(df_test['lemmas'].tolist(), df_test['is_joke'].tolist(), vocab)

# NOTE(review): despite the `_bert` suffix these loaders wrap TextDataset and
# feed the BiLSTM model below; the names are kept because later cells use them.
train_dataloader_bert = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_dataloader_bert =  DataLoader(test_dataset, batch_size=64, shuffle=False)

In [None]:
# Show one encoded sample from each split; the labels now name the index
# that is actually displayed.
print(f"Training example #50: {train_dataset[50]}")
print(f"Testing example #0: {test_dataset[0]}")

First training example: {'text': tensor([231, 313, 314, 315, 142, 316,  37, 317, 318,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0]), 'label': tensor(0.)}
First testing example: {'text': tensor([ 231, 9239, 1194,  685, 1119, 9239, 5987, 1050, 9239,  231, 5987, 1050,
         231,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0, 

In [None]:
# Hyperparameters
VOCAB_SIZE = len(vocab)  # number of rows in the embedding matrix
MAX_LEN = 128  # same value as TextDataset's default max_len
BATCH_SIZE = 16  # NOTE(review): the DataLoaders above are built with batch_size=64
HIDDEN_DIM = 128  # LSTM hidden size per direction
OUTPUT_DIM = 2  # two classes
NUM_EPOCHS = 5  # NOTE(review): the BiLSTM runs below use num_epochs = 15
LEARNING_RATE = 1e-4
EMBED_DIM = 300  # must equal the spaCy vector width — presumably 300 for ru_core_news_lg; confirm

In [None]:
# Build the embedding matrix: row i holds the pretrained spaCy vector of the
# word with index i; words without a vector (and <pad>/<unk>) stay all-zero.
embedding_matrix = torch.zeros((VOCAB_SIZE, EMBED_DIM))
for word, idx in vocab.items():
    # `word in nlp.vocab` only tells whether a lexeme entry already exists,
    # and spaCy creates lexemes lazily, so it is False for most words even
    # when a vector is available. Query the vectors table directly instead.
    if nlp.vocab.has_vector(word):
        embedding_matrix[idx] = torch.tensor(nlp.vocab.get_vector(word))

In [None]:
class BiLSTMPretrainedModel(nn.Module):
    """Bidirectional LSTM classifier on top of a pretrained embedding matrix.

    Args:
        embedding_matrix: Float tensor of shape ``(num_embeddings, embedding_dim)``.
        hidden_dim: LSTM hidden size per direction.
        output_dim: Number of output classes (logits).
        pad_idx: Index of the padding token; padded positions are excluded
            from pooling.
        freeze: If true, the embedding weights are not updated during training.
    """

    def __init__(self, embedding_matrix, hidden_dim, output_dim, pad_idx, freeze):
        super(BiLSTMPretrainedModel, self).__init__()
        num_embeddings, embedding_dim = embedding_matrix.shape
        # from_pretrained wraps the given tensor without copying, so a
        # trainable model would update the caller's matrix in place and leak
        # fine-tuned weights into any model built from it later. Clone first.
        self.embedding = nn.Embedding.from_pretrained(
            embedding_matrix.clone(), freeze=freeze, padding_idx=pad_idx)
        self.bilstm = nn.LSTM(embedding_dim, hidden_dim, bidirectional=True, batch_first=True)
        self.fc = nn.Linear(hidden_dim * 2, output_dim)
        self.relu = nn.ReLU()

    def forward(self, text, **kwargs):
        """Return class logits of shape ``(batch, output_dim)`` for index tensor ``text``."""
        embedded = self.embedding(text)
        lstm_out, _ = self.bilstm(embedded)

        pad_idx = self.embedding.padding_idx
        if pad_idx is None:
            avg_pool = torch.mean(lstm_out, dim=1)
        else:
            # Average over real tokens only; otherwise short texts are
            # dominated by the outputs at padding positions.
            mask = (text != pad_idx).unsqueeze(-1).to(lstm_out.dtype)
            token_count = mask.sum(dim=1).clamp(min=1.0)  # guard all-padding rows
            avg_pool = (lstm_out * mask).sum(dim=1) / token_count

        output = self.fc(self.relu(avg_pool))
        return output


In [None]:
# Reseed before building the model so LSTM / linear initialisation and the
# DataLoader shuffling order are reproducible, as in the BERT experiments.
set_random_seed(42)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
freeze = False  # embedding weights are fine-tuned in this run
model_pretrained = BiLSTMPretrainedModel(embedding_matrix, HIDDEN_DIM, OUTPUT_DIM, pad_idx, freeze)
# Use the learning rate from the hyperparameter cell (1e-4) instead of a
# repeated literal.
optimizer = AdamW(model_pretrained.parameters(), lr=LEARNING_RATE)
criterion = nn.CrossEntropyLoss()
num_epochs = 15

In [None]:
def _binary_metrics(y_true, y_pred):
    """Return (precision, recall, f1, accuracy) for binary label arrays."""
    return (
        precision_score(y_true, y_pred, average='binary'),
        recall_score(y_true, y_pred, average='binary'),
        f1_score(y_true, y_pred, average='binary'),
        accuracy_score(y_true, y_pred),
    )


def _train_one_epoch(model, optimizer, dataloader, criterion, device, desc):
    """Run one optimisation pass over ``dataloader``, showing batch metrics in the progress bar."""
    model.train()
    with tqdm(total=len(dataloader), desc=desc) as pbar:
        for batch in dataloader:
            input_ids = batch['text'].to(device)
            labels = batch['label'].to(device).long()

            optimizer.zero_grad()
            outputs = model(input_ids)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Move labels / predictions to the CPU once per batch.
            _, batch_pred = torch.max(outputs.detach(), 1)
            y_true = labels.cpu().numpy()
            y_pred = batch_pred.cpu().numpy()
            precision, recall, f1, accuracy = _binary_metrics(y_true, y_pred)

            # Use the postfix so the "Epoch i/N - Training" label stays visible.
            pbar.set_postfix_str(
                f'Loss: {loss.item():.4f}, Acc: {accuracy:.4f}, Prec: {precision:.4f}, Rec: {recall:.4f}, F1: {f1:.4f}')
            pbar.update(1)


def _evaluate(model, dataloader, criterion, device, desc):
    """Evaluate ``model`` on ``dataloader``; return (avg_loss, precision, recall, f1, accuracy)."""
    model.eval()
    total_loss = 0.0
    pred_chunks, target_chunks = [], []

    with torch.no_grad():
        with tqdm(total=len(dataloader), desc=desc) as pbar:
            for batch in dataloader:
                input_ids = batch['text'].to(device)
                labels = batch['label'].to(device).long()

                outputs = model(input_ids)
                total_loss += criterion(outputs, labels).item()

                _, batch_pred = torch.max(outputs, 1)
                # Collect per-batch chunks and concatenate once at the end
                # instead of re-allocating a growing tensor every step.
                pred_chunks.append(batch_pred.cpu())
                target_chunks.append(labels.cpu())
                pbar.update(1)

    if not pred_chunks:
        raise ValueError("val_dataloader yielded no batches")

    y_pred = torch.cat(pred_chunks).numpy()
    y_true = torch.cat(target_chunks).numpy()
    avg_loss = total_loss / len(pred_chunks)
    return (avg_loss, *_binary_metrics(y_true, y_pred))


def train_eval(model, optimizer, train_dataloader, val_dataloader, criterion, device, num_epochs, save_dir):
    """Train ``model`` for ``num_epochs`` epochs, evaluating after each one.

    The state dict of the epoch with the highest validation F1 is written to
    ``<save_dir>/best_model.pth``. Per-epoch validation metrics, timings and
    the best F1 / epoch are printed.

    Args:
        model: Module called as ``model(batch['text'])`` and returning class logits.
        optimizer: Optimizer over the model's parameters.
        train_dataloader: Yields dicts with ``'text'`` and ``'label'`` tensors.
        val_dataloader: Same format; used for evaluation and model selection.
        criterion: Loss taking ``(logits, int64 labels)``.
        device: Device the model and batches are moved to.
        num_epochs: Number of epochs to run.
        save_dir: Directory for the best checkpoint (created if missing).

    Raises:
        ValueError: If ``val_dataloader`` yields no batches.
    """
    model.to(device)
    best_f1 = 0.0
    best_epoch = 0
    best_model_path = os.path.join(save_dir, "best_model.pth")

    # Create the checkpoint directory if it does not exist yet.
    if not os.path.exists(save_dir):
        print("Создаем директорию...")
        os.makedirs(save_dir, exist_ok=True)
        print("Успешно создали директорию!")
    else:
        print("Директория уже существует")

    total_start_time = time.time()  # start of the overall timing

    for epoch in range(num_epochs):
        start_time = time.time()
        epoch_label = f"Epoch {epoch+1}/{num_epochs}"

        _train_one_epoch(model, optimizer, train_dataloader, criterion, device,
                         desc=f"{epoch_label} - Training")

        avg_val_loss, precision_test, recall_test, f1_test, accuracy_test = _evaluate(
            model, val_dataloader, criterion, device, desc=f"{epoch_label} - Validation")

        print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {avg_val_loss:.4f}, Acc: {accuracy_test:.4f}, Prec: {precision_test:.4f}, Rec: {recall_test:.4f}, F1: {f1_test:.4f}')

        # Always checkpoint the first epoch so a best model exists even when
        # F1 never rises above 0; afterwards keep only strict improvements.
        if best_epoch == 0 or f1_test > best_f1:
            best_f1 = f1_test
            best_epoch = epoch + 1  # epochs are reported 1-based
            torch.save(model.state_dict(), best_model_path)
            print(f"Best model saved with F1: {best_f1:.4f}")

        epoch_time = time.time() - start_time
        print(f"Epoch {epoch + 1} completed in {epoch_time // 60:.0f}m {epoch_time % 60:.0f}s")

    total_training_time = time.time() - total_start_time
    print(f"Total training time: {total_training_time // 60:.0f}m {total_training_time % 60:.0f}s")

    # Report the best metric and the epoch it was reached at.
    print(f"Best F1 score: {best_f1:.4f} achieved at epoch {best_epoch}")

Для начала рассмотрим ситуацию, когда веса эмбеддингового слоя обучаемы (`freeze = False`).

In [None]:
# BiLSTM run with trainable embeddings (freeze = False above).
# NOTE(review): the `_bert`-suffixed loaders wrap the lemma-based TextDataset,
# and the validation loader is the test split.
train_eval(model=model_pretrained,
           optimizer=optimizer,
           train_dataloader=train_dataloader_bert,
           val_dataloader=test_dataloader_bert,
           criterion=criterion,
           device=device,
           num_epochs=num_epochs,
           save_dir='best_bilstm_false')

Создаем директорию...
Успешно создали директорию!


Epoch 1/15 - Training:   0%|          | 0/469 [00:00<?, ?it/s]

Epoch 1/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 1/15, Loss: 0.6539, Acc: 0.6592, Prec: 0.6947, Rec: 0.5699, F1: 0.6261
Best model saved with F1: 0.6261
Epoch 1 completed in 0m 6s


Epoch 2/15 - Training:   0%|          | 0/469 [00:00<?, ?it/s]

Epoch 2/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 2/15, Loss: 0.6129, Acc: 0.6774, Prec: 0.6685, Rec: 0.7056, F1: 0.6866
Best model saved with F1: 0.6866
Epoch 2 completed in 0m 6s


Epoch 3/15 - Training:   0%|          | 0/469 [00:00<?, ?it/s]

Epoch 3/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 3/15, Loss: 0.6137, Acc: 0.6436, Prec: 0.6056, Rec: 0.8267, F1: 0.6991
Best model saved with F1: 0.6991
Epoch 3 completed in 0m 6s


Epoch 4/15 - Training:   0%|          | 0/469 [00:00<?, ?it/s]

Epoch 4/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 4/15, Loss: 0.6291, Acc: 0.6772, Prec: 0.6854, Rec: 0.6571, F1: 0.6709
Epoch 4 completed in 0m 6s


Epoch 5/15 - Training:   0%|          | 0/469 [00:00<?, ?it/s]

Epoch 5/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 5/15, Loss: 0.6283, Acc: 0.6828, Prec: 0.6801, Rec: 0.6923, F1: 0.6861
Epoch 5 completed in 0m 6s


Epoch 6/15 - Training:   0%|          | 0/469 [00:00<?, ?it/s]

Epoch 6/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 6/15, Loss: 0.6421, Acc: 0.6850, Prec: 0.6954, Rec: 0.6600, F1: 0.6772
Epoch 6 completed in 0m 6s


Epoch 7/15 - Training:   0%|          | 0/469 [00:00<?, ?it/s]

Epoch 7/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 7/15, Loss: 0.6699, Acc: 0.6672, Prec: 0.7981, Rec: 0.4491, F1: 0.5747
Epoch 7 completed in 0m 6s


Epoch 8/15 - Training:   0%|          | 0/469 [00:00<?, ?it/s]

Epoch 8/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 8/15, Loss: 0.6386, Acc: 0.6703, Prec: 0.7953, Rec: 0.4600, F1: 0.5829
Epoch 8 completed in 0m 6s


Epoch 9/15 - Training:   0%|          | 0/469 [00:00<?, ?it/s]

Epoch 9/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 9/15, Loss: 0.6861, Acc: 0.6860, Prec: 0.7131, Rec: 0.6243, F1: 0.6657
Epoch 9 completed in 0m 6s


Epoch 10/15 - Training:   0%|          | 0/469 [00:00<?, ?it/s]

Epoch 10/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 10/15, Loss: 0.7280, Acc: 0.6814, Prec: 0.7025, Rec: 0.6309, F1: 0.6648
Epoch 10 completed in 0m 6s


Epoch 11/15 - Training:   0%|          | 0/469 [00:00<?, ?it/s]

Epoch 11/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 11/15, Loss: 0.7716, Acc: 0.6752, Prec: 0.6890, Rec: 0.6405, F1: 0.6639
Epoch 11 completed in 0m 6s


Epoch 12/15 - Training:   0%|          | 0/469 [00:00<?, ?it/s]

Epoch 12/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 12/15, Loss: 0.8275, Acc: 0.6693, Prec: 0.6714, Rec: 0.6653, F1: 0.6684
Epoch 12 completed in 0m 6s


Epoch 13/15 - Training:   0%|          | 0/469 [00:00<?, ?it/s]

Epoch 13/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 13/15, Loss: 0.8326, Acc: 0.6771, Prec: 0.6852, Rec: 0.6571, F1: 0.6708
Epoch 13 completed in 0m 6s


Epoch 14/15 - Training:   0%|          | 0/469 [00:00<?, ?it/s]

Epoch 14/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 14/15, Loss: 0.8360, Acc: 0.6820, Prec: 0.6963, Rec: 0.6475, F1: 0.6710
Epoch 14 completed in 0m 6s


Epoch 15/15 - Training:   0%|          | 0/469 [00:00<?, ?it/s]

Epoch 15/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 15/15, Loss: 0.7195, Acc: 0.6677, Prec: 0.6535, Rec: 0.7165, F1: 0.6835
Epoch 15 completed in 0m 6s
Total training time: 1m 26s
Best F1 score: 0.6991 achieved at epoch 3


Теперь заморозим веса эмбеддингового слоя (`freeze = True`).

In [None]:
# Reseed so the frozen-embedding run starts from the same RNG state as the
# trainable-embedding run, which keeps the two experiments comparable.
set_random_seed(42)
freeze = True  # embedding weights stay fixed in this run
model_pretrained = BiLSTMPretrainedModel(embedding_matrix, HIDDEN_DIM, OUTPUT_DIM, pad_idx, freeze)
# Use the learning rate from the hyperparameter cell (1e-4) instead of a
# repeated literal.
optimizer = AdamW(model_pretrained.parameters(), lr=LEARNING_RATE)

In [None]:
# BiLSTM run with frozen embeddings (freeze = True above); `criterion`,
# `device` and `num_epochs` are reused from the earlier setup cell.
train_eval(model=model_pretrained,
           optimizer=optimizer,
           train_dataloader=train_dataloader_bert,
           val_dataloader=test_dataloader_bert,
           criterion=criterion,
           device=device,
           num_epochs=num_epochs,
           save_dir='best_bilstm_true')

Создаем директорию...
Успешно создали директорию!


Epoch 1/15 - Training:   0%|          | 0/469 [00:00<?, ?it/s]

Epoch 1/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 1/15, Loss: 0.6931, Acc: 0.5008, Prec: 0.5008, Rec: 1.0000, F1: 0.6674
Best model saved with F1: 0.6674
Epoch 1 completed in 0m 6s


Epoch 2/15 - Training:   0%|          | 0/469 [00:00<?, ?it/s]

Epoch 2/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 2/15, Loss: 0.6928, Acc: 0.5008, Prec: 0.5008, Rec: 1.0000, F1: 0.6674
Epoch 2 completed in 0m 6s


Epoch 3/15 - Training:   0%|          | 0/469 [00:00<?, ?it/s]

Epoch 3/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 3/15, Loss: 0.6912, Acc: 0.5115, Prec: 0.6608, Rec: 0.0504, F1: 0.0937
Epoch 3 completed in 0m 6s


Epoch 4/15 - Training:   0%|          | 0/469 [00:00<?, ?it/s]

Epoch 4/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 4/15, Loss: 0.6904, Acc: 0.5008, Prec: 0.5008, Rec: 0.9997, F1: 0.6673
Epoch 4 completed in 0m 6s


Epoch 5/15 - Training:   0%|          | 0/469 [00:00<?, ?it/s]

Epoch 5/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 5/15, Loss: 0.6900, Acc: 0.5106, Prec: 0.6152, Rec: 0.0605, F1: 0.1102
Epoch 5 completed in 0m 6s


Epoch 6/15 - Training:   0%|          | 0/469 [00:00<?, ?it/s]

Epoch 6/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 6/15, Loss: 0.6898, Acc: 0.5108, Prec: 0.6239, Rec: 0.0584, F1: 0.1068
Epoch 6 completed in 0m 6s


Epoch 7/15 - Training:   0%|          | 0/469 [00:00<?, ?it/s]

Epoch 7/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 7/15, Loss: 0.6897, Acc: 0.5107, Prec: 0.6222, Rec: 0.0584, F1: 0.1068
Epoch 7 completed in 0m 6s


Epoch 8/15 - Training:   0%|          | 0/469 [00:00<?, ?it/s]

Epoch 8/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 8/15, Loss: 0.6896, Acc: 0.5108, Prec: 0.6225, Rec: 0.0589, F1: 0.1077
Epoch 8 completed in 0m 6s


Epoch 9/15 - Training:   0%|          | 0/469 [00:00<?, ?it/s]

Epoch 9/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 9/15, Loss: 0.6895, Acc: 0.5110, Prec: 0.6215, Rec: 0.0600, F1: 0.1094
Epoch 9 completed in 0m 6s


Epoch 10/15 - Training:   0%|          | 0/469 [00:00<?, ?it/s]

Epoch 10/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 10/15, Loss: 0.6895, Acc: 0.5112, Prec: 0.6331, Rec: 0.0571, F1: 0.1047
Epoch 10 completed in 0m 6s


Epoch 11/15 - Training:   0%|          | 0/469 [00:00<?, ?it/s]

Epoch 11/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 11/15, Loss: 0.6895, Acc: 0.5115, Prec: 0.6438, Rec: 0.0549, F1: 0.1012
Epoch 11 completed in 0m 6s


Epoch 12/15 - Training:   0%|          | 0/469 [00:00<?, ?it/s]

Epoch 12/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 12/15, Loss: 0.6892, Acc: 0.5119, Prec: 0.6498, Rec: 0.0549, F1: 0.1013
Epoch 12 completed in 0m 6s


Epoch 13/15 - Training:   0%|          | 0/469 [00:00<?, ?it/s]

Epoch 13/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 13/15, Loss: 0.6893, Acc: 0.5116, Prec: 0.6388, Rec: 0.0571, F1: 0.1048
Epoch 13 completed in 0m 6s


Epoch 14/15 - Training:   0%|          | 0/469 [00:00<?, ?it/s]

Epoch 14/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 14/15, Loss: 0.6892, Acc: 0.5119, Prec: 0.6453, Rec: 0.0563, F1: 0.1035
Epoch 14 completed in 0m 6s


Epoch 15/15 - Training:   0%|          | 0/469 [00:00<?, ?it/s]

Epoch 15/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 15/15, Loss: 0.6891, Acc: 0.5122, Prec: 0.6520, Rec: 0.0555, F1: 0.1022
Epoch 15 completed in 0m 6s
Total training time: 1m 24s
Best F1 score: 0.6674 achieved at epoch 1
