In [1]:
import pandas as pd
import random
import numpy as np
import time
import torch
from torch import nn
from torch.optim import AdamW
from sklearn.model_selection import train_test_split
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from transformers import  BertTokenizer, BertForSequenceClassification
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

from transformers import TrainingArguments
from transformers import Trainer
from torch.optim.lr_scheduler import CosineAnnealingLR

import torchvision.models as models
from transformers import EvalPrediction

from tqdm.notebook import tqdm
import warnings
warnings.filterwarnings('ignore')

In [2]:
def set_random_seed(seed):
    torch.backends.cudnn.deterministic = True
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
%cd /content/drive/My Drive/diploma/data/

/content/drive/My Drive/diploma/data


In [5]:
df_train = pd.read_csv('new_train.csv', delimiter='\t')
df_test = pd.read_csv('test.csv', delimiter='\t')

In [6]:
df_train.head()

Unnamed: 0,text,is_joke,clean_text,lemmas
0,Да все давно пора с этими дорожниками... В оди...,0,да все давно пора с этими дорожниками в один к...,давно пора этот дорожник котёл сборная россия ...
1,-лада калина теперь с музыкальной стереосистем...,0,лада калина теперь с музыкальной стереосистемо...,лада калина музыкальный стереосистема помирать...
2,"А вы разве программист? Вам до программиста, к...",0,а вы разве программист вам до программиста как...,программист программист луна рак учиться учиться
3,Отборочный матч чемпионата Европы-2012. Сборна...,0,отборочный матч чемпионата европы сборная росс...,отборочный матч чемпионат европа сборная росси...
4,"В центре коррупционного скандала - ФГУП ""Сверд...",0,в центре коррупционного скандала фгуп свердлов...,центр коррупционный скандал фгуп свердловскавт...


In [6]:
set_random_seed(42)
model_name_base = "DeepPavlov/rubert-base-cased"
tokenizer_base = BertTokenizer.from_pretrained(model_name_base)
model_base = BertForSequenceClassification.from_pretrained(model_name_base, num_labels=2)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at DeepPavlov/rubert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
class JokeDatasetBert(Dataset):
    def __init__(self, data, tokenizer, max_token_len=128):

        self.sentences = data['text']
        self.target = data['is_joke']
        self.tokenizer = tokenizer
        self.max_token_len = max_token_len

    def __len__(self):
        return len(self.sentences)

    def __getitem__(self, index: int):
        sentences_index = self.sentences[index]
        target_index = self.target[index]

        encoding = self.tokenizer.encode_plus(

            sentences_index,
            add_special_tokens=True,
            max_length=self.max_token_len,
            return_token_type_ids=False,
            padding="max_length",
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )

        return dict(
            input_ids=encoding["input_ids"].flatten(),
            attention_mask=encoding["attention_mask"].flatten(),
            labels=torch.tensor(target_index, dtype=torch.float))

In [8]:
#датасет
set_random_seed(42)
train_dataset_base = JokeDatasetBert(data=df_train,
                                    tokenizer=tokenizer_base,
                                    max_token_len=128)

test_dataset_base = JokeDatasetBert(data=df_test,
                                   tokenizer=tokenizer_base,
                                   max_token_len=128)

In [9]:
#метрики
def compute_metrics(p: EvalPrediction):
    preds = np.argmax(p.predictions, axis=1)
    precision = precision_score(p.label_ids, preds)
    recall = recall_score(p.label_ids, preds)
    f1 = f1_score(p.label_ids, preds)
    accuracy = accuracy_score(p.label_ids, preds)
    return {
        "precision": precision,
        "recall": recall,
        "f1": f1,
        "accuracy": accuracy,
    }

In [10]:
set_random_seed(42)
class CustomTrainer(Trainer):

    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.pop("labels").to(torch.int64)
        outputs = model(**inputs)
        logits = outputs.logits
        loss_fct = nn.CrossEntropyLoss()
        loss = loss_fct(logits, labels)
        return (loss, outputs) if return_outputs else loss

    def train(self, *args, **kwargs):
        start_time = time.time()
        result = super().train(*args, **kwargs)
        end_time = time.time()
        training_time = end_time - start_time
        print(f"Training time: {training_time:.2f} seconds")
        return result


training_args = TrainingArguments(
    output_dir='./base',          # Путь для сохранения результатов и модели
    num_train_epochs=5,              # Количество эпох
    per_device_train_batch_size=16,  # Размер пакета для обучения
    per_device_eval_batch_size=16,   # Размер пакета для валидации/тестирования
    warmup_steps=500,                # Шаги разогрева для планировщика скорости обучения
    weight_decay=0.001,               # Вес уменьшения
    logging_dir='./logs_base',            # Путь для логирования
    save_strategy="epoch",           # Сохранять модель после каждой эпохи
    evaluation_strategy="epoch",  # Выполнять валидацию после каждой эпохи
    logging_strategy="epoch",
    logging_steps=100,
    learning_rate=5e-5,                       # Задать скорость обучения
    optim="adamw_torch",
    fp16=True,
    report_to=None,
    load_best_model_at_end=True,      # Загружать лучшую модель в конце тренировки
    metric_for_best_model="f1", # Метрика для определения лучшей модели
    greater_is_better=True,           # Указывать, что более высокое значение метрики лучше
    save_total_limit=1
)


trainer_base = CustomTrainer(
    model=model_base,
    args=training_args,
    train_dataset=train_dataset_base,
    eval_dataset=test_dataset_base,
    compute_metrics=compute_metrics,
)

In [11]:
set_random_seed(42)
trainer_base.train()

Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,0.4211,0.389638,0.758978,0.958133,0.847006,0.826656
2,0.254,0.385338,0.843099,0.928533,0.883756,0.877671
3,0.1522,0.422947,0.847151,0.923733,0.883786,0.878339
4,0.0925,0.531685,0.886818,0.888,0.887408,0.887153
5,0.0425,0.64201,0.86677,0.898667,0.88243,0.880075


Training time: 1967.40 seconds


TrainOutput(global_step=10545, training_loss=0.19243199948509365, metrics={'train_runtime': 1966.7141, 'train_samples_per_second': 85.752, 'train_steps_per_second': 5.362, 'total_flos': 1.1093419871616e+16, 'train_loss': 0.19243199948509365, 'epoch': 5.0})

#### DeepPavlov/rubert-base-cased-conversational
**Описание**:
- **Архитектура**: 12 слоев, 768 скрытых единиц, 12 голов, 180 миллионов параметров.
- **Тренировочные данные**: OpenSubtitles, Dirty, Pikabu, и социальные медиа данные из корпуса Taiga.
- **Инициализация**: Инициализация моделью RuBERT.
- **Цель**: Создание модели, оптимизированной для разговорного русского языка.

**Источники данных**:
- **OpenSubtitles**: Субтитры к фильмам и сериалам, содержащие диалоги и разговорные выражения.
- **Dirty**: Платформа с пользовательским контентом.
- **Pikabu**: Российский социальный новостной сайт.
- **Taiga**: Корпус, включающий данные из социальных медиа.

Итог: Оптимизирована для задач, связанных с диалогами и взаимодействием с пользователями, таких как чат-боты, системы вопрос-ответ, и другие разговорные системы.

In [11]:
set_random_seed(42)
model_name_conversational = "DeepPavlov/rubert-base-cased-conversational"
tokenizer_conversational = BertTokenizer.from_pretrained(model_name_conversational)
model_conversational = BertForSequenceClassification.from_pretrained(model_name_conversational, num_labels=2)

tokenizer_config.json:   0%|          | 0.00/24.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/1.40M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/642 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/714M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at DeepPavlov/rubert-base-cased-conversational and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [12]:
train_dataset_conversational = JokeDatasetBert(data=df_train,
                                    tokenizer=tokenizer_conversational,
                                    max_token_len=128)

test_dataset_conversational = JokeDatasetBert(data=df_test,
                                   tokenizer=tokenizer_conversational,
                                   max_token_len=128)

In [13]:
set_random_seed(42)
training_conv_args = TrainingArguments(
    output_dir='./conversational',          # Путь для сохранения результатов и модели
    num_train_epochs=5,              # Количество эпох
    per_device_train_batch_size=16,  # Размер пакета для обучения
    per_device_eval_batch_size=16,   # Размер пакета для валидации/тестирования
    warmup_steps=500,                # Шаги разогрева для планировщика скорости обучения
    weight_decay=0.001,               # Вес уменьшения
    logging_dir='./logs',            # Путь для логирования
    save_strategy="epoch",           # Сохранять модель после каждой эпохи
    evaluation_strategy="epoch",  # Выполнять валидацию после каждой эпохи
    logging_strategy="epoch",
    logging_steps=100,
    learning_rate=5e-5,                       # Задать скорость обучения
    optim="adamw_torch",
    fp16=True,
    report_to=None,
    load_best_model_at_end=True,      # Загружать лучшую модель в конце тренировки
    metric_for_best_model="f1", # Метрика для определения лучшей модели
    greater_is_better=True,           # Указывать, что более высокое значение метрики лучше
    save_total_limit=1
)


trainer_conversational = CustomTrainer(
    model=model_conversational,
    args=training_conv_args,
    train_dataset=train_dataset_conversational,
    eval_dataset=test_dataset_conversational,
    compute_metrics=compute_metrics,
)

In [14]:
trainer_conversational.train()

Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,0.3601,0.267546,0.877911,0.914667,0.895912,0.893563
2,0.1707,0.431564,0.839784,0.954667,0.893548,0.886084
3,0.0685,0.45442,0.892291,0.901333,0.89679,0.8961
4,0.0237,0.677233,0.891601,0.905867,0.898677,0.897703
5,0.0082,0.701324,0.893935,0.908,0.900913,0.899973


Training time: 1990.66 seconds


TrainOutput(global_step=10545, training_loss=0.1262585460433128, metrics={'train_runtime': 1990.1904, 'train_samples_per_second': 84.741, 'train_steps_per_second': 5.298, 'total_flos': 1.1093419871616e+16, 'train_loss': 0.1262585460433128, 'epoch': 5.0})

####  Bi-LSTM

In [20]:
from collections import Counter
import spacy
import os

nlp = spacy.load("ru_core_news_lg")

In [21]:
# Создание словаря
counter = Counter()
for text in df_train['lemmas']:
    counter.update(text.split())

# Создание индексации слов
vocab = {word: idx + 2 for idx, (word, _) in enumerate(counter.items())}  # Индексы начинаются с 2, так как 0 и 1 зарезервированы для <pad> и <unk>
vocab['<pad>'] = 0
vocab['<unk>'] = 1
pad_idx = vocab['<pad>']
unk_idx = vocab['<unk>']

In [22]:
#пример
vocab

{'давно': 2,
 'пора': 3,
 'этот': 4,
 'дорожник': 5,
 'котёл': 6,
 'сборная': 7,
 'россия': 8,
 'футбол': 9,
 'лада': 10,
 'калина': 11,
 'музыкальный': 12,
 'стереосистема': 13,
 'помирать': 14,
 'музыка': 15,
 'программист': 16,
 'луна': 17,
 'рак': 18,
 'учиться': 19,
 'отборочный': 20,
 'матч': 21,
 'чемпионат': 22,
 'европа': 23,
 'андорры': 24,
 'центр': 25,
 'коррупционный': 26,
 'скандал': 27,
 'фгуп': 28,
 'свердловскавтодор': 29,
 'выезжать': 30,
 'стать': 31,
 'город': 32,
 'съёмный': 33,
 'почасовой': 34,
 'квартира': 35,
 'продавать': 36,
 'свой': 37,
 'помёт': 38,
 'всетаки': 39,
 'приходить': 40,
 'мнение': 41,
 'напрасно': 42,
 'американы': 43,
 'выбрать': 44,
 'папуас': 45,
 'юсе': 46,
 'алло': 47,
 'дорогой': 48,
 'плохо': 49,
 'слышно': 50,
 'улететь': 51,
 'ещё': 52,
 'кувейт': 53,
 'чиновник': 54,
 'говно': 55,
 'вроде': 56,
 'утонуть': 57,
 'время': 58,
 'гдето': 59,
 'всплывать': 60,
 'извинить': 61,
 'ради': 62,
 'бог': 63,
 'знать': 64,
 'плач': 65,
 'геморрой'

In [23]:
class TextDataset(Dataset):
    def __init__(self, texts, labels, vocab, max_len=128):
        self.texts = texts
        self.labels = labels
        self.vocab = vocab
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        tokens = text.split()
        indices = [self.vocab.get(token, unk_idx) for token in tokens]
        if len(indices) < self.max_len:
            indices += [pad_idx] * (self.max_len - len(indices))
        else:
            indices = indices[:self.max_len]
        label = self.labels[idx]
        return {"text": torch.tensor(indices), "label": torch.tensor(label, dtype=torch.float)}


train_dataset = TextDataset(df_train['lemmas'].tolist(), df_train['is_joke'].tolist(), vocab)
test_dataset = TextDataset(df_test['lemmas'].tolist(), df_test['is_joke'].tolist(), vocab)

train_dataloader_bert = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_dataloader_bert =  DataLoader(test_dataset, batch_size=64, shuffle=False)

In [24]:
print(f"First training example: {train_dataset[50]}")
print(f"First testing example: {test_dataset[0]}")

First training example: {'text': tensor([231, 313, 314, 315, 142, 316,  37, 317, 318,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0]), 'label': tensor(0.)}
First testing example: {'text': tensor([ 231, 9239, 1194,  685, 1119, 9239, 5987, 1050, 9239,  231, 5987, 1050,
         231,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0, 

In [25]:
# Гиперпараметры
VOCAB_SIZE = len(vocab)
MAX_LEN = 128
BATCH_SIZE = 16
HIDDEN_DIM = 128
OUTPUT_DIM = 2
NUM_EPOCHS = 5
LEARNING_RATE = 1e-4
EMBED_DIM = 300

In [26]:
# Создание матрицы эмбеддингов
embedding_matrix = torch.zeros((VOCAB_SIZE, EMBED_DIM))
for word, idx in vocab.items():
    if word in nlp.vocab:
        embedding_matrix[idx] = torch.tensor(nlp.vocab[word].vector)
    else:
        embedding_matrix[idx] = torch.zeros(EMBED_DIM)

In [27]:
class BiLSTMPretrainedModel(nn.Module):
    def __init__(self, embedding_matrix, hidden_dim, output_dim, pad_idx, freeze):
        super(BiLSTMPretrainedModel, self).__init__()
        num_embeddings, embedding_dim = embedding_matrix.shape
        self.embedding = nn.Embedding.from_pretrained(embedding_matrix, freeze=freeze, padding_idx=pad_idx)
        self.bilstm = nn.LSTM(embedding_dim, hidden_dim, bidirectional=True, batch_first=True)
        self.fc = nn.Linear(hidden_dim * 2, output_dim)
        self.relu = nn.ReLU()

    def forward(self, text, **kwargs):
        embedded = self.embedding(text)
        lstm_out, _ = self.bilstm(embedded)
        avg_pool = torch.mean(lstm_out, dim=1)
        output = self.fc(self.relu(avg_pool))
        return output


In [28]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
freeze = False
model_pretrained = BiLSTMPretrainedModel(embedding_matrix, HIDDEN_DIM, OUTPUT_DIM, pad_idx, freeze)
optimizer = AdamW(model_pretrained.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()
num_epochs = 15

In [29]:
def train_eval(model, optimizer, train_dataloader, val_dataloader, criterion, device, num_epochs, save_dir):
    model.to(device)
    best_f1 = 0.0
    best_epoch = 0
    best_model_path = os.path.join(save_dir, "best_model.pth")

    # Создаем директорию, если она не существует
    if not os.path.exists(save_dir):
        print(f"Создаем директорию...")
        os.makedirs(save_dir)
        print(f"Успешно создали директорию!")
    else:
        print(f"Директория уже существует")

    total_start_time = time.time()  # Начальное время для общего подсчета

    for epoch in range(num_epochs):
        start_time = time.time()

        # Training phase
        model.train()
        with tqdm(total=len(train_dataloader), desc=f"Epoch {epoch+1}/{num_epochs} - Training") as pbar:
            for batch in train_dataloader:
                input_ids = batch['text'].to(device)
                labels = batch['label'].to(device).long()

                optimizer.zero_grad()
                outputs = model(input_ids)
                loss = criterion(outputs, labels)

                _, y_pred = torch.max(outputs.detach(), 1)
                precision_train = precision_score(labels.cpu(), y_pred.cpu(), average='binary')
                recall_train = recall_score(labels.cpu(), y_pred.cpu(), average='binary')
                f1_train = f1_score(labels.cpu(), y_pred.cpu(), average='binary')
                accuracy_train = accuracy_score(labels.cpu(), y_pred.cpu())

                loss.backward()
                optimizer.step()

                pbar.set_description(f'Loss: {loss.item():.4f}, Acc: {accuracy_train:.4f}, Prec: {precision_train:.4f}, Rec: {recall_train:.4f}, F1: {f1_train:.4f}')
                pbar.update(1)

        # Validation phase
        model.eval()
        val_loss = 0.0
        predictions = torch.tensor([], dtype=torch.long, device=device)
        targets = torch.tensor([], dtype=torch.long, device=device)

        with torch.no_grad():
            with tqdm(total=len(val_dataloader), desc=f"Epoch {epoch+1}/{num_epochs} - Validation") as pbar:
                for batch in val_dataloader:
                    input_ids = batch['text'].to(device)
                    labels = batch['label'].to(device).long()

                    outputs = model(input_ids)
                    loss = criterion(outputs, labels)
                    val_loss += loss.item()

                    _, y_pred = torch.max(outputs.detach(), 1)

                    predictions = torch.cat((predictions, y_pred), dim=0)
                    targets = torch.cat((targets, labels), dim=0)

                    pbar.update(1)

            precision_test = precision_score(targets.cpu(), predictions.cpu(), average='binary')
            recall_test = recall_score(targets.cpu(), predictions.cpu(), average='binary')
            f1_test = f1_score(targets.cpu(), predictions.cpu(), average='binary')
            accuracy_test = accuracy_score(targets.cpu(), predictions.cpu())
            avg_val_loss = val_loss / len(val_dataloader)

            print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {avg_val_loss:.4f}, Acc: {accuracy_test:.4f}, Prec: {precision_test:.4f}, Rec: {recall_test:.4f}, F1: {f1_test:.4f}')

            if f1_test > best_f1:
                best_f1 = f1_test
                best_epoch = epoch + 1  # Запоминаем номер эпохи (начиная с 1)
                torch.save(model.state_dict(), best_model_path)
                print(f"Best model saved with F1: {best_f1:.4f}")

        end_time = time.time()
        epoch_time = end_time - start_time
        print(f"Epoch {epoch + 1} completed in {epoch_time // 60:.0f}m {epoch_time % 60:.0f}s")

    total_end_time = time.time()  # Конечное время для общего подсчета
    total_training_time = total_end_time - total_start_time
    print(f"Total training time: {total_training_time // 60:.0f}m {total_training_time % 60:.0f}s")

    # Выводим лучшую метрику и номер эпохи
    print(f"Best F1 score: {best_f1:.4f} achieved at epoch {best_epoch}")

Для начала рассмотрим ситуацию когда веса эмбеддингового слоя будут обучаемы

In [30]:
train_eval(model=model_pretrained,
           optimizer=optimizer,
           train_dataloader=train_dataloader_bert,
           val_dataloader=test_dataloader_bert,
           criterion=criterion,
           device=device,
           num_epochs=num_epochs,
           save_dir='best_bilstm_false')

Создаем директорию...
Успешно создали директорию!


Epoch 1/15 - Training:   0%|          | 0/528 [00:00<?, ?it/s]

Epoch 1/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 1/15, Loss: 0.6310, Acc: 0.6305, Prec: 0.5949, Rec: 0.8219, F1: 0.6902
Best model saved with F1: 0.6902
Epoch 1 completed in 0m 18s


Epoch 2/15 - Training:   0%|          | 0/528 [00:00<?, ?it/s]

Epoch 2/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 2/15, Loss: 0.6115, Acc: 0.6739, Prec: 0.6589, Rec: 0.7232, F1: 0.6895
Epoch 2 completed in 0m 17s


Epoch 3/15 - Training:   0%|          | 0/528 [00:00<?, ?it/s]

Epoch 3/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 3/15, Loss: 0.6079, Acc: 0.6663, Prec: 0.6371, Rec: 0.7749, F1: 0.6993
Best model saved with F1: 0.6993
Epoch 3 completed in 0m 18s


Epoch 4/15 - Training:   0%|          | 0/528 [00:00<?, ?it/s]

Epoch 4/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 4/15, Loss: 0.6239, Acc: 0.6584, Prec: 0.6215, Rec: 0.8131, F1: 0.7045
Best model saved with F1: 0.7045
Epoch 4 completed in 0m 18s


Epoch 5/15 - Training:   0%|          | 0/528 [00:00<?, ?it/s]

Epoch 5/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 5/15, Loss: 0.6460, Acc: 0.6824, Prec: 0.6689, Rec: 0.7245, F1: 0.6956
Epoch 5 completed in 0m 18s


Epoch 6/15 - Training:   0%|          | 0/528 [00:00<?, ?it/s]

Epoch 6/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 6/15, Loss: 0.6425, Acc: 0.6935, Prec: 0.6816, Rec: 0.7280, F1: 0.7041
Epoch 6 completed in 0m 16s


Epoch 7/15 - Training:   0%|          | 0/528 [00:00<?, ?it/s]

Epoch 7/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 7/15, Loss: 0.6942, Acc: 0.6955, Prec: 0.7242, Rec: 0.6331, F1: 0.6756
Epoch 7 completed in 0m 16s


Epoch 8/15 - Training:   0%|          | 0/528 [00:00<?, ?it/s]

Epoch 8/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 8/15, Loss: 0.7757, Acc: 0.6816, Prec: 0.6715, Rec: 0.7131, F1: 0.6917
Epoch 8 completed in 0m 17s


Epoch 9/15 - Training:   0%|          | 0/528 [00:00<?, ?it/s]

Epoch 9/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 9/15, Loss: 0.8451, Acc: 0.6830, Prec: 0.6925, Rec: 0.6600, F1: 0.6759
Epoch 9 completed in 0m 16s


Epoch 10/15 - Training:   0%|          | 0/528 [00:00<?, ?it/s]

Epoch 10/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 10/15, Loss: 0.8239, Acc: 0.6769, Prec: 0.6792, Rec: 0.6725, F1: 0.6759
Epoch 10 completed in 0m 17s


Epoch 11/15 - Training:   0%|          | 0/528 [00:00<?, ?it/s]

Epoch 11/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 11/15, Loss: 0.8976, Acc: 0.6769, Prec: 0.6807, Rec: 0.6685, F1: 0.6746
Epoch 11 completed in 0m 16s


Epoch 12/15 - Training:   0%|          | 0/528 [00:00<?, ?it/s]

Epoch 12/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 12/15, Loss: 1.0354, Acc: 0.6625, Prec: 0.6505, Rec: 0.7048, F1: 0.6766
Epoch 12 completed in 0m 16s


Epoch 13/15 - Training:   0%|          | 0/528 [00:00<?, ?it/s]

Epoch 13/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 13/15, Loss: 1.0437, Acc: 0.6653, Prec: 0.6630, Rec: 0.6747, F1: 0.6688
Epoch 13 completed in 0m 16s


Epoch 14/15 - Training:   0%|          | 0/528 [00:00<?, ?it/s]

Epoch 14/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 14/15, Loss: 0.9493, Acc: 0.6725, Prec: 0.6870, Rec: 0.6357, F1: 0.6604
Epoch 14 completed in 0m 17s


Epoch 15/15 - Training:   0%|          | 0/528 [00:00<?, ?it/s]

Epoch 15/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 15/15, Loss: 1.0810, Acc: 0.6643, Prec: 0.6709, Rec: 0.6469, F1: 0.6587
Epoch 15 completed in 0m 17s
Total training time: 4m 14s
Best F1 score: 0.7045 achieved at epoch 4


Теперь веса заморозим и посмотрим, какой результат получится

In [31]:
freeze = True
model_pretrained = BiLSTMPretrainedModel(embedding_matrix, HIDDEN_DIM, OUTPUT_DIM, pad_idx, freeze)
optimizer = AdamW(model_pretrained.parameters(), lr=1e-4)

In [32]:
train_eval(model=model_pretrained,
           optimizer=optimizer,
           train_dataloader=train_dataloader_bert,
           val_dataloader=test_dataloader_bert,
           criterion=criterion,
           device=device,
           num_epochs=num_epochs,
           save_dir='best_bilstm_true')

Создаем директорию...
Успешно создали директорию!


Epoch 1/15 - Training:   0%|          | 0/528 [00:00<?, ?it/s]

Epoch 1/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 1/15, Loss: 0.6930, Acc: 0.5112, Prec: 0.6286, Rec: 0.0587, F1: 0.1073
Best model saved with F1: 0.1073
Epoch 1 completed in 0m 15s


Epoch 2/15 - Training:   0%|          | 0/528 [00:00<?, ?it/s]

Epoch 2/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 2/15, Loss: 0.6918, Acc: 0.5008, Prec: 0.5008, Rec: 1.0000, F1: 0.6674
Best model saved with F1: 0.6674
Epoch 2 completed in 0m 14s


Epoch 3/15 - Training:   0%|          | 0/528 [00:00<?, ?it/s]

Epoch 3/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 3/15, Loss: 0.6899, Acc: 0.5108, Prec: 0.6124, Rec: 0.0632, F1: 0.1146
Epoch 3 completed in 0m 14s


Epoch 4/15 - Training:   0%|          | 0/528 [00:00<?, ?it/s]

Epoch 4/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 4/15, Loss: 0.6898, Acc: 0.5106, Prec: 0.6133, Rec: 0.0613, F1: 0.1115
Epoch 4 completed in 0m 14s


Epoch 5/15 - Training:   0%|          | 0/528 [00:00<?, ?it/s]

Epoch 5/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 5/15, Loss: 0.6896, Acc: 0.5104, Prec: 0.6180, Rec: 0.0587, F1: 0.1072
Epoch 5 completed in 0m 14s


Epoch 6/15 - Training:   0%|          | 0/528 [00:00<?, ?it/s]

Epoch 6/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 6/15, Loss: 0.6896, Acc: 0.5019, Prec: 0.5013, Rec: 0.9979, F1: 0.6674
Best model saved with F1: 0.6674
Epoch 6 completed in 0m 14s


Epoch 7/15 - Training:   0%|          | 0/528 [00:00<?, ?it/s]

Epoch 7/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 7/15, Loss: 0.6894, Acc: 0.5115, Prec: 0.6402, Rec: 0.0560, F1: 0.1030
Epoch 7 completed in 0m 14s


Epoch 8/15 - Training:   0%|          | 0/528 [00:00<?, ?it/s]

Epoch 8/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 8/15, Loss: 0.6892, Acc: 0.5118, Prec: 0.6433, Rec: 0.0563, F1: 0.1035
Epoch 8 completed in 0m 14s


Epoch 9/15 - Training:   0%|          | 0/528 [00:00<?, ?it/s]

Epoch 9/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 9/15, Loss: 0.6892, Acc: 0.5119, Prec: 0.6508, Rec: 0.0547, F1: 0.1009
Epoch 9 completed in 0m 14s


Epoch 10/15 - Training:   0%|          | 0/528 [00:00<?, ?it/s]

Epoch 10/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 10/15, Loss: 0.6890, Acc: 0.5119, Prec: 0.6480, Rec: 0.0555, F1: 0.1022
Epoch 10 completed in 0m 14s


Epoch 11/15 - Training:   0%|          | 0/528 [00:00<?, ?it/s]

Epoch 11/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 11/15, Loss: 0.6888, Acc: 0.5126, Prec: 0.6667, Rec: 0.0533, F1: 0.0988
Epoch 11 completed in 0m 15s


Epoch 12/15 - Training:   0%|          | 0/528 [00:00<?, ?it/s]

Epoch 12/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 12/15, Loss: 0.6886, Acc: 0.5131, Prec: 0.6781, Rec: 0.0528, F1: 0.0980
Epoch 12 completed in 0m 15s


Epoch 13/15 - Training:   0%|          | 0/528 [00:00<?, ?it/s]

Epoch 13/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 13/15, Loss: 0.6887, Acc: 0.5123, Prec: 0.6503, Rec: 0.0565, F1: 0.1040
Epoch 13 completed in 0m 15s


Epoch 14/15 - Training:   0%|          | 0/528 [00:00<?, ?it/s]

Epoch 14/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 14/15, Loss: 0.6883, Acc: 0.5131, Prec: 0.6793, Rec: 0.0525, F1: 0.0975
Epoch 14 completed in 0m 14s


Epoch 15/15 - Training:   0%|          | 0/528 [00:00<?, ?it/s]

Epoch 15/15 - Validation:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 15/15, Loss: 0.6884, Acc: 0.5127, Prec: 0.6700, Rec: 0.0531, F1: 0.0983
Epoch 15 completed in 0m 14s
Total training time: 3m 34s
Best F1 score: 0.6674 achieved at epoch 6
