In [None]:
!pip install transformers
!pip install scikit-learn
import torch
import pandas as pd
import numpy as np
from sklearn.metrics import f1_score, accuracy_score
from transformers import AutoTokenizer, AutoModel, BertForMaskedLM
from tqdm import tqdm

# Select the compute device: CUDA when it is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

# Load the semicolon-separated training file; the 'utf-8-sig' codec
# transparently drops a leading byte-order mark if the file has one.
data = pd.read_csv(
    'Train_Only_Sentence.csv',
    sep=';',
    encoding='utf-8-sig',
)
# Preview of the first rows; the value is only displayed when this runs as
# the last expression of an interactive cell.
data.head()

# Replace missing values with empty strings, modifying the frame in place.
data.fillna("", inplace=True)
# Load the tokenizer and the masked-language-model head from the same
# pretrained checkpoint.
from transformers import BertTokenizer, BertForMaskedLM

tokenizer = BertTokenizer.from_pretrained("DeepPavlov/rubert-base-cased")
model = BertForMaskedLM.from_pretrained("DeepPavlov/rubert-base-cased")

# Tokenize the whole 'text' column in one call, returning PyTorch tensors
# with padding and truncation enabled, then move every returned tensor to
# the selected device.
encoded = tokenizer(
    data['text'].tolist(),
    padding=True,
    truncation=True,
    return_tensors='pt',
)
inputs = {field: tensor.to(device) for field, tensor in encoded.items()}

# Build the masked-language-modelling inputs and labels.
#
# Roughly 15% of the real tokens are selected at random. Padding positions
# (attention_mask == 0) and special tokens such as [CLS]/[SEP] are never
# selected, because predicting them carries no training signal.
original_ids = inputs['input_ids'].detach().cpu().numpy()
is_real_token = inputs['attention_mask'].detach().cpu().numpy().astype(bool)
is_special_token = np.isin(original_ids, tokenizer.all_special_ids)
mask = (
    (np.random.rand(*original_ids.shape) < 0.15)
    & is_real_token
    & ~is_special_token
)

# Labels passed to the model: the ORIGINAL token id at the selected
# positions and -100 everywhere else, so the loss is computed only on the
# tokens the model has to reconstruct. (Despite its name, `masked_inputs`
# holds the labels tensor; the name is kept for the code that consumes it.)
masked_inputs = np.where(mask, original_ids, -100)
masked_inputs = torch.tensor(masked_inputs, dtype=torch.long).to(device)

# Model input: hide the selected tokens behind [MASK]. Without this the
# model sees the very tokens it is asked to predict and learns only to copy.
# NOTE: the masking is drawn once here, so every epoch reuses the same mask.
inputs['input_ids'] = inputs['input_ids'].masked_fill(
    torch.from_numpy(mask).to(device),
    tokenizer.mask_token_id,
)

# Training hyper-parameters.
epochs = 5
batch_size = 8

# AdamW optimizer over all model parameters, paired with a scheduler that
# multiplies the learning rate by 0.1 when the monitored value (mode='min')
# stops improving for more than `patience` scheduler steps.
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=5e-5,
    eps=1e-06,
    weight_decay=0.01,
)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.1,
    patience=1,
    verbose=True,
)

# Cross-entropy criterion that skips targets equal to -100.
loss_fn = torch.nn.CrossEntropyLoss(ignore_index=-100)

# Move the model's parameters to the selected device.
model.to(device)

# Strength of the explicit L1 / L2 penalties added to the MLM loss.
# NOTE(review): these penalties previously had no effect because they were
# added after backward(); now that they reach the gradients the coefficients
# may need re-tuning (AdamW's weight_decay already applies decoupled decay).
l1_lambda = 0.0001
l2_lambda = 0.0001

for epoch in range(epochs):
    model.train()
    running_loss = 0.0  # sum over the epoch of (batch MLM loss * batch size)
    seen_samples = 0    # number of samples that contributed to running_loss
    y_true = []
    y_pred = []
    progress_bar = tqdm(range(0, len(data), batch_size))
    for i in progress_bar:
        batch_inputs = {key: val[i:i+batch_size] for key, val in inputs.items()}
        batch_masked_inputs = batch_labels = masked_inputs[i:i+batch_size]

        # Positions that carry a real target; -100 marks positions the loss
        # ignores. A batch without any target would produce a NaN loss and
        # poison the weights, so it is skipped.
        supervised = batch_labels != -100
        if not supervised.any():
            continue

        optimizer.zero_grad()
        outputs = model(
            input_ids=batch_inputs['input_ids'],
            attention_mask=batch_inputs['attention_mask'],
            labels=batch_labels,
        )
        mlm_loss = outputs[0].mean()

        # L1 and L2 penalties over all non-bias parameters. They must be part
        # of the loss BEFORE backward() so they contribute to the gradients.
        l1_penalty = 0.0
        l2_penalty = 0.0
        for name, param in model.named_parameters():
            if 'bias' not in name:
                l1_penalty = l1_penalty + torch.norm(param, 1)
                l2_penalty = l2_penalty + torch.norm(param, 2)
        loss = mlm_loss + l1_lambda * l1_penalty + l2_lambda * l2_penalty

        loss.backward()
        optimizer.step()

        # Track the un-regularised MLM loss, weighted by the true batch size
        # (the last batch may be shorter than `batch_size`).
        current_batch_size = batch_labels.size(0)
        running_loss += mlm_loss.item() * current_batch_size
        seen_samples += current_batch_size

        # Collect predictions for the metrics at supervised positions only.
        predictions = outputs[1].detach().argmax(dim=-1)
        y_true.extend(batch_labels[supervised].cpu().tolist())
        y_pred.extend(predictions[supervised].cpu().tolist())
        progress_bar.set_description(f"Epoch: {epoch + 1}/{epochs}, Loss: {running_loss / seen_samples:.5f}")

    # Training-set metrics, computed once per epoch over the collected tokens.
    if y_true:
        f1 = f1_score(y_true, y_pred, average='weighted')
        acc = accuracy_score(y_true, y_pred)
        print(f"Training F1-score: {f1:.5f}, Training Accuracy: {acc:.5f}")

    # Step the plateau scheduler once per epoch on the mean epoch loss;
    # stepping per batch on a cumulative sum would look like a permanent
    # plateau and collapse the learning rate almost immediately.
    if seen_samples:
        scheduler.step(running_loss / seen_samples)