In [1]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer, Trainer, TrainingArguments
import torch
from torch.utils.data import Dataset
import json

# Проверка доступности GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Используем устройство: {device}")

# Загрузка датасета
class ConspiracyDataset(Dataset):
    def __init__(self, tokenizer, file_path, max_length=512):
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.data = []
        
        with open(file_path, "r", encoding="utf-8") as f:
            raw_data = json.load(f)
        
        for item in raw_data:
            prompt = item["prompt"]
            theory = item["theory"]
            text = f"{prompt}..{theory}"
            self.data.append(text)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        text = self.data[idx]
        inputs = self.tokenizer(
            text,
            max_length=self.max_length,
            padding="max_length",
            truncation=True,
            return_tensors="pt"
        )
        return {
            "input_ids": inputs["input_ids"].squeeze(0),
            "attention_mask": inputs["attention_mask"].squeeze(0),
            "labels": inputs["input_ids"].squeeze(0)  # labels are the same as input_ids for language modeling
        }

# Загрузка модели и перемещение её на GPU
model_name = "ai-forever/rugpt3small_based_on_gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name).to(device)  # Перемещаем модель на GPU

# Подготовка датасета
dataset = ConspiracyDataset(tokenizer, "theories.json")

# Настройка тренировки
training_args = TrainingArguments(
    output_dir="./fine_tuned_model",
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=2,  # Размер батча на устройстве
    save_steps=500,
    save_total_limit=2,
    logging_dir="./logs",
    logging_steps=10,
    learning_rate=5e-5,
    weight_decay=0.01,
    warmup_steps=500,
    fp16=True,  # Использование mixed precision для ускорения на GPU
)

# Тренер
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
)

# Дообучение
trainer.train()



  from .autonotebook import tqdm as notebook_tqdm


Используем устройство: cuda


`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


Step,Training Loss
10,17.8848
20,18.0342
30,14.8932
40,12.1143
50,9.5018
60,5.9057
70,3.3155
80,1.8006
90,0.8904
100,0.7763


TrainOutput(global_step=8817, training_loss=0.3344351313843456, metrics={'train_runtime': 6600.4664, 'train_samples_per_second': 2.671, 'train_steps_per_second': 1.336, 'total_flos': 4606839816192000.0, 'train_loss': 0.3344351313843456, 'epoch': 3.0})

In [2]:
# Сохранение модели без использования safetensors
try:
    model.save_pretrained("./fine_tuned_model", safe_serialization=False)
    tokenizer.save_pretrained("./fine_tuned_model")
    print("Модель успешно сохранена!")
except Exception as e:
    print(f"Ошибка при сохранении модели: {e}")

Модель успешно сохранена!
