In [None]:
!pip install -q -U transformers peft bitsandbytes trl accelerate evaluate rouge_score bert_score datasets

In [None]:
import os
import torch
import json
import pandas as pd
from datasets import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer, SFTConfig
import evaluate

In [None]:
# Switch off the Weights & Biases integration and choose the compute device.
os.environ["WANDB_DISABLED"] = "true"

if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

print(f"Using device: {device}")

### Data load & preparation

In [None]:
file_path = "/kaggle/input/avito-descriptions-enhanced/descriptions_enhancement_avito.jsonl"

# Parse the JSONL file; lines that do not start with '{' (after stripping
# whitespace) are skipped instead of being parsed.
with open(file_path, 'r', encoding='utf-8') as f:
    data = [
        json.loads(raw_line)
        for raw_line in f
        if raw_line.strip().startswith('{')
    ]

df = pd.DataFrame(data)

# Prompt template used for both training texts and evaluation prompts
def format_instruction(sample):
    """Render one record as a single instruction-style training text.

    The text consists of four sections separated by blank lines: the
    instruction, the context (category and item title), the original
    description and the improved description.

    Args:
        sample: Mapping with the keys 'instruction', 'category_context',
            'title', 'original_description' and 'generated_description'.

    Returns:
        The formatted text as a string.
    """
    sections = (
        f"### Instruction:\n{sample['instruction']}",
        f"### Context:\nКатегория: {sample['category_context']}\nТовар: {sample['title']}",
        f"### Original Description:\n{sample['original_description']}",
        f"### Improved Description:\n{sample['generated_description']}",
    )
    return "\n\n".join(sections)

# Number of trailing rows held out for evaluation.
test_size = 50

# Fail early with a clear message: with too few rows the training slice would
# be empty and the preview below would raise an opaque IndexError.
if len(df) <= test_size:
    raise ValueError(
        f"Loaded {len(df)} records from {file_path}, but more than {test_size} "
        "are required to build both a train and a test split."
    )

# Create the 'text' column for SFTTrainer
df['text'] = df.apply(format_instruction, axis=1)

# Train/test split: the last `test_size` rows form the test set.
# NOTE(review): the split is positional, not shuffled. If the source file is
# ordered (e.g. by category) the test set may be unrepresentative; confirm.
train_df = df.iloc[:-test_size]
test_df = df.iloc[-test_size:]

train_dataset = Dataset.from_pandas(train_df)
test_dataset = Dataset.from_pandas(test_df)

print(f"Train size: {len(train_dataset)}, Test size: {len(test_dataset)}")
print("Пример промпта:\n", train_dataset[0]['text'][:500] + "...")

### Model & tokenizer load

In [None]:
model_id = "Qwen/Qwen2.5-7B-Instruct"

# QLoRA quantization config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",  # NormalFloat4 quantization for the weights
    bnb_4bit_compute_dtype=torch.bfloat16,  # matmuls are computed in bf16
    bnb_4bit_use_double_quant=True,
)

# Load the 4-bit quantized model.
# trust_remote_code is intentionally not set: the Qwen2 architecture is
# implemented in transformers itself, so there is no need to allow code from
# the model repository to be executed.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Only fall back to EOS as the pad token when the tokenizer has none.
# Qwen2.5 tokenizers ship with a dedicated pad token; overwriting it with EOS
# makes padding and end-of-sequence indistinguishable.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Right padding for training. evaluate_model tokenizes one prompt at a time,
# so no padding is involved during generation.
tokenizer.padding_side = "right"

### Validation function

In [None]:
# Load both metric implementations once; evaluate_model reads them as globals.
rouge, bertscore = [
    evaluate.load(metric_id) for metric_id in ("rouge", "bertscore")
]

def evaluate_model(model, tokenizer, dataset, num_samples=10, seed=42):
    """Generate descriptions for the first samples of `dataset` and score them.

    For every sample the prompt is the part of `sample['text']` up to and
    including the "### Improved Description:" header. The model continues the
    prompt and the continuation is compared with
    `sample['generated_description']` using ROUGE and BERTScore.

    Args:
        model: Causal language model exposing `generate`.
        tokenizer: Tokenizer matching `model`.
        dataset: Indexable dataset whose items have the keys 'text' and
            'generated_description'.
        num_samples: Maximum number of leading samples to evaluate; clamped to
            `len(dataset)`. Kept small because generation is slow.
        seed: Seed applied to the torch RNG before generation so that repeated
            calls (e.g. baseline vs. fine-tuned) sample under the same random
            state. Pass None to leave the RNG untouched.

    Returns:
        Dict with the keys 'rouge1', 'rougeL' and 'bert_f1' (mean BERTScore F1).

    Raises:
        ValueError: If there is no sample to evaluate.
    """
    total = min(num_samples, len(dataset))
    if total <= 0:
        raise ValueError("evaluate_model needs at least one sample to evaluate.")

    model.eval()  # switch to inference mode

    if seed is not None:
        torch.manual_seed(seed)

    # Send inputs to wherever the model's first parameters live instead of
    # relying on a global device variable.
    model_device = next(model.parameters()).device

    split_point = "### Improved Description:\n"
    predictions = []
    references = []

    print(f"Starting evaluation on {total} samples...")

    for i in range(total):
        sample = dataset[i]

        # Keep everything up to and including the "### Improved Description:" header
        input_prompt = sample['text'].split(split_point)[0] + split_point

        # Tokenization
        inputs = tokenizer(input_prompt, return_tensors="pt").to(model_device)
        prompt_length = inputs["input_ids"].shape[1]

        # Generation
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=256,
                temperature=0.4,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id
            )

        # Decode only the newly generated tokens. Removing the prompt with a
        # string replace on the decoded text is fragile: it fails whenever
        # decoding does not reproduce the prompt character for character.
        generated_ids = outputs[0][prompt_length:]
        response = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

        predictions.append(response)
        references.append(sample['generated_description'])

        if i == 0:
            print(f"\n--- Пример ---\nInput: {input_prompt[:100]}...\nPred: {response[:100]}...\nRef: {references[-1][:100]}...\n")

    # ROUGE
    rouge_results = rouge.compute(predictions=predictions, references=references)

    # BERTScore
    bert_results = bertscore.compute(predictions=predictions, references=references, lang="ru")

    return {
        "rouge1": rouge_results["rouge1"],
        "rougeL": rouge_results["rougeL"],
        "bert_f1": sum(bert_results["f1"]) / len(bert_results["f1"])
    }

# Score the pre-trained model before any fine-tuning to get a reference point.
print("Evaluating Baseline (Pre-trained) Model...")
baseline_metrics = evaluate_model(
    model=model,
    tokenizer=tokenizer,
    dataset=test_dataset,
    num_samples=10,
)
print("Baseline Metrics:", baseline_metrics)

### Training

In [None]:
# SFTTrainer, SFTConfig and LoraConfig are already imported in the imports
# cell at the top of the notebook.

# The prompt ends with this marker; everything after it is the target text.
# It is the last section header of the 'text' column.
RESPONSE_MARKER = "### Improved Description:\n"


def to_prompt_completion(sample):
    """Split a formatted training text into a prompt/completion pair.

    Args:
        sample: Mapping with a 'text' entry that contains RESPONSE_MARKER.

    Returns:
        Dict with 'prompt' (text up to and including the first marker) and
        'completion' (everything after it).

    Raises:
        ValueError: If the marker is not present in the text.
    """
    head, marker, completion = sample["text"].partition(RESPONSE_MARKER)
    if not marker:
        raise ValueError("Training text does not contain the response marker.")
    return {"prompt": head + marker, "completion": completion}


# completion_only_loss only has an effect on prompt-completion datasets. With
# a plain 'text' column the loss would also cover the instruction and the
# original description. Only 'prompt' and 'completion' are kept so the trainer
# treats the data as a prompt-completion dataset.
sft_dataset = train_dataset.map(
    to_prompt_completion,
    remove_columns=train_dataset.column_names,
)

# Hold out part of the training data for checkpoint selection. Using
# test_dataset for eval_loss / load_best_model_at_end would pick the checkpoint
# on the same samples that are later used for the final metrics.
sft_splits = sft_dataset.train_test_split(test_size=0.1, seed=42)
sft_train_dataset = sft_splits["train"]
sft_val_dataset = sft_splits["test"]

# SFTConfig setup
sft_config = SFTConfig(
    output_dir="./qwen-avito-finetuned",
    max_length=1024,
    completion_only_loss=True,

    num_train_epochs=3,
    lr_scheduler_type="cosine",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,  # effective batch size per device: 2 * 4 = 8
    learning_rate=2e-4,
    warmup_ratio=0.1,

    # --- Validation and checkpoint strategy ---
    # eval_steps and save_steps are kept equal so every evaluation has a
    # matching checkpoint for load_best_model_at_end.
    eval_strategy="steps",
    eval_steps=50,
    save_strategy="steps",
    save_steps=50,

    # --- Overfitting prevention ---
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    save_total_limit=3,

    # --- Learning params ---
    logging_steps=10,
    optim="paged_adamw_32bit",
    fp16=False,
    bf16=True,
    gradient_checkpointing=True,
    report_to="none",
    remove_unused_columns=False
)

# LoRA config: adapters on all attention and MLP projection layers
peft_config = LoraConfig(
    lora_alpha=128,
    lora_dropout=0.05,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
)

# Trainer init
trainer = SFTTrainer(
    model=model,
    train_dataset=sft_train_dataset,
    eval_dataset=sft_val_dataset,
    args=sft_config,
    peft_config=peft_config,
)

In [None]:
# Launch the fine-tuning run with the trainer configured above.
print("Starting training...")
trainer.train()

In [None]:
# Save the trainer's model and the tokenizer into the same directory.
# NOTE(review): presumably only the LoRA adapter weights are written, since the
# trainer was created with a peft_config; confirm before relying on it.
trainer.model.save_pretrained("qwen-avito-adapter")
tokenizer.save_pretrained("qwen-avito-adapter")

### Validation

In [None]:
import gc

# Collect unreachable Python objects first so the CUDA memory they hold goes
# back to the allocator, then release the cached blocks. In the opposite order
# the memory freed by the collector would stay cached.
gc.collect()
torch.cuda.empty_cache()

trainer.model.eval()

print("Evaluating Fine-tuned Model...")
finetuned_metrics = evaluate_model(trainer.model, tokenizer, test_dataset, num_samples=10)

# Side-by-side table: one row per metric with the change after fine-tuning.
print("\n" + "="*30)
print("RESULTS COMPARISON")
print("="*30)
print(f"{'Metric':<15} | {'Baseline':<10} | {'Fine-tuned':<10} | {'Delta':<10}")
print("-" * 50)
for key, base in baseline_metrics.items():
    tuned = finetuned_metrics[key]
    delta = tuned - base
    print(f"{key:<15} | {base:.4f}     | {tuned:.4f}     | {delta:+.4f}")
print("="*30)