In [1]:
!pip install transformers datasets trl huggingface_hub

Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting trl
  Downloading trl-0.13.0-py3-none-any.whl.metadata (11 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.2.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m13.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading trl-0.13.0-py3-none-any.whl (293 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m293.4/293.4 kB[0m [31m12.8 MB/s[0

In [3]:
!pip install evaluate

Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.3


In [4]:
import torch
import os
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
from trl import SFTConfig, SFTTrainer, setup_chat_format
import pandas as pd
from datasets import Dataset
from sklearn.model_selection import train_test_split
import evaluate  # To calculate ROUGE metrics

In [5]:
data_path = 'Recommendation.csv'
df = pd.read_csv(data_path)
dataset = Dataset.from_pandas(df)

In [9]:
def process_data_for_dpo(example):
    messages = [{"role": "user", "content": f"Recommend a food list for a person whose gender is {example['Cinsiyet']}, age: {example['Yaş']}, hypertension: {example['Hipertansiyon']}, diabetes: {example['Diyabet']}"},
                {"role": "assistant", "content": ", ".join(eval(example['Recommended Foods']))}]
    return {"messages": messages}

In [10]:

train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)

In [11]:

train_dataset = Dataset.from_pandas(train_df)
val_dataset = Dataset.from_pandas(val_df)

In [12]:
# Veri setlerini işlemden geçirme
processed_train_dataset = train_dataset.map(process_data_for_dpo)
processed_val_dataset = val_dataset.map(process_data_for_dpo)

Map:   0%|          | 0/4944 [00:00<?, ? examples/s]

Map:   0%|          | 0/1236 [00:00<?, ? examples/s]

In [13]:
!pip install rouge_score

Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24935 sha256=6dbf60a277cce09b4b10d3e552c62f6a5114e5da30f5386c7a792950f23ec196
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2


In [32]:
# Metric computation
rouge = evaluate.load("rouge")
def compute_metrics(pred):
    labels_ids = pred.label_ids
    pred_ids = pred.predictions[0]

    # Ensure predictions and labels are matched in size
    pred_ids = pred_ids[:len(labels_ids)]

    pred_str = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
    labels_ids[labels_ids == -100] = tokenizer.pad_token_id
    label_str = tokenizer.batch_decode(labels_ids, skip_special_tokens=True)

    rouge_output = rouge.compute(
        predictions=pred_str,
        references=label_str,
        rouge_types=["rouge1", "rouge2", "rougeL", "rougeLsum"],
    )

    # Compute Accuracy
    accuracy = sum(p == t for p, t in zip(pred_str, label_str)) / len(label_str)

    return {
        "R1": round(rouge_output["rouge1"], 4),
        "R2": round(rouge_output["rouge2"], 4),
        "RL": round(rouge_output["rougeL"], 4),
        "RLsum": round(rouge_output["rougeLsum"], 4),
        "Accuracy": round(accuracy, 4),
    }


In [33]:
def preprocess_logits_for_metrics(logits, labels):
    """
    Memory optimization: avoids storing unnecessary tensors.
    """
    pred_ids = torch.argmax(logits[0], dim=-1)
    return pred_ids, labels

In [34]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available() else "cpu"
)

# Load the model and tokenizer
model_name = "HuggingFaceTB/SmolLM2-135M"
model = AutoModelForCausalLM.from_pretrained(
    pretrained_model_name_or_path=model_name
).to(device)
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name)

# Set up the chat format
model, tokenizer = setup_chat_format(model=model, tokenizer=tokenizer)

model.config.use_cache = False


In [35]:
os.environ["WANDB_DISABLED"] = "true"

# Configure the SFTTrainer
sft_config = SFTConfig(
    output_dir="./sft_output",
    max_steps=1000,  # Adjust based on dataset size and desired training duration
    per_device_train_batch_size=1,  # Set according to your GPU memory capacity
    gradient_accumulation_steps=4,
    learning_rate=5e-5,  # Common starting point for fine-tuning
    logging_steps=10,  # Frequency of logging training metrics
    save_steps=100,  # Frequency of saving model checkpoints
    evaluation_strategy="steps",  # Evaluate the model at regular intervals
    # eval_steps=50,  # Frequency of evaluation
    fp16=True,  # Mixed precision
)


trainer = SFTTrainer(
    model=model,
    args=sft_config,
    train_dataset=processed_train_dataset,
    eval_dataset=processed_val_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    preprocess_logits_for_metrics=preprocess_logits_for_metrics,  # Optimize memory usage
)

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
  trainer = SFTTrainer(


Map:   0%|          | 0/4944 [00:00<?, ? examples/s]

Map:   0%|          | 0/1236 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs


In [36]:
# Train the model
trainer.train()

# Modeli kaydetme
finetune_name = "SmolLM2-FT-MyDataset"
trainer.save_model(f"./{finetune_name}")

Step,Training Loss,Validation Loss,R1,R2,Rl,Rlsum,Accuracy
10,2.2302,0.937266,0.0215,0.0,0.0215,0.0215,0.0
20,0.781,0.59434,0.0212,0.0,0.0212,0.0212,0.0


KeyboardInterrupt: 

In [None]:
prompt = "Recommend a food list for a person whose gender is male, age: 53, hypertension: yes, diabetes: yes"
test_prompt = [{"content": prompt, "role": "user"}]

formatted_prompt = tokenizer.apply_chat_template(test_prompt, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(formatted_prompt, return_tensors="pt").to(device)

In [None]:
outputs = model.generate(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"], max_length=100)
print("Model Çıktısı:", tokenizer.decode(outputs[0], skip_special_tokens=True))

Model Çıktısı: user
Recommend a food list for a person whose gender is male, age: 53, hypertension: yes, diabetes: yes
assistant
Drumstick tree, Onion rings, Focaccia, Espresso, Pigeon pea, Gewürztraminer, Pigeon pea, Gewürztraminer, Pigeon pea, Gewürztraminer, Pigeon pea, Pigeon pea, Pige
