In [1]:
# Environment Setup 

!pip -q install -U "transformers>=4.43.3" "datasets>=2.19.0" "peft>=0.11.1" "accelerate>=0.33.0" huggingface_hub

import torch, os, platform, transformers, datasets, peft, accelerate
print("Torch:", torch.__version__)
print("Transformers:", transformers.__version__)
print("Datasets:", datasets.__version__)
print("PEFT:", peft.__version__)
print("Accelerate:", accelerate.__version__)
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))
    print("VRAM (approx):", torch.cuda.get_device_properties(0).total_memory/1e9, "GB")

os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"   # faster Hub downloads
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"


Torch: 2.8.0+cu126
Transformers: 4.55.4
Datasets: 4.0.0
PEFT: 0.17.1
Accelerate: 1.10.1
CUDA available: True
GPU: NVIDIA A100-SXM4-40GB
VRAM (approx): 42.474471424 GB


In [2]:
# Load Dataset

from datasets import load_dataset

DATASET_NAME = "mohammad-shirkhani/social_movielens_custom_with_reason"
raw_train = load_dataset(DATASET_NAME, split="train")

print(raw_train)
print("Columns:", raw_train.column_names)

MAX_TRAIN = 20_000
if len(raw_train) > MAX_TRAIN:
    train_ds = raw_train.select(range(MAX_TRAIN))
else:
    train_ds = raw_train

len(train_ds), train_ds[0]


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Dataset({
    features: ['user', 'item', 'answer', 'paths', 'reason'],
    num_rows: 20000
})
Columns: ['user', 'item', 'answer', 'paths', 'reason']


(20000,
 {'user': {'UserID': 911,
   'Age': 37,
   'Gender': 'Female',
   'Occupation': 'writer'},
  'item': {'MovieID': 193,
   'Title': 'Right Stuff, The (1983)',
   'ReleaseDate': '01-Jan-1983',
   'Genres': 'Drama'},
  'answer': 4.0,
  'paths': ['user_question -> user_item_2 (rating=2) -> Item{MovieID 98, Title "Silence of the Lambs, The (1991)", Release Date 01-Jan-1991, Genres Drama, Thriller} -> item_user_4 (rating=4) -> User{UserID 498, Age 26, Gender Male, Occupation writer} -> usersim -> User{UserID 58, Age 27, Gender Male, Occupation programmer} -> user_item_3 (rating=3) -> item_question',
   'user_question -> usersim -> User{UserID 716, Age 36, Gender Female, Occupation administrator} -> user_item_5 (rating=5) -> Item{MovieID 517, Title "Manhattan (1979)", Release Date 01-Jan-1979, Genres Comedy, Drama, Romance} -> item_user_3 (rating=3) -> User{UserID 271, Age 51, Gender Male, Occupation engineer} -> user_item_5 (rating=5) -> item_question',
   'user_question -> user_item_

In [3]:
# # Sanity Check

from pprint import pprint

for i in range(2):
    print(f"--- Sample {i} ---")
    ex = train_ds[i]
    print("user keys:", list(ex["user"].keys()))
    print("item keys:", list(ex["item"].keys()))
    print("answer:", ex["answer"])
    print("reason (first 200 chars):", ex["reason"][:200], "...")
    print("paths len:", len(ex["paths"]))
    print("one path:", ex["paths"][0] if ex["paths"] else "N/A")


--- Sample 0 ---
user keys: ['UserID', 'Age', 'Gender', 'Occupation']
item keys: ['MovieID', 'Title', 'ReleaseDate', 'Genres']
answer: 4.0
reason (first 200 chars): The user (UserID 911) is a 37-year-old female writer, and the target item (MovieID 193) is a Drama film. Several meta-paths provide insights into potential user preferences and item characteristics.

 ...
paths len: 20
one path: user_question -> user_item_2 (rating=2) -> Item{MovieID 98, Title "Silence of the Lambs, The (1991)", Release Date 01-Jan-1991, Genres Drama, Thriller} -> item_user_4 (rating=4) -> User{UserID 498, Age 26, Gender Male, Occupation writer} -> usersim -> User{UserID 58, Age 27, Gender Male, Occupation programmer} -> user_item_3 (rating=3) -> item_question
--- Sample 1 ---
user keys: ['UserID', 'Age', 'Gender', 'Occupation']
item keys: ['MovieID', 'Title', 'ReleaseDate', 'Genres']
answer: 5.0
reason (first 200 chars): The user, UserID 617, is described as a 27-year-old female writer. The target item, Mo

In [4]:
# Define SYSTEM_PROMPT

SYSTEM_PROMPT = (
    "You are an expert analyst for rating prediction in a heterogeneous bipartite graph of users and items. "
    "Your job is to analyze user-item interactions and infer a final numerical rating.\n\n"
    "Task: Extract evidence from the provided data and then conclude the numerical rating.\n\n"
    "You are given a heterogeneous bipartite graph setting (users and items). Edges include:\n"
    "- user→item rating interactions (e.g., user_item_k with an explicit rating),\n"
    "- usersim (user-user similarity), and\n"
    "- itemsim (item-item similarity).\n\n"
    "What to do:\n"
    "1) Analyze the user's likely preferences and the item's traits by leveraging ONLY:\n"
    "   - the user attributes,\n"
    "   - the item attributes, and\n"
    "   - the provided meta-paths (treat each path as a weak but interpretable signal; combine corroborating signals).\n"
    "2) Then produce two XML blocks ONLY (no extra text):\n"
    "   a) <reason>...</reason> — Provide a concise, evidence-first explanation that cites the most helpful signals from the meta-paths and attributes.\n"
    "   b) <answer>...</answer> — A single numeric rating (e.g., 1–5). Use a plain number (no extra symbols/units).\n\n"
    "Formatting policy:\n"
    "- Output EXACTLY two blocks and nothing else: first <reason>...</reason>, then a blank line, then <answer>...</answer>.\n"
    "- Do NOT echo or quote the prompt. Do NOT add commentary outside the XML tags.\n"
    "- Focus on robust evidence that appears multiple times across different meta-paths.\n"
    "- If signals conflict, weigh paths with stronger agreement on genre/age/occupation/peer similarity.\n"
    "- Use the evidence to justify the final number you choose.\n"
)
print(SYSTEM_PROMPT[:])

You are an expert analyst for rating prediction in a heterogeneous bipartite graph of users and items. Your job is to analyze user-item interactions and infer a final numerical rating.

Task: Extract evidence from the provided data and then conclude the numerical rating.

You are given a heterogeneous bipartite graph setting (users and items). Edges include:
- user→item rating interactions (e.g., user_item_k with an explicit rating),
- usersim (user-user similarity), and
- itemsim (item-item similarity).

What to do:
1) Analyze the user's likely preferences and the item's traits by leveraging ONLY:
   - the user attributes,
   - the item attributes, and
   - the provided meta-paths (treat each path as a weak but interpretable signal; combine corroborating signals).
2) Then produce two XML blocks ONLY (no extra text):
   a) <reason>...</reason> — Provide a concise, evidence-first explanation that cites the most helpful signals from the meta-paths and attributes.
   b) <answer>...</answe

In [5]:
# Build "problem" & "solution"

import json

def format_user_block(user_dict):
    return json.dumps(user_dict, ensure_ascii=False, indent=2)

def format_item_block(item_dict):
    return json.dumps(item_dict, ensure_ascii=False, indent=2)

def format_paths(paths_list):
    return "\n".join(paths_list)

def build_problem(ex):
    user_txt = format_user_block(ex["user"])
    item_txt = format_item_block(ex["item"])
    paths_txt = format_paths(ex["paths"])
    problem = (
        "User:\n" + user_txt + "\n\n"
        "Item:\n" + item_txt + "\n\n"
        "Meta-path evidence (each path from this user to the target item):\n"
        + paths_txt
    )
    return problem

def build_solution(ex):
    reason = ex["reason"]
    ans = ex["answer"]
    try:
        ans_str = str(float(ans)).rstrip("0").rstrip(".")
    except Exception:
        ans_str = str(ans)
    xml = f"<reason>{reason}</reason>\n\n<answer>{ans_str}</answer>"
    return xml

def enrich_example(ex):
    return {
        "problem": build_problem(ex),
        "solution": build_solution(ex),
    }

tmp = train_ds.map(enrich_example, desc="Building problem/solution")
print(tmp[0]["problem"][:], "\n")
print(tmp[0]["solution"][:])


User:
{
  "UserID": 911,
  "Age": 37,
  "Gender": "Female",
  "Occupation": "writer"
}

Item:
{
  "MovieID": 193,
  "Title": "Right Stuff, The (1983)",
  "ReleaseDate": "01-Jan-1983",
  "Genres": "Drama"
}

Meta-path evidence (each path from this user to the target item):
user_question -> user_item_2 (rating=2) -> Item{MovieID 98, Title "Silence of the Lambs, The (1991)", Release Date 01-Jan-1991, Genres Drama, Thriller} -> item_user_4 (rating=4) -> User{UserID 498, Age 26, Gender Male, Occupation writer} -> usersim -> User{UserID 58, Age 27, Gender Male, Occupation programmer} -> user_item_3 (rating=3) -> item_question
user_question -> usersim -> User{UserID 716, Age 36, Gender Female, Occupation administrator} -> user_item_5 (rating=5) -> Item{MovieID 517, Title "Manhattan (1979)", Release Date 01-Jan-1979, Genres Comedy, Drama, Romance} -> item_user_3 (rating=3) -> User{UserID 271, Age 51, Gender Male, Occupation engineer} -> user_item_5 (rating=5) -> item_question
user_question -> 

In [6]:
# Conversation format

def make_conversation(ex):
    return {
        "prompt": [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user",   "content": ex["problem"]},
        ],
        "solution": ex.get("solution", "")
    }

train_conv = tmp.map(make_conversation, desc="Making conversation objects")
print(train_conv[0]["prompt"][0])
print("----")
print(train_conv[0]["prompt"][1]["content"][:200], "...")
print("----")
print(train_conv[0]["solution"][:160], "...")


{'content': "You are an expert analyst for rating prediction in a heterogeneous bipartite graph of users and items. Your job is to analyze user-item interactions and infer a final numerical rating.\n\nTask: Extract evidence from the provided data and then conclude the numerical rating.\n\nYou are given a heterogeneous bipartite graph setting (users and items). Edges include:\n- user→item rating interactions (e.g., user_item_k with an explicit rating),\n- usersim (user-user similarity), and\n- itemsim (item-item similarity).\n\nWhat to do:\n1) Analyze the user's likely preferences and the item's traits by leveraging ONLY:\n   - the user attributes,\n   - the item attributes, and\n   - the provided meta-paths (treat each path as a weak but interpretable signal; combine corroborating signals).\n2) Then produce two XML blocks ONLY (no extra text):\n   a) <reason>...</reason> — Provide a concise, evidence-first explanation that cites the most helpful signals from the meta-paths and attribut

In [7]:
# # Load Tokenizer & Base Model

from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

BASE_MODEL = "Qwen/Qwen2.5-7B-Instruct"  

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
print("pad_token_id:", tokenizer.pad_token_id, "| eos_token_id:", tokenizer.eos_token_id)

model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.bfloat16,   
    device_map="auto",            
)
model.config.pad_token_id = tokenizer.pad_token_id
model.config.use_cache = False  

print("Model loaded (no quantization).")


pad_token_id: 151643 | eos_token_id: 151645


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Model loaded (no quantization).


In [8]:
# Tokenization & Dataset Formatting

from typing import Dict, List
import torch

MAX_PROMPT_LEN = 4096
MAX_ANSWER_LEN = 1200
ADD_EOS_TO_ASSISTANT = True

def build_ids_and_labels(messages: List[Dict[str, str]], assistant_text: str):
    # Prompt ids
    prompt_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_tensors="pt"
    )[0]

    # Assistant ids
    assistant_text_final = assistant_text + (tokenizer.eos_token if ADD_EOS_TO_ASSISTANT else "")
    assistant_ids = tokenizer(
        assistant_text_final,
        add_special_tokens=False,
        return_tensors="pt"
    )["input_ids"][0]

    if prompt_ids.size(0) > MAX_PROMPT_LEN:
        prompt_ids = prompt_ids[-MAX_PROMPT_LEN:]  
    if assistant_ids.size(0) > MAX_ANSWER_LEN:
        assistant_ids = assistant_ids[:MAX_ANSWER_LEN]  

    # Build labels 
    labels_prompt = torch.full_like(prompt_ids, fill_value=-100)
    labels_answer = assistant_ids.clone()
    if labels_answer[-1].item() == tokenizer.eos_token_id:
        labels_answer[-1] = -100

    input_ids = torch.cat([prompt_ids, assistant_ids], dim=0)
    attention_mask = torch.ones_like(input_ids)
    labels = torch.cat([labels_prompt, labels_answer], dim=0)

    return {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "labels": labels,
        "prompt_len": prompt_ids.size(0),
        "answer_len": assistant_ids.size(0),
    }

def tokenize_example(ex):
    messages = ex["prompt"]  
    assistant_text = ex["solution"]
    pack = build_ids_and_labels(messages, assistant_text)
    return {
        "input_ids": pack["input_ids"],
        "attention_mask": pack["attention_mask"],
        "labels": pack["labels"],
        "prompt_len": pack["prompt_len"],
        "answer_len": pack["answer_len"],
    }

tokenized = train_conv.map(
    tokenize_example,
    remove_columns=train_conv.column_names,
    desc="Tokenizing (4096 prompt / 1200 answer)"
)

# Sanity check
ex0 = tokenized[0]
print(tokenized)
print("Example lens:", int(ex0["prompt_len"]), int(ex0["answer_len"]))

# convert to torch for training
tokenized.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])


Dataset({
    features: ['input_ids', 'attention_mask', 'labels', 'prompt_len', 'answer_len'],
    num_rows: 20000
})
Example lens: 2885 381


In [9]:
# Sanity Check

def visualize_example(idx=0, n_tail_prompt_tokens=80, n_head_answer_tokens=80):
    ex = tokenized[idx]
    input_ids = ex["input_ids"].tolist()
    labels = ex["labels"].tolist()

    try:
        prompt_len = next(i for i, lab in enumerate(labels) if lab != -100)
    except StopIteration:
        prompt_len = len(labels)

    answer_len = len(labels) - prompt_len

    # Decode segments
    prompt_tail_ids = input_ids[max(0, prompt_len - n_tail_prompt_tokens):prompt_len]
    answer_head_ids = input_ids[prompt_len: min(prompt_len + n_head_answer_tokens, len(input_ids))]

    prompt_tail = tokenizer.decode(prompt_tail_ids)
    answer_head = tokenizer.decode(answer_head_ids)
    eos = tokenizer.eos_token or "<eos>"

    print(f"Prompt length (inferred): {prompt_len} | Answer length (inferred): {answer_len}")
    print("=== PROMPT (tail) ===")
    print(prompt_tail.replace(eos, "<eos>"))
    print("\n=== ANSWER (head) ===")
    print(answer_head.replace(eos, "<eos>"))

    # Label sanity
    num_supervised = sum(1 for x in labels if x != -100)
    print(f"\nSupervised tokens (labels != -100): {num_supervised} / {len(labels)}")
    print("Label sample (first 40):", labels[:40])

visualize_example(0)


Prompt length (inferred): 2885 | Answer length (inferred): 381
=== PROMPT (tail) ===
Adventures of Robin Hood, The (1938)", Release Date 01-Jan-1938, Genres Action, Adventure} -> item_user_4 (rating=4) -> User{UserID 796, Age 32, Gender Female, Occupation writer} -> user_item_3 (rating=3) -> item_question<eos>
<|im_start|>assistant


=== ANSWER (head) ===
<reason>The user (UserID 911) is a 37-year-old female writer, and the target item (MovieID 193) is a Drama film. Several meta-paths provide insights into potential user preferences and item characteristics.

One path connects through UserID 716, a 36-year-old female administrator, who rated "Streetcar Named Desire, A

Supervised tokens (labels != -100): 380 / 3266
Label sample (first 40): [-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100]


In [10]:
# Data Collator


from dataclasses import dataclass
from typing import Optional, Union, List, Dict
import torch

@dataclass
class DataCollatorForCausalLMNoEos:
    tokenizer: AutoTokenizer
    label_pad_token_id: int = -100

    def __call__(self, features: List[Dict[str, torch.Tensor]]) -> Dict[str, torch.Tensor]:
        # Determine max length
        max_length = max(f["input_ids"].size(0) for f in features)

        batch_input_ids, batch_attention_mask, batch_labels = [], [], []
        for f in features:
            pad_len = max_length - f["input_ids"].size(0)
            input_ids = torch.nn.functional.pad(f["input_ids"], (0, pad_len), value=self.tokenizer.pad_token_id)
            attn = torch.nn.functional.pad(f["attention_mask"], (0, pad_len), value=0)
            labels = torch.nn.functional.pad(f["labels"], (0, pad_len), value=self.label_pad_token_id)

            # Mask any EOS in padded tail
            labels = labels.clone()

            batch_input_ids.append(input_ids)
            batch_attention_mask.append(attn)
            batch_labels.append(labels)

        batch = {
            "input_ids": torch.stack(batch_input_ids),
            "attention_mask": torch.stack(batch_attention_mask),
            "labels": torch.stack(batch_labels),
        }
        return batch

collator = DataCollatorForCausalLMNoEos(tokenizer)
_ = collator([tokenized[0], tokenized[1]])
print("Collator OK. Shapes:", _.keys(), {k: v.shape for k, v in _.items()})


Collator OK. Shapes: dict_keys(['input_ids', 'attention_mask', 'labels']) {'input_ids': torch.Size([2, 3276]), 'attention_mask': torch.Size([2, 3276]), 'labels': torch.Size([2, 3276])}


In [11]:
# Setup LoRA

from peft import LoraConfig, get_peft_model, TaskType

model.config.attn_implementation = "sdpa"

CANDIDATES = ["q_proj","k_proj","v_proj","o_proj","gate_proj","up_proj","down_proj"]

def detect_present_targets(model, candidates):
    present = set()
    for name, module in model.named_modules():
        for c in candidates:
            if name.endswith(c):
                present.add(c)
    return sorted(list(present))

present_targets = detect_present_targets(model, CANDIDATES)
print("LoRA target modules present:", present_targets)

lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=32,                 
    lora_alpha=64,
    lora_dropout=0.05,
    target_modules=present_targets,
    bias="none",
)

model = get_peft_model(model, lora_config)


model.enable_input_require_grads()

model.gradient_checkpointing_enable(gradient_checkpointing_kwargs={"use_reentrant": False})
model.config.use_cache = False  # already set above, keep it off during training

def print_trainable_params(m):
    trainable, total = 0, 0
    for _, p in m.named_parameters():
        total += p.numel()
        if p.requires_grad:
            trainable += p.numel()
    pct = 100 * trainable / total
    print(f"Trainable params: {trainable:,} / {total:,} ({pct:.2f}%)")

print_trainable_params(model)


LoRA target modules present: ['down_proj', 'gate_proj', 'k_proj', 'o_proj', 'q_proj', 'up_proj', 'v_proj']
Trainable params: 80,740,352 / 7,696,356,864 (1.05%)


In [12]:
# Training Arguments 

from transformers import TrainingArguments

OUTPUT_DIR = "qwen2_5_7b_rating_SFT_lora"

train_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    num_train_epochs=1,
    per_device_train_batch_size=1,      
    gradient_accumulation_steps=16,     
    learning_rate=5e-5,
    lr_scheduler_type="cosine",
    warmup_ratio=0.03,
    weight_decay=0.01,
    logging_steps=50,
    save_steps=1000,
    save_total_limit=2,
    bf16=True,                          
    fp16=False,
    dataloader_pin_memory=True,
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs={"use_reentrant": False}, 
    optim="adamw_torch",
    report_to="none",
    tf32=True,
)

train_args


TrainingArguments(
_n_gpu=1,
accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
average_tokens_across_devices=False,
batch_eval_metrics=False,
bf16=True,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_persistent_workers=False,
dataloader_pin_memory=True,
dataloader_prefetch_factor=None,
ddp_backend=None,
ddp_broadcast_buffers=None,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=False,
do_predict=False,
do_train=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_do_concat_batches=True,
eval_on_start=False,
eval_steps=None,
eval_strategy=IntervalStrategy.NO,
eval_use_gather_object=False,
f

In [13]:
# Build Trainer

from transformers import Trainer

trainer = Trainer(
    model=model,
    args=train_args,
    train_dataset=tokenized,
    data_collator=collator,
)

In [14]:
# Preflight Gradient Check

model.train()
batch = collator([tokenized[0]])  # tiny batch of size 1
batch = {k: v.to(model.device) for k, v in batch.items()}

out = model(**batch)
print("Loss:", out.loss)
print("loss.requires_grad:", out.loss.requires_grad)
assert out.loss.requires_grad, "Loss is not requiring grad! Check GC settings and input grads."

Loss: tensor(1.2408, device='cuda:0', grad_fn=<NllLossBackward0>)
loss.requires_grad: True


In [15]:
# Train!

train_result = trainer.train()
train_result

Step,Training Loss
50,0.8773
100,0.6391
150,0.6058
200,0.5932
250,0.5842
300,0.5805
350,0.5768
400,0.5655
450,0.5706
500,0.5619


Step,Training Loss
50,0.8773
100,0.6391
150,0.6058
200,0.5932
250,0.5842
300,0.5805
350,0.5768
400,0.5655
450,0.5706
500,0.5619


TrainOutput(global_step=1250, training_loss=0.574397427368164, metrics={'train_runtime': 24420.8919, 'train_samples_per_second': 0.819, 'train_steps_per_second': 0.051, 'total_flos': 2.8859738436039813e+18, 'train_loss': 0.574397427368164, 'epoch': 1.0})

In [19]:
# Quick Inference Demo

from transformers import TextStreamer

model.eval()
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

# Build a single prompt from our conversation object (system+user)
messages = train_conv[0]["prompt"]
prompt_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, tokenize=True, return_tensors="pt"
).to(model.device)

with torch.no_grad():
    gen_ids = model.generate(
        input_ids=prompt_ids,
        max_new_tokens=1200,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        streamer=streamer,
        eos_token_id=tokenizer.eos_token_id,
    )

print("\n\n--- Reference solution ---\n")
print(train_conv[0]["solution"])


<reason>The user (UserID 911) is a 37-year-old female writer. The target item (MovieID 193) is a Drama film released in 1983.

Several meta-paths provide indirect signals:
- One path shows User 911 interacting with an item rated 3 (MovieID 501, Genres Animation, Children's, Musical) via a user-item link, which then connects to another user who rated an item with 4.
- Another path indicates User 911 interacted with an item rated 5 (MovieID 191, Genres Drama, Mystery), and this connection leads to a user who rated an item with 4.
- A path links User 911 to an item rated 5 (MovieID 173, Genres Action, Adventure, Comedy, Romance) via a user-item interaction, then to a user who rated an item with 4.
- A path shows User 911 interacting with an item rated 5 (MovieID 153, Genres Comedy) via a user-item link, which then connects to another user who rated an item with 5.
- A path involves User 911 interacting with an item rated 3 (MovieID 99, Genres Animation, Children's, Musical) via a user-ite

In [18]:
# Push LoRA Adapters to Hugging Face Hub

from huggingface_hub import notebook_login
notebook_login()

ADAPTER_REPO = "qwen2.5_7b_rating_SFT"
model.save_pretrained(OUTPUT_DIR)  
tokenizer.save_pretrained(OUTPUT_DIR)

# push to Hub
model.push_to_hub(ADAPTER_REPO)
tokenizer.push_to_hub(ADAPTER_REPO)

print(f"Adapters and tokenizer pushed to: {ADAPTER_REPO}")


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  ...pexp1ueyk/adapter_model.safetensors:   0%|          | 46.4kB /  323MB            

README.md: 0.00B [00:00, ?B/s]

Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  /tmp/tmpp84da7rf/tokenizer.json       : 100%|##########| 11.4MB / 11.4MB            

Adapters and tokenizer pushed to: qwen2.5_7b_rating_SFT
