In [1]:
!pip -q install -U "transformers>=4.43.3" "datasets>=2.19.0" "peft>=0.11.1" "accelerate>=0.33.0" huggingface_hub tiktoken

import os, torch, transformers, datasets, peft, accelerate, platform
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"         # faster Hub downloads
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

print("Torch:", torch.__version__)
print("Transformers:", transformers.__version__)
print("Datasets:", datasets.__version__)
print("PEFT:", peft.__version__)
print("Accelerate:", accelerate.__version__)
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))
    print("VRAM (GB):", round(torch.cuda.get_device_properties(0).total_memory/1e9, 2))

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.1/40.1 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.6/11.6 MB[0m [31m119.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m119.0 MB/s[0m eta [36m0:00:00[0m
[?25hTorch: 2.8.0+cu126
Transformers: 4.56.0
Datasets: 4.0.0
PEFT: 0.17.1
Accelerate: 1.10.1
CUDA available: True
GPU: NVIDIA A100-SXM4-40GB
VRAM (GB): 42.47


In [2]:
# Load base model + tokenizer, then merge the LoRA adapter

from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

BASE_MODEL = "Qwen/Qwen2.5-7B-Instruct"
ADAPTER_REPO = "mohammad-shirkhani/qwen2.5_7b_rating_SFT"

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
# Prefer SDPA on Ampere
try:
    model.config.attn_implementation = "sdpa"
except:
    pass

# Load LoRA adapters
model = PeftModel.from_pretrained(model, ADAPTER_REPO)
model.eval()

print("Model + LoRA loaded.")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/663 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/3.86G [00:00<?, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/3.95G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/3.56G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/3.86G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/243 [00:00<?, ?B/s]

adapter_config.json:   0%|          | 0.00/936 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/323M [00:00<?, ?B/s]

Model + LoRA loaded.


In [3]:
# prompt used in training
SYSTEM_PROMPT = (
    "You are an expert analyst for rating prediction in a heterogeneous bipartite graph of users and items. "
    "Your job is to analyze user-item interactions and infer a final numerical rating.\n\n"
    "Task: Extract evidence from the provided data and then conclude the numerical rating.\n\n"
    "You are given a heterogeneous bipartite graph setting (users and items). Edges include:\n"
    "- user→item rating interactions (e.g., user_item_k with an explicit rating),\n"
    "- usersim (user-user similarity), and\n"
    "- itemsim (item-item similarity).\n\n"
    "What to do:\n"
    "1) Analyze the user's likely preferences and the item's traits by leveraging ONLY:\n"
    "   - the user attributes,\n"
    "   - the item attributes, and\n"
    "   - the provided meta-paths (treat each path as a weak but interpretable signal; combine corroborating signals).\n"
    "2) Then produce two XML blocks ONLY (no extra text):\n"
    "   a) <reason>...</reason> — Provide a concise, evidence-first explanation that cites the most helpful signals from the meta-paths and attributes.\n"
    "   b) <answer>...</answer> — A single numeric rating (e.g., 1–5). Use a plain number (no extra symbols/units).\n\n"
    "Formatting policy:\n"
    "- Output EXACTLY two blocks and nothing else: first <reason>...</reason>, then a blank line, then <answer>...</answer>.\n"
    "- Do NOT echo or quote the prompt. Do NOT add commentary outside the XML tags.\n"
    "- Focus on robust evidence that appears multiple times across different meta-paths.\n"
    "- If signals conflict, weigh paths with stronger agreement on genre/age/occupation/peer similarity.\n"
    "- Use the evidence to justify the final number you choose.\n"
)
print("SYSTEM_PROMPT ready.")


SYSTEM_PROMPT ready.


In [4]:
# Load dataset.
from datasets import load_dataset
ds = load_dataset("mohammad-shirkhani/social_movielens_new2", split="test")
print(ds)
print("Columns:", ds.column_names)

# Peek first row
from pprint import pprint
ex0 = ds[0]
print("\n--- First row (keys) ---")
for k in ds.column_names:
    print(f"{k} ->", type(ex0[k]).__name__)
print("\nuser:")
pprint(ex0["user"])
print("\nitem:")
pprint(ex0["item"])
print("\nanswer:", ex0["answer"])
print("\n#paths:", len(ex0["paths"]))
print("sample path:", ex0["paths"][0] if ex0["paths"] else "N/A")

README.md:   0%|          | 0.00/909 [00:00<?, ?B/s]

data/train-00000-of-00002.parquet:   0%|          | 0.00/84.5M [00:00<?, ?B/s]

data/train-00001-of-00002.parquet:   0%|          | 0.00/84.4M [00:00<?, ?B/s]

data/validation-00000-of-00001.parquet:   0%|          | 0.00/21.1M [00:00<?, ?B/s]

data/test-00000-of-00001.parquet:   0%|          | 0.00/21.1M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/80000 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/10000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/10000 [00:00<?, ? examples/s]

Dataset({
    features: ['user', 'item', 'answer', 'paths'],
    num_rows: 10000
})
Columns: ['user', 'item', 'answer', 'paths']

--- First row (keys) ---
user -> dict
item -> dict
answer -> float
paths -> list

user:
{'Age': 27, 'Gender': 'Male', 'Occupation': 'student', 'UserID': 758}

item:
{'Genres': 'Comedy',
 'MovieID': 153,
 'ReleaseDate': '01-Jan-1988',
 'Title': 'Fish Called Wanda, A (1988)'}

answer: 5.0

#paths: 30
sample path: user_question -> usersim -> User{UserID 303, Age 19, Gender Male, Occupation student} -> user_item_3 (rating=3) -> Item{MovieID 549, Title "Rob Roy (1995)", Release Date 01-Jan-1995, Genres Drama, Romance, War} -> item_user_4 (rating=4) -> User{UserID 222, Age 29, Gender Male, Occupation programmer} -> user_item_4 (rating=4) -> item_question


In [5]:
# Helper functions

import json, re, random, torch
from typing import Dict, List

PROMPT_BUDGET_TOKENS = 6000

def format_user_block(user_dict: Dict) -> str:
    return json.dumps(user_dict, ensure_ascii=False, indent=2)

def format_item_block(item_dict: Dict) -> str:
    return json.dumps(item_dict, ensure_ascii=False, indent=2)

def format_paths(paths_list: List[str]) -> str:
    return "\n".join(paths_list)

def build_problem(ex: Dict) -> str:
    user_txt = format_user_block(ex["user"])
    item_txt = format_item_block(ex["item"])
    paths_txt = format_paths(ex["paths"])  # prefer all meta-paths (30 if available)
    problem = (
        "User:\n" + user_txt + "\n\n"
        "Item:\n" + item_txt + "\n\n"
        "Meta-path evidence (each path from this user to the target item):\n"
        + paths_txt
    )
    return problem

def make_messages(problem_text: str) -> List[Dict[str, str]]:
    return [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user",   "content": problem_text},
    ]

def apply_budget(messages: List[Dict[str, str]], prefer_all_paths_text: str, paths: List[str]) -> torch.Tensor:
    """
    Try with all paths first (30). If tokenized prompt > budget, fall back to a 20-path sample and retry.
    Keep the *tail* if still slightly over (closest context to the model input).
    """
    # Try all paths
    input_ids = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=True, return_tensors="pt")[0]
    if input_ids.size(0) <= PROMPT_BUDGET_TOKENS:
        return input_ids

    if len(paths) > 20:
        step = max(1, len(paths) // 20)
        sampled = [paths[i] for i in range(0, len(paths), step)][:20]
        problem_sampled = (
            prefer_all_paths_text.split("Meta-path evidence")[0]
            + "Meta-path evidence (each path from this user to the target item):\n"
            + "\n".join(sampled)
        )
        messages2 = make_messages(problem_sampled)
        input_ids2 = tokenizer.apply_chat_template(messages2, add_generation_prompt=True, tokenize=True, return_tensors="pt")[0]
        if input_ids2.size(0) <= PROMPT_BUDGET_TOKENS:
            return input_ids2
        return input_ids2[-PROMPT_BUDGET_TOKENS:]
    else:
        return input_ids[-PROMPT_BUDGET_TOKENS:]

ANSWER_RE = re.compile(r"<answer>\s*([+-]?(?:\d+(?:\.\d+)?))\s*</answer>", re.IGNORECASE | re.DOTALL)

def extract_numeric_answer(text: str):
    m = ANSWER_RE.search(text)
    if not m:
        return None
    try:
        return float(m.group(1))
    except:
        return None

@torch.no_grad()
def generate_two_block_output(ex: Dict, max_new_tokens: int = 1200, temperature: float = 0.7, top_p: float = 0.9):
    """
    Build messages (preferring 30 meta-paths), apply a 5120-token input budget, and generate.
    We allow >=1024 new tokens by setting max_new_tokens >= 1024 (here 1200).
    """
    problem = build_problem(ex)
    messages = make_messages(problem)

    # Token-budget the input
    input_ids = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=True, return_tensors="pt")[0]
    if input_ids.size(0) > PROMPT_BUDGET_TOKENS:
        input_ids = apply_budget(messages, problem, ex["paths"])

    input_ids = input_ids.unsqueeze(0).to(model.device)
    out_ids = model.generate(
        input_ids=input_ids,
        max_new_tokens=max_new_tokens,   
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        eos_token_id=None,
        pad_token_id=tokenizer.pad_token_id,
    )
    gen = tokenizer.decode(out_ids[0][input_ids.size(1):], skip_special_tokens=True)
    return gen


In [7]:
# Run one random test example 

import numpy as np, random, textwrap
random.seed(42)
idx = random.randrange(0, len(ds))
ex = ds[int(idx)]

pred_text = generate_two_block_output(ex, max_new_tokens=1200, temperature=0.7, top_p=0.9)
pred_num = extract_numeric_answer(pred_text)
gt = float(ex["answer"])

print(f"Random test index: {idx}")
print("\n--- Generated output (truncated view) ---")
print(textwrap.shorten(pred_text.replace("\n"," "), width=1200, placeholder=" ..."))
print("\nParsed <answer>:", pred_num)
print("Ground truth answer:", gt)
if pred_num is not None:
    print("Absolute error:", abs(pred_num - gt))
else:
    print("Absolute error: N/A (could not parse)")


Random test index: 1824

--- Generated output (truncated view) ---
<reason>The user, UserID 13, is a 47-year-old male educator. The target item, MovieID 913, is a Comedy released in March 1998. The observed rating is 1.0. Analyzing the meta-paths reveals several indirect connections and signals: 1. Several paths involve UserID 13 interacting with items that share genres with the target item. For example, a path shows a `user_item_1` interaction with MovieID 904 ("Ma vie en rose"), which includes the 'Comedy' genre, similar to the target item. Another path involves `user_item_4` with MovieID 170 ("Cinema Paradiso"), also including 'Comedy'. However, these interactions are associated with low ratings (1 or 4). 2. Paths connecting to similar users (e.g., UserID 524, also an educator, with a `usersim` link) show mixed interaction patterns. One path links UserID 524 via `user_item_4` to MovieID 170 ("Cinema Paradiso", Comedy, Drama, Romance), followed by a `user_item_4` interaction from ano

In [8]:
# Infer on the first 300 of test

import json, math, os
from tqdm import tqdm

N = min(300, len(ds))
OUT_PATH = "/content/test_preds_300.jsonl"

rows = []
ok = 0
with open(OUT_PATH, "w", encoding="utf-8") as f:
    for i in tqdm(range(N), desc="Running 300 test inferences"):
        ex = ds[i]
        pred_text = generate_two_block_output(ex, max_new_tokens=1200, temperature=0.7, top_p=0.9)
        pred_num = extract_numeric_answer(pred_text)
        gt = float(ex["answer"])

        rec = {
            "index": i,
            "user": ex.get("user", {}),
            "item": ex.get("item", {}),
            "gt_answer": gt,
            "pred_text": pred_text,
            "pred_answer": pred_num,
        }
        f.write(json.dumps(rec, ensure_ascii=False) + "\n")
        rows.append(rec)
        if pred_num is not None:
            ok += 1

print(f"\nSaved {len(rows)} records to {OUT_PATH}")
print(f"Parsed numeric <answer> for {ok}/{N}")


Running 300 test inferences: 100%|██████████| 300/300 [6:50:43<00:00, 82.14s/it]


Saved 300 records to /content/test_preds_300.jsonl
Parsed numeric <answer> for 293/300





In [19]:
# Compute RMSE and MAE

import json, math
from statistics import mean

IN_PATH = "/content/test_preds_300.jsonl"

y_true, y_pred = [], []
with open(IN_PATH, "r", encoding="utf-8") as f:
    for line in f:
        rec = json.loads(line)
        if rec.get("pred_answer") is None:
            continue
        y_true.append(float(rec["gt_answer"]))
        y_pred.append(float(rec["pred_answer"]))

def mae(a, b):
    return mean(abs(x - y) for x, y in zip(a, b)) if a else float("nan")

def rmse(a, b):
    return math.sqrt(mean((x - y) ** 2 for x, y in zip(a, b))) if a else float("nan")

print("MAE:", mae(y_true, y_pred))
print("RMSE:", rmse(y_true, y_pred))


MAE: 0.8266666666666667
RMSE: 0.9092121131323904
