In [14]:
from pathlib import Path
import os

# Move the kernel into the fine-tune repo root so that later cells can use
# repo-relative paths (src/, runs/, data/). Safe to re-run: once the kernel is
# already inside the repo, the current directory itself is a candidate.
repo_name = "mental-health-fine-tuned-llm-SA"

candidates = []

# Optional override for machines other than the author's:
# set FINETUNE_REPO_ROOT to the repo folder before starting Jupyter.
env_root = os.environ.get("FINETUNE_REPO_ROOT")
if env_root:
    candidates.append(Path(env_root))

# Re-running this cell after a successful chdir: we are already in the repo.
if Path.cwd().name == repo_name:
    candidates.append(Path.cwd())

# Normal case: notebook launched from the folder that contains "Fine-tune project".
candidates.append(Path("Fine-tune project") / repo_name)

# fallback for OneDrive full path (if above relative path fails)
# NOTE(review): machine-specific absolute path; prefer FINETUNE_REPO_ROOT elsewhere.
candidates.append(
    Path(r"C:\Users\User\OneDrive\Desktop\(ZA4309) Emerging Technologies in Intelligence\Project\Fine-tune project\mental-health-fine-tuned-llm-SA")
)

# First candidate that is an existing directory wins.
target = next((c for c in candidates if c.is_dir()), None)
if target is None:
    tried = "\n  ".join(str(c) for c in candidates)
    raise FileNotFoundError(
        f"Could not locate the repo root '{repo_name}'. Tried:\n  {tried}\n"
        "Set the FINETUNE_REPO_ROOT environment variable to its location."
    )

os.chdir(target)
print("CWD now:", Path.cwd())
print("Has src/?", (Path("src")).exists())
print("Has runs/?", (Path("runs")).exists())


CWD now: c:\Users\User\OneDrive\Desktop\(ZA4309) Emerging Technologies in Intelligence\Project\Fine-tune project\mental-health-fine-tuned-llm-SA
Has src/? True
Has runs/? True


In [15]:
from pathlib import Path

# --- Evaluation configuration -------------------------------------------------
# Hub id of the base model.
BASE_MODEL = "Qwen/Qwen2.5-0.5B-Instruct"

# Training artefacts: the Trainer's output folder and the "last" checkpoint
# inside it (the latter is created by Option A).
OUTPUT_DIR = Path("runs", "sft-qwen")
CKPT_DIR = OUTPUT_DIR.joinpath("checkpoint-last")

# Validation split (JSON Lines).
VAL_FILE = Path("data/processed/counselchat_val.jsonl")

# Number of validation examples to score. Keep it small for a first CPU run;
# raise it later if evaluation turns out to be fast enough.
N_EVAL = 100

# Fail early if the inputs this notebook needs are not on disk.
# CKPT_DIR is deliberately not checked here; it is only the expected location.
assert VAL_FILE.exists(), f"Missing: {VAL_FILE}"
assert OUTPUT_DIR.exists(), f"Missing: {OUTPUT_DIR}"

print("Eval file:", VAL_FILE)
print("Checkpoint dir (expected):", CKPT_DIR)


Eval file: data\processed\counselchat_val.jsonl
Checkpoint dir (expected): runs\sft-qwen\checkpoint-last


In [17]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# tokenizer from the base model (safe), weights from local checkpoint
tok = AutoTokenizer.from_pretrained(BASE_MODEL)


def _is_loadable_checkpoint(folder):
    """Return True if `folder` contains a config file to load a model from.

    `config.json` marks a full model save. `adapter_config.json` is accepted
    as well, on the assumption that the run may have saved a PEFT adapter.
    NOTE(review): confirm whether the fine-tune saved full weights or an adapter.
    A missing folder simply yields False.
    """
    return any((folder / name).is_file() for name in ("config.json", "adapter_config.json"))


def _checkpoint_step(folder):
    """Return the numeric step of a `checkpoint-<step>` folder, or -1 if the suffix is not a number."""
    suffix = folder.name.rpartition("-")[2]
    return int(suffix) if suffix.isdecimal() else -1


# Search order:
#   1. CKPT_DIR             - the expected "checkpoint-last" folder
#   2. OUTPUT_DIR           - only usable if a model was saved at its top level
#   3. checkpoint-<step>    - numbered subfolders of OUTPUT_DIR, highest step first
# The original fallback went straight to OUTPUT_DIR, which fails with
# "Unrecognized model" when that folder has no usable config of its own.
numbered_ckpts = sorted(
    (p for p in OUTPUT_DIR.glob("checkpoint-*") if p.is_dir()),
    key=_checkpoint_step,
    reverse=True,
)
ckpt_to_load = next(
    (c for c in [CKPT_DIR, OUTPUT_DIR, *numbered_ckpts] if _is_loadable_checkpoint(c)),
    None,
)
if ckpt_to_load is None:
    raise FileNotFoundError(
        f"No loadable checkpoint found: neither {CKPT_DIR}, {OUTPUT_DIR}, nor any "
        f"checkpoint-* folder under {OUTPUT_DIR} contains config.json or adapter_config.json. "
        "Save the fine-tuned model first (e.g. trainer.save_model)."
    )
print("Loading from:", ckpt_to_load)

model = AutoModelForCausalLM.from_pretrained(
    ckpt_to_load.as_posix(),
    torch_dtype=torch.float32,        # CPU
    local_files_only=True             # don't hit the hub
).to("cpu")

tok.padding_side = "right"
# NOTE(review): use_cache=False is usually a training-time setting; confirm it is
# intended for evaluation, since this notebook calls model.generate afterwards.
model.config.use_cache = False

print("Loaded OK.")


Loading from: runs\sft-qwen


ValueError: Unrecognized model in runs/sft-qwen. Should have a `model_type` key in its config.json, or contain one of the following strings in its name: aimv2, aimv2_vision_model, albert, align, altclip, apertus, arcee, aria, aria_text, audio-spectrogram-transformer, autoformer, aya_vision, bamba, bark, bart, beit, bert, bert-generation, big_bird, bigbird_pegasus, biogpt, bit, bitnet, blenderbot, blenderbot-small, blip, blip-2, blip_2_qformer, bloom, blt, bridgetower, bros, camembert, canine, chameleon, chinese_clip, chinese_clip_vision_model, clap, clip, clip_text_model, clip_vision_model, clipseg, clvp, code_llama, codegen, cohere, cohere2, cohere2_vision, colpali, colqwen2, conditional_detr, convbert, convnext, convnextv2, cpmant, csm, ctrl, cvt, d_fine, dab-detr, dac, data2vec-audio, data2vec-text, data2vec-vision, dbrx, deberta, deberta-v2, decision_transformer, deepseek_v2, deepseek_v3, deepseek_vl, deepseek_vl_hybrid, deformable_detr, deit, depth_anything, depth_pro, deta, detr, dia, diffllama, dinat, dinov2, dinov2_with_registers, dinov3_convnext, dinov3_vit, distilbert, doge, donut-swin, dots1, dpr, dpt, edgetam, edgetam_video, edgetam_vision_model, efficientformer, efficientloftr, efficientnet, electra, emu3, encodec, encoder-decoder, eomt, ernie, ernie4_5, ernie4_5_moe, ernie_m, esm, evolla, exaone4, falcon, falcon_h1, falcon_mamba, fastspeech2_conformer, fastspeech2_conformer_with_hifigan, flaubert, flava, flex_olmo, florence2, fnet, focalnet, fsmt, funnel, fuyu, gemma, gemma2, gemma3, gemma3_text, gemma3n, gemma3n_audio, gemma3n_text, gemma3n_vision, git, glm, glm4, glm4_moe, glm4v, glm4v_moe, glm4v_moe_text, glm4v_text, glpn, got_ocr2, gpt-sw3, gpt2, gpt_bigcode, gpt_neo, gpt_neox, gpt_neox_japanese, gpt_oss, gptj, gptsan-japanese, granite, granite_speech, granitemoe, granitemoehybrid, granitemoeshared, granitevision, graphormer, grounding-dino, groupvit, helium, hgnet_v2, hiera, hubert, hunyuan_v1_dense, hunyuan_v1_moe, ibert, idefics, idefics2, 
idefics3, idefics3_vision, ijepa, imagegpt, informer, instructblip, instructblipvideo, internvl, internvl_vision, jamba, janus, jetmoe, jukebox, kosmos-2, kosmos-2.5, kyutai_speech_to_text, layoutlm, layoutlmv2, layoutlmv3, led, levit, lfm2, lfm2_vl, lightglue, lilt, llama, llama4, llama4_text, llava, llava_next, llava_next_video, llava_onevision, longcat_flash, longformer, longt5, luke, lxmert, m2m_100, mamba, mamba2, marian, markuplm, mask2former, maskformer, maskformer-swin, mbart, mctct, mega, megatron-bert, metaclip_2, mgp-str, mimi, minimax, ministral, mistral, mistral3, mixtral, mlcd, mllama, mm-grounding-dino, mobilebert, mobilenet_v1, mobilenet_v2, mobilevit, mobilevitv2, modernbert, modernbert-decoder, moonshine, moshi, mpnet, mpt, mra, mt5, musicgen, musicgen_melody, mvp, nat, nemotron, nezha, nllb-moe, nougat, nystromformer, olmo, olmo2, olmo3, olmoe, omdet-turbo, oneformer, open-llama, openai-gpt, opt, ovis2, owlv2, owlvit, paligemma, parakeet_ctc, parakeet_encoder, patchtsmixer, patchtst, pegasus, pegasus_x, perceiver, perception_encoder, perception_lm, persimmon, phi, phi3, phi4_multimodal, phimoe, pix2struct, pixtral, plbart, poolformer, pop2piano, prompt_depth_anything, prophetnet, pvt, pvt_v2, qdqbert, qwen2, qwen2_5_omni, qwen2_5_vl, qwen2_5_vl_text, qwen2_audio, qwen2_audio_encoder, qwen2_moe, qwen2_vl, qwen2_vl_text, qwen3, qwen3_moe, qwen3_next, qwen3_omni_moe, qwen3_vl, qwen3_vl_moe, qwen3_vl_moe_text, qwen3_vl_text, rag, realm, recurrent_gemma, reformer, regnet, rembert, resnet, retribert, roberta, roberta-prelayernorm, roc_bert, roformer, rt_detr, rt_detr_resnet, rt_detr_v2, rwkv, sam, sam2, sam2_hiera_det_model, sam2_video, sam2_vision_model, sam_hq, sam_hq_vision_model, sam_vision_model, seamless_m4t, seamless_m4t_v2, seed_oss, segformer, seggpt, sew, sew-d, shieldgemma2, siglip, siglip2, siglip2_vision_model, siglip_vision_model, smollm3, smolvlm, smolvlm_vision, speech-encoder-decoder, speech_to_text, speech_to_text_2, speecht5, 
splinter, squeezebert, stablelm, starcoder2, superglue, superpoint, swiftformer, swin, swin2sr, swinv2, switch_transformers, t5, t5gemma, table-transformer, tapas, textnet, time_series_transformer, timesfm, timesformer, timm_backbone, timm_wrapper, trajectory_transformer, transfo-xl, trocr, tvlt, tvp, udop, umt5, unispeech, unispeech-sat, univnet, upernet, van, vaultgemma, video_llava, videomae, vilt, vipllava, vision-encoder-decoder, vision-text-dual-encoder, visual_bert, vit, vit_hybrid, vit_mae, vit_msn, vitdet, vitmatte, vitpose, vitpose_backbone, vits, vivit, vjepa2, voxtral, voxtral_encoder, wav2vec2, wav2vec2-bert, wav2vec2-conformer, wavlm, whisper, xclip, xcodec, xglm, xlm, xlm-prophetnet, xlm-roberta, xlm-roberta-xl, xlnet, xlstm, xmod, yolos, yoso, zamba, zamba2, zoedepth

In [None]:
import json
from itertools import islice
from pathlib import Path

# Generate one greedy completion per validation example.
# Relies on `tok` / `model` from the model-loading cell and on `VAL_FILE` /
# `N_EVAL` from the config cell.
val_path = VAL_FILE
preds, refs = [], []

# `with` guarantees the file is closed even if generation raises midway.
with open(val_path, "r", encoding="utf-8") as val_file:
    # Skip blank lines (e.g. a trailing newline) instead of crashing in json.loads.
    records = (json.loads(line) for line in val_file if line.strip())

    # Honour the N_EVAL budget from the config cell; islice(..., None) reads everything.
    for ex in islice(records, N_EVAL):
        prompt = ex["input"]
        ref = ex["output"]

        # NOTE(review): the prompt is fed as raw text. If training formatted examples
        # with the tokenizer's chat template, apply the same template here - confirm
        # against the training script.
        ids = tok(prompt, return_tensors="pt", truncation=True, max_length=512).to("cpu")
        out = model.generate(**ids, max_new_tokens=256, do_sample=False)

        # The returned sequence starts with the prompt tokens, so keep only the
        # tokens after them. Cutting by token count stays correct when the prompt
        # was truncated to 512 tokens or does not decode back to the exact same
        # string, both of which break a `text.startswith(prompt)` check.
        prompt_len = ids["input_ids"].shape[1]
        pred = tok.decode(out[0][prompt_len:], skip_special_tokens=True).strip()

        preds.append(pred)
        refs.append(ref)

print(len(preds), "predictions")


In [None]:
# `load_metric` is not used in this cell and is unavailable in recent `datasets`
# releases; guard the import so the cell does not die on it. The name is kept
# (as None when unavailable) in case another cell still refers to it.
try:
    from datasets import load_metric
except ImportError:
    load_metric = None

# ROUGE & BLEU
import numpy as np
from rouge_score import rouge_scorer
from sacrebleu import corpus_bleu

# Scoring an empty or misaligned set would yield nan or an opaque library error.
assert preds, "No predictions to score - run the generation cell first"
assert len(preds) == len(refs), f"preds/refs length mismatch: {len(preds)} vs {len(refs)}"

# ROUGE-L
# Use the "rougeL" key so the computed metric matches the printed label
# (the previous "rougeLsum" key selects a different ROUGE variant).
scorer = rouge_scorer.RougeScorer(["rougeL"], use_stemmer=True)
# RougeScorer.score takes (target, prediction), hence (r, p).
rouges = [scorer.score(r, p)["rougeL"].fmeasure for r, p in zip(refs, preds)]
print("ROUGE-L (mean):", float(np.mean(rouges)))

# SacreBLEU
# corpus_bleu takes the hypotheses plus a list of reference streams; there is one stream here.
bleu = corpus_bleu(preds, [refs])
print("SacreBLEU:", bleu.score)
