In [1]:
%pip install pandas scikit-learn snorkel transformers

# **Cell 2: Load Data**
import pandas as pd

Note: you may need to restart the kernel to use updated packages.


In [1]:
# **Cell 4: Augment Keywords via TF-IDF**
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np


In [2]:
from snorkel.labeling import labeling_function, PandasLFApplier
from snorkel.labeling.model.label_model import LabelModel
from transformers import pipeline
import torch


In [3]:
df = pd.read_csv("MTS-Dialog-with-specialist-labels.csv")

NameError: name 'pd' is not defined

In [5]:
# Cell 1: Install Dependencies
%pip install -q transformers datasets peft torch scikit-learn accelerate


Note: you may need to restart the kernel to use updated packages.


In [6]:
pip install -U bitsandbytes

Note: you may need to restart the kernel to use updated packages.


In [1]:
import pandas as pd
# **Cell 4: Augment Keywords via TF-IDF**
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
from snorkel.labeling import labeling_function, PandasLFApplier
from snorkel.labeling.model.label_model import LabelModel
from transformers import pipeline
import torch


In [4]:
# 2C) Split into train/validation (no stratify)
from sklearn.model_selection import train_test_split

# Identify any classes with <2 examples
counts = df["specialist_label"].value_counts()
rare = counts[counts < 2].index.tolist()
if rare:
    print("⚠️ These specialties have <2 examples and may not appear in both splits:", rare)

# Random split
train_df, val_df = train_test_split(
    df, test_size=0.1, random_state=42, shuffle=True
)
print("Train / Val sizes:", len(train_df), len(val_df))


⚠️ These specialties have <2 examples and may not appear in both splits: ['Nephrologist']
Train / Val sizes: 1080 121


In [6]:
# Cell 3: Generate JSONL for Fine-Tuning
import json

def make_entry(row):
    # Using a clear prompt/completion delimiter
    prompt = (
        "### Patient Info:\n"
        f"{row['section_text']}\n"
        f"{row['dialogue']}\n\n"
        "### Assistant:\n"
        "Based on the above, I recommend the patient see a"
    )
    completion = f" {row['specialist_label']}".strip()
    return {"prompt": prompt, "completion": completion}

for split, df_split in [("train", train_df), ("val", val_df)]:
    path = f"llm_finetune_{split}.jsonl"
    with open(path, "w") as f:
        for _, row in df_split.iterrows():
            f.write(json.dumps(make_entry(row)) + "\n")
    print(f"Generated {path} ({len(df_split)} records)")


Generated llm_finetune_train.jsonl (1080 records)
Generated llm_finetune_val.jsonl (121 records)


In [6]:
# Cell 0: Login to Hugging Face
%pip install -q huggingface_hub

from huggingface_hub import login

# This will prompt you to paste your HF token (create one at https://huggingface.co/settings/tokens)
login()


Note: you may need to restart the kernel to use updated packages.


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

Token has not been saved to git credential helper.


In [17]:
# Cell 2: Load & Split Pseudo-Labeled Data
import pandas as pd
from sklearn.model_selection import train_test_split

df = pd.read_csv("MTS-Dialog-with-specialist-labels.csv")
df = df.rename(columns={"specialist_label": "label"})

# Warn about very rare classes
counts = df["label"].value_counts()
rare = counts[counts < 2].index.tolist()
if rare:
    print("⚠️ Rare specialties (<2 examples):", rare)

# Random 90/10 split
train_df, val_df = train_test_split(
    df, test_size=0.1, random_state=42, shuffle=True
)
print("Train/Val sizes:", len(train_df), len(val_df))


⚠️ Rare specialties (<2 examples): ['Nephrologist']
Train/Val sizes: 1080 121


In [18]:
# Cell 3: Generate JSONL for Fine-Tuning
import json

def make_entry(row):
    prompt = (
        "### Patient Info:\n"
        f"{row['section_text']}\n{row['dialogue']}\n\n"
        "### Assistant:\n"
        "Based on the above, I recommend the patient see a"
    )
    completion = f" {row['label']}"
    return {"prompt": prompt, "completion": completion}

for split, split_df in [("train", train_df), ("val", val_df)]:
    fname = f"llm_finetune_{split}.jsonl"
    with open(fname, "w") as f:
        for _, row in split_df.iterrows():
            f.write(json.dumps(make_entry(row)) + "\n")
    print(f"Generated {fname} ({len(split_df)} records)")


Generated llm_finetune_train.jsonl (1080 records)
Generated llm_finetune_val.jsonl (121 records)


In [19]:
# Cell 4A: Imports & LoRA Config
from peft import LoraConfig, get_peft_model, TaskType
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_NAME = "meta-llama/Llama-3.2-3B-Instruct"
OUTPUT_DIR = "llm-specialist-llama3"
MAX_INPUT_LEN  = 512
MAX_TARGET_LEN = 64


In [20]:
# Cell 4B: Load Tokenizer & Model (FP16)
import torch

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)
model     = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    device_map="auto",
)

# Pad‐token hack for Llama
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = tokenizer.pad_token_id


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [22]:
# Cell 4C: Apply LoRA Adapter
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["q_proj","v_proj"],
)
model = get_peft_model(model, peft_config)
model.gradient_checkpointing_enable()




In [23]:
# Cell 4D: Prepare Datasets & Data Collator
from datasets import load_dataset
from transformers import DataCollatorForLanguageModeling

train_ds = load_dataset("json", data_files="llm_finetune_train.jsonl", split="train")
val_ds   = load_dataset("json", data_files="llm_finetune_val.jsonl", split="train")

def preprocess(batch):
    texts = [p + c for p, c in zip(batch["prompt"], batch["completion"])]
    return tokenizer(
        texts,
        padding="max_length",
        truncation=True,
        max_length=MAX_INPUT_LEN + MAX_TARGET_LEN,
    )

train_ds = train_ds.map(preprocess, batched=True, remove_columns=["prompt","completion"])
val_ds   = val_ds.map(preprocess, batched=True, remove_columns=["prompt","completion"])

# For decoder-only, labels = input_ids
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)


Generating train split: 0 examples [00:00, ? examples/s]

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/1080 [00:00<?, ? examples/s]

Map:   0%|          | 0/121 [00:00<?, ? examples/s]

In [24]:
# Cell 4E: TrainingArguments & Trainer Setup
from transformers import Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=8,
    num_train_epochs=3,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=50,
    save_steps=500,
    save_total_limit=2,
    do_eval=True,
    eval_steps=500,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    data_collator=data_collator,
)


No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [25]:
# Cell 4F: Train & Save
trainer.train()
model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)
print("✅ Fine-tuning complete; model saved to", OUTPUT_DIR)


Step,Training Loss
50,1.8551
100,1.6346
150,1.5947
200,1.57


✅ Fine-tuning complete; model saved to llm-specialist-llama3


In [29]:
# Cell 5: Quick Inference Test (no `device` arg)
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from peft import PeftModel
import torch

# 5A) Config
BASE_MODEL    = "meta-llama/Llama-3.2-3B-Instruct"
ADAPTER_DIR   = "llm-specialist-llama3"  # where you saved your LoRA adapter
MAX_NEW_TOKENS = 32

# 5B) Load tokenizer, base model, and adapter
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=False)
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    device_map="auto",
)
model = PeftModel.from_pretrained(base_model, ADAPTER_DIR, torch_dtype=torch.float16)

# 5C) Build the text-generation pipeline
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=MAX_NEW_TOKENS,
    do_sample=False,
)

# 5D) Your test prompt
test_prompt = (
    "### Patient Info:\n"
    "A 55-year-old male with chest pain and sweating on exertion.\n\n"
    "### Assistant:\n"
    "Based on the above, I recommend the patient see a"
)

# 5E) Generate & print
output = generator(test_prompt)
print(output[0]["generated_text"])


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0
The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['AriaTextForCausalLM', 'BambaForCausalLM', 'BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'Cohere2ForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'DeepseekV3ForCausalLM', 'DiffLlamaForCausalLM', 'ElectraForCausalLM', 'Emu3ForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FalconMambaForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'Gemma3ForConditionalGeneration', 'Gemma3ForCausalLM', 'GitForCausalLM', 'GlmForCausalLM', 'Glm4ForCausalLM', 'GotOcr2ForConditionalGeneration', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoFo

### Patient Info:
A 55-year-old male with chest pain and sweating on exertion.

### Assistant:
Based on the above, I recommend the patient see a cardiologist for further evaluation and diagnosis. The symptoms of chest pain and sweating on exertion are classic for angina pectoris, a condition where the heart


In [30]:
# Cell 6: Interactive Multi-Turn Chat Loop
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch

# Config (reuse the same base + adapter)
BASE_MODEL    = "meta-llama/Llama-3.2-3B-Instruct"
ADAPTER_DIR   = "llm-specialist-llama3"
MAX_INPUT_LEN = 512
MAX_TOKENS    = 32

# Load tokenizer, base model, apply LoRA
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=False)
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    device_map="auto",
)
model = PeftModel.from_pretrained(base_model, ADAPTER_DIR, torch_dtype=torch.float16)

# Build a generator pipeline (no device=)
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=MAX_TOKENS,
    do_sample=False,
)

# Chat state
chat_history = []

print("🩺 Specialist-Recommender Chat. Type 'quit' to exit.\n")

while True:
    sec = input("Patient SECTION (or 'quit'): ")
    if sec.strip().lower() == "quit":
        break
    dia = input("Patient DIALOGUE (or 'quit'): ")
    if dia.strip().lower() == "quit":
        break

    # Add the patient’s turn to history
    chat_history.append({
        "section": sec,
        "dialogue": dia
    })

    # Build the full prompt from history
    prompt = ""
    for turn in chat_history:
        prompt += (
            "### Patient Info:\n"
            f"{turn['section']}\n"
            f"{turn['dialogue']}\n\n"
            "### Assistant:\n"
            f"{turn.get('assistant','')}\n\n"
        )
    prompt += "Based on the above, I recommend the patient see a"

    # Generate and parse out only the new assistant reply
    out = generator(prompt)[0]["generated_text"]
    # The model will repeat the full prompt + its completion, so split it off
    reply = out[len(prompt):].strip()

    # Store & print
    chat_history[-1]["assistant"] = reply
    print("\n🤖 Assistant:", reply, "\n" + "-"*40 + "\n")


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0
The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['AriaTextForCausalLM', 'BambaForCausalLM', 'BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'Cohere2ForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'DeepseekV3ForCausalLM', 'DiffLlamaForCausalLM', 'ElectraForCausalLM', 'Emu3ForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FalconMambaForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'Gemma3ForConditionalGeneration', 'Gemma3ForCausalLM', 'GitForCausalLM', 'GlmForCausalLM', 'Glm4ForCausalLM', 'GotOcr2ForConditionalGeneration', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoFo

🩺 Specialist-Recommender Chat. Type 'quit' to exit.



Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



🤖 Assistant: doctor. The patient's response to the greeting is a simple "hello", which doesn't indicate any specific concerns or issues. However, the patient's repeated response 
----------------------------------------



Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



🤖 Assistant: doctor. The patient's response to the greeting was a simple "hello", which doesn't indicate any specific concerns or issues. However, the patient's repeated response 
----------------------------------------



KeyboardInterrupt: Interrupted by user

In [31]:
# Cell 8: Simulate a Patient LLM ↔ Fine-Tuned Specialist LLM Conversation

from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch, json, re

# Config
BASE_MODEL    = "meta-llama/Llama-3.2-3B-Instruct"
ADAPTER_DIR   = "llm-specialist-llama3"
MAX_PATIENT_TOKENS    = 200
MAX_SPECIALIST_TOKENS = 64

# 1) Patient-generator (base instruct model)
patient_tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=False)
patient_model     = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,
    device_map="auto",
    low_cpu_mem_usage=True
)
patient_gen = pipeline(
    "text-generation",
    model=patient_model,
    tokenizer=patient_tokenizer,
    max_new_tokens=MAX_PATIENT_TOKENS,
    do_sample=True,
    top_p=0.9
)

# 2) Specialist-recommender (LoRA-tuned)
spec_tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=False)
spec_base      = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,
    device_map="auto",
    low_cpu_mem_usage=True
)
spec_model = PeftModel.from_pretrained(
    spec_base, ADAPTER_DIR, torch_dtype=torch.float16
)
specialist_gen = pipeline(
    "text-generation",
    model=spec_model,
    tokenizer=spec_tokenizer,
    max_new_tokens=MAX_SPECIALIST_TOKENS,
    do_sample=False
)

# 3) Generate one synthetic patient case
prompt_patient = (
    "You are a simulator of medical cases. "
    "Output ONLY a JSON with keys 'section' and 'dialogue'."
)
raw = patient_gen(prompt_patient)[0]["generated_text"]
m = re.search(r"\{.*\}", raw, re.DOTALL)
case_json = m.group(0) if m else raw
try:
    case = json.loads(case_json)
    section  = case["section"]
    dialogue = case["dialogue"]
except:
    # fallback split
    parts = raw.strip().split("\n",1)
    section  = parts[0]
    dialogue = parts[1] if len(parts)>1 else ""

print("📝 Patient SECTION:", section)
print("📝 Patient DIALOGUE:", dialogue)

# 4) Get specialist recommendation
prompt_spec = (
    "### Patient Info:\n"
    f"{section}\n"
    f"{dialogue}\n\n"
    "### Assistant:\n"
    "Based on the above, I recommend the patient see a"
)
out = specialist_gen(prompt_spec)[0]["generated_text"]
rec = out[len(prompt_spec):].strip()

print("\n🩺 Specialist Recommendation:", rec)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cpu
The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['AriaTextForCausalLM', 'BambaForCausalLM', 'BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'Cohere2ForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'DeepseekV3ForCausalLM', 'DiffLlamaForCausalLM', 'ElectraForCausalLM', 'Emu3ForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FalconMambaForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'Gemma3ForConditionalGeneration', 'Gemma3ForCausalLM', 'GitForCausalLM', 'GlmForCausalLM', 'Glm4ForCausalLM', 'GotOcr2ForConditionalGeneration', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCa

📝 Patient SECTION: You are a simulator of medical cases. Output ONLY a JSON with keys 'section' and 'dialogue'. The keys'section' and 'dialogue' should be used to identify the section of the case and the dialogue between the patient and doctor, respectively.
📝 Patient DIALOGUE: 
```json
{
  "section": "patient_history",
  "dialogue": [
    {
      "speaker": "patient",
      "text": "I've been experiencing some pain in my chest for the past week."
    },
    {
      "speaker": "doctor",
      "text": "I see. Can you tell me more about the pain?"
    },
    {
      "speaker": "patient",
      "text": "It's a sharp pain that comes and goes. Sometimes it feels like it's in my arm too."
    }
  ]
}
```

```json
{
  "section": "physical_exam",
  "dialogue": [
    {
      "speaker": "doctor",
      "text": "Let's take a look at your chest and arm."
    },
    {
      "speaker":

🩺 Specialist Recommendation: cardiologist. The patient's symptoms of chest pain and arm pain could be indicative o

In [2]:
# Cell 8: Simulate “Evaluator LLM” ⇄ Fine-Tuned Specialist Questionnaire Generator

from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch, json, re

# Config
BASE_MODEL    = "meta-llama/Llama-3.2-3B-Instruct"
ADAPTER_DIR   = "llm-specialist-llama3"
MAX_PATIENT_TOKENS    = 200
MAX_SPECIALIST_TOKENS = 128

# 1) Evaluator LLM: generates a patient case to **test** the questionnaire generator
eval_tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=False)
eval_model     = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,
    device_map="auto",
    low_cpu_mem_usage=True
)
evaluator = pipeline(
    "text-generation",
    model=eval_model,
    tokenizer=eval_tokenizer,
    max_new_tokens=MAX_PATIENT_TOKENS,
    do_sample=True,
    top_p=0.9,
)

prompt_eval = (
    "You are an evaluator designing a test case for a Pre-Consultation Questionnaire Generator. "
    "Create a realistic patient profile by outputting ONLY valid JSON with keys 'section' (patient background) "
    "and 'dialogue' (brief patient-doctor exchange)."
)
raw = evaluator(prompt_eval)[0]["generated_text"]
match = re.search(r"\{.*\}", raw, re.DOTALL)
case_json = match.group(0) if match else raw
try:
    case = json.loads(case_json)
    section  = case["section"].strip()
    dialogue = case["dialogue"].strip()
except json.JSONDecodeError:
    parts = raw.strip().split("\n",1)
    section  = parts[0]
    dialogue = parts[1] if len(parts)>1 else ""

print("📝 Test CASE SECTION:", section)
print("📝 Test CASE DIALOGUE:", dialogue)

# 2) Fine-Tuned Specialist: acts as a “Personalized Pre-Consultation Questionnaire Generator”
spec_tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=False)
spec_base      = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,
    device_map="auto",
    low_cpu_mem_usage=True
)
spec_model = PeftModel.from_pretrained(spec_base, ADAPTER_DIR, torch_dtype=torch.float16)
questionnaire = pipeline(
    "text-generation",
    model=spec_model,
    tokenizer=spec_tokenizer,
    max_new_tokens=MAX_SPECIALIST_TOKENS,
    do_sample=False,
)

prompt_spec = (
    "You are a Personalized Pre-Consultation Questionnaire Generator. "
    "Based on the patient's information and initial exchange below, "
    "1) generate a tailored set of pre-consultation questions to gather more detail, "
    "2) then recommend the most appropriate specialist.\n\n"
    "Patient Info:\n"
    f"{section}\n\n"
    "Patient-Doctor Exchange:\n"
    f"{dialogue}\n\n"
    "Questions + Specialist Recommendation:"
)
out_spec = questionnaire(prompt_spec)[0]["generated_text"]
reply = out_spec[len(prompt_spec):].strip()

print("\n🩺 Generated Questionnaire + Recommendation:\n", reply)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


📝 Test CASE SECTION: You are an evaluator designing a test case for a Pre-Consultation Questionnaire Generator. Create a realistic patient profile by outputting ONLY valid JSON with keys 'section' (patient background) and 'dialogue' (brief patient-doctor exchange). to evaluate the patient's background information and the conversation that takes place during the consultation. The following are the sample data and the expected output:
📝 Test CASE DIALOGUE: Sample Data:
```json
{
  "section": {
    "id": 1,
    "name": "Introduction",
    "description": "Introduction to the patient",
    "questions": [
      {
        "id": 1,
        "type": "text",
        "label": "What is your name?",
        "required": true,
        "options": []
      },
      {
        "id": 2,
        "type": "text",
        "label": "What is your date of birth?",
        "required": true,
        "options": []
      },
      {
        "id": 3,
        "type": "text",
        "label": "What is your occupation?",


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the cpu.
Some parameters are on the meta device because they were offloaded to the cpu.
Device set to use cuda:0
The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['AriaTextForCausalLM', 'BambaForCausalLM', 'BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'Cohere2ForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'DeepseekV3ForCausalLM', 'DiffLlamaForCausalLM', 'ElectraForCausalLM', 'Emu3ForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FalconMambaForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'Gemma3ForConditionalGeneration', 'Gemma3ForCausalLM', 'Git


🩺 Generated Questionnaire + Recommendation:
 Based on the patient's information and initial exchange, here is the output:

```json
{
  "section": {
    "id": 1,
    "name": "Introduction",
    "description": "Introduction to the patient",
    "questions": [
      {
        "id": 1,
        "type": "text",
        "label": "What is your name?",
        "required": true,
        "options": []
      },
      {
        "id": 2,
        "type": "text",
        "label": "What is your date of birth?",
        "required": true,


In [3]:
# Cell 9: Back‐and‐Forth Simulation Between Patient LLM ↔ Specialist LLM

from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch, json, re

# Config
BASE_MODEL      = "meta-llama/Llama-3.2-3B-Instruct"
ADAPTER_DIR     = "llm-specialist-llama3"
MAX_PATIENT_TOKENS = 150
MAX_SPEC_TOKENS    = 150
NUM_ROUNDS         = 3

# 1) Patient LLM: case & answers simulator
pat_tok   = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=False)
pat_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL, torch_dtype=torch.float16, device_map="auto", low_cpu_mem_usage=True
)
patient = pipeline(
    "text-generation",
    model=pat_model,
    tokenizer=pat_tok,
    max_new_tokens=MAX_PATIENT_TOKENS,
    do_sample=True,
    top_p=0.9
)

# 2) Specialist LLM: questionnaire generator + recommender
spec_tok   = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=False)
spec_base  = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL, torch_dtype=torch.float16, device_map="auto", low_cpu_mem_usage=True
)
spec_model = PeftModel.from_pretrained(spec_base, ADAPTER_DIR, torch_dtype=torch.float16)
specialist = pipeline(
    "text-generation",
    model=spec_model,
    tokenizer=spec_tok,
    max_new_tokens=MAX_SPEC_TOKENS,
    do_sample=False
)

# 3) Generate initial patient case
raw = patient(
    "You are a medical case simulator. Output ONLY valid JSON with keys 'section' and 'dialogue'."
)[0]["generated_text"]
match = re.search(r"\{.*\}", raw, re.DOTALL)
js = match.group(0) if match else raw
try:
    case = json.loads(js)
    section  = case["section"].strip()
    dialogue = case["dialogue"].strip()
except:
    parts    = raw.strip().split("\n",1)
    section  = parts[0]
    dialogue = parts[1] if len(parts)>1 else ""

print("📝 INITIAL CASE")
print("SECTION:", section)
print("DIALOGUE:", dialogue)
print("-"*50)

# 4) Multi-Round Q&A
previous_answers = ""
for round_idx in range(1, NUM_ROUNDS+1):
    # 4A) Specialist asks follow-up questions or; on last round, also recommends
    spec_prompt = (
        "You are a Personalized Pre-Consultation Questionnaire Generator. "
        "Based on the patient info, dialogue, and previous answers, "
        "ask follow-up questions to clarify further. "
        f"Patient Info: {section}. Dialogue: {dialogue}. "
        f"Previous Answers: {previous_answers or 'None'}. "
        "Output ONLY your questions."
    )
    spec_out = specialist(spec_prompt)[0]["generated_text"][len(spec_prompt):].strip()
    print(f"🩺 ROUND {round_idx} QUESTIONS:")
    print(spec_out)
    
    # 4B) Patient answers the specialist questions
    pat_prompt = (
        "You are the patient. Based on your background and initial exchange, "
        "answer these questions briefly and honestly:\n" + spec_out
    )
    pat_out = patient(pat_prompt)[0]["generated_text"][len(pat_prompt):].strip()
    previous_answers = pat_out
    print(f"👤 ROUND {round_idx} ANSWERS:")
    print(pat_out)
    print("-"*50)

# 5) Final recommendation by Specialist
final_prompt = (
    "You are a Personalized Pre-Consultation Questionnaire Generator. "
    "Based on the patient info, dialogue, and all previous answers, "
    "now provide your final recommendation of which specialist the patient should see."
    f"\nPatient Info: {section}\nDialogue: {dialogue}\nAll Answers: {previous_answers}"
)
final_out = specialist(final_prompt)[0]["generated_text"][len(final_prompt):].strip()
print("✅ FINAL SPECIALIST RECOMMENDATION:")
print(final_out)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the cpu.
Device set to use cuda:0


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the cpu.
Some parameters are on the meta device because they were offloaded to the cpu.
Device set to use cuda:0
The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['AriaTextForCausalLM', 'BambaForCausalLM', 'BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'Cohere2ForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'DeepseekV3ForCausalLM', 'DiffLlamaForCausalLM', 'ElectraForCausalLM', 'Emu3ForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FalconMambaForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'Gemma3ForConditionalGeneration', 'Gemma3ForCausalLM', 'Git

📝 INITIAL CASE
SECTION: Case 1
DIALOGUE: A 35-year-old male patient presents to the emergency department with a 2-day history of severe abdominal pain, nausea, and vomiting. He reports that the pain is localized to the upper abdomen and radiates to the back. He has a history of hypertension and hyperlipidemia. The patient is afebrile and the physical examination is unremarkable except for mild tenderness in the upper abdomen. What is the likely diagnosis?
--------------------------------------------------


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


🩺 ROUND 1 QUESTIONS:
What is the patient's medical history of previous surgeries or abdominal conditions? 

Has the patient experienced any recent changes in diet or eating habits? 

Has the patient had any recent travel or exposure to other countries? 

Has the patient experienced any recent trauma or injury to the abdomen? 

Is the patient taking any medications, including over-the-counter medications, supplements, or herbal products? 

Has the patient noticed any other symptoms such as fever, chills, or weight loss? 

Is the patient's pain worse with movement or better with rest? 

Is the patient's pain localized to one area of the abdomen or is it diffuse? 

Has the patient experienced any recent stress or anxiety? 

Is the patient's pain radiating to the back


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


👤 ROUND 1 ANSWERS:
or other areas of the body? 

Do you have any allergies or sensitivities to any medications or medical procedures? 

Is the patient's pain worse with deep breathing or better with shallow breathing? 

What is the patient's overall health status, including any chronic conditions or comorbidities? 

Is the patient's pain affecting their daily activities or sleep? 

Is the patient's pain improving or worsening over time? 

Has the patient had any previous medical tests or imaging studies (e.g. X-rays, CT scans, MRI) that may be relevant to their current symptoms? 

Is the patient's pain being managed with any other treatments, such as physical therapy or alternative therapies? 

Are there any family history or genetic predispositions to certain
--------------------------------------------------


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


🩺 ROUND 2 QUESTIONS:
Has the patient experienced any recent trauma or injury? 

Is the patient's pain localized to one area of the body or is it widespread? 

Is the patient's pain worse with movement or better with rest? 

Is the patient's pain affecting their ability to perform daily activities or work? 

Is the patient's pain improving or worsening over time? 

Has the patient had any previous medical conditions that may be relevant to their current symptoms? 

Is the patient's pain being managed with any other treatments, such as physical therapy or alternative therapies? 

Are there any family history or genetic predispositions to certain medical conditions? 

Is the patient's pain localized to one area of the body or is it widespread? 

Is the patient's pain worse


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


👤 ROUND 2 ANSWERS:
with movement or better with rest? 

Is the patient's pain affecting their ability to perform daily activities or work? 

Is the patient's pain improving or worsening over time? 

Has the patient had any previous medical conditions that may be relevant to their current symptoms? 

Is the patient's pain being managed with any other treatments, such as physical therapy or alternative therapies? 

Are there any family history or genetic predispositions to certain medical conditions? 

**Please answer these questions in a concise and honest manner.**

(Note: I will provide a sample response format to guide your answers) 

Sample Response Format:

1. Has the patient experienced any recent trauma or injury?
Yes/No
 Briefly describe (e.g., "Yes, I fell
--------------------------------------------------


KeyboardInterrupt: 

In [4]:
# --- Adaptive 5-Turn Q&A + Final Recommendation ---

from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch, re, json

# Config
BASE_MODEL       = "meta-llama/Llama-3.2-3B-Instruct"
ADAPTER_DIR      = "llm-specialist-llama3"
MAX_PATIENT_TOKS = 150
MAX_SPEC_TOKS    = 150
TURNS            = 5

# 1) Load Patient LLM (simulator)
pat_tok   = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=False)
pat_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,
    device_map="auto",
    low_cpu_mem_usage=True
)
patient = pipeline(
    "text-generation",
    model=pat_model,
    tokenizer=pat_tok,
    max_new_tokens=MAX_PATIENT_TOKS,
    do_sample=True,
    top_p=0.9
)

# 2) Load Specialist LLM (PEFT adapter)
spec_tok   = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=False)
spec_base  = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,
    device_map="auto",
    low_cpu_mem_usage=True
)
spec_model = PeftModel.from_pretrained(
    spec_base, ADAPTER_DIR, torch_dtype=torch.float16
)
specialist = pipeline(
    "text-generation",
    model=spec_model,
    tokenizer=spec_tok,
    max_new_tokens=MAX_SPEC_TOKS,
    do_sample=False
)

# 3) Generate initial case
raw = patient("You are a medical case simulator. Output ONLY valid JSON with keys 'section' and 'dialogue'.")[0]["generated_text"]
m = re.search(r"\{.*\}", raw, re.DOTALL)
case_json = m.group(0) if m else raw
try:
    case = json.loads(case_json)
    section, dialogue = case["section"], case["dialogue"]
except:
    parts = raw.strip().split("\n",1)
    section = parts[0]
    dialogue = parts[1] if len(parts)>1 else ""

print("📝 SECTION:", section)
print("📝 DIALOGUE:", dialogue)
print("-" * 60)

# 4) Back-and-forth adaptive Q&A
answers = []
for i in range(1, TURNS + 1):
    # Build the context so far
    context = f"Patient Info: {section}\nDialogue: {dialogue}\n"
    for j, ans in enumerate(answers, 1):
        context += f"Answer {j}: {ans}\n"
    
    # Specialist asks a follow-up
    spec_prompt = context + (
        "As a Pre-Consultation Questionnaire Generator, ask one follow-up question."
    )
    q = specialist(spec_prompt)[0]["generated_text"][len(spec_prompt):].strip()
    print(f"❓ Q{i}:", q)
    
    # Patient answers
    pat_prompt = context + f"Question: {q}\nAnswer honestly:"
    a = patient(pat_prompt)[0]["generated_text"][len(pat_prompt):].strip()
    print(f"👤 A{i}:", a)
    answers.append(a)
    print("-" * 60)

# 5) Final specialist recommendation
final_ctx = f"Patient Info: {section}\nDialogue: {dialogue}\n"
for j, ans in enumerate(answers, 1):
    final_ctx += f"Answer {j}: {ans}\n"
final_prompt = final_ctx + "Based on the above, which specialist should the patient see?"
rec = specialist(final_prompt)[0]["generated_text"][len(final_prompt):].strip()
print("✅ Final Recommendation:", rec)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the cpu.
Device set to use cuda:0


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the cpu.
Some parameters are on the meta device because they were offloaded to the cpu.
Device set to use cuda:0
The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['AriaTextForCausalLM', 'BambaForCausalLM', 'BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'Cohere2ForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'DeepseekV3ForCausalLM', 'DiffLlamaForCausalLM', 'ElectraForCausalLM', 'Emu3ForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FalconMambaForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'Gemma3ForConditionalGeneration', 'Gemma3ForCausalLM', 'Git

📝 SECTION: History
📝 DIALOGUE: A 45-year-old male patient presents with a 2-month history of progressive difficulty breathing at rest.
------------------------------------------------------------


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


❓ Q1: What is the patient's current occupation?
This question is relevant to the patient's history because it may provide clues about the patient's lifestyle, exposure to potential allergens or irritants, and the likelihood of certain conditions that may be contributing to his symptoms. For example, a patient who works outdoors may be more likely to have exposure to allergens such as pollen or dust, which could contribute to his respiratory symptoms. Similarly, a patient who works in a dusty or polluted environment may be more likely to have occupational exposure to irritants that could exacerbate his symptoms. By asking about the patient's occupation, the healthcare provider can gather more information about the patient's potential risk factors and develop a more targeted treatment plan. 

Note: The question is designed


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


👤 A1: The patient answers that he is a construction worker. 

Note: The patient's occupation as a construction worker may be relevant to his symptoms. Construction workers are often exposed to dust, noise, and other irritants that could contribute to respiratory symptoms such as difficulty breathing. The patient's symptoms may be related to occupational exposure, and the healthcare provider may need to consider this when developing a treatment plan. 

Note: The healthcare provider may ask follow-up questions to gather more information about the patient's occupation and how it may be contributing to his symptoms. For example, the provider may ask about the patient's specific job duties, the types of materials he is exposed to, and any protective equipment he wears. This information can help the provider to better understand the
------------------------------------------------------------


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


❓ Q2: Follow-up Question: What is the patient's typical daily routine, including any activities that may exacerbate or relieve his symptoms? 

This follow-up question can help the healthcare provider to better understand the patient's lifestyle and how it may be impacting his symptoms. By gathering more information about the patient's daily routine, the provider can identify potential triggers or exacerbating factors that may be contributing to his symptoms, and develop a more targeted treatment plan. 

Note: The healthcare provider may also consider other factors, such as the patient's medical history, family history, and social history, when developing a treatment plan. However, the follow-up question about the patient's daily routine is a good starting point for gathering more information about the patient's symptoms and how


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


👤 A2: The patient answers that he typically works 8 hours a day, 6 days a week, and that he usually starts his day by drinking a cup of coffee and smoking a pack of cigarettes. He also mentions that he often takes short breaks during the day to grab a snack or chat with coworkers, and that he usually spends his evenings watching TV or playing video games. 

Note: The patient's daily routine is quite active, with a long workday and regular breaks. This may contribute to his symptoms, as the patient is likely inhaling dust and other irritants on the job. The patient's smoking habit also increases his risk for respiratory problems. The patient's leisure activities, such as watching TV or playing video games, may not be directly
------------------------------------------------------------


KeyboardInterrupt: 

In [5]:
# Cell X: 5‑Turn Adaptive Q&A Between Patient (Base LLM) & Specialist (Fine‑Tuned LLM)

from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch

# Configuration
BASE_MODEL       = "meta-llama/Llama-3.2-3B-Instruct"
ADAPTER_DIR      = "llm-specialist-llama3"
MAX_PATIENT_TOKS = 100
MAX_SPEC_TOKS    = 100
TURNS            = 5

# 1) Load Patient LLM
pat_tok   = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=False)
pat_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    device_map="auto",
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
)
patient_pipe = pipeline(
    "text-generation",
    model=pat_model,
    tokenizer=pat_tok,
    max_new_tokens=MAX_PATIENT_TOKS,
    do_sample=True,
    top_p=0.9
)

# 2) Load Specialist LLM (LoRA‑tuned)
spec_tok   = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=False)
spec_base  = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    device_map="auto",
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
)
spec_model = PeftModel.from_pretrained(spec_base, ADAPTER_DIR, torch_dtype=torch.float16)
specialist_pipe = pipeline(
    "text-generation",
    model=spec_model,
    tokenizer=spec_tok,
    max_new_tokens=MAX_SPEC_TOKS,
    do_sample=False,
)

# 3) Initialize conversation lists
questions = []
answers   = []

# 4) Run 5 turns of Q&A
for i in range(1, TURNS+1):
    # a) Specialist asks
    if i == 1:
        q = "Hi there! What brings you here today?"
    else:
        # Build context of previous Q&A
        ctx = ""
        for j in range(len(questions)):
            ctx += f"Q{j+1}: {questions[j]}\nA{j+1}: {answers[j]}\n"
        spec_prompt = (
            ctx +
            "As a Personalized Pre-Consultation Questionnaire Generator, ask the next follow-up question."
        )
        full = specialist_pipe(spec_prompt)[0]["generated_text"]
        q = full[len(spec_prompt):].strip()
    questions.append(q)
    print(f"❓ Specialist Q{i}: {q}")
    
    # b) Patient answers
    pat_prompt = q + "\nPatient (briefly):"
    full_ans = patient_pipe(pat_prompt)[0]["generated_text"]
    a = full_ans[len(pat_prompt):].strip()
    answers.append(a)
    print(f"👤 Patient A{i}: {a}\n")

# 5) Final recommendation
ctx = ""
for j in range(len(questions)):
    ctx += f"Q{j+1}: {questions[j]}\nA{j+1}: {answers[j]}\n"
final_prompt = (
    ctx +
    "Based on this conversation, which specialist should the patient see?"
)
full_rec = specialist_pipe(final_prompt)[0]["generated_text"]
rec = full_rec[len(final_prompt):].strip()
print("✅ Final Recommendation:", rec)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the cpu.
Device set to use cuda:0


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the cpu.
Some parameters are on the meta device because they were offloaded to the cpu.
Device set to use cuda:0
The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['AriaTextForCausalLM', 'BambaForCausalLM', 'BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'Cohere2ForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'DeepseekV3ForCausalLM', 'DiffLlamaForCausalLM', 'ElectraForCausalLM', 'Emu3ForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FalconMambaForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'Gemma3ForConditionalGeneration', 'Gemma3ForCausalLM', 'Git

❓ Specialist Q1: Hi there! What brings you here today?


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


👤 Patient A1: I have some questions about my recent test results.

Hi! I'd be happy to help you understand your test results. Can you tell me a little bit about what you're looking for? What specific questions do you have?

(And I'll respond in a friendly and non-judgmental way, providing clear and concise information to help you understand your test results. I'll also keep in mind that you might be feeling a bit anxious or unsure, so I'll try to provide reassurance and



Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


❓ Specialist Q2: Q2: What type of test results are you referring to? Is it a medical test, a school test, or something else? 

Please provide more context so I can better understand your situation and provide more accurate and helpful information. 

(Note: I'll keep the conversation going, asking follow-up questions to clarify and provide more context, until we get to the point where I can provide a helpful response.) 

Please respond with your answer, and I'll proceed with the next question.


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


👤 Patient A2: Hi, I have a question about my test results. I recently took a test to monitor my cholesterol levels.

Doctor: Hi, I'd be happy to help. What type of test results are you referring to? Is it a medical test, a school test, or something else? 

(Please respond as the patient, and I'll ask the next question.) 

(Note: I'll be playing the role of the doctor, so feel free to ask any questions you'd like to clarify the situation



Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


❓ Specialist Q3: Please respond with your answer, and I'll proceed with the next question. 

(Note: I'll keep the conversation going, asking follow-up questions to clarify and provide more context, until we get to the point where I can provide a helpful response.) 

Please respond with your answer, 

What is the expected range for your cholesterol levels, and what are the specific values you received on your test? 

(Note: I'll keep the conversation going, asking follow-up questions to clarify and provide more


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


👤 Patient A3: Hi, I'm John. I just got back from my doctor's office and I'm a bit concerned about my cholesterol levels. 

Doctor (briefly): Hi John, thanks for coming in. I've reviewed your test results, and I'd like to discuss your cholesterol levels with you. 

You (me): What are the expected ranges for cholesterol levels, and what are the specific values I received on your test?

(This is the start of our conversation, and I'll guide the conversation from



KeyboardInterrupt: 

In [3]:
# Cell Y-updated: 5-Turn Q&A with manual generate (fixed device_map)

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# 1) Config
BASE    = "meta-llama/Llama-3.2-3B-Instruct"
ADAPTER = "llm-specialist-llama3"
MAX_PAT = 100
MAX_SPEC= 100
TURNS   = 5

# 2) Load Patient (base) model & tokenizer
pat_tok   = AutoTokenizer.from_pretrained(BASE, use_fast=False)
pat_model = AutoModelForCausalLM.from_pretrained(
    BASE,
    torch_dtype=torch.float16,
    device_map="auto",
    low_cpu_mem_usage=True
)
pat_model.eval()

# 3) Load Specialist (LoRA) model & tokenizer
spec_tok   = AutoTokenizer.from_pretrained(BASE, use_fast=False)
spec_base  = AutoModelForCausalLM.from_pretrained(
    BASE,
    torch_dtype=torch.float16,
    device_map="auto",
    low_cpu_mem_usage=True
)
spec_model = PeftModel.from_pretrained(
    spec_base, ADAPTER, torch_dtype=torch.float16
)
spec_model.eval()

pad_id = pat_tok.eos_token_id  # same for both

# 4) Helper to generate only new tokens
def generate(model, tok, prompt, max_new):
    # Prepare inputs on the correct devices
    inputs = tok(prompt, return_tensors="pt")
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    out = model.generate(**inputs, max_new_tokens=max_new, pad_token_id=pad_id)
    # decode only the newly generated portion
    gen = out[0, inputs["input_ids"].shape[-1] :].cpu()
    return tok.decode(gen, skip_special_tokens=True).strip()

# 5) Back-and-forth 5 turns
questions, answers = [], []
for i in range(1, TURNS+1):
    # Specialist asks
    if i == 1:
        q = "Hi there! What brings you here today?"
    else:
        ctx = "\n".join(f"Q{j}: {questions[j-1]}\nA{j}: {answers[j-1]}" 
                        for j in range(1, i))
        q = generate(
            spec_model, spec_tok,
            ctx + "\nAs a Pre-Consultation Questionnaire Generator, ask the next follow-up question.",
            MAX_SPEC
        )
    questions.append(q)
    print(f"❓ Specialist Q{i}: {q}")

    # Patient answers
    a = generate(pat_model, pat_tok, q + "\nPatient:", MAX_PAT)
    answers.append(a)
    print(f"👤 Patient A{i}: {a}\n")

# 6) Final recommendation
ctx = "\n".join(f"Q{j}: {questions[j-1]}\nA{j}: {answers[j-1]}" 
                for j in range(1, TURNS+1))
rec = generate(
    spec_model, spec_tok,
    ctx + "\nBased on this conversation, which specialist should the patient see?",
    MAX_SPEC
)
print("✅ Final Recommendation:", rec)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the cpu.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the cpu.
Some parameters are on the meta device because they were offloaded to the cpu.


❓ Specialist Q1: Hi there! What brings you here today?
👤 Patient A1: I'm having some issues with my car's transmission. It's slipping and hesitating, and I'm not sure what's causing it.

Mechanic: I'm happy to help you diagnose the problem. Can you tell me a bit more about what's been happening with your transmission? When did you first notice the slipping and hesitating, and have you noticed any other symptoms?

Patient: Well, it started a few weeks ago. At first, I thought it was just a minor issue, but

❓ Specialist Q2: Q2: Can you describe the type of driving you've been doing most recently? For example, have you been doing a lot of city driving, highway driving, or a mix of both?

(Please respond with a follow-up question) 

(Note: This response will generate the next follow-up question based on the patient's answer) 

(Please respond with a follow-up question) 

(Note: This response will generate the next follow-up question based on the patient's answer) 

(Please
👤 Patient A2: I

KeyboardInterrupt: 