In [1]:
import json
import re
import torch

from datasets import load_dataset
from sentence_transformers import SentenceTransformer, util
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import GRPOConfig, GRPOTrainer

In [2]:
print("CUDA GPU backend", torch.cuda.is_available())


CUDA GPU backend True


In [3]:

ds = load_dataset("qiaojin/PubMedQA", "pqa_unlabeled")

print(ds)

DatasetDict({
    train: Dataset({
        features: ['pubid', 'question', 'context', 'long_answer'],
        num_rows: 61249
    })
})


In [8]:
SYSTEM_PROMPT = (
    "A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant "
    "first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning "
    "process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., "
    "<think> reasoning process is placed here </think> <answer> the answer is placed here </answer>"
)

def make_conversation(example):
    return {
        "prompt": [
            {"role": "system", "content": SYSTEM_PROMPT},
            # {"role": "user", "content": "".join(example["context"]['contexts'])},
            {"role": "user", "content": example["question"]},
        ],
    }

ds = ds.map(make_conversation)
train_dataset = ds.remove_columns(['pubid', 'question', 'context'])

print(train_dataset)

Map:   0%|          | 0/61249 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['long_answer', 'prompt'],
        num_rows: 61249
    })
})


In [9]:
tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B")
model = AutoModelForCausalLM.from_pretrained("deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B")

In [10]:
def extract_reasoning_and_graph(example):
    prompt_text = example["prompt"]
    input_text = "\n".join([entry["content"] for entry in prompt_text])
    print("\ninput", input_text)
    inputs = tokenizer(input_text, return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=300)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print("\nresponse", response)
    
    # Extract reasoning using regex
    think_match = re.search(r"<think>(.*?)</think>", response, re.DOTALL)
    reasoning_process = think_match.group(1) if think_match else ""
    
    # Convert reasoning into a simple graph (JSON format)
    graph_structure = {"nodes": [], "edges": []}
    steps = reasoning_process.split(". ")
    for i, step in enumerate(steps):
        graph_structure["nodes"].append({"id": i, "text": step})
        if i > 0:
            graph_structure["edges"].append({"source": i - 1, "target": i})
    
    return {"reasoning_graph": json.dumps(graph_structure), "model_response": response}

In [11]:
dso = ds['train'].select(range(1)).map(extract_reasoning_and_graph)


Map:   0%|          | 0/1 [00:00<?, ? examples/s]

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



input A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process is placed here </think> <answer> the answer is placed here </answer>
Is naturopathy as effective as conventional therapy for treatment of menopausal symptoms?

response A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process is placed here </think> <answer> the answer is placed here </answer>
Is naturopathy as effective as conventional the

In [12]:
print(dso)

Dataset({
    features: ['pubid', 'question', 'context', 'long_answer', 'prompt', 'reasoning_graph', 'model_response'],
    num_rows: 1
})


In [None]:
dso['reasoning_graph']

In [None]:
dso['model_response']

In [36]:
model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(model_name)




config.json:   0%|          | 0.00/680 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


model.safetensors.index.json:   0%|          | 0.00/28.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-000002.safetensors:   0%|          | 0.00/8.61G [00:00<?, ?B/s]

model-00002-of-000002.safetensors:   0%|          | 0.00/6.62G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

Some parameters are on the meta device because they were offloaded to the cpu.


tokenizer_config.json:   0%|          | 0.00/3.07k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

In [37]:
SYSTEM_PROMPT = (
    "A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant "
    "first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning "
    "process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., "
    "<think> reasoning process is placed here </think> <answer> the answer is placed here </answer>"
)

In [38]:
prompt = "Is naturopathy as effective as conventional therapy for treatment of menopausal symptoms?"
messages = [
    {"role": "system", "content": SYSTEM_PROMPT},
    {"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)


In [None]:
generated_ids = model.generate(
    **model_inputs,
    max_new_tokens=512
)
generated_ids = [
    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]

response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


In [None]:
response

'Yes. Studies have shown that naturopathy can be just as effective as conventional therapies in treating menopause symptoms.'