In [None]:
# Install the fine-tuning stack into the kernel's environment (no versions are pinned here).
%pip install torch transformers datasets accelerate peft trl
# Load the TensorBoard notebook extension so the %tensorboard magic is available later.
%load_ext tensorboard

Preprocess the MathDial dataset for fine-tuning

In [2]:
from google.colab import userdata
from datasets import load_dataset, Dataset
from huggingface_hub import login
import os
import re
import math

In [18]:
# Read the Hugging Face token from Colab's user secrets (never hard-code it)
# and authenticate this session with the Hub.
HUGGING_FACE_ACCESS_TOKEN = userdata.get("HUGGING_FACE_ACCESS_TOKEN")
login(token=HUGGING_FACE_ACCESS_TOKEN)

In [None]:
# Fetch the MathDial dataset from the Hub and convert its "train" and "test"
# splits to pandas DataFrames for row-wise preprocessing.
tutor_dialogue = load_dataset("eth-nlped/mathdial")
tutor_train, tutor_test = (
    tutor_dialogue[split_name].to_pandas() for split_name in ("train", "test")
)

In [5]:
def extract_move(sentence: str):
    """Strip the parenthesised teacher-move tag from a turn.

    A tag is one of ``(probing)``, ``(focus)``, ``(telling)`` or ``(generic)``.

    Returns:
        A ``(text, move)`` tuple. When at least one tag is present, ``text`` is
        the sentence with every tag removed and ``move`` is the name of the
        first tag found. Otherwise the sentence is returned unchanged and
        ``move`` is ``None``.
    """
    pattern = r"\b(probing|focus|telling|generic)\b"
    tag = re.compile(rf"\({pattern}\)")

    found = tag.search(sentence)
    if found is None:
        return sentence, None

    return tag.sub("", sentence), found.group(1)

def remove_names(sentence: str, student_name: str):
    """Remove the speaker label (``"Teacher: "``, ``"Student: "`` or
    ``"<student_name>: "``) from a turn.

    Args:
        sentence: One conversation turn, possibly prefixed with a speaker label.
        student_name: The student's first name as it appears in speaker labels.

    Returns:
        A ``(text, speaker)`` tuple. When a label is found, ``text`` is the
        sentence with every such label removed and ``speaker`` is the first
        label matched (without the colon). Otherwise the sentence is returned
        unchanged and ``speaker`` is ``None``.
    """
    # Escape the name: it comes from free-text profile data, and characters
    # such as "." or "(" would otherwise be interpreted as regex syntax
    # (matching unintended text or raising re.error).
    pattern = rf"\b(Teacher|Student|{re.escape(student_name)})\b"
    speaker_label = re.compile(rf"{pattern}: ")

    match = speaker_label.search(sentence)
    if match:
        return speaker_label.sub("", sentence), match.group(1)

    return sentence, None

def preprocess(conversation: str):
    """Split a raw conversation into turns and collect the teacher moves.

    The conversation is split on the ``"|EOM|"`` delimiter. Each piece is
    passed through ``extract_move``; any move it reports is appended to the
    move list, and pieces that are empty after tag removal are dropped.

    Returns:
        A ``(turns, moves)`` tuple of lists, both in conversation order.
    """
    turns = []
    moves = []

    for raw_turn in conversation.split("|EOM|"):
        text, move = extract_move(raw_turn)

        if move:
            moves.append(move)
        if text:
            turns.append(text)

    return turns, moves

def format_conversation(
        split_conversation: list[str],
        moves: list[str],
        system_prompt: str,
        incorrect_solution: str,
        student_name: str
    ):
    """
    Build the chat-style message list for one tutoring conversation, e.g.
    [{"role": "user", "content": "What color is the sky?"}, {"role": "assistant", "content": "It is blue."}]

    The list opens with the system prompt and the student's incorrect solution
    (as the first user message). Teacher turns become assistant messages
    prefixed with their upper-cased move tag (e.g. "[FOCUS] ..."); every other
    labelled turn becomes a user message. Turns that are empty or carry no
    recognised speaker label are skipped.

    Note: ``moves`` is consumed positionally, one entry per kept teacher turn,
    so it must contain at least as many entries as there are such turns.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": incorrect_solution},
    ]

    move_index = 0  # position of the next unused teacher move

    for turn in split_conversation:
        text, speaker = remove_names(turn, student_name)
        if not (text and speaker):
            continue

        if speaker != "Teacher":
            messages.append({"role": "user", "content": text})
            continue

        messages.append({"role": "assistant", "content": f"[{moves[move_index].upper()}] " + text})
        move_index += 1

    return messages

In [6]:
def build_system_prompt(question: str, ground_truth: str, student_name):
    """Return the tutoring system prompt for one problem.

    The prompt is a fixed instruction template (guidelines, escalation rules,
    strategy descriptions and a short example) into which the student's name,
    the problem statement and the ground-truth solution are interpolated. The
    template's leading newline and indentation are part of the returned text.
    """
    return f"""
        You are a mathematics teacher for students in elementary and middle school.
        Your task is to help your student {student_name} solve a problem that they initially solved incorrectly.
        You have access to the problem statement and ground truth solution, which are provided below.

        Make sure to abide by the following guidelines:
            1. Do NOT give away the answer immediately.
            2. Lead the student toward the correct answer using the following strategies: focus, probing, telling, and generic.
            3. Each of your responses should follow this structure: '[STRATEGY] one sentence response'
            4. Keep responses concise (1–2 sentences), but long enough to be natural.
            5. For each response, you will select only 1 strategy to use. You may switch strategies on different turns.
            6. Ensure your selected strategy is displayed in all CAPS (e.g. [FOCUS])

        Escalation rules:
            - Start with [FOCUS] or [PROBING] strategies.
            - Use [TELLING] (revealing strategy) only if the student is stuck after several turns.
            - Use [TELLING] (revealing answer) only as a last resort after repeated failed attempts.
            - If the student provides a correct answer, use [GENERIC] to give encouragement or praise before moving on.

        Here is a description of each strategy, its intent/purpose, and an example of how that strategy is used for each intent:
            1. focus
                - intent 1: seek strategy (e.g. So what should you do next?)
                - intent 2: guide student focus (e.g. Can you calculate...?)
                - intent 3: recall relevant information (e.g. Can you reread the question and tell me what is...?)
            2. probing
                - intent 1: asking for explanation (e.g. Why do you think you need to add these numbers?)
                - intent 2: seeking self correction (e.g. Are you sure you need to add here?)
                - intent 3: perturbing the question (e.g. How would things change if they had ... items instead?)
                - intent 4: seeking world knowledge (e.g. How do you calculate the perimeter of a square?)
            3. telling
                - intent 1: revealing strategy (e.g. You need to add ... to ... get your answer.)
                - intent 2: revealing answer (e.g. No, he had ... items.)
            4. generic
                - intent 1: greeting/fairwell (e.g. Hi..., how are you doing with the word problem?, Good Job! Is there anything else I can help with?)
                - intent 2: general inquiry (e.g. Can you go walk me through your solution?)

        Example conversation:
            your output: '[PROBING] If you had 4 of something and tripled that amount, how much would you have?'
            student response: 'I would have 12 of something.'
            your output: '[PROBING] So if Nancy triples the 18 cubic feet of water, how much would she have?'

        Problem: {question}
        Solution: {ground_truth}
        """

def get_student_name(student_profile: str):
    """Return the first whitespace-delimited token of the student profile,
    which is used as the student's name."""
    leading_token = student_profile.split(maxsplit=1)[0]
    return leading_token

In [7]:
def process_row(row):
    """Build the list of chat messages for one dataset row.

    Reads the row's "student_profile", "question", "ground_truth",
    "conversation" and "student_incorrect_solution" fields and returns the
    message list produced by ``format_conversation``.
    """
    name = get_student_name(row["student_profile"])
    prompt = build_system_prompt(row["question"], row["ground_truth"], name)
    turns, moves = preprocess(row["conversation"])

    return format_conversation(
        turns,
        moves,
        prompt,
        row["student_incorrect_solution"],
        name,
    )

In [8]:
# Build the chat-formatted "messages" column for each split from its OWN rows.
tutor_train["messages"] = tutor_train.apply(process_row, axis=1)
# The test frame must be processed from tutor_test itself. Applying over
# tutor_train would align training conversations onto the test index, so the
# validation and test sets would contain training data.
tutor_test["messages"] = tutor_test.apply(process_row, axis=1)

In [9]:
# Hugging Face datasets built from the training frame's "messages" column.
dataset = Dataset.from_pandas(tutor_train[["messages"]])
train = Dataset.from_pandas(tutor_train[["messages"]])

# Negative split point: the last ceil(n / 2) test rows stay as the test set and
# the rows before them become the validation set.
# Note: tutor_test is overwritten with its second half, so re-running this
# cell without re-running the earlier ones splits it again.
half = -math.ceil(len(tutor_test) / 2)
tutor_validation, tutor_test = tutor_test.iloc[:half], tutor_test.iloc[half:]

validation = Dataset.from_pandas(tutor_validation[["messages"]])
test = Dataset.from_pandas(tutor_test[["messages"]])

Fine-tune Qwen2.5-1.5B-Instruct using LoRA

In [10]:
from peft import LoraConfig
from trl import SFTTrainer, SFTConfig
from transformers import AutoTokenizer, AutoModelForCausalLM

In [None]:
# Base checkpoint to fine-tune; the model and its tokenizer are loaded from the same name.
model_name = "Qwen/Qwen2.5-1.5B-Instruct"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [12]:
# LoRA hyperparameters: adapter rank, scaling factor and dropout.
rank_dimension, lora_alpha, lora_dropout = 8, 16, 0.05

# Adapters are attached to the "q_proj" and "v_proj" modules only; bias terms are left untouched.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
    r=rank_dimension,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    bias="none",
)

In [13]:
def formatting_func(example):
    """Render an example's "messages" list to a single string using the
    tokenizer's chat template (no tokenization is performed)."""
    conversation = example["messages"]
    rendered = tokenizer.apply_chat_template(conversation, tokenize=False)
    return rendered

In [None]:
# Supervised fine-tuning configuration.
sft_config = SFTConfig(
    # Output locations
    output_dir="./results",
    logging_dir="./logs",
    report_to="tensorboard",
    # Optimisation
    num_train_epochs=3,
    per_device_train_batch_size=8,
    learning_rate=2e-5,
    warmup_ratio=0.03,
    weight_decay=0.01,
    fp16=True,
    max_length=1024,
    # Log every 10 steps; evaluate and checkpoint every 200 steps
    logging_steps=10,
    eval_strategy="steps",
    eval_steps=200,
    save_strategy="steps",
    save_steps=200,
)

# The trainer wraps the base model with the LoRA adapters described by
# peft_config and formats each example with formatting_func.
trainer = SFTTrainer(
    model=model,
    args=sft_config,
    train_dataset=train,
    eval_dataset=validation,
    peft_config=peft_config,
    formatting_func=formatting_func,
)

# Run training, then save the final model next to the checkpoints.
trainer.train()
trainer.save_model("./results/final_model")

In [None]:
# Open TensorBoard inline on the "logs" directory (the training config sets logging_dir to "./logs").
%tensorboard --logdir logs

Evaluate fine-tuned model

In [None]:
# Run the trainer's prediction loop on the held-out test split and keep the returned output.
# NOTE(review): `test` still holds only the raw "messages" column here — confirm that
# SFTTrainer.predict accepts an unprocessed dataset, or tokenize it the same way as the
# train/eval sets first.
metrics = trainer.predict(test)

Perform inference using new adapter weights

In [None]:
# Display the first test example (its chat-formatted "messages" list) as a quick sanity check.
test[0]

In [None]:
from peft import PeftModel, PeftConfig
import torch

# Reload the base checkpoint and attach the LoRA adapter saved by the training cell.
model_name = "Qwen/Qwen2.5-1.5B-Instruct"
adapter_path = "results/final_model"
device = "cuda" if torch.cuda.is_available() else "cpu"

tok = AutoTokenizer.from_pretrained(model_name)
base = AutoModelForCausalLM.from_pretrained(model_name)

model = PeftModel.from_pretrained(base, adapter_path)
model = model.to(device)
model.eval()

# One hand-picked problem. The system prompt is rebuilt with build_system_prompt
# so it always matches the template used for training instead of a pasted copy.
sample_question = (
    "Haman’s father has an egg business supplying the local market. "
    "On a Wednesday morning, his father sends him to go and collect 10 trays of eggs for sale from their store. "
    "While loading the eggs into the car, he accidentally drops two trays. "
    "He calls his father telling him this, and is told to add 7 more trays for sale. "
    "How many eggs were sold that day?"
)
sample_ground_truth = (
    "When Haman collected 10 trays and dropped 2 trays, he was left with 10 trays - 2 trays = 8 trays.\n"
    "When he added 7 more trays, the total number of trays became 8 trays + 7 trays = 15 trays.\n"
    "Since each tray contains 36 eggs, the total number of eggs to be sold is 36 eggs/tray * 15 trays = 540 eggs.\n"
    " 540"
)
sample_incorrect_solution = (
    "Haman originally had 10 trays of eggs, but he dropped 2, so he only had 10-2 = 8 trays left.\n"
    "Then his father tells him to add 7 more trays, so he ends up with 8+7 = 15 trays in total.\n"
    "Assuming each tray contains 30 eggs, then 15 trays would contain 15 x 30 = 450 eggs in total.\n"
    "Therefore, 450 eggs were sold that day. \n"
    " 450"
)

# Kept under its own name so the `test` Dataset from the split cell is not shadowed.
sample = {
    "messages": [
        {"role": "system", "content": build_system_prompt(sample_question, sample_ground_truth, "Alejandra")},
        {"role": "user", "content": sample_incorrect_solution},
    ]
}

# add_generation_prompt=True appends the assistant header so the model answers
# as the teacher rather than continuing the student's message.
prompt = tok.apply_chat_template(sample["messages"], tokenize=False, add_generation_prompt=True)
# Use the tokenizer loaded in this cell and move input_ids and attention_mask to the model's device.
inputs = tok(prompt, return_tensors="pt").to(device)

with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=128)

# Prints the full decoded sequence (prompt followed by the generated reply).
print(tok.batch_decode(outputs, skip_special_tokens=True)[0])