In [None]:
# Install the fine-tuning stack into the kernel's environment.
# NOTE(review): versions are unpinned, so a re-run may pick up different releases.
%pip install torch transformers datasets accelerate peft trl
# Enable the %tensorboard magic that is used after training.
%load_ext tensorboard

### Preprocess the MathDial dataset for fine-tuning

In [2]:
from google.colab import userdata
from datasets import load_dataset, Dataset
from huggingface_hub import login
import os
import re
import math

In [3]:
# Fetch the Hugging Face access token from Colab's `userdata` store (so it is
# never hard-coded in the notebook) and authenticate this session with the Hub.
HUGGING_FACE_ACCESS_TOKEN = userdata.get('HUGGING_FACE_ACCESS_TOKEN')
login(token = HUGGING_FACE_ACCESS_TOKEN)

In [None]:
# Load the MathDial dataset from the Hugging Face Hub.
tutor_dialogue = load_dataset("eth-nlped/mathdial")
# Convert the train/test splits to pandas DataFrames so each row can be
# processed with DataFrame.apply further down.
tutor_train = tutor_dialogue["train"].to_pandas()
tutor_test = tutor_dialogue["test"].to_pandas()

In [5]:
def extract_move(sentence: str):
    """Strip the teacher-move tag from a turn and report which move it was.

    A tag is one of ``(probing)``, ``(focus)``, ``(telling)`` or ``(generic)``.

    Args:
        sentence: Text of a single conversation turn.

    Returns:
        ``(text, move)``. When at least one tag is present, ``text`` is
        ``sentence`` with every tag removed and ``move`` is the name of the
        first tag found. Otherwise ``(sentence, None)`` with the text untouched.
    """
    move_names = r"\b(probing|focus|telling|generic)\b"
    tag_regex = re.compile(rf"\({move_names}\)")

    found = tag_regex.search(sentence)
    if found is None:
        return sentence, None

    return tag_regex.sub("", sentence), found.group(1)

def remove_names(sentence: str, student_name: str):
    """Strip the leading speaker label from a turn and report who spoke.

    A speaker label is ``Teacher: ``, ``Student: `` or ``<student_name>: `` at
    the start of the turn (leading whitespace is tolerated and removed together
    with the label).

    Args:
        sentence: Text of a single conversation turn.
        student_name: The student's name as it appears in speaker labels. It is
            matched literally; an empty name is ignored.

    Returns:
        ``(text, speaker)`` where ``text`` is the turn without its label and
        ``speaker`` is the label that was matched, or ``(sentence, None)`` when
        the turn does not start with a known label.
    """
    speakers = ["Teacher", "Student"]
    if student_name:
        # Escape the name so regex metacharacters in it (".", "(", "+", ...)
        # are matched literally instead of breaking or widening the pattern.
        speakers.append(re.escape(student_name))
    alternatives = "|".join(speakers)

    # Anchor at the start of the turn: a label is a prefix, so the same words
    # appearing later inside the utterance must be left untouched.
    label = re.match(rf"\s*({alternatives}): ", sentence)
    if label is None:
        return sentence, None

    return sentence[label.end():], label.group(1)

def preprocess(conversation: str):
    """Break a raw ``|EOM|``-delimited conversation into cleaned turns.

    Every segment is passed through ``extract_move`` to strip its teacher-move
    tag.

    Args:
        conversation: The full dialogue with turns separated by ``|EOM|``.

    Returns:
        ``(turns, moves)``: the non-empty cleaned turn texts in order, and the
        moves that were found, in order. Turns without a move contribute
        nothing to ``moves``, so the two lists are not index-aligned.
    """
    turns = []
    moves = []

    for segment in conversation.split("|EOM|"):
        text, move = extract_move(segment)
        if move:
            moves.append(move)
        # Drop segments that are empty once the tag has been removed.
        if text:
            turns.append(text)

    return turns, moves

def format_conversation(
        split_conversation: list[str],
        moves: list[str],
        system_prompt: str,
        incorrect_solution: str,
        student_name: str
    ):
    """Convert cleaned conversation turns into a chat-style message list.

    The result always starts with the system prompt followed by a ``user``
    message holding the student's incorrect solution. Each turn is then added
    with role ``assistant`` when the speaker is ``Teacher`` and ``user`` for
    any other recognised speaker. Turns with no recognised speaker label, or
    with no text left after removing the label, are skipped.

    Args:
        split_conversation: Turn texts, each still carrying its speaker label.
        moves: Teacher moves extracted from the conversation. Currently unused;
            kept so existing callers keep working.
        system_prompt: Content of the leading ``system`` message.
        incorrect_solution: Content of the first ``user`` message.
        student_name: Name used as the student's speaker label.

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts, e.g.
        ``[{"role": "user", "content": "What color is the sky?"},
        {"role": "assistant", "content": "It is blue."}]``.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": incorrect_solution},
    ]

    for turn in split_conversation:
        content, speaker = remove_names(turn, student_name)
        if not content or not speaker:
            continue

        role = "assistant" if speaker == "Teacher" else "user"
        messages.append({"role": role, "content": content})

    return messages

In [37]:
def build_system_prompt(question: str, ground_truth: str, student_name: str):
    """Build the tutor system prompt for one problem.

    The prompt is assembled from explicit string pieces so that it contains no
    leading/trailing whitespace and no source-code indentation. The same
    builder is used when preparing training data and at inference time, so
    both see identically formatted prompts.

    Args:
        question: The math problem the student is working on.
        ground_truth: The correct worked solution.
        student_name: The student's name, inserted into the prompt.

    Returns:
        The three-line system prompt as a single string.
    """
    return (
        f"You are a math tutor. Your student {student_name} is trying to solve "
        f"the following problem: {question}.\n"
        "Given the correct solution, your goal is to help them solve the problem "
        "by guiding them with questions and hints.\n"
        f"Here is the correct solution: {ground_truth}"
    )

def get_student_name(student_profile: str):
    """Return the first whitespace-delimited word of the profile text.

    That word is used as the student's name elsewhere in the notebook.
    """
    first_word = student_profile.split(maxsplit=1)[0]
    return first_word

In [38]:
def process_row(row):
    """Build the chat-message list for one dataset row.

    Reads ``student_profile``, ``question``, ``ground_truth``, ``conversation``
    and ``student_incorrect_solution`` from ``row`` and returns the list
    produced by ``format_conversation``.
    """
    name = get_student_name(row["student_profile"])
    prompt = build_system_prompt(row["question"], row["ground_truth"], name)
    turns, moves = preprocess(row["conversation"])
    return format_conversation(
        turns,
        moves,
        prompt,
        row["student_incorrect_solution"],
        name,
    )

In [51]:
# Add a "messages" column holding the chat-formatted conversation for every row.
tutor_train["messages"] = tutor_train.apply(process_row, axis = 1)
tutor_test["messages"] = tutor_test.apply(process_row, axis = 1)

# Keep only the "messages" column and convert back to Hugging Face Datasets,
# which is what the trainer below is given.
train = Dataset.from_pandas(tutor_train[["messages"]])
test = Dataset.from_pandas(tutor_test[["messages"]])

### Fine-tune using LoRA

In [41]:
from peft import LoraConfig
from trl import SFTTrainer, SFTConfig
from transformers import AutoTokenizer, AutoModelForCausalLM

In [11]:
# Mount Google Drive at /content/drive.
# NOTE(review): no visible cell reads from or writes to the mount (training
# output goes to ./results) — confirm whether this cell is still needed.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Base checkpoint to fine-tune; the tokenizer and model are both fetched from
# the Hub under the same name.
model_name = "HuggingFaceTB/SmolLM3-3B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

In [54]:
# LoRA hyperparameters, passed to LoraConfig below.
rank_dimension = 8
lora_alpha = 16
lora_dropout = 0.05

# Adapter configuration handed to the trainer: adapters are attached only to
# the modules named "q_proj" and "v_proj", and bias terms are left untouched.
peft_config = LoraConfig(
    r = rank_dimension,
    lora_alpha = lora_alpha,
    lora_dropout = lora_dropout,
    bias = "none",
    target_modules = ["q_proj", "v_proj"],
    task_type = "CAUSAL_LM"
)

In [55]:
def formatting_func(example):
    """Render one example's ``messages`` with the tokenizer's chat template.

    The conversation is rendered to plain text (not token ids) and without a
    trailing generation prompt, since training examples already contain the
    assistant replies.

    Returns:
        A one-element list holding the rendered string.
    """
    rendered = tokenizer.apply_chat_template(
        example["messages"],
        tokenize=False,
        add_generation_prompt=False,
    )
    # NOTE(review): whether the trainer expects a str or a list of str from
    # this hook depends on the installed TRL version — confirm.
    return [rendered]

In [None]:
# Training configuration for supervised fine-tuning.
# - Checkpoints go to ./results and logs to ./logs (the directory the
#   %tensorboard cell points at).
# - Evaluation and checkpointing both run every 200 steps; metrics are logged
#   every 10 steps.
# NOTE(review): fp16 = True presumably requires a CUDA GPU runtime — confirm.
# NOTE(review): assistant_only_loss = True presumably restricts the loss to
# assistant tokens and needs chat-template support for that — confirm against
# the installed TRL version.
sft_config = SFTConfig(
    output_dir = "./results",
    logging_dir = "./logs",
    num_train_epochs = 3,
    per_device_train_batch_size = 8,
    learning_rate = 2e-5,
    warmup_ratio = 0.03,
    weight_decay = 0.01,
    save_strategy = "steps",
    logging_steps = 10,
    eval_strategy = "steps",
    eval_steps = 200,
    save_steps = 200,
    max_length = 1024,
    report_to="tensorboard",
    fp16 = True,
    assistant_only_loss = True
)

# Wire the base model, the chat-formatted train/test datasets, the training
# configuration and the LoRA adapter configuration into the trainer.
trainer = SFTTrainer(
    model = model,
    train_dataset = train,
    eval_dataset = test,
    args = sft_config,
    peft_config = peft_config,
    formatting_func = formatting_func
)

In [None]:
# Run fine-tuning, then save the result to ./results/final_model — the path the
# inference section loads the adapter from.
trainer.train()
trainer.save_model("./results/final_model")

In [None]:
# Open TensorBoard on the training logs (SFTConfig logging_dir is "./logs").
%tensorboard --logdir logs

### Perform inference using the new adapter weights

In [60]:
from peft import PeftModel, PeftConfig
import torch

In [None]:
# Reload the base checkpoint and tokenizer from the Hub, then attach the
# adapter that training saved under results/final_model.
# Note: this rebinds the notebook-level names `tokenizer` and `model`.
model_name = "HuggingFaceTB/SmolLM3-3B"
adapter_path = "results/final_model"

tokenizer = AutoTokenizer.from_pretrained(model_name)
# tokenizer.pad_token = "<pad>"
base = AutoModelForCausalLM.from_pretrained(model_name)

model = PeftModel.from_pretrained(base, adapter_path)

# NOTE(review): the device is hard-coded to "cuda"; this cell fails on a
# CPU-only runtime.
model = model.to("cuda")
# Switch to evaluation mode before generating.
model.eval()

In [67]:
def formatting_func(example):
    """Render ``example["messages"]`` as chat-template text for generation.

    Unlike the training-time function of the same name defined earlier in the
    notebook (which this definition replaces), the rendered text ends with a
    generation prompt and is returned as a plain string rather than a list.

    NOTE(review): no visible cell calls this version — confirm it is needed.
    """
    rendered = tokenizer.apply_chat_template(
        example["messages"],
        tokenize=False,
        add_generation_prompt=True,
    )
    return rendered

In [68]:
def chat_turn(conversation: dict, user_input: str = None, max_new_tokens: int = 128):
    """Run one tutoring exchange: add the student's message, generate a reply.

    Uses the notebook-level ``tokenizer`` and ``model``.

    Args:
        conversation: Dict with a ``"messages"`` list of role/content dicts.
            It is modified in place and also returned.
        user_input: The student's message. When empty or ``None`` no user
            message is added and the model replies to the conversation as is.
        max_new_tokens: Upper bound on the length of the generated reply.

    Returns:
        The same ``conversation`` dict, extended with the user message (if any)
        and the assistant's reply. The reply is also printed.
    """
    # Add user input to conversation
    if user_input:
        conversation["messages"].append({"role": "user", "content": user_input})

    # Format with chat template, ensuring assistant is next
    prompt = tokenizer.apply_chat_template(
        conversation["messages"],
        tokenize=False,
        add_generation_prompt=True   # tells model: "assistant should speak next"
    )

    # The rendered template already contains the special tokens the model
    # expects, so tokenize without adding them a second time.
    inputs = tokenizer(
        prompt, return_tensors="pt", add_special_tokens=False
    ).to(model.device)

    # Some tokenizers define no pad token; fall back to EOS so generate() gets
    # a valid id instead of None.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    # Generate assistant reply
    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=max_new_tokens,
            do_sample=True,             # sampling helps avoid repetition
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.2,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=pad_token_id,
        )

    # Slice out the new tokens (everything after the prompt)
    prompt_length = inputs["input_ids"].shape[1]
    gen_tokens = outputs[:, prompt_length:]
    response = tokenizer.batch_decode(gen_tokens, skip_special_tokens=True)[0].strip()

    # Append assistant reply
    conversation["messages"].append({"role": "assistant", "content": response})

    print("\nAssistant:", response, "\n")
    return conversation


In [76]:
# Example problem and reference solution used to seed an interactive session.
question = "A bakery sells muffins in boxes of 6. Each box costs $9. If Dayton has $45, how many muffins can he buy?"
ground_truth = "First divide the total amount of money Dayton has by the cost of each box of muffins. This is the number of boxes of muffins Dayton can buy. The result will be $45/$9 = 5. Then multiply the number of boxes Dayton can buy by the number of muffins in each box. The result will be 5 * 6 = 30."
# The student is called "Mia" here; "Dayton" is only a character in the problem.
system_prompt = build_system_prompt(question, ground_truth, "Mia")
# Start with the system message only. The training conversations additionally
# begin with a user message holding the student's incorrect solution; here the
# first user message is whatever is typed into the loop below.
conversation = {"messages": [{"role": "system", "content": system_prompt}]}

In [None]:
# Interactive tutoring loop: read the student's message, generate and print the
# tutor's reply, and keep the running conversation. Typing "q" ends the session.
while (user_input := input("student: ")) != "q":
    conversation = chat_turn(conversation, user_input)