In [6]:
from datasets import load_dataset
from huggingface_hub import login
import os

In [7]:
# Read the Hugging Face access token from the environment so the secret is
# never written into the notebook itself; the value is None when unset.
HUGGING_FACE_ACCESS_TOKEN = os.environ.get("HUGGING_FACE_ACCESS_TOKEN")

In [9]:
# Authenticate this session with the Hugging Face Hub using the token read
# from the environment.
login(token=HUGGING_FACE_ACCESS_TOKEN)

In [10]:
# Load the MathDial tutoring-dialogue dataset from the Hugging Face Hub.
tutor_dialogue = load_dataset("eth-nlped/mathdial")
# Bare expression: displays the returned object (splits, features, row counts).
tutor_dialogue

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Generating train split: 100%|██████████| 2262/2262 [00:00<00:00, 99521.83 examples/s]
Generating test split: 100%|██████████| 599/599 [00:00<00:00, 43902.15 examples/s]


DatasetDict({
    train: Dataset({
        features: ['qid', 'scenario', 'question', 'ground_truth', 'student_incorrect_solution', 'student_profile', 'teacher_described_confusion', 'self-correctness', 'self-typical-confusion', 'self-typical-interactions', 'conversation'],
        num_rows: 2262
    })
    test: Dataset({
        features: ['qid', 'scenario', 'question', 'ground_truth', 'student_incorrect_solution', 'student_profile', 'teacher_described_confusion', 'self-correctness', 'self-typical-confusion', 'self-typical-interactions', 'conversation'],
        num_rows: 599
    })
})

In [14]:
# Convert both dataset splits to pandas DataFrames for row-wise processing.
tutor_train, tutor_test = (
    tutor_dialogue[split_name].to_pandas() for split_name in ("train", "test")
)

In [16]:
# Preview the first rows of the raw training split.
tutor_train.head()

Unnamed: 0,qid,scenario,question,ground_truth,student_incorrect_solution,student_profile,teacher_described_confusion,self-correctness,self-typical-confusion,self-typical-interactions,conversation
0,5000012,1,Nancy is filling an aquarium for her fish. She...,First calculate the volume of the aquarium by ...,The aquarium has a volume of 4 x 6 x 3 = 72 cu...,Steven is a 7th grade student. He has difficul...,He added a step after completing the problem.,Yes,3.0,3.0,"Teacher: (probing)Steven, If you had 4 of some..."
1,5000084,2,John is very unfit and decides to work up to d...,He needs to do 15*3=45 progressions\nThat will...,"To get to 15 reps, John will take 15 - 1 = 14 ...",Stephanie is a 7th grade student. She has diff...,She became fixated on a wrong calculation and ...,No,2.0,2.0,"Teacher: (probing)Stephanie, How many days wil..."
2,5000982,3,James has 20 pairs of red socks and half as ma...,He has 20/2=10 black socks\nSo he has 20+10=30...,James has 20/2 = 10 pairs of black socks.\nJam...,DeAndre is a 7th grade student. He has problem...,He was technically correct.,Yes,5.0,5.0,"Teacher: (probing)DeAndre, you successfully an..."
3,5000274,4,Jason drives past 3 convenience stores on his ...,First find the additional distance between the...,The distance between the second store and thir...,Winnie is a 7th grade student. She struggle to...,He understood it when it was broken down step ...,Yes,3.0,3.0,Teacher: (probing)What is the distance between...
4,5000797,5,Wanda walks her daughter .5 miles to school in...,She walks .5 miles to school and back 4 times ...,"Every day, Wanda walks a total of 0.5 + 0.5 = ...",Cody is a 7th grade student. He has problem wi...,He forgot she walked twice each day!,Yes,3.0,3.0,"Teacher: (probing)Cody, How far doe Wanda walk..."


In [None]:
import re

def extract_move(sentence: str):
    """Separate a teacher-move tag from a conversation turn.

    A tag is one of ``(probing)``, ``(focus)``, ``(telling)`` or ``(generic)``
    (lower-case, wrapped in parentheses).

    Args:
        sentence: A single turn of the conversation.

    Returns:
        A ``(text, move)`` tuple. When a tag is present, ``text`` is the turn
        with every tag occurrence removed and ``move`` is the name of the first
        tag found (without parentheses). Otherwise ``text`` is ``sentence``
        unchanged and ``move`` is ``None``.
    """
    tag_regex = re.compile(r"\(\b(probing|focus|telling|generic)\b\)")

    first_tag = tag_regex.search(sentence)
    if first_tag is None:
        return sentence, None

    return tag_regex.sub("", sentence), first_tag.group(1)
    
def remove_names(sentence: str, student_name: str):
    """Strip the speaker label from a conversation turn.

    A speaker label is ``Teacher``, ``Student`` or ``student_name`` followed by
    ``": "``.

    Args:
        sentence: A single turn of the conversation.
        student_name: The student's name as it appears in speaker labels. It is
            matched literally, so regex metacharacters in a name (e.g. ``.``)
            are not interpreted as pattern syntax.

    Returns:
        A ``(text, speaker)`` tuple. When a label is found, ``text`` is the
        turn with that first label removed and ``speaker`` is the label's name.
        Otherwise ``text`` is ``sentence`` unchanged and ``speaker`` is ``None``.
    """
    # Escape the name so it is matched as plain text rather than as a regex.
    speaker_regex = re.compile(rf"\b(Teacher|Student|{re.escape(student_name)})\b: ")

    label = speaker_regex.search(sentence)
    if label is None:
        return sentence, None

    # Remove only the label that is reported back; later "Name: " occurrences
    # are part of the utterance and must be preserved.
    return speaker_regex.sub("", sentence, count=1), label.group(1)

def preprocess(conversation: str):
    """Split a raw conversation into turns and collect the teacher moves.

    Turns are separated by the ``|EOM|`` marker. Each turn is passed through
    ``extract_move``; turns that are empty after tag removal are dropped.

    Args:
        conversation: The full conversation string.

    Returns:
        A ``(turns, moves)`` tuple: the non-empty cleaned turns in order, and
        the moves extracted from the turns in order of appearance.
    """
    turns = []
    moves = []

    for raw_turn in conversation.split("|EOM|"):
        text, move = extract_move(raw_turn)

        if move:
            moves.append(move)

        # Same effect as filtering out falsy entries afterwards.
        if text:
            turns.append(text)

    return turns, moves

In [142]:
def format_conversation(split_conversation: list[str], moves: list[str], system_prompt: str, incorrect_solution: str, student_name: str):
    """
    formats conversation into the following structure:
    '{"messages": [{"role": "user", "content": "What color is the sky?"}, {"role": "assistant", "content": "It is blue."}]}'

    The message list starts with the system prompt and the student's incorrect
    solution (as the first user message). Teacher turns become assistant
    messages prefixed with their upper-cased move in brackets; all other
    labelled turns become user messages. Turns without a recognised speaker
    label, or with no text left after removing the label, are skipped.

    Args:
        split_conversation: Conversation turns, each starting with a speaker label.
        moves: Teacher moves, consumed in order, one per teacher turn.
        system_prompt: Content of the system message.
        incorrect_solution: The student's initial (incorrect) solution.
        student_name: Student name used in speaker labels.

    Returns:
        A dict of the form ``{"messages": [...]}``.

    Raises:
        IndexError: If a non-empty teacher turn has no corresponding entry in ``moves``.
    """

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": incorrect_solution},
    ]

    move_index = 0

    for turn in split_conversation:
        clean_sentence, speaker = remove_names(turn, student_name)

        if not speaker:
            continue

        if not clean_sentence:
            # A teacher turn with no text left still consumed one entry of
            # `moves`; advance past it so later teacher turns keep their own move.
            if speaker == "Teacher":
                move_index += 1
            continue

        if speaker == "Teacher":
            messages.append({"role": "assistant", "content": f"[{moves[move_index].upper()}] " + clean_sentence})
            move_index += 1
        else:
            messages.append({"role": "user", "content": clean_sentence})

    return {"messages": messages}

In [144]:
def build_system_prompt(question: str, ground_truth: str, student_name: str) -> str:
    """Build the system prompt for one tutoring conversation.

    Args:
        question: The problem statement, inserted after ``Problem:``.
        ground_truth: The reference solution, inserted after ``Solution:``.
        student_name: The student's name, inserted into the task description.

    Returns:
        The prompt text. The template's leading newline and per-line
        indentation are kept as written.
    """
    system_prompt = (
        f"""
        You are a mathematics teacher for students in elementary and middle school.
        Your task is to help your student {student_name} solve a problem that they initially solved incorrectly.
        You have access to the problem statement and ground truth solution, which are provided below.

        Make sure to abide by the following guidelines:
            1. Do not give away the answer immediately. 
            2. Lead the student toward the correct answer using the following strategies: focus, probing, telling, and generic.
            3. Each of your responses should follow this structure: '[strategy] one sentence response'
            4. For each response, you will select only 1 strategy to use. But you are allowed to select different strategies on different turns.
            5. Ensure your selected strategy is displayed in all caps

        Here is a description of each strategy, its intent/purpose, and an example of how that strategy is used for each intent:
            1. focus
                - intent 1: seek strategy (e.g. So what should you do next?)
                - intent 2: guide student focus (e.g. Can you calculate...?)
                - intent 3: recall relevant information (e.g. Can you reread the question and tell me what is...?)
            2. probing
                - intent 1: asking for explanation (e.g. Why do you think you need to add these numbers?)
                - intent 2: seeking self correction (e.g. Are you sure you need to add here?)
                - intent 3: perturbing the question (e.g. How would things change if they had ... items instead?)
                - intent 4: seeking world knowledge (e.g. How do you calculate the perimeter of a square?)
            3. telling
                - intent 1: revealing strategy (e.g. You need to add ... to ... get your answer.)
                - intent 2: revealing answer (e.g. No, he had ... items.)
            4. generic
                - intent 1: greeting/farewell (e.g. Hi..., how are you doing with the word problem?, Good Job! Is there anything else I can help with?)
                - intent 2: general inquiry (e.g. Can you go walk me through your solution?)

        Example conversation: 
            your output: '[PROBING] If you had 4 of something and tripled that amount, how much would you have?'
            student response: 'I would have 12 of something.'
            your output: '[PROBING] So if Nancy triples the 18 cubic feet of water, how much would she have?'

        Problem: {question}
        Solution: {ground_truth}
        """
    )

    return system_prompt

def get_student_name(student_profile: str):
    """Return the first whitespace-delimited word of the student profile.

    Raises:
        IndexError: If ``student_profile`` is empty or only whitespace.
    """
    # Only the first token is needed, so stop splitting after it.
    return student_profile.split(maxsplit=1)[0]

In [147]:
def process_row(row):
    """Turn one dataset row into a chat-formatted ``{"messages": [...]}`` record.

    Reads the ``student_profile``, ``question``, ``ground_truth``,
    ``conversation`` and ``student_incorrect_solution`` fields of ``row``.
    """
    name = get_student_name(row["student_profile"])
    turns, moves = preprocess(row["conversation"])
    prompt = build_system_prompt(row["question"], row["ground_truth"], name)

    return format_conversation(turns, moves, prompt, row["student_incorrect_solution"], name)

In [157]:
# Build the chat-formatted record for every row of each split.
tutor_train["messages"] = tutor_train.apply(process_row, axis=1)
# Each split must be processed from its own rows: assigning a Series computed
# from tutor_train would align on index labels and fill the test split with
# training conversations.
tutor_test["messages"] = tutor_test.apply(process_row, axis=1)

In [155]:
# Preview the training split again, now including the new "messages" column.
tutor_train.head()

Unnamed: 0,qid,scenario,question,ground_truth,student_incorrect_solution,student_profile,teacher_described_confusion,self-correctness,self-typical-confusion,self-typical-interactions,conversation,messages
0,5000012,1,Nancy is filling an aquarium for her fish. She...,First calculate the volume of the aquarium by ...,The aquarium has a volume of 4 x 6 x 3 = 72 cu...,Steven is a 7th grade student. He has difficul...,He added a step after completing the problem.,Yes,3.0,3.0,"Teacher: (probing)Steven, If you had 4 of some...","{'messages': [{'role': 'system', 'content': ' ..."
1,5000084,2,John is very unfit and decides to work up to d...,He needs to do 15*3=45 progressions\nThat will...,"To get to 15 reps, John will take 15 - 1 = 14 ...",Stephanie is a 7th grade student. She has diff...,She became fixated on a wrong calculation and ...,No,2.0,2.0,"Teacher: (probing)Stephanie, How many days wil...","{'messages': [{'role': 'system', 'content': ' ..."
2,5000982,3,James has 20 pairs of red socks and half as ma...,He has 20/2=10 black socks\nSo he has 20+10=30...,James has 20/2 = 10 pairs of black socks.\nJam...,DeAndre is a 7th grade student. He has problem...,He was technically correct.,Yes,5.0,5.0,"Teacher: (probing)DeAndre, you successfully an...","{'messages': [{'role': 'system', 'content': ' ..."
3,5000274,4,Jason drives past 3 convenience stores on his ...,First find the additional distance between the...,The distance between the second store and thir...,Winnie is a 7th grade student. She struggle to...,He understood it when it was broken down step ...,Yes,3.0,3.0,Teacher: (probing)What is the distance between...,"{'messages': [{'role': 'system', 'content': ' ..."
4,5000797,5,Wanda walks her daughter .5 miles to school in...,She walks .5 miles to school and back 4 times ...,"Every day, Wanda walks a total of 0.5 + 0.5 = ...",Cody is a 7th grade student. He has problem wi...,He forgot she walked twice each day!,Yes,3.0,3.0,"Teacher: (probing)Cody, How far doe Wanda walk...","{'messages': [{'role': 'system', 'content': ' ..."


In [160]:
# Spot-check one formatted record in full (row label 1000).
tutor_train.loc[1000, "messages"]

{'messages': [{'role': 'system',
   'content': "\n        You are a mathematics teacher for students in elementary and middle school.\n        Your task is to help your student Jian solve a problem that they initially solved incorrectly.\n        You have access to the problem statement and ground truth solution, which are provided below.\n\n        Make sure to abide by the following guidelines:\n            1. Do not give away the answer immediately. \n            2. Lead the student toward the correct answer using the following strategies: focus, probing, telling, and generic.\n            3. Each of your responses should follow this structure: '[strategy] one sentence response'\n            4. For each response, you will select only 1 strategy to use. But you are allowed to select different strategies on different turns.\n            4. Ensure your selected strategy is displayed in all caps\n\n        Here is a description of each strategy, its intent/purpose, and an example of how