In [20]:
!pip install mdtex2html

Collecting mdtex2html
  Downloading mdtex2html-1.3.0-py3-none-any.whl.metadata (4.1 kB)
Collecting markdown (from mdtex2html)
  Using cached Markdown-3.7-py3-none-any.whl.metadata (7.0 kB)
Collecting latex2mathml (from mdtex2html)
  Downloading latex2mathml-3.77.0-py3-none-any.whl.metadata (14 kB)
Downloading mdtex2html-1.3.0-py3-none-any.whl (13 kB)
Downloading latex2mathml-3.77.0-py3-none-any.whl (73 kB)
Using cached Markdown-3.7-py3-none-any.whl (106 kB)
Installing collected packages: latex2mathml, markdown, mdtex2html
Successfully installed latex2mathml-3.77.0 markdown-3.7 mdtex2html-1.3.0


In [32]:
!pip install matplotlib

Collecting matplotlib
  Downloading matplotlib-3.7.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl.metadata (5.7 kB)
Collecting contourpy>=1.0.1 (from matplotlib)
  Downloading contourpy-1.1.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl.metadata (5.9 kB)
Collecting cycler>=0.10 (from matplotlib)
  Downloading cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecting fonttools>=4.22.0 (from matplotlib)
  Downloading fonttools-4.54.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl.metadata (163 kB)
Collecting kiwisolver>=1.0.1 (from matplotlib)
  Downloading kiwisolver-1.4.7-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl.metadata (6.3 kB)
Collecting pillow>=6.2.0 (from matplotlib)
  Downloading pillow-10.4.0-cp38-cp38-manylinux_2_28_aarch64.whl.metadata (9.2 kB)
Collecting pyparsing>=2.3.1 (from matplotlib)
  Downloading pyparsing-3.1.4-py3-none-any.whl.metadata (5.1 kB)
Downloading matplotlib-3.7.5-cp38-cp38-manylinux_2_17_aarch64.man

In [38]:
!pip install markdown



In [2]:
import json
from pyprojroot import here
from datasets import Dataset
import mdtex2html
import matplotlib.pyplot as plt
from markdown import markdown

# Modified

In [3]:
# Location of the saved evaluation dataset that the next cell loads.
# NOTE(review): `here` comes from pyprojroot; presumably it resolves this
# relative path against the project root — confirm.
par_dir = here("struct_vs_unstruct/data/mistral_large_2407/modified/non_self_synthesis/math/math-/math_eval")

In [4]:
# Load the dataset stored on disk at `par_dir`; the bare `ds` on the last line
# displays its feature names and row count.
ds = Dataset.load_from_disk(par_dir)
ds

Dataset({
    features: ['problem', 'level', 'type', 'solution', 'reasoning_formats', 'selected_modules', 'adapted_modules', 'reasoning_plan', 'reasoning', 'trajectory', 'answer_pred'],
    num_rows: 200
})

In [5]:
# Rows whose prediction is missing. `is None` is the identity test for None;
# `== None` goes through `__eq__` and is flagged by PEP 8.
none_ds = ds.filter(lambda x: x["answer_pred"] is None)
none_ds

Filter:   0%|          | 0/200 [00:00<?, ? examples/s]

Dataset({
    features: ['problem', 'level', 'type', 'solution', 'reasoning_formats', 'selected_modules', 'adapted_modules', 'reasoning_plan', 'reasoning', 'trajectory', 'answer_pred'],
    num_rows: 2
})

In [6]:
import re
def map_fn(instance):
    """Backfill a missing ``answer_pred`` from the row's trajectory text.

    Rows that already have a prediction are passed through unchanged. For rows
    whose ``answer_pred`` is None, the trajectory is searched for the marker
    ``"The final answer is:"``. The remainder of the marker's line (``.`` does
    not match a newline) becomes the answer, and the marker plus that remainder
    are removed from the trajectory.

    Args:
        instance: A mapping with at least the keys ``"trajectory"`` and
            ``"answer_pred"``.

    Returns:
        A dict with the keys ``"trajectory"`` and ``"answer_pred"``. When the
        marker is absent, or the trajectory is not a string, the trajectory is
        returned untouched and ``"answer_pred"`` stays None.
    """
    if instance["answer_pred"] is not None:
        return {
            "trajectory": instance["trajectory"],
            "answer_pred": instance["answer_pred"]
        }

    text = "The final answer is:"
    # Look-behind keeps the marker itself out of the captured answer.
    pattern = fr"(?<={text}).*"

    response = instance["trajectory"]

    # Handle the two expected failure modes explicitly (non-string trajectory,
    # marker not present) instead of hiding every exception behind a bare
    # `except:`.
    match = re.search(pattern, response) if isinstance(response, str) else None
    if match is None:
        return {
            "trajectory": response,
            "answer_pred": None
        }

    answer = match.group(0).strip()
    # Drop the captured remainder first, then the marker that preceded it.
    trajectory = re.sub(pattern, "", response).replace(text, "").strip()

    return {
        "trajectory": trajectory,
        "answer_pred": answer
    }

# Apply map_fn to every row; the result is bound to a new name so the loaded
# `ds` stays available unchanged.
new_ds = ds.map(map_fn)

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

In [7]:
# Rows still lacking a prediction after the backfill. `is None` is the
# identity test for None; `== None` goes through `__eq__`.
none_ds = new_ds.filter(lambda x: x["answer_pred"] is None)
none_ds

Filter:   0%|          | 0/200 [00:00<?, ? examples/s]

Dataset({
    features: ['problem', 'level', 'type', 'solution', 'reasoning_formats', 'selected_modules', 'adapted_modules', 'reasoning_plan', 'reasoning', 'trajectory', 'answer_pred'],
    num_rows: 2
})

In [10]:
print(none_ds[0]["trajectory"])

### Step-by-Step Reasoning Plan

1. **Identify the Core Mathematical Issue**
   - The core issue is to evaluate the expression \((r + s)(s + t)(t + r)\) using the roots \(r\), \(s\), and \(t\) of the polynomial \(x^3 + 9x^2 - 9x - 8\).

2. **Understand the Properties of the Roots**
   - Recognize that \(r\), \(s\), and \(t\) are the roots of the polynomial \(x^3 + 9x^2 - 9x - 8\).
   - Use Vieta's formulas to relate the coefficients of the polynomial to the sums and products of the roots.

3. **Apply Vieta's Formulas**
   - Vieta's formulas for a cubic polynomial \(ax^3 + bx^2 + cx + d = 0\) with roots \(r\), \(s\), and \(t\) are:
     - \(r + s + t = -\frac{b}{a}\)
     - \(rs + rt + st = \frac{c}{a}\)
     - \(rst = -\frac{d}{a}\)
   - For the polynomial \(x^3 + 9x^2 - 9x - 8\), the coefficients are \(a = 1\), \(b = 9\), \(c = -9\), and \(d = -8\).

4. **Calculate the Sums and Products of the Roots**
   - Using Vieta's formulas:
     - \(r + s + t = -9\)
     - \(rs + rt + st = -9\)


In [12]:
def map_fn(ins):
    """Hand-patch the prediction for the one trajectory identified by a marker.

    A row whose trajectory contains the hard-coded "First, let's expand ..."
    sentence gets the hard-coded boxed answer; every other row keeps its
    existing ``answer_pred``.

    Both literals are raw strings. In a normal string literal the leading
    backslash-b of the boxed answer is the backspace character (U+0008), which
    silently corrupted the stored answer. The backslash-parenthesis pairs in
    the marker are invalid escape sequences that Python only tolerates with a
    warning.

    Args:
        ins: A mapping with at least the keys ``"trajectory"`` and
            ``"answer_pred"``.

    Returns:
        A dict with the single key ``"answer_pred"``.
    """
    # NOTE(review): this redefines the map_fn from the earlier cell; the name
    # is kept because the call that follows refers to it.
    marker = r"First, let's expand \((r + s)(s + t)(t + r)\):"

    if marker in ins["trajectory"]:
        return {
            "answer_pred": r"\boxed{-486}"
        }

    return {
        "answer_pred": ins["answer_pred"]
    }

# Rebinds `new_ds` to the mapped result, so from here on the name refers to
# the dataset after this second pass.
new_ds = new_ds.map(map_fn)

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

In [14]:
# Rows still lacking a prediction after the manual patch. `is None` is the
# identity test for None; `== None` goes through `__eq__`.
none_ds = new_ds.filter(lambda x: x["answer_pred"] is None)
none_ds

Filter:   0%|          | 0/200 [00:00<?, ? examples/s]

Dataset({
    features: ['problem', 'level', 'type', 'solution', 'reasoning_formats', 'selected_modules', 'adapted_modules', 'reasoning_plan', 'reasoning', 'trajectory', 'answer_pred'],
    num_rows: 1
})

In [15]:
# Side effect: uploads `new_ds` to the Hugging Face Hub dataset repo named
# below (the returned CommitInfo points at the resulting commit).
new_ds.push_to_hub("sachithgunasekara/self-discover-mistral-modified-MATH-eval")

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

CommitInfo(commit_url='https://huggingface.co/datasets/sachithgunasekara/self-discover-mistral-modified-MATH-eval/commit/00ea556a90a37635d332a703f8bdb576e5d418cf', commit_message='Upload dataset', commit_description='', oid='00ea556a90a37635d332a703f8bdb576e5d418cf', pr_url=None, pr_revision=None, pr_num=None)

In [16]:
# Side effect: writes `new_ds` as CSV into the current working directory; the
# integer shown as the cell output is to_csv's return value.
new_ds.to_csv("./henrycks_cm_modified_evals.csv")

Creating CSV from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

1800573

In [17]:
file_name = "./project-7-at-2024-11-13-05-43-dbc32e36.json"

In [18]:
# Load the annotation export (a JSON list of records) into a Dataset.
# The encoding is stated explicitly: JSON text is UTF-8, and without it open()
# falls back to the platform's locale encoding, which can mis-decode
# non-ASCII characters in the records.
with open(file_name, "r", encoding="utf-8") as f:
    ds_ann = Dataset.from_list(json.load(f))

ds_ann

Dataset({
    features: ['problem', 'level', 'type', 'solution', 'reasoning_formats', 'selected_modules', 'adapted_modules', 'reasoning_plan', 'reasoning', 'trajectory', 'answer_pred', 'id', 'correct', 'annotator', 'annotation_id', 'created_at', 'updated_at', 'lead_time'],
    num_rows: 200
})

In [19]:
ds_ann[0]

{'problem': 'Find all the rational roots of $2x^4 - x^3 - 18x^2 + 14x + 15 = 0.$  Enter all the rational roots, separated by commas.',
 'level': 'Level 3',
 'type': 'Intermediate Algebra',
 'solution': 'By the Rational Root Theorem, the only possible rational roots are of the form $\\frac{a}{b},$ where $a \\mid 15$ and $b \\mid 2.$  Checking all possibilities, we find that the rational roots are $\\boxed{\\frac{5}{2},-3}.$',
 'reasoning_formats': '\n- should be the final answer based on calculations formatted in Latex style',
 'selected_modules': '1. How could I devise an experiment to help solve that problem?\n4. How can I simplify the problem so that it is easier to solve?\n9. How can I break down this problem into smaller, more manageable parts?\n10. Critical Thinking: This style involves analyzing the problem from different perspectives, questioning assumptions, and evaluating the evidence or information available. It focuses on logical reasoning, evidence-based decision-making, an

In [20]:
# Keep the rows whose `correct` field equals the *string* "True" (a boolean
# True would not match this comparison), then show their share of all
# annotated rows.
corr = ds_ann.filter(lambda x: x["correct"] == "True")
corr.num_rows / ds_ann.num_rows

Filter:   0%|          | 0/200 [00:00<?, ? examples/s]

0.765

In [21]:
# Side effect: uploads the annotated dataset to the Hugging Face Hub dataset
# repo named below.
ds_ann.push_to_hub("sachithgunasekara/self-discover-mistral-modified-MATH-eval-annotated")

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

CommitInfo(commit_url='https://huggingface.co/datasets/sachithgunasekara/self-discover-mistral-modified-MATH-eval-annotated/commit/1d2fb27e8ce1b6053ad1be1955d46c0cb0ffefb3', commit_message='Upload dataset', commit_description='', oid='1d2fb27e8ce1b6053ad1be1955d46c0cb0ffefb3', pr_url=None, pr_revision=None, pr_num=None)