In [1]:
import json
import os
import numpy as np
import pandas as pd

In [5]:
def load_jsonl(path):
    """Read a JSON Lines file and return its records as a list.

    Args:
        path: Path to a file containing one JSON document per line.

    Returns:
        A list with one parsed object per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default text encoding.
    with open(path, 'r', encoding='utf-8') as file:
        # Blank / whitespace-only lines would make json.loads raise; skip them.
        return [json.loads(line) for line in file if line.strip()]


mistral_base_results = load_jsonl('output/MistralBaseTest.jsonl')
mistral_mmiqc_results = load_jsonl('output/MistralMMIQCTest.jsonl')

In [8]:
def get_accuracy(results):
    """Return the fraction of records whose ``'correct'`` value is truthy.

    Args:
        results: Sized collection of dict-like records, each exposing a
            ``'correct'`` key.

    Returns:
        The accuracy as a float in ``[0, 1]``, or ``nan`` when ``results`` is
        empty. Accuracy over zero samples is undefined, so NaN is returned
        instead of raising ``ZeroDivisionError`` or reporting a misleading 0.0.

    Raises:
        KeyError: If a record has no ``'correct'`` key.
    """
    total = len(results)
    if total == 0:
        return float("nan")
    # Count in a single pass rather than materialising a filtered copy.
    num_correct = sum(1 for record in results if record['correct'])
    return num_correct / total

In [9]:
# Displayed as a tuple: (accuracy of the base results, accuracy of the MMIQC results).
(
    get_accuracy(mistral_base_results),
    get_accuracy(mistral_mmiqc_results),
)

(0.0414, 0.3544)

In [17]:
def get_union(base_results, mmiqc_results, flag):
    """Select positionally paired records by the correctness of each side.

    The two inputs are paired element by element (first with first, and so
    on), and the pairs whose ``'correct'`` values match the combination named
    by ``flag`` are returned. Despite the function's name, the result is a
    filtered subset of the pairs, not a set union.

    Args:
        base_results: Iterable of dict-like records with a ``'correct'`` key.
        mmiqc_results: Iterable of dict-like records with a ``'correct'`` key,
            in the same order and of the same length as ``base_results``.
        flag: One of ``"both_wrong"``, ``"both_correct"``,
            ``"base_wrong_mmiqc_correct"`` or ``"base_correct_mmiqc_wrong"``.

    Returns:
        A list of ``(base, mmiqc)`` tuples, in input order, for the pairs that
        match ``flag``.

    Raises:
        ValueError: If ``flag`` is not one of the four accepted values, or if
            the two inputs have different lengths (positional pairing would
            otherwise silently drop the unmatched tail).
    """
    # flag -> (base must be correct?, mmiqc must be correct?)
    wanted_by_flag = {
        "both_wrong": (False, False),
        "both_correct": (True, True),
        "base_wrong_mmiqc_correct": (False, True),
        "base_correct_mmiqc_wrong": (True, False),
    }
    try:
        want_base, want_mmiqc = wanted_by_flag[flag]
    except (KeyError, TypeError):
        # TypeError covers unhashable flags, which are equally invalid.
        raise ValueError("flag must be one of 'both_wrong', 'both_correct', 'base_wrong_mmiqc_correct', 'base_correct_mmiqc_wrong'") from None

    # Materialise so that lengths can be compared even for one-shot iterables.
    base_results = list(base_results)
    mmiqc_results = list(mmiqc_results)
    if len(base_results) != len(mmiqc_results):
        raise ValueError(
            "base_results and mmiqc_results must have the same length to be paired "
            f"(got {len(base_results)} and {len(mmiqc_results)})"
        )

    return [
        (base, mmiqc)
        for base, mmiqc in zip(base_results, mmiqc_results)
        if bool(base['correct']) == want_base and bool(mmiqc['correct']) == want_mmiqc
    ]

In [18]:
# Split the paired results into the four correctness buckets. The flags are
# listed in the same order as the names they are unpacked into.
both_wrong, both_correct, base_wrong_mmiqc_correct, base_correct_mmiqc_wrong = (
    get_union(mistral_base_results, mistral_mmiqc_results, bucket_flag)
    for bucket_flag in (
        "both_wrong",
        "both_correct",
        "base_wrong_mmiqc_correct",
        "base_correct_mmiqc_wrong",
    )
)

In [14]:
# Preview the first five (base, mmiqc) record pairs in which both records are marked correct.
both_correct[:5]

[({'correct': True,
   'answer': '\\frac{\\sqrt{3}}{2}',
   'model_answer': '$\\frac{\\sqrt{3}}{2}$',
   'problem': 'Compute $\\sin 120^\\circ$.',
   'solution': 'Let $P$ be the point on the unit circle that is $120^\\circ$ counterclockwise from $(1,0)$, and let $D$ be the foot of the altitude from $P$ to the $x$-axis, as shown below.\n\n[asy]\n\npair A,C,P,O,D;\n\ndraw((0,-1.2)--(0,1.2),p=black+1.2bp,Arrows(0.15cm));\n\ndraw((-1.2,0)--(1.2,0),p=black+1.2bp,Arrows(0.15cm));\n\nA = (1,0);\n\nO= (0,0);\n\nlabel("$x$",(1.2,0),SE);\n\nlabel("$y$",(0,1.2),NE);\n\nP = rotate(120)*A;\n\nD = foot(P,A,-A);\n\ndraw(O--P--D);\n\ndraw(rightanglemark(O,D,P,2));\n\ndraw(Circle(O,1));\n\nlabel("$O$",O,SE);\n\nlabel("$P$",P,NW);\n\n//label("$A$",A,SE);\n\nlabel("$D$",D,S);\n\n[/asy]\n\nTriangle $POD$ is a 30-60-90 triangle, so $DO = \\frac{1}{2}$ and $DP = \\frac{\\sqrt{3}}{2}$.  Therefore, the coordinates of $P$ are $\\left(-\\frac{1}{2}, \\frac{\\sqrt{3}}{2}\\right)$, so $\\sin 120^\\circ = \\boxed{

In [15]:
# Preview the first five (base, mmiqc) record pairs in which the base record is
# marked incorrect and the MMIQC record is marked correct.
base_wrong_mmiqc_correct[:5]

[({'correct': False,
   'answer': '72',
   'model_answer': 'The minimum number of degrees a regular pentagon must be rotated to coincide with its original position is 72 degrees. This is because a regular pentagon has five sides and 10 vertices, and 360 degrees divided by 5 is 72 degrees. Therefore, rotating it 72 degrees will bring one of its vertices back to its original position, and since all vertices are equivalent, the entire pentagon will have returned to its original position',
   'problem': 'A regular pentagon is rotated counterclockwise about its center. What is the minimum number of degrees it must be rotated until it coincides with its original position?',
   'solution': 'Each of the five marked angles measures $360/5=72$ degrees, so $\\boxed{72}$ degrees is the minimum angle through which the pentagon may be rotated so that it coincides with its original position.\n\n[asy]\nsize(150);\ndefaultpen(linewidth(0.7));\nint i;\nfor(i=0;i<=4;++i)\n\n{\ndraw(origin--dir(18+72*i)--

In [16]:
# Preview the first five (base, mmiqc) record pairs in which both records are marked incorrect.
both_wrong[:5]

[({'correct': False,
   'answer': '28',
   'model_answer': 'There is no valid solution for $x$',
   'problem': '$\\overline{BC}$ is parallel to the segment through $A$, and $AB = BC$. What is the number of degrees represented by $x$?\n\n[asy]\ndraw((0,0)--(10,0));\ndraw((0,3)--(10,3));\ndraw((2,3)--(8,0));\ndraw((2,3)--(4,0));\nlabel("$A$",(2,3),N);\nlabel("$B$",(4,0),S);\nlabel("$C$",(8,0),S);\nlabel("$124^{\\circ}$",(2,3),SW);\nlabel("$x^{\\circ}$",(4.5,3),S);\n[/asy]',
   'solution': 'Angle $\\angle BCA$ and the angle we\'re trying to measure are alternate interior angles, so they are congruent. Thus, $\\angle BCA=x^\\circ$:\n\n[asy]\ndraw((0,0)--(10,0));\ndraw((0,3)--(10,3));\ndraw((2,3)--(8,0));\ndraw((2,3)--(4,0));\nlabel("$A$",(2,3),N);\nlabel("$B$",(4,0),S);\nlabel("$C$",(8,0),S);\nlabel("$124^{\\circ}$",(2,3),SW);\nlabel("$x^{\\circ}$",(4.5,3),S);\nlabel("$x^{\\circ}$",(6,0),N);\n[/asy]\n\nSince $AB=BC$, we know that $\\triangle ABC$ is isosceles with equal angles at $C$ and $

In [None]:
# Bucket sizes, space-separated, in the order:
# both correct, base-only correct, MMIQC-only correct, both wrong.
print(*map(len, (both_correct, base_correct_mmiqc_wrong, base_wrong_mmiqc_correct, both_wrong)))

164 43 1608 3185


In [20]:
# Sanity check: the four buckets are mutually exclusive and together should
# account for every positionally paired record. Computed from the lists rather
# than from hand-copied numbers so it stays valid when the result files change.
total_pairs = (
    len(both_correct)
    + len(base_correct_mmiqc_wrong)
    + len(base_wrong_mmiqc_correct)
    + len(both_wrong)
)
# Pairing is positional, so the number of pairs is bounded by the shorter input.
assert total_pairs == min(len(mistral_base_results), len(mistral_mmiqc_results))
total_pairs

5000