This notebook:
1. Takes the test problems from `test/MATH/test` (one sub-directory per subject, one JSON file per problem).
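1. Runs a three-step pipeline on each problem: `prompt1` **calls GPT-3.5-turbo** to write a Wolfram Alpha query, `wolfram` sends that query to Wolfram Alpha, and `prompt2` **calls GPT-3.5-turbo** again with the Wolfram output to produce the answer.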
1. Saves the result in **results-wolfram-turbo** as a copy of the problem JSON with the added keys "wolframquery", "wolframoutput", and "gptanswer".

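The processing loop also includes "skip" functionality: a problem whose result file already exists is not sent to the APIs again (its saved problem, solution, and answer are just printed), so the run can be resumed if anything gets interrupted.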

In [1]:
import os
import json
import openai
import requests

# Load the API credentials from a local JSON file so they are never hardcoded
# in the notebook. The file must provide the keys 'openai-key' and 'wolfram-key'.
# JSON is UTF-8 by specification, so the encoding is pinned instead of relying
# on the platform's locale default.
with open('keys.json', encoding='utf-8') as f:
    data = json.load(f)
openai.api_key = data['openai-key']
wolfram_key = data['wolfram-key']

In [2]:
def wolfram(input_query: str, timeout: float = 60):
    """Send a query to the Wolfram Alpha v2 query API and return its plaintext result.

    The answer is read from the first subpod of the second pod of the JSON
    response (index 1; in the sample response this is the "Result" pod that
    follows "Input interpretation").

    Args:
        input_query: Query text. It is passed through ``str()`` so a
            non-string value such as ``None`` does not break the request.
        timeout: Seconds to wait for the HTTP response. Without a timeout a
            stalled connection would block a long batch run forever.

    Returns:
        The plaintext of the result pod, or a fixed error sentence when the
        response does not contain the expected pod structure.

    Raises:
        requests.RequestException: on network failures or timeouts. These are
            deliberately not converted into the fallback sentence, so that a
            transient outage is not recorded as a real "no answer" result.
    """
    failure_message = "Wolfram Alpha could not find an answer and encountered an error."

    api_url = "https://api.wolframalpha.com/v2/query"
    params = {
        "input": str(input_query),  # tolerate None / non-string queries
        "appid": wolfram_key,
        "format": "plaintext",
        "output": "json"
    }
    response = requests.get(api_url, params=params, timeout=timeout)
    response_json = response.json()
    print(response_json)

    # Catch only the lookup failures that mean "no usable result pod".
    # A bare ``except:`` would also swallow KeyboardInterrupt / SystemExit.
    try:
        result = response_json["queryresult"]["pods"][1]["subpods"][0]["plaintext"]
    except (KeyError, IndexError, TypeError):
        print(failure_message)
        return failure_message

    print(result)
    return result
    
# Smoke test: sends one live query to Wolfram Alpha (uses the API key) to make sure it's working.
# wolfram() already prints the raw JSON response and the extracted result,
# so the result shows up a second time from the print() below.
print(wolfram("solve x^2 =5"))

{'queryresult': {'success': True, 'error': False, 'numpods': 6, 'datatypes': 'Solve', 'timedout': '', 'timedoutpods': '', 'timing': 0.496, 'parsetiming': 0.179, 'parsetimedout': False, 'recalculate': '', 'id': 'MSP55012bc62i8h687ici2000068egbaid565h40a9', 'host': 'https://www6b3.wolframalpha.com', 'server': '1', 'related': 'https://www6b3.wolframalpha.com/api/v1/relatedQueries.jsp?id=MSPa55112bc62i8h687ici200001d65h821fgg574i27794153569472399936', 'version': '2.6', 'inputstring': 'solve x^2 =5', 'pods': [{'title': 'Input interpretation', 'scanner': 'Identity', 'id': 'Input', 'position': 100, 'error': False, 'numsubpods': 1, 'subpods': [{'title': '', 'plaintext': 'solve x^2 = 5'}], 'expressiontypes': {'name': 'Default'}}, {'title': 'Result', 'scanner': 'Solve', 'id': 'Result', 'position': 200, 'error': False, 'numsubpods': 1, 'primary': True, 'subpods': [{'title': '', 'plaintext': 'x = ± sqrt(5)'}], 'expressiontypes': {'name': 'Default'}, 'states': [{'name': 'Approximate form', 'input':

In [3]:
# Load the two prompt templates that are appended to the messages sent to the
# chat model. The file must provide the keys 'wolfram_pt1' and 'wolfram_pt2'.
# The encoding is pinned to UTF-8 (the JSON standard) so prompt text containing
# non-ASCII characters loads identically on every platform.
with open('prompts.json', encoding='utf-8') as f:
    data = json.load(f)
wolfram_pt1 = data['wolfram_pt1']
wolfram_pt2 = data['wolfram_pt2']

def _chat_completion(full_message, model="gpt-3.5-turbo"):
    """Send one user message to the OpenAI chat API, print the reply and return it.

    Args:
        full_message: Complete text of the single user message.
        model: Chat model name; defaults to the model this notebook evaluates.

    Returns:
        The content of the first choice in the response.
    """
    response = openai.ChatCompletion.create(
        model=model,
        messages=[{"role": "user", "content": full_message}],
        temperature=0,  # temperature 0 keeps the output as deterministic as possible
    )

    text_output = response['choices'][0]['message']['content']
    print(text_output)
    return text_output


def prompt1(problem):
    """First model call: send the problem followed by the `wolfram_pt1` prompt.

    The main loop passes the returned text to Wolfram Alpha as the query.

    Args:
        problem: Problem statement text.

    Returns:
        The model's reply text (it is also printed).
    """
    return _chat_completion(f"[Problem] {problem}\n\n{wolfram_pt1}")


def prompt2(problem, wolfram_query, wolfram_answer):
    """Second model call: send the problem, the Wolfram exchange and `wolfram_pt2`.

    The message is built with f-strings so that a non-string query or answer
    (for example ``None``) is rendered as text instead of raising ``TypeError``
    during concatenation.

    Args:
        problem: Problem statement text.
        wolfram_query: Query that was sent to Wolfram Alpha.
        wolfram_answer: Result that Wolfram Alpha returned for that query.

    Returns:
        The model's reply text (it is also printed).
    """
    full_message = (
        f"[Problem] {problem}\n\n"
        f"[Wolfram Query] Answer to {wolfram_query}: {wolfram_answer}\n\n"
        f"{wolfram_pt2}"
    )
    return _chat_completion(full_message)

In [4]:
# Test code block: run this to check that the gpt-3.5-turbo call works before you start the full run and potentially waste credits.
# The reply is shown twice: once by the print() inside prompt1 and once as the cell's return value.
prompt1("What is 4+5?")

Query: 4+5


'Query: 4+5'

In [10]:
# Define the paths to the data directories
data_dir = "test/MATH"
modified_data_dir = "results-wolfram-turbo/MATH"

# For every problem file: ask the model for a Wolfram query, run it, ask the
# model for the final answer, and save everything next to the original fields.
# Problems that already have a result file are skipped, so the cell can simply
# be re-run after an interruption.

# Loop over the directories | possible: ["train", "test"]:
for data_type in ["test"]:
    source_root = os.path.join(data_dir, data_type)

    # Loop over the subdirectories in the data directory
    for sub_dir in os.listdir(source_root):
        source_dir = os.path.join(source_root, sub_dir)

        # Skip any files that are not directories
        if not os.path.isdir(source_dir):
            continue

        # Create the corresponding subdirectory in the modified data directory
        result_dir = os.path.join(modified_data_dir, data_type, sub_dir)
        os.makedirs(result_dir, exist_ok=True)

        # List the JSON files of the *current* data_type (this used to be
        # hard-coded to "test", which breaks as soon as "train" is enabled).
        json_files = [f for f in os.listdir(source_dir) if f.endswith(".json")]

        for file_name in json_files:
            source_path = os.path.join(source_dir, file_name)
            result_path = os.path.join(result_dir, file_name)

            # Already processed: show the stored result and move on.
            if os.path.exists(result_path):
                print(f"skipped {file_name}")

                with open(result_path, "r", encoding="utf-8") as file:
                    data = json.load(file)

                problem = data["problem"]
                solution = data["solution"]
                gptanswer = data["gptanswer"]
                print(f"problem: {problem}")
                print(f"solution: {solution}")
                print(f"answer: {gptanswer}")
                continue

            # Load the original problem
            with open(source_path, "r", encoding="utf-8") as file:
                data = json.load(file)

            problem = data["problem"]
            print(problem)
            wolframquery = prompt1(problem)
            wolframoutput = wolfram(wolframquery)
            gptanswer = prompt2(problem, wolframquery, wolframoutput)
            data["wolframquery"] = wolframquery
            data["wolframoutput"] = wolframoutput
            data["gptanswer"] = gptanswer

            # Write to a temporary file and rename it into place. If the run is
            # interrupted mid-write, no truncated result file is left behind
            # that the skip branch would later fail to parse.
            temp_path = result_path + ".tmp"
            with open(temp_path, "w", encoding="utf-8") as file:
                json.dump(data, file)
            os.replace(temp_path, result_path)

skipped 586.json
problem: What is $\dbinom{n}{n}$ for any positive integer $n$?
solution: $\dbinom{n}{n}=\dfrac{n!}{n!0!}=\boxed{1}$.  Also, there is only one way to choose $n$ objects out of $n$, which is simply choosing all of them.
answer: $\dbinom{n}{n}=1$ for any positive integer $n$. 

$\boxed{1}$
skipped 139.json
problem: How many paths are there from $A$ to $B$ on the lines of the grid shown, if every step must be up or to the right?[asy]size(3cm,3cm);int w=6;int h=3;int i;for (i=0; i<h; ++i){ draw((0,i) -- (w-1,i));}for (i=0; i<w; ++i){ draw((i, 0)--(i,h-1));}label("$B$", (w-1,h-1), NE);label("$A$", (0,0), SW);[/asy]
For example, here is one such valid path highlighted in blue:
[asy]
size(3cm,3cm);
int w=6;
int h=3;
int i;
for (i=0; i<h; ++i){ 
draw((0,i) -- (w-1,i));
}for (i=0; i<w; ++i){ 
draw((i, 0)--(i,h-1));
}
label("$B$", (w-1,h-1), NE);
label("$A$", (0,0), SW);

draw((0,0)--(1,0), blue+1.5);
draw((1,0)--(1,1), blue+1.5);
draw((1,1)--(2,1), blue+1.5);
draw((2,1)--(3,1), 