This notebook:
1. Defines a Wolfram Query using Full Results API called "wolfram".
1. Takes testing problems from /test/MATH.
1. Runs function "prompt1" and "prompt2" on them, which **calls DaVinci GPT-3** to both give a Wolfram Query and answer the prompt respectively.
1. Saves the result in **results-baseline-davinci** under "gpt-answer".

In [40]:
import os
import json
import openai
import requests

with open('keys.json') as f:
    data = json.load(f)
openai.api_key = data['openai-key']
wolfram_key = data['wolfram-key']

In [41]:
def wolfram(input_query: str):
    api_url = "https://api.wolframalpha.com/v2/query"
    params = {
        "input": str(input_query), # handles None bs
        "appid": wolfram_key,
        "format": "plaintext",
        "output": "json"
    }
    response = requests.get(api_url, params=params)
    response_json = response.json()
    print(response_json)
    try:
        result = response_json["queryresult"]["pods"][1]["subpods"][0]["plaintext"]
        print(result)
        return result
    except:
        print("Wolfram Alpha could not find an answer and encountered an error.")
        return "Wolfram Alpha could not find an answer and encountered an error."

In [42]:
# Test to make sure it's working.

print(wolfram("solve x^2 =5"))

{'queryresult': {'success': True, 'error': False, 'numpods': 6, 'datatypes': 'Solve', 'timedout': '', 'timedoutpods': '', 'timing': 0.53, 'parsetiming': 0.179, 'parsetimedout': False, 'recalculate': '', 'id': 'MSP1518316h860iddh7497ee00002hb1520ea0ei7e96', 'host': 'https://www6b3.wolframalpha.com', 'server': '20', 'related': 'https://www6b3.wolframalpha.com/api/v1/relatedQueries.jsp?id=MSPa1518416h860iddh7497ee000044i7i920eh7h1cah8458888475783217078', 'version': '2.6', 'inputstring': 'solve x^2 =5', 'pods': [{'title': 'Input interpretation', 'scanner': 'Identity', 'id': 'Input', 'position': 100, 'error': False, 'numsubpods': 1, 'subpods': [{'title': '', 'plaintext': 'solve x^2 = 5'}], 'expressiontypes': {'name': 'Default'}}, {'title': 'Result', 'scanner': 'Solve', 'id': 'Result', 'position': 200, 'error': False, 'numsubpods': 1, 'primary': True, 'subpods': [{'title': '', 'plaintext': 'x = ± sqrt(5)'}], 'expressiontypes': {'name': 'Default'}, 'states': [{'name': 'Approximate form', 'inp

In [37]:
with open('prompts.json') as f:
    data = json.load(f)
wolfram_pt1 = data['wolfram_pt1']
wolfram_pt2 = data['wolfram_pt2']

# Calls DaVinci GPT-3 API on input text & returns output.
def prompt1(problem):

    full_message = "[Problem] " + problem + "\n\n" + wolfram_pt1
    
    # Use the OpenAI API to generate a response
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=full_message,
        temperature=0, # temperature 0 keeps it deterministic
        max_tokens = 600
    )

    text_output = response["choices"][0]["text"]
    print(text_output)
    return text_output

# Calls DaVinci GPT-3 API on input text & returns output.
def prompt2(problem, wolfram_query, wolfram_answer):

    full_message = "[Problem] " + problem + "\n\n" + "[Wolfram Query] Answer to " + wolfram_query + ": " + wolfram_answer + "\n\n" + wolfram_pt2
    
    # Use the OpenAI API to generate a response
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=full_message,
        temperature=0, # temperature 0 keeps it deterministic
        max_tokens = 600
    )

    text_output = response["choices"][0]["text"]
    print(text_output)
    return text_output

In [38]:
# Test code block: run this to test that davinci works, before you call GPT-3 API and potentially waste credits. Will print twice.
prompt1("What is 4+5?")



Query: 4+5


'\n\nQuery: 4+5'

In [44]:
# Define the paths to the data directories
data_dir = "test/MATH"
modified_data_dir = "results-wolfram-davinci/MATH"

# Loop over the directories | possible: ["train", "test"]:
for data_type in ["test"]:
    # Loop over the subdirectories in the data directory
    for sub_dir in os.listdir(os.path.join(data_dir, data_type)):
        
        # Skip any files that are not directories
        if not os.path.isdir(os.path.join(data_dir, data_type, sub_dir)):
            continue
        
        # Create the corresponding subdirectory in the modified data directory
        os.makedirs(os.path.join(modified_data_dir, data_type, sub_dir), exist_ok=True)

        # Get a list of all the JSON files in the original subdirectory
        json_files = [f for f in os.listdir(os.path.join(data_dir, "test", sub_dir)) if f.endswith(".json")]
        
        # Loop over the selected JSON files in the original subdirectory
        for file_name in json_files:
            
            # Load the JSON file
            with open(os.path.join(data_dir, data_type, sub_dir, file_name), "r") as file:
                data = json.load(file)
            
            problem = data["problem"]
            print(problem)
            wolframquery = prompt1(problem)
            wolframoutput = wolfram(wolframquery)
            gptanswer = prompt2(problem, wolframquery, wolframoutput)
            data["wolframquery"] = wolframquery
            data["wolframoutput"] = wolframoutput
            data["gptanswer"] = gptanswer
            
            # Save the modified JSON data to a new file
            with open(os.path.join(modified_data_dir, data_type, sub_dir, file_name), "w") as file:
                json.dump(data, file)

What is $\dbinom{n}{n}$ for any positive integer $n$?


Query: binomial[n,n]
{'queryresult': {'success': True, 'error': False, 'numpods': 2, 'datatypes': 'Ratios', 'timedout': '', 'timedoutpods': '', 'timing': 0.385, 'parsetiming': 0.203, 'parsetimedout': False, 'recalculate': '', 'id': 'MSP6401h18g402gi106i7e0000392ghh412g85d27d', 'host': 'https://www6b3.wolframalpha.com', 'server': '6', 'related': 'https://www6b3.wolframalpha.com/api/v1/relatedQueries.jsp?id=MSPa6411h18g402gi106i7e00003bfc84fheeb9dc8g7167914177805443390', 'version': '2.6', 'inputstring': 'Query: binomial[n,n]', 'pods': [{'title': 'Input interpretation', 'scanner': 'Identity', 'id': 'Input', 'position': 100, 'error': False, 'numsubpods': 1, 'subpods': [{'title': '', 'plaintext': 'hit:binomial(n, n)'}], 'expressiontypes': {'name': 'Default'}, 'infos': {'text': 'binomial(n, m) is the binomial coefficient', 'links': [{'url': 'http://reference.wolfram.com/language/ref/Binomial.html', 'text': 'Documentation', 'title': 'Mat