In [23]:
import json
import os
import pprint
import sys

import matplotlib.pyplot as plt
import numpy as np
from IPython.display import display, Markdown
from sympy import symbols, Function

# Put the parent directory on the import path before importing the local `functions` package.
sys.path.append('../')
from functions import gpt

In [27]:
# --- Run configuration ------------------------------------------------------

# Chat model name handed to gpt.chat_create for both requests.
model = "gpt-4"

# Context bullet interpolated into both system prompts.
purpose = "- The data is for expressions related to neural ordinary differential equation and holographic quantum chromodynamics"

# Path components, joined by file_path() in this order:
#   DATA_PATH / project_folder / sub_category / work_bucket / base_name / mathpix / generated_code
project_folder = "diygenomics-projects"
sub_category = "math"
work_bucket = "AdS-CFT"
base_name = "2021_Hashimoto_Neural_ODE_and_holographic_QCD_PUB"

# File name of the script written by the final save cell.
# NOTE(review): "annotaton" looks like a typo for "annotation"; kept verbatim
# because renaming it changes the output path.
output_file = "hashimoto_annotaton_2.py"

In [25]:
# Root directory for project data, read from the DATA_PATH environment variable.
# os.getenv returns None when the variable is not set.
data_path = os.getenv('DATA_PATH')


def file_path(*args):
    """Build a path inside this paper's ``mathpix/generated_code`` directory.

    Args:
        *args: Optional extra path components (for example a file name)
            appended after the ``generated_code`` directory.

    Returns:
        str: ``data_path/project_folder/sub_category/work_bucket/base_name/
        mathpix/generated_code`` followed by any ``args``.

    Raises:
        RuntimeError: If DATA_PATH was not set when ``data_path`` was read.
    """
    if data_path is None:
        # Without this guard os.path.join(None, ...) fails with an opaque TypeError.
        raise RuntimeError(
            "DATA_PATH environment variable is not set; "
            "export it (or assign data_path) before building output paths."
        )
    return os.path.join(
        data_path,
        project_folder,
        sub_category,
        work_bucket,
        base_name,
        'mathpix',
        'generated_code',
        *args,
    )

In [26]:
# Create the generated_code output directory (and any missing parents).
# exist_ok=True means re-running this cell does not raise if it already exists.
os.makedirs(file_path(), exist_ok=True)

In [3]:
# System prompt for the first request: ask the model for a 10-point dataset,
# returned as JSON, to feed a math expression. `purpose` is interpolated as one
# of the bullet lines.
# NOTE: `system_prompt` is reassigned further down for the code-generation
# request, so re-run this cell before re-running the dataset request.
system_prompt = f"""You are an expert mathematician and data scientist.
- I want to generate a dataset as input for a math expression
{purpose}
- I need the full dataset with 10 points
- Please format your response in JSON. You only speak JSON. Do not write text that isn't JSON.
"""

In [4]:
# LaTeX source of the expression sent to the model. A raw string keeps the
# backslashes literal: in a normal string "\s", "\o" and "\e" are invalid
# escape sequences that Python warns about. The resulting value is unchanged.
math_text = r"""
\sqrt{|g|}=\sqrt{-\operatorname{det} g}=\sqrt{f(\eta) g(\eta)^{d-1}}
"""

In [5]:
# Request 1: send the dataset prompt plus the LaTeX expression to the model.
# NOTE(review): output_json=True presumably makes the helper return parsed JSON
# (the value displayed in the next cell is a dict) - confirm in functions.gpt.
math_dataset_response = gpt.chat_create(system_prompt, math_text, model, output_json=True)

In [6]:
# Inspect the dataset returned by the model.
# NOTE(review): the 'sqrt_g' values come from the model and are not recomputed here.
# In the recorded run the first row reports 2.828..., while the generated code
# executed later prints 7.348... for the same inputs - do not treat 'sqrt_g' as ground truth.
math_dataset_response

{'dataset': [{'eta': 1,
   'f_eta': 2,
   'g_eta': 3,
   'd': 4,
   'sqrt_g': 2.8284271247461903},
  {'eta': 2, 'f_eta': 3, 'g_eta': 4, 'd': 5, 'sqrt_g': 6.48074069840786},
  {'eta': 3, 'f_eta': 4, 'g_eta': 5, 'd': 6, 'sqrt_g': 13.856406460551018},
  {'eta': 4, 'f_eta': 5, 'g_eta': 6, 'd': 7, 'sqrt_g': 29.478805945967352},
  {'eta': 5, 'f_eta': 6, 'g_eta': 7, 'd': 8, 'sqrt_g': 64.0078125},
  {'eta': 6, 'f_eta': 7, 'g_eta': 8, 'd': 9, 'sqrt_g': 139.96875},
  {'eta': 7, 'f_eta': 8, 'g_eta': 9, 'd': 10, 'sqrt_g': 307.546875},
  {'eta': 8, 'f_eta': 9, 'g_eta': 10, 'd': 11, 'sqrt_g': 675.390625},
  {'eta': 9, 'f_eta': 10, 'g_eta': 11, 'd': 12, 'sqrt_g': 1478.515625},
  {'eta': 10, 'f_eta': 11, 'g_eta': 12, 'd': 13, 'sqrt_g': 3240.390625}]}

In [7]:
# System prompt for the second request: ask the model for comment-free Python
# that evaluates the expression over `math_dataset_response` and prints the results.
# This overwrites the dataset prompt defined earlier under the same name.
# NOTE(review): the prompt contains typos ("LaTex", "make that the dataset works");
# the text is left verbatim because it is sent to the model as-is.
system_prompt = f"""You are an expert mathematician and data scientist.
- Please generate python code to execute the provided dataset given a LaTex math expression and print out the results.
- Please format your response in the python coding language. You only speak python. Do not write text that isn't python.
- Do not include any comments in your code.
- Do not include the dataset in your response. 
- The dataset variable name will be math_dataset_response.
{purpose}
- Check your work and make that the dataset works with the code that you provide.
- Check the first key in the dataset.
"""

In [8]:
# User message for the code-generation request: the LaTeX expression followed by
# the dataset. The dataset is rendered via the f-string's default formatting of
# the object (a Python dict repr in the recorded run), not re-serialized as JSON.
math_plus_data = f'math expression: {math_text}\ndataset: {math_dataset_response}'

In [9]:
# Request 2: ask the model for Python code. output_json=False - the response is
# used below as plain source text (rendered as Markdown, exec'd, and written to a file).
python_code_response = gpt.chat_create(system_prompt, math_plus_data, model, output_json=False)

In [10]:
# Render the generated source so it can be reviewed before it is executed.
display(Markdown(python_code_response))

import math

for data in math_dataset_response['dataset']:
    eta = data['eta']
    f_eta = data['f_eta']
    g_eta = data['g_eta']
    d = data['d']
    sqrt_g = math.sqrt(abs(f_eta * (g_eta ** (d - 1))))
    print(sqrt_g)

In [11]:
# SECURITY: this runs model-generated code in the notebook's own namespace with
# the kernel's full privileges. Review the source displayed in the previous cell
# before executing. The generated code reads `math_dataset_response` from this namespace.
exec(python_code_response)

7.3484692283495345
27.712812921102035
111.80339887498948
482.99068313995457
2222.894059553896
10836.997370120564
55671.93109637926
300000.0
1689117.1380665107
9903388.558004579


In [28]:
# Serialize the dataset as indented JSON text so it can be embedded in the script.
# NOTE(review): JSON and Python literals differ for true/false/null; a dataset
# containing those values would produce a script that fails to run. The recorded
# dataset holds only numbers, where the two notations agree.
pretty_dataset = json.dumps(math_dataset_response, indent=4)

# Write a standalone script: the dataset assignment followed by the generated code.
# The encoding is explicit because Python source files are read as UTF-8 by
# default, whereas open() would otherwise use the platform's locale encoding.
with open(file_path(output_file), 'w', encoding='utf-8') as f:
    f.write(f"math_dataset_response = {pretty_dataset}\n\n")
    f.write(python_code_response)

In [None]:
# TODO: how can we put the generated datasets together?