In [None]:
import json
import matplotlib.pyplot as plt
import numpy as np
import os
import pprint

from IPython.display import display, Markdown
from sympy import symbols, Function

import sys
sys.path.append('../')
from functions import gpt

In [None]:
# --- Run configuration for annotation A1 ---

# Model name passed to every gpt.chat_create call in this notebook.
model = 'gpt-4'

# Folder components used to build the output path under DATA_PATH.
project_folder = "diygenomics-projects"
sub_category = "math"
work_bucket = "AdS-CFT"
base_name = "2021_Hashimoto_Neural_ODE_and_holographic_QCD_PUB"

# Identifiers that name the dataset variable and the generated script.
paper_short_name = 'hashimoto'
annotation_name = 'a1'
dataset_name = annotation_name + '_dataset'
output_file = '{}_{}.py'.format(paper_short_name, annotation_name)

# Extra bullet inserted verbatim into both prompts to give the model domain context.
purpose = "- The data is for expressions related to neural ordinary differential equation and holographic quantum chromodynamics"

# A1 and A2 feed A4 and A8; A3 feeds A5, A6, and A7

In [None]:
# Root data directory, taken from the DATA_PATH environment variable (None when it is not set).
data_path = os.getenv('DATA_PATH')


def file_path(*args):
    """Return a path inside this paper's ``mathpix/generated_code`` folder.

    The folder is built from ``data_path`` and the notebook's configuration
    values (``project_folder``, ``sub_category``, ``work_bucket``,
    ``base_name``). These are read at call time, so later changes to them
    are picked up.

    Args:
        *args: Optional extra path components (e.g. a file name) appended
            after ``generated_code``. With no arguments the folder itself
            is returned.

    Returns:
        str: The joined path.

    Raises:
        RuntimeError: If ``data_path`` is None, i.e. DATA_PATH was not set
            when the cell defining it ran.
    """
    if data_path is None:
        # Fail with an actionable message instead of the opaque TypeError
        # that os.path.join raises when handed None.
        raise RuntimeError(
            "The DATA_PATH environment variable is not set; "
            "set it to the root data directory and re-run this cell."
        )
    return os.path.join(
        data_path,
        project_folder,
        sub_category,
        work_bucket,
        base_name,
        'mathpix',
        'generated_code',
        *args,
    )

In [None]:
# Create the generated-code output folder (including missing parents); exist_ok=True makes re-running this cell harmless.
os.makedirs(file_path(), exist_ok=True)

In [None]:
# System prompt for the dataset request. Built line by line so each instruction is visible on its own;
# the resulting text (including the trailing newline) is what gets sent to the model.
dataset_prompt = (
    "You are an expert mathematician and data scientist.\n"
    "- I want to generate a dataset as input for a math expression\n"
    "- The data need to return results that are not NaN or None\n"
    f"{purpose}\n"
    "- Please format your response in JSON. You only speak JSON. Do not write text that isn't JSON.\n"
    "- The output of this will be used as input for another expression. \n"
    "- Be consice in your output so that it can be feed into the next expression.\n"
)

In [None]:
# LaTeX source of the A1 expression. This must be a *raw* string: in a normal string the
# "\f", "\v" and "\t" at the start of \frac, \vec and \theta are escape sequences
# (form feed, vertical tab, tab), which would silently corrupt the LaTeX sent to the model.
a1_math_text = r"""
\frac{d x_{i}(t)}{d t}=v_{i}(\vec{x}(t), t ; \theta)
"""

In [None]:
# Send the dataset prompt plus the A1 expression to the project's gpt helper.
# NOTE(review): output_json=True presumably makes the helper return parsed JSON — confirm in functions/gpt.py.
a1_dataset = gpt.chat_create(
    dataset_prompt,
    a1_math_text,
    model,
    output_json=True,
)

In [None]:
# Show the generated dataset as the cell output so it can be inspected before use.
a1_dataset

In [None]:
# System prompt for the code-generation request. Built line by line so each instruction is visible
# on its own; dataset_name and purpose are interpolated from the configuration cell.
code_prompt = (
    "You are an expert mathematician and data scientist.\n"
    "- Please generate python code to execute the provided dataset given a LaTex math expression and print out the results.\n"
    "- Please format your response in the python coding language. You only speak python. Do not write text that isn't python.\n"
    "- Do not include any comments in your code.\n"
    "- Do not include the dataset in your response. \n"
    f"- The dataset variable name will be {dataset_name}.\n"
    f"{purpose}\n"
    "- Check your work and make that the dataset works with the code that you provide.\n"
    "- Check the first key in the dataset.\n"
)

In [None]:
# Combine the LaTeX expression and the generated dataset into the single message sent with the code prompt.
math_plus_data = 'math expression: {}\ndataset: {}'.format(a1_math_text, a1_dataset)

In [None]:
# Ask the gpt helper for Python code that evaluates the expression on the dataset.
# output_json=False is passed here, unlike the dataset request above.
python_code = gpt.chat_create(
    code_prompt,
    math_plus_data,
    model,
    output_json=False,
)

In [None]:
# Render the model's response in the notebook so it can be read before the next cell executes it.
display(Markdown(python_code))

In [None]:
# NOTE(review): exec() runs model-generated code with full access to this kernel's namespace and
# the local machine. Read the rendered code in the previous cell before running this one.
# code_prompt tells the model the dataset variable is named by dataset_name, so the generated
# code is expected to find that variable in this notebook's globals.
exec(python_code)

In [None]:
# Format the dataset as a Python literal, not JSON: the target is a .py file, and JSON's
# true/false/null would not be valid Python there. sort_dicts=False keeps the original key
# order, which matters because code_prompt refers to "the first key in the dataset".
pretty_dataset = pprint.pformat(a1_dataset, indent=4, sort_dicts=False)

# Write a standalone script: the dataset assignment followed by the generated code.
# The explicit UTF-8 encoding avoids platform-dependent failures if the generated code
# contains non-ASCII characters.
with open(file_path(output_file), 'w', encoding='utf-8') as f:
    f.write(f'{dataset_name} = {pretty_dataset}\n\n')
    f.write(python_code)

In [None]:
# annotation_name = 'a2'
# dataset_name = f'{annotation_name}_dataset'