In [1]:
import json
import matplotlib.pyplot as plt
import numpy as np
import os
import pprint

from IPython.display import display, Markdown
from sympy import symbols, Function

import sys
sys.path.append('../')
from functions import gpt

In [2]:
# --- Run configuration ---------------------------------------------------
# Subject-matter hint; it is interpolated into both GPT prompts below.
purpose = "- The data is for expressions related to neural ordinary differential equation and holographic quantum chromodynamics"

# Pieces of the generated file's name.
paper_short_name = 'hashimoto'
annotation_name = 'a2'

# Variable name the dataset is bound to in the generated Python file.
dataset_name = 'dataset'

# Path components used by file_path() to locate this paper's output folder.
base_name = "2021_Hashimoto_Neural_ODE_and_holographic_QCD_PUB"
work_bucket = "AdS-CFT"
project_folder = "diygenomics-projects"
sub_category = "math"

# Model identifier handed to gpt.chat_create.
model = 'gpt-4'

# "<paper>_<annotation>.py"
output_file = '{}_{}.py'.format(paper_short_name, annotation_name)

# A1 and A2 feed A4 and A8; A3 feeds A5, A6, and A7

In [3]:
# Root of the data tree, read from the environment; None when DATA_PATH is unset.
data_path = os.getenv('DATA_PATH')


def file_path(*args):
    """Build a path inside this paper's ``mathpix/generated_code`` directory.

    Args:
        *args: Optional extra path components (for example a file name)
            appended after the ``generated_code`` directory.

    Returns:
        str: ``data_path/project_folder/sub_category/work_bucket/base_name/
        mathpix/generated_code`` joined with ``*args``.

    Raises:
        RuntimeError: If ``data_path`` is None, i.e. the ``DATA_PATH``
            environment variable was not set when it was read.
    """
    # Fail with an actionable message instead of the opaque TypeError that
    # os.path.join raises when its first component is None.
    if data_path is None:
        raise RuntimeError(
            "DATA_PATH environment variable is not set; "
            "set it to the root data directory before running this notebook."
        )
    return os.path.join(
        data_path,
        project_folder,
        sub_category,
        work_bucket,
        base_name,
        'mathpix',
        'generated_code',
        *args,
    )

In [4]:
# Create the generated-code output directory (file_path() with no arguments);
# exist_ok=True keeps this cell safe to re-run when the directory already exists.
os.makedirs(file_path(), exist_ok=True)

In [5]:
# System prompt for the dataset-generation request; {purpose} injects the
# subject-matter hint defined in the configuration cell.
dataset_prompt = f"""You are an expert mathematician and data scientist.
- I want to generate a dataset as input for a math expression
- The data need to return results that are not NaN or None
{purpose}
- Please format your response in JSON. You only speak JSON. Do not write text that isn't JSON.
- The output of this will be used as input for another expression. 
- Be concise in your output so that it can be fed into the next expression.
"""

In [6]:
# Raw string so the LaTeX backslashes reach the model verbatim: in a normal
# string literal "\v" (as in "\vec") is the vertical-tab escape and "\m" is an
# invalid escape sequence.
math_text = r"""\mathcal{L}=\mathcal{L}(\vec{x}(1))"""

In [7]:
# Ask the model for a sample dataset for math_text, using dataset_prompt as the
# instructions. NOTE(review): output_json=True presumably makes the helper
# return parsed JSON (the displayed result below is a dict) — confirm in functions/gpt.
dataset = gpt.chat_create(dataset_prompt, math_text, model, output_json=True)

In [8]:
# Display the returned dataset for inspection.
dataset

{'dataset': [{'x_i(t)': 1.0,
   'v_i(x(t), t; theta)': 0.5,
   't': 0.0,
   'theta': 0.1},
  {'x_i(t)': 1.5, 'v_i(x(t), t; theta)': 0.75, 't': 0.5, 'theta': 0.2},
  {'x_i(t)': 2.0, 'v_i(x(t), t; theta)': 1.0, 't': 1.0, 'theta': 0.3},
  {'x_i(t)': 2.5, 'v_i(x(t), t; theta)': 1.25, 't': 1.5, 'theta': 0.4},
  {'x_i(t)': 3.0, 'v_i(x(t), t; theta)': 1.5, 't': 2.0, 'theta': 0.5}]}

In [9]:
# System prompt for the code-generation request; {dataset_name} tells the model
# which variable name the dataset will be bound to, and {purpose} injects the
# subject-matter hint defined in the configuration cell.
code_prompt = f"""You are an expert mathematician and data scientist.
- Please generate python code to execute the provided dataset given a LaTex math expression and print out the results.
- Please format your response in the python coding language. You only speak python. Do not write text that isn't python.
- Do not include any comments in your code.
- Do not include the dataset in your response. 
- The dataset variable name will be {dataset_name}.
{purpose}
- Check your work and make sure that the dataset works with the code that you provide.
- Check the first key in the dataset.
"""

In [10]:
# User message for the code-generation request: the LaTeX expression followed
# by the string form of the dataset returned by the previous request.
math_plus_data = 'math expression: {}\ndataset: {}'.format(math_text, dataset)

In [11]:
# Ask the model for Python source that evaluates the expression over the
# dataset; the reply is kept as text and is later displayed, exec'd and written to disk.
python_code = gpt.chat_create(code_prompt, math_plus_data, model, output_json=False)

In [12]:
# Render the generated source in the notebook so it can be reviewed before it is executed.
display(Markdown(python_code))

import sympy as sp

# Define the symbols
x_i_t, v_i_x_t_t_theta, t, theta = sp.symbols('x_i_t v_i_x_t_t_theta t theta')

# Define the expression
expr = sp.diff(x_i_t, t) - v_i_x_t_t_theta

# Evaluate the expression for each data point in the dataset
for data in dataset['dataset']:
    result = expr.subs({x_i_t: data['x_i(t)'], v_i_x_t_t_theta: data['v_i(x(t), t; theta)'], t: data['t'], theta: data['theta']})
    print(result)

In [13]:
# SECURITY: this runs model-generated source verbatim inside the kernel's
# namespace (it reads the `dataset` variable defined above). Review the code
# displayed in the previous cell before executing this one.
exec(python_code)

-0.500000000000000
-0.750000000000000
-1.00000000000000
-1.25000000000000
-1.50000000000000


In [14]:
# Render the dataset as a Python literal rather than JSON: the text is written
# into a .py file as "<dataset_name> = ...", and JSON spellings such as
# true/false/null are not valid Python. sort_dicts=False keeps the key order
# returned by the model.
pretty_dataset = pprint.pformat(dataset, sort_dicts=False)

# Write the dataset assignment followed by the generated code. The encoding is
# explicit so the result does not depend on the platform's default encoding.
with open(file_path(output_file), 'w', encoding='utf-8') as f:
    f.write(f'{dataset_name} = {pretty_dataset}\n\n')
    f.write(python_code)