In [1]:
import json
import matplotlib.pyplot as plt
import numpy as np
import os
import pprint
import subprocess

from IPython.display import display, Markdown
from sympy import symbols, Function

import sys
sys.path.append('../')
from functions import gpt

In [2]:
# Context line interpolated into the prompts sent to the model.
purpose = "- The data is for expressions related to neural ordinary differential equation and holographic quantum chromodynamics"

# Identifier of the annotated expression and its LaTeX source.
annotation_name = 'a3'
# Raw string: in a normal literal "\f" (from "\frac") is a form-feed escape and
# "\p" / "\m" are invalid escapes, so the LaTeX would reach the model corrupted.
math_text = r"""a_{i}(t)=\frac{\partial \mathcal{L}}{\partial x_{i}(t)}"""

paper_short_name = 'hashimoto'
# Name of the variable the dataset is bound to in the generated script.
dataset_name = 'dataset'

# Path components used to build the output directory.
base_name = "2021_Hashimoto_Neural_ODE_and_holographic_QCD_PUB"
work_bucket = "AdS-CFT"
project_folder = "diygenomics-projects"
sub_category = "math"

model = 'gpt-4'
# File name of the generated script, e.g. "hashimoto_a3.py".
output_file = f'{paper_short_name}_{annotation_name}.py'

# A1 and A2 feed A4 and A8; A3 feeds A5, A6, and A7

In [3]:
# Root directory for project data, read from the DATA_PATH environment variable.
data_path = os.getenv('DATA_PATH')


def file_path(*args):
    """Return a path inside this paper's ``mathpix/generated_code`` directory.

    Args:
        *args: Extra path components appended below the directory; with no
            arguments the directory itself is returned.

    Returns:
        str: The joined path.

    Raises:
        RuntimeError: If the DATA_PATH environment variable is not set.
    """
    if data_path is None:
        # os.path.join(None, ...) would otherwise fail with an opaque TypeError.
        raise RuntimeError("DATA_PATH environment variable is not set")
    return os.path.join(data_path, project_folder, sub_category, work_bucket, base_name, 'mathpix', 'generated_code', *args)

In [4]:
# Make sure the generated-code directory exists before anything is written to it.
generated_code_dir = file_path()
os.makedirs(generated_code_dir, exist_ok=True)

In [5]:
# Prompt asking the model for a JSON-only input dataset for the math expression;
# `purpose` is interpolated as one of the bullet lines.
dataset_prompt = f"""You are an expert mathematician and data scientist.
- I want to generate a dataset as input for a math expression
- The data need to return results that are not NaN or None
{purpose}
- Please format your response in JSON. You only speak JSON. Do not write text that isn't JSON.
- The output of this will be used as input for another expression. 
- Be consice in your output so that it can be feed into the next expression.
"""

In [6]:
# Request the dataset from the model, passing the prompt and the LaTeX expression.
# NOTE(review): output_json=True presumably makes chat_create return parsed JSON — confirm in functions/gpt.
dataset = gpt.chat_create(dataset_prompt, math_text, model, output_json=True)

In [7]:
# Display the generated dataset for inspection.
dataset

{'dataset': [{'time': 1, 'x_i(t)': 0.5, 'a_i(t)': 0.25},
  {'time': 2, 'x_i(t)': 1.0, 'a_i(t)': 0.5},
  {'time': 3, 'x_i(t)': 1.5, 'a_i(t)': 0.75},
  {'time': 4, 'x_i(t)': 2.0, 'a_i(t)': 1.0},
  {'time': 5, 'x_i(t)': 2.5, 'a_i(t)': 1.25}]}

In [8]:
# Bundle the expression and its dataset into a single message for the follow-up prompts.
math_plus_data = 'math expression: {}\ndataset: {}'.format(math_text, dataset)

In [9]:
# Prompt asking the model to describe the dataset so that a later prompt can turn
# the math expression into Python code.
# NOTE: this rebinds `dataset_prompt`, replacing the dataset-generation prompt
# assigned earlier in the notebook.
dataset_prompt = f"""You are an expert mathematician and data scientist.
- I am going to use this dataset as input to the provided math expression. 
- I need you to help me describe the dataset in a way that will allow me to prompt an LLM to create python code from the math expression.
- Please describe the data in a way that will ensure that the math expression can be properly executed from the generated python based on the math 
expression.
"""

In [10]:
# Ask the model for a plain-text description of the dataset (no JSON output requested).
dataset_instructions = gpt.chat_create(dataset_prompt, math_plus_data, model, output_json=False)

In [11]:
# Render the model's dataset description as Markdown.
dataset_instructions_md = Markdown(dataset_instructions)
display(dataset_instructions_md)

The dataset is a list of dictionaries where each dictionary represents a data point in time. Each dictionary contains three key-value pairs: 'time', 'x_i(t)', and 'a_i(t)'. 

- 'time' represents the time at which the data point is recorded. It is an integer starting from 1 and increases by 1 at each subsequent data point.
- 'x_i(t)' represents the value of the function x_i at time t. It is a floating-point number that starts at 0.5 and increases by 0.5 at each subsequent data point.
- 'a_i(t)' represents the value of the function a_i at time t. It is a floating-point number that starts at 0.25 and increases by 0.25 at each subsequent data point.

The math expression is a partial derivative of a function L with respect to x_i(t), which results in a_i(t). In the context of this dataset, it means that a_i(t) is the rate of change of the function L with respect to x_i(t) at each point in time. 

To execute this math expression using the dataset, the Python code needs to iterate over the list of dictionaries, extract the values of 'x_i(t)' and 'a_i(t)' at each point in time, and perform the necessary calculations.

In [12]:
# Prompt asking the model for Python-only code that runs the expression on the
# dataset; interpolates the dataset variable name, the model-written dataset
# description, and the `purpose` line.
code_prompt = f"""You are an expert mathematician and data scientist.
- Please generate python code to execute the provided dataset given a LaTex math expression and print out the results.
- Please format your response in the python coding language. You only speak python. Do not write text that isn't python.
- Do not include any comments in your code.
- Do not include the dataset in your response. 
- The dataset variable name will be {dataset_name}.
{dataset_instructions}
{purpose}
- Check your work and make that the dataset works with the code that you provide.
- Check the first key in the dataset.
"""

In [13]:
# # code_prompt = f"""You are an expert mathematician and data scientist.

# # Please generate python code that executes the provided dataset given a LaTex math expression and print out the results.
# # Please format your response in the python coding language. You only speak python. Do not write text that isn't python.
# # Do not include any comments in your code.
# # Do not include the dataset in your response.
# # The dataset variable name will be 'dataset'. The dataset is a dictionary which contains a key named 'dataset'. This key points to a list of dictionaries, each dictionary containing the keys 'time', 'x_i', and 'a_i'.
# # The data is for expressions related to neural ordinary differential equations and holographic quantum chromodynamics.
# # Generate a symbolic function, differentiate it with respect to x_i, substitute the values from each data point in the dataset into the equation, solve it and print the solution.
# # Check your work and make sure that the dataset works with the code that you provide.
# # Check the first dictionary in the 'dataset' list. You'll find 'time', 'x_i', and 'a_i' as keys in this dictionary.
# # """

In [14]:
# Ask the model to generate the Python code for the expression and dataset.
python_code = gpt.chat_create(code_prompt, math_plus_data, model, output_json=False)

In [15]:
# Render the generated code as Markdown for review.
python_code_md = Markdown(python_code)
display(python_code_md)

for data_point in dataset:
    time = data_point['time']
    x_i_t = data_point['x_i(t)']
    a_i_t = data_point['a_i(t)']
    print(f"At time {time}, the rate of change of the function L with respect to x_i(t) is {a_i_t}.")

In [16]:
# Serialise the dataset as a Python literal. json.dumps would emit JSON tokens
# (null / true / false) that raise NameError when the written file is executed.
pretty_dataset = pprint.pformat(dataset)

# Write the script: the dataset assignment followed by the model's code.
# UTF-8 is what the Python interpreter assumes when it reads the file back.
with open(file_path(output_file), 'w', encoding='utf-8') as f:
    f.write(f'{dataset_name} = {pretty_dataset}\n\n')
    f.write(python_code)

In [19]:
# Run the generated script with the same interpreter as this kernel; a bare
# 'python' depends on PATH and may be missing or point at another environment.
# NOTE: this executes model-generated code without sandboxing — review the
# written file before running it.
command = [sys.executable, file_path(output_file)]
result = subprocess.run(command, text=True, capture_output=True)

# Keep both streams; error_output is interpolated into the error-fix prompt.
output = result.stdout
error_output = result.stderr

if result.returncode != 0:
    print("Error output:", error_output)
else:
    print("Output:", output)

Output: At time 1, the rate of change of the function L with respect to x_i(t) is 0.25.
At time 2, the rate of change of the function L with respect to x_i(t) is 0.5.
At time 3, the rate of change of the function L with respect to x_i(t) is 0.75.
At time 4, the rate of change of the function L with respect to x_i(t) is 1.0.
At time 5, the rate of change of the function L with respect to x_i(t) is 1.25.



In [None]:
# Prompt asking the model to repair the generated script, given the dataset
# assignment, the failing code, and the captured stderr.
# The dataset line is interpolated directly; the previous text wrapped it in a
# literal f'...' that ended up verbatim in the prompt.
error_fix_prompt = f"""You are an expert mathematician and data scientist.
I have this Python code and it's throwing an error. 

Here is the dataset:

{dataset_name} = {pretty_dataset}

Here is the code:

{python_code}

And here is the error message:

{error_output}

- Please generate python code to fix the error and to execute the provided dataset.
- Please format your response in the python coding language. You only speak python. Do not write text that isn't python.
- Make sure that your code prints results that were fully executed and return numbers or lists of numbers
"""

In [None]:
# Ask the model for corrected code; everything is carried in the first argument,
# so the second argument is an empty string.
error_fix_code = gpt.chat_create(error_fix_prompt, '', model, output_json=False)

In [None]:
# Display the corrected code returned by the model.
error_fix_code

In [None]:
# Serialise the dataset as a Python literal. json.dumps would emit JSON tokens
# (null / true / false) that raise NameError when the written file is executed.
pretty_dataset = pprint.pformat(dataset)

# Overwrite the script with the dataset assignment followed by the corrected code.
# UTF-8 is what the Python interpreter assumes when it reads the file back.
with open(file_path(output_file), 'w', encoding='utf-8') as f:
    f.write(f'{dataset_name} = {pretty_dataset}\n\n')
    f.write(error_fix_code)

In [None]:
# Run the corrected script with the same interpreter as this kernel; a bare
# 'python' depends on PATH and may be missing or point at another environment.
# NOTE: this executes model-generated code without sandboxing — review the
# written file before running it.
command = [sys.executable, file_path(output_file)]
result = subprocess.run(command, text=True, capture_output=True)

# Capture both streams for inspection in later cells.
output = result.stdout
error_output = result.stderr

In [None]:
# First prompt for the regenerated-code request: output rules plus the `purpose` line.
updated_input_prompt = f"""You are an expert mathematician and data scientist.
- Please format your response in the python coding language. You only speak python. Do not write text that isn't python.
- Do not include any comments in your code.
- Do not include the dataset in your response. 
- The dataset variable name will be {dataset_name}.
{purpose}
- Make sure that the generated code prints the results.
"""

# Second prompt: describes the expected input shape and embeds the expression and dataset.
# NOTE(review): the key names 'x_i' and 'a_i' are hard-coded in this text; confirm
# they match the keys actually present in `dataset`.
user_prompt = f"""Generate Python code that takes as input a list of dictionaries, where each dictionary represents a data point in a dataset. 
Each dictionary has three key-value pairs: 'time', 'x_i', and 'a_i'. 
The code should implement the mathematical expression {math_text}. 
Use the following dataset as an example for testing:
{dataset}
"""

In [None]:
# Ask the model to regenerate the code using the two prompts defined for this attempt.
updated_python_code = gpt.chat_create(updated_input_prompt, user_prompt, model, output_json=False)

In [None]:
# Serialise the dataset as a Python literal. json.dumps would emit JSON tokens
# (null / true / false) that raise NameError when the written file is executed.
pretty_dataset = pprint.pformat(dataset)

# Overwrite the script with the dataset assignment followed by the regenerated code.
# UTF-8 is what the Python interpreter assumes when it reads the file back.
with open(file_path(output_file), 'w', encoding='utf-8') as f:
    f.write(f'{dataset_name} = {pretty_dataset}\n\n')
    f.write(updated_python_code)

In [None]:
# Run the regenerated script with the same interpreter as this kernel; a bare
# 'python' depends on PATH and may be missing or point at another environment.
# NOTE: this executes model-generated code without sandboxing — review the
# written file before running it.
command = [sys.executable, file_path(output_file)]
result = subprocess.run(command, text=True, capture_output=True)

# Capture both streams so either can be reported below.
output = result.stdout
error_output = result.stderr

if result.returncode != 0:
    print("The command failed.")
    print("Error output:", error_output)
else:
    print("The command succeeded.")
    print("Output:", output)