In [1]:
import json
import matplotlib.pyplot as plt
import numpy as np
import os
import pprint
import subprocess

from IPython.display import display, Markdown
from sympy import symbols, Function

import sys
sys.path.append('../')
from functions import gpt

In [2]:
# --- Run configuration -------------------------------------------------------
# Context bullet inserted verbatim into both LLM prompts below.
purpose = "- The data is for expressions related to neural ordinary differential equation and holographic quantum chromodynamics"

# Identifiers used to build the name of the generated script.
paper_short_name = 'hashimoto'
annotation_name = 'a3'
# Variable name the dataset is bound to inside the generated script.
dataset_name = 'dataset'

# Path components of the output directory (joined by file_path).
base_name = "2021_Hashimoto_Neural_ODE_and_holographic_QCD_PUB"
work_bucket = "AdS-CFT"
project_folder = "diygenomics-projects"
sub_category = "math"

# Model identifier passed to gpt.chat_create.
model = 'gpt-4'
# File name of the generated script: <paper>_<annotation>.py
output_file = f'{paper_short_name}_{annotation_name}.py'

# Annotation dependency note (presumably refers to sibling annotations — confirm):
# A1 and A2 feed A4 and A8; A3 feeds A5, A6, and A7

In [3]:
# Root directory of all project data, read from the DATA_PATH environment variable
# (None when the variable is not set).
data_path = os.getenv('DATA_PATH')


def file_path(*args):
    """Return a path inside this paper's ``mathpix/generated_code`` directory.

    Args:
        *args: Optional path components appended below the directory. With no
            arguments the directory itself is returned.

    Returns:
        The joined path as a string.

    Raises:
        EnvironmentError: If the DATA_PATH environment variable is not set.
            Without this check ``os.path.join`` fails with an opaque TypeError.
    """
    if data_path is None:
        raise EnvironmentError(
            "The DATA_PATH environment variable is not set; "
            "it must point to the root data directory."
        )
    return os.path.join(
        data_path,
        project_folder,
        sub_category,
        work_bucket,
        base_name,
        'mathpix',
        'generated_code',
        *args,
    )

In [4]:
# Create the generated-code output directory (including missing parents);
# exist_ok=True makes re-running this cell harmless.
os.makedirs(file_path(), exist_ok=True)

In [5]:
# System prompt for the dataset-generation request. Assembled from adjacent
# string literals, one per prompt line, so trailing spaces are explicit.
dataset_prompt = (
    "You are an expert mathematician and data scientist.\n"
    "- I want to generate a dataset as input for a math expression\n"
    "- The data need to return results that are not NaN or None\n"
    f"{purpose}\n"
    "- Please format your response in JSON. You only speak JSON. Do not write text that isn't JSON.\n"
    "- The output of this will be used as input for another expression. \n"
    "- Be consice in your output so that it can be feed into the next expression.\n"
)

In [6]:
# LaTeX source of the expression sent to the model. This must be a raw string:
# in a normal literal "\f" (as in \frac) is the form-feed escape, which silently
# corrupts the expression, and "\p" / "\m" are invalid escape sequences.
math_text = r"""a_{i}(t)=\frac{\partial \mathcal{L}}{\partial x_{i}(t)}"""

In [7]:
# Ask the model for a sample dataset for math_text, using dataset_prompt as the
# instructions. NOTE(review): output_json=True presumably makes the helper return
# a parsed dict (the displayed value below is a dict) — confirm in functions/gpt.
dataset = gpt.chat_create(dataset_prompt, math_text, model, output_json=True)

In [8]:
# Display the generated dataset for inspection.
dataset

{'dataset': [{'time': 1, 'x_i(t)': 0.5, 'partial_derivative_L': 0.2},
  {'time': 2, 'x_i(t)': 0.7, 'partial_derivative_L': 0.3},
  {'time': 3, 'x_i(t)': 0.9, 'partial_derivative_L': 0.4},
  {'time': 4, 'x_i(t)': 1.1, 'partial_derivative_L': 0.5},
  {'time': 5, 'x_i(t)': 1.3, 'partial_derivative_L': 0.6}]}

In [9]:
# Top-level key of the generated dataset; the prompt repeats it so the model
# indexes the dict with the right key.
first_key = next(iter(dataset))

# System prompt for the code-generation request. Assembled from adjacent string
# literals, one per prompt line, so trailing spaces are explicit.
code_prompt = (
    "You are an expert mathematician and data scientist.\n"
    "- Please format your response in the python coding language. You only speak python. Do not write text that isn't python.\n"
    "- Do not include any comments in your code.\n"
    "- Do not include the dataset in your response. \n"
    f"- The dataset variable name will be {dataset_name}.\n"
    f"{purpose}\n"
    "- Make sure that the generated code prints the results.\n"
    "- Check your work and make that the dataset works with the python code that you provide.\n"
    "- Check the first key in the dataset. \n"
    f"- The first key is '{first_key}'. \n"
    f"- It is very important that you make sure that you access the dataset dict with the provided key -> '{first_key}'.\n"
    "- Make sure that your code prints results that were fully executed and return numbers or lists of numbers \n"
    "based on the provide math expression.\n"
)

In [10]:
# User message for the code-generation request: the expression on the first
# line, then the string form of the dataset on the second.
math_plus_data = 'math expression: {}\ndataset: {}'.format(math_text, dataset)

In [11]:
# Ask the model for Python code that evaluates the expression on the dataset.
# NOTE(review): output_json=False presumably returns the raw reply text (it is
# written to a file and rendered as text below) — confirm in functions/gpt.
python_code = gpt.chat_create(code_prompt, math_plus_data, model, output_json=False)

In [12]:
# Show the generated source inside a fenced code block. Rendered as bare
# Markdown, characters such as '#', '*' and '_' are treated as markup and line
# breaks collapse, so the code would not display verbatim.
display(Markdown(f"```python\n{python_code}\n```"))

import pandas as pd

df = pd.DataFrame(dataset['dataset'])
df['a_i(t)'] = df['partial_derivative_L'] / df['x_i(t)']
print(df['a_i(t)'].tolist())

In [13]:
# Serialise the dataset as a Python literal, not JSON: JSON spells None/True/False
# as null/true/false, which are undefined names once pasted into a .py file.
# sort_dicts=False keeps the key order returned by the model.
pretty_dataset = pprint.pformat(dataset, sort_dicts=False)

# The generated script is "<dataset_name> = <literal>" followed by the model's
# code. An explicit UTF-8 encoding matches Python's default source encoding, so
# non-ASCII characters survive regardless of the platform locale.
with open(file_path(output_file), 'w', encoding='utf-8') as f:
    f.write(f'{dataset_name} = {pretty_dataset}\n\n')
    f.write(python_code)

In [14]:
# Run the generated script with the same interpreter as this kernel. A bare
# 'python' is resolved through PATH and may be a different installation that
# lacks the packages the generated code imports.
# NOTE(review): this executes LLM-generated code without sandboxing; inspect the
# rendered source above before running this cell.
command = [sys.executable, file_path(output_file)]
result = subprocess.run(command, text=True, capture_output=True)

# Captured streams of the child process (text mode, so both are str).
output = result.stdout
error_output = result.stderr

# A non-zero exit status means the script failed: show stderr. Otherwise show
# what the script printed.
if result.returncode != 0:
    print("Error output:", error_output)
else:
    print("Output:", output)

Output: [0.4, 0.4285714285714286, 0.4444444444444445, 0.45454545454545453, 0.4615384615384615]

