In [1]:
# notebooks/00-data-generation.ipynb

import pandas as pd
import numpy as np
import sympy as sp


In [2]:
# Template: build a small, reproducible dummy dataset, attach a randomly chosen
# symbolic function to every row, and persist both the dataset (CSV) and the
# function pool (importable Python module).

# Set the random seed for reproducibility
np.random.seed(42)

# Define the size of the dummy dataset
n_samples = 5

# Generate dummy data.
# NOTE: every column draws from NumPy's global RNG in the order written here,
# so reordering the columns changes the values produced for a given seed.
dummy_data = pd.DataFrame({
    'id': np.arange(1, n_samples + 1),
    'feature1': np.random.normal(loc=0, scale=1, size=n_samples),
    'feature2': np.random.uniform(low=0, high=100, size=n_samples),
    'feature3': np.random.exponential(scale=1, size=n_samples),
    'feature4': np.random.binomial(n=10, p=0.5, size=n_samples),
    'category': np.random.choice(['A', 'B', 'C', 'D'], size=n_samples),
    'value': np.random.randint(1, 100, size=n_samples),
    'boolean': np.random.choice([True, False], size=n_samples),
    'date': pd.date_range(start='2023-01-01', periods=n_samples, freq='D')
})

# Define symbolic variable
x = sp.symbols('x')

# Define a pool of functions
function_pool = [
    x**2 + 0.5,
    15 * sp.log(x),
    sp.sin(x),
    sp.exp(x),
    x**3 - 2*x + 1
]

# Assign a random function from the pool to each instance
dummy_data['function'] = np.random.choice(function_pool, size=n_samples)

# The 'function' column holds SymPy objects; the CSV stores their text form
# (e.g. "15*log(x)"), which can be turned back into expressions with sp.sympify.

# Save the dummy data to the dummy_data folder
dummy_data.to_csv('../data/dummy_data/dummy_data.csv', index=False)

print("Dummy data generated and saved to data/dummy_data/dummy_data.csv")

# Save the symbolic functions and the pool to a file.
# Each entry is written as sp.sympify('<expr>') so that the generated module
# only needs the `sp` alias it imports. Writing sp.srepr(func) directly would
# emit bare constructor names (Add, Pow, Symbol, Float, ...) that are not
# defined in the generated module and would raise NameError on import.
with open('../src/symbolic_functions.py', 'w') as f:
    f.write("# Symbolic functions for the project\n\n")
    f.write("import sympy as sp\n\n")
    f.write("x = sp.symbols('x')\n")
    f.write("function_pool = [\n")
    for func in function_pool:
        # !r quotes the expression text so it becomes a valid string literal.
        f.write(f"    sp.sympify({str(func)!r}),\n")
    f.write("]\n")

print("Symbolic functions and function pool saved to src/symbolic_functions.py")

Dummy data generated and saved to data/dummy_data/dummy_data.csv
Symbolic functions and function pool saved to src/symbolic_functions.py


In [3]:
# Example of reading the data and converting function strings back to symbolic expressions

import pandas as pd
import sympy as sp

# Load the data
dummy_data = pd.read_csv('../data/dummy_data/dummy_data.csv')

# Convert function strings back to symbolic expressions.
# NOTE(review): sp.sympify evaluates the text it is given, so only feed it
# files produced by this project, never untrusted input.
dummy_data['function_sym'] = dummy_data['function'].apply(sp.sympify)

# The symbol is the same for every row, so build it once instead of per iteration.
x = sp.symbols('x')

# Evaluate each row's function at its own 'feature1' value. Non-positive
# inputs are replaced by 1 so that log(x) stays inside its real domain; the
# replacement is applied to every function, not only to the logarithm.
x_used = []
function_vals = []
for x_val, function in zip(dummy_data['feature1'], dummy_data['function_sym']):
    x_eval = x_val if x_val > 0 else 1  # Adjust for log and other domain issues
    x_used.append(x_eval)
    function_vals.append(function.subs(x, x_eval))

# Keep both the value actually substituted and the result, so the table does
# not suggest the function was evaluated at a non-positive 'feature1'.
dummy_data['x_used'] = x_used
dummy_data['function_val'] = function_vals

# Show the inputs next to the evaluated results
dummy_data[['feature1', 'x_used', 'function', 'function_val']]