In [None]:
import numpy as np
import pandas as pd
from pysr import PySRRegressor

# Generate synthetic example data with a known ground-truth relationship.
# C is the root variable; B is a noisy linear function of C, and A depends
# on both B (quadratically) and C (linearly):
np.random.seed(42)  # Fixed seed so the generated data is the same on every run
C = np.random.uniform(-10, 10, 1000)  # 1000 uniform draws between -10 and 10
B = 2 * C + np.random.normal(0, 1, 1000)  # B = 2*C plus Gaussian noise (scale 1)
A = 3 * B**2 - 5 * C + np.random.normal(0, 2, 1000)  # A = 3*B^2 - 5*C plus Gaussian noise (scale 2)

# Collect the three variables in a DataFrame
data = pd.DataFrame({'C': C, 'B': B, 'A': A})

# Separate features and target. The column order is [B, C], so B is the
# first feature (x0 in the fitted equations) and C is the second (x1).
X = data[['B', 'C']].to_numpy()  # Features: B and C
y = data['A'].to_numpy()  # Target: A

# Use PySR to search for a symbolic expression that maps X to y
model = PySRRegressor(
    niterations=40,  # Number of iterations to search for equations
    binary_operators=["+", "-", "*", "/"],  # Binary operators the search may combine
    unary_operators=["sin", "cos", "exp", "log", "abs", "sqrt"],  # Unary operators the search may apply
    elementwise_loss="loss(x, y) = (x - y)^2",  # Per-element loss: squared difference of its two arguments
    verbosity=1,
)

# Fit the model
model.fit(X, y)





Expressions evaluated per second: 9.710e+04
Progress: 470 / 1240 total iterations (37.903%)
════════════════════════════════════════════════════════════════════════════════════════════════════
───────────────────────────────────────────────────────────────────────────────────────────────────
Complexity  Loss       Score      Equation
1           1.316e+05  1.594e+01  y = 410.29
3           1.308e+05  2.717e-03  y = 409.96 - x₀
4           1.907e+04  1.926e+00  y = abs(x₀ * -45.369)
5           8.481e+02  3.113e+00  y = x₀ * (x₀ * 3.0063)
7           9.664e+00  2.237e+00  y = (x₀ * (x₀ + -0.82647)) * 3.0012
9           3.740e+00  4.747e-01  y = (x₁ * -4.9765) - ((x₀ * -3.0004) * x₀)
11          3.738e+00  2.308e-04  y = ((x₁ * -4.9765) - (x₀ * (x₀ * -3.0007))) + -0.062926
13          3.737e+00  1.093e-04  y = (x₁ * -4.9192) - (((x₀ + -0.0095888) * (x₀ * -3.0007))...
                                       - -0.058997)
14          3.733e+00  1.158e-03  y = ((x₁ * -4.9765) - ((x₀ * -3.000

[ Info: Started!
[ Info: Final population:
[ Info: Results saved to:


In [3]:
# Print the fitted regressor: its text form is the table of candidate
# equations with their scores; the selected row is marked `>>>>` in `pick`.
print(model)

PySRRegressor.equations_ = [
	    pick     score                                           equation  \
	0         0.000000                                           410.2884   
	1         0.002717                                      409.9592 - x0   
	2         1.925773                               abs(x0 * -45.370483)   
	3         3.113010                              x0 * (x0 * 3.0063436)   
	4         2.237322               x0 * ((x0 + -0.8264929) * 3.0012128)   
	5   >>>>  0.474700        ((x0 * x0) * 3.0004373) + (x1 * -4.9765296)   
	6         0.000231  ((x1 * -4.9765434) - (x0 * (x0 * -3.0006967)))...   
	7         0.000014  (x1 * -4.9765387) + abs(-0.0655916 - ((x0 * x0...   
	8         0.001057  ((x1 * -4.976102) - ((x0 * x0) * -3.0004392)) ...   
	9         0.000308  ((x1 * -4.9764247) - ((x0 * x0) * -3.0004566))...   
	10        0.000394  (x1 * -4.9767213) - ((x0 * (x0 * -3.0009363)) ...   
	11        0.005691  (x1 * -4.9751363) - ((x0 * -3.0004444) * (x0 -...   
	12      

In [29]:
from sympy import symbols, simplify, sin, cos, log, Abs, exp, expand_power_base, Mul, Rational, sqrt
import numpy as np
import pandas as pd

# Declare the symbolic variables that appear in the expression
x0, x1, x2 = symbols('x0 x1 x2')

# Hand-entered expression with nested parentheses and numeric constants
# (presumably pasted from a PySR result with three features; TODO confirm the source)
expr = (((x0 * 0.013395232) + sin(x1 * (x2 * 0.011614522))) * 12.717946) + 4.773303

# Let SymPy simplify it; in the printed result the outer factor has been
# distributed over the sum.
simplified_expr = simplify(expr)

print("Simplified Expression:", simplified_expr)

Simplified Expression: 0.170359837233472*x0 + 12.717946*sin(0.011614522*x1*x2) + 4.773303


In [17]:
# Declare the symbolic variables that appear in the expression
x0, x1, x2 = symbols('x0 x1 x2')

# Build the expression and simplify it in a single step
expr = simplify((((x0 * 0.013395232) + sin(x1 * (x2 * 0.011614522))) * 12.717946) + 4.773303)

# Module-level accumulator: traverse_expr_tree appends every visited child node here
lst = []

# Function to print the tree structure
def traverse_expr_tree(node, level=0):
    """Print an expression tree and record every sub-expression.

    The node itself is printed first, indented by two spaces per ``level``;
    its children (``node.args``) are then visited in order, one level deeper.
    After a child's whole subtree has been handled, the child is appended to
    the module-level list ``lst``, so that list ends up in post-order and
    never contains the root node.

    Args:
        node: Any object exposing an ``args`` sequence of child nodes.
        level: Current depth, used only for indentation.
    """
    indent = "  " * level
    print(indent + f"Node: {node}")
    child_level = level + 1
    for sub_expr in node.args:
        traverse_expr_tree(sub_expr, child_level)
        # Recorded only after the subtree, hence the post-order result.
        lst.append(sub_expr)

# Print the indented tree, then the collected sub-expressions. Each child is
# appended only after its own subtree, so deeper nodes appear before their parent.
traverse_expr_tree(expr)
print(lst)

Node: 0.170359837233472*x0 + 12.717946*sin(0.011614522*x1*x2) + 4.773303
  Node: 4.77330300000000
  Node: 0.170359837233472*x0
    Node: 0.170359837233472
    Node: x0
  Node: 12.717946*sin(0.011614522*x1*x2)
    Node: 12.7179460000000
    Node: sin(0.011614522*x1*x2)
      Node: 0.011614522*x1*x2
        Node: 0.0116145220000000
        Node: x1
        Node: x2
[4.77330300000000, 0.170359837233472, x0, 0.170359837233472*x0, 12.7179460000000, 0.0116145220000000, x1, x2, 0.011614522*x1*x2, sin(0.011614522*x1*x2), 12.717946*sin(0.011614522*x1*x2)]


In [32]:
import json
from sympy import symbols, simplify

# Define a function to recursively convert the expression tree
def expression_to_json(expr):
    """Recursively convert a SymPy expression into a JSON-serialisable tree.

    Leaves become strings and every operator becomes a dict whose operands
    are stored under ``term1``, ``term2``, ... followed by a ``type`` tag:

    * symbol            -> ``"$self.<name>"``
    * number            -> ``str(number)``
    * sum               -> ``ADDITIONDOUBLE`` (one ``termN`` per addend)
    * product           -> ``MULTIPLICATIONDOUBLE`` (one ``termN`` per factor)
    * power with exponent 1/2 -> ``SQUAREROOT`` (single operand)
    * any other power   -> ``POWER`` (``term1`` base, ``term2`` exponent)
    * sin / cos / exp / log / Abs -> ``SINE`` / ``COSINE`` / ``EXPONENT`` /
      ``LOGARITHM`` / ``ABSOLUTEVALUE`` (single operand)

    Anything not covered above is returned as ``str(expr)``.

    Args:
        expr: The expression node to convert.

    Returns:
        A string for leaves and unrecognised nodes, otherwise a dict.
    """
    # Leaves: variables and numeric constants.
    if expr.is_Symbol:
        return f"$self.{expr}"
    if expr.is_Number:
        return str(expr)

    # Sums and products are n-ary and share one layout: numbered operands
    # first, then the type tag (a sum takes precedence, as it is tested first).
    if expr.is_Add or expr.is_Mul:
        node = {
            f"term{position}": expression_to_json(operand)
            for position, operand in enumerate(expr.args, start=1)
        }
        node["type"] = "ADDITIONDOUBLE" if expr.is_Add else "MULTIPLICATIONDOUBLE"
        return node

    # Powers: an exponent of exactly one half is emitted as a square root.
    if expr.is_Pow:
        if expr.exp == Rational(1, 2):
            return {
                "term1": expression_to_json(expr.args[0]),
                "type": "SQUAREROOT",
            }
        base, exponent = expr.as_base_exp()
        return {
            "term1": expression_to_json(base),
            "term2": expression_to_json(exponent),
            "type": "POWER",
        }

    # Single-argument functions differ only in their type tag.
    if isinstance(expr, sin):
        type_tag = "SINE"
    elif isinstance(expr, cos):
        type_tag = "COSINE"
    elif isinstance(expr, exp):
        type_tag = "EXPONENT"
    elif isinstance(expr, log):
        type_tag = "LOGARITHM"
    elif isinstance(expr, Abs):
        type_tag = "ABSOLUTEVALUE"
    else:
        # Unrecognised node: fall back to its string representation.
        return str(expr)

    return {
        "term1": expression_to_json(expr.args[0]),
        "type": type_tag,
    }

# Declare the symbolic variables that appear in the example
x0, x1, x2 = symbols('x0 x1 x2')

# Example expression that nests exp, sin and sqrt around a linear combination;
# simplify() rewrites it before conversion, so the printed form differs from the input
expr = simplify(exp(sin(sqrt((x1 + x2) + ((x0 * 0.22069037) - -28.435165)))) * 8.929338)
print(expr)

# Convert the simplified expression into nested dicts and strings
json_representation = expression_to_json(expr)

# Serialise the result as a JSON string with 4-space indentation and print it
print(json.dumps(json_representation, indent=4))


8.929338*exp(sin(5.33246331445421*sqrt(0.0077611777529689*x0 + 0.0351677227826883*x1 + 0.0351677227826883*x2 + 1)))
{
    "term1": "8.92933800000000",
    "term2": {
        "term1": {
            "term1": {
                "term1": "5.33246331445421",
                "term2": {
                    "term1": {
                        "term1": "1",
                        "term2": {
                            "term1": "0.0351677227826883",
                            "term2": "$self.x1",
                            "type": "MULTIPLICATIONDOUBLE"
                        },
                        "term3": {
                            "term1": "0.0351677227826883",
                            "term2": "$self.x2",
                            "type": "MULTIPLICATIONDOUBLE"
                        },
                        "term4": {
                            "term1": "0.00776117775296890",
                            "term2": "$self.x0",
                            "type": "MULTIPLICATI

In [6]:
# Declare the symbolic variables that appear in the expression
x0, x1 = symbols('x0 x1')

# Expression containing a cubic power of x0, used below to demonstrate
# rewriting powers as repeated multiplication
expr = 3.0004373 * x0**3 - 4.9765296 * x1

# Function to expand powers explicitly
def expand_powers_as_multiplication(expr):
    """Rewrite positive integer powers as explicit repeated multiplication.

    ``x**3`` becomes the unevaluated product ``x*x*x``. The rewrite is
    applied throughout the tree, including inside the base of a power that
    is itself expanded. Powers whose exponent is not a positive integer
    (negative, fractional or symbolic) cannot be written as a repeated
    product, so they are kept as powers and only their operands are
    processed.

    Args:
        expr: SymPy expression (any node exposing ``is_Pow``, ``args`` and
            ``func``).

    Returns:
        An equivalent expression built with ``evaluate=False`` so SymPy does
        not collapse the products back into powers. Atoms (symbols, numbers)
        are returned unchanged.
    """
    if expr.is_Pow:
        exponent = expr.exp
        # Only a positive integer exponent maps onto "base repeated n times".
        # Without this guard a negative exponent yields an empty product
        # (i.e. 1), and a non-integer exponent cannot repeat a list at all.
        if exponent.is_Integer and exponent.is_positive:
            # Expand the base first so nested powers such as (x**2 + 1)**2
            # are rewritten as well.
            factor = expand_powers_as_multiplication(expr.base)
            return Mul(*[factor] * int(exponent), evaluate=False)

    if expr.args:
        # Composite node (including powers left intact above): rebuild it
        # from its processed operands without re-evaluation.
        return expr.func(
            *[expand_powers_as_multiplication(arg) for arg in expr.args],
            evaluate=False,
        )

    # Atom: nothing to expand.
    return expr

# Rewrite the powers in `expr` as repeated multiplication
expanded_expr = expand_powers_as_multiplication(expr)

# Function to traverse and print the tree structure
def traverse_expr_tree(node, level=0):
    """Print an expression tree, one node per line, in pre-order.

    Each line reads ``Node: <node>`` and is indented by two spaces per depth
    level, starting from ``level`` for ``node`` itself. Children are taken
    from ``node.args`` and printed left to right beneath their parent.

    Args:
        node: Any object exposing an ``args`` sequence of child nodes.
        level: Depth assigned to ``node``; controls the starting indentation.
    """
    # Explicit stack instead of recursion; children are pushed right-to-left
    # so that the leftmost child is popped (and printed) first.
    pending = [(node, level)]
    while pending:
        current, depth = pending.pop()
        print("  " * depth + f"Node: {current}")
        pending.extend((child, depth + 1) for child in reversed(current.args))

# Print the expanded expression as an indented tree, one node per line
traverse_expr_tree(expanded_expr)

Node: 3.0004373*(x0*x0*x0) - 4.9765296*x1
  Node: 3.0004373*(x0*x0*x0)
    Node: 3.00043730000000
    Node: x0*x0*x0
      Node: x0
      Node: x0
      Node: x0
  Node: -4.9765296*x1
    Node: -4.97652960000000
    Node: x1


In [4]:
# Print the synthetic dataset (columns C, B, A)
print(data)

            C          B           A
0   -2.509198  -4.840694   80.030316
1    9.014286  16.693228  790.753931
2    4.639879   9.659956  253.735387
3    1.973170   4.556925   53.950963
4   -6.879627 -13.199464  557.240560
..        ...        ...         ...
995 -8.168359 -17.656740  975.553399
996  8.346272  16.080774  733.530153
997 -7.263627 -14.564292  672.190908
998  9.004747  17.580192  882.042181
999 -1.079885  -2.852190   30.763270

[1000 rows x 3 columns]


In [6]:
# Print only the feature columns, in the same [B, C] order used to build X
print(data[['B', 'C']])

             B         C
0    -4.840694 -2.509198
1    16.693228  9.014286
2     9.659956  4.639879
3     4.556925  1.973170
4   -13.199464 -6.879627
..         ...       ...
995 -17.656740 -8.168359
996  16.080774  8.346272
997 -14.564292 -7.263627
998  17.580192  9.004747
999  -2.852190 -1.079885

[1000 rows x 2 columns]
