# PySIPS Usage Example

### Start by generating some data: $y = x^2 + \text{noise}$

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Fix the global NumPy seed so the noise (and therefore the whole data set) is reproducible.
np.random.seed(42)

# 100 evenly spaced inputs on [-3, 3], stored as a single-feature column matrix.
X = np.linspace(-3, 3, 100)[:, np.newaxis]

# Targets: a parabola in the single feature plus Gaussian noise (mean 0, std 0.1).
y = X.ravel() ** 2 + np.random.normal(loc=0, scale=0.1, size=len(X))

Split the data into training (80%) and test (20%) sets, then plot both

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Visual check of the split. The explicit Figure/Axes interface is used instead of the
# implicit pyplot state machine so the figure does not depend on hidden "current axes" state.
fig, ax = plt.subplots()
ax.plot(X_train, y_train, '.', label="Train Data")
ax.plot(X_test, y_test, '.', label="Test Data")
ax.set(xlabel="x", ylabel="y", title="Train/test split of the synthetic data")
# The trailing ';' keeps the Legend object's repr out of the cell output.
ax.legend();

### Fitting with PySIPS

Start by creating the regressor.

In [None]:
from pysips import PysipsRegressor

regressor = PysipsRegressor(
    # --- search space ---
    # mathematical operations that equations are allowed to use
    operators=["+", "-", "*"],
    # upper limit on the complexity of an equation
    max_complexity=12,

    # --- sampling effort ---
    # number of equations used to represent the model posterior
    # (comparable to the population size of a genetic algorithm);
    # larger values cost more compute but are also more effective
    num_particles=100,
    # length of the MCMC chains run between SMC target distributions;
    # larger values cost more compute and improve effectiveness,
    # although the benefit may saturate for large values
    num_mcmc_samples=20,

    # --- run control ---
    # controls randomness
    random_state=42,
    # time limit for the run
    max_time=25,
)

Fit the regressor to the training data

In [None]:
# Train on the training split only; X_test / y_test stay untouched so they can be
# used for the evaluation that follows.
regressor.fit(X_train, y_train)

Evaluate the fit on the held-out test data

In [None]:
from sklearn.metrics import r2_score

# Retrieve the discovered equation; it is also reused as the legend label in the final plot.
expression = regressor.get_expression()
# Predict on the held-out test inputs only, so the score below is an out-of-sample measure.
y_pred = regressor.predict(X_test)
print(f"Discovered expression: {expression}")
# R² of the test-set predictions, printed to four decimal places.
print(f"R² score: {r2_score(y_test, y_pred):.4f}")

Plot the discovered expression against the data

In [None]:
# Overlay the model's predictions on top of both data splits. The explicit Figure/Axes
# interface is used so the figure does not depend on pyplot's hidden "current axes" state.
fig, ax = plt.subplots()
ax.plot(X_train, y_train, '.', label="Train Data")
ax.plot(X_test, y_test, '.', label="Test Data")
# X was built with np.linspace, so it is already sorted and a line plot traces the curve cleanly.
ax.plot(X, regressor.predict(X), '-', label=expression)
ax.set(xlabel="x", ylabel="y", title="PySIPS fit vs. data")
# The trailing ';' keeps the Legend object's repr out of the cell output.
ax.legend();