## Lecture 7b : Symbolic Regression

## Table of Contents
#### 1. [Function Approximation](#1-Function-Approximation)
#### 2. [PK Model with no noise](#2-PK-Model-with-no-noise)
#### 3. [PK Model with noise](#3-PK-Model-with-noise)



## 1-Function Approximation

In [10]:
import numpy as np
from pysr import PySRRegressor 
from  numpy import cos as cos
from numpy import sin as sin

np.random.seed(1234)


x = 1.8 * np.random.randn(100, 5)
y = 6.53829874 * cos(x[:, 3]) + x[:, 0] ** 2 - 0.5

model = PySRRegressor(
    niterations=40,
    binary_operators = ["*", "+"],
    unary_operators = ["cos", 
                       "exp", 
                       "sin",
                       "inv(x) = 1/x", #"Julia Sntax"
                      ], 
    extra_sympy_mappings = {"inv": lambda x: 1/x},
    loss="loss(prediction, target) = (prediction - target)^2")

model.fit(x, y)



Started!

Expressions evaluated per second: 1.630e+05
Head worker occupation: 6.1%
Progress: 360 / 600 total iterations (60.000%)
Hall of Fame:
---------------------------------------------------------------------------------------------------
Complexity  Loss       Score     Equation
1           3.705e+01  2.058e-07  3.5138583
3           2.012e+01  3.053e-01  (x0 * x0)
4           1.736e+01  1.479e-01  (cos(x3) * 8.009311)
5           1.731e+01  2.633e-03  (sin(cos(x3)) * 9.078553)
6           1.415e+01  2.019e-01  ((cos(x3) * 7.1501193) + 1.8915201)
7           1.318e+01  7.086e-02  ((x0 * x0) + exp(cos(x3)))
8           2.242e-01  4.074e+00  ((cos(x3) * 6.311219) + (x0 * x0))
10          2.454e-13  1.151e+01  (((cos(x3) * 6.5382986) + -0.5) + (x0 * x0))
---------------------------------------------------------------------------------------------------
Press 'q' and then <enter> to stop execution early.


PySRRegressor.equations_ = [
	   pick      score                                      equation  \
	0         0.000000                                     3.5138583   
	1         0.305251                                     (x0 * x0)   
	2         0.147924                          (cos(x3) * 8.009311)   
	3         0.002633                     (sin(cos(x3)) * 9.078553)   
	4         0.201883           ((cos(x3) * 7.1501193) + 1.8915201)   
	5         0.070864                    ((x0 * x0) + exp(cos(x3)))   
	6         4.073615            ((cos(x3) * 6.311219) + (x0 * x0))   
	7  >>>>  13.770430  (((cos(x3) * 6.5382986) + -0.5) + (x0 * x0))   
	
	           loss  complexity  
	0  3.705405e+01           1  
	1  2.012325e+01           3  
	2  1.735624e+01           4  
	3  1.731060e+01           5  
	4  1.414606e+01           6  
	5  1.317830e+01           7  
	6  2.242389e-01           8  
	7  2.453959e-13          10  
]

In [11]:
model.sympy()

x0**2 + 6.5382986*cos(x3) - 0.5

In [12]:
print(model)

PySRRegressor.equations_ = [
	   pick      score                                      equation  \
	0         0.000000                                     3.5138583   
	1         0.305251                                     (x0 * x0)   
	2         0.147924                          (cos(x3) * 8.009311)   
	3         0.002633                     (sin(cos(x3)) * 9.078553)   
	4         0.201883           ((cos(x3) * 7.1501193) + 1.8915201)   
	5         0.070864                    ((x0 * x0) + exp(cos(x3)))   
	6         4.073615            ((cos(x3) * 6.311219) + (x0 * x0))   
	7  >>>>  13.770430  (((cos(x3) * 6.5382986) + -0.5) + (x0 * x0))   
	
	           loss  complexity  
	0  3.705405e+01           1  
	1  2.012325e+01           3  
	2  1.735624e+01           4  
	3  1.731060e+01           5  
	4  1.414606e+01           6  
	5  1.317830e+01           7  
	6  2.242389e-01           8  
	7  2.453959e-13          10  
]


## 2-PK Model with no noise

In [13]:
import pysr
import sympy
import numpy as np
from matplotlib import pyplot as plt
from pysr import PySRRegressor
import csv


## Data Import
G = []
B = []
f = []
t = []
U = []
with open('data_PINN.csv') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    line_count = 0
    for row in csv_reader:
        G.append(float(row[1]))
        B.append(float(row[2]))
        U.append(float(row[3]))
        f.append(float(row[4]))
        t.append(float(row[0]))
        line_count += 1
    print(f'Processed {line_count} lines.')

G = np.array(G).reshape(-1,1)
B = np.array(B).reshape(-1,1)
f = np.array(f).reshape(-1,1)
t = np.array(t).reshape(-1,1)
d = np.concatenate((G, B, f), axis=1)
y = d[:, 2]
X = d[:, 0:2]


pysr_params = dict(
    populations=100,
    model_selection="best",
)


model = PySRRegressor(
    niterations=50,
    binary_operators=["plus", "mult"],
    **pysr_params
)

# Run model:
model.fit(X, y)

# reload Model
#model = PySRRegressor.from_file("hall_of_fame_2023-09-15_120648.996.pkl")

y_pred = model.predict(X)
err = np.square(np.subtract(y,y_pred)).mean()
print(f"Projection Error: {err}")
print(f"Model: {model}")

print(f"Model SymPy: {model.sympy(3)}")

#model.sympy(2)
print(f"Latex Equation: {model.latex(3)}")



Processed 501 lines.




Started!

Expressions evaluated per second: 1.780e+05
Head worker occupation: 5.6%
Progress: 422 / 5000 total iterations (8.440%)
Hall of Fame:
---------------------------------------------------------------------------------------------------
Complexity  Loss       Score     Equation
1           3.422e-05  6.215e-01  x0
3           1.006e-05  6.123e-01  (x0 * 0.5984119)
5           7.361e-06  1.560e-01  ((x0 * 0.6304787) + -0.0016880715)
7           6.058e-07  1.249e+00  ((-0.11316414 * x1) + (x0 * 0.69717956))
9           3.668e-08  1.402e+00  (((x0 + (-0.41532958 * x1)) + x0) * 0.35537067)
11          2.201e-08  2.554e-01  ((((0.6532741 * x1) * (x0 + -0.328951)) + x0) * 0.70121336)
---------------------------------------------------------------------------------------------------
Press 'q' and then <enter> to stop execution early.

Expressions evaluated per second: 1.790e+05
Head worker occupation: 6.0%
Progress: 869 / 5000 total iterations (17.380%)
Hall of Fame:
------------------

## 3-PK Model with noise


In [14]:
import pysr
import sympy
import numpy as np
from matplotlib import pyplot as plt
from pysr import PySRRegressor
import csv
import sys
import matplotlib.pyplot as plt

## Data Import
G = []
B = []
f = []
t = []


weights = (1/np.sqrt( 0.01)) * np.ones((501,))


with open('pred_500_0.01.csv') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    line_count = 0
    for row in csv_reader:
        G.append(float(row[1]))
        B.append(float(row[2]))
        f.append(float(row[4]))
        t.append(float(row[0]))
    line_count += 1
    print(f'Processed {line_count} lines.')

G = np.array(G).reshape(-1,1)
B = np.array(B).reshape(-1,1)
f = np.array(f).reshape(-1,1)
t = np.array(t).reshape(-1,1)
d = np.concatenate((G, B, f), axis=1)
y = d[:, 2]
X = d[:, 0:2]


pysr_params = dict(
    populations=50,
    procs = 8,
    ncyclesperiteration=2,
    population_size=30,
    model_selection="best",
    loss="myloss(x, y, w) = w * abs(x - y)",
    binary_operators=["plus", "sub", "mult"],
    maxsize=9,               
)

model = PySRRegressor(
    niterations=50,
    **pysr_params
)

# Run model:
#model = PySRRegressor.from_file("hall_of_fame_2024-01-17_155335.906.pkl")
model.fit(X, y, weights=weights)
y_pred = model.predict(X)
err = np.square(np.subtract(y,y_pred)).mean()

best_idx = model.equations_.query(
    f"loss < {2 * model.equations_.loss.min()}"
).score.idxmax()


print(f"Best model:", model.sympy(best_idx))

print(f"Projection Error: {err}")
print(f"Model: {model}")

print(f"Model SymPy: {model.sympy()}")

#print("4 model:", model.sympy(5))
print(f"Latex Equation: {model.latex()}")

Processed 1 lines.




Started!

Expressions evaluated per second: 6.720e+04
Head worker occupation: 19.1%
Progress: 1474 / 2500 total iterations (58.960%)
Hall of Fame:
---------------------------------------------------------------------------------------------------
Complexity  Loss       Score     Equation
1           2.650e-03  4.608e-02  -0.00023209452
3           1.982e-03  1.454e-01  (0.58497137 * x0)
5           8.474e-04  4.247e-01  ((x1 * -0.16856305) + x0)
7           1.029e-04  1.054e+00  ((x1 * -0.15544352) + (x0 * 0.7217424))
9           5.133e-05  3.477e-01  (((x1 * -0.15247023) + (x0 * 0.7220072)) - 8.938527e-5)
---------------------------------------------------------------------------------------------------
Press 'q' and then <enter> to stop execution early.
Best model: 0.7217896*x0 - 0.15246952*x1 - 8.918227e-5
Projection Error: 3.2867550239652986e-08
Model: PySRRegressor.equations_ = [
	   pick     score                                           equation  \
	0        0.000000           