In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
from matplotlib import cm
from pysr import PySRRegressor
from sympy import lambdify
from sympy import symbols

In [2]:
# Relative locations of the training, validation and test tables.
# NOTE(review): the test path climbs two directory levels while the train and
# validation paths climb three — confirm this difference is intentional.
train_filepath, val_filepath = (
    '../../../data/train',
    '../../../data/valid',
)
test_filepath = '../../data/test'

In [3]:
# Load the 2SLAQ LRG tables: space-delimited text with no header row.
train_part, df_val, df_test = (
    pd.read_csv(path, sep=' ', header=None)
    for path in (train_filepath, val_filepath, test_filepath)
)
# The validation rows are appended to the training rows, so everything
# downstream trains on the combined frame.
df_train = pd.concat([train_part, df_val])

In [5]:
# Preview the first rows of the combined training + validation frame.
df_train.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,22.701,21.46,20.019,19.095,18.726,2.157,0.225,0.09,0.06,0.19,0.5556
1,26.848,22.075,20.559,19.639,19.156,0.358,0.157,0.057,0.038,0.084,0.5302
2,21.427,21.527,19.89,19.058,18.688,0.431,0.145,0.05,0.035,0.097,0.566
3,25.333,22.171,20.524,19.717,19.307,0.863,0.139,0.048,0.037,0.099,0.4989
4,24.651,21.751,19.811,18.812,18.361,2.846,0.218,0.049,0.033,0.076,0.5633


In [5]:
# Model inputs are the first five columns of each frame; the regression target
# is the last column. Both are converted to plain NumPy arrays.
X_train, X_test = (
    frame.iloc[:, :5].to_numpy() for frame in (df_train, df_test)
)
y_train, y_test = (
    frame.iloc[:, -1].to_numpy() for frame in (df_train, df_test)
)

In [6]:
# Symbolic-regression search over the five input columns, named u, g, r, i, z.
# Only the four arithmetic binary operators are allowed (no unary operators),
# and candidate equations are written to 'del.csv'.
model = PySRRegressor(
    variable_names=['u', 'g', 'r', 'i', 'z'],
    binary_operators=['plus', 'sub', 'mult', 'div'],
    unary_operators=[],
    loss='L2DistLoss()',
    model_selection='best',
    niterations=5,
    equation_file='del.csv',
    progress=True,
)



In [None]:
# Fit the regressor configured above to the training features and targets.
model.fit(X_train, y_train)

Your Python version is statically linked to libpython. For example, this could be the python included with conda, or maybe your system's built-in python.
This will still work, but the precompilation cache for Julia will be turned off, which may result in slower startup times on the initial pysr() call.

To install a Python version that is dynamically linked to libpython, pyenv is recommended (https://github.com/pyenv/pyenv). With pyenv, you can run: `PYTHON_CONFIGURE_OPTS="--enable-shared" pyenv install 3.9.10` to install Python 3.9.10 with dynamic linking.

  Activating project at `~/.julia/environments/pysr-0.7.11`
    Updating registry at `~/.julia/registries/General.toml`
   Resolving package versions...
  No Changes to `~/.julia/environments/pysr-0.7.11/Project.toml`
  No Changes to `~/.julia/environments/pysr-0.7.11/Manifest.toml`
  No Changes to `~/.julia/environments/pysr-0.7.11/Project.toml`
  No Changes to `~/.julia/environments/pysr-0.7.11/Manifest.toml`


In [None]:
# Print the fitted model's text representation.
print(model)

### Plotting an expression

Here, let's use the same equations, but get them in a format we can actually use and test. We can add this option after a search via the `set_params` function:

In [None]:
# Register a sympy translation for an `inv` operator (inv(x) -> 1/x), then
# display the model's sympy expression.
# NOTE(review): this notebook builds the model with unary_operators=[], so the
# `inv` mapping presumably has no effect on the expressions here — confirm.
model.set_params(extra_sympy_mappings={"inv": lambda x: 1/x})
model.sympy()

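If you look at the lists of expressions before and after, you will see that the sympy format has now replaced `inv` with `1/`. (The model in this notebook was configured with no unary operators, so no `inv` terms appear and its expressions are unchanged.) We can again look at the chosen equation, this time rendered as LaTeX: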

In [None]:
# Show the model's equation as a LaTeX string.
model.latex()

In [None]:
# Display the model's `equations` attribute; the plotting cell below indexes
# its `lambda_format` column to obtain a callable for prediction.
model.equations

In [None]:
__author__ = 'Robert Hogan'
# Plot the model's predictions against the true values for the test set as a
# log-density map, save the figure, and print summary error metrics.
# Relies on `np` and `plt` from the notebook's import cell.

# Row of `model.equations` whose callable is evaluated on the test set.
# NOTE(review): hard-coded row, presumably hand-picked from the equations
# table — confirm it still exists and is the intended one after re-fitting.
EQUATION_INDEX = 3
# Absolute-error threshold above which a prediction counts as an outlier.
OUTLIER_THRESHOLD = 0.1

y = y_test
y_predict = model.equations.lambda_format[EQUATION_INDEX](X_test)

# --- Density map -------------------------------------------------------------
# 2-D histogram of (true, predicted). log(H + 1) keeps empty bins finite (they
# map to 0); the transpose puts the predicted axis on rows, as contourf expects.
H, xedges, yedges = np.histogram2d(y, y_predict, bins=50)
log_density = np.log(np.transpose(H) + 1)

# 20 contour levels from 0 up to the next multiple of 0.5 at or above the
# maximum. Rounding up (not to nearest) matters: contourf does not colour
# values above the last level, so rounding down would leave the densest bins
# unfilled.
level = np.linspace(0, np.ceil(2 * np.max(log_density)) / 2.0, 20)

# Bin centres from bin edges, computed per axis.
xe = 0.5 * (xedges[1:] + xedges[:-1])
ye = 0.5 * (yedges[1:] + yedges[:-1])

fig = plt.figure()
ax = fig.add_subplot(111)
ax.set_facecolor('black')
density_contours = ax.contourf(xe, ye, log_density, levels=level, cmap='hot')

# Identity line and matching axis limits, both spanning the true-value range.
y_lo, y_hi = np.min(y), np.max(y)
ax.plot([y_lo, y_hi], [y_lo, y_hi], '-', color='grey', alpha=0.9, linewidth=1.5)
ax.set_xlim((y_lo, y_hi))
ax.set_ylim((y_lo, y_hi))

ax.set_xlabel(r'$z_{spec}$', fontsize=30)
ax.set_ylabel(r'$z_{phot}$', fontsize=30)
ax.tick_params(axis='both', which='major', labelsize=20)

cbar = fig.colorbar(density_contours, ax=ax)
cbar.set_label('$log(density)$', fontsize=20)
cbar.ax.tick_params(labelsize=20)
cbar.solids.set_edgecolor("face")

# --- Metrics -----------------------------------------------------------------
residual = y - y_predict
abs_residual = np.abs(residual)
outliers = y_predict[abs_residual > OUTLIER_THRESHOLD]
# `<=` makes the inlier set the exact complement of the outlier set, so a
# residual equal to the threshold is no longer dropped from both.
inlier_residual = residual[abs_residual <= OUTLIER_THRESHOLD]

print('Metric a')
print('Catastrophic Outliers: ', outliers.shape[0] * 100.0 / y.shape[0], '%')
print('Total rms: ', np.sqrt(np.mean(residual ** 2)))
print('rms w/o outliers', np.sqrt(np.mean(inlier_residual ** 2)))
print('Bias:', np.mean(residual))

# Save before show(): show() may leave the figure empty afterwards.
fig.savefig('density_plot.png')
plt.show()