# Testing the slim_gsgp library:

### Install library:

In [1]:
pip install gsgp_slim

Note: you may need to restart the kernel to use updated packages.


### Install Requirements:

In [2]:
pip install -r requirements.txt

Collecting numpy~=2.0.2 (from -r requirements.txt (line 1))
  Downloading numpy-2.0.2-cp312-cp312-macosx_14_0_arm64.whl.metadata (60 kB)
Collecting pandas~=2.2.3 (from -r requirements.txt (line 2))
  Downloading pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl.metadata (89 kB)
Collecting torch~=2.4.1 (from -r requirements.txt (line 3))
  Using cached torch-2.4.1-cp312-none-macosx_11_0_arm64.whl.metadata (26 kB)
Collecting pytest~=8.3.3 (from -r requirements.txt (line 5))
  Downloading pytest-8.3.3-py3-none-any.whl.metadata (7.5 kB)
Collecting setuptools~=75.1.0 (from -r requirements.txt (line 6))
  Using cached setuptools-75.1.0-py3-none-any.whl.metadata (6.9 kB)
Collecting scipy~=1.13.1 (from -r requirements.txt (line 8))
  Downloading scipy-1.13.1-cp312-cp312-macosx_12_0_arm64.whl.metadata (60 kB)
Collecting iniconfig (from pytest~=8.3.3->-r requirements.txt (line 5))
  Downloading iniconfig-2.0.0-py3-none-any.whl.metadata (2.6 kB)
Collecting pluggy<2,>=1.5 (from pytest~=8.3.3->-r requ

## Running GP:

In [4]:
# Optional extra installs, left commented out so nothing is executed when this cell runs.
# NOTE(review): the saved output under this cell appears to come from an earlier
# scikit-learn install — confirm whether scikit-learn should be added to requirements.txt.
#pip install scipy
#!pip install sklearn --> deprecated
#!pip install scikit-learn

Collecting scikit-learn
  Downloading scikit_learn-1.5.2-cp312-cp312-macosx_12_0_arm64.whl.metadata (13 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Using cached joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Using cached threadpoolctl-3.5.0-py3-none-any.whl.metadata (13 kB)
Downloading scikit_learn-1.5.2-cp312-cp312-macosx_12_0_arm64.whl (11.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.0/11.0 MB[0m [31m61.9 MB/s[0m eta [36m0:00:00[0m
[?25hUsing cached joblib-1.4.2-py3-none-any.whl (301 kB)
Using cached threadpoolctl-3.5.0-py3-none-any.whl (18 kB)
Installing collected packages: threadpoolctl, joblib, scikit-learn
Successfully installed joblib-1.4.2 scikit-learn-1.5.2 threadpoolctl-3.5.0


In [3]:
from slim_gsgp.main_gp import gp  # import the GP algorithm entry point
from slim_gsgp.datasets.data_loader import load_ppb  # import the loader for the dataset PPB
from slim_gsgp.evaluators.fitness_functions import rmse  # import the rmse fitness metric
from slim_gsgp.utils.utils import train_test_split  # import the train-test split function

In [5]:
# Fetch the PPB dataset as an (X, y) pair
X, y = load_ppb(X_y=True)

# First split (p_test=0.4): training data vs. a held-out remainder
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, p_test=0.4)

# Second split (p_test=0.5): divide the held-out remainder into validation
# and final test sets; X_test / y_test are rebound to the final test portion
(X_val, X_test,
 y_val, y_test) = train_test_split(X_test, y_test, p_test=0.5)

In [6]:
# Run the GP algorithm on the training split. The validation split is passed
# through the X_test / y_test arguments, so the final test split is only used
# for the evaluation at the bottom of this cell.
final_tree = gp(
    X_train=X_train,
    y_train=y_train,
    X_test=X_val,
    y_test=y_val,
    dataset_name='ppb',
    pop_size=100,
    n_iter=100,
)

# Display the structure of the best individual from the last generation
final_tree.print_tree_representation()

# Predict on the final test split with that individual
predictions = final_tree.predict(X_test)

# Report the test-set RMSE as a plain Python float
print(float(rmse(y_true=y_test, y_pred=predictions)))

Verbose Reporter
-----------------------------------------------------------------------------------------------------------------------------------------
|         Dataset         |  Generation  |     Train Fitness     |       Test Fitness       |        Timing          |      Nodes       |
-----------------------------------------------------------------------------------------------------------------------------------------
|     ppb                 |       0      |   40.88839340209961   |   43.040855407714844     |   0.05284690856933594  |      15          |
|     ppb                 |       1      |   40.71933364868164   |   42.63808822631836      |   0.0075931549072265625|      15          |
|     ppb                 |       2      |   39.167301177978516  |   41.406654357910156     |   0.0064470767974853516|      3           |
|     ppb                 |       3      |   30.20241355895996   |   32.765968322753906     |   0.00580906867980957  |      3           |
|     ppb        

## Running standard GSGP:

In [7]:
from slim_gsgp.main_gsgp import gsgp  # import the standard GSGP algorithm entry point
from slim_gsgp.datasets.data_loader import load_ppb  # import the loader for the dataset PPB
from slim_gsgp.evaluators.fitness_functions import rmse  # import the rmse fitness metric
from slim_gsgp.utils.utils import train_test_split  # import the train-test split function

In [9]:
# Fetch the PPB dataset as an (X, y) pair
X, y = load_ppb(X_y=True)

# First split (p_test=0.4): training data vs. a held-out remainder
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, p_test=0.4)

# Second split (p_test=0.5): divide the held-out remainder into validation
# and final test sets; X_test / y_test are rebound to the final test portion
(X_val, X_test,
 y_val, y_test) = train_test_split(X_test, y_test, p_test=0.5)

In [10]:
# Run the standard GSGP algorithm on the training split. The validation split
# is passed through the X_test / y_test arguments, so the final test split is
# only used for the evaluation at the bottom of this cell.
# NOTE(review): reconstruct=True is presumably what makes predict() usable on
# the returned individual — confirm against the slim_gsgp documentation.
final_tree = gsgp(
    X_train=X_train,
    y_train=y_train,
    X_test=X_val,
    y_test=y_val,
    dataset_name='ppb',
    pop_size=100,
    n_iter=100,
    reconstruct=True,
    ms_lower=0,
    ms_upper=1,
)

# Predict on the final test split with the best individual
predictions = final_tree.predict(X_test)

# Report the test-set RMSE as a plain Python float
print(float(rmse(y_true=y_test, y_pred=predictions)))

Verbose Reporter
-----------------------------------------------------------------------------------------------------------------------------------------
|         Dataset         |  Generation  |     Train Fitness     |       Test Fitness       |        Timing          |      Nodes       |
-----------------------------------------------------------------------------------------------------------------------------------------
|     ppb                 |       0      |   57.22980880737305   |   60.118648529052734     |   0.1286931037902832   |      3           |
|     ppb                 |       1      |   57.012939453125     |   59.90546417236328      |   0.030501127243041992 |      9           |
|     ppb                 |       2      |   57.012939453125     |   59.90546417236328      |   0.028802156448364258 |      9           |
|     ppb                 |       3      |   56.786285400390625  |   59.68736267089844      |   0.08918404579162598  |      15          |
|     ppb        

## Running SLIM-GSGP:

In [11]:
from slim_gsgp.main_slim import slim  # import the SLIM-GSGP algorithm entry point
from slim_gsgp.datasets.data_loader import load_ppb  # import the loader for the dataset PPB
from slim_gsgp.evaluators.fitness_functions import rmse  # import the rmse fitness metric
from slim_gsgp.utils.utils import train_test_split  # import the train-test split function

In [12]:
# Fetch the PPB dataset as an (X, y) pair
X, y = load_ppb(X_y=True)

# First split (p_test=0.4): training data vs. a held-out remainder
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, p_test=0.4)

# Second split (p_test=0.5): divide the held-out remainder into validation
# and final test sets; X_test / y_test are rebound to the final test portion
(X_val, X_test,
 y_val, y_test) = train_test_split(X_test, y_test, p_test=0.5)

In [13]:
# Run the SLIM-GSGP algorithm (variant 'SLIM+SIG2') on the training split. The
# validation split is passed through the X_test / y_test arguments, so the
# final test split is only used for the evaluation at the bottom of this cell.
final_tree = slim(
    X_train=X_train,
    y_train=y_train,
    X_test=X_val,
    y_test=y_val,
    dataset_name='ppb',
    slim_version='SLIM+SIG2',
    pop_size=100,
    n_iter=100,
    ms_lower=0,
    ms_upper=1,
    p_inflate=0.5,
)

# Display the structure of the best individual from the last generation
final_tree.print_tree_representation()

# Predict on the final test split with that individual
predictions = final_tree.predict(X_test)

# Report the test-set RMSE as a plain Python float
print(float(rmse(y_true=y_test, y_pred=predictions)))

Verbose Reporter
-----------------------------------------------------------------------------------------------------------------------------------------
|         Dataset         |  Generation  |     Train Fitness     |       Test Fitness       |        Timing          |      Nodes       |
-----------------------------------------------------------------------------------------------------------------------------------------
|     ppb                 |       0      |   44.30246353149414   |   46.856666564941406     |   0.051442861557006836 |      3           |
|     ppb                 |       1      |   44.30246353149414   |   46.856666564941406     |   0.0182342529296875   |      3           |
|     ppb                 |       2      |   44.30246353149414   |   46.856666564941406     |   0.016237974166870117 |      3           |
|     ppb                 |       3      |   44.18924331665039   |   46.75093078613281      |   0.02097606658935547  |      14          |
|     ppb        