# Demo: GPSR Using Bingo

## (1) Import Base Packages ##

In [1]:
import numpy as np;np.seterr(all="ignore")
%matplotlib nbagg
import matplotlib.pyplot as plt
from tqdm import tqdm

## (2) Import Relevant Bingo Classes ##

In [3]:
#Classes for Genetic Programming (GP)
from bingo.symbolic_regression import ComponentGenerator
from bingo.evolutionary_optimizers.island import Island
from bingo.symbolic_regression.agraph.agraph import AGraph
from bingo.stats.pareto_front import ParetoFront

#Classes for the EVALUATION stage of GPSR
from bingo.symbolic_regression import ExplicitRegression, \
                                      ExplicitTrainingData
from bingo.evaluation.evaluation import Evaluation
from bingo.local_optimizers.continuous_local_opt import ContinuousLocalOptimization

#Classes for the EVOLUTION stage of GPSR
from bingo.symbolic_regression import AGraphGenerator, \
                                      AGraphCrossover, \
                                      AGraphMutation

#Classes for the SELECTION stage of GPSR
from bingo.evolutionary_algorithms.generalized_crowding import \
                                      GeneralizedCrowdingEA
from bingo.selection.deterministic_crowding import DeterministicCrowding

## (2) Initialize GPSR Hyperparameters ##
Note: Hyperparameters relating to the criterion check and information storage are not present in this code. Refer to BingoExample/example_code/main_script.py for additional information.

In [4]:
# Tunable run settings, kept together so a reader can change them in one place.
POPULATION_SIZE = 104    # number of individuals handed to the Island
STACK_SIZE = 24          # stack size handed to AGraphGenerator
MAX_GENERATIONS = 1000   # number of single-generation steps in the run loop

# GPSR is stochastic; fix the seed so Restart & Run All reproduces the same run.
# NOTE(review): this assumes Bingo draws its random numbers from NumPy's global
# RNG (np.random) -- confirm for the installed Bingo version.
RANDOM_SEED = 0
np.random.seed(RANDOM_SEED)

## (3) Make Training Data ##

In [5]:
# Ground-truth expression the regression is expected to rediscover.
model = AGraph(equation="3*sin(X_0) + 2*X_0")

# 25 evenly spaced samples on [0, pi], laid out as one column (n_samples, 1).
X = np.linspace(0, np.pi, 25)[:, np.newaxis]

# Targets are produced by evaluating the ground-truth expression at X.
y = model.evaluate_equation_at(X)
training_data = ExplicitTrainingData(x=X, y=y)

## (4) Initialize component generator, mutation and crossover objects, and AGraph generators ##

In [6]:
# The generator is sized by the number of input columns in the training data.
component_generator = ComponentGenerator(training_data.x.shape[1])

# Register every operator the search may use, in the same order as before.
for operator in ("+", "-", "*", "sin", "cos", "exp", "pow", "sqrt"):
    component_generator.add_operator(operator)

# Variation operators and the generator used to create new AGraph individuals.
crossover = AGraphCrossover()
mutation = AGraphMutation(component_generator)
agraph_generator = AGraphGenerator(
    STACK_SIZE,
    component_generator,
    use_simplification=True,
)

## (5) Initialize fitness metric, parameter optimizer, and evaluator ##

In [7]:
# Fitness of a candidate equation, measured against the training data.
fitness = ExplicitRegression(training_data=training_data)

# Wrap the fitness in a continuous local optimizer using the "lm" algorithm.
local_opt_fitness = ContinuousLocalOptimization(fitness, algorithm="lm")

# Evaluation runs with its default settings here; a `multiprocess=4` argument
# was left disabled in the original cell.
evaluator = Evaluation(local_opt_fitness)

## (6) Initialize selection and evolutionary algorithm ##

In [8]:
# Both variation probabilities are 0.4.
# NOTE(review): the names assume GeneralizedCrowdingEA takes the crossover
# probability first and the mutation probability second -- confirm against the
# Bingo signature before changing one of them independently.
CROSSOVER_PROBABILITY = 0.4
MUTATION_PROBABILITY = 0.4

selection_phase = DeterministicCrowding()
ea = GeneralizedCrowdingEA(
    evaluator,
    crossover,
    mutation,
    CROSSOVER_PROBABILITY,
    MUTATION_PROBABILITY,
    selection_phase,
)

## (7) Initialize Bingo Island with Hall of Fame ##

In [9]:
def agraph_similarity(ag_1, ag_2):
    """Tell whether two agraphs count as the same entry.

    Two agraphs are similar when their ``fitness`` values compare equal and
    their ``get_complexity()`` results compare equal.

    Parameters
    ----------
    ag_1, ag_2 :
        Objects exposing a ``fitness`` attribute and a ``get_complexity()``
        method.

    Returns
    -------
    The result of the fitness comparison when it is falsy (complexity is then
    not queried), otherwise the result of the complexity comparison.
    """
    same_fitness = ag_1.fitness == ag_2.fitness
    if not same_fitness:
        return same_fitness
    return ag_1.get_complexity() == ag_2.get_complexity()

def _agraph_complexity(ag):
    """Secondary sort key for the Pareto front: the agraph's complexity."""
    return ag.get_complexity()


# Hall of fame keyed on complexity as the secondary objective; entries are
# compared with agraph_similarity.
pareto_front = ParetoFront(
    secondary_key=_agraph_complexity,
    similarity_function=agraph_similarity,
)
def make_island():
    """Build a new Island from the notebook-level GPSR objects.

    Wrapped in a function so the run cell can be repeated with a new island.

    NOTE(review): every island returned here is given the same module-level
    ``pareto_front`` object as its hall of fame -- confirm that sharing it
    across repeated runs is intended.

    Returns
    -------
    Island
        Constructed from ``ea``, ``agraph_generator`` and ``POPULATION_SIZE``.
    """
    return Island(
        ea,
        agraph_generator,
        POPULATION_SIZE,
        hall_of_fame=pareto_front,
    )

## (8) Run GPSR and Visualize Results ##

In [10]:
# Simple Visualization Code

def plot_best_n_individuals(ax, island, n=10, x=None):
    """Plot the predictions of the ``n`` lowest-fitness individuals.

    The axes are cleared first, so titles or labels set earlier on ``ax`` must
    be re-applied by the caller afterwards.

    Parameters
    ----------
    ax :
        Matplotlib-style axes exposing ``clear()`` and ``plot(x, y)``.
    island :
        Object with a ``population`` sequence whose members expose ``fitness``
        and ``evaluate_equation_at(x)``.
    n : int, optional
        Maximum number of individuals to draw (default 10). If the population
        is smaller, every individual is drawn.
    x : array-like, optional
        Points at which each individual is evaluated and plotted. Defaults to
        the notebook-level training inputs ``X``, which keeps existing
        three-argument calls working unchanged.
    """
    if x is None:
        # Fall back to the notebook's training inputs, as the original did.
        x = X
    ax.clear()
    fits = [ind.fitness for ind in island.population]
    # argsort is ascending, so the first n indices are the lowest fitness values.
    for idx in np.argsort(fits)[:n]:
        ax.plot(x, island.population[idx].evaluate_equation_at(x))
    
def update_ax(ax, x, y):
    """Replace whatever is drawn on ``ax`` with a single line of ``y`` vs ``x``.

    The axes are cleared before plotting, so anything set on them earlier
    (including axis labels) has to be re-applied by the caller.

    Parameters
    ----------
    ax :
        Matplotlib-style axes exposing ``clear()`` and ``plot(x, y)``.
    x, y :
        Data passed straight through to ``ax.plot``.
    """
    ax.clear()          # start from an empty axes
    ax.plot(x, y)       # draw the new curve


In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
island = make_island()
fig, axs = plt.subplots(2)
axs[0].set_xlabel("X")
axs[0].set_ylabel("y")

print("Best individual at initialization\n f(X_0) =", island.get_best_individual())
best_indv_values = []
best_indv_values.append(island.get_best_individual())
best_indv_gen = []
best_indv_gen.append(island.generational_age)
fits = [ind.fitness for ind in island.population]
means = [np.nanmean(fits)]

for i in tqdm(range(MAX_GENERATIONS)):
    island.evolve(1)
    best_indv = island.get_best_individual()
    if best_indv.fitness < best_indv_values[-1].fitness:
        best_indv_values.append(best_indv)
        best_indv_gen.append(island.generational_age)
        print(f"Best individual at generation {island.generational_age}\n f(X_0) =", island.get_best_individual())
    fits = [ind.fitness for ind in island.population]
    means += [np.nanmean(fits)]
    update_ax(axs[0], np.arange(island.generational_age+1), means)


Best individual at initialization
 f(X_0) = sqrt(sqrt(((2)(-381.7505085397798))(X_0) - (-381.7505085397798)))


  0%|▏                                         | 3/1000 [00:00<00:45, 22.02it/s]

Best individual at generation 1
 f(X_0) = (-2.0)(-0.9154231012959331 - (X_0))
Best individual at generation 2
 f(X_0) = 1.1922831043724296 + (2)(X_0) + (sin(X_0))^(X_0)


  2%|▋                                        | 18/1000 [00:00<00:49, 19.96it/s]

Best individual at generation 15
 f(X_0) = (0.7670641891677978)((0.7892508289828413 + X_0 + (X_0)^(X_0))(0.8802495017853309 + sin(0.7892508289828413 + X_0)))


  2%|▉                                        | 24/1000 [00:01<00:52, 18.75it/s]

Best individual at generation 22
 f(X_0) = (0.9782112626153797 + exp(-3.9810116995063116 - (X_0)))^(-94.14811418982998)


  3%|█▎                                       | 33/1000 [00:01<00:53, 17.91it/s]

Best individual at generation 31
 f(X_0) = -10.614757217416374 + (2.426668274555507)((1.291717987640874 + X_0 + (X_0)^(X_0))(1.1338942065248312 + sin(1.291717987640874 + X_0)))


  6%|██▎                                      | 57/1000 [00:03<01:04, 14.64it/s]

Best individual at generation 55
 f(X_0) = -49.058067465757965 + (0.12726262750139)((21.972224831876062 + X_0)(17.430267892364164 + sin(0.1141374990782164 + X_0)))


 14%|█████▊                                  | 145/1000 [00:09<01:12, 11.84it/s]

Best individual at generation 144
 f(X_0) = 52.278647302579614 + (0.11045597439662193)((-28.704761979972726 + X_0)(16.38875886197151 + sin(3.04243062808244 + X_0)))


 25%|█████████▊                              | 246/1000 [00:20<01:25,  8.79it/s]

Best individual at generation 244
 f(X_0) = (2.0)(X_0 + (1.5)(sin(X_0)))


 52%|████████████████████▉                   | 524/1000 [01:14<01:29,  5.30it/s]