In [3]:
import pandas as pd
import numpy as np
import sparkge as sg
from sparkge import evolution as evo
from sparkge.evolution import length_of
from sparkge.providers.fitness import *
import warnings
%matplotlib inline
res = np.seterr(divide='ignore', invalid='ignore') 
warnings.filterwarnings("ignore")
#set recursion limit sparge.configure()

This notebook describes basic useage of sparkge. See the concepts notebook for a better understanding of the patterns used

# Grammar

In [4]:
from sparkge.symbols import *

@terminal
def var():   return ["X"]

@terminal
def param():  return [ "A", "B"]

@terminal(is_constant=True)
def const():return [ 2,3,4,5]#, np.pi

@operator()
def uop(): return [np.exp, np.log]#np.sin, np.cos , np.log, np.sin,

@operator()
def op(): return [add, sub, mul, truediv]

@operator()
def power(): return [np.power]

def expr(genome=None, args = []): 
    return as_expression(genome, [ op(expr,expr), power(expr,param) ,var, param])


# Sample Expressions

In [10]:
power_sample = np.array([1,0,2])
power_law_sample = np.array([0,2,3,1, 1, 1,2])
#sample using a random genome - re-run cell to see other instances
f= expr(evo.chain(l=power_law_sample,max_wraps=2))#np.array([0,0,3,3])
print(f(X=3,A=4,B=6,C=5))
from sparkge.symbols import _repr_
print(_repr_(f))
S= display(f)
S

486
(B*(X**A))


   A
B⋅X 

## Get some sample data

In [11]:
SAMPLE = pd.read_csv("./sample_data/power_law_data").set_index("L")
SAMPLE.head()

Unnamed: 0_level_0,M1
L,Unnamed: 1_level_1
125,34.378366
125,34.414432
125,34.234352
125,34.135606
125,34.30445


# Fit the data to sample expression

In [12]:
# S = expr(chain(examples[10]))
# display(S)

In [13]:

#display(S)
f= S
print(display(f)) 
_f = accepting_ordered_params(f) #see comments in fnuction accepting_ordered_params
est = curve_fit(_f, SAMPLE, estimate=[[0.1,2,1,]], display=True)
print("Est params", est[0])
print("GoF", goodness_of_fit(_f,SAMPLE, est))

nan


AttributeError: 'float' object has no attribute 'free_symbols'

when fitting functions with scipy the convention is to pass the variables parameters in an ordered list f(X, params). To respect that we try to unravel our strict function of f(X=,A=,B=) into something that takes an ordered list. It is important of course to make sure that we map the right ordinal to the right key

# Strategy

In [14]:
s = evo.strategy(pop_size=200, genome_size=100, srate=0.52, mrate=0.01)

# Evolve a test function

In [None]:
lengths, fitnesses, genes = s.init()
for generation in range(1000): 
    fitnesses = sg.symbols._evaluate(genes,expr)#default test function
    meta = np.stack([length_of(genes),fitnesses],axis=1)
    genes = s.select_and_mutate(meta,genes)
    sg.store.update(meta,genes)

# Curve fitting data


In [None]:
lengths, fitnesses, genes = s.init()
examples = []
for generation in range(1000):   
    fitnesses = sg.symbols._evaluate_against_data(genes,expr,SAMPLE,p0=[[0.1, 2, 1, 1]]) 
    meta = np.stack([length_of(genes),fitnesses],axis=1)
    sg.store.update(meta,genes)
    print(fitnesses.min())
    if fitnesses.min() < 0.23:
        examples.append(genes[np.argmin(fitnesses)])
    genes = s.select_and_mutate(meta,genes)

In [None]:
S = expr(chain(examples[0]))
display(S)

# Spark

This strategy focuses on the situation where the fitness eval is expensive. Evolution happens centrally and symbols are evaluated in parallel.

We broadcast the data that is used for ftting so that workers have it in memory to process their batches

In [None]:
#helper to deplot sparkge latest to a cluster - ensure deps installed there
#sg.__deploy__(["10.132.0.8","10.132.0.9","10.132.0.10"])
#its almost the same except we wrap the call - we broadcast the data so it is shared for everyone in memory
def par_eval(pop):
    with sg.sparkge_context() as sc:
        DATA = sc.sc.broadcast(SAMPLE)
        f = lambda genes : sg.symbols._evaluate_against_data(genes,expr,DATA,p0=[[0.1, 2, 1, 1]]) 
        return sc.apply_function(pop, f)

lengths, fitnesses, genes = s.init()
for generation in range(100):   
    fitnesses = par_eval(genes) 
    meta = np.stack([length_of(genes),fitnesses],axis=1)
    sg.store.update(meta,genes)
    genes = s.select_and_mutate(meta,genes)