In [1]:
from gp_poly_test import gp_sin_test
from IPython.display import Audio
from gp import GP
from gp_trees import GPNonTerminal, Constant
from figtree import showtree

In [2]:
from functools import reduce

<h3>How to call <code>gp_sin_test</code></h3>

This notebook can be used to test a slightly simplified version of genetic programming, in which the initial seed population is a set of single-variable <i>n</i>th order polynomials with randomised coefficients. During training, both mutation and crossover operators are used.

The test is performed with a single function call to `gp_sin_test`. First edit the function call below as needed, then hit the 'run all' button. 

The cell at the top of the notebook imports the code needed to run everything. The cell containing the call to `gp_sin_test` (further down) is the one you may want to edit to change the parameters of the test. The basic syntax for `gp_sin_test` is:

`gp_sin_test(n)`

... where `n` is the number of samples used, e.g.:

`gp_sin_test(100)`

... would create an array `X` of 100 values of _x_, and a `target` array with the corresponding values of _sin x_. The trees in the genetic program are then scored on how well they estimate `target` given `X`.

Further optional keyword parameters can be set, which otherwise will take default values:

<ul>
    <li><b>generations (default = 100)</b>: Number of generations the GP runs for</li>
    <li><b>pop (default = 100)</b>: Number of trees in a generation</li>
    <li><b>iv_min (default = -100)</b>: Minimum value of a variable in the independent variables dataset</li>
    <li><b>iv_max (default = 100)</b>: Maximum value of a variable in the independent variables dataset</li>
    <li><b>coeff_min (default = -20)</b>: Minimum value of a coefficient in the target polynomial</li>
    <li><b>coeff_max (default = 20)</b>: Maximum value of a coefficient in the target polynomial</li>
    <li><b>mutation_rate (default =  0.2)</b>: Probability that a coefficient value in a GP tree will mutate from one generation to the next</li>
    <li><b>mutation_sd (default = 1.0)</b>: Standard deviation of the normal distribution defining the deltas added to coefficient values when they are mutated</li>
    <li><b>temperature_coeff (default = 0.0)</b>: coefficient $ T $ determining the relationship between the standard deviation of the raw fitness scores and the standard deviation of the Gaussian noise term added to the raw fitness score to determine final fitness (the relative probability of survival): $ SD_{noise} = SD_{fitness_{raw}} \times T $ and $ fitness_{final} = fitness_{raw} + noise $</li>
    <li><b>elitism (default = 5)</b>: The <em>k</em> best trees in each generation pass on one copy each to the next generation without any chance of mutation: this parameter sets the value of <em>k</em>. If it's zero, no trees are protected from mutation.</li>
   <li><b>order (default = 6)</b>: The order of the initial seed polynomials</li>
    <li><b>crossover_rate (default = 0.2)</b></li>
</ul>

One word of caution: GP takes a while to run - the program will appear to be doing nothing in between showing the tree for the target polynomial and displaying the results. This is normal. Make yourself a cuppa and come back in a bit. The final cell will ding to tell you it's done, and the variable `result` will then contain a dictionary with more detailed output data, which you can use to create further visualisations.

In [3]:
# Run the sine test on 100 samples with order-7 seed polynomials, coefficient
# bounds of +/-0.001 and a very small mutation step.
# NOTE(review): the saved output below this cell shows a TypeError raised from
# `results["mses"].append(...)`; the later cells read `result`, so they only
# work once this call completes.
result = gp_sin_test(
    100,
    coeff_min=-0.001,
    coeff_max=0.001,
    order=7,
    crossover_rate=0.2,
    mutation_sd=0.00001,
)

  results["mses"].append(np.square(estimate - target).mean())


TypeError: cannot concatenate object of type '<class 'numpy.float64'>'; only Series and DataFrame objs are valid

In [None]:
# Audible "done" signal: auto-plays a bell sound when this cell is reached,
# i.e. after the long-running GP cell above has finished.
Audio(filename = './sounds/ElevatorBell.mp3', autoplay=True, rate=22000)

In [None]:
# Copy the tree stored under result['best'] (presumably the fittest tree of the
# run) so the simplification cells below do not operate on the original.
bestcopy = result['best'].copy()

In [None]:
def tree_reduce(tree_, func_):
    """Apply ``func_`` bottom-up to every ``GPNonTerminal`` node of a tree.

    Children are visited first, then ``func_`` is called on the node itself.
    The values returned by the recursive calls are discarded, so ``func_``
    has to apply any change to a child in place (e.g. by replacing it in its
    parent); only the result for the top-level node is returned.

    Parameters
    ----------
    tree_ : the node to process.
    func_ : callable taking a ``GPNonTerminal`` and returning a node.

    Returns
    -------
    ``func_(tree_)`` for a ``GPNonTerminal``; any other node is returned
    unchanged.
    """
    # Leaves (anything that is not a GPNonTerminal) pass through untouched.
    if not isinstance(tree_, GPNonTerminal):
        return tree_
    # Post-order traversal: handle the whole subtree before the node itself.
    for child in tree_:
        tree_reduce(child, func_)
    return func_(tree_)

def combine_constants(tree_):
    """Collapse a node whose children are all ``Constant`` into one ``Constant``.

    If every child of ``tree_`` is a ``Constant``, the node is evaluated by
    calling it, and a new ``Constant`` carrying that value (with the node's
    treebank, label and metadata) takes its place in the parent. The old node
    is then deleted.

    Parameters
    ----------
    tree_ : an iterable, callable tree node (used with ``GPNonTerminal`` nodes
        via ``tree_reduce``).

    Returns
    -------
    The replacement ``Constant`` when the node was collapsed, otherwise
    ``tree_`` unchanged. A node with no children is returned unchanged.
    """
    child_is_const = [isinstance(t, Constant) for t in tree_]
    # `all` replaces the previous multiply-reduce. The explicit non-empty check
    # keeps a childless node from being treated as "all constants"; the old
    # reduce() call had no initial value and raised TypeError in that case.
    if not (child_is_const and all(child_is_const)):
        return tree_
    replacement = Constant(tree_.treebank, tree_.label, tree_(), metadata=tree_.metadata)
    # Swap the new constant into the slot the old node occupied, if it has a parent.
    if tree_.parent:
        tree_.parent[tree_.parent.index_of(tree_)] = replacement
    tree_.delete()
    return replacement

# First simplification pass: fold all-constant subtrees into single constants,
# working on a further copy so `bestcopy` itself is not the tree being rewritten.
redux = tree_reduce(bestcopy.copy(), combine_constants)
# Show the simplified tree both as text and as a drawn figure.
print(redux)
showtree(redux)

In [None]:
def combine_operators(tree_):
    """Merge the constants of two nested nodes that share the same operator.

    Looks for the shape ``op(c0, op(c1, x))`` (children in either order): a
    two-child node holding one ``Constant`` and one ``GPNonTerminal``, where
    that inner node is itself a two-child node holding one ``Constant`` and one
    non-constant child, and both nodes have the same ``_operator``. When found,
    the node's children are replaced by ``[Constant(op(c0, c1)), x]``.

    NOTE(review): this rewrite only preserves the tree's value when the
    operator is associative and commutative; nothing here checks that —
    confirm against the operator set in gp_trees.

    Parameters
    ----------
    tree_ : the node to inspect; modified in place when the pattern matches.

    Returns
    -------
    ``tree_`` (the same object), whether or not it was rewritten.
    """
    def const_x_pair(tr, x_test):
        """Split a two-child node into ``(constant, other)``.

        Returns the pair only when ``tr`` has exactly two children, exactly one
        of which is a ``Constant`` and exactly one of which satisfies
        ``x_test``; otherwise returns ``(None, None)``.
        """
        # Check the node actually passed in. This previously tested the outer
        # `tree_`, so the inner node's child count was never verified.
        if len(tr) == 2:
            consts = [t for t in tr if isinstance(t, Constant)]
            others = [t for t in tr if x_test(t)]
            # Require exactly one of each. The old equal-length test also
            # accepted 0/0 and then failed with IndexError on consts[0].
            if len(consts) == 1 and len(others) == 1:
                return consts[0], others[0]
        return None, None

    const0, x0 = const_x_pair(tree_, lambda t: isinstance(t, GPNonTerminal))
    if const0 is not None:
        const1, x1 = const_x_pair(x0, lambda t: not isinstance(t, Constant))
        if (const1 is not None) and (tree_._operator == x0._operator):
            new_const = Constant(
                tree_.treebank,
                tree_.label,
                tree_._operator(const0[0][0].item(), const1[0][0].item()), # double indices won't be needed due to change to gp_trees
                metadata=tree_.metadata
            )
            tree_.children = [new_const, x1]
    return tree_

# Second simplification pass: merge constants across nested nodes that share an
# operator, again on a copy so `redux` is left as it was.
redux1 = tree_reduce(redux.copy(), combine_operators)
# Show the result both as text and as a drawn figure.
print(redux1)
showtree(redux1)

In [None]:
# Report the size of the simplified tree, as given by its size() method.
redux1.size()

In [None]:
# NOTE(review): `subtree` is not assigned in any earlier visible cell, so on a
# fresh kernel the loop condition raises NameError before the body can run.
# NOTE(review): the body always selects the same fixed index path, so if that
# subtree itself satisfies the condition the loop presumably never terminates
# — confirm whether a randomly chosen path was intended.
while subtree.size() > 20 or len(subtree()) == 1:
    # Re-select the node at a fixed index path into result['best'] and copy it.
    subtree = result['best'][0,0,0,1,0,1,1,0].copy(gp_copy=False)
# Display the chosen subtree's size and the length of its evaluated output.
subtree.size(), len(subtree())

In [None]:
# Draw the subtree selected in the previous cell.
showtree(subtree)

In [None]:
# NOTE(review): tree_reduce is defined with two required parameters
# (tree_, func_); this one-argument call raises TypeError as written. Decide
# which reducer was intended (and whether to pass a copy) or remove the cell.
tree_reduce(result['best'])

In [None]:
# Print the text form of the unsimplified tree stored under result['best'].
print(result['best'])

In [None]:
# Count the odd numbers in 0..9. In Python 3 `filter` returns a lazy iterator
# that has no len(), so it must be materialised into a list first.
len(list(filter(lambda x: bool(x%2), range(10))))

In [None]:
import numpy as np

# Scratch check: standard deviation of a two-element list as computed by np.std.
np.std([1,5])

In [4]:
import pandas as pd



In [10]:
# Toy frame for the experiment below: the integers 1..10 in 'data', plus a
# 'mean' column holding the constant 5.5 on every row.
df = pd.DataFrame({'data': list(range(1, 11))})
df['mean'] = 5.5
df

Unnamed: 0,data,mean
0,1,5.5
1,2,5.5
2,3,5.5
3,4,5.5
4,5,5.5
5,6,5.5
6,7,5.5
7,8,5.5
8,9,5.5
9,10,5.5


In [11]:
def func(x):
    """Return a small DataFrame whose length depends on the parity of ``x``.

    Even ``x`` gives a one-column, four-row frame holding 1..4; any other
    value gives a one-column, one-row frame holding 0.
    """
    values = [1, 2, 3, 4] if x % 2 == 0 else [0]
    return pd.DataFrame(values)

In [12]:
# Experiment: store a whole DataFrame in each cell of a new column by applying
# `func` element-wise to 'data' (even rows get a 4-row frame, odd rows a 1-row
# frame, as the displayed output shows).
df['xyz'] = df['data'].apply(func)
df

Unnamed: 0,data,mean,xyz
0,1,5.5,0 0 0
1,2,5.5,0 0 1 1 2 2 3 3 4
2,3,5.5,0 0 0
3,4,5.5,0 0 1 1 2 2 3 3 4
4,5,5.5,0 0 0
5,6,5.5,0 0 1 1 2 2 3 3 4
6,7,5.5,0 0 0
7,8,5.5,0 0 1 1 2 2 3 3 4
8,9,5.5,0 0 0
9,10,5.5,0 0 1 1 2 2 3 3 4


In [16]:
# Confirm the cell at index 1 (data == 2, even) holds the four-row frame.
len(df['xyz'][1])

4