## Imports 

In [1]:
import numpy as np
import os
import random
from random import choice 

## Reading databases

In [2]:
def read_databases(filename):
    """Load a three-column numeric CSV file into three parallel lists.

    Every non-empty line must hold at least three comma-separated numbers;
    the first two are the inputs (x1, x2) and the third is the target (y).

    Parameters
    ----------
    filename : str
        Path of the CSV file to read (no header row is expected).

    Returns
    -------
    tuple of (list of float, list of float, list of float)
        The ``x1``, ``x2`` and ``y`` columns, in file order.

    Raises
    ------
    ValueError
        If a field cannot be parsed as a float.
    IndexError
        If a non-empty line has fewer than three fields.
    """
    x1 = []
    x2 = []
    y = []
    # The context manager guarantees the file is closed, even on a parse error.
    with open(filename) as csv_file:
        for line in csv_file:
            line = line.strip()
            if not line:
                # Skip blank lines (e.g. a trailing newline at end of file).
                continue
            csv_row = [float(number) for number in line.split(',')]
            x1.append(csv_row[0])
            x2.append(csv_row[1])
            y.append(csv_row[2])
    return x1, x2, y

In [3]:
# Training split: the *_train variables are read from the train CSV file.
filename = 'datasets/synth1/synth1-train.csv'
x1_train, x2_train, y_train = read_databases(filename)

In [4]:
# Test split: the *_test variables are read from the test CSV file.
filename = 'datasets/synth1/synth1-test.csv'
x1_test, x2_test, y_test = read_databases(filename)

## Defining individual class

In [5]:
# Grammar used to grow expression trees.
# An 'expression' expands either to the recursive production
# (expression, op, expression) or to one of the two leaf categories.
expression = [
    ['expression', 'op', 'expression'],
    'variable',
    'value',
]

# Concrete symbols available for each category.
operation = ['+', '-', '*']
variable = ['x1', 'x2']
# 'random' is a placeholder that is later replaced by a random constant.
value = ['random']

In [6]:
# Maps each grammar category to the list of concrete symbols it may become.
terminals_dict = {
    'op': operation,
    'variable': variable,
    'value': value,
}

In [7]:
class Node(object):
    """A node of a binary expression tree (one genetic-programming individual).

    A tree is built in two passes:

    1. ``get_expression`` expands the grammar placeholder ``'expression'``
       into a category (``'op'``, ``'variable'`` or ``'value'``) using the
       module-level ``expression`` grammar.
    2. ``get_parameters`` replaces each category by a concrete symbol drawn
       from the module-level ``terminals_dict``.

    Attributes set by the constructor:
        data        current content of the node (placeholder, category, or
                    concrete symbol / numeric constant).
        type        grammar category of the node; kept so that parameters can
                    be drawn again after ``data`` holds a concrete symbol.
        left_child / right_child   child nodes, or None.
        level       depth of this node (the root is level 0).
        max_level   depth from which the recursive production is no longer
                    expanded; inherited by every child.
        terminal    True once the node is known to be a leaf.
    """

    def __init__(self, data='op', level=0, max_level=3):
        """Create a node.

        Parameters
        ----------
        data : object
            Initial content (grammar placeholder/category or concrete symbol).
        level : int
            Depth of the node in the tree.
        max_level : int
            Depth limit for recursive expansion; propagated to children.
        """
        self.data = data
        self.type = data
        self.left_child = None
        self.right_child = None
        self.level = level
        self.max_level = max_level
        self.terminal = False

    def initial_node(self):
        """Attach two unexpanded 'expression' children to this node."""
        self.add_left('expression')
        self.add_right('expression')

    def initialize(self):
        """Build a complete random tree rooted at this node."""
        self.initial_node()
        self.expand_children()
        self.get_parameters()

    def add_left(self, value):
        """Create the left child one level deeper, holding ``value``."""
        self.left_child = Node(value, level=self.level+1,
                               max_level=self.max_level)

    def add_right(self, value):
        """Create the right child one level deeper, holding ``value``."""
        self.right_child = Node(value, level=self.level+1,
                                max_level=self.max_level)

    def show(self):
        """Print the tree in pre-order, one ``level :   data`` line per node."""
        print(self.level, ":  ", self.data)
        # Each child is checked on its own so a half-built node cannot crash.
        for child in (self.left_child, self.right_child):
            if child is not None:
                child.show()

    def get_expression(self):
        """Recursively expand 'expression' placeholders into categories.

        A node below ``max_level`` that draws the recursive production becomes
        an 'op' node with two new 'expression' children. A node at or beyond
        ``max_level`` that draws it is forced to a 'value' leaf instead, which
        bounds the depth of the tree.
        """
        if self.data == 'expression':
            self.data = choice(expression)
            if isinstance(self.data, list):
                if self.level < self.max_level:
                    self.add_left(self.data[0])
                    self.add_right(self.data[2])
                    self.data = self.data[1]
                else:
                    self.data = 'value'

        # Anything that is not an operator is a leaf.
        if self.data != 'op':
            self.terminal = True

        if self.left_child is not None:
            self.left_child.get_expression()

        if self.right_child is not None:
            self.right_child.get_expression()

        # Remember the category: ``data`` is overwritten by get_parameters.
        self.type = self.data

    def expand_children(self):
        """Expand the grammar placeholders of both children."""
        self.left_child.get_expression()
        self.right_child.get_expression()

    def get_parameters(self):
        """Replace categories by concrete symbols across the whole subtree.

        Operators and variables are drawn from ``terminals_dict``; the
        'random' placeholder becomes a uniform constant in [0, 5).
        The method may be called again to draw fresh symbols: once ``data``
        holds a concrete symbol, the category stored in ``type`` is used.
        """
        if self.data in terminals_dict:
            # First call on this node: ``data`` still holds the category.
            self.type = self.data
        self.data = choice(terminals_dict[self.type])
        if self.data == 'random':
            self.data = random.random()*5

        if self.left_child is not None:
            self.left_child.get_parameters()

        if self.right_child is not None:
            self.right_child.get_parameters()

    def evaluate(self, x1=0, x2=0):
        """Evaluate the expression rooted at this node.

        Parameters
        ----------
        x1, x2 : number
            Values substituted for the 'x1' and 'x2' variables.

        Returns
        -------
        number
            Value of the expression.

        Raises
        ------
        ValueError
            If an operator node lacks a child or holds an unknown operator
            (for example when the tree has not been fully initialized).
        """
        if self.terminal:
            if self.data == 'x1':
                return x1
            elif self.data == 'x2':
                return x2
            else:
                return self.data

        if self.left_child is None or self.right_child is None:
            raise ValueError(
                "cannot evaluate operator node %r at level %d: missing child"
                % (self.data, self.level))

        result_left = self.left_child.evaluate(x1, x2)
        result_right = self.right_child.evaluate(x1, x2)

        if self.data == '+':
            return result_left + result_right
        elif self.data == '-':
            return result_left - result_right
        elif self.data == '*':
            return result_left * result_right

        raise ValueError("unknown operator %r at level %d"
                         % (self.data, self.level))

    def error(self, x1, x2, y):
        """Return the squared error of the tree on one sample."""
        return (y - self.evaluate(x1, x2))**2

    def fitness(self, x1_list, x2_list, y_list):
        """Return the normalized root-mean-square error on a dataset.

        Computes ``sqrt(sum((y - f(x))**2) / sum((y - mean(y))**2))``;
        lower values mean a better fit and 0 is a perfect fit.

        Parameters
        ----------
        x1_list, x2_list, y_list : sequence of number
            Parallel sequences of inputs and targets.

        Raises
        ------
        ValueError
            If ``y_list`` is empty.
        """
        # Explicit conversion instead of relying on list - numpy scalar coercion.
        y_array = np.asarray(y_list, dtype=float)
        if y_array.size == 0:
            raise ValueError("fitness requires at least one sample")

        error_sum = 0
        for x1, x2, y in zip(x1_list, x2_list, y_list):
            error_sum += self.error(x1, x2, y)

        y_mean_sum = np.sum((y_array - y_array.mean())**2)

        return (error_sum/y_mean_sum)**0.5

In [8]:
# Build one tree step by step: create the root, attach two 'expression'
# children, then expand them into grammar categories.
root = Node()
root.initial_node()
root.expand_children()
# First listing: nodes still hold categories (op / variable / value).
root.show()

# Replace every category by a concrete operator, variable or constant.
root.get_parameters()
print()
# Second listing: the same tree with concrete symbols.
root.show()

# Evaluate with the default inputs x1=0, x2=0.
root.evaluate()

0 :   op
1 :   value
1 :   op
2 :   op
3 :   value
3 :   value
2 :   op
3 :   value
3 :   variable

0 :   +
1 :   0.5184079708290501
1 :   -
2 :   +
3 :   4.521674764358857
3 :   2.6150115859625744
2 :   +
3 :   0.32469927554585387
3 :   x2


7.330395045604628

In [9]:
# Evaluate the demo tree at the point (x1, x2) = (2, 5).
root.evaluate(x1=2, x2=5)

2.330395045604628

In [10]:
# Squared error of the demo tree at (x1, x2) = (2, 5) against the target y = 2.
root.error(x1=2, x2=5, y=2)

0.10916088616008411

## Generating initial population

In [11]:
# Parameters: number of individuals (expression trees) in the population.
population_size = 10

In [12]:
class Genetic():
    """A population of random expression trees plus selection utilities.

    Attributes:
        population_size   number of individuals created at construction.
        population        list of ``Node`` roots, one per individual.
    """

    def __init__(self, population_size):
        """Create ``population_size`` randomly initialized individuals."""
        self.population_size = population_size
        self.population = []
        for _ in range(self.population_size):
            root = Node()
            root.initialize()
            self.population.append(root)

    def sort_by_fitness(self, x1_list, x2_list, y_list):
        """Sort the population in place, lowest fitness value first.

        ``Node.fitness`` is an error measure, so the best individual ends up
        at index 0.
        """
        self.population.sort(key = lambda x: x.fitness(x1_list, x2_list, y_list))

    def rank_selection_pair(self):
        """Draw two individuals by linear rank selection.

        The individual at index ``i`` (0-based) of ``self.population`` gets a
        weight of ``n - i``, so earlier positions are more likely; call
        ``sort_by_fitness`` first for the ranks to be meaningful. Sampling is
        done with replacement, so the same individual may be returned twice.

        Returns
        -------
        numpy.ndarray
            Array holding the two selected individuals.

        Raises
        ------
        ValueError
            If the population is empty.
        """
        size = len(self.population)
        if size == 0:
            raise ValueError("cannot select parents from an empty population")
        # Weights n, n-1, ..., 1 normalized to sum to one. This uses the
        # instance's own population rather than any notebook-level global.
        weights = np.arange(size, 0, -1)
        p = weights / weights.sum()
        return np.random.choice(self.population, size=2, p=p)

    def crossover(self, probability=0.9):
        """Placeholder: crossover is not implemented yet."""
        pass

    def mutation(self, probability=0.1):
        """Placeholder: mutation is not implemented yet."""
        pass

In [13]:
# Create the initial random population.
solution1 = Genetic(population_size=population_size)

In [14]:
# Print the first individual of the population.
solution1.population[0].show()

0 :   -
1 :   *
2 :   0.8763086637812206
2 :   *
3 :   x2
3 :   x1
1 :   x1


In [15]:
# Print the individual at index 9 (the last one when population_size is 10).
solution1.population[9].show()

0 :   -
1 :   x2
1 :   x1


## Selection

In [16]:
# Order the population by ascending fitness value (an error measure).
# NOTE(review): ranking is driven by the *_test variables; confirm which data
# split is meant to guide selection.
solution1.sort_by_fitness(x1_list=x1_test, x2_list=x2_test, y_list=y_test)

In [17]:
# After sorting, index 0 holds the individual with the lowest fitness value.
solution1.population[0].show()

0 :   *
1 :   3.083526419863846
1 :   +
2 :   *
3 :   x1
3 :   x1
2 :   x1


In [18]:
# After sorting, index 9 is the last of 10 individuals (highest fitness value).
solution1.population[9].show()

0 :   +
1 :   3.5020910483755037
1 :   *
2 :   2.986716799017022
2 :   *
3 :   2.5191720398465884
3 :   3.1604551630016053


In [61]:
# Linear rank weights n, n-1, ..., 1, normalized so they sum to one.
p = np.arange(len(solution1.population), 0, -1)
p = p / p.sum()
p

array([0.18181818, 0.16363636, 0.14545455, 0.12727273, 0.10909091,
       0.09090909, 0.07272727, 0.05454545, 0.03636364, 0.01818182])

In [103]:
# Draw two parents through the class's own rank-selection method, so this
# cell does not repeat the sampling logic or depend on a notebook-level `p`.
parents = solution1.rank_selection_pair()
parents[0]

<__main__.Node at 0x1414b40db00>

In [68]:
# Print the individual at index 0 of the population again.
solution1.population[0].show()

0 :   *
1 :   3.083526419863846
1 :   +
2 :   *
3 :   x1
3 :   x1
2 :   x1
