## Imports 

In [1]:
import numpy as np
import os
import random
from random import choice 
from copy import deepcopy

## Reading databases

In [2]:
def read_databases(filename):
    """Read a three-column numeric CSV file into three parallel lists.

    Each non-empty line must contain at least three comma-separated numbers;
    the first three are interpreted as ``x1``, ``x2`` and ``y``.

    Parameters
    ----------
    filename : str
        Path of the CSV file to read. The file must not contain a header row.

    Returns
    -------
    tuple of (list of float, list of float, list of float)
        The ``x1``, ``x2`` and ``y`` columns, in file order.

    Raises
    ------
    ValueError
        If a field cannot be converted to ``float``.
    IndexError
        If a non-empty line has fewer than three fields.
    """
    x1 = []
    x2 = []
    y = []
    # Use a context manager so the file handle is closed even on a parse error.
    with open(filename) as csv_file:
        for line in csv_file:
            line = line.strip()
            # Skip blank lines (e.g. a trailing newline at the end of the file).
            if not line:
                continue
            csv_row = [float(number) for number in line.split(',')]
            x1.append(csv_row[0])
            x2.append(csv_row[1])
            y.append(csv_row[2])
    return x1, x2, y

In [3]:
# NOTE(review): the results are stored in *_train variables, but the path
# points at 'synth1-test.csv' — confirm whether the file name is intended.
filename = 'datasets/synth1/synth1-test.csv'
x1_train, x2_train, y_train = read_databases(filename)

In [4]:
# NOTE(review): the results are stored in *_test variables, but the path
# points at 'synth1-train.csv' — confirm whether the file name is intended.
filename = 'datasets/synth1/synth1-train.csv'
x1_test, x2_test, y_test = read_databases(filename)

## Defining individual class

In [5]:
# Grammar used for random tree generation. An 'expression' placeholder expands
# to one of: a binary operation [left, op, right], a 'variable', or a 'value'.
expression = [['expression', 'op', 'expression'], 'variable', 'value']
# Binary operators an 'op' node can be assigned.
operation = ['+', '-', '*']
# Input variable names a 'variable' node can be assigned.
variable = ['x1', 'x2']
# 'random' is a sentinel: Node replaces it with random.random()*5.
value = ['random']

In [6]:
# Lookup table: placeholder kind -> list of concrete symbols to sample from.
terminals_dict = {
    'op': operation,
    'variable': variable,
    'value': value,
}

In [7]:
class Node(object):
    """A node of a binary expression tree (one genetic-programming individual).

    A tree is built in two passes: ``get_expression`` expands 'expression'
    placeholders into a shape made of 'op' / 'variable' / 'value' nodes, and
    ``get_parameters`` replaces those placeholders with concrete symbols
    (an operator string, a variable name or a random constant).

    The class reads the module-level names ``expression`` and
    ``terminals_dict`` when generating or mutating trees.
    """

    def __init__(self, data='op', level=0):
        """Create a childless node.

        Parameters
        ----------
        data : object
            Placeholder kind or concrete symbol stored in the node.
        level : int
            Depth of the node; the root is at level 0.
        """
        self.data = data
        # Placeholder kind ('op', 'variable', 'value'); used by change_node to
        # pick a replacement symbol of the same kind.
        self.type = data
        self.left_child = None
        self.right_child = None
        self.level = level
        # Levels at or beyond this depth are never expanded into operations.
        self.max_level = 3
        # True for leaves (variables and constants), False for operators.
        self.terminal = False

    def __copy__(self):
        """Return a shallow copy: the clone shares its children with ``self``."""
        clone = Node(self.data, self.level)
        clone.type = self.type
        clone.left_child = self.left_child
        clone.right_child = self.right_child
        clone.max_level = self.max_level
        clone.terminal = self.terminal
        return clone

    @staticmethod
    def _sample_symbol(kind):
        """Draw a concrete symbol for ``kind`` from ``terminals_dict``."""
        symbol = choice(terminals_dict[kind])
        # 'random' is a sentinel standing for a random constant in [0, 5).
        if symbol == 'random':
            symbol = random.random()*5
        return symbol

    def initial_node(self):
        """Attach two unexpanded 'expression' children to this node."""
        self.add_left('expression')
        self.add_right('expression')

    def initialize(self):
        """Build a complete random tree below this node."""
        self.initial_node()
        self.expand_children()
        self.get_parameters()

    def add_left(self, value):
        """Replace the left child with a new node holding ``value``."""
        self.left_child = Node(value, level=self.level+1)

    def add_right(self, value):
        """Replace the right child with a new node holding ``value``."""
        self.right_child = Node(value, level=self.level+1)

    def show(self):
        """Print the tree in pre-order, one ``level : data`` line per node."""
        print(self.level, ": " , self.data)
        if self.left_child:
            self.left_child.show()

        if self.right_child:
            self.right_child.show()

    def get_expression(self):
        """Recursively expand 'expression' placeholders into a tree shape."""
        if self.data == 'expression':
            self.data = choice(expression)
            if isinstance(self.data, list):
                if self.level < self.max_level:
                    # Binary operation: [left, op, right].
                    self.add_left(self.data[0])
                    self.add_right(self.data[2])
                    self.data = self.data[1]
                else:
                    # Depth limit reached: force a constant leaf.
                    self.data = 'value'

        if self.data != 'op':
            self.terminal = True

        if self.left_child is not None:
            self.left_child.get_expression()

        if self.right_child is not None:
            self.right_child.get_expression()

        self.type = self.data

    def expand_children(self):
        """Expand both children; they must already exist (see initial_node)."""
        self.left_child.get_expression()
        self.right_child.get_expression()

    def get_parameters(self):
        """Recursively replace placeholder kinds with concrete symbols."""
        self.data = self._sample_symbol(self.data)

        if self.left_child is not None:
            self.left_child.get_parameters()

        if self.right_child is not None:
            self.right_child.get_parameters()

    def evaluate(self, x1=0, x2=0):
        """Evaluate the expression rooted at this node.

        Parameters
        ----------
        x1, x2 : number
            Values substituted for the 'x1' and 'x2' variables.

        Returns
        -------
        number
            The value of the expression.

        Raises
        ------
        ValueError
            If a non-terminal node does not hold '+', '-' or '*'.
        """
        if self.terminal:
            if self.data == 'x1':
                return x1
            elif self.data == 'x2':
                return x2
            else:
                return self.data

        # Fail loudly instead of silently returning None for a node that is
        # not a valid operator (e.g. a tree that was never initialised).
        if self.data not in ('+', '-', '*'):
            raise ValueError('Cannot evaluate a non-terminal node holding', self.data)

        if self.left_child is not None:
            result_left = self.left_child.evaluate(x1, x2)
        if self.right_child is not None:
            result_right = self.right_child.evaluate(x1, x2)

        if self.data == '+':
            return result_left + result_right
        elif self.data == '-':
            return result_left - result_right
        else:
            return result_left * result_right

    def error(self, x1, x2, y):
        """Return the squared error of the prediction at (x1, x2) against y."""
        return (y - self.evaluate(x1, x2))**2

    def fitness(self, x1_list, x2_list, y_list):
        """Return the normalised root squared error over a dataset.

        Computes ``sqrt(sum((y - f(x1, x2))**2) / sum((y - mean(y))**2))``;
        lower is better. There is no guard for a zero denominator (all ``y``
        equal), in which case the result is ``inf`` or ``nan``.
        """
        error_sum = 0
        for x1, x2, y in zip(x1_list, x2_list, y_list):
            error_sum += self.error(x1, x2, y)
        # Convert explicitly so plain lists and arrays are handled alike.
        targets = np.asarray(y_list, dtype=float)
        y_mean = targets - np.mean(targets)
        y_mean_sum = sum(y_mean**2)

        return (error_sum/y_mean_sum)**0.5

    def change_node(self):
        """Resample this node's symbol from the options for its kind."""
        self.data = self._sample_symbol(self.type)

    def mutate(self, m_level):
        """Resample one node, walking a random path down to ``m_level``.

        If the path reaches a leaf before ``m_level``, that leaf is resampled.
        """
        if m_level == self.level:
            return self.change_node()
        elif np.random.rand() < 0.5 and self.left_child is not None:
            self.left_child.mutate(m_level)
        elif self.right_child is not None:
            self.right_child.mutate(m_level)
        else:
            return self.change_node()

    def subtree(self, directions_list):
        """Return the node reached by following ``directions_list``.

        Parameters
        ----------
        directions_list : sequence of str
            Steps, each 'left' or 'right'. An empty path yields ``self``.

        Returns
        -------
        Node
            The node at the end of the path, or the deepest node reached if
            the path runs past a leaf.

        Raises
        ------
        ValueError
            If a step is neither 'left' nor 'right'.
        """
        if len(directions_list) == 0:
            return self

        step = directions_list[0]
        if step == 'left':
            child = self.left_child
        elif step == 'right':
            child = self.right_child
        else:
            raise ValueError('Please give a valid id for tree directions ["left"] or ["right"].', directions_list[0])

        # The path runs past a leaf: stop at the current node.
        if not child:
            return self
        return child.subtree(directions_list[1:])

    def alter_subtree(self, directions_list, subtree):
        """Replace the node at the end of ``directions_list`` with ``subtree``.

        When the walk reaches a leaf (or the path is empty) the current node
        takes over the contents of ``subtree`` in place; otherwise the child
        named by the last step is replaced. Levels of the grafted nodes are
        recomputed.

        Raises
        ------
        ValueError
            If a step is neither 'left' nor 'right'.
        """
        if self.left_child is None or len(directions_list) == 0:
            self.data = subtree.data
            # Keep the bookkeeping in sync with the new contents; otherwise a
            # former leaf that now holds an operator would still be evaluated
            # and mutated as a leaf.
            self.type = subtree.type
            self.terminal = subtree.terminal
            self.left_child = subtree.left_child
            self.right_child = subtree.right_child
            self.fix_level(self.level)
            return

        step = directions_list[0]
        is_last_step = len(directions_list) == 1

        if step == 'left':
            if self.left_child and not is_last_step:
                self.left_child.alter_subtree(directions_list[1:], subtree)
            else:
                self.left_child = subtree
                self.left_child.fix_level(self.level+1)
        elif step == 'right':
            if self.right_child and not is_last_step:
                self.right_child.alter_subtree(directions_list[1:], subtree)
            else:
                self.right_child = subtree
                self.right_child.fix_level(self.level+1)
        else:
            raise ValueError('Please give a valid id for tree directions ["left"] or ["right"].', directions_list[0])

    def gen_directions(self, level):
        """Return a random path of ``level`` steps, each 'left' or 'right'."""
        directions = ['left', 'right']
        return [np.random.choice(directions) for _ in range(level)]

    def fix_level(self, level):
        """Set this node's level and renumber all of its descendants."""
        self.level = level
        if self.left_child:
            self.left_child.fix_level(level+1)
        if self.right_child:
            self.right_child.fix_level(level+1)

    def crossover(self, parent, crossover_level1, crossover_level2):
        """Swap a random subtree of ``self`` with one of ``parent``, in place.

        Parameters
        ----------
        parent : Node
            The other tree; it is modified as well.
        crossover_level1, crossover_level2 : int
            Length of the random path used to pick the subtree in ``self``
            and in ``parent`` respectively.
        """
        directions_list1 = self.gen_directions(crossover_level1)
        directions_list2 = self.gen_directions(crossover_level2)
        print(directions_list1, directions_list2)
        # Copy both subtrees before grafting so the two trees share no nodes.
        subtree1 = deepcopy(self.subtree(directions_list1))
        subtree2 = deepcopy(parent.subtree(directions_list2))
        self.alter_subtree(directions_list1, subtree2)
        parent.alter_subtree(directions_list2, subtree1)
        self.fix_level(0)
        parent.fix_level(0)

In [8]:
# Build a random tree step by step: create the root operator node, attach two
# 'expression' placeholders, then expand them into a concrete tree shape.
root = Node()
root.initial_node()
root.expand_children()
root.show()

# Replace the 'op' / 'variable' / 'value' placeholders with concrete symbols.
root.get_parameters()
print()
root.show()

# Evaluate with the default inputs x1=0, x2=0.
root.evaluate()

0 :  op
1 :  variable
1 :  op
2 :  value
2 :  variable

0 :  -
1 :  x2
1 :  -
2 :  3.598398204469962
2 :  x1


-3.598398204469962

In [9]:
# Evaluate the tree at x1=2, x2=5.
root.evaluate(2,5)

3.401601795530038

In [10]:
# Squared error of the tree's prediction at (x1=2, x2=5) against the target y=2.
root.error(2,5, 2)

1.964487593233027

## Generating initial population

In [11]:
## Parameters
# Number of individuals (expression trees) in the population.
population_size = 10
# Probability of producing offspring by crossover; Genetic uses
# 1 - crossover_probability as the mutation probability.
crossover_probability = 0.9

In [12]:
class Genetic():
    """A population of random expression trees with rank-based selection.

    Offspring are produced either by subtree crossover or by point mutation;
    the two operators are mutually exclusive and chosen at random.
    """

    # Number of tree levels a crossover/mutation point is sampled from.
    _N_LEVELS = 3

    def __init__(self, population_size, crossover_probability=0.9):
        """Create ``population_size`` randomly initialised trees.

        Parameters
        ----------
        population_size : int
            Number of individuals to generate.
        crossover_probability : float
            Probability, in [0, 1], that offspring come from crossover; the
            mutation probability is its complement.

        Raises
        ------
        ValueError
            If ``crossover_probability`` is outside [0, 1].
        """
        if crossover_probability < 0 or crossover_probability > 1 :
            raise ValueError('Please give a valid number for crossover probability.')
        self.population_size = population_size
        self.crossover_probability = crossover_probability
        self.mutation_probability = 1 - crossover_probability
        self.population = []
        for i in range(self.population_size):
            root = Node()
            root.initialize()
            self.population.append(root)

    def sort_by_fitness(self, x1_list, x2_list, y_list):
        """Sort the population in place by ascending fitness (best first)."""
        self.population.sort(key = lambda x: x.fitness(x1_list, x2_list, y_list))

    def rank_selection_pair(self):
        """Draw two individuals with probability decreasing with their rank.

        The individual at index ``i`` of ``self.population`` gets a weight of
        ``len(population) - i``, so the population is expected to be sorted
        best-first (see ``sort_by_fitness``). Sampling is with replacement, so
        the same individual may be returned twice.

        Returns
        -------
        numpy.ndarray
            Object array holding the two selected individuals.

        Raises
        ------
        ValueError
            If the population is empty.
        """
        size = len(self.population)
        if size == 0:
            raise ValueError('Cannot select from an empty population.')
        # Weights n, n-1, ..., 1 normalised to sum to one. Uses this
        # instance's population rather than a module-level variable.
        p = np.arange(size, 0, -1)
        p = p/p.sum()
        return np.random.choice(self.population, size=2, p=p)

    def tournament(self):
        """Placeholder for tournament selection; not implemented."""
        pass

    def generate_offspring(self):
        """Return two children produced by crossover or by mutation."""
        if np.random.rand() < self.crossover_probability:
            ## Crossover
            print("Crossover")
            return self.crossover()
        else:
            ## Mutation
            print("Mutation")
            return self.mutation()

    def _level_weights(self):
        """Return sampling weights that favour deeper levels (1, 2, 3 -> /6)."""
        p = np.arange(1, self._N_LEVELS + 1)
        return p/p.sum()

    def _clone_selected_pair(self):
        """Select two parents, deep-copy them and print both copies."""
        parents = self.rank_selection_pair()
        children = [deepcopy(parents[0]), deepcopy(parents[1])]
        # show() prints the tree itself and returns None, so it must not be
        # wrapped in print().
        for child in children:
            child.show()
        return children

    def crossover(self, elitism=False):
        """Return two children obtained by swapping random subtrees.

        ``elitism`` is accepted for interface compatibility and is unused.
        """
        ## Choosing the crossover levels with a probability that favours
        ## deeper levels
        children = self._clone_selected_pair()
        p = self._level_weights()

        crossover_level1 = np.random.choice(self._N_LEVELS, p=p) + 1  ## Avoid this number to be zero
        crossover_level2 = np.random.choice(self._N_LEVELS, p=p) + 1
        children[0].crossover(children[1], crossover_level1, crossover_level2)
        return children

    def mutation(self, elitism=False):
        """Return two children, each with one randomly resampled node.

        ``elitism`` is accepted for interface compatibility and is unused.
        """
        children = self._clone_selected_pair()
        p = self._level_weights()
        for child in children:
            # Level 0 is allowed here: it resamples the root operator.
            mutation_level = np.random.choice(self._N_LEVELS, p=p)
            child.mutate(mutation_level)
        return children

In [13]:
# Create the initial random population.
solution1 = Genetic(population_size, crossover_probability)

In [14]:
# Sort the population in place, lowest (best) fitness first, using the
# x1_test / x2_test / y_test lists.
solution1.sort_by_fitness(x1_test, x2_test, y_test)

In [15]:
# Fitness and tree of the individual at index 0 of the sorted population.
print(solution1.population[0].fitness(x1_test, x2_test, y_test))
solution1.population[0].show()

1.0313029334552932
0 :  *
1 :  3.3180707471782456
1 :  2.4481773646159954


In [16]:
# Fitness and tree of the individual at index 1 of the sorted population.
print(solution1.population[1].fitness(x1_test, x2_test, y_test))
solution1.population[1].show()

1.1016180470229489
0 :  -
1 :  3.9226334066243007
1 :  -
2 :  +
3 :  3.310203279612275
3 :  x1
2 :  +
3 :  3.549892804912735
3 :  x2


In [17]:
# Fitness and tree of the individual at index 2 of the sorted population.
print(solution1.population[2].fitness(x1_test, x2_test, y_test))
solution1.population[2].show()

1.1700002399677105
0 :  +
1 :  1.6666845994447455
1 :  *
2 :  x1
2 :  +
3 :  1.5591774047770541
3 :  0.41695668746976666


In [18]:
# Fitness and tree of the individual at index 3 of the sorted population.
print(solution1.population[3].fitness(x1_test, x2_test, y_test))
solution1.population[3].show()

1.1971119067990734
0 :  -
1 :  0.544908022793642
1 :  -
2 :  x2
2 :  x2


In [19]:
# Fitness and tree of the individual at index 4 of the sorted population.
print(solution1.population[4].fitness(x1_test, x2_test, y_test))
solution1.population[4].show()

1.2000645388249587
0 :  -
1 :  1.293582707356371
1 :  -
2 :  1.0455647970990745
2 :  x2


## Selection

In [20]:
# Draw two individuals; earlier positions in the population list get a higher
# selection probability.
solution1.rank_selection_pair()

array([<__main__.Node object at 0x000001A7A655BFD0>,
       <__main__.Node object at 0x000001A7A655BEB8>], dtype=object)

In [21]:
# Produce two offspring: crossover with probability crossover_probability,
# mutation otherwise.
children = solution1.generate_offspring()

Crossover
0 :  -
1 :  0.544908022793642
1 :  -
2 :  x2
2 :  x2
None
0 :  -
1 :  2.838212058429778
1 :  2.4405613569856897
None
['right', 'right', 'left'] ['right', 'right']


In [22]:
# Print the first offspring tree.
children[0].show()

0 :  -
1 :  0.544908022793642
1 :  -
2 :  x2
2 :  2.4405613569856897


In [23]:
# Print the second offspring tree.
children[1].show()

0 :  -
1 :  2.838212058429778
1 :  x2


In [24]:
# Scratch check of the level-sampling weights: p becomes [1, 2, 3] / 6, so the
# draw returns 0, 1 or 2 with increasing probability.
p = list(range(3)) + np.array(1)
p = p/sum(p)
np.random.choice(3, p=p)


1

## Tests area