## Imports 

In [1]:
import numpy as np
import os
import random
from random import choice 

## Reading databases

In [2]:
def read_databases(filename):
    x1 = []
    x2 = []
    y = []
    for line in open(filename):
        csv_row = line.strip().split(',')
        csv_row = [float(number) for number in csv_row]
        x1.append(csv_row[0])
        x2.append(csv_row[1])
        y.append(csv_row[2])
    return x1, x2, y

In [3]:
filename = 'datasets/synth1/synth1-test.csv'
x1_train, x2_train, y_train = read_databases(filename)

In [4]:
filename = 'datasets/synth1/synth1-train.csv'
x1_test, x2_test, y_test = read_databases(filename)

## Defining individual class

In [5]:
expression = [['expression', 'op', 'expression'], 'variable', 'value']
operation = ['+', '-', '*']
variable = ['x1', 'x2']
value = ['random']

In [6]:
terminals_dict = dict()
terminals_dict['op'] = operation
terminals_dict['variable'] = variable
terminals_dict['value'] = value

In [7]:
class Node(object):
    def __init__(self, data='op', level=0):
        self.data = data
        self.left_child = None
        self.right_child = None
        self.level = level
        self.max_level = 3
        self.terminal = False
    
    def initial_node(self):
        self.add_left('expression')
        self.add_right('expression')

    def add_left(self, value):
        self.left_child = Node(value, level=self.level+1)
    
    def add_right(self, value):
        self.right_child = Node(value, level=self.level+1)
        
    def show(self):
        print(self.level, ":  ", self.data)
        if self.left_child:
            self.left_child.show()
            self.right_child.show()
    
    def get_expression(self):
        if self.data == 'expression':
            self.data = choice(expression)
            if type(self.data) == list:
                if self.level < self.max_level: 
                    self.add_left(self.data[0])
                    self.add_right(self.data[2])
                    self.data = self.data[1]
                else:
                    self.data = 'value'
                    
        if self.data != 'op':
            self.terminal = True
                    
        if self.left_child is not None:
            self.left_child.get_expression()
        
        if self.right_child is not None:
            self.right_child.get_expression()
    
    def expand_children(self):
        self.left_child.get_expression()
        self.right_child.get_expression()
    
    def get_parameters(self):
        self.data = choice(terminals_dict[self.data])
        if self.data == 'random':
            self.data = random.random()*5
        
        if self.left_child is not None:
            self.left_child.get_parameters()
        
        if self.right_child is not None:
            self.right_child.get_parameters()
    
    def evaluate(self, x1=0, x2=0):
        
        if self.terminal:
            if self.data == 'x1':
                return x1
            elif self.data == 'x2':
                return x2
            else:
                return self.data
            
        if self.left_child is not None:
            result_left = self.left_child.evaluate(x1, x2)
        if self.right_child is not None:
            result_right = self.right_child.evaluate(x1, x2)
        
        if self.data == '+':
            return result_left + result_right
        elif self.data == '-':
            return result_left - result_right
        elif self.data == '*':
            return result_left * result_right
    
    def error(self, x1, x2, y):
        return (y - self.evaluate(x1, x2))**2
    
    def fitness(self, x1_list, x2_list, y_list):
        error_sum = 0
        for x1, x2, y in zip(x1_list, x2_list, y_list):
            error_sum += self.error(x1, x2, y)
        y_mean = y_list - np.mean(y_list)
        y_mean_sum = sum(y_mean**2)
        print(y_mean_sum)
            
        return (error_sum/y_mean_sum)**0.5

In [8]:
root = Node()
root.initial_node()
root.expand_children()
root.show()

root.get_parameters()
print()
root.show()

root.evaluate()

0 :   op
1 :   value
1 :   variable

0 :   +
1 :   2.6747840004669228
1 :   x2


2.6747840004669228

In [9]:
root.evaluate(2,5)

7.674784000466923

In [10]:
root.error(2,5, 2)

32.20317345195538

## Generating initial population

In [11]:
root.fitness(x1_test, x2_test, y_test)

20912.604212548344


1.1321722321070435

In [18]:
y_mean = y_test - np.mean(y_test)
y_mean = y_mean**2
y_mean = sum(y_mean)

In [19]:
y_mean

20912.604212548344