In [1]:
def generate_data(n_samples=500, coeff=(2.4, -5.3, 4.2, -2.1)):
    """
    Generate a synthetic, noise-free linear data set.

    Every feature value is drawn with ``random()`` (uniform on [0, 1)) and
    each target is the dot product of its feature row with ``coeff``.
    Because the generating coefficients are known, we have an idea of the
    desired result of any fitting procedure.
    This is only for demonstration purposes, real data is needed in practice.

    Parameters
    ----------
    n_samples : int, optional
        Number of rows to generate (default 500).
    coeff : sequence of float, optional
        True coefficients, one per feature (default 2.4, -5.3, 4.2, -2.1).

    Returns
    -------
    tuple of numpy.ndarray
        The feature matrix, shape (n_samples, len(coeff)), and the target
        vector, shape (n_samples,).
    """
    coeff = list(coeff)
    n_features = len(coeff)
    x = [[random() for _ in range(n_features)] for _ in range(n_samples)]
    y = [dot(row, coeff) for row in x]
    # The reshape keeps the matrix two-dimensional even when n_samples is 0.
    return array(x).reshape(n_samples, n_features), array(y)


In [2]:
def multiple_linear_regression(inputs, outputs):
    """
    Fit ordinary least squares (no intercept term) and report the fit quality.

    The coefficients are obtained from the normal equations using the
    pseudo-inverse: ``coeff = pinv(X^T X) . (X^T Y)``.  For data produced by
    generate_data() this is expected to equal the generating coefficients,
    i.e. it is the best outcome the genetic search can hope to reach.

    Parameters
    ----------
    inputs : array-like
        Feature matrix, one row per observation.
    outputs : array-like
        Target values, one per observation.

    Returns
    -------
    dict
        'COD'   : coefficient of determination in percent,
                  ``(1 - SSR / SST) * 100``.
        'coeff' : the fitted coefficient vector.
        'error' : the average squared residual, ``SSR / len(outputs)``.

    Note: when every output is identical SST is 0 and COD is not finite.
    """
    X, Y = array(inputs), array(outputs)
    X_t = X.transpose()
    coeff = dot(pinv(dot(X_t, X)), dot(X_t, Y))
    Y_p = dot(X, coeff)
    # SST: total sum of squares, the squared deviations of Y from its mean.
    SST = ((Y - mean(Y)) ** 2).sum()
    # SSR: sum of squared residuals, the part of SST the model does NOT explain.
    SSR = ((Y - Y_p) ** 2).sum()
    # COD: coefficient of determination, a measure of how good the model is.
    COD = (1 - (SSR / SST)) * 100.0
    av_error = SSR / len(Y)
    return {'COD': COD, 'coeff': coeff, 'error': av_error}

In [3]:
def check_termination_condition(best_individual):
    """
    Decide whether the evolutionary loop should stop.

    The loop stops when the best individual's coefficient of determination
    ('COD') is better than or equal to the expected 98.5 percent, or when the
    generation budget has been used up.  The budget test reads the
    module-level ``generation_count`` and ``max_generations``.

    Returns a built-in ``bool`` (never a NumPy boolean), so identity checks
    such as ``result is False`` behave as expected.
    """
    # '>=' instead of '==': the budget test must still fire if the counter
    # ever moves past max_generations, otherwise the loop could run forever.
    return bool(best_individual['COD'] >= 98.5
                or generation_count >= max_generations)

In [4]:
def create_individual(individual_size):
    """
    Build one candidate solution.

    The individual is a plain list of ``individual_size`` genes; every gene
    is assigned randomly with ``random()``.
    """
    genes = []
    for _ in range(individual_size):
        genes.append(random())
    return genes


def create_population(individual_size, population_size):
    """
    Build the starting population.

    Returns a list of ``population_size`` individuals, each one produced by
    a separate call to create_individual(individual_size).
    """
    population = []
    for _ in range(population_size):
        population.append(create_individual(individual_size))
    return population


def get_fitness(individual, inputs, targets=None):
    """
    Calculate the fitness of an individual.

    The individual is treated as a coefficient vector: its predictions are
    ``dot(inputs, individual)`` and they are compared with the target values.
    We use the error to get the best individual.

    Parameters
    ----------
    individual : sequence of float
        Candidate coefficients, one per input column.
    inputs : array-like
        Feature matrix, one row per observation.
    targets : array-like, optional
        Expected output per observation.  When omitted, the module-level
        ``outputs`` is used, which keeps existing two-argument calls working.

    Returns
    -------
    dict
        'COD'   : coefficient of determination in percent,
                  ``(1 - SSR / SST) * 100``.
        'error' : the average squared residual, ``SSR / len(targets)``.
        'coeff' : the individual itself, returned unchanged.
    """
    if targets is None:
        # Fall back to the notebook-level data set for two-argument callers.
        targets = outputs
    targets = array(targets)
    predicted_outputs = dot(array(inputs), array(individual))
    # SST: squared deviations of the targets from their mean.
    SST = ((targets - mean(targets)) ** 2).sum()
    # SSR: squared differences between targets and predictions.
    SSR = ((targets - predicted_outputs) ** 2).sum()
    COD = (1 - (SSR / SST)) * 100.0
    av_error = SSR / len(targets)
    return {'COD': COD, 'error': av_error, 'coeff': individual}


def evaluate_population(population):
    """
    Score every individual and keep the fittest ones.

    Each individual is scored with get_fitness() against the module-level
    ``inputs``.  The scores are ranked by ascending 'error' and the first
    ``selection_size`` of them are returned.

    Side effects: the coefficients of the best individual are appended to
    ``best_individuals_stash`` and its error and COD are printed.
    """
    scored = []
    for candidate in tqdm(population):
        scored.append(get_fitness(candidate, inputs))
    # Stable sort, lowest error first.
    scored.sort(key=lambda entry: entry['error'])
    survivors = scored[:selection_size]
    champion = survivors[0]
    best_individuals_stash.append(champion['coeff'])
    print('Error: ', champion['error'],
          'COD: ', champion['COD'])
    return survivors


def crossover(parent_1, parent_2):
    """
    Return one offspring built from two parents.

    Half of the loci (rounded down) are picked at random and take their gene
    from ``parent_1['coeff']``; all remaining loci take theirs from
    ``parent_2['coeff']``.  Unlike real chromosomes, genes are not linked:
    every locus is inherited independently.  The number of loci is the
    module-level ``individual_size``.
    """
    positions = list(range(0, individual_size))
    from_first_parent = sample(positions, floor(0.5*(individual_size)))
    return [
        parent_1['coeff'][locus] if locus in from_first_parent
        else parent_2['coeff'][locus]
        for locus in positions
    ]


def mutate(individual):
    """
    Mutate an individual in place and return it.

    ``floor(probability_of_gene_mutating * individual_size)`` loci are picked
    at random.  For each of them a sign of -1 or +1 is chosen, which decides
    whether a fresh ``random()`` value is deducted from or added to the gene.
    """
    mutation_count = floor(probability_of_gene_mutating*individual_size)
    targets = sample(list(range(0, individual_size)), mutation_count)
    for position in targets:
        sign = choice([-1, 1])
        individual[position] = individual[position] + sign*random()
    return individual


def get_new_generation(selected_individuals):
    """
    Breed a new population from the selected individuals.

    ``population_size`` pairs of distinct parents are sampled from
    ``selected_individuals`` and each pair yields one child via crossover().
    Afterwards ``floor(probability_of_individual_mutating * population_size)``
    randomly chosen children are replaced by the result of mutate().
    """
    # All parent pairs are drawn before any crossover takes place.
    couples = []
    for _ in range(population_size):
        couples.append(sample(selected_individuals, 2))
    offspring = [crossover(first, second) for first, second in couples]

    mutation_count = floor(probability_of_individual_mutating*population_size)
    chosen = sample(list(range(population_size)), mutation_count)
    # Compute every mutant first, then write them back into the population.
    mutants = {index: mutate(offspring[index]) for index in chosen}
    for index, mutant in mutants.items():
        offspring[index] = mutant
    return offspring

In [5]:
import pandas as pd
import numpy as np
from random import random, sample, choice
from math import floor
from tqdm import tqdm                  # instantly make your loops show a smart progress meter
from numpy import array, dot, mean
from numpy.linalg import pinv
from sys import exit

In [6]:
# MAIN CODE
# Initialization: build the synthetic data set used by the cells below.
# `inputs` and `outputs` are read as module-level globals by
# evaluate_population() and get_fitness(), so this cell has to run first.
inputs, outputs = generate_data()
print(inputs.shape)


(500, 4)


In [7]:
# Preview the generated feature matrix (one row per sample, one column per coefficient).
inputs

array([[0.82928884, 0.7914251 , 0.11348121, 0.52817478],
       [0.38021648, 0.65639506, 0.13819931, 0.96459584],
       [0.41304763, 0.28510538, 0.34782921, 0.89924431],
       ...,
       [0.30648863, 0.79493632, 0.77491432, 0.27583662],
       [0.12403831, 0.49625996, 0.55969417, 0.2771384 ],
       [0.99201965, 0.76975983, 0.80812592, 0.94929362]])

In [8]:
# Check that there is exactly one target value per input row.
print(outputs.shape)

(500,)


In [9]:
# Preview the generated target vector.
outputs

array([-2.83680575e+00, -4.01158843e+00, -9.47274585e-01, -1.49012707e+00,
       -4.30560034e-01, -1.46663540e-02,  2.19417393e+00, -1.04243903e+00,
       -3.91205327e+00, -1.65225247e+00, -1.27385164e+00, -2.74881724e-01,
        1.40052858e+00, -2.38649626e+00, -1.01393380e+00, -1.33828924e-01,
        6.24356418e-01, -1.59310170e+00,  7.84727262e-01, -4.50787391e-02,
        2.97030362e+00, -3.47714666e+00, -6.01600874e+00, -8.78934814e-01,
        3.13278664e+00,  1.53592127e+00, -7.29157121e-02, -5.36422447e+00,
       -2.13128566e+00, -3.56524568e-01,  2.04470882e+00, -1.85460102e+00,
        4.21023370e-01,  6.96792855e-01,  3.06323082e+00,  6.55086128e-01,
       -3.42425228e+00,  1.24633933e+00,  3.24606191e-02, -1.40331668e+00,
       -3.01518070e+00, -6.29807621e-01,  2.07615844e+00,  2.93313828e-01,
       -9.83598665e-01, -3.84088117e+00,  1.56813366e+00,  2.58244294e+00,
       -2.93597839e-01,  6.53892263e-01,  1.22013800e+00, -4.50423740e-01,
        3.57686627e-02, -

In [12]:
# Hyper-parameters of the genetic search.  The functions defined above read
# these names as module-level globals.
individual_size = len(inputs[0])
population_size = 500
selection_size  = floor(0.1*population_size)
max_generations = 50
probability_of_individual_mutating = 0.1
probability_of_gene_mutating = 0.25

# Closed-form least-squares fit, kept as the reference the search should approach.
best_possible = multiple_linear_regression(inputs, outputs)

# Seed the stash with a random individual so generation 0 has a "current best".
best_individuals_stash = [create_individual(individual_size)]
# NOTE(review): the first generation holds 1000 individuals while every later
# one holds population_size (500) -- confirm this is intentional.
initial_population = create_population(individual_size, 1000)
current_population = initial_population
termination = False
generation_count = 0
while not termination:
    # Best individual known before this generation is evaluated.
    current_best_individual = get_fitness(best_individuals_stash[-1], inputs)
    print('Generation: ', generation_count)
    print('current_best_individual: ', current_best_individual)

    best_individuals = evaluate_population(current_population)
    current_population = get_new_generation(best_individuals)
    # NOTE(review): the stop test looks at the individual fetched at the top
    # of the iteration, so it lags one generation behind the newest result.
    termination = check_termination_condition(current_best_individual)

    print ("status termination = ",termination)
    if not termination:
        generation_count += 1

# Final report: number of completed generations and the best individual found.
print("generation = ",generation_count)
print(get_fitness(best_individuals_stash[-1], inputs))

Generation:  0
current_best_individual:  {'COD': -39.59958428160952, 'error': 6.463205042019981, 'coeff': [0.03277137315784173, 0.3139258594122334, 0.6766678004719331, 0.661151378098761]}


100%|████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:00<00:00, 3606.34it/s]


Error:  4.992125305600489 COD:  -7.82554673922673
status termination =  False
Generation:  1
current_best_individual:  {'COD': -7.82554673922673, 'error': 4.992125305600489, 'coeff': [0.2630042764724788, 0.00345702536888004, 0.20118385836470998, 0.11130881742024545]}


100%|██████████████████████████████████████████████████████████████████████████████| 500/500 [00:00<00:00, 3504.84it/s]


Error:  3.571794589517768 COD:  22.852356285428034
status termination =  False
Generation:  2
current_best_individual:  {'COD': 22.852356285428034, 'error': 3.571794589517768, 'coeff': [0.20874105759428108, -0.9361400397605871, 0.9413883206324353, 0.03380274926485749]}


100%|██████████████████████████████████████████████████████████████████████████████| 500/500 [00:00<00:00, 3506.16it/s]


Error:  3.2366267387170757 COD:  30.091689144611767
status termination =  False
Generation:  3
current_best_individual:  {'COD': 30.091689144611767, 'error': 3.2366267387170757, 'coeff': [0.4539642415011904, -0.9361400397605871, 0.9413883206324353, -0.4055834620157216]}


100%|██████████████████████████████████████████████████████████████████████████████| 500/500 [00:00<00:00, 3530.53it/s]


Error:  2.986501788201313 COD:  35.49416962967242
status termination =  False
Generation:  4
current_best_individual:  {'COD': 35.49416962967242, 'error': 2.986501788201313, 'coeff': [0.4493308475638291, -1.7700284246148903, 0.2907022140288096, 0.02089001726635753]}


100%|██████████████████████████████████████████████████████████████████████████████| 500/500 [00:00<00:00, 3505.89it/s]


Error:  2.4710068379197527 COD:  46.628410349364
status termination =  False
Generation:  5
current_best_individual:  {'COD': 46.628410349364, 'error': 2.4710068379197527, 'coeff': [0.4493308475638291, -1.7700284246148903, 0.9413883206324353, -0.39181476956083205]}


100%|██████████████████████████████████████████████████████████████████████████████| 500/500 [00:00<00:00, 3433.83it/s]


Error:  2.2910211156651017 COD:  50.515944759926626
status termination =  False
Generation:  6
current_best_individual:  {'COD': 50.515944759926626, 'error': 2.2910211156651017, 'coeff': [0.7201048263911747, -1.7700284246148903, 0.9413883206324353, -0.7176971195013012]}


100%|██████████████████████████████████████████████████████████████████████████████| 500/500 [00:00<00:00, 3433.01it/s]


Error:  2.1415960557418634 COD:  53.743395554218495
status termination =  False
Generation:  7
current_best_individual:  {'COD': 53.743395554218495, 'error': 2.1415960557418634, 'coeff': [0.4539642415011904, -1.7700284246148903, 1.5227736273477133, -0.7176971195013012]}


100%|██████████████████████████████████████████████████████████████████████████████| 500/500 [00:00<00:00, 3505.89it/s]


Error:  1.5084483821402745 COD:  67.41883234587348
status termination =  False
Generation:  8
current_best_individual:  {'COD': 67.41883234587348, 'error': 1.5084483821402745, 'coeff': [0.7201048263911747, -2.670482500062059, 1.6642185625428692, -0.7176971195013012]}


100%|██████████████████████████████████████████████████████████████████████████████| 500/500 [00:00<00:00, 3580.97it/s]


Error:  1.4372999415695096 COD:  68.95557652486666
status termination =  False
Generation:  9
current_best_individual:  {'COD': 68.95557652486666, 'error': 1.4372999415695096, 'coeff': [0.7201048263911747, -2.786955323894451, 1.7316483032234604, -0.7176971195013012]}


100%|██████████████████████████████████████████████████████████████████████████████| 500/500 [00:00<00:00, 3410.47it/s]


Error:  1.2743850865238697 COD:  72.47439511251928
status termination =  False
Generation:  10
current_best_individual:  {'COD': 72.47439511251928, 'error': 1.2743850865238697, 'coeff': [0.7201048263911747, -2.670482500062059, 2.0537729985207753, -1.1995460424831812]}


100%|██████████████████████████████████████████████████████████████████████████████| 500/500 [00:00<00:00, 3255.46it/s]


Error:  1.11566009422365 COD:  75.9027163240783
status termination =  False
Generation:  11
current_best_individual:  {'COD': 75.9027163240783, 'error': 1.11566009422365, 'coeff': [1.275554225597824, -3.587689939559045, 1.6642185625428692, -0.6677171864718257]}


100%|██████████████████████████████████████████████████████████████████████████████| 500/500 [00:00<00:00, 3364.70it/s]


Error:  0.870010927041668 COD:  81.20852379804491
status termination =  False
Generation:  12
current_best_individual:  {'COD': 81.20852379804491, 'error': 0.870010927041668, 'coeff': [1.5035004937596836, -3.323665088220043, 1.9741133319561834, -1.1995460424831812]}


100%|██████████████████████████████████████████████████████████████████████████████| 500/500 [00:00<00:00, 3457.50it/s]


Error:  0.6335591061756568 COD:  86.3156766240689
status termination =  False
Generation:  13
current_best_individual:  {'COD': 86.3156766240689, 'error': 0.6335591061756568, 'coeff': [1.428002171041816, -3.5809478078536223, 2.485427280747646, -1.1995460424831812]}


100%|██████████████████████████████████████████████████████████████████████████████| 500/500 [00:00<00:00, 3320.12it/s]


Error:  0.5758218673221024 COD:  87.56275055861408
status termination =  False
Generation:  14
current_best_individual:  {'COD': 87.56275055861408, 'error': 0.5758218673221024, 'coeff': [1.275554225597824, -3.587689939559045, 2.882742776316264, -1.1995460424831812]}


100%|██████████████████████████████████████████████████████████████████████████████| 500/500 [00:00<00:00, 3342.26it/s]


Error:  0.44079350321540217 COD:  90.47924529659163
status termination =  False
Generation:  15
current_best_individual:  {'COD': 90.47924529659163, 'error': 0.44079350321540217, 'coeff': [1.428002171041816, -3.9685971662644834, 2.882742776316264, -1.1995460424831812]}


100%|██████████████████████████████████████████████████████████████████████████████| 500/500 [00:00<00:00, 3323.81it/s]


Error:  0.33990357192347415 COD:  92.6583797004959
status termination =  False
Generation:  16
current_best_individual:  {'COD': 92.6583797004959, 'error': 0.33990357192347415, 'coeff': [2.2189247784866573, -4.292388456622954, 2.882742776316264, -1.1995460424831812]}


100%|██████████████████████████████████████████████████████████████████████████████| 500/500 [00:00<00:00, 3481.52it/s]


Error:  0.24269658760197846 COD:  94.75796566633254
status termination =  False
Generation:  17
current_best_individual:  {'COD': 94.75796566633254, 'error': 0.24269658760197846, 'coeff': [2.2189247784866573, -4.292388456622954, 2.882742776316264, -1.6611701527675886]}


100%|██████████████████████████████████████████████████████████████████████████████| 500/500 [00:00<00:00, 3555.68it/s]


Error:  0.17854842000199198 COD:  96.14351006283003
status termination =  False
Generation:  18
current_best_individual:  {'COD': 96.14351006283003, 'error': 0.17854842000199198, 'coeff': [2.2189247784866573, -4.537809469206622, 3.16600501758069, -1.4764611738803037]}


100%|██████████████████████████████████████████████████████████████████████████████| 500/500 [00:00<00:00, 3477.94it/s]


Error:  0.17153029313310333 COD:  96.29509547393224
status termination =  False
Generation:  19
current_best_individual:  {'COD': 96.29509547393224, 'error': 0.17153029313310333, 'coeff': [2.2189247784866573, -4.537809469206622, 3.16600501758069, -1.5149345504800058]}


100%|██████████████████████████████████████████████████████████████████████████████| 500/500 [00:00<00:00, 3320.36it/s]


Error:  0.14348582366476792 COD:  96.90083152187205
status termination =  False
Generation:  20
current_best_individual:  {'COD': 96.90083152187205, 'error': 0.14348582366476792, 'coeff': [2.2189247784866573, -4.719099864919626, 3.16600501758069, -1.6611701527675886]}


100%|██████████████████████████████████████████████████████████████████████████████| 500/500 [00:00<00:00, 3342.27it/s]


Error:  0.12071611973319837 COD:  97.39263724092271
status termination =  False
Generation:  21
current_best_individual:  {'COD': 97.39263724092271, 'error': 0.12071611973319837, 'coeff': [2.2189247784866573, -4.719099864919626, 3.4018735848249637, -1.5149345504800058]}


100%|██████████████████████████████████████████████████████████████████████████████| 500/500 [00:00<00:00, 3410.47it/s]


Error:  0.0907599508132242 COD:  98.03966432743938
status termination =  False
Generation:  22
current_best_individual:  {'COD': 98.03966432743938, 'error': 0.0907599508132242, 'coeff': [2.2189247784866573, -4.719099864919626, 3.5665656371335, -1.6611701527675886]}


100%|██████████████████████████████████████████████████████████████████████████████| 500/500 [00:00<00:00, 3170.57it/s]


Error:  0.0907599508132242 COD:  98.03966432743938
status termination =  False
Generation:  23
current_best_individual:  {'COD': 98.03966432743938, 'error': 0.0907599508132242, 'coeff': [2.2189247784866573, -4.719099864919626, 3.5665656371335, -1.6611701527675886]}


100%|██████████████████████████████████████████████████████████████████████████████| 500/500 [00:00<00:00, 3298.29it/s]


Error:  0.07874836593232215 COD:  98.29910407057541
status termination =  False
Generation:  24
current_best_individual:  {'COD': 98.29910407057541, 'error': 0.07874836593232215, 'coeff': [2.2189247784866573, -4.782871027279995, 3.5859821737500632, -1.6611701527675886]}


100%|██████████████████████████████████████████████████████████████████████████████| 500/500 [00:00<00:00, 3433.80it/s]


Error:  0.05893972931565104 COD:  98.72695332166595
status termination =  False
Generation:  25
current_best_individual:  {'COD': 98.72695332166595, 'error': 0.05893972931565104, 'coeff': [2.2189247784866573, -5.040332724976551, 3.5859821737500632, -1.6611701527675886]}


100%|██████████████████████████████████████████████████████████████████████████████| 500/500 [00:00<00:00, 3290.97it/s]

Error:  0.055678628671788556 COD:  98.79739024749824
status termination =  True
generation =  25
{'COD': 98.79739024749824, 'error': 0.055678628671788556, 'coeff': [2.3746791559348503, -4.850718946629284, 3.5859821737500632, -2.114135310045026]}





In [11]:
# Reference fit computed by multiple_linear_regression(); the coefficients
# found by the genetic search should approach these values.
# NOTE(review): this cell's execution count (11) is lower than that of the cell
# which assigns best_possible (12) -- re-run the notebook top to bottom to confirm.
best_possible

{'COD': 100.0,
 'coeff': array([ 2.4, -5.3,  4.2, -2.1]),
 'error': 1.0419126154367418e-29}