In [2]:
import numpy as np
import pandas as pd

np.random.seed(42)

data = pd.read_csv("data/coindesk-bpi-USD-close_data-2014-07-01_2017-07-01.csv")
print(data.head(5))

WINDOW_SIZE = 10 # num of inputs
test_period = 60 # no. days

training_data = data[:(len(data)-test_period)]['Close Price'].values
test_data = data[(len(data)-test_period):]['Close Price'].values

print("\nTraining data size: %d" % len(training_data - WINDOW_SIZE))
print("Test data size: %d" % len(test_data))

                  Date  Close Price
0  2014-07-01 00:00:00       635.59
1  2014-07-02 00:00:00       647.34
2  2014-07-03 00:00:00       640.69
3  2014-07-04 00:00:00       626.96
4  2014-07-05 00:00:00       628.33

Training data size: 1039
Test data size: 60


In [3]:
import operator
import math
import random

import numpy

from deap import algorithms
from deap import base
from deap import creator
from deap import tools
from deap import gp

from scoop import futures

In [4]:
def protected_div(left, right):
    try:
        return left / right
    except ZeroDivisionError:
        return 1

pset = gp.PrimitiveSet("MAIN", arity=WINDOW_SIZE)
pset.addPrimitive(operator.add, 2)
pset.addPrimitive(operator.sub, 2)
pset.addPrimitive(operator.mul, 2)
pset.addPrimitive(protected_div, 2)

try:
    pset.addEphemeralConstant("eph_const", lambda: random.uniform(-1, 1))
except:
    pass

In [5]:
# Creates the fitness object (minimisation if the weight is negative)
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))

# Create the individual object
creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin)

In [8]:
def evaluate_fitness(individual, X, window_size):
    # Compile the GP tree into a function
    func = toolbox.compile(expr=individual)
    
    sse = 0.0
    for i in range(len(X)-window_size):
        sse += (func(*X[i:i+window_size]) - X[i+window_size])**2
        
    # Fitness needs to be returned as an iterable according to DEAP doc.
    return sse / (len(X) - window_size),

In [9]:
toolbox = base.Toolbox()
toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=2)
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
toolbox.register("compile", gp.compile, pset=pset)
toolbox.register("evaluate", evaluate_fitness, X=training_data, window_size=WINDOW_SIZE)
toolbox.register("select", tools.selTournament, tournsize=3)
toolbox.register("mate", gp.cxOnePointLeafBiased, termpb=0.1)
toolbox.register("expr_mut", gp.genHalfAndHalf, min_=0, max_=2)
toolbox.register("mutate", gp.mutUniform, expr=toolbox.expr_mut, pset=pset)
toolbox.register("map", futures.map)

# Decorate the mate and mutate method to limit the height/tree depth of generated individuals
toolbox.decorate("mate", gp.staticLimit(key=operator.attrgetter("height"), max_value=20))
toolbox.decorate("mutate", gp.staticLimit(key=operator.attrgetter("height"), max_value=20))

In [None]:
stats_fit = tools.Statistics(lambda ind: ind.fitness.values)
stats_size = tools.Statistics(len)
mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size)
mstats.register("min", numpy.min)

pop = toolbox.population(n=300) # 300 individuals

# The hall of fame is a specific structure which contains the n best individuals 
hof = tools.HallOfFame(1) 
pop, log = algorithms.eaSimple(pop, 
                               toolbox, 
                               0.8, 
                               0.1, 
                               1000, 
                               stats=mstats,
                               halloffame=hof, 
                               verbose=True)

Be sure to start your program with the '-m scoop' parameter. You can find further information in the documentation.
Your map call has been replaced by the builtin serial Python map().
  This is separate from the ipykernel package so we can avoid doing imports until


   	      	fitness	size
   	      	-------	--- 
gen	nevals	min    	min 
0  	300   	324.374	3   
1  	255   	324.374	1   
2  	233   	324.374	1   
3  	238   	323.965	1   
4  	250   	323.915	1   
5  	245   	323.915	1   
6  	259   	323.915	1   
7  	230   	323.915	1   
8  	242   	323.915	1   
9  	249   	323.915	1   
10 	223   	323.915	1   
11 	234   	323.915	1   
12 	240   	323.885	1   
13 	247   	323.885	1   
14 	252   	323.885	1   
15 	248   	323.885	1   
16 	233   	323.885	1   
17 	254   	323.885	1   
18 	258   	323.885	1   
19 	246   	323.885	1   
20 	251   	323.885	1   
21 	222   	323.885	1   
22 	247   	323.885	1   
23 	256   	323.885	1   
24 	256   	323.847	1   
25 	240   	323.847	1   
26 	251   	323.885	1   
27 	244   	323.885	1   
28 	247   	323.885	1   
29 	233   	323.885	1   
30 	236   	323.885	1   
31 	232   	323.885	1   
32 	248   	323.885	1   
33 	248   	323.885	1   
34 	251   	323.885	1   
35 	244   	323.885	1   
36 	246   	323.885	1   
37 	235   	323.885	1   
38 	244   	323.8

339	251   	312.836	21  
340	237   	312.836	5   
341	250   	312.822	11  
342	266   	312.848	3   
343	242   	312.848	13  
344	243   	312.848	15  
345	256   	312.848	3   
346	259   	312.708	7   
347	246   	312.67 	9   
348	239   	312.685	5   
349	251   	312.67 	21  
350	235   	312.602	13  
351	251   	312.67 	9   
352	227   	312.62 	9   
353	252   	312.552	7   
354	241   	312.552	7   
355	254   	312.519	5   
356	239   	312.264	3   
357	240   	312.303	5   
358	256   	312.367	9   
359	255   	312.367	3   
360	244   	312.367	5   
361	252   	312.367	7   
362	241   	312.367	9   
363	246   	312.346	7   
364	247   	312.362	5   
365	245   	312.297	5   
366	247   	312.367	7   
367	253   	312.354	3   
368	246   	312.171	5   
369	246   	312.336	9   
370	252   	312.318	9   
371	247   	312.336	11  
372	260   	312.272	17  
373	248   	312.211	9   
374	234   	312.22 	5   
375	246   	312.2  	21  
376	259   	312.203	13  
377	256   	312.052	5   
378	260   	312.127	11  
379	231   	312.127	7   
380	249   	312.1

681	246   	307.796	89  
682	240   	307.784	5   
683	244   	307.784	89  
684	251   	307.784	81  
685	246   	307.784	141 
686	261   	307.784	9   
687	252   	307.784	111 
688	237   	307.779	99  
689	247   	307.779	81  
690	242   	307.785	5   
691	234   	307.775	9   
692	251   	307.723	99  
693	223   	307.273	95  
694	246   	307.273	5   
695	244   	307.235	105 
696	255   	307.314	5   
697	236   	307.455	5   
698	251   	307.455	13  
699	257   	307.376	9   
700	263   	307.515	3   
701	230   	307.515	31  
702	240   	307.592	31  
703	237   	307.419	35  
704	243   	307.376	3   
705	243   	307.534	21  
706	262   	307.542	569 
707	242   	307.525	1   
708	239   	307.522	103 
709	240   	307.484	169 
710	240   	307.522	5   
711	242   	307.522	13  
712	251   	307.283	39  
713	251   	307.441	17  
714	245   	307.353	543 
715	244   	307.241	5   
716	247   	307.107	3   
717	252   	307.107	1   
718	251   	307.01 	289 
719	232   	307.107	173 
720	250   	307.107	121 
721	230   	306.922	743 
722	241   	306.9

In [None]:
import matplotlib.pyplot as plt
%matplotlib notebook
from sklearn.metrics import mean_squared_error as MSE
from sklearn.metrics import mean_absolute_error as MAE


def evaluate_training(func, X_training, window_size):
    y = []
    for i in range(len(X_training)-window_size):
        y.append(func(*X_training[i:i+window_size])) 
        
    return y


def evaluate_test(func, X_test, X_training, window_size):
    X = np.concatenate((X_training[-window_size:], X_test)) 
    y = []              
    for i in range(len(X)-window_size):
        y.append(func(*X[i:i+window_size])) 
    return y 


func = toolbox.compile(expr=hof[0])

y_training, y_hat_training = training_data[WINDOW_SIZE:], evaluate_training(func, training_data, WINDOW_SIZE) 
y_test, y_hat_test = test_data, evaluate_test(func, test_data, training_data, WINDOW_SIZE)

print(len(y_training), len(y_hat_training))
print(len(y_test), len(y_hat_test))

print("Training MSE: %.3f, MAE: %.3f" % (MSE(y_training, y_hat_training), MAE(y_training, y_hat_training)))
print("Test MSE %.3f, MAE: %.3f" % (MSE(y_test, y_hat_test), MAE(y_test, y_hat_test)))

fig, axes = plt.subplots(nrows=2, figsize=(10, 6))

axes[0].set_title('Training', fontsize=8)
axes[0].plot(np.arange(0, len(y_hat_training), 1), 
             y_hat_training, 
             label='Genetic Programming', 
             linestyle='--',
             marker='o', 
             markersize=3)
axes[0].plot(np.arange(0, len(y_training), 1), 
             y_training, 
             label='True', 
             linestyle='--',
             marker='o',
             markersize=3)
axes[0].grid(b=True, which='major', color='black', linestyle='--')
axes[0].set_ylabel('Close Price [$]')

axes[1].set_title('Test', fontsize=8)
axes[1].plot(np.arange(0, len(y_hat_test), 1),
             y_hat_test, 
             label='Genetic Programming',
             linestyle='--',
             marker='o',
             markersize=3)
axes[1].plot(np.arange(0, len(y_test), 1),
             y_test, 
             label='True', 
             linestyle='--', 
             marker='o', 
             markersize=3)
axes[1].grid(b=True, which='major', color='black', linestyle='--')
axes[1].set_ylabel('Close Price [$]')

plt.legend(loc='best')
plt.show()