In [2]:
import numpy as np
import pandas as pd

np.random.seed(42)

data = pd.read_csv("data/coindesk-bpi-USD-close_data-2014-07-01_2017-07-01.csv")
print(data.head(5))

WINDOW_SIZE = 10 # num of inputs
test_period = 60 # no. days

training_data = data[:(len(data)-test_period)]['Close Price'].values
test_data = data[(len(data)-test_period):]['Close Price'].values

print("\nTraining data size: %d" % len(training_data) - WINDOW_SIZE)
print("Test data size: %d" % len(test_data))

                  Date  Close Price
0  2014-07-01 00:00:00       635.59
1  2014-07-02 00:00:00       647.34
2  2014-07-03 00:00:00       640.69
3  2014-07-04 00:00:00       626.96
4  2014-07-05 00:00:00       628.33

Training data size: 1039
Test data size: 60


In [3]:
import operator
import math
import random

import numpy

from deap import algorithms
from deap import base
from deap import creator
from deap import tools
from deap import gp

from scoop import futures

In [4]:
def protected_div(left, right):
    try:
        return left / right
    except ZeroDivisionError:
        return 1

pset = gp.PrimitiveSet("MAIN", arity=WINDOW_SIZE)
pset.addPrimitive(operator.add, 2)
pset.addPrimitive(operator.sub, 2)
pset.addPrimitive(operator.mul, 2)
pset.addPrimitive(protected_div, 2)

try:
    pset.addEphemeralConstant("eph_const", lambda: random.uniform(-1, 1))
except:
    pass

In [5]:
# Creates the fitness object (minimisation if the weight is negative)
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))

# Create the individual object
creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin)

In [8]:
def evaluate_fitness(individual, X, window_size):
    # Compile the GP tree into a function
    func = toolbox.compile(expr=individual)
    
    sse = 0.0
    for i in range(len(X)-window_size):
        sse += (func(*X[i:i+window_size]) - X[i+window_size])**2
        
    # Fitness needs to be returned as an iterable according to DEAP doc.
    return np.sqrt(sse / (len(X) - window_size)),

In [9]:
toolbox = base.Toolbox()
toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=2)
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
toolbox.register("compile", gp.compile, pset=pset)
toolbox.register("evaluate", evaluate_fitness, X=training_data, window_size=WINDOW_SIZE)
toolbox.register("select", tools.selTournament, tournsize=3)
toolbox.register("mate", gp.cxOnePointLeafBiased, termpb=0.1)
toolbox.register("expr_mut", gp.genHalfAndHalf, min_=0, max_=2)
toolbox.register("mutate", gp.mutUniform, expr=toolbox.expr_mut, pset=pset)
toolbox.register("map", futures.map)

# Decorate the mate and mutate method to limit the height/tree depth of generated individuals
toolbox.decorate("mate", gp.staticLimit(key=operator.attrgetter("height"), max_value=20))
toolbox.decorate("mutate", gp.staticLimit(key=operator.attrgetter("height"), max_value=20))

In [None]:
stats_fit = tools.Statistics(lambda ind: ind.fitness.values)
stats_size = tools.Statistics(len)
mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size)
mstats.register("min", numpy.min)

pop = toolbox.population(n=300) # 300 individuals

# The hall of fame is a specific structure which contains the n best individuals 
hof = tools.HallOfFame(1) 
pop, log = algorithms.eaSimple(pop, 
                               toolbox, 
                               0.8, 
                               0.1, 
                               1000, 
                               stats=mstats,
                               halloffame=hof, 
                               verbose=True)

Be sure to start your program with the '-m scoop' parameter. You can find further information in the documentation.
Your map call has been replaced by the builtin serial Python map().


   	      	fitness	size
   	      	-------	--- 
gen	nevals	min    	min 
0  	300   	17.9989	3   


  """


1  	258   	17.9989	1   
2  	255   	17.9989	1   
3  	254   	17.9989	1   
4  	246   	17.9989	1   
5  	244   	17.9989	1   
6  	234   	17.9989	1   
7  	250   	17.9989	1   
8  	253   	17.9989	1   
9  	242   	17.9971	1   
10 	250   	17.9971	1   
11 	239   	17.9968	1   
12 	250   	17.9968	1   
13 	248   	17.9968	1   
14 	261   	17.9968	1   
15 	243   	17.9968	1   
16 	248   	17.9968	1   
17 	263   	17.9968	1   
18 	237   	17.9968	1   
19 	256   	17.9968	1   
20 	234   	17.9968	1   
21 	262   	17.9968	1   
22 	238   	17.9968	1   
23 	231   	17.9968	1   
24 	231   	17.9968	1   
25 	233   	17.9968	1   
26 	250   	17.9968	1   
27 	246   	17.9968	1   
28 	251   	17.9968	1   
29 	245   	17.9968	1   
30 	242   	17.9968	1   
31 	239   	17.9968	1   
32 	262   	17.9968	1   
33 	239   	17.9968	1   
34 	239   	17.9968	1   
35 	233   	17.9968	1   
36 	245   	17.9968	1   
37 	261   	17.9968	1   
38 	241   	17.9968	1   
39 	249   	17.9968	1   
40 	242   	17.9968	1   
41 	247   	17.9968	3   
42 	246   	17.99

343	252   	17.6586	5   
344	256   	17.653 	5   
345	262   	17.6561	3   
346	248   	17.6561	1   
347	251   	17.6527	157 
348	247   	17.6494	3   
349	257   	17.6494	1   
350	253   	17.6409	7   
351	236   	17.6409	7   
352	253   	17.6409	145 
353	250   	17.6401	5   
354	250   	17.6401	1   
355	246   	17.6007	7   
356	243   	17.6245	5   
357	237   	17.6245	7   
358	243   	17.6243	7   
359	256   	17.6243	3   
360	232   	17.6168	7   
361	245   	17.5557	5   
362	239   	17.5557	1   
363	248   	17.5554	5   
364	243   	17.5554	3   
365	245   	17.5554	3   
366	228   	17.5554	1   
367	246   	17.5538	17  
368	243   	17.5536	7   
369	246   	17.5536	1   
370	235   	17.553 	3   
371	231   	17.553 	3   
372	261   	17.5527	25  
373	240   	17.5527	3   
374	224   	17.5503	3   
375	235   	17.5503	7   
376	236   	17.5503	7   
377	236   	17.5503	5   
378	258   	17.5503	1   
379	270   	17.5496	3   
380	245   	17.5494	13  
381	238   	17.5495	13  
382	251   	17.548 	11  
383	243   	17.548 	7   
384	228   	17.54

In [None]:
import matplotlib.pyplot as plt
%matplotlib notebook
from sklearn.metrics import mean_squared_error as MSE
from sklearn.metrics import mean_absolute_error as MAE


def evaluate_training(func, X_training, window_size):
    y = []
    for i in range(len(X_training)-window_size):
        y.append(func(*X_training[i:i+window_size])) 
        
    return y


def evaluate_test(func, X_test, X_training, window_size):
    X = np.concatenate((X_training[-window_size:], X_test)) 
    y = []              
    for i in range(len(X)-window_size):
        y.append(func(*X[i:i+window_size])) 
    return y 


func = toolbox.compile(expr=hof[0])

y_training, y_hat_training = training_data[WINDOW_SIZE:], evaluate_training(func, training_data, WINDOW_SIZE) 
y_test, y_hat_test = test_data, evaluate_test(func, test_data, training_data, WINDOW_SIZE)

print(len(y_training), len(y_hat_training))
print(len(y_test), len(y_hat_test))

print("Training MSE: %.3f, MAE: %.3f" % (MSE(y_training, y_hat_training), MAE(y_training, y_hat_training)))
print("Test MSE %.3f, MAE: %.3f" % (MSE(y_test, y_hat_test), MAE(y_test, y_hat_test)))

fig, axes = plt.subplots(nrows=2, figsize=(10, 6))

axes[0].set_title('Training', fontsize=8)
axes[0].plot(np.arange(0, len(y_hat_training), 1), 
             y_hat_training, 
             label='Genetic Programming', 
             linestyle='--',
             marker='o', 
             markersize=3)
axes[0].plot(np.arange(0, len(y_training), 1), 
             y_training, 
             label='True', 
             linestyle='--',
             marker='o',
             markersize=3)
axes[0].grid(b=True, which='major', color='black', linestyle='--')
axes[0].set_ylabel('Close Price [$]')

axes[1].set_title('Test', fontsize=8)
axes[1].plot(np.arange(0, len(y_hat_test), 1),
             y_hat_test, 
             label='Genetic Programming',
             linestyle='--',
             marker='o',
             markersize=3)
axes[1].plot(np.arange(0, len(y_test), 1),
             y_test, 
             label='True', 
             linestyle='--', 
             marker='o', 
             markersize=3)
axes[1].grid(b=True, which='major', color='black', linestyle='--')
axes[1].set_ylabel('Close Price [$]')

plt.legend(loc='best')
plt.show()