In [1]:
from __future__ import division

from deap import algorithms
from deap import base
from deap import creator
from deap import tools

from sklearn.preprocessing import StandardScaler

from read_series import read_series
from tabulate import tabulate

from segmentsf import *

import numpy as np
import random

import numpy as np

import multiprocessing
import time
import csv

In [2]:
def register_general_operators(toolbox, S):
    """Register on ``toolbox`` the default operators for the series set ``S``.

    The following aliases are (re)registered, in this order:

    * ``generate``   -- ``generate.random_generate`` with ``L=len(S[0])``
      and ``S=S``.
    * ``population`` -- ``tools.initRepeat`` building a ``list`` out of
      ``toolbox.generate``.
    * ``mutate``     -- ``mutation.mutation`` with ``mu=0``, ``sigma=0.05``,
      bounds taken from ``generate.minimos(S)`` / ``generate.maximos(S)``
      and ``desp`` equal to 10% of ``len(S[0])`` (truncated to an int).
    * ``mate``       -- ``crossover.crossover``.
    * ``select``     -- ``tools.selTournament`` with ``tournsize=10``.
    * ``evaluate``   -- ``fitness.fitness_fastdtw`` with ``S=S`` and
      ``vp=0.01``.

    The toolbox is modified in place; nothing is returned.
    """
    largo = len(S[0])

    # Individual and population factories ('population' needs 'generate'
    # to be registered first, because it reads toolbox.generate).
    toolbox.register('generate', generate.random_generate, L=largo, S=S)
    toolbox.register('population', tools.initRepeat, list, toolbox.generate)

    # Variation operators.
    limites_inferiores = generate.minimos(S)
    limites_superiores = generate.maximos(S)
    toolbox.register(
        'mutate',
        mutation.mutation,
        mu=0,
        sigma=0.05,
        lows=limites_inferiores,
        ups=limites_superiores,
        desp=int(largo * 0.1),
    )
    toolbox.register('mate', crossover.crossover)

    # Selection and evaluation.
    toolbox.register('select', tools.selTournament, tournsize=10)
    toolbox.register('evaluate', fitness.fitness_fastdtw, S=S, vp=0.01)

    # Parallel evaluation is currently disabled; kept for reference:
    #pool = multiprocessing.Pool()
    #toolbox.register('map', pool.map)

In [4]:
# Scaler shared by the experiment cells below; each of them re-fits it on the
# dataset being processed via ss.fit_transform(...).
ss = StandardScaler()

# Reproducibility: the evolutionary runs are stochastic, so seed both
# random-number generators imported by this notebook before any experiment.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Fixed parameters
POPULATION = 100    # passed as n= to toolbox.population
GENERACIONES = 50   # passed as ngen= to the DEAP algorithms
CXPB = 0.2          # passed as cxpb= (crossover probability)
MUTPB = 0.1         # passed as mutpb= (mutation probability)
LAMBDA = 100        # passed as lambda_= to the (mu, lambda) / (mu + lambda) algorithms
MU = 70             # passed as mu= to the (mu, lambda) / (mu + lambda) algorithms

# Dataset base names; each one is read from '../../Datos/aux/<name>.csv'.
datasets = ['45_series', '50words', 'adiac', 'cricket',
            'ecg_five_days_df', 'gestures', 'medical_images',
            'syntetic_control']

# Single-objective fitness with weight -1.0 (DEAP treats a negative weight as
# minimisation), and a list-based individual carrying that fitness.
creator.create('FitnessMin', base.Fitness, weights=(-1.0,))
creator.create('Individual', list, fitness=creator.FitnessMin)

In [4]:
# DTW window selection.
# The smallest radius percentages will have the same effect on 45_series,
# because the window will be too small.
ventanas = [.01, .02, .03, .04, .05, .06, .07, .08,
            .09, .10, .15]
ventanas_resultados = []
# Materialised as a list so the labels can be indexed inside the loop.
# NOTE(review): the label is the raw value followed by '%' (e.g. '0.01%');
# presumably 0.01 stands for 1% -- confirm before relying on these labels.
ventanas_names = list(map(lambda x: str(x)+'%', ventanas))

toolbox = base.Toolbox()
for datos in datasets:
    S = read_series('../../Datos/aux/{0}.csv'.format(datos))
    S_norm = ss.fit_transform(S)
    register_general_operators(toolbox, S_norm)
    for i, ventana in enumerate(ventanas):
        # Override the default 'evaluate' with the window under test.
        toolbox.register('evaluate',
                         fitness.fitness_fastdtw,
                         S=S_norm,
                         vp=ventana)
        hof = tools.HallOfFame(1)
        pop = toolbox.population(n=POPULATION)

        # Time only the evolutionary run itself.
        t1 = time.time()
        pop, log = algorithms.eaSimple(pop,
                                       toolbox,
                                       cxpb=CXPB,
                                       mutpb=MUTPB,
                                       ngen=GENERACIONES,
                                       stats=None,
                                       halloffame=hof,
                                       verbose=False)
        tfinal = time.time() - t1

        # Undo the scaling of the best individual and score it with
        # fitness.fitness_dtw against the raw (unscaled) series.
        C = ss.inverse_transform(hof[0])
        fitness_mejor = fitness.fitness_dtw(C, S)[0]
        # Single-argument print: same text as before, valid with or without
        # the print statement.
        print('Fitness mejor serie[{0}][{1}]: '.format(datos, ventanas_names[i])
              + ' ' + str(fitness_mejor))
        ventanas_resultados.append([ventanas_names[i], datos, fitness_mejor, tfinal])

# The context manager closes (and therefore flushes) the CSV file even though
# the kernel keeps running after this cell.
with open('ventana_medidas.csv', 'w') as f:
    csvf = csv.writer(f)
    csvf.writerow(['Ventana', 'Dataset', 'Fitness', 'Tiempo(s)'])
    csvf.writerows(ventanas_resultados)

Fitness mejor serie[45_series][0.01%]:  4047.78475997
Fitness mejor serie[45_series][0.02%]:  2997.61013399
Fitness mejor serie[45_series][0.03%]:  5820.34283791
Fitness mejor serie[50words][0.01%]:  4.30957388088
Fitness mejor serie[50words][0.02%]:  4.92265860537
Fitness mejor serie[50words][0.03%]:  3.56853925762
Fitness mejor serie[gestures][0.01%]:  7.37898559233
Fitness mejor serie[gestures][0.02%]:  7.63360952507
Fitness mejor serie[gestures][0.03%]:  3.79678912704
Fitness mejor serie[medical_images][0.01%]:  0.374804214701
Fitness mejor serie[medical_images][0.02%]:  0.515134718381
Fitness mejor serie[medical_images][0.03%]:  0.582181550237


In [5]:
# Algorithm selection: run each DEAP algorithm on every dataset and record
# the best fitness found and the wall-clock time of the run.
algoritmos = [algorithms.eaSimple, algorithms.eaMuCommaLambda, algorithms.eaMuPlusLambda]
algoritmos_resultados = []
algoritmos_names = ['eaSimple', 'eaMuCommaLambda', 'eaMuPlusLambda']

toolbox = base.Toolbox()


for datos in datasets:
    S = read_series('../../Datos/aux/{0}.csv'.format(datos))
    S = ss.fit_transform(S)
    register_general_operators(toolbox, S)
    for i, algoritmo in enumerate(algoritmos):
        print('Algoritmo[{0}]: {1}'.format(algoritmos_names[i], datos))

        pop = toolbox.population(n=POPULATION)
        hof = tools.HallOfFame(3)
        stats = tools.Statistics(lambda ind: ind.fitness.values)
        stats.register("avg", np.mean)
        stats.register("std", np.std)
        stats.register("min", np.min)
        stats.register("max", np.max)

        # Keyword arguments shared by the three algorithms; the two
        # (mu, lambda) variants additionally take mu and lambda_.
        parametros = dict(cxpb=CXPB,
                          mutpb=MUTPB,
                          ngen=GENERACIONES,
                          stats=stats,
                          halloffame=hof,
                          verbose=False)
        if algoritmos_names[i] != 'eaSimple':
            parametros.update(mu=MU, lambda_=LAMBDA)

        t1 = time.time()
        pop, log = algoritmo(pop, toolbox, **parametros)
        tfinal = time.time() - t1

        mejor = hof[0].fitness.values[0]
        # Single-argument print: same text as before, valid with or without
        # the print statement.
        print('Fitness mejor serie: ' + ' ' + str(mejor))
        algoritmos_resultados.append([algoritmos_names[i], datos, mejor, tfinal])


print(tabulate(algoritmos_resultados, headers=['Algoritmo', 'Dataset', 'Fitness', 'Tiempo(s)']))

# The context manager closes (and therefore flushes) the CSV file even though
# the kernel keeps running after this cell.
with open('algoritmos_medidas.csv', 'w') as f:
    csvf = csv.writer(f)
    csvf.writerow(['Algoritmo', 'Dataset', 'Fitness', 'Tiempo(s)'])
    csvf.writerows(algoritmos_resultados)

Algoritmo[eaSimple]: 45_series


KeyboardInterrupt: 

In [None]:
# Candidate individual generators, their result accumulator and their labels.
# This cell only declares them; no experiment is run here.
generar = [generate.random_generate, generate.sample_generate]
generarar = generar  # original (misspelled) name kept as an alias
generar_resultados = []
generar_names = ['random', 'sample']

In [None]:
# Candidate tournament values, their result accumulator and their labels.
# NOTE(review): presumably these are values for the tournsize argument of
# tools.selTournament -- confirm when the experiment is written.
rondas_torneo = [5, 10, 20]
rondas_torneo_resultados = []
rondas_torneo_names = list(map(str, rondas_torneo))
ronda_torneo_names = rondas_torneo_names  # original (inconsistent) name kept as an alias

In [None]:
# Candidate horizontal mutation shifts (as fractions), their result
# accumulator and their percentage labels.
desplazamiento_horizontal_mutacion = [.05, .075, .1, .15, .2]
# Original (misspelled) name kept as an alias.
deplazamiento_horizontal_mutacion = desplazamiento_horizontal_mutacion
desplazamiento_horizontal_mutacion_resultados = []
desplazamiento_horizontal_mutacion_names = ['5%', '7.5%', '10%', '15%', '20%']

In [None]:
# Candidate vertical mutation shifts, written as (fraction, label) pairs so
# each value sits next to its percentage label.
_desplazamientos_verticales = [
    (.01, '1%'),
    (.025, '2.5%'),
    (.05, '5%'),
    (.075, '7.5%'),
    (.1, '10%'),
    (.15, '15%'),
    (.2, '20%'),
]
# Unzip the pairs into the two parallel lists.
desplazamiento_vertical_mutacion, desplazamiento_vertical_mutacion_names = map(
    list, zip(*_desplazamientos_verticales))
desplazamiento_vertical_mutacion_resultados = []

In [7]:
# Scratch check: symmetric range of integer shifts for a series of length 123,
# using the same 10% truncation as desp = int(len(S[0]) * 0.1) in
# register_general_operators.
largo_ejemplo = 123
desp_ejemplo = int(largo_ejemplo * 0.1)
range(-desp_ejemplo, desp_ejemplo + 1)

[-12,
 -11,
 -10,
 -9,
 -8,
 -7,
 -6,
 -5,
 -4,
 -3,
 -2,
 -1,
 0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12]