In [2]:
import os
import time
import warnings
import numpy as np
import random as rnd
import pandas as pd
from collections import defaultdict

# Librería Genética
from deap import base, creator, tools, algorithms

from sklearn.utils import shuffle
# Subfunciones de estimadores
from sklearn.base import clone
# [https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/base.py][30]
from sklearn.base import is_classifier
# [https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/base.py][535]
from sklearn.model_selection._validation import _fit_and_score
# [https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/model_selection/_validation.py][346]
from sklearn.model_selection._search import BaseSearchCV
# [https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/model_selection/_search.py][386]
from sklearn.model_selection._search import check_cv
# [https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/model_selection/_split.py][1866]
from sklearn.model_selection._search import _check_param_grid
# [https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/model_selection/_search.py][343]
from sklearn.metrics.scorer import check_scoring
# [https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/metrics/scorer.py][250]
from sklearn.utils.validation import _num_samples
# [https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/utils/validation.py][105]
from sklearn.utils.validation import indexable
# [https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/utils/validation.py][208]
from multiprocessing import Pool

# Selección para estimadores
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

# Metricas para estimadores
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error

# Estimadores
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

## Funciones Genéticas

In [3]:
"""
Extracción
"""

def _createDataset(frecuencias, values):
    names_ = frecuencias[0].columns.values
    dataset = pd.DataFrame()
    dataset[names_[0]] = frecuencias[values[0]][names_[0]]
    dataset[names_[1]] = frecuencias[values[1]][names_[1]]
    dataset[names_[2]] = frecuencias[values[2]][names_[2]]
    dataset[names_[3]] = frecuencias[values[3]][names_[3]]
    dataset[names_[4]] = frecuencias[values[4]][names_[4]]
    dataset[names_[5]] = frecuencias[0][names_[5]]
    # separación de data en X,y 
    y = dataset[names_[5]]
    del dataset[names_[5]]
    X = dataset
    return X,y

seed = 7
frecuencias = []
frecuencias.append(pd.read_csv('Tx_0x01'))
frecuencias.append(pd.read_csv('Tx_0x02'))
frecuencias.append(pd.read_csv('Tx_0x03'))
frecuencias.append(pd.read_csv('Tx_0x04'))
frecuencias.append(pd.read_csv('Tx_0x05'))
frecuencias.append(pd.read_csv('Tx_0x06'))
frecuencias.append(pd.read_csv('Tx_0x07'))
frecuencias[0] = shuffle(frecuencias[0], random_state=seed).reset_index(drop=True)
frecuencias[1] = shuffle(frecuencias[1], random_state=seed).reset_index(drop=True)
frecuencias[2] = shuffle(frecuencias[2], random_state=seed).reset_index(drop=True)
frecuencias[3] = shuffle(frecuencias[3], random_state=seed).reset_index(drop=True)
frecuencias[4] = shuffle(frecuencias[4], random_state=seed).reset_index(drop=True)
frecuencias[5] = shuffle(frecuencias[5], random_state=seed).reset_index(drop=True)
frecuencias[6] = shuffle(frecuencias[6], random_state=seed).reset_index(drop=True)


In [4]:
values = np.random.randint(6,size=5) # orden randomización
X,y = _createDataset(frecuencias, values)

In [5]:
print("X")
display(X.head())
print("y")
display(y.head())

X


Unnamed: 0,Be01,Be02,Be03,Be04,Be05
0,-66,-77,-78,-73,-66
1,-68,-64,-71,-73,-68
2,-77,-84,-72,-69,-77
3,-74,-60,-79,-65,-89
4,-65,-56,-79,-55,-67


y


0    10
1     7
2     7
3     3
4     2
Name: Sector, dtype: int64

In [6]:
def _initIndividual(individuo, maxints):
	"""[Iniciar Individuo]
	Arguments:
		pcls {[creator.Individual]} -- [Iniciar individuo con indices aleatorios]
		maxints {[params_size]} -- [lista de máximos índices]
	Returns:
		[creator.Individual] -- [Creación de individuo]
	"""
	return individuo(rnd.randint(0, maxint) for maxint in maxints)

def _mutIndividual(individual, maxints, prob_mutacion):
	"""[Mutación Individuo]
	Arguments:
		individual {[creator.Individual]} -- [Individuo de población]
		maxints {[lista]} -- [lista de máximos índices]
		prob_mutacion {[float]} -- [probabilidad de mutación del gen]
	Returns:
		[creator.Individual] -- [Individuo mutado]
	"""
	for i in range(len(maxints)):
		if rnd.random() < prob_mutacion:
			individual[i] = rnd.randint(0, maxints[i])
	return individual,

def _cxIndividual(ind1, ind2, prob_cruce):
	"""[Cruce de Individuos]
	Arguments:
		ind1 {[creator.Individual]} -- [Individuo 1]
		ind2 {[creator.Individual]} -- [Individuo 2]
		indpb {[float]} -- [probabilidad de emparejar]
		gene_type {[list]} -- [tipos de dato de los parámetros, CATEGORICO o NUMERICO]
	Returns:
		[creator.Individual,creator.Individual] -- [nuevos Individuos]
	"""
	CATEGORICO = 1  # int o str
	NUMERICO = 2  # float
	for i in range(len(ind1)):
		if rnd.random() < prob_cruce:
			sorted_ind = sorted([ind1[i], ind2[i]])
			ind1[i] = rnd.randint(sorted_ind[0], sorted_ind[1])
			ind2[i] = rnd.randint(sorted_ind[0], sorted_ind[1])
	return ind1, ind2

def _individual_to_params(individual, frecuencias):
	names_ = frecuencias[0].columns.values
	dataset = pd.DataFrame()
	dataset[names_[0]] = frecuencias[individual[0]][names_[0]]
	dataset[names_[1]] = frecuencias[individual[1]][names_[1]]
	dataset[names_[2]] = frecuencias[individual[2]][names_[2]]
	dataset[names_[3]] = frecuencias[individual[3]][names_[3]]
	dataset[names_[4]] = frecuencias[individual[4]][names_[4]]
	dataset[names_[5]] = frecuencias[0][names_[5]]
	# separación de data en X,y 
	y = dataset[names_[5]]
	del dataset[names_[5]]
	X = dataset
	return X,y


def _evalFunction(individual, frecuencias, scorer, num_folds, uniform, fit_params,
				verbose=0, error_score='raise', 8={}):
	"""[Evaluación del modelo]
	Arguments:
		individual {[creator.Individual]} -- [Individuo]
		frecuencias {[list]} -- [lista de dataframes]
		X {[array]} -- [Input]
		y {[array]} -- [Output]
		scorer {[string]} -- [Parámetro de evaluación, precisión]
		cv {[int | cross-validation]} -- [Especificación de los folds]
		uniform {[boolean]} -- [True hace que la data se distribuya uniformemente en los folds]
		fit_params {[dict | None]} -- [parámetros para estimator.fit]
	Keyword Arguments:
		verbose {integer} -- [Mensajes de descripción] (default: {0})
		error_score {numerico} -- [valor asignado si ocurre un error en fitting] (default: {'raise'})
		score_cache {dict} -- [description] (default: {{}})
	"""
	X, y = _individual_to_params(individual, frecuencias)
	score = 0
	n_test = 0
	paramkey = str(individual)
	if paramkey in score_cache:
		score = score_cache[paramkey]
	else:
		kfold = KFold(n_splits=10, shuffle=False)
		#cv_results = cross_val_score(estimator, X, y, cv=kfold, scoring=scoring)
		cv_results = cross_val_score(individual.est, X, y, cv=kfold, scoring="accuracy")
		score = np.mean(cv_results)
		score_cache[paramkey] = score
	return (score,)

In [7]:
class EvolutiveSearchCV:
	def __init__(self, estimator, scoring=None, num_folds=4,
				refit=True, verbose=False, population_size=50,
				gene_mutation_prob=0.2, gene_crossover_prob=0.5,
				tournament_size=3, generations_number=10, gene_type=None,
				n_jobs=1, uniform=True, error_score='raise',
				fit_params={}):
		# Parámetros iniciales
		self.estimator = estimator
		#self.params = params
		self.scoring = scoring
		self.num_folds = num_folds
		self.refit = refit
		self.verbose = verbose
		self.population_size = population_size
		self.gene_mutation_prob = gene_mutation_prob
		self.gene_crossover_prob = gene_crossover_prob
		self.tournament_size = tournament_size
		self.generations_number = generations_number
		self.gene_type = gene_type
		self.n_jobs = n_jobs
		self.uniform = uniform
		self.error_score = error_score
		self.fit_params = fit_params
		# Parámetros adicionales
		self._individual_evals = {}
		self.all_history_ = None
		self.all_logbooks_ = None
		self._cv_results = None
		self.best_score_ = None
		self.best_params_ = None
		self.scorer_ = None
		self.score_cache = {}
		# Fitness [base.Fitness], objetivo 1
		creator.create("FitnessMax", base.Fitness, weights=(1.0,))
		# Individuo [list], parámetros:est, FinessMax
		creator.create("Individual", list, est=clone(self.estimator), fitness=creator.FitnessMax)
	#@property
	def cv_results_(self):
		if self._cv_results is None:
			out = defaultdict(list)
			gen = self.all_history_
			# Get individuals and indexes, their list of scores,
			# and additionally the name_values for this set of parameters
			idxs, individuals, each_scores = zip(*[(idx, indiv, np.mean(indiv.fitness.values))
											for idx, indiv in list(gen.genealogy_history.items())
											if indiv.fitness.valid and not np.all(np.isnan(indiv.fitness.values))])
			#name_values, _, _ = _get_param_types_maxint(self.params)
			# Add to output
			#out['param_index'] += [p] * len(idxs)
			out['index'] += idxs
			#out['params'] += [_individual_to_params(indiv, name_values) for indiv in individuals]
			out['params'] += [str(np.add(indiv,1)) for indiv in individuals]
			out['mean_test_score'] += [np.nanmean(scores)*100 for scores in each_scores]
			out['std_test_score'] += [np.nanstd(scores)*100 for scores in each_scores]
			out['min_test_score'] += [np.nanmin(scores) for scores in each_scores]
			out['max_test_score'] += [np.nanmax(scores) for scores in each_scores]
			out['nan_test_score?'] += [np.any(np.isnan(scores)) for scores in each_scores]
			self._cv_results = out
		return self._cv_results
	@property
	def best_index_(self):
		return np.argmax(self.cv_results_['max_test_score'])
	# fit y refit general
	def fit(self, frecuencias):
		self.best_estimator_ = None
		self.best_mem_score_ = float("-inf")
		self.best_mem_params_ = None
		#_check_param_grid(self.params)
		self._fit(frecuencias)
		if self.refit:
			self.best_estimator_ = clone(self.estimator)
			#self.best_estimator_.set_params(**self.best_mem_params_)
			if self.fit_params is not None:
				self.best_estimator_.fit(X, y, **self.fit_params)
			else:
				self.best_estimator_.fit(X, y)
	# fit individual
	def _fit(self, frecuencias):
		self._cv_results = None  # Indicador de necesidad de actualización
		self.scorer_ = check_scoring(self.estimator, scoring=self.scoring)
		#n_samples = _num_samples(X)
		# verificar longitudes x,y 
		#if _num_samples(y) != n_samples:
		#	raise ValueError('Target [y], data [X] dont agree')
		#cv = check_cv(self.cv, y=y, classifier=is_classifier(self.estimator))
		toolbox = base.Toolbox()
		# name_values = lista de parametros, gene_type = [1:categorico; 2:numérico], maxints = size(parametros)
		#name_values, self.gene_type, maxints = _get_param_types_maxint(parameter_dict)
		maxints = [5]*5
		#if self.verbose:
		#	print("Tipos: %s, rangos: %s" % (self.gene_type, maxints))
		# registro de función Individuo
		toolbox.register("individual", _initIndividual, creator.Individual, maxints=maxints)
		# registro de función Población
		toolbox.register("population", tools.initRepeat, list, toolbox.individual)
		# Paralelísmo, create pool
		if not isinstance(self.n_jobs, int):
			self.n_jobs=1
		pool = Pool(self.n_jobs)
		toolbox.register("map", pool.map)
		# registro de función Evaluación
		toolbox.register("evaluate", _evalFunction,
						frecuencias=frecuencias,
						scorer=self.scorer_, num_folds=4, uniform=self.uniform,
						error_score=self.error_score, fit_params=self.fit_params,
						score_cache=self.score_cache)
		# registro de función Cruce
		toolbox.register("mate", _cxIndividual, prob_cruce=self.gene_crossover_prob)
		# registro de función Mutación
		toolbox.register("mutate", _mutIndividual, prob_mutacion=self.gene_mutation_prob, maxints=maxints)
		# registro de función Selección
		toolbox.register("select", tools.selTournament, tournsize=self.tournament_size)
		# Creación de Población
		pop = toolbox.population(n=self.population_size)
		# Mejor Individuo que ha existido
		hof = tools.HallOfFame(1)
		# Stats
		stats = tools.Statistics(lambda ind: ind.fitness.values)
		stats.register("avg", np.nanmean)
		stats.register("min", np.nanmin)
		stats.register("max", np.nanmax)
		stats.register("std", np.nanstd)
		# Genealogía
		hist = tools.History()
		# Decoración de operadores de variaznza
		toolbox.decorate("mate", hist.decorator)
		toolbox.decorate("mutate", hist.decorator)
		hist.update(pop)
		# Posibles combinaciones
		if self.verbose:
			print('--- Evolve in {0} possible combinations ---'.format(np.prod(np.array(maxints) + 1)))
		pop, logbook = algorithms.eaSimple(pop, toolbox, cxpb=self.gene_crossover_prob, 
										mutpb=self.gene_mutation_prob,
										ngen=self.generations_number, 
										stats=stats,
										halloffame=hof, 
										verbose=self.verbose)
		#pop, logbook = algorithms.eaGenerateUpdate(toolbox,
		#								ngen=self.generations_number, stats=stats,
		#								halloffame=hof, verbose=self.verbose)
		# Save History
		self.all_history_ = hist
		self.all_logbooks_ = logbook
		# Mejor score y parametros
		current_best_score_ = hof[0].fitness.values[0]
		current_best_params_ = str(hof[0]) #_individual_to_params(hof[0], name_values)
		#if self.verbose:
		#	print("Best individual is: %s\nwith fitness: %s" % (
		#		current_best_params_, current_best_score_))
		if current_best_score_ > self.best_mem_score_:
			self.best_mem_score_ = current_best_score_
			self.best_mem_params_ = current_best_params_
		# fin paralelización, close pool
		pool.close()
		pool.join()
		self.best_score_ = current_best_score_
		self.best_params_ = current_best_params_


In [31]:
"""
maxints = [5]*5
scoring = "accuracy"
num_folds = 4
maxints = [5]*5
model = KNeighborsClassifier()
def __init__(self, estimador, scoring=None, num_folds=4,
				refit=True, verbose=False, population_size=50,
				gene_mutation_prob=0.2, gene_crossover_prob=0.5,
				tournament_size=3, generations_number=10, gene_type=None,
				n_jobs=1, uniform=True, error_score='raise',
				fit_params={}):
"""
num_jobs=4
gs = EvolutiveSearchCV(estimator=KNeighborsClassifier(), scoring="accuracy", num_folds=10, n_jobs=num_jobs,
                        verbose=True, refit=True, 
                        population_size=100, 
                        gene_mutation_prob=0.3, 
                        gene_crossover_prob=0.5,
                        tournament_size=4,
                        generations_number=12)



In [32]:
gs.fit(frecuencias)
h = gs.cv_results_()

--- Evolve in 7776 possible combinations ---
gen	nevals	avg     	min     	max     	std      
0  	100   	0.755822	0.652242	0.845848	0.0409894
1  	59    	0.792973	0.671963	0.86436 	0.0342908
2  	68    	0.811471	0.71342 	0.86436 	0.0296467
3  	70    	0.825281	0.735355	0.86436 	0.0296963
4  	58    	0.839179	0.721069	0.868186	0.02645  
5  	68    	0.846583	0.752262	0.868186	0.0244825
6  	63    	0.855958	0.78406 	0.868186	0.0192382
7  	51    	0.858215	0.731529	0.868186	0.0247746
8  	67    	0.857137	0.761319	0.868186	0.0248609
9  	54    	0.861035	0.741794	0.868186	0.0199039
10 	66    	0.855649	0.737565	0.868186	0.0258272
11 	61    	0.857795	0.764135	0.868186	0.0227434
12 	63    	0.854865	0.744816	0.868186	0.0275645


In [23]:
resultados = {}
resultados['Frecuencias'] = dict(h)['params']
resultados['Precision'] = dict(h)['mean_test_score']
df = pd.DataFrame(resultados)
df.drop_duplicates(subset=['Frecuencias', 'Precision']).sort_values(['Precision'],ascending=False).head(25)

Unnamed: 0,Frecuencias,Precision
739,[5 1 1 3 5],86.778372
810,[6 1 3 3 5],86.778372
794,[6 1 2 3 4],86.778372
746,[6 1 2 3 6],86.778372
677,[6 1 2 3 5],86.778372
743,[2 6 2 3 5],86.778372
691,[6 1 2 3 3],86.778372
813,[2 3 2 3 2],86.133981
603,[4 1 2 3 2],86.133981
601,[2 2 1 3 6],86.133981


## EDAS

In [11]:

# The problem to optimize
def getAccuracy( frecuencias, individual, estimator, score_cache ):
	X,y = _createDataset(frecuencias, individual)
	score = 0
	scorer = "accuracy"
	paramkey = str(np.int32(individual)+1)
	if paramkey in score_cache:
		score = score_cache[paramkey]
	else:
		kfold = KFold(n_splits=10, shuffle=False)
		cv_results = cross_val_score(estimator, X, y, cv=kfold, scoring=scorer)
		score = np.mean(cv_results)
		score_cache[paramkey] = score
	return score

def _createDataset(frecuencias, values):
    names_ = frecuencias[0].columns.values
    dataset = pd.DataFrame()
    dataset[names_[0]] = frecuencias[int(values[0])][names_[0]]
    dataset[names_[1]] = frecuencias[int(values[1])][names_[1]]
    dataset[names_[2]] = frecuencias[int(values[2])][names_[2]]
    dataset[names_[3]] = frecuencias[int(values[3])][names_[3]]
    dataset[names_[4]] = frecuencias[int(values[4])][names_[4]]
    dataset[names_[5]] = frecuencias[0][names_[5]]
    # separación de data en X,y 
    y = dataset[names_[5]]
    del dataset[names_[5]]
    X = dataset
    return X,y

class eda:
	def __init__(self, of, frecuencias, estimator):
		# Algorithm parameters
		self.iterations = 100
		self.sample_size = 60
		self.select_ratio = 0.6
		self.epsilon = 10e-6

		# class members
		self.objective_function = of
		self.dimensions = 5
		self.sample = []
		self.means = []
		self.stdevs = []	

		self.debug = False
		# aditional parameters
		self.frecuencias = frecuencias
		self.estimator = estimator
		self.score_cache = {}


	def sample_sort(self): 
		# sort rows on the last column
		self.sample = self.sample[ np.argsort( self.sample[:,-1], 0 ) ]


	def dispersion_reduction(self):
		self.sample_sort()

		# number of points to select
		nb = int( np.floor( self.sample_size * self.select_ratio ) )

		# selection
		#self.sample = self.sample[:nb]
		self.sample = self.sample[self.sample_size-nb:]

		if self.debug:
		    print ("dispersion reduction")
		    print (str(self.sample))
		    print


	def estimate_parameters( self ):
		# points sub array (without values)
		mat = self.sample[:,:self.dimensions]
		
		# row means (axis 0 in scipy)
		self.means = np.mean( mat, 0 )
		
		# row standard deviation
		self.stdevs = np.std( mat, 0 )

		if self.debug:
		    print ("estimate parameters")
		    print ("\tmean=" +str(self.means))
		    print ("\tstd-dev=" + str(self.stdevs))
		    print


	def draw_sample(self):
		# for each variable to optimize
		for i in range(self.dimensions):
			# if the dispersion is null
			if self.stdevs[i] == 0.0:
				# set it to a minimal value
				self.stdevs[i] = self.epsilon
		
		# empty sample
		self.sample = np.zeros( (self.sample_size, self.dimensions+1) )
		
		# for each point
		for i in range( self.sample_size ):
			# draw in random normal
			p = np.random.normal( self.means, self.stdevs )
			p = np.array([0 if i<0 else (5 if i>5 else i) for i in p])
			# put it into the sample
			self.sample[i][:self.dimensions] = np.round(p)%(self.dimensions+1)

		if self.debug:
		    print ("draw sample")
		    print (self.sample)
		    print


	def evaluate(self):
		# for each point
		for i in range( self.sample_size ):
			d = self.dimensions
			# call the objective function
			#   the third element is the result of the objective function call
			#   taking the first two elements as variables
			r = self.objective_function( self.frecuencias, self.sample[i][:d], self.estimator, self.score_cache )
			self.sample[i][-1] = r

		if self.debug:
		    print ("evaluate")
		    print (self.sample)
		    print


	def run(self):
		# uniform initialization
		self.sample = np.random.rand( self.sample_size, self.dimensions+1 )
		# cosmetic
		#self.sample = self.sample * 200 - 100
		top_freq = 6
		self.sample = np.floor(np.random.rand(self.sample_size, self.dimensions +1)*top_freq)
		
		if self.debug:
		    print ("initialization")
		    print (self.sample)
		    print

		self.evaluate()

		
		i = 0
		while i < self.iterations:
			if self.debug:
			    print ("iteration",i)
			    print

			i += 1
			self.dispersion_reduction()
			self.estimate_parameters()
			self.draw_sample()
			self.evaluate()


		# sort the final sample
		self.sample_sort()
		# output the optimum
		ranking = self.sample_size
		print ("#[ Configuración ]\t Accuracy")
		for i in range(ranking):
			linea = str(self.sample[-i-1][:-1]+1) + "\t" +str(self.sample[-i-1][-1])
			print(linea)



In [12]:
seed = 7
frecuencias = []
frecuencias.append(pd.read_csv('Tx_0x01'))
frecuencias.append(pd.read_csv('Tx_0x02'))
frecuencias.append(pd.read_csv('Tx_0x03'))
frecuencias.append(pd.read_csv('Tx_0x04'))
frecuencias.append(pd.read_csv('Tx_0x05'))
frecuencias.append(pd.read_csv('Tx_0x06'))
frecuencias.append(pd.read_csv('Tx_0x07'))
frecuencias[0] = shuffle(frecuencias[0], random_state=seed).reset_index(drop=True)
frecuencias[1] = shuffle(frecuencias[1], random_state=seed).reset_index(drop=True)
frecuencias[2] = shuffle(frecuencias[2], random_state=seed).reset_index(drop=True)
frecuencias[3] = shuffle(frecuencias[3], random_state=seed).reset_index(drop=True)
frecuencias[4] = shuffle(frecuencias[4], random_state=seed).reset_index(drop=True)
frecuencias[5] = shuffle(frecuencias[5], random_state=seed).reset_index(drop=True)
frecuencias[6] = shuffle(frecuencias[6], random_state=seed).reset_index(drop=True)
estimator = KNeighborsClassifier(n_jobs=num_jobs)
a = eda( getAccuracy, frecuencias, estimator )
a.run()

#[ Configuración ]	 Accuracy
[ 6.  4.  2.  3.  6.]	0.850674206529
[ 6.  4.  2.  3.  6.]	0.850674206529
[ 6.  4.  2.  3.  6.]	0.850674206529
[ 6.  4.  2.  3.  6.]	0.850674206529
[ 6.  4.  2.  3.  6.]	0.850674206529
[ 6.  4.  2.  3.  6.]	0.850674206529
[ 6.  4.  2.  3.  6.]	0.850674206529
[ 6.  4.  2.  3.  6.]	0.850674206529
[ 6.  4.  2.  3.  6.]	0.850674206529
[ 6.  4.  2.  3.  6.]	0.850674206529
[ 6.  4.  2.  3.  6.]	0.850674206529
[ 6.  4.  2.  3.  6.]	0.850674206529
[ 6.  4.  2.  3.  6.]	0.850674206529
[ 6.  4.  2.  3.  6.]	0.850674206529
[ 6.  4.  2.  3.  6.]	0.850674206529
[ 6.  4.  2.  3.  6.]	0.850674206529
[ 6.  4.  2.  3.  6.]	0.850674206529
[ 6.  4.  2.  3.  6.]	0.850674206529
[ 6.  4.  2.  3.  6.]	0.850674206529
[ 6.  4.  2.  3.  6.]	0.850674206529
[ 6.  4.  2.  3.  6.]	0.850674206529
[ 6.  4.  2.  3.  6.]	0.850674206529
[ 6.  4.  2.  3.  6.]	0.850674206529
[ 6.  4.  2.  3.  6.]	0.850674206529
[ 6.  4.  2.  3.  6.]	0.850674206529
[ 6.  4.  2.  3.  6.]	0.850674206529
[ 6.  4. 

## Grid Search

In [13]:
import itertools
combinaciones = [p for p in itertools.product(range(6), repeat=5)]
score_GridSearch = {}
for individual in combinaciones:
    getAccuracy( frecuencias, individual, estimator, score_GridSearch )

## Resultados Genéticos EAS

In [34]:
#df.sort_values(['Precision'],ascending=False).head(30)
#dfEasGen = pd.DataFrame.from_dict(gs.score_cache, orient='index')
#dfEasGen.reset_index(level=0, inplace=True)
#dfEasGen.sort_values([0],ascending=False).head(30)
gs2.score_cache

{}


## Resultados EDAS

In [15]:
dfEdas = pd.DataFrame.from_dict(a.score_cache, orient='index')
dfEdas.reset_index(level=0, inplace=True)
dfEdas.sort_values([0],ascending=False).head(30)

Unnamed: 0,index,0
615,[6 1 2 3 4],0.861141
443,[6 1 3 3 5],0.859531
617,[6 4 2 3 2],0.85289
495,[6 1 3 3 3],0.851681
507,[6 4 2 3 6],0.850674
358,[6 3 2 3 6],0.850271
621,[6 6 2 3 2],0.849469
556,[6 5 2 3 6],0.848858
266,[6 5 2 3 4],0.84765
586,[6 5 2 3 2],0.847249


## Resultados Grid Search

In [17]:
dfGSearch = pd.DataFrame.from_dict(score_GridSearch, orient='index')
dfGSearch.reset_index(level=0, inplace=True)
dfGSearch.sort_values([0],ascending=False).head(30)

Unnamed: 0,index,0
6586,[6 1 3 6 5],0.868187
6546,[6 1 2 6 1],0.868186
6532,[6 1 2 3 5],0.867784
6551,[6 1 2 6 6],0.864563
6582,[6 1 3 6 1],0.86436
6550,[6 1 2 6 5],0.862549
6690,[6 1 6 6 1],0.862547
6533,[6 1 2 3 6],0.862142
6529,[6 1 2 3 2],0.86134
6531,[6 1 2 3 4],0.861141


In [20]:
display(dfGSearch[dfGSearch['index']=='[1 1 1 1 1]'])
display(dfGSearch[dfGSearch['index']=='[2 2 2 2 2]'])
display(dfGSearch[dfGSearch['index']=='[3 3 3 3 3]'])
display(dfGSearch[dfGSearch['index']=='[4 4 4 4 4]'])
display(dfGSearch[dfGSearch['index']=='[5 5 5 5 5]'])
display(dfGSearch[dfGSearch['index']=='[6 6 6 6 6]'])

Unnamed: 0,index,0
0,[1 1 1 1 1],0.773596


Unnamed: 0,index,0
1555,[2 2 2 2 2],0.717442


Unnamed: 0,index,0
3110,[3 3 3 3 3],0.778629


Unnamed: 0,index,0
4665,[4 4 4 4 4],0.765751


Unnamed: 0,index,0
6220,[5 5 5 5 5],0.733553


Unnamed: 0,index,0
7775,[6 6 6 6 6],0.833367


In [39]:
#display(dfGSearch[dfGSearch['index']=='[5 1 1 3 5]'])
#dfGSearch.sort_values([0],ascending=False).to_csv('Exhaustive.csv', sep=',', index=False) 
#dfEdas.sort_values([0],ascending=False).to_csv('edas.csv', sep=',', index=False) 