In [1]:
import os
import time
import warnings
import numpy as np
import random as rnd
import pandas as pd
from collections import defaultdict

# Librería Genética
from deap import base, creator, tools, algorithms

from sklearn.utils import shuffle
# Subfunciones de estimadores
from sklearn.base import clone
# [https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/base.py][30]
from sklearn.base import is_classifier
# [https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/base.py][535]
from sklearn.model_selection._validation import _fit_and_score
# [https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/model_selection/_validation.py][346]
from sklearn.model_selection._search import BaseSearchCV
# [https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/model_selection/_search.py][386]
from sklearn.model_selection._search import check_cv
# [https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/model_selection/_split.py][1866]
from sklearn.model_selection._search import _check_param_grid
# [https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/model_selection/_search.py][343]
from sklearn.metrics.scorer import check_scoring
# [https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/metrics/scorer.py][250]
from sklearn.utils.validation import _num_samples
# [https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/utils/validation.py][105]
from sklearn.utils.validation import indexable
# [https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/utils/validation.py][208]
from multiprocessing import Pool, Manager

# Selección para estimadores
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

# Metricas para estimadores
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error

# Estimadores
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import FunctionTransformer

#Algorithms
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

#Ensembles algorithms
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import VotingClassifier
warnings.filterwarnings("ignore")


In [2]:

# find distance error al 0.2%
def distance_error(estimator, X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state = 7)
    estimator.fit(X_train, y_train)
    y_pred = estimator.predict(X_test)
    # coord pred
    x1 = np.int32((y_pred + 2) % 3)
    y1 = np.int32((y_pred - 1) / 3)
    # coord real
    x2 = np.int32((y_test + 2) % 3)
    y2 = np.int32((y_test - 1) / 3)
    # pasar variacion a distancias metros
    vx = np.abs(x1 - x2)
    vy = np.abs(x1 - x2)
    vx = vx*0.5 + (vx-1)*(vx>0)
    vy = vy*0.5 + (vy-1)*(vy>0)
    # pitagoras
    err_distance = np.mean(np.sqrt(vx*vx + vy*vy))
    return err_distance

def _createDataset(frecuencias, values, seed = 7):
    # crear dataset
    names_ = frecuencias[0].columns.values
    # reestructuracion
    salida_final = pd.DataFrame(columns=names_)
    for sec in range(1,16):
        dataset = pd.DataFrame(columns=names_)
        corte = min([frecuencias[i][frecuencias[i]['Sector']==sec].shape[0] for i in values])
        tx = 0
        dataset[names_[tx]] = dataset[names_[tx]].append(frecuencias[int(values[tx])][frecuencias[int(values[tx])]['Sector']==sec][:corte][names_[tx]])
        dataset = dataset.reset_index(drop=True)
        for tx in range(1,5):
            dataset[names_[tx]] = frecuencias[int(values[tx])][frecuencias[int(values[tx])]['Sector']==sec][:corte][names_[tx]].reset_index()
        dataset[names_[tx+1]] = frecuencias[int(values[tx])][frecuencias[int(values[tx])]['Sector']==sec][:corte][names_[tx+1]].reset_index()
        # join parts
        salida_final = salida_final.append(dataset)
    # shuffle dataset
    salida_final = shuffle(salida_final, random_state=seed).reset_index(drop=True)
    salida_final = salida_final.apply(pd.to_numeric)
    # dataframe to X,y 
    X = salida_final[names_[:-1]]
    y = salida_final[names_[-1]]
    return X,y

def set_models():
    rs = 1
    models = []
    # LDA : Warning(Variables are collinear)
    models.append(('LinearDiscriminantAnalysis', LinearDiscriminantAnalysis()))
    models.append(('SVC', SVC(random_state=rs)))
    models.append(('GaussianNB', GaussianNB()))
    models.append(('MLPClassifier', MLPClassifier()))
    models.append(('KNeighborsClassifier', KNeighborsClassifier()))
    models.append(('DecisionTreeClassifier', DecisionTreeClassifier(random_state=rs)))
    models.append(('LogisticRegression', LogisticRegression()))
    # Bagging and Boosting
    # models.append(('ExtraTreesClassifier', ExtraTreesClassifier(n_estimators=150)))
    models.append(('ExtraTreesClassifier', ExtraTreesClassifier(random_state=rs)))
    models.append(('AdaBoostClassifier', AdaBoostClassifier(DecisionTreeClassifier(random_state=rs),
                                                            random_state=rs)))
    # models.append(('AdaBoostClassifier', AdaBoostClassifier(DecisionTreeClassifier())))
    models.append(('RandomForestClassifier', RandomForestClassifier(random_state=rs)))
    models.append(('GradientBoostingClassifier',
                   GradientBoostingClassifier(random_state=rs)))
    # models.append(('GradientBoostingClassifier', GradientBoostingClassifier()))
    # Voting
    estimators = []
    estimators.append(("Voting_GradientBoostingClassifier", GradientBoostingClassifier(random_state=rs)))
    estimators.append(("Voting_ExtraTreesClassifier", ExtraTreesClassifier(random_state=rs)))
    voting = VotingClassifier(estimators)
    models.append(('VotingClassifier', voting))
    return models


# The problem to optimize
def evaluatex2( frecuencias, individual, estimator, score_cache={}, error_cache={}, 
             n_splits = 10, shuffle = False, scorer = "accuracy"):
    X, y = _createDataset(frecuencias, individual)
    metric_err = distance_error(estimator, X, y)
    score = 0
    paramkey = str(np.int32(individual)+1)
    if paramkey in score_cache:
        score = score_cache[paramkey]
        error = error_cache[paramkey]
    else:
        kfold = KFold(n_splits=n_splits, shuffle=shuffle)
        cv_results = cross_val_score(estimator, X, y, cv=kfold, scoring=scorer)
        score = cv_results.mean()
        error = cv_results.std()
        #score_cache[paramkey] = score
        #error_cache[paramkey] = error
    return score, error, metric_err


In [3]:
def _initIndividual(individuo, maxints):
	"""[Iniciar Individuo]
	Arguments:
		pcls {[creator.Individual]} -- [Iniciar individuo con indices aleatorios]
		maxints {[params_size]} -- [lista de máximos índices]
	Returns:
		[creator.Individual] -- [Creación de individuo]
	"""
	return individuo(rnd.randint(0, maxint) for maxint in maxints)

def _mutIndividual(individual, maxints, prob_mutacion):
	"""[Mutación Individuo]
	Arguments:
		individual {[creator.Individual]} -- [Individuo de población]
		maxints {[lista]} -- [lista de máximos índices]
		prob_mutacion {[float]} -- [probabilidad de mutación del gen]
	Returns:
		[creator.Individual] -- [Individuo mutado]
	"""
	for i in range(len(maxints)):
		if rnd.random() < prob_mutacion:
			individual[i] = rnd.randint(0, maxints[i])
	return individual,

def _cxIndividual(ind1, ind2, prob_cruce):
	"""[Cruce de Individuos]
	Arguments:
		ind1 {[creator.Individual]} -- [Individuo 1]
		ind2 {[creator.Individual]} -- [Individuo 2]
		indpb {[float]} -- [probabilidad de emparejar]
		gene_type {[list]} -- [tipos de dato de los parámetros, CATEGORICO o NUMERICO]
	Returns:
		[creator.Individual,creator.Individual] -- [nuevos Individuos]
	"""
	CATEGORICO = 1  # int o str
	NUMERICO = 2  # float
	for i in range(len(ind1)):
		if rnd.random() < prob_cruce:
			sorted_ind = sorted([ind1[i], ind2[i]])
			ind1[i] = rnd.randint(sorted_ind[0], sorted_ind[1])
			ind2[i] = rnd.randint(sorted_ind[0], sorted_ind[1])
	return ind1, ind2

def _individual_to_params(frecuencias, values):
    # crear dataset
    names_ = frecuencias[0].columns.values
    seed = 7
    # reestructuracion
    salida_final = pd.DataFrame(columns=names_)
    for sec in range(1,16):
        dataset = pd.DataFrame(columns=names_)
        corte = min([frecuencias[i][frecuencias[i]['Sector']==sec].shape[0] for i in values])
        #l = [frecuencias[i][frecuencias[i]['Sector']==sec].shape[0] for i in values]
        #corte = max(l)
        #tx=l.index(max(l))
        tx = 0
        dataset[names_[tx]] = dataset[names_[tx]].append(frecuencias[int(values[tx])][frecuencias[int(values[tx])]['Sector']==sec][:corte][names_[tx]])
        dataset = dataset.reset_index(drop=True)
        for tx in range(1,5):
            dataset[names_[tx]] = frecuencias[int(values[tx])][frecuencias[int(values[tx])]['Sector']==sec][:corte][names_[tx]].reset_index(drop=True)
        dataset[names_[tx+1]] = frecuencias[int(values[tx])][frecuencias[int(values[tx])]['Sector']==sec][:corte][names_[tx+1]].reset_index(drop=True)
        # join parts
        salida_final = salida_final.append(dataset)
    # shuffle dataset
    salida_final = shuffle(salida_final, random_state=seed).reset_index(drop=True)
    salida_final = salida_final.apply(pd.to_numeric)
    # dataframe to X,y 
    X = salida_final[names_[:-1]]
    y = salida_final[names_[-1]]
    return X,y

def _evalFunction(individual, frecuencias, scorer, num_folds, score_cache, desv_cache, error_cache):
	"""[Evaluación del modelo]
	Arguments:
		individual {[creator.Individual]} -- [Individuo]
		frecuencias {[list]} -- [lista de dataframes]
		X {[array]} -- [Input]
		y {[array]} -- [Output]
		scorer {[string]} -- [Parámetro de evaluación, precisión]
		cv {[int | cross-validation]} -- [Especificación de los folds]
		uniform {[boolean]} -- [True hace que la data se distribuya uniformemente en los folds]
		fit_params {[dict | None]} -- [parámetros para estimator.fit]
	Keyword Arguments:
		verbose {integer} -- [Mensajes de descripción] (default: {0})
		error_score {numerico} -- [valor asignado si ocurre un error en fitting] (default: {'raise'})
		score_cache {dict} -- [description] (default: {{}})
	"""
	X, y = _individual_to_params(frecuencias, individual)
	score = 0
	n_test = 0
	paramkey = str(np.array(individual)+1)
	if paramkey in score_cache:
		score = score_cache[paramkey]
	else:
		kfold = KFold(n_splits=10, shuffle=False)
		#cv_results = cross_val_score(estimator, X, y, cv=kfold, scoring=scoring)
		cv_results = cross_val_score(individual.est, X, y, cv=kfold, scoring=scorer)
		score = cv_results.mean()
		score_cache[paramkey] = score
		desv_cache[paramkey] = cv_results.std()
		error_cache[paramkey] = distance_error(individual.est, X, y)
	return (score,)


In [4]:
class EvolutiveSearchCV:
	def __init__(self, estimator, scoring=None, num_folds=4,
				refit=True, verbose=False, population_size=50,
				gene_mutation_prob=0.2, gene_crossover_prob=0.5,
				tournament_size=3, generations_number=10, gene_type=None,
				n_jobs=1, uniform=True, error_score='raise',
				fit_params={}):
		# Parámetros iniciales
		self.estimator = estimator
		#self.params = params
		self.scoring = scoring
		self.num_folds = num_folds
		self.refit = refit
		self.verbose = verbose
		self.population_size = population_size
		self.gene_mutation_prob = gene_mutation_prob
		self.gene_crossover_prob = gene_crossover_prob
		self.tournament_size = tournament_size
		self.generations_number = generations_number
		self.gene_type = gene_type
		self.n_jobs = n_jobs
		self.uniform = uniform
		self.error_score = error_score
		self.fit_params = fit_params
		# Parámetros adicionales
		self._individual_evals = {}
		self.all_history_ = None
		self.all_logbooks_ = None
		self._cv_results = None
		self.best_score_ = None
		self.best_params_ = None
		self.scorer_ = None
		#self.score_cache = {}
		self.__manager = Manager()
		self.score_cache = self.__manager.dict()
		self.desv_cache = self.__manager.dict()
		self.error_cache = self.__manager.dict()
		#self.score_cache = dict()
		#self.desv_cache = dict()
		#self.error_cache = dict()
		# Fitness [base.Fitness], objetivo 1
		creator.create("FitnessMax", base.Fitness, weights=(1.0,))
		# Individuo [list], parámetros:est, FinessMax
		creator.create("Individual", list, est=clone(self.estimator), fitness=creator.FitnessMax)
	#@property
	def cv_results_(self):
		if self._cv_results is None:
			out = defaultdict(list)
			gen = self.all_history_
			# Get individuals and indexes, their list of scores,
			# and additionally the name_values for this set of parameters
			idxs, individuals, each_scores = zip(*[(idx, indiv, np.mean(indiv.fitness.values))
											for idx, indiv in list(gen.genealogy_history.items())
											if indiv.fitness.valid and not np.all(np.isnan(indiv.fitness.values))])
			#name_values, _, _ = _get_param_types_maxint(self.params)
			# Add to output
			#out['param_index'] += [p] * len(idxs)
			out['index'] += idxs
			#out['params'] += [_individual_to_params(indiv, name_values) for indiv in individuals]
			out['params'] += [str(np.add(indiv,1)) for indiv in individuals]
			out['mean_test_score'] += [np.nanmean(scores) for scores in each_scores]
			out['std_test_score'] += [np.nanstd(scores) for scores in each_scores]
			out['min_test_score'] += [np.nanmin(scores) for scores in each_scores]
			out['max_test_score'] += [np.nanmax(scores) for scores in each_scores]
			out['nan_test_score?'] += [np.any(np.isnan(scores)) for scores in each_scores]
			self._cv_results = out
		return self._cv_results
	@property
	def best_index_(self):
		return np.argmax(self.cv_results_['max_test_score'])
	# fit y refit general
	def fit(self, frecuencias):
		self.best_estimator_ = None
		self.best_mem_score_ = float("-inf")
		self.best_mem_params_ = None
		#_check_param_grid(self.params)
		self._fit(frecuencias)
		#if self.refit:
		#	self.best_estimator_ = clone(self.estimator)
		#	#self.best_estimator_.set_params(**self.best_mem_params_)
		#	self.best_estimator_.fit(frecuencias)
	# fit individual
	def _fit(self, frecuencias):
		self._cv_results = None  # Indicador de necesidad de actualización
		self.scorer_ = check_scoring(self.estimator, scoring=self.scoring)
		#n_samples = _num_samples(X)
		# verificar longitudes x,y 
		#if _num_samples(y) != n_samples:
		#	raise ValueError('Target [y], data [X] dont agree')
		#cv = check_cv(self.cv, y=y, classifier=is_classifier(self.estimator))
		toolbox = base.Toolbox()
		# name_values = lista de parametros, gene_type = [1:categorico; 2:numérico], maxints = size(parametros)
		#name_values, self.gene_type, maxints = _get_param_types_maxint(parameter_dict)
		maxints = [5]*5
		#if self.verbose:
		#	print("Tipos: %s, rangos: %s" % (self.gene_type, maxints))
		# registro de función Individuo
		toolbox.register("individual", _initIndividual, creator.Individual, maxints=maxints)
		# registro de función Población
		toolbox.register("population", tools.initRepeat, list, toolbox.individual)
		# Paralelísmo, create pool
		if not isinstance(self.n_jobs, int):
			self.n_jobs=1
		pool = Pool(self.n_jobs)
		toolbox.register("map", pool.map)
		# registro de función Evaluación
		toolbox.register("evaluate", _evalFunction,
						frecuencias=frecuencias,
						scorer=self.scorer_, num_folds=10, 
						score_cache=self.score_cache,
						desv_cache=self.desv_cache,
						error_cache=self.error_cache)
		# registro de función Cruce
		toolbox.register("mate", _cxIndividual, prob_cruce=self.gene_crossover_prob)
		# registro de función Mutación
		toolbox.register("mutate", _mutIndividual, prob_mutacion=self.gene_mutation_prob, maxints=maxints)
		# registro de función Selección
		toolbox.register("select", tools.selTournament, tournsize=self.tournament_size)
		# Creación de Población
		pop = toolbox.population(n=self.population_size)
		# Mejor Individuo que ha existido
		hof = tools.HallOfFame(1)
		# Stats
		stats = tools.Statistics(lambda ind: ind.fitness.values)
		stats.register("avg", np.nanmean)
		stats.register("min", np.nanmin)
		stats.register("max", np.nanmax)
		stats.register("std", np.nanstd)
		# Genealogía
		hist = tools.History()
		# Decoración de operadores de variaznza
		toolbox.decorate("mate", hist.decorator)
		toolbox.decorate("mutate", hist.decorator)
		hist.update(pop)
		# Posibles combinaciones
		if self.verbose:
			print('--- Evolve in {0} possible combinations ---'.format(np.prod(np.array(maxints) + 1)))
		pop, logbook = algorithms.eaSimple(pop, toolbox, cxpb=self.gene_crossover_prob, 
										mutpb=self.gene_mutation_prob,
										ngen=self.generations_number, 
										stats=stats,
										halloffame=hof, 
										verbose=self.verbose)
		#pop, logbook = algorithms.eaGenerateUpdate(toolbox,
		#								ngen=self.generations_number, stats=stats,
		#								halloffame=hof, verbose=self.verbose)
		# Save History
		self.all_history_ = hist
		self.all_logbooks_ = logbook
		# Mejor score y parametros
		current_best_score_ = hof[0].fitness.values[0]
		current_best_params_ = str(hof[0]) #_individual_to_params(hof[0], name_values)
		#if self.verbose:
		#	print("Best individual is: %s\nwith fitness: %s" % (
		#		current_best_params_, current_best_score_))
		if current_best_score_ > self.best_mem_score_:
			self.best_mem_score_ = current_best_score_
			self.best_mem_params_ = current_best_params_
		# fin paralelización, close pool
		pool.close()
		pool.join()
		self.best_score_ = current_best_score_
		self.best_params_ = current_best_params_

In [5]:
test_size = 0.2
num_folds = 10
seed = 7
frecuencias = []
names_ = ['Be01', 'Be02', 'Be03', 'Be04', 'Be05', 'Sector']

frecuencias.append(pd.read_csv('sinFiltro/Tx_0x01'))#, names=names_))
frecuencias.append(pd.read_csv('sinFiltro/Tx_0x02'))#, names=names_))
frecuencias.append(pd.read_csv('sinFiltro/Tx_0x03'))#, names=names_))
frecuencias.append(pd.read_csv('sinFiltro/Tx_0x04'))#, names=names_))
frecuencias.append(pd.read_csv('sinFiltro/Tx_0x05'))#, names=names_))
frecuencias.append(pd.read_csv('sinFiltro/Tx_0x06'))#, names=names_))
"""
frecuencias.append(pd.read_csv('Filtrado/LocalizationNew_Tx1.csv', names=names_))
frecuencias.append(pd.read_csv('Filtrado/LocalizationNew_Tx2.csv', names=names_))
frecuencias.append(pd.read_csv('Filtrado/LocalizationNew_Tx3.csv', names=names_))
frecuencias.append(pd.read_csv('Filtrado/LocalizationNew_Tx4.csv', names=names_))
frecuencias.append(pd.read_csv('Filtrado/LocalizationNew_Tx5.csv', names=names_))
frecuencias.append(pd.read_csv('Filtrado/LocalizationNew_Tx6.csv', names=names_))
"""
num_jobs=4
estimadores = set_models()
salida = {}

In [6]:
"""
def set_models():
    rs = 1
    models = []
    models.append(('LinearDiscriminantAnalysis', LinearDiscriminantAnalysis()))
    models.append(('KNeighborsClassifier', KNeighborsClassifier()))
    return models
"""
estimadores = set_models()

reserva = {}
for name, model in estimadores:
    print("\nModeling...", name)
    splits = 10
    simetricas = [[i]*5 for i in range(6)]
    #for individual in simetricas:
    #acc, desv, err = evaluate(frecuencias, individual, model)
    #salida[str(name)+"-"+str(individual)] = str(acc) + "-"+ str(desv) + "-" + str(err)
    #print(name," ", individual, "\t", acc, "\t", desv, "\t", err)
    gs = EvolutiveSearchCV(estimator=model, scoring="accuracy", num_folds=10, n_jobs=num_jobs,
                        verbose=True, refit=True, 
                        population_size=100, 
                        gene_mutation_prob=0.3, 
                        gene_crossover_prob=0.5,
                        tournament_size=4,
                        generations_number=10)
    gs.fit(frecuencias)
    reserva[name]=(gs.score_cache, gs.desv_cache , gs.error_cache)
    


Modeling... LinearDiscriminantAnalysis
--- Evolve in 7776 possible combinations ---
gen	nevals	avg     	min     	max     	std      
0  	100   	0.603781	0.409123	0.727984	0.0634116
1  	66    	0.642876	0.518503	0.756297	0.0514629
2  	59    	0.680571	0.531916	0.756297	0.0426576
3  	68    	0.704988	0.482856	0.771807	0.0442866
4  	70    	0.72309 	0.629422	0.762404	0.0292228
5  	66    	0.726897	0.56596 	0.762404	0.0394565
6  	60    	0.744907	0.627461	0.771807	0.0262246
7  	68    	0.748743	0.625958	0.771807	0.0279566
8  	75    	0.743548	0.557136	0.771807	0.0393694
9  	70    	0.741351	0.542247	0.771807	0.0439555
10 	63    	0.75127 	0.576871	0.771807	0.0408778

Modeling... SVC
--- Evolve in 7776 possible combinations ---
gen	nevals	avg     	min     	max     	std      
0  	100   	0.712462	0.596599	0.815693	0.0447295
1  	61    	0.747574	0.631108	0.815693	0.0355255
2  	67    	0.778086	0.7059  	0.834931	0.0302074
3  	59    	0.791435	0.675392	0.834931	0.0322874
4  	57    	0.794772	0.655209	0.834931

In [7]:
for name,_ in estimadores:
    df = pd.DataFrame.from_dict(dict(reserva[name][0]), orient='index')
    df[1] = pd.DataFrame.from_dict(dict(reserva[name][1]), orient='index')[0]
    df[2] = pd.DataFrame.from_dict(dict(reserva[name][2]), orient='index')[0]
    df.reset_index(level=0, inplace=True)
    df.columns = ['Configuracion', 'Precision', 'desvPrecision', 'errorMetrico']
    df = df.sort_values(['Precision'],ascending=False)
    df.to_csv(name+'Filtrado.csv', sep=',', index=False) 
    print(name)
    display(df)

LinearDiscriminantAnalysis


Unnamed: 0,Configuracion,Precision,desvPrecision,errorMetrico
205,[6 1 3 3 5],0.771807,0.020370,0.161899
257,[6 1 3 3 1],0.762404,0.014479,0.176129
260,[4 1 3 3 1],0.760269,0.018905,0.198850
138,[6 1 2 3 1],0.756297,0.013424,0.253833
326,[6 1 3 1 5],0.752972,0.014543,0.218783
252,[6 1 2 3 5],0.747522,0.018656,0.196030
238,[4 1 2 3 1],0.746997,0.023865,0.218462
176,[6 1 3 4 1],0.745232,0.022616,0.289585
281,[6 1 1 3 5],0.745048,0.016805,0.210032
192,[6 1 3 4 5],0.744244,0.019147,0.279513


SVC


Unnamed: 0,Configuracion,Precision,desvPrecision,errorMetrico
158,[6 1 6 6 1],0.834931,0.021038,0.224101
212,[6 6 1 3 1],0.829871,0.019038,0.178719
275,[6 1 3 6 1],0.829138,0.012082,0.241746
187,[6 1 2 6 1],0.828692,0.019240,0.227497
346,[6 5 3 6 1],0.826564,0.015904,0.246787
291,[6 6 6 3 1],0.823515,0.011920,0.169222
174,[4 6 1 3 1],0.823227,0.014425,0.168565
271,[6 6 4 3 1],0.819071,0.021061,0.133123
308,[6 3 2 6 1],0.818863,0.015246,0.217571
289,[6 1 4 6 1],0.817119,0.019162,0.219857


GaussianNB


Unnamed: 0,Configuracion,Precision,desvPrecision,errorMetrico
206,[4 1 2 6 1],0.860133,0.017097,0.129028
301,[6 1 3 3 5],0.859765,0.017182,0.107641
209,[6 1 3 3 1],0.846975,0.014854,0.144184
367,[6 1 3 6 5],0.845400,0.019620,0.145272
317,[4 1 3 3 1],0.845033,0.014302,0.158159
272,[6 1 4 3 1],0.843765,0.016885,0.210057
377,[6 1 2 3 5],0.842918,0.014495,0.159274
312,[6 1 3 6 1],0.842339,0.017688,0.186490
190,[6 1 3 3 3],0.841605,0.017096,0.167495
241,[6 1 3 4 1],0.840587,0.017178,0.163378


MLPClassifier


Unnamed: 0,Configuracion,Precision,desvPrecision,errorMetrico
329,[4 1 4 6 1],0.692386,0.018300,0.383688
399,[6 1 4 6 1],0.671947,0.023863,0.404061
408,[4 1 4 1 1],0.671304,0.019013,0.302941
361,[4 1 6 6 1],0.671225,0.036407,0.416794
321,[6 5 4 6 1],0.664316,0.029730,0.408877
357,[5 1 4 6 1],0.660624,0.021420,0.335400
337,[6 1 4 4 1],0.658011,0.024962,0.403212
224,[6 1 6 6 1],0.657856,0.018690,0.413399
214,[4 1 5 6 1],0.657434,0.027598,0.398504
392,[6 5 4 4 1],0.654760,0.026666,0.397640


KNeighborsClassifier


Unnamed: 0,Configuracion,Precision,desvPrecision,errorMetrico
354,[6 1 3 6 1],0.877780,0.015037,0.128643
314,[6 1 3 6 5],0.875878,0.019070,0.120768
245,[6 1 3 3 5],0.872901,0.017856,0.110267
210,[6 1 2 6 1],0.871699,0.012775,0.172320
259,[4 1 3 6 1],0.870416,0.022302,0.138309
324,[6 1 2 3 5],0.869915,0.012169,0.133020
329,[6 1 3 3 1],0.866293,0.011922,0.095835
307,[6 1 2 3 1],0.866290,0.010901,0.131233
170,[6 1 3 4 5],0.862702,0.017740,0.126175
230,[6 1 2 6 5],0.861053,0.020190,0.155598


DecisionTreeClassifier


Unnamed: 0,Configuracion,Precision,desvPrecision,errorMetrico
362,[6 1 3 6 5],0.859512,0.020820,0.124269
220,[6 1 3 3 5],0.852578,0.015386,0.105891
260,[6 1 3 3 3],0.837207,0.012857,0.135550
158,[6 1 3 6 1],0.836225,0.018626,0.176993
407,[6 1 3 4 5],0.835952,0.025078,0.191015
164,[6 1 2 6 1],0.835653,0.017729,0.240230
219,[6 1 2 3 6],0.835006,0.014284,0.165769
200,[6 1 2 3 2],0.834762,0.013223,0.179583
172,[6 1 2 3 5],0.834730,0.017834,0.169776
342,[6 1 2 3 1],0.834270,0.010541,0.155408


LogisticRegression


Unnamed: 0,Configuracion,Precision,desvPrecision,errorMetrico
342,[6 1 3 3 5],0.698477,0.017146,0.277417
157,[6 1 3 3 1],0.692501,0.018743,0.245199
155,[4 1 6 1 1],0.692137,0.020861,0.303046
349,[6 1 1 6 5],0.689814,0.022275,0.318112
199,[6 1 3 1 5],0.689295,0.020399,0.287918
281,[4 1 4 1 1],0.689148,0.012664,0.286070
321,[4 1 3 3 1],0.688760,0.024377,0.261038
304,[4 1 4 3 1],0.686591,0.013850,0.343189
258,[4 1 6 3 1],0.686064,0.011582,0.366520
140,[6 1 3 1 3],0.682965,0.016893,0.285778


ExtraTreesClassifier


Unnamed: 0,Configuracion,Precision,desvPrecision,errorMetrico
340,[6 1 3 3 5],0.892472,0.013311,0.084888
369,[6 1 3 3 3],0.883163,0.008794,0.096698
309,[6 1 3 6 1],0.882427,0.015735,0.120010
359,[6 1 2 6 1],0.882022,0.014403,0.144308
383,[6 1 3 6 5],0.881562,0.014420,0.100640
197,[6 1 2 3 1],0.880719,0.010639,0.113966
357,[6 1 3 3 1],0.880713,0.012938,0.105332
246,[6 1 2 3 5],0.878343,0.013236,0.100640
344,[6 1 1 3 5],0.877102,0.015820,0.081387
338,[6 1 2 3 6],0.875830,0.016527,0.126053


AdaBoostClassifier


Unnamed: 0,Configuracion,Precision,desvPrecision,errorMetrico
232,[6 1 3 6 5],0.893959,0.015106,0.106766
255,[6 1 3 3 5],0.886523,0.021657,0.112017
6,[6 1 2 3 5],0.886021,0.009519,0.120768
267,[6 1 1 3 1],0.881202,0.012816,0.096698
240,[6 1 2 3 4],0.881174,0.013039,0.191040
234,[6 1 4 3 5],0.880549,0.009858,0.168234
248,[6 1 1 3 3],0.879001,0.015058,0.076841
173,[6 1 3 3 1],0.878758,0.013924,0.101015
152,[6 1 1 3 5],0.878347,0.016312,0.080512
184,[6 1 2 3 1],0.877051,0.009101,0.121736


RandomForestClassifier


Unnamed: 0,Configuracion,Precision,desvPrecision,errorMetrico
356,[6 1 3 3 5],0.898412,0.007043,0.077887
386,[6 1 3 6 5],0.892714,0.013661,0.081387
283,[6 1 2 6 1],0.886346,0.016899,0.140912
394,[6 1 3 6 1],0.884131,0.017527,0.105332
410,[6 1 3 3 3],0.883404,0.012360,0.096698
180,[6 1 2 3 5],0.882797,0.016676,0.126019
334,[6 1 3 3 1],0.882418,0.009747,0.125190
323,[6 1 1 3 3],0.880957,0.014018,0.083748
316,[6 1 2 3 1],0.880471,0.011367,0.122600
352,[6 1 1 3 4],0.880196,0.012176,0.122750


GradientBoostingClassifier


Unnamed: 0,Configuracion,Precision,desvPrecision,errorMetrico
300,[6 1 3 3 5],0.914520,0.008447,0.068260
371,[6 1 3 6 5],0.909314,0.015718,0.089263
114,[6 1 3 1 5],0.902372,0.011416,0.105891
278,[6 1 2 3 1],0.901734,0.015191,0.109649
270,[6 1 1 3 3],0.901487,0.013594,0.063890
310,[6 1 2 3 5],0.900138,0.016621,0.109392
311,[6 1 1 3 5],0.899650,0.014597,0.077012
304,[6 1 1 3 4],0.899511,0.013050,0.087308
267,[6 1 2 3 4],0.899267,0.012695,0.114970
262,[6 1 3 3 1],0.895867,0.009047,0.088928


VotingClassifier


Unnamed: 0,Configuracion,Precision,desvPrecision,errorMetrico
261,[6 1 2 3 1],0.889516,0.012902,0.119146
252,[6 1 3 6 5],0.888253,0.018566,0.102390
347,[6 1 2 3 5],0.886269,0.011046,0.123394
377,[6 1 3 3 1],0.885847,0.012231,0.108786
114,[6 1 2 6 1],0.885631,0.011496,0.136668
357,[6 1 3 4 5],0.884766,0.016744,0.110403
216,[6 1 3 6 1],0.884134,0.017270,0.121736
307,[6 1 2 3 4],0.883374,0.013214,0.145225
179,[6 3 3 3 5],0.882362,0.012155,0.104114
333,[6 1 6 3 1],0.882178,0.019476,0.131233


In [None]:
def _individual_to_params(frecuencias, values):
    # crear dataset
    names_ = frecuencias[0].columns.values
    seed = 7
    # reestructuracion
    salida_final = pd.DataFrame(columns=names_)
    for sec in range(1,16):
        dataset = pd.DataFrame(columns=names_)
        l = [frecuencias[i][frecuencias[i]['Sector']==sec].shape[0] for i in values]
        corte = max(l)
        tx=l.index(max(l))
        dataset[names_[tx]] = dataset[names_[tx]].append(frecuencias[int(values[tx])][frecuencias[int(values[tx])]['Sector']==sec][:corte][names_[tx]])
        dataset[names_[-1]] = frecuencias[int(values[tx])][frecuencias[int(values[tx])]['Sector']==sec][:corte][names_[-1]].reset_index(drop=True)
        dataset = dataset.reset_index(drop=True)
        for tx in range(5):
            dataset[names_[tx]] = frecuencias[int(values[tx])][frecuencias[int(values[tx])]['Sector']==sec][:corte][names_[tx]].reset_index(drop=True)
        # join parts
        salida_final = salida_final.append(dataset)
    # shuffle dataset
    salida_final = shuffle(salida_final, random_state=seed).reset_index(drop=True)
    salida_final = salida_final.apply(pd.to_numeric)
    # dataframe to X,y 
    X = salida_final[names_[:-1]]
    y = salida_final[names_[-1]]
    return X,y