# Test de Búsqueda de Hiperparámetros
-----------------------------------------------------------

## Importación de Dependencias

In [1]:
import pandas as pd
import sys
sys.path.append("./data/")
sys.path.append("./lib/")
from lib.Methods import GeneralMethods
from lib.edasSearch import EdasHyperparameterSearch
from lib.Hiperparametros import HyperparameterSwitcher
from lib.ImportacionModelos import getClassifierNames
from lib.ImportacionModelos import getClassifierModels
from lib.ImportacionModelos import getRegressorNames
from lib.ImportacionModelos import getRegressorModels

## Test Edas Search

In [2]:
specificModelName = 'KNeighborsClassifier'
estimadorDictionary = getClassifierModels(includeEnsambled=True)
estimador = estimadorDictionary[specificModelName]
hypSwitcher = HyperparameterSwitcher()
parametros = hypSwitcher.getHyperparameters(specificModelName)()
gm = GeneralMethods(estimador, urlDataset='./data/Tx_0x06')
test = EdasHyperparameterSearch(
    gm, parametros, estimador, iterations=2, sample_size=15, select_ratio=0.3, debug=False)
test.run()
dataframe = pd.DataFrame(list(test.resultados)).sort_values(
    ['Accuracy'], ascending=False).reset_index(drop=True)
dataframe.to_csv("./result/result_" + specificModelName + ".csv")

indice	Accuracy algorithm  leaf_size  n_neighbors  p   weights
0	0.819244   kd_tree         10           30  1  distance
1	0.812391   kd_tree         30            3  1  distance
2	0.822523  ball_tree         30            5  1  uniform


## Test Eas Search

In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
from lib.easSearch import GeneticSearchCV

# Lectura de los datos y separación
dataset = pd.read_csv("./data/Tx_0x06")
validation_size = 0.20
X = dataset.values[:, 0:dataset.shape[1] - 1].astype(float)
Y = dataset.values[:, dataset.shape[1] - 1]
# Split randomizando los datos
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=validation_size, random_state=7)

# Modelos para el Test
models = { 
    'ExtraTreesClassifier': ExtraTreesClassifier(),
    'GradientBoostingClassifier': GradientBoostingClassifier(),
    'KNeighborsClassifier': KNeighborsClassifier()
}

# Parametros de los modelos para el Test
params = { 
    'ExtraTreesClassifier': { 
        'n_estimators': [10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30],
    },
    'GradientBoostingClassifier': { 
        'n_estimators': [17, 22, 27, 32], 
        'learning_rate': [0.3, 0.5, 0.8, 1.0],
    },
    'KNeighborsClassifier': {
        'n_neighbors': [3, 4, 5, 6, 7, 8, 10, 12, 15, 20, 30, 50],
        'weights': ['uniform', 'distance'],
        'algorithm': ['ball_tree', 'kd_tree', 'brute'],
        'p': [1, 2, 3, 4, 5],
        'leaf_size': [10, 30, 50, 70]
    }
}

cv = KFold(n_splits=5)
n_jobs = 4
verbose = 1
scoring = "accuracy"
refit = False
key = 'KNeighborsClassifier'
model = models[key]
params = params[key]
gs2 = GeneticSearchCV(model, params, cv=cv, n_jobs=n_jobs, verbose=verbose, scoring=scoring, refit=refit)
result = gs2.fit(X, Y)

Tipos: [1, 1, 1, 1, 1], rangos: [11, 1, 2, 4, 3]




--- Evolve in 1440 possible combinations ---
gen	nevals	avg      	min      	max      	std      
0  	20    	0.0453788	0.0161982	0.0607432	0.0131481
1  	14    	0.0540138	0.0402573	0.0621725	0.00448369
2  	13    	0.0570033	0.050262 	0.0621725	0.00278215
3  	14    	0.059445 	0.0526441	0.0640781	0.00277334
4  	16    	0.0589447	0.0195331	0.0621725	0.00917103
5  	15    	0.0596236	0.0164364	0.0621725	0.0099185 
6  	10    	0.0616841	0.0543116	0.0621725	0.00171523
7  	14    	0.062101 	0.0607432	0.0621725	0.000311498
8  	14    	0.0612434	0.044545 	0.0621725	0.00383649 
9  	10    	0.0621725	0.0621725	0.0621725	1.38778e-17
10 	11    	0.061839 	0.0555026	0.0621725	0.00145366 
Best individual is: {'n_neighbors': 4, 'weights': 'distance', 'algorithm': 'ball_tree', 'p': 2, 'leaf_size': 10}
with fitness: 0.06407813244402097
