In [1]:
from sge import StructuredGrammarEvolution
import numpy as np
from copy import deepcopy
from pandas import read_csv
from multiprocessing import Pool, Lock
from json import dumps, dump

In [2]:
def train(params, X, Y):
    """Build a StructuredGrammarEvolution model and fit it on one data set.

    Args:
        params: keyword arguments forwarded to the
            StructuredGrammarEvolution constructor.
        X: indexable collection of feature rows; the length of its first
            row is passed to the constructor as ``array_len``.
        Y: targets, handed to ``fit`` together with ``X``.

    Returns:
        dict with two entries: ``"model"`` (the instance ``fit`` was called
        on) and ``"history"`` (whatever ``fit`` returned).
    """
    n_features = len(X[0])
    estimator = StructuredGrammarEvolution(array_len=n_features, **params)
    fit_history = estimator.fit(X, Y)
    return dict(model=estimator, history=fit_history)

In [3]:
def _load_dataset(csv_path, target_column):
    """Read one CSV file and split it into (features, target).

    ``features`` is the ``.values`` array of every column except
    ``target_column``; ``target`` is the ``target_column`` Series.
    """
    with open(csv_path) as file:
        frame = read_csv(file)
    return frame.drop([target_column], axis=1).values, frame[target_column]


def evaluate(train_file, test_file, target_column, params, n_iter):
    """Train ``n_iter`` independent models and summarise their test fitness.

    Each iteration calls ``train(params, X_train, Y_train)`` and then scores
    the returned model by calling it as ``model(X_test, Y_test)``.

    Args:
        train_file: path of the training CSV.
        test_file: path of the test CSV.
        target_column: name of the column holding the target in both files.
        params: keyword arguments forwarded to ``train``.
        n_iter: number of train/score repetitions.

    Returns:
        dict with
          * ``"max"``: highest fitness over all runs (0 when ``n_iter`` is 0),
          * ``"avg"``: mean fitness (0 when ``n_iter`` is 0),
          * ``"std"``: standard deviation of the fitness values,
          * ``"best_history"``: the ``"history"`` of the first run that
            reached the highest fitness (``None`` when ``n_iter`` is 0).
    """
    X_train, Y_train = _load_dataset(train_file, target_column)
    X_test, Y_test = _load_dataset(test_file, target_column)

    fitnesses = []
    # Start from "no best yet" rather than 0 so that a run scoring 0.0 (or
    # below) is still recorded as the best run and its history is kept.
    best_fitness = None
    best_history = None
    for _ in range(n_iter):
        train_result = train(params, X_train, Y_train)
        fitness = train_result["model"](X_test, Y_test)
        fitnesses.append(fitness)
        # Strict comparison: on ties the earliest run stays the best one.
        if best_fitness is None or fitness > best_fitness:
            best_fitness = fitness
            best_history = train_result["history"]

    return {
        "max": best_fitness if fitnesses else 0,
        "avg": sum(fitnesses) / len(fitnesses) if fitnesses else 0,
        "std": np.array(fitnesses).std(),
        "best_history": best_history
    }

In [4]:
def recursiveGridSearchCV(params_full,
                        index_of_param=0,
                        params_current=None,
                        index_of_choice=0):
    """Yield every parameter combination below one fixed choice.

    Keys of ``params_full`` are visited in sorted order. This call fixes the
    key at position ``index_of_param`` to its ``index_of_choice``-th value and
    then recurses over every value of each following key.

    Args:
        params_full: dict mapping parameter name -> list of candidate values.
        index_of_param: position (in sorted key order) of the key fixed here.
        params_current: choices already fixed for earlier keys. It is copied,
            never modified; ``None`` means "nothing fixed yet".
        index_of_choice: index into ``params_full[key]`` of the value to fix.

    Yields:
        dict: one complete combination per leaf of the recursion.

    Raises:
        IndexError: if ``params_full`` is empty or an index is out of range
            (raised when the generator is first advanced).
    """
    keys = sorted(params_full)
    key = keys[index_of_param]

    # Work on a private copy: a shared default dict (or the caller's dict)
    # must not accumulate keys from one grid and leak them into another.
    combination = {} if params_current is None else dict(params_current)
    combination[key] = params_full[key][index_of_choice]

    if index_of_param == len(keys) - 1:
        yield combination
        return

    next_key = keys[index_of_param + 1]
    for choice_index in range(len(params_full[next_key])):
        # Each branch gets its own deep copy so sibling branches never share
        # the already-fixed values.
        yield from recursiveGridSearchCV(params_full,
                                         index_of_param + 1,
                                         deepcopy(combination),
                                         choice_index)

def gridSearchCV(params_full):
    """Yield every combination of the candidate values in ``params_full``.

    Keys are processed in sorted order; the first key varies slowest.

    Args:
        params_full: dict mapping parameter name -> list of candidate values.

    Yields:
        dict: one parameter combination at a time. An empty grid yields a
        single empty combination; a parameter with an empty candidate list
        yields nothing.
    """
    if not params_full:
        # No parameters to vary: exactly one (empty) combination exists.
        yield {}
        return

    first_key = sorted(params_full)[0]
    for choice_index in range(len(params_full[first_key])):
        # Hand over a fresh dict on every call so no state can be shared
        # between iterations or between separate grids.
        yield from recursiveGridSearchCV(params_full,
                                         params_current={},
                                         index_of_choice=choice_index)
    


In [6]:
def CrossValidate(train_file, test_file, target_column, n_iter, n_classes, param_grid=None):
    """Evaluate every hyper-parameter combination of a grid on one data set.

    For each combination produced by ``gridSearchCV`` the parameters are
    printed as JSON and ``evaluate`` is called with the given files.

    Args:
        train_file: path of the training CSV, forwarded to ``evaluate``.
        test_file: path of the test CSV, forwarded to ``evaluate``.
        target_column: name of the target column, forwarded to ``evaluate``.
        n_iter: number of repetitions per combination, forwarded to
            ``evaluate``.
        n_classes: passed to the model as the ``"classes"`` parameter.
        param_grid: optional dict mapping parameter name -> list of candidate
            values. ``None`` selects the built-in default grid below.

    Returns:
        list of dicts, one per combination, each with ``"result"`` (the value
        returned by ``evaluate``) and ``"params"`` (the combination itself,
        without ``"classes"`` / ``"checkpoint_prefix"``).
    """
    if param_grid is None:
        param_grid = {
            "population_size": [30, 50, 100, 500],
            "mutation_prob": [0.05, 0.3],
            "elitism": [0, 1, 5],
            "max_levels": [4],  # three levels generate up to 16 nodes.
            "min_levels": [1],
            "crossover_prob": [0.6, 0.9],
            "k": [3, 7],
            "generations": [30, 50, 100, 500],
        }

    results = []
    # enumerate gives every combination its own index, so each one gets a
    # distinct checkpoint prefix ("0_", "1_", ...).
    for it, param in enumerate(gridSearchCV(param_grid)):
        print("parameters: ")
        print(dumps(param, indent=4))
        # Use the caller's files and target column, not fixed paths.
        result = evaluate(train_file,
                          test_file,
                          target_column,
                          {**param, "classes": n_classes, "checkpoint_prefix": str(it) + "_"},
                          n_iter)
        results.append({
            "result": result,
            "params": param
        })

    return results

        
        
    


In [7]:
# Grid search with the glass CSV paths: 10 repetitions per parameter
# combination, 7 classes.
results_glass = CrossValidate(
    train_file="data/glass_train.csv",
    test_file="data/glass_test.csv",
    target_column="glass_type",
    n_iter=10,
    n_classes=7,
)

parameters: 
{
    "crossover_prob": 0.6,
    "elitism": 0,
    "generations": 30,
    "k": 3,
    "max_levels": 4,
    "min_levels": 1,
    "mutation_prob": 0.05,
    "population_size": 30
}
Generation 1 out of 30
|████████████████████| avg: 0.02967 - max: 0.20408 - min: 0.00000
Generation 2 out of 30
|████████████████████| avg: 0.06842 - max: 0.25324 - min: 0.00000
Generation 3 out of 30
|████████████████████| avg: 0.09862 - max: 0.20408 - min: 0.00000
Generation 4 out of 30
|████████████████████| avg: 0.10566 - max: 0.20408 - min: 0.00000
Generation 5 out of 30
|████████████████████| avg: 0.10238 - max: 0.20408 - min: 0.00000
Generation 6 out of 30
|████████████████████| avg: 0.10161 - max: 0.22267 - min: 0.00000
Saving checkpoint at checkpoints/checkpoint_1.json
Generation 7 out of 30
|████████████████████| avg: 0.11596 - max: 0.22267 - min: 0.00000
Generation 8 out of 30
|████████████████████| avg: 0.11644 - max: 0.26054 - min: 0.00000
Generation 9 out of 30
|████████████████████|

KeyboardInterrupt: 

In [None]:
# Grid search with the breast-cancer (Coimbra) CSV paths: 10 repetitions per
# parameter combination, 2 classes.
results_breast_cancer = CrossValidate(
    train_file="data/breast_cancer_coimbra_train.csv",
    test_file="data/breast_cancer_coimbra_test.csv",
    target_column="Classification",
    n_iter=10,
    n_classes=2,
)