In [2]:
import pandas as pd
import numpy as np
from GeneticAlgorithm import GA
from sklearn import datasets
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split

In [7]:
if __name__ == '__main__':
    # Demo: run the genetic-algorithm feature selector (GA) on scikit-learn's
    # wine dataset, using an XGBoost classifier as the estimator.

    # One seed shared by the estimator, the train/test split and the GA
    random_state = 1228

    # Define estimator (an XGBoost classifier)
    xgb_clf = XGBClassifier(n_estimators=300, random_state=random_state)

    # Load example dataset from Scikit-learn
    dataset = datasets.load_wine()
    X = pd.DataFrame(data=dataset['data'])
    y = pd.Series(data=dataset['target'])

    # Split into train and test.
    # NOTE(review): test_X / test_y are not used anywhere in this cell; the
    # GA is fitted on the training split only.
    train_X, test_X, train_y, test_y = train_test_split(
        X, y, test_size=0.20, random_state=random_state)

    # Set an initial best chromosome for the first population: one 0/1 gene
    # per feature column. Kept under its own name so the seed is not
    # overwritten by the GA result below.
    initial_chromosome = np.array([1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1])

    # Fail early, with a clear message, if the hand-written seed does not
    # match the number of feature columns.
    n_features = train_X.shape[1]
    if initial_chromosome.shape[0] != n_features:
        raise ValueError(
            f'initial chromosome has {initial_chromosome.shape[0]} genes '
            f'but the dataset has {n_features} features')

    # Create the genetic selector instance.
    # You should not set the number of cores (n_jobs) in the Scikit-learn
    # model to avoid UserWarning. The genetic selector is already parallelizable.
    genetic_selector = GA(
        estimator=xgb_clf, cv=5, n_gen=30, population_size=10,
        crossover_rate=0.8, mutation_rate=0.15, tournament_k=2,
        calc_train_score=True, initial_best_chromosome=initial_chromosome,
        n_jobs=-1, random_state=random_state, verbose=0)

    # Fit features
    genetic_selector.fit(train_X, train_y)

    # Show the results.
    # support[0] is indexed as (chromosome, score, epoch); support[1..3] are
    # printed below as test scores, train scores and chromosome history.
    support = genetic_selector.support()
    best_result = support[0]
    best_chromosome = best_result[0]
    score = best_result[1]
    best_epoch = best_result[2]
    print(f'Best chromosome: {best_chromosome} -> (Selected Features IDs: {np.where(best_chromosome)[0]})')
    print(f'Best score: {score}')
    print(f'Best epoch: {best_epoch}')

    test_scores = support[1]
    train_scores = support[2]
    chromosomes_history = support[3]
    print(f'Test scores: {test_scores}')
    print(f'Train scores: {train_scores}')
    print(f'Chromosomes history: {chromosomes_history}')

# Creating initial population with 10 chromosomes...
# Evaluating initial population...
# Current best chromosome: (array([1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1]), 0.9652709359605911, 0)

# Creating generation 1...
# Selection 1 done.
# Crossover 1 done.
# Mutation 1 done.
# Evaluating population of new generation 0...
# (BETTER) A better chromosome than the current one has been found (0.9721674876847292).
# Current best chromosome: (array([1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1]), 0.9721674876847292, 1)
# Elapsed generation time: 4.66 seconds

# Creating generation 2...
# Selection 2 done.
# Crossover 2 done.
# Mutation 2 done.
# Evaluating population of new generation 1...
# Same scoring value found 1/5 times.
# Current best chromosome: (array([1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1]), 0.9721674876847292, 1)
# Elapsed generation time: 4.82 seconds

# Creating generation 3...
# Selection 3 done.
# Crossover 3 done.
# Mutation 3 done.
# Evaluating population of new generation 2...
# Same

# (WORST) No better chromosome than the current one has been found (0.9721674876847292).
# Current best chromosome: (array([1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1]), 0.9790640394088669, 19)
# Elapsed generation time: 5.05 seconds

# Creating generation 25...
# Selection 25 done.
# Crossover 25 done.
# Mutation 25 done.
# Evaluating population of new generation 24...
# (WORST) No better chromosome than the current one has been found (0.951231527093596).
# Current best chromosome: (array([1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1]), 0.9790640394088669, 19)
# Elapsed generation time: 4.83 seconds

# Creating generation 26...
# Selection 26 done.
# Crossover 26 done.
# Mutation 26 done.
# Evaluating population of new generation 25...
# (WORST) No better chromosome than the current one has been found (0.958128078817734).
# Current best chromosome: (array([1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1]), 0.9790640394088669, 19)
# Elapsed generation time: 4.82 seconds

# Creating generation 27...
# Select