# Ailerons norm
> Regression

In [None]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
from sklearn.model_selection import RepeatedKFold
import xgboost as xgb
from sklearn.metrics import mean_squared_error

from GAparsimony import GAparsimony, Population, getFitness

In [None]:
# Read the Ailerons CSV (the "_norm" suffix suggests the values are already
# normalised -- TODO confirm) and report its (rows, columns) shape.
df = pd.read_csv("../data/ailerons_norm.csv")
print(df.shape)

# Every column except the last one is used as a predictor; the last column
# is the regression target.
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Last expression of the cell: shows the first rows in the notebook output.
df.head()

In [None]:
# Forwarded unchanged to GAparsimony(rerank_error=...) further down.
# NOTE(review): presumably the error tolerance within which individuals are
# re-ranked by complexity -- confirm against the GAparsimony documentation.
rerank_error = 0.01

# Hyperparameter specification handed to GAparsimony(params=...).
# Each key is a keyword argument name for the estimator used by the fitness
# function (xgb.XGBRegressor). Entries carrying a "range" are tagged
# Population.INTEGER / Population.FLOAT; entries carrying a "value" are
# tagged Population.CONSTANT (presumably searched vs. held fixed -- confirm
# against the GAparsimony documentation).
# Every key must appear exactly once: a repeated key in a dict literal is
# silently collapsed by Python, which hides copy-paste mistakes.
params = {
    "n_estimators": {"range": (10, 2000), "type": Population.INTEGER},
    "max_depth": {"range": (2, 20), "type": Population.INTEGER},
    "min_child_weight": {"range": (1, 20), "type": Population.INTEGER},
    "reg_alpha": {"range": (0., 1.), "type": Population.FLOAT},
    "reg_lambda": {"range": (0., 1.), "type": Population.FLOAT},
    "subsample": {"range": (0.6, 1.), "type": Population.FLOAT},
    "colsample_bytree": {"range": (0.8, 1.), "type": Population.FLOAT},
    "learning_rate": {"value": 0.01, "type": Population.CONSTANT},
    "random_state": {"value": 1234, "type": Population.CONSTANT},
    "verbosity": {"value": 0, "type": Population.CONSTANT},
}

def complexity(model, nFeatures, **kwargs):
    """Return a scalar complexity score for a fitted model.

    The score is ``nFeatures * 1E9`` plus the number of entries returned by
    ``model.get_booster().get_dump()`` (presumably one entry per boosted
    tree -- confirm against the XGBoost documentation). The large multiplier
    makes the feature count dominate the score; the dump length only
    separates models that use the same number of features, as long as it
    stays below 1E9.

    Args:
        model: Object exposing ``get_booster()``, whose result exposes
            ``get_dump()`` returning a sized collection.
        nFeatures: Number of features used by the model.
        **kwargs: Accepted and ignored.

    Returns:
        The complexity score (a float for numeric ``nFeatures``, because of
        the ``1E9`` factor).
    """
    feature_term = nFeatures * 1E9
    booster_dump = model.get_booster().get_dump()
    return feature_term + len(booster_dump)

# Build the fitness object passed to GAparsimony(fitness=...): it combines
# the XGBoost regressor class, the MSE metric and the complexity() function
# defined above.
# NOTE(review): minimize=True presumably means a lower metric is better,
# test_size=0.2 presumably holds out 20% of the rows, and n_jobs=-1
# presumably uses all cores -- confirm against the GAparsimony docs.
fitness = getFitness(
    xgb.XGBRegressor,
    mean_squared_error,
    complexity,
    minimize=True,
    test_size=0.2,
    random_state=42,
    n_jobs=-1,
)

In [None]:
# Configure the GAparsimony search using the fitness object and the
# hyperparameter specification defined in the previous cell.
GAparsimony_model = GAparsimony(
    fitness=fitness,
    params=params,
    # Number of predictor columns: every column of df except the last one.
    features=len(df.columns[:-1]),
    keep_history=True,
    rerank_error=rerank_error,
    popSize=64,
    elitism=16,
    maxiter=25,
    early_stop=10,
    feat_thres=0.90,  # Perc selected features in first generation
    feat_mut_thres=0.10,  # Prob of a feature to be one in mutation
    seed_ini=1234,
)

In [None]:
# Run the configured search on the predictors X and the target y.
GAparsimony_model.fit(X, y)

In [None]:
# Show the summary of the fitted search (presumably settings and best
# results -- see the GAparsimony documentation).
GAparsimony_model.summary()

In [None]:
# Show the importance report of the fitted search (presumably per-feature
# selection statistics -- see the GAparsimony documentation).
GAparsimony_model.importance()

In [None]:
# Plot the search results (presumably the evolution across generations,
# using the history kept via keep_history=True -- TODO confirm).
GAparsimony_model.plot()