In [1]:
from src.Dataset import Dataset
from src.Evolution import Evolution
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import logging
from pathlib import Path
import os

from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
import xgboost as xgb

logging.getLogger("imported_module").setLevel(logging.CRITICAL)
np.warnings.filterwarnings('ignore', category=np.VisibleDeprecationWarning)  

# Instructions

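In this notebook, feature selection can be carried out for the census-income datasets from the UCI repository. This code can be modified to load a different dataset; as configured below, it reads `data/df_preprocessed_all.csv` and uses `FELT_LIFE_NET` as the target. Four wrapper feature selection methods are used to identify the informative features using a Decision Tree classifier (the cells below currently wrap an `xgb.XGBRegressor` instead):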

* ## CHC$_{QX}$: 
The hyper-parameter choices of CHC$_{QX}$ are based on the paper “Fast Genetic Algorithm For Feature Selection - A Qualitative Approximation Approach”. The values are set to $q=10$ and $f=10$.

* ## PSO$_{QX}$: 
The hyper-parameter choices of PSO$_{QX}$ are based on the paper “Fast Genetic Algorithm For Feature Selection - A Qualitative Approximation Approach”. The values are set to $q=10$ and $f=10$.

* ## CHC: 
The CHC algorithm is implemented according to the paper “The CHC Adaptive Search Algorithm: How to Have Safe Search When Engaging in Nontraditional Genetic Recombination”. The population size is 50, the diversity parameter is set to $(d = \frac{k}{4})$, where $k$ is the length of the individual (number of features), and the divergence rate is $(div = 0.35)$. (Note: the CHC cell below currently sets `population_size = 20`.)

* ## PSO:
The global version of PSO with a topology connecting all particles to one another. The following options are used: \{c1: 1.49618, c2: 1.49618, w: 0.7298\}, while the number of particles is set to 50. (Note: the PSO cell below currently uses \{c1: 2, c2: 2, w: 0.6\} and 20 particles.)

# CHC$_{QX}$

In [2]:
# Load the preprocessed data and discard the columns that are not used as features.
df = pd.read_csv("data/df_preprocessed_all.csv", sep=",").drop(
    columns=['FELT_LIFE', 'REMOVAL_DATE', 'INSTALLATION_DATE', 'REPORT_DATE']
)

# Positional index of the target column.
label = df.columns.get_loc('FELT_LIFE_NET')

# Models that are wrapped by the feature selection; the disabled candidates
# are kept for reference.
MLA = [
    # ('Linear Regressor', LinearRegression()),
    # ('SVR', SVR()),
    # ('Random Forest', RandomForestRegressor()),
    ('Gradient Boosting', xgb.XGBRegressor()),
]

results_chcqx = {}
predictions_chcqx = {}
header = df.columns

population_size = 20

# For every model in MLA: CHCqx feature selection followed by validation.
for model_name, model_instance in MLA:

    dataset = Dataset(df, 'df', label, divide_dataset=False, header=header)
    dataset.divide_dataset(
        model_instance,
        normalize=False,
        shuffle=True,
        all_features=True,
        all_instances=True,
        evaluate=False,
        partial_sample=False,
        folds=5,
    )

    # Feature selection with the approximated CHC (CHCqx).
    ind_size = dataset.X_train.shape[1]
    meta_dict, log, baseline_full_data = Evolution.CHCqx(
        dataset, 5, 5, 2, population_size, verbose=1
    )

    # The last log row holds the final individual; entries equal to 1 mark
    # the selected feature positions.
    feature_subset = np.array(log.iloc[-1]['ind'])
    selected_features = list(np.where(feature_subset == 1)[0])
    num_selected_features = len(selected_features)
    duration = np.round(log.iloc[-1]['time'], 2)

    dataset.set_features(selected_features)

    # Train the regressor on the selected features.
    dataset.fit_classifier()

    # Cross-validation.
    dataset.set_CV()
    cv = dataset.get_CV()

    # Metrics on the training split.
    dataset.set_train_metrics()
    train = dataset.get_train_metrics()
    traintime = dataset.get_traintime()

    # Metrics on the test split.
    dataset.set_test_metrics()
    test = dataset.get_test_metrics()

    # Key order is preserved on purpose: it becomes the column order of the
    # metrics table built from this dictionary.
    results_chcqx[model_name] = {
        'Model_name': model_name,
        **{
            key: cv[key]
            for key in ('CV_TrainMAE', 'CV_TrainRMSE', 'CV_TestMAE', 'CV_TestRMSE', 'CV_fit_time')
        },
        'CV_fit_time_ges': cv['CV_fit_time'] + duration,
        **{key: train[key] for key in ('TrainRMSE', 'TrainMAE')},
        **{key: test[key] for key in ('TestRMSE', 'TestMAE')},
        'TrainTime': traintime,
        'TrainTime_ges': traintime + duration,
        'Features': selected_features,
        'Feature-Anzahl': num_selected_features,
        'FS-Laufzeit': duration,
        'Meta-Modell-Zeit': meta_dict['meta_model_time'],
        'Spearman-Rangkoeffizient Meta-Modell': meta_dict['coef'],
        'Sample_size': meta_dict['sample_size'],
        'Full_sample_size': meta_dict['full_sample_size'],
        'F_s(g)': meta_dict['f_s(g)'],
    }

    # Targets and predictions for both splits, kept for later export.
    predictions_chcqx[model_name] = {
        'Model_name': model_name,
        'y_train': dataset.get_y_train(),
        'y_test': dataset.get_y_test(),
        'pred_train': dataset.get_y_pred_train(),
        'pred_test': dataset.get_y_pred_test(),
    }
    print(model_name)

Suchzeit Meta-Modell: 371.5473289489746
Meta-Modell Bewertung:

f_s(g): 0.1675667993177943 = (1 - 0.9999999999999999) + 1179/7036
Berechnungszeit: 371.5473289489746 Sekunden
Meta-model sample size: 1179
Gradient Boosting  75.8045 , Gen =  5 


## Speichern der Ergebnisse

In [3]:
# Persist the CHCqx results (metrics, selected feature names, predictions) as CSV.

# `to_csv` raises OSError when the target directory does not exist, so create
# it first instead of failing after the expensive search has already run.
chcqx_dir = Path('data/CHCqx')
chcqx_dir.mkdir(parents=True, exist_ok=True)

# --- Metrics: one row per model --------------------------------------------
df_chcqx = pd.DataFrame(results_chcqx).transpose()
df_chcqx.to_csv(chcqx_dir / 'All_FS_chcqx_metrics.csv', index=False)

# --- Selected features: one column per model --------------------------------
df_features_chcqx = pd.DataFrame()

# Longest selection over all models; `default=0` keeps an empty MLA from raising.
max_selected_features = max(
    (len(results_chcqx[model_name]['Features']) for model_name, _ in MLA),
    default=0,
)

for model_name, model_instance in MLA:
    # NOTE(review): the stored indices are looked up directly in `df.columns`,
    # which still contains the label column. Confirm that the indices produced
    # by Dataset/Evolution refer to the same column layout.
    selected_features = df.columns[results_chcqx[model_name]['Features']]

    # Pad shorter selections with NaN so every column has the same length.
    if len(selected_features) < max_selected_features:
        selected_features = np.append(
            selected_features,
            [np.nan] * (max_selected_features - len(selected_features)),
        )

    df_features_chcqx[model_name] = selected_features

df_features_chcqx.to_csv(chcqx_dir / 'All_FS_chcqx_Features.csv', index=False)

# --- Predictions: long format (model, data type, value) ---------------------
# Every stored series except the 'Model_name' entry is flattened into one
# record per value.
chcqx_data = [
    {'Model_name': model_name, 'Data_type': data_type, 'Value': value}
    for model_name, data in predictions_chcqx.items()
    for data_type, values in data.items()
    if data_type != 'Model_name'
    for value in values
]

df_chcqx2 = pd.DataFrame(chcqx_data)
df_chcqx2.to_csv(chcqx_dir / 'All_FS_chcqx_predictions.csv', index=False)

# PSO$_{QX}$

In [2]:
# PSOqx feature selection: run the search, validate the selected subset and
# write metrics, feature names and predictions to data/PSOqx/ per iteration.

# Load the preprocessed data and drop the columns that are not used as features.
df = pd.read_csv("data/df_preprocessed_all.csv", sep=",")
df = df.drop(columns=['FELT_LIFE','REMOVAL_DATE', 'INSTALLATION_DATE', 'REPORT_DATE'])

# Positional index of the target column.
label = df.columns.get_loc('FELT_LIFE_NET')
header = df.columns

# Models that are wrapped by the feature selection; the disabled candidates
# are kept for reference.
MLA = [
    #('Linear Regressor', LinearRegression()),
    #('SVR', SVR()),
    #('Random Forest',RandomForestRegressor()),
    ('Gradient Boosting', xgb.XGBRegressor())
]

# PSOqx settings.
population_size = 20
options = {'c1': 2, 'c2': 2, 'w': 0.6, 'k': population_size, 'p':2}

# Number of independent runs.
num_iterations = 1

# Create the output directory up front: `to_csv` raises OSError for a missing
# directory, which would otherwise only surface after the long-running search.
psoqx_dir = Path('data/PSOqx')
psoqx_dir.mkdir(parents=True, exist_ok=True)

for iteration in range(1, num_iterations + 1):
    # Results are collected per iteration and written at the end of it.
    results_psoqx = {}
    predictions_psoqx = {}

    # For every model in MLA: PSOqx feature selection followed by validation.
    for model_name, model_instance in MLA:

        dataset = Dataset(df, 'df', label, divide_dataset=False, header=header)
        dataset.divide_dataset(model_instance, normalize=False, shuffle=True, all_features=True, all_instances=True, evaluate=False, partial_sample=False, folds=5)

        # Feature selection with the approximated PSO (PSOqx).
        meta_dict, log, baseline_full_data = Evolution.PSOqx(dataset, options, f=5, n_individual=5, f_no_change=2, n_particles=population_size, verbose=1)

        # The last log row holds the final individual; entries equal to 1 mark
        # the selected feature positions.
        feature_subset = np.array(log.iloc[-1]['ind'])
        selected_features = list(np.where(feature_subset == 1)[0])
        num_selected_features = len(selected_features)
        duration = np.round(log.iloc[-1]['time'], 2)

        dataset.set_features(selected_features)

        # Train the regressor on the selected features.
        dataset.fit_classifier()

        # Cross-validation.
        dataset.set_CV()
        cv = dataset.get_CV()

        # Metrics on the training split.
        dataset.set_train_metrics()
        train = dataset.get_train_metrics()
        traintime = dataset.get_traintime()

        # Metrics on the test split.
        dataset.set_test_metrics()
        test = dataset.get_test_metrics()

        # Key order determines the column order of the metrics CSV.
        results_psoqx[model_name] = {
            'Model_name': model_name,
            'CV_TrainMAE': cv['CV_TrainMAE'],
            'CV_TrainRMSE': cv['CV_TrainRMSE'],
            'CV_TestMAE': cv['CV_TestMAE'],
            'CV_TestRMSE': cv['CV_TestRMSE'],
            'CV_fit_time': cv['CV_fit_time'],
            'CV_fit_time_ges': cv['CV_fit_time'] + duration,
            'TrainRMSE': train['TrainRMSE'],
            'TrainMAE': train['TrainMAE'],
            'TestRMSE': test['TestRMSE'],
            'TestMAE': test['TestMAE'],
            'TrainTime': traintime,
            'TrainTime_ges': traintime + duration,
            'Features': selected_features,
            'Feature-Anzahl': num_selected_features,
            'FS-Laufzeit': duration,
            'Meta-Modell-Zeit': meta_dict['meta_model_time'],
            'Spearman-Rangkoeffizient Meta-Modell': meta_dict['coef'],
            'Sample_size': meta_dict['sample_size'],
            'Full_sample_size': meta_dict['full_sample_size'],
            'F_s(g)': meta_dict['f_s(g)']
        }

        # Targets and predictions for both splits.
        predictions_psoqx[model_name] = {
            'Model_name': model_name,
            'y_train': dataset.get_y_train(),
            'y_test': dataset.get_y_test(),
            'pred_train': dataset.get_y_pred_train(),
            'pred_test': dataset.get_y_pred_test()
        }

        print(f"Finished: Model {model_name}, Iteration: {iteration}")

    # Metrics of the current iteration: one row per model.
    df_psoqx = pd.DataFrame(results_psoqx).transpose()
    df_psoqx.to_csv(psoqx_dir / f'{iteration:02d}_All_FS_psoqx_metrics.csv', index=False)

    # Selected feature names of the current iteration: one column per model,
    # padded with NaN to the longest selection.
    df_features_psoqx = pd.DataFrame()
    max_selected_features = max(
        (len(results_psoqx[model_name]['Features']) for model_name, _ in MLA),
        default=0,  # an empty MLA must not raise
    )
    for model_name, model_instance in MLA:
        # NOTE(review): indices are looked up in `df.columns`, which still
        # contains the label column. Confirm the index layout matches.
        selected_features = df.columns[results_psoqx[model_name]['Features']]
        if len(selected_features) < max_selected_features:
            selected_features = np.append(selected_features, [np.nan] * (max_selected_features - len(selected_features)))
        df_features_psoqx[model_name] = selected_features
    df_features_psoqx.to_csv(psoqx_dir / f'{iteration:02d}_All_FS_psoqx_Features.csv', index=False)

    # Predictions in long format: one record per value, skipping 'Model_name'.
    psoqx_data = [
        {'Model_name': model_name, 'Data_type': data_type, 'Value': value}
        for model_name, data in predictions_psoqx.items()
        for data_type, values in data.items()
        if data_type != 'Model_name'
        for value in values
    ]
    df_psoqx2 = pd.DataFrame(psoqx_data)
    df_psoqx2.to_csv(psoqx_dir / f'{iteration:02d}_All_FS_psoqx_predictions.csv', index=False)

print("Alle Iterationen abgeschlossen und Ergebnisse gespeichert.")

Suchzeit Meta-Modell: 883.6395637989044
Meta-Modell Bewertung:

f_s(g): 0.1675667993177943 = (1 - 0.9999999999999999) + 1179/7036
Berechnungszeit: 883.6395637989044 Sekunden
Meta-model sample size: 1179
Finished: Model Gradient Boosting, Iteration: 1
Alle Iterationen abgeschlossen und Ergebnisse gespeichert.


# CHC

In [2]:
# CHC feature selection: run the search, validate the selected subset and
# write metrics, feature names and predictions to data/CHC/ per iteration.

# Load the preprocessed data and drop the columns that are not used as features.
df = pd.read_csv("data/df_preprocessed_all.csv", sep = ",")
df = df.drop(columns=['FELT_LIFE','REMOVAL_DATE', 'INSTALLATION_DATE', 'REPORT_DATE'])

# Positional index of the target column.
label = df.columns.get_loc('FELT_LIFE_NET')
header = df.columns

# Models that are wrapped by the feature selection; the disabled candidates
# are kept for reference.
MLA = [
    #('Linear Regressor',LinearRegression()),
    #('SVR', SVR(kernel='linear')),
    #('Random Forest',RandomForestRegressor()),
    ('Gradient Boosting', xgb.XGBRegressor())
]

# Arguments forwarded to Evolution.create_toolbox.
task = 'feature_selection'
target_dataset = 'validation'

# Number of independent runs.
num_iterations = 5

population_size = 20

# Create the output directory up front: `to_csv` raises OSError for a missing
# directory, which would otherwise only surface after the search has finished.
chc_dir = Path('data/CHC')
chc_dir.mkdir(parents=True, exist_ok=True)

for iteration in range(1, num_iterations + 1):
    # Results are collected per iteration and written at the end of it.
    results_chc = {}
    predictions_chc = {}

    # For every model in MLA: CHC feature selection followed by validation.
    for model_name, model_instance in MLA:

        dataset = Dataset(df, 'df', label, divide_dataset=False, header=header)

        dataset.divide_dataset(model_instance, normalize=False, shuffle=True, all_features=True, all_instances=True, evaluate=False, partial_sample=False,folds=5)

        # Feature selection with CHC; individuals have one gene per feature.
        ind_size = dataset.X_train.shape[1]
        toolbox = Evolution.create_toolbox(task, target_dataset, dataset, dataset)
        population = Evolution.create_population(population_size, ind_size)
        # Initial diversity threshold d = k / 4 (k = individual length).
        d = ind_size // 4

        log, population, d, FS_time = Evolution.CHC(dataset, toolbox, d, population, verbose=1, max_no_change=2, max_generations=5)

        # The last log row holds the best solution; entries equal to 1 mark
        # the selected feature positions.
        feature_subset = np.array(log.iloc[-1]['best_solution'])
        selected_features = list(np.where(feature_subset == 1)[0])
        num_selected_features = len(selected_features)

        dataset.set_features(selected_features)

        # Train the regressor on the selected features.
        dataset.fit_classifier()

        # Cross-validation.
        dataset.set_CV()
        cv = dataset.get_CV()

        # Metrics on the training split.
        dataset.set_train_metrics()
        train = dataset.get_train_metrics()
        traintime = dataset.get_traintime()

        # Metrics on the test split.
        dataset.set_test_metrics()
        test = dataset.get_test_metrics()

        # Optional: dataset.plot_shapley_values("FS_corr_"+model_name)

        # Key order determines the column order of the metrics CSV.
        # All "_ges" totals add the same feature-selection runtime (`FS_time`)
        # that is reported as 'FS-Laufzeit'; previously 'CV_fit_time_ges' added
        # the rounded last log timestamp instead, so the totals in one row did
        # not agree with each other.
        results_chc[model_name] = {
            'Model_name': model_name,
            'CV_TrainMAE': cv['CV_TrainMAE'],
            'CV_TrainRMSE': cv['CV_TrainRMSE'],
            'CV_TestMAE': cv['CV_TestMAE'],
            'CV_TestRMSE': cv['CV_TestRMSE'],
            'CV_fit_time': cv['CV_fit_time'],
            'CV_fit_time_ges': cv['CV_fit_time'] + FS_time,
            'TrainRMSE': train['TrainRMSE'],
            'TrainMAE': train['TrainMAE'],
            'TestRMSE': test['TestRMSE'],
            'TestMAE': test['TestMAE'],
            'TrainTime': traintime,
            'TrainTime_ges': traintime + FS_time,
            'Features': selected_features,
            'Feature-Anzahl': num_selected_features,
            'FS-Laufzeit': FS_time
        }

        # Targets and predictions for both splits.
        predictions_chc[model_name] = {
            'Model_name': model_name,
            'y_train': dataset.get_y_train(),
            'y_test': dataset.get_y_test(),
            'pred_train': dataset.get_y_pred_train(),
            'pred_test': dataset.get_y_pred_test()
        }
        print(f"Finished: Model {model_name}")

    # Metrics of the current iteration: one row per model.
    df_chc = pd.DataFrame(results_chc).transpose()
    df_chc.to_csv(chc_dir / f'{iteration:02d}_FS_chc_metrics.csv', index=False)

    # Selected feature names of the current iteration: one column per model,
    # padded with NaN to the longest selection.
    df_features_chc = pd.DataFrame()
    max_selected_features = max(
        (len(results_chc[model_name]['Features']) for model_name, _ in MLA),
        default=0,  # an empty MLA must not raise
    )
    for model_name, model_instance in MLA:
        # NOTE(review): indices are looked up in `df.columns`, which still
        # contains the label column. Confirm the index layout matches.
        selected_features = df.columns[results_chc[model_name]['Features']]
        if len(selected_features) < max_selected_features:
            selected_features = np.append(selected_features, [np.nan] * (max_selected_features - len(selected_features)))
        df_features_chc[model_name] = selected_features
    df_features_chc.to_csv(chc_dir / f'{iteration:02d}_FS_chc_Features.csv', index=False)

    # Predictions in long format: one record per value, skipping 'Model_name'.
    chc_data = [
        {'Model_name': model_name, 'Data_type': data_type, 'Value': value}
        for model_name, data in predictions_chc.items()
        for data_type, values in data.items()
        if data_type != 'Model_name'
        for value in values
    ]
    df_chc2 = pd.DataFrame(chc_data)
    df_chc2.to_csv(chc_dir / f'{iteration:02d}_FS_chc_predictions.csv', index=False)

    print(f"Finished: Iteration: {iteration}")

print("Alle Iterationen abeschlossen und Ergebnisse gespeichert.")

Finished: Model Gradient Boosting=  2 
Finished: Iteration: 1
Finished: Model Gradient Boosting=  3 
Finished: Iteration: 2
Finished: Model Gradient Boosting=  2 
Finished: Iteration: 3
Finished: Model Gradient Boosting=  2 
Finished: Iteration: 4
Finished: Model Gradient Boosting=  2 
Finished: Iteration: 5
Alle Iterationen abeschlossen und Ergebnisse gespeichert.


# PSO

In [None]:
# PSO feature selection: run the search, validate the selected subset and
# write metrics, feature names and predictions to data/PSO/ per iteration.
from pyswarms.backend.topology import Ring, Star

# Load the preprocessed data and drop the columns that are not used as features.
df = pd.read_csv("data/df_preprocessed_all.csv", sep = ",")
df = df.drop(columns=['FELT_LIFE','REMOVAL_DATE', 'INSTALLATION_DATE', 'REPORT_DATE'])

# Positional index of the target column.
label = df.columns.get_loc('FELT_LIFE_NET')
header = df.columns

# Models that are wrapped by the feature selection; the disabled candidates
# are kept for reference.
MLA = [
    #('Linear Regressor',LinearRegression()),
    #('SVR', SVR(kernel='linear')),
    #('Random Forest',RandomForestRegressor()),
    ('Gradient Boosting', xgb.XGBRegressor())
]

# PSO settings.
# NOTE(review): 'k' and 'p' look like neighbourhood settings that matter for a
# ring topology; presumably they are ignored with `Star` - confirm in Evolution.PSO.
population_size = 20
options = {'c1': 2, 'c2': 2, 'w': 0.6, 'k': population_size, 'p':2}
topology = Star()

# Number of independent runs.
num_iterations = 30

# Create the output directory up front: `to_csv` raises OSError for a missing
# directory, which would otherwise only surface after the search has finished.
pso_dir = Path('data/PSO')
pso_dir.mkdir(parents=True, exist_ok=True)

for iteration in range(1, num_iterations + 1):
    # Results are collected per iteration and written at the end of it.
    results_pso = {}
    predictions_pso = {}

    # For every model in MLA: PSO feature selection followed by validation.
    for model_name, model_instance in MLA:

        dataset = Dataset(df, 'df', label, divide_dataset=False, header=header)

        dataset.divide_dataset(model_instance, normalize=False, shuffle=True, all_features=True, all_instances=True, evaluate=False, partial_sample=False,folds=5)

        # Feature selection with PSO.
        log = Evolution.PSO(dataset, options, population_size, steps_no_change=2, steps=5, topology=topology, verbose=1)

        # The last log row holds the final individual; entries equal to 1 mark
        # the selected feature positions.
        feature_subset = np.array(log.iloc[-1]['ind'])
        selected_features = list(np.where(feature_subset == 1)[0])
        num_selected_features = len(selected_features)
        duration = np.round(log.iloc[-1]['time'], 2)

        dataset.set_features(selected_features)

        # Train the regressor on the selected features.
        dataset.fit_classifier()

        # Cross-validation.
        dataset.set_CV()
        cv = dataset.get_CV()

        # Metrics on the training split.
        dataset.set_train_metrics()
        train = dataset.get_train_metrics()
        traintime = dataset.get_traintime()

        # Metrics on the test split.
        dataset.set_test_metrics()
        test = dataset.get_test_metrics()

        # Optional: dataset.plot_shapley_values("FS_corr_"+model_name)

        # Key order determines the column order of the metrics CSV.
        results_pso[model_name] = {
            'Model_name': model_name,
            'CV_TrainMAE': cv['CV_TrainMAE'],
            'CV_TrainRMSE': cv['CV_TrainRMSE'],
            'CV_TestMAE': cv['CV_TestMAE'],
            'CV_TestRMSE': cv['CV_TestRMSE'],
            'CV_fit_time': cv['CV_fit_time'],
            'CV_fit_time_ges': cv['CV_fit_time'] + duration,
            'TrainRMSE': train['TrainRMSE'],
            'TrainMAE': train['TrainMAE'],
            'TestRMSE': test['TestRMSE'],
            'TestMAE': test['TestMAE'],
            'TrainTime': traintime,
            'TrainTime_ges': traintime + duration,
            'Features': selected_features,
            'Feature-Anzahl': num_selected_features,
            'FS-Laufzeit': duration
        }

        # Targets and predictions for both splits.
        predictions_pso[model_name] = {
            'Model_name': model_name,
            'y_train': dataset.get_y_train(),
            'y_test': dataset.get_y_test(),
            'pred_train': dataset.get_y_pred_train(),
            'pred_test': dataset.get_y_pred_test()
        }
        print(f"Finished: Model: {model_name}")

    # Metrics of the current iteration: one row per model.
    df_pso = pd.DataFrame(results_pso).transpose()
    df_pso.to_csv(pso_dir / f'{iteration:02d}_FS_pso_metrics.csv', index=False)

    # Selected feature names of the current iteration: one column per model,
    # padded with NaN to the longest selection.
    df_features_pso = pd.DataFrame()
    max_selected_features = max(
        (len(results_pso[model_name]['Features']) for model_name, _ in MLA),
        default=0,  # an empty MLA must not raise
    )
    for model_name, model_instance in MLA:
        # NOTE(review): indices are looked up in `df.columns`, which still
        # contains the label column. Confirm the index layout matches.
        selected_features = df.columns[results_pso[model_name]['Features']]
        if len(selected_features) < max_selected_features:
            selected_features = np.append(selected_features, [np.nan] * (max_selected_features - len(selected_features)))
        df_features_pso[model_name] = selected_features
    df_features_pso.to_csv(pso_dir / f'{iteration:02d}_FS_pso_Features.csv', index=False)

    # Predictions in long format: one record per value, skipping 'Model_name'.
    pso_data = [
        {'Model_name': model_name, 'Data_type': data_type, 'Value': value}
        for model_name, data in predictions_pso.items()
        for data_type, values in data.items()
        if data_type != 'Model_name'
        for value in values
    ]
    df_pso2 = pd.DataFrame(pso_data)
    df_pso2.to_csv(pso_dir / f'{iteration:02d}_FS_pso_predictions.csv', index=False)

    print(f"Finished: Iteration: {iteration}")

print("Alle Iterationen abeschlossen und Ergebnisse gespeichert.")

Finished: Model: Gradient Boosting =  3 
Finished: Iteration: 1
Finished: Model: Gradient Boosting =  5 
Finished: Iteration: 2
Finished: Model: Gradient Boosting=  3 
Finished: Iteration: 3
Finished: Model: Gradient Boosting =  5 
Finished: Iteration: 4
Finished: Model: Gradient Boosting =  5 
Finished: Iteration: 5
Finished: Model: Gradient Boosting =  3 
Finished: Iteration: 6
Finished: Model: Gradient Boosting =  5 
Finished: Iteration: 7
Finished: Model: Gradient Boosting =  3 
Finished: Iteration: 8
Finished: Model: Gradient Boosting =  5 
Finished: Iteration: 9
Finished: Model: Gradient Boosting =  3 
Finished: Iteration: 10
Finished: Model: Gradient Boosting =  5 
Finished: Iteration: 11
Finished: Model: Gradient Boosting =  3 
Finished: Iteration: 12
Finished: Model: Gradient Boosting =  5 
Finished: Iteration: 13
Finished: Model: Gradient Boosting =  5 
Finished: Iteration: 14
Finished: Model: Gradient Boosting =  5 
Finished: Iteration: 15
Finished: Model: Gradient Boosting 