<div class="alert alert-info">
  <svg xmlns="http://www.w3.org/2000/svg" width="32" height="32" fill="currentColor" class="bi bi-info-circle-fill" viewBox="0 0 16 16">
  <path d="M8 16A8 8 0 1 0 8 0a8 8 0 0 0 0 16zm.93-9.412-1 4.705c-.07.34.029.533.304.533.194 0 .487-.07.686-.246l-.088.416c-.287.346-.92.598-1.465.598-.703 0-1.002-.422-.808-1.319l.738-3.468c.064-.293.006-.399-.287-.47l-.451-.081.082-.381 2.29-.287zM8 5.5a1 1 0 1 1 0-2 1 1 0 0 1 0 2z"/>
</svg>
<b style="font-size: x-large;">MORE INFO</b><br>
I collected all the parameters of <b>LightGBM</b>, <b>CatBoost</b> and <b>XGBoost</b> introduced in TPS Oct 2021 in the <a href="https://www.kaggle.com/akmeghdad/all-booster-parameters-for-tps-oct-2021" target="_blank"><b>all-boosters-parameters-for-tps-oct-2021</b></a> dataset.<br>
Below, I show how to use this dataset.
</div>

In [None]:
import numpy as np 
import pandas as pd
import random
import time
import datatable as dt
import datatable as dt
import distutils

from sklearn.model_selection import *
from sklearn.metrics import *

from lightgbm import *
from catboost import *
from xgboost import *

# Render floats with thousands separators in every DataFrame display.
# The full option name is spelled out because pandas resolves shortened
# names by pattern matching, which can become ambiguous when options are added.
pd.set_option('display.float_format', '{:,}'.format)

In [None]:
# Folder that holds the shared parameter CSVs (one file per boosting framework).
DATA_PATH = "/kaggle/input/all-booster-parameters-for-tps-oct-2021/"
n_splits = 5            # the hold-out split below uses a test fraction of 1 / n_splits
early_stopping = 600    # early-stopping rounds handed to every model.fit call

competition = 'tps1021'  # prefix shared by all parameter CSV file names

# One entry per framework: the CSV with its parameter sets and the name of
# the classifier class those parameter sets are expected to build.
config = [
    dict(csv_name=f'{competition}-{csv_tag}_parameters.csv', model=model_name)
    for csv_tag, model_name in (
        ('xgboost', 'XGBClassifier'),
        ('lgbm', 'LGBMClassifier'),
        ('catboost', 'CatBoostClassifier'),
    )
]

In [None]:
def convert_it(var):
    """Convert a value read as text into the Python object it spells.

    Conversions are tried in this order:

    1. ``int``   -- e.g. ``'5'`` -> ``5``
    2. ``float`` -- e.g. ``'0.05'`` -> ``0.05``
    3. ``bool``  -- case-insensitive ``y/yes/t/true/on`` -> ``True`` and
       ``n/no/f/false/off`` -> ``False``
    4. ``list``  -- ``'[a, b, c]'`` is split on commas and every item is
       converted recursively (nested lists are not supported)
    5. otherwise the text itself, with surrounding whitespace removed.

    Args:
        var: Usually a ``str`` cell from a CSV read with ``dtype='str'``.

    Returns:
        An ``int``, ``float``, ``bool``, ``list`` or ``str``.

    Raises:
        TypeError: If ``var`` is a type ``int()`` cannot handle at all
            (for example ``None``).
    """
    try:
        return int(var)
    except ValueError:
        pass

    try:
        return float(var)
    except ValueError:
        pass

    # Strip first so list items such as ' True' or ' abc' (left over from
    # splitting '[1, True, abc]' on commas) are still recognised.
    text = var.strip()

    # Same vocabulary as the removed distutils.util.strtobool; '1' and '0'
    # never get this far because int() has already claimed them.
    lowered = text.lower()
    if lowered in ('y', 'yes', 't', 'true', 'on'):
        return True
    if lowered in ('n', 'no', 'f', 'false', 'off'):
        return False

    # startswith/endswith are safe on the empty string, unlike indexing.
    if text.startswith('[') and text.endswith(']'):
        inner = text[1:-1].strip()
        if not inner:
            return []
        return [convert_it(item) for item in inner.split(',')]

    return text

## Reduce memory with Pandas 

<div class="alert alert-info">
  <svg xmlns="http://www.w3.org/2000/svg" width="32" height="32" fill="currentColor" class="bi bi-info-circle-fill" viewBox="0 0 16 16">
  <path d="M8 16A8 8 0 1 0 8 0a8 8 0 0 0 0 16zm.93-9.412-1 4.705c-.07.34.029.533.304.533.194 0 .487-.07.686-.246l-.088.416c-.287.346-.92.598-1.465.598-.703 0-1.002-.422-.808-1.319l.738-3.468c.064-.293.006-.399-.287-.47l-.451-.081.082-.381 2.29-.287zM8 5.5a1 1 0 1 1 0-2 1 1 0 0 1 0 2z"/>
</svg>
<b style="font-size: x-large;">MORE INFO</b><br>
<a href="https://www.kaggle.com/c/tabular-playground-series-oct-2021/discussion/278237" target="_blank">I explain why I use <b><code>dtype='float32'</code></b> <b>here</b></a><br>
</div>

In [None]:
%%time

# DEMO: load a random 5000-row sample instead of the full training file.
# 980000 of the line numbers 1..999999 are skipped at random (line 0 is the
# header and is never skipped) and the first 5000 remaining rows are read.
# A dedicated, seeded generator keeps the sample identical on every
# "Restart & Run All".
demo_rng = random.Random(59)
train = pd.read_csv('/kaggle/input/tabular-playground-series-oct-2021/train.csv', dtype='float32', skiprows=demo_rng.sample(range(1, 1000000), 980000), nrows=5000)

# TO RUN 
# train = pd.read_csv('/kaggle/input/tabular-playground-series-oct-2021/train.csv',dtype='float32').drop(columns=['id'])
# test = pd.read_csv('/kaggle/input/tabular-playground-series-oct-2021/test.csv',dtype='float32').drop(columns=['id'])
# submission = pd.read_csv('/kaggle/input/tabular-playground-series-oct-2021/sample_submission.csv')



In [None]:
# Preview the last rows of `train` as loaded, before any dtype conversion.
train.tail()

In [None]:
# Columns the notebook treats as boolean: f22, f43 and the consecutive run f242 .. f284.
bool_row = ['f22', 'f43'] + [f'f{idx}' for idx in range(242, 285)]

# Store those columns and the label as int8 in a single cast.
# (When a `test` frame is loaded, cast its bool_row columns the same way.)
train = train.astype({name: np.int8 for name in bool_row + ['target']})

In [None]:
# Same preview again, now that the boolean columns and the target are int8.
train.tail()

In [None]:
# List every column with its dtype (verbose=True) and report the frame's
# memory footprint; memory_usage="deep" requests a full, introspected count.
train.info(verbose=True, memory_usage="deep")

In [None]:
# Separate the label from the features.
y = train['target']
# errors='ignore': 'id' is present in the DEMO sample, but the full "TO RUN"
# loader already drops it, which would otherwise make this line raise KeyError.
X = train.drop(columns=['target', 'id'], errors='ignore')
# Hold out 1 / n_splits of the rows for early stopping and scoring.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=(1 / n_splits), random_state=59)

In [None]:
# Load the three parameter tables for inspection.  The paths are built from
# DATA_PATH and `competition` so they stay in sync with the `config` entries
# instead of repeating the folder and file prefix as literals.
xgb_parameters_csv = pd.read_csv(DATA_PATH + competition + '-xgboost_parameters.csv')
lgbm_parameters_csv = pd.read_csv(DATA_PATH + competition + '-lgbm_parameters.csv')
cat_parameters_csv = pd.read_csv(DATA_PATH + competition + '-catboost_parameters.csv')

In [None]:
# (rows, columns) of the XGBoost parameter table.
xgb_parameters_csv.shape

In [None]:
# First rows of the XGBoost parameter table.
xgb_parameters_csv.head()

## Top 7 scores for XGBClassifier

In [None]:
# Order the XGBoost parameter sets by their `score` column, highest first,
# and display the seven leading rows.
xgb_ranked = xgb_parameters_csv.sort_values(by=['score'], ascending=False)
xgb_ranked.head(7)

## Parameters of the best-scoring XGBClassifier

In [None]:
# Pick the row with the highest `score`, keep only the cells that are set,
# remove the bookkeeping columns and show the rest as a {parameter: value} dict.
best_xgb_row = xgb_parameters_csv.sort_values(by=['score'], ascending=False).iloc[0]
best_xgb_row.dropna().drop(['date', 'thanks', 'model', 'score']).to_dict()

## Parameters of the best-scoring LGBMClassifier

In [None]:
# Pick the row with the highest `score`, keep only the cells that are set,
# remove the bookkeeping columns and show the rest as a {parameter: value} dict.
best_lgbm_row = lgbm_parameters_csv.sort_values(by=['score'], ascending=False).iloc[0]
best_lgbm_row.dropna().drop(['date', 'thanks', 'model', 'score']).to_dict()

In [None]:
# Fit and score parameter sets from every framework's CSV on the hold-out split.
for model_cfg in config:
    # Read every cell as text so pandas does not turn integer-valued
    # parameters into floats; convert_it() restores the real types below.
    parameters_csv = pd.read_csv(DATA_PATH + model_cfg['csv_name'], dtype='str')
    conf_model = model_cfg['model']

    # DEMO: only the first 3 parameter sets of each framework are fitted.
    # TO RUN all of them, use: n_trials = parameters_csv.shape[0]
    # min() guards against a CSV that holds fewer than 3 rows.
    n_trials = min(3, parameters_csv.shape[0])

    for i in range(n_trials):
        row = parameters_csv.iloc[i]
        csv_model = row['model']

        # Keep only the cells that are set, drop the bookkeeping columns and
        # convert each remaining string to int, float, bool or list.
        params = {
            name: convert_it(value)
            for name, value in row.dropna().drop(['date', 'thanks', 'model', 'score']).items()
        }

        # If you want to run without a GPU, remove the GPU-only settings:
        #     for gpu_key in ('devices', 'tree_method', 'predictor', 'gpu_id'):
        #         params.pop(gpu_key, None)

        # The class name comes from the CSV, so it is looked up by name and
        # checked to be a class rather than passed to eval(), which would
        # execute arbitrary expressions stored in the data file.
        model_cls = globals().get(csv_model)
        if not isinstance(model_cls, type):
            raise ValueError(f'Unknown model class in {model_cfg["csv_name"]}: {csv_model!r}')

        model = model_cls(**params)
        # NOTE(review): newer XGBoost / LightGBM releases may no longer accept
        # early_stopping_rounds / verbose in fit() -- confirm against the
        # installed versions.
        model.fit(
            X_train, y_train,
            eval_set=[(X_test, y_test)],
            early_stopping_rounds=early_stopping,
            verbose=0
        )

        # The value is a ROC AUC; the variable keeps its original name
        # `accuracy` in case later cells read it.
        if csv_model == conf_model:  # Classifier: score the positive-class probability
            y_predicted = model.predict_proba(X_test)
            accuracy = roc_auc_score(y_test, y_predicted[:, 1])
        else:  # Regressor: score the raw prediction
            y_predicted = model.predict(X_test)
            accuracy = roc_auc_score(y_test, y_predicted)

        print('='*40)
        print(f'Model: {csv_model}\t ROC AUC: {accuracy}')
        print(f"{i+1}_ {row['thanks']}")