In [None]:
# importing required libraries
import numpy as np
import pandas as pd

# libraries for model building
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier

#ignore warnings
import warnings 
warnings.filterwarnings('ignore')

# library for Scaling
from sklearn.preprocessing import StandardScaler
# One shared scaler instance for the notebook: it is fitted on df_folds and
# then reused to transform df_test in the scaling cell further down.
scaler = StandardScaler()

from sklearn.metrics import roc_auc_score

# Read the four competition inputs in one pass (same order as before):
#   df_train   - original training table
#   df_folds   - training table carrying an extra fold-assignment column
#   df_test    - rows to predict
#   submission - sample submission used as the output template
df_train, df_folds, df_test, submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/tabular-playground-series-nov-2021/train.csv',
        '../input/nov-tps-10-folds/df_folds.csv',
        '../input/tabular-playground-series-nov-2021/test.csv',
        '../input/tabular-playground-series-nov-2021/sample_submission.csv',
    )
)

In [None]:
# Seed handed to the model for reproducible training.
SEED = 211

# Model inputs: every column of the training table except the row identifier,
# the label and the fold marker.
col = [name for name in df_train.columns if name not in {'id', 'target', 'kfold'}]

In [None]:
# Row-wise summary statistics, added as extra features to both the fold table
# and the test table.
# Maps the new column name to the DataFrame reduction that produces it.
row_stats = {
    'sum': 'sum',
    'mean': 'mean',
    'std': 'std',
    'max': 'max',
    'min': 'min',
    'kur': 'kurtosis',
}

# Aggregate over the raw features only. On a fresh run this is exactly `col`;
# the filter matters when the cell is re-run after the engineered names have
# been appended to `col`, where the statistics would otherwise be computed
# over their own previous values.
base_features = [name for name in col if name not in row_stats]

for frame in (df_folds, df_test):
    # Slice once per frame; every statistic reads the same raw-feature view.
    raw_values = frame[base_features]
    for stat_name, method in row_stats.items():
        frame[stat_name] = getattr(raw_values, method)(axis=1)

In [None]:
# Register the engineered row-statistic columns as model features.
# Only names not already present are appended, so re-running this cell does
# not create duplicate entries (duplicates would make df_folds[col] return
# repeated columns and break the column-wise assignments that follow).
new_features = [name for name in ['sum','mean','std','max','min','kur'] if name not in col]
col.extend(new_features)

In [None]:
# Standardise the feature columns: learn the statistics from the fold table,
# then apply that same fitted transform to both tables.
scaler.fit(df_folds[col])
for frame in (df_folds, df_test):
    frame[col] = scaler.transform(frame[col])

In [None]:
# params_cat1 = {'iterations': 80203,
#           'od_wait': 1765,'learning_rate': 0.02010888271017379,
#           'reg_lambda': 0.3051769003766273,
#           'subsample': 0.9155353016941578,
#           'random_strength': 31.905377503941313,
#           'depth': 6,'min_data_in_leaf': 14,
#           'leaf_estimation_iterations': 7,
#           'task_type':"GPU",
#           'bootstrap_type':'Poisson',}  score = 0.74021
# Current CatBoost hyper-parameters (the commented-out dictionary above is an
# earlier configuration kept for reference).
# NOTE(review): fit() in the training cell passes early_stopping_rounds; per
# the CatBoost docs that switches the overfitting detector to 'Iter', so the
# od_type / od_wait values below may not be the ones in effect - confirm.
params_cat1 = dict(
    iterations=7509,
    learning_rate=0.09912912084658385,
    od_wait=602,
    depth=15,
    min_data_in_leaf=19,
    leaf_estimation_iterations=15,
    reg_lambda=78.73867864199626,
    random_strength=42.96329895925554,
    od_type='IncToDec',
    boosting_type='Plain',
    bootstrap_type='Bernoulli',
    subsample=0.7675206706589885,
)

# GPU-trained binary classifier optimising Logloss and reporting AUC.
model_cat = CatBoostClassifier(
    task_type='GPU',
    loss_function='Logloss',
    eval_metric='AUC',
    random_state=SEED,
    **params_cat1,
)

In [None]:
# Cross-validated training over the 10 folds marked in df_folds.kfold.
# For each fold: fit on the remaining folds, score the held-out fold with
# ROC AUC, and predict the test set. The per-fold test predictions are
# averaged and written out as the submission.

# Restrict the test table to the model features once, outside the loop.
# Passing the whole df_test would hand the model every column of the test
# table, including ones outside `col` (such as the `id` column that the
# feature list excludes), so the columns would not line up with the ones
# the model was fitted on.
Xtest = df_test[col]

final_preds = []
fold_scores = []
for fold in range(10):
    Xtrain = df_folds[df_folds.kfold != fold].reset_index(drop=True)
    Xvalid = df_folds[df_folds.kfold == fold].reset_index(drop=True)

    ytrain = Xtrain.target
    yvalid = Xvalid.target

    Xtrain = Xtrain[col]
    Xvalid = Xvalid[col]

    # The held-out fold doubles as the early-stopping evaluation set.
    model_cat.fit(Xtrain, ytrain, eval_set=[(Xvalid, yvalid)], early_stopping_rounds=400, verbose=False)
    preds_valid = model_cat.predict_proba(Xvalid)[:, 1]
    preds_test = model_cat.predict_proba(Xtest)[:, 1]
    final_preds.append(preds_test)

    fold_score = roc_auc_score(yvalid, preds_valid)
    fold_scores.append(fold_score)

    print(f"Fold: {fold}")
    print(f"roc_score: {fold_score}")
    print('_'*50)

print(f"mean roc_score: {np.mean(fold_scores)}")

# Average the test predictions of all folds row by row.
preds = np.mean(np.column_stack(final_preds), axis=1)
# Item assignment always targets the column; attribute assignment would only
# do so if a 'target' column already exists.
submission['target'] = preds

submission.to_csv('sub_cat.csv', index=False)

In [None]:
# # Using default parameters
# model_xgb = XGBClassifier(predictor ='gpu_predictor', tree_method='gpu_hist', gpu_id = 0)
# model_lgb = LGBMClassifier(device='gpu')

In [None]:
# # XGBoost model
# # fitting model on entire data
# model_xgb.fit(df_train[col],df_train['target'])
    
# # predicting on test data
# pred_test = model_xgb.predict_proba(df_test[col])[:,1]
    
# submission['target'] = pred_test
# submission.to_csv('sub_xgb.csv', index=False)

In [None]:
# # CatBoost model
# # fitting model on enitre data
# model_cat.fit(df_train[col],df_train['target'])
    
# # predicting on test data
# pred_test = model_cat.predict_proba(df_test[col])[:,1]
    
# submission['target'] = pred_test
# submission.to_csv('sub_cat.csv', index=False)

In [None]:
# # Lightgbm model
# # fitting model on enitre data
# model_lgb.fit(df_train[col],df_train['target'])
    
# # predicting on test data
# pred_test = model_lgb.predict_proba(df_test[col])[:,1]
    
# submission['target'] = pred_test
# submission.to_csv('sub_lgb.csv', index=False)