In [None]:
import numpy as np
import pandas as pd
import os, sys, gc, warnings, random

## Sklearn utils
from sklearn.metrics import roc_auc_score

## Silence all Python warnings for the rest of the session
## (this includes deprecation warnings raised by the libraries used below)
warnings.filterwarnings('ignore')

In [None]:
########################### Initial Vars
###########################################################
TARGET    = 'target'   # Name of the label column
SEED      = 42         # Base random seed
PATH      = '../input/tabular-playground-series-mar-2021/'  # Directory holding the input CSVs

cat_cols = [f'cat{idx}' for idx in range(19)]   # Categorical columns: cat0 .. cat18
cnt_cols = [f'cont{idx}' for idx in range(11)]  # Continuous columns: cont0 .. cont10

remove_features = ['id', TARGET]  # Columns that are never used as training features

# Load the training data and cast every categorical column to the pandas
# 'category' dtype in a single astype call.
train_df = pd.read_csv(PATH+'train.csv').astype({name: 'category' for name in cat_cols})

# Everything that is neither the row id nor the label is a model input.
features_columns = [name for name in train_df.columns if name not in remove_features]

In [None]:
########################### Models params
###########################################################
# Shared LightGBM configuration, used by both the native API and the
# scikit-learn wrapper in the cells below.
lgb_params = dict(
    boosting_type='gbdt',        # classic gradient-boosted decision trees
    objective='binary',          # binary classification
    metric='auc',                # evaluation metric reported during training
    n_estimators=200,            # upper bound on the number of boosting rounds
    learning_rate=0.05,
    num_leaves=2**7,             # 128 leaves per tree
    min_data_in_leaf=2**8,       # at least 256 samples in every leaf
    feature_fraction=0.7,        # fraction of columns sampled for each tree
    subsample=0.7,               # fraction of rows sampled when bagging
    subsample_freq=1,            # re-draw the row sample at every iteration
    early_stopping_rounds=100,   # stop when the metric has not improved for 100 rounds
    boost_from_average=True,
    seed=SEED,
    verbose=-1,                  # keep LightGBM's own logging quiet
)

In [None]:
# There are two ways to train LightGBM (the same applies to XGBoost and CatBoost):
# 1. Native API: build a Dataset and call lgb.train, which returns a Booster
# 2. scikit-learn API: create an LGBMClassifier and call fit

In [None]:
## 1. Type 1
## Native API: wrap the data in a lgb.Dataset and call lgb.train, which returns a Booster.
import lightgbm as lgb
train_data = lgb.Dataset(train_df[features_columns], label=train_df[TARGET])
# The training set is also passed as the validation set, so the logged AUC
# (and the early stopping configured in lgb_params) is measured on the
# training data itself.
# NOTE(review): `verbose_eval` was removed from lgb.train in LightGBM 4.0;
# on newer versions pass callbacks=[lgb.log_evaluation(100)] instead --
# confirm against the installed version.
estimator = lgb.train(
                          lgb_params,
                          train_data,
                          valid_sets = [train_data],
                          verbose_eval = 100,
                        )

# With the 'binary' objective, Booster.predict already returns probabilities,
# so its output can be fed straight into roc_auc_score.
print(roc_auc_score(train_df[TARGET], estimator.predict(train_df[features_columns])))

# A Booster has no predict_proba method, so this call raises AttributeError.
# Only that specific error is caught: a bare `except:` would also hide
# unrelated failures (and even KeyboardInterrupt) behind the same message.
try:
    print(estimator.predict_proba(train_df[features_columns]))
except AttributeError:
    print('Ups...')

In [None]:
## 2. Type 2
## Use the "sklearn type API" -- or, better put, the sklearn-style wrapper
from lightgbm import LGBMClassifier
estimator = LGBMClassifier(**lgb_params)

# Select the inputs and the label once and reuse them below.
train_X, train_y = train_df[features_columns], train_df[TARGET]
estimator.fit(train_X, train_y, eval_set=(train_X, train_y), verbose=100)

# The wrapper exposes probabilities through predict_proba;
# column 1 is the probability of the second (positive) class.
train_proba = estimator.predict_proba(train_X)[:, 1]
print(roc_auc_score(train_y, train_proba))

# predict returns hard class labels here, so the AUC computed from it
# does not reflect the ranking quality of the model.
train_labels = estimator.predict(train_X)
print(roc_auc_score(train_y, train_labels))