In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns

# Suppress warnings: the 'ignore' filter silences every warning raised from here on,
# so deprecation notices from the libraries used below will not be shown.
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Read the competition's train and test tables from the `../input` directory.
train_csv = '../input/tabular-playground-series-oct-2021/train.csv'
test_csv = '../input/tabular-playground-series-oct-2021/test.csv'

train = pd.read_csv(train_csv)
test = pd.read_csv(test_csv)

In [None]:
# Build the modelling inputs:
#   y         - the `target` column of the training table
#   X         - every training column except `id` and `target`
#   test_data - every test column except `id`
y = train['target']
X = train.drop(['id', 'target'], axis=1).copy()

test_data = test.drop('id', axis=1).copy()

In [None]:
# Feature selection

from sklearn.feature_selection import SelectFromModel

# Keep the full list of feature names so the selector's boolean mask can be
# mapped back to column names later on.
col_names = X.columns

In [None]:
from lightgbm import LGBMClassifier

# LightGBM classifier with all-default settings; it is passed to SelectFromModel
# as the estimator that drives feature selection.
model = LGBMClassifier()

In [None]:
# Fit the model-based selector on the full feature matrix and reduce X to the
# columns it keeps. No threshold is passed, so the selector's default cut-off applies.
sfm = SelectFromModel(model)
X_transformed = sfm.fit_transform(X=X, y=y)

# Last expression of the cell: display the reduced matrix.
X_transformed

In [None]:
# Translate the selector's boolean mask into the names of the columns it kept.
support = sfm.get_support()
features_selected = [name for name, keep in zip(col_names, support) if keep]

# Last expression of the cell: display the selected feature names.
features_selected

In [None]:
# Restrict the training features to a fixed, hard-coded subset of 59 columns
# (presumably the names printed by `features_selected` - verify before changing).
selected_columns = [
    'f1', 'f2', 'f3', 'f4', 'f5', 'f7', 'f8', 'f12', 'f13', 'f14',
    'f17', 'f18', 'f19', 'f22', 'f43', 'f44', 'f48', 'f52', 'f56',
    'f58', 'f65', 'f69', 'f72', 'f73', 'f74', 'f75', 'f77', 'f78',
    'f82', 'f90', 'f92', 'f95', 'f96', 'f99', 'f103', 'f112', 'f119',
    'f125', 'f134', 'f136', 'f138', 'f139', 'f143', 'f150', 'f152',
    'f154', 'f156', 'f179', 'f192', 'f195', 'f200', 'f201', 'f211',
    'f213', 'f214', 'f241', 'f243', 'f247', 'f258',
]
X = X[selected_columns]

In [None]:
# Give the test features exactly the columns, in the same order, as the training
# matrix `X`. Reading the names from `X` instead of repeating the 59-name list by
# hand means the two frames cannot silently drift apart when the selection changes.
# Note: this rebinds `test` from the raw test table to the model-ready feature frame.
test = test_data[list(X.columns)]

In [None]:
from sklearn.metrics import roc_auc_score, roc_curve, auc
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier

In [None]:
import random
import os

# Single seed reused throughout the notebook: global RNG seeding, the shuffling of
# the cross-validation splitters, and the random_state of each LightGBM model.
SEED = 12345

In [None]:
def seed_everything(seed=64):
    """Seed the global random number generators used by this notebook.

    Sets the ``PYTHONHASHSEED`` environment variable to ``str(seed)`` and seeds
    both Python's ``random`` module and NumPy's global ``np.random`` state.

    Args:
        seed: Integer seed applied to every generator (default 64).

    Returns:
        None.
    """
    # NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so this
    # assignment presumably only influences processes launched afterwards - confirm.
    os.environ['PYTHONHASHSEED'] = str(seed)
    for reseed in (random.seed, np.random.seed):
        reseed(seed)

# Seed the global RNGs with the notebook-wide SEED instead of the function's default of 64.
seed_everything(SEED)

In [None]:
# Hyper-parameters unpacked as keyword arguments into each fold's LGBMClassifier.
# NOTE(review): LightGBM documents that row subsampling only takes effect when
# `subsample_freq` is > 0 - confirm whether `subsample` is meant to be active here.
params = dict(
    max_depth=3,
    colsample_bytree=0.3,
    subsample=0.5,
    reg_alpha=18,
    reg_lambda=17,
    num_leaves=7,
    objective='binary',
    importance_type='gain',
)

In [None]:
from sklearn.model_selection import KFold, StratifiedKFold

# Both splitters share the same configuration: 5 shuffled folds, seeded with SEED.
cv_options = dict(n_splits=5, shuffle=True, random_state=SEED)

kf = KFold(**cv_options)
skf = StratifiedKFold(**cv_options)

In [None]:
# Out-of-fold (OOF) training of LightGBM over the folds produced by `kf`.
#
# For every fold a fresh classifier is trained on the in-fold rows and
# early-stopped on the held-out rows. Its held-out probabilities are written
# into `oof_lgb`, and its probabilities for the real test set are ADDED to
# `predictions_lgb` (a running sum - the consumer must divide by the fold count).
#
# Early stopping and periodic logging are passed as callbacks: the
# `early_stopping_rounds=` / `verbose=` keyword arguments of `fit` were removed in
# LightGBM 4.0 and raise a TypeError there.
from lightgbm import early_stopping, log_evaluation

oof_lgb = np.zeros(len(X))
predictions_lgb = np.zeros(len(test))

# Convert once instead of on every fold.
X_values, y_values = X.values, y.values

for i, (train_ix, test_ix) in enumerate(kf.split(X_values), start=1):

    print("Out of fold predictions generating for fold  {} \n".format(i))

    # `test_X` / `test_y` are this fold's held-out rows, not the competition test set.
    train_X, train_y = X_values[train_ix], y_values[train_ix]
    test_X, test_y = X_values[test_ix], y_values[test_ix]

    model_lgb = LGBMClassifier(
        random_state=SEED,
        n_estimators=20000,   # upper bound; early stopping decides the real count
        learning_rate=0.095,
        # device='gpu',
        **params
    )

    model_lgb.fit(
        train_X,
        train_y,
        eval_set=[(test_X, test_y)],
        eval_metric="auc",
        callbacks=[
            early_stopping(stopping_rounds=300),  # stop after 300 rounds without improvement
            log_evaluation(period=100),           # print the eval metrics every 100 rounds
        ],
    )

    # Each training row is held out exactly once, so a plain assignment is enough.
    fold_oof = model_lgb.predict_proba(test_X)[:, 1]
    oof_lgb[test_ix] = fold_oof
    predictions_lgb = predictions_lgb + model_lgb.predict_proba(test)[:, 1]

    # The same held-out rows drove early stopping, so this per-fold score is optimistic.
    print("AUC for fold {} \t\t {} \n".format(i, round(roc_auc_score(test_y, fold_oof), 5)))

# AUC of the stitched-together out-of-fold predictions over the whole training set.
print("AUC for Training Set: \t\t {} \n".format(round(roc_auc_score(y, oof_lgb), 5)))

In [None]:
# Write the submission file.
# `predictions_lgb` holds the SUM of the per-fold test probabilities, so it is
# averaged by the number of folds. The divisor is read from the splitter instead
# of being hard-coded as 5, so it stays correct if `n_splits` is changed.
submission = pd.read_csv("../input/tabular-playground-series-oct-2021/sample_submission.csv")
submission['target'] = predictions_lgb / kf.get_n_splits()
submission.to_csv("submission.csv", index=False)
submission.head(10)
