In [None]:
import pandas as pd
import numpy as np
from lightgbm import LGBMClassifier,plot_importance
from sklearn import model_selection
import joblib

In [None]:
# Load the competition's training split into memory.
df = pd.read_csv(
    '../input/tabular-playground-series-oct-2021/train.csv'
)

In [None]:
# Peek at the first five training rows to sanity-check the load.
df.head(n=5)

In [None]:
# Load the competition's test split; it is only scored at the end of the notebook.
test_df = pd.read_csv(
    '../input/tabular-playground-series-oct-2021/test.csv'
)

In [None]:
def make_folds(df, n_splits=5, random_state=None):
    '''
    Shuffle the dataset and assign every row to a stratified K-fold.

    The input frame is left untouched: the fold labels are written to a
    shuffled copy, which is what gets returned.

    Parameters
    ----------
    df : dataframe
        Training data; must contain a 'target' column, which is used for
        stratification.
    n_splits : int, default 5
        Number of folds to create.
    random_state : int or None, default None
        Seed for the row shuffle. ``None`` keeps the shuffle
        non-deterministic; pass an integer for reproducible folds.

    Returns
    -------
    dataframe
        Shuffled copy of ``df`` with a fresh 0..n-1 index and an integer
        'kfold' column holding the validation fold of each row.
    '''

    # Shuffle first so the new column is only ever added to the copy,
    # never to the caller's frame.
    shuffled = df.sample(frac=1, random_state=random_state).reset_index(drop=True)
    shuffled['kfold'] = -1

    targets = shuffled.target.values
    splitter = model_selection.StratifiedKFold(n_splits=n_splits)

    # Only the validation indices matter: each row is a validation row in
    # exactly one split, and that split number becomes its fold label.
    for fold, (_, valid_idx) in enumerate(splitter.split(X=shuffled, y=targets)):
        shuffled.loc[valid_idx, 'kfold'] = fold

    return shuffled

In [None]:
# Replace the training frame with its shuffled version carrying the 'kfold' column.
df = make_folds(df=df)

In [None]:
# --- Run configuration -------------------------------------------------
# Fold count; make_folds uses 5 as well.
N_SPLITS = 5
# Presumably the cap on boosting rounds for the model -- TODO confirm.
N_ESTIMATORS = 20_000
# Passed to fit() in run_folds as the early-stopping patience.
EARLY_STOPPING_ROUNDS = 200
# Passed to fit() in run_folds to control how often evaluation is logged.
VERBOSE = 1_000
# Presumably the global random seed -- TODO confirm where it should apply.
SEED = 2021

In [None]:
def run_folds(df,fold,model):
    '''
    Train the selected model with one fold held out and save it to disk.

    Rows whose 'kfold' differs from ``fold`` form the training set; rows
    whose 'kfold' equals ``fold`` form the validation set used for AUC
    monitoring and early stopping.

    Parameters
    ----------
    df : dataframe
        Training data with 'target', 'kfold' and 'id' columns; every other
        column is used as a feature.
    fold : int
        Value of 'kfold' to hold out for validation.
    model : str
        Key into the module-level ``models`` dict selecting the estimator.

    Returns
    -------
    None
        The fitted estimator is written to 'new_dt_{fold}_{model}.bin'.
    '''
    # Imported locally so this function brings its own dependencies.
    # The fit() keywords `early_stopping_rounds` / `verbose` were deprecated
    # and later removed from LightGBM's scikit-learn API; callbacks are the
    # supported way to request early stopping and periodic logging.
    from lightgbm import early_stopping, log_evaluation

    non_feature_cols = ['target','kfold','id']

    df_train = df[df.kfold!=fold].reset_index(drop=True)
    df_valid = df[df.kfold==fold].reset_index(drop=True)

    x_train = df_train.drop(columns=non_feature_cols).values
    y_train = df_train.target.values

    x_valid = df_valid.drop(columns=non_feature_cols).values
    y_valid = df_valid.target.values

    # NOTE: this is the same estimator object on every call; it is fitted
    # again here and dumped before the next fold touches it.
    clf = models[model]

    clf.fit(
        x_train,
        y_train,
        eval_set=[(x_valid, y_valid)],
        eval_metric='auc',
        callbacks=[
            early_stopping(stopping_rounds=EARLY_STOPPING_ROUNDS),
            log_evaluation(period=VERBOSE),
        ],
    )

    print(f'Fold{fold}')
    print('*'*50)

    joblib.dump(clf,f'new_dt_{fold}_{model}.bin')

    return

In [None]:
# Registry of estimators that run_folds can train, keyed by short name.
# N_ESTIMATORS and SEED are applied here; with the library defaults the
# number of boosting rounds (100) is smaller than EARLY_STOPPING_ROUNDS
# (200), so early stopping could never trigger, and runs were not seeded.
models = {
    'lgbm':LGBMClassifier(n_estimators=N_ESTIMATORS, random_state=SEED)
}

In [None]:
# Train and save one model per fold. N_SPLITS must match the number of
# folds make_folds produced (5 by default).
for fold in range(N_SPLITS):
    run_folds(df,fold,'lgbm')

In [None]:
# Score the test set with every saved fold model and average the
# positive-class probabilities, instead of relying on one arbitrary fold.
test = test_df.drop(columns=['id'])

fold_predictions = []
for fold in range(N_SPLITS):
    # These files are the ones written by run_folds in this notebook.
    md = joblib.load(f'./new_dt_{fold}_lgbm.bin')
    # .values mirrors the plain-array features the models were fitted on.
    fold_predictions.append(md.predict_proba(test.values)[:,1])

prediction = np.mean(fold_predictions, axis=0)
submission = pd.DataFrame({'id': test_df['id'],'target': prediction})
submission.to_csv('submission.csv',index=False)