In [None]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
import lightgbm as lgb

# Single random seed reused below (CV shuffling and LightGBM's `seed` parameter)
# so that repeated runs produce the same folds and models.
SEED = 1380

In [None]:
# Read the train and test CSV files.
train = pd.read_csv("../input/tabular-playground-series-mar-2021/train.csv")
test = pd.read_csv("../input/tabular-playground-series-mar-2021/test.csv")

# Separate the label from the features; the `id` column is dropped from both
# tables so it is not used as a feature.
y = train["target"]
X = train.drop(columns=["id", "target"])
X_test = test.drop(columns=["id"])

# Quick sanity check of the resulting shapes.
print(X.shape, y.shape, X_test.shape)

In [None]:
# Stack train rows on top of test rows so each categorical column is encoded
# with one mapping that covers the values present in both tables.
X_all = pd.concat([X, X_test], axis=0)

cat_columns = [f"cat{i}" for i in range(19)]
le = LabelEncoder()

# The same encoder object is refit on every column in turn; only the
# transformed integer codes are kept.
for col in cat_columns:
    encoded = le.fit_transform(X_all[col])
    X_all[col] = encoded

X_all

In [None]:
# X_all was built train-first, so the first len(train) rows are the training
# features and everything after them is the test features.
X, X_test = X_all.iloc[:len(train)], X_all.iloc[len(train):]

In [None]:
# Stratified 5-fold splitter; shuffling is seeded so fold membership is
# the same on every run.
kf = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=SEED,
)

In [None]:
# LightGBM hyperparameters shared by every fold.
params = {
    "task": "train",
    "boosting_type": "gbdt",
    "objective": "binary",
    "metric": "auc",
    "learning_rate": 0.03,
    "num_leaves": 31,
    "feature_fraction": 0.25,
    "bagging_fraction": 0.85,
    # LightGBM ignores bagging_fraction unless bagging_freq is non-zero, so
    # bagging is enabled explicitly here (re-sample rows every iteration).
    "bagging_freq": 1,
    "verbosity": -1,
    "seed": SEED
}

# One Series of test-set probabilities per fold. They are joined into the
# `preds` DataFrame once, after the loop, instead of re-concatenating a
# growing frame on every iteration.
fold_preds = []

for k, (tr_id, vl_id) in enumerate(kf.split(X, y)):
    print("="*70)
    print(f"              KFOLD{k+1}")
    print("="*70)

    # Positional row indices from the splitter -> fold-specific train/valid data.
    X_train, X_valid = X.iloc[tr_id, :], X.iloc[vl_id, :]
    y_train, y_valid = y.iloc[tr_id], y.iloc[vl_id]

    l_train = lgb.Dataset(X_train, y_train)
    l_valid = lgb.Dataset(X_valid, y_valid)

    # NOTE(review): `early_stopping_rounds` and `verbose_eval` are keyword
    # arguments of lgb.train only in older LightGBM releases; newer releases
    # expect callbacks (lgb.early_stopping / lgb.log_evaluation) instead.
    # Confirm against the installed version.
    model = lgb.train(params=params,
                      num_boost_round=10000,
                      early_stopping_rounds=200,
                      train_set=l_train,
                      valid_sets=(l_train, l_valid),
                      verbose_eval=500)

    # lgb.train returns a Booster, which exposes predict() rather than the
    # scikit-learn style predict_proba(). With the "binary" objective,
    # predict() already yields a 1-D array of positive-class probabilities,
    # so no column slicing is needed.
    pred = model.predict(X_test, num_iteration=model.best_iteration)
    fold_preds.append(pd.Series(pred, name=f"fold{k+1}"))

# Rows = test samples, columns = folds.
preds = pd.concat(fold_preds, axis=1)

# Submission

In [None]:
# Average the per-fold predictions row-wise to get one value per test sample.
pred = preds.mean(axis="columns")

In [None]:
# Start from the sample submission file and overwrite its `target` column
# with the averaged predictions.
submission = pd.read_csv("../input/tabular-playground-series-mar-2021/sample_submission.csv")
submission["target"] = pred

submission

In [None]:
# Write the submission table to baseline.csv; index=False keeps the DataFrame
# index out of the file.
submission.to_csv("baseline.csv", index=False)