In [None]:
import numpy as np 
import pandas as pd 
import os
import matplotlib.pyplot as plt

from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import log_loss

from lightgbm import LGBMClassifier

In [None]:
# Read the training and test tables from the
# `../input/tabular-playground-series-jun-2021` directory (train first, then test).
train, test = map(
    pd.read_csv,
    (
        "../input/tabular-playground-series-jun-2021/train.csv",
        "../input/tabular-playground-series-jun-2021/test.csv",
    ),
)

In [None]:
# Show (rows, columns) of the training frame, then of the test frame, one per line.
print(train.shape, test.shape, sep="\n")

In [None]:
# Preview the first 12 training rows, transposed so every column is shown as a row.
train.head(12).T

In [None]:
# Remove the 'id' column from both frames; the originals are left untouched.
train2 = train.drop(columns=['id'])
test2 = test.drop(columns=['id'])

In [None]:
# Distinct values of the 'target' column, in order of first appearance.
Name0 = train2['target'].unique()

# The same labels as a sorted list; this order defines the class order used below.
Name = list(Name0)
Name.sort()
print(Name)

In [None]:
# Integer codes 0..len(Name)-1, one per class label. The count is taken from
# `Name` instead of a hard-coded 9 so that zip() below can never silently drop
# a label (which would leave those targets unmapped).
N = list(range(len(Name)))

# Label -> integer code, used to encode the target column.
normal_mapping = dict(zip(Name, N))
# Integer code -> label, the inverse lookup.
reverse_mapping = dict(zip(N, Name))


def mapper(value):
    """Return the class label that `reverse_mapping` stores for integer code `value`.

    Raises:
        KeyError: if `value` is not a key of `reverse_mapping`.
    """
    return reverse_mapping[value]


print(normal_mapping)

In [None]:
# Labels: the 'target' column translated to integer codes through `normal_mapping`.
y = train2['target'].map(normal_mapping)
# Features: every remaining column of the training frame.
X = train2.drop(columns=['target'])

In [None]:
# Shape of the feature matrix on the first line, shape of the label vector on the second.
print(f"{X.shape}\n{y.shape}")

In [None]:
def _early_stopping_fit_kwargs(estimator, stopping_rounds=100, log_period=400):
    """Build the early-stopping / logging keyword arguments that `estimator.fit` accepts."""
    import inspect

    fit_parameters = inspect.signature(estimator.fit).parameters
    accepts_legacy_keywords = "early_stopping_rounds" in fit_parameters or any(
        parameter.kind is inspect.Parameter.VAR_KEYWORD
        for parameter in fit_parameters.values()
    )
    if accepts_legacy_keywords:
        # fit() still takes the plain keywords: pass exactly what was always passed.
        return {"early_stopping_rounds": stopping_rounds, "verbose": log_period}

    # Newer LightGBM releases dropped those keywords from fit(); the same
    # behaviour is requested through callbacks instead.
    import lightgbm

    return {
        "callbacks": [
            lightgbm.early_stopping(stopping_rounds=stopping_rounds),
            lightgbm.log_evaluation(period=log_period),
        ]
    }


def cross_val(X, y, model, params, folds=5):
    """Fit and score `model` on stratified K-fold splits of `X` / `y`.

    For each fold a fresh ``model(**params)`` is fitted on the training part,
    with the held-out part supplied as ``eval_set`` and early stopping after
    100 rounds (progress logged every 400 rounds). The log loss on the
    held-out part is printed per fold.

    Note that the held-out part is used both for early stopping and for the
    reported log loss.

    Args:
        X: Feature table; rows are selected with ``X.iloc``.
        y: Target values aligned with ``X``; rows are selected with ``y.iloc``.
        model: Estimator class (not an instance), called as ``model(**params)``.
            Its ``fit`` must accept ``eval_set`` and it must offer ``predict_proba``.
        params: Keyword arguments for the estimator constructor.
        folds: Number of stratified splits.

    Returns:
        The estimator fitted on the last fold only; estimators from earlier
        folds are discarded.
    """
    skf = StratifiedKFold(n_splits=folds, shuffle=True, random_state=21)
    for fold, (train_idx, test_idx) in enumerate(skf.split(X, y)):
        print(f"Fold: {fold}")
        x_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
        x_test, y_test = X.iloc[test_idx], y.iloc[test_idx]

        alg = model(**params)
        alg.fit(x_train, y_train,
                eval_set=[(x_test, y_test)],
                **_early_stopping_fit_kwargs(alg))

        pred = alg.predict_proba(x_test)
        loss = log_loss(y_test, pred)
        print(f"Log loss: {loss}")
        print("-"*50)

    return alg

In [None]:
# Constructor arguments for the classifier; `cross_val` applies them as `model(**params)`.
# NOTE(review): per the LightGBM parameter docs, `subsample` only takes effect
# when `subsample_freq` > 0, which is not set here — confirm whether row
# subsampling was intended.
params = dict(
    learning_rate=0.05,
    n_estimators=20000,
    max_bin=90,
    num_leaves=10,
    max_depth=7,
    reg_alpha=8,
    reg_lambda=7,
    subsample=0.7,
)

In [None]:
# Run cross-validation with the default number of folds; `cross_val` hands back
# the estimator fitted on its last fold.
lgbm_model = cross_val(
    X=X,
    y=y,
    model=LGBMClassifier,
    params=params,
)

In [None]:
# Class-probability predictions for the test features, produced by the
# estimator that `cross_val` returned (the one fitted on its last fold).
pred_lgbm = lgbm_model.predict_proba(test2)

In [None]:
# Shape of the prediction array, followed by the predictions for the first test row.
print(pred_lgbm.shape, pred_lgbm[0], sep="\n")

In [None]:
# Load the sample submission file; it is used below as the template for the
# submission frame.
sample = pd.read_csv("../input/tabular-playground-series-jun-2021/sample_submission.csv")
# Display the loaded frame as the cell output.
sample

In [None]:
# Work on a copy: a plain `subm = sample` would alias the frame, so filling in
# predictions would also overwrite `sample` and make its earlier display stale.
subm = sample.copy()
# One probability column per class label. `Name` is the sorted label list whose
# positions are the integer codes the target was encoded with.
subm[Name] = pred_lgbm
subm.to_csv('submission.csv', index=False)
subm