# imports, variables & data loading

In [None]:
# imports
import numpy as np
import pandas as pd 
import os 
import random
import warnings 
warnings.filterwarnings(action='ignore')

from sklearn.preprocessing import LabelEncoder 

from sklearn.model_selection import train_test_split
import optuna.integration.lightgbm as lgb
from lightgbm import LGBMClassifier

# Input CSV locations and the name of the submission file that is written
# at the end of the notebook.
TRAIN_PATH = "../input/tabular-playground-series-may-2021/train.csv"
TEST_PATH = "../input/tabular-playground-series-may-2021/test.csv"
SAMPLE_SUBMISSION_PATH = "../input/tabular-playground-series-may-2021/sample_submission.csv"
SUBMISSION_PATH = "submission.csv"

# Column names: row identifier and label column of the training data.
ID, TARGET = "id", 'target'

# Names of the per-class probability columns filled in the submission.
CLASS1, CLASS2, CLASS3, CLASS4 = "Class_1", "Class_2", "Class_3", "Class_4"
CLASS = [CLASS1, CLASS2, CLASS3, CLASS4]
# Number of classes; handed to LightGBM as `num_class`.
TARGET_NUM = len(CLASS)

# Global seed shared by the random number generators and the data split.
SEED = 2022


def seedAll(seed=SEED):
    """Seed the Python and NumPy global RNGs and export PYTHONHASHSEED.

    Args:
        seed: Integer seed applied to `random`, `numpy.random` and written
            (as a string) to the PYTHONHASHSEED environment variable.
    """
    # NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up,
    # so setting it here presumably only affects subprocesses - confirm.
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seed_rng in (random.seed, np.random.seed):
        seed_rng(seed)


# Apply the default seed once, at definition time.
seedAll()

# Training settings
TEST_SIZE = 0.2      # `test_size` given to train_test_split for the validation part
VERBOSE_EVAL = 100   # `verbose_eval` given to lgb.train
ESR = 100            # `early_stopping_rounds` used by both training calls

# Read the training and test tables (training file first, then test file).
train, test = pd.read_csv(TRAIN_PATH), pd.read_csv(TEST_PATH)

# build model

In [None]:
# split (input & target): the label column becomes y; the label and id
# columns are removed from the feature table.
y = train[TARGET]
X = train.drop([TARGET, ID], axis=1)

# label encoding: replace the class labels with integer codes
labelEncoder = LabelEncoder()
y = labelEncoder.fit_transform(y)
print(y)
print(labelEncoder.classes_)

# split data (train & validation)
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=TEST_SIZE, shuffle=True, random_state=SEED)

# search best param
# `lgb` is optuna.integration.lightgbm here, so lgb.train runs Optuna's
# LightGBM tuner rather than a single plain training run.
D_train = lgb.Dataset(X_train, label=y_train)
D_val = lgb.Dataset(X_val, label=y_val)
params = {
    'objective': 'multiclass',
    'num_class': TARGET_NUM,
    'metric': 'multi_logloss',
    'verbosity': -1,
    'boosting_type': 'gbdt',
}

tuned_booster = lgb.train(
    params,
    D_train,
    valid_sets=[D_val],
    verbose_eval=VERBOSE_EVAL,
    early_stopping_rounds=ESR,
    # The tuner's sampler takes its own seed; pass SEED so that the
    # parameter search is repeatable like the rest of the notebook.
    optuna_seed=SEED,
)

# build model using best params
# Re-fit a scikit-learn style classifier with the parameters of the tuned
# booster; the validation split drives early stopping.
model = LGBMClassifier(**tuned_booster.params)
model.fit(
    X_train, y_train,
    eval_set=[(X_val, y_val)],  # list of (X, y) pairs, as the API documents
    early_stopping_rounds=ESR,
    verbose=0,
)

# predict & submit

In [None]:
# Load the sample submission; its class columns are filled in below.
sub = pd.read_csv(SAMPLE_SUBMISSION_PATH)

# Predict class probabilities on the test features (id column removed).
X_test = test.drop(columns=[ID])
preds = model.predict_proba(X_test)

# Column k of the prediction matrix goes to the k-th name in CLASS.
for col_idx, class_col in enumerate(CLASS):
    sub[class_col] = preds[:, col_idx]

# Write the submission file and show its first rows.
sub.to_csv(SUBMISSION_PATH, index=False)
sub.head()