# Libraries

In [None]:
import numpy as np
import pandas as pd
import optuna
from lightgbm import LGBMClassifier
from sklearn.metrics import log_loss
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [None]:
# Load the competition data. The raw frames are kept untouched because the
# submission step later needs the original test ids.
train_set = pd.read_csv("../input/tabular-playground-series-jun-2021/train.csv")
test_set = pd.read_csv("../input/tabular-playground-series-jun-2021/test.csv")

# Work on id-free copies; `drop` without `inplace` returns a new frame.
train = train_set.drop(columns="id")
test = test_set.drop(columns="id")

# Extra feature: row-wise total of the feature columns.
# The label column is excluded explicitly. Summing over the whole frame would
# either fail on a non-numeric label (pandas >= 2.0 no longer silently skips
# such columns) or, for a numeric label, leak the target into the feature.
train["sum"] = train.drop(columns="target").sum(axis=1)
test["sum"] = test.sum(axis=1)

# Feature matrix and integer-encoded labels. `le` is kept so that encoded
# class indices can be mapped back to the original label values later.
X = train.drop(columns="target").to_numpy()
y = train["target"].to_numpy()
le = LabelEncoder()
y = le.fit_transform(y)

# Defining Model

In [None]:
def objective(trial, X, y):
    """Optuna objective: validation log loss of one LightGBM candidate.

    Samples a hyper-parameter set from ``trial``, fits an ``LGBMClassifier``
    on 80% of the data with early stopping monitored on the remaining 20%,
    and returns the multi-class log loss on that 20% hold-out.

    Parameters
    ----------
    trial : optuna trial
        Used to sample the hyper-parameters.
    X : array-like
        Feature matrix.
    y : array-like
        Class labels aligned with ``X``.

    Returns
    -------
    float
        Log loss on the hold-out split (lower is better).
    """
    # Fixed seed: every trial is scored on the same split, so trial values
    # are directly comparable.
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=0.2, random_state=17
    )

    # `suggest_float(..., log=True)` replaces the deprecated
    # `suggest_loguniform`; the sampled distribution and the parameter names
    # are unchanged.
    #
    # NOTE(review): with max_depth <= 5 a tree has at most 2**5 = 32 leaves,
    # so the num_leaves range 90-150 looks like it never binds - confirm.
    # NOTE(review): `subsample` presumably has no effect while
    # `subsample_freq` stays at its default of 0 - verify against the
    # LightGBM parameter docs before relying on it.
    params = {
        'reg_alpha': trial.suggest_float('reg_alpha', 1, 100, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 500, 1000, log=True),
        'num_leaves': trial.suggest_int('num_leaves', 90, 150),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.5),
        'max_depth': trial.suggest_int('max_depth', 2, 5),
        'n_estimators': trial.suggest_int('n_estimators', 1, 50000),
        'min_child_samples': trial.suggest_int('min_child_samples', 1, 10),
        'min_child_weight': trial.suggest_float('min_child_weight', 1e-3, 2, log=True),
        'subsample': trial.suggest_float('subsample', 0.008, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.01, 0.3),
    }

    # Named `model` rather than `lgb` to avoid confusion with the usual
    # alias of the lightgbm module.
    model = LGBMClassifier(**params)
    model.fit(
        X_train,
        y_train,
        eval_set=[(X_val, y_val)],
        eval_metric='multi_logloss',
        early_stopping_rounds=50,
        verbose=False,
    )

    y_pred = model.predict_proba(X_val)

    # Pass the label set explicitly so the probability columns are matched to
    # the model's classes even if some class is absent from the hold-out split.
    return log_loss(y_val, y_pred, labels=model.classes_)

# Optuna

In [None]:
# Run the hyper-parameter search: 30 trials, minimising the value returned
# by `objective` on the full (X, y) data.
study = optuna.create_study(direction='minimize')
study.optimize(
    func=lambda trial: objective(trial, X, y),
    n_trials=30,
)
print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_params)

In [None]:
# Hyper-parameters of the best trial found by the study; the bare name on
# the last line displays them as the notebook cell output.
best_params = study.best_params
best_params

In [None]:
# 80/20 hold-out split with seed 17 - the same arguments `objective` uses,
# so the validation rows here match the ones the trials were scored on.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=17,
)

In [None]:
# Refit the best configuration the same way the trials were evaluated.
# `objective` trained every candidate with early stopping on the hold-out
# split, so the tuned `n_estimators` is only an upper bound. Fitting without
# early stopping would build every one of those trees and yield a different
# (and likely overfitted) model than the one the study actually scored.
lgb = LGBMClassifier(**best_params)
lgb.fit(
    X_train,
    y_train,
    eval_set=[(X_val, y_val)],
    eval_metric='multi_logloss',
    early_stopping_rounds=50,
    verbose=False,
)

In [None]:
# Class probabilities on the hold-out split and the resulting log loss; the
# bare name on the last line displays the score as the cell output.
y_pred = lgb.predict_proba(X_val)
loss = log_loss(y_true=y_val, y_pred=y_pred)
loss

In [None]:
# Predict class probabilities for the test rows. The model was fitted on a
# plain ndarray, so the test frame is converted the same way to keep the
# input type (and column-position semantics) consistent with training.
y_test = lgb.predict_proba(test.to_numpy())
y_test

In [None]:
# Build the submission frame: one row per test id, one probability column per
# class. Column names are derived from the fitted encoder and the model's own
# class order instead of being hard-coded, so each column is guaranteed to
# carry the label that its probabilities belong to.
result = pd.DataFrame(
    y_test,
    index=test_set["id"],
    columns=le.inverse_transform(lgb.classes_),
)
result.head()

In [None]:
# Write the submission file. `to_csv` returns None when given a path, so the
# result is not bound to a name.
result.to_csv("sub.csv")