In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os
# Print the full path of every file found anywhere under the Kaggle input directory.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)



In [None]:
# Read the three input CSVs in order: training rows, rows to predict, and the
# sample submission.
train, test, ss = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/tabular-playground-series-jun-2021/train.csv',
        '/kaggle/input/tabular-playground-series-jun-2021/test.csv',
        '/kaggle/input/tabular-playground-series-jun-2021/sample_submission.csv',
    )
)

In [None]:
# Encode the string class labels as integers 0-8, then separate the label from
# the feature columns. After this cell `target` holds the encoded labels and
# `train` holds only features (no 'target', no 'id').
label_to_index = {'Class_1':0,'Class_2':1,'Class_3':2,'Class_4':3,
                  'Class_5':4,'Class_6':5,'Class_7':6,'Class_8':7,
                  'Class_9':8}
target = train['target'].map(label_to_index)

# Series.map turns every label that is missing from the dict into NaN. Fail
# loudly here rather than passing NaN labels on to the classifiers.
unmapped = target.isna()
if unmapped.any():
    unknown_labels = sorted(train.loc[unmapped, 'target'].astype(str).unique())
    raise ValueError(f'Unexpected target labels: {unknown_labels}')

# Rebind instead of dropping in place so the loaded frame is not mutated.
train = train.drop(columns=['target', 'id'])

In [None]:
from sklearn.model_selection import train_test_split, GridSearchCV
from catboost import CatBoostClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

In [None]:
# Hold out 10% of the training rows. The hold-out (X_test / y_test) is used as
# the evaluation set for early stopping in all three models, and its losses are
# read afterwards to weight the blend. The split is computed once: repeating it
# with the same inputs and random_state yields the same partition.
X_train, X_test, y_train, y_test = train_test_split(train, target, test_size=0.1, random_state=777)

cat_model = CatBoostClassifier(early_stopping_rounds=3,
                               silent=True,
                               loss_function='MultiClass',
                               depth=3,
                               grow_policy='SymmetricTree',
                               iterations=500,
                               learning_rate=0.1)
print('Training Catboost')
cat_model.fit(X_train, y_train, eval_set=[(X_test, y_test)])

# XGBoost spells the option `early_stopping_rounds` (plural); the singular form
# is not a recognised parameter, so early stopping never took effect. Likewise
# `verbose` / `verbose_eval` are not XGBClassifier arguments: `verbosity`
# controls library logging and `fit(verbose=...)` controls per-round eval output.
# NOTE: passing `early_stopping_rounds` to the constructor needs xgboost >= 1.6;
# on older releases pass it to `fit()` instead.
xg_model = XGBClassifier(n_estimators=150,
                         verbosity=0,
                         early_stopping_rounds=3,
                         gamma=0,
                         learning_rate=0.1,
                         max_depth=3)
print('Training XGboost')
xg_model.fit(X_train, y_train, eval_set=[(X_test, y_test)], verbose=False)

# `verbose=-1` is LightGBM's own parameter for suppressing log output; the
# `silent` constructor argument is no longer accepted as such by recent
# LightGBM releases. `early_stopping_round` is a valid LightGBM parameter name.
lg_model = LGBMClassifier(n_estimators=500,
                          verbose=-1,
                          early_stopping_round=3,
                          learning_rate=0.1,
                          max_depth=5,
                          num_leaves=30)
print('Training Lightgbm')
lg_model.fit(X_train, y_train, eval_set=[(X_test, y_test)])

In [None]:
# Blending weights: each model is weighted by the reciprocal of its best
# multiclass log-loss on the evaluation set (lower loss -> larger weight).
cat_loss = cat_model.best_score_['validation']['MultiClass']
lg_loss = lg_model.best_score_['valid_0']['multi_logloss']
xg_loss = min(xg_model.evals_result_['validation_0']['mlogloss'])

cat_w, lg_w, xg_w = 1 / cat_loss, 1 / lg_loss, 1 / xg_loss

In [None]:
# Weighted average of the three models' class probabilities, using the
# per-model weights cat_w / xg_w / lg_w and normalising by their sum.
test_features = test.drop(columns='id')  # built once instead of once per model

weighted_sum = (cat_model.predict_proba(test_features) * cat_w
                + xg_model.predict_proba(test_features) * xg_w
                + lg_model.predict_proba(test_features) * lg_w)

out = pd.DataFrame(weighted_sum / (cat_w + xg_w + lg_w),
                   columns=['Class_1','Class_2','Class_3','Class_4',
                            'Class_5','Class_6','Class_7','Class_8',
                            'Class_9'])

# Take the ids from the very rows that were scored, positionally, rather than
# from the sample submission: this stays correct even if the two files are not
# in the same row order.
out['id'] = test['id'].to_numpy()

In [None]:
# Write the submission file: the id column first, then the nine class
# probability columns, without the DataFrame index.
submission_columns = ['id',
                      'Class_1', 'Class_2', 'Class_3',
                      'Class_4', 'Class_5', 'Class_6',
                      'Class_7', 'Class_8', 'Class_9']
submission = out[submission_columns]
submission.to_csv('res.csv', index=False)
