# Import Libraries

In [None]:
import pandas as pd
import numpy as np

from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier

from sklearn.metrics import accuracy_score, precision_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import StackingClassifier

from sklearn.metrics import auc, roc_curve
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import label_binarize, LabelEncoder
from sklearn.multiclass import OneVsRestClassifier

import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's dark grid theme to every figure drawn later in the notebook.
sns.set_style(style='darkgrid')
# Seed shared by every seeded component in the notebook.
RS = 69420

# Locations of the competition CSV files inside the Kaggle input directory.
train_path = '../input/tabular-playground-series-may-2021/train.csv'
test_path = '../input/tabular-playground-series-may-2021/test.csv'

In [None]:
# Load the training data; the first CSV column is used as the row index.
df = pd.read_csv(train_path, index_col=0)

# Integer code for each string class label.
dic = {"Class_1": 0, "Class_2": 1, "Class_3": 2, "Class_4": 3}

# Assign the encoded column back to the frame. Calling
# `df['target'].replace(dic, inplace=True)` operates on a Series selected from
# the frame, which leaves `df` untouched under pandas Copy-on-Write.
encoded_target = df['target'].map(dic)
if encoded_target.isna().any():
    # `map` turns labels missing from `dic` into NaN; fail loudly instead of
    # handing NaN targets to the classifiers.
    unexpected = sorted(df.loc[encoded_target.isna(), 'target'].astype(str).unique())
    raise ValueError(f"Unexpected target labels: {unexpected}")
df['target'] = encoded_target.astype(int)

# Select features and target by column name rather than by position, so the
# split does not depend on 'target' being the last column.
X = df.drop(columns='target').values
y = df['target'].values

X.shape, y.shape

# Outlier Detection with Isolation Forest

In [None]:
# from sklearn.ensemble import IsolationForest

# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=RS)

# # fit the model
# clf = IsolationForest(max_samples=100, random_state=RS, bootstrap=True, n_jobs=-1, verbose=1)
# clf.fit(X_train)
# y_pred_train = clf.predict(X_train)
# y_pred_test = clf.predict(X_test)
# NOTE: X_outliers is not defined in the cells above; build it before enabling this cell.
# y_pred_outliers = clf.predict(X_outliers)

# # plot the line, the samples, and the nearest vectors to the plane
# xx, yy = np.meshgrid(np.linspace(-5, 5, 50), np.linspace(-5, 5, 50))
# Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
# Z = Z.reshape(xx.shape)

# plt.title("IsolationForest")
# plt.contourf(xx, yy, Z, cmap=plt.cm.Blues_r)

# b1 = plt.scatter(X_train[:, 0], X_train[:, 1], c='white',
#                  s=20, edgecolor='k')
# b2 = plt.scatter(X_test[:, 0], X_test[:, 1], c='green',
#                  s=20, edgecolor='k')
# c = plt.scatter(X_outliers[:, 0], X_outliers[:, 1], c='red',
#                 s=20, edgecolor='k')
# plt.axis('tight')
# plt.xlim((-5, 5))
# plt.ylim((-5, 5))
# plt.legend([b1, b2, c],
#            ["training observations",
#             "new regular observations", "new abnormal observations"],
#            loc="upper left")
# plt.show()

# Stack Models

In [None]:
# Tuned LightGBM settings (passed to the LGBMClassifier that serves as the
# stack's final estimator).
LGBM_TUNED_PARAM = {
    'lambda_l1': 0.04016285023112862,
    'lambda_l2': 1.3570007347597148e-08,
    'num_leaves': 27,
    'feature_fraction': 0.7511109565597812,
    'bagging_fraction': 0.7052737431683656,
    'bagging_freq': 2,
    'random_seed': 6150,
    'min_child_samples': 86,
}

# Tuned XGBoost settings for the 4-class soft-probability objective.
# NOTE(review): both 'learning_rate' and 'eta' are set here; XGBoost documents
# `eta` as an alias of `learning_rate` -- confirm which value is intended.
XGB_TUNED_PARAM = {
    'objective': 'multi:softprob',
    'num_class': 4,
    'learning_rate': 0.05,
    'eval_metric': 'mlogloss',
    'subsample': 0.6,
    'colsample_bytree': 0.6,
    'colsample_bylevel': 1,
    'eta': 0.3,
    'reg_alpha': 0,
    'reg_lambda': 1,
    'max_depth': 3,
    'min_child_weight': 1,
}

# Tuned CatBoost settings (GPU task type, one-vs-all multiclass loss).
CAT_TUNED_PARAM = {
    'depth': 3,
    'l2_leaf_reg': 4.287566030099442,
    'bagging_temperature': 27.174417642203863,
    'auto_class_weights': None,
    'loss_function': 'MultiClassOneVsAll',
    'eval_metric': 'MultiClassOneVsAll',
    'grow_policy': 'Lossguide',
    'bootstrap_type': 'Poisson',
    'iterations': 10000,
    'max_bin': 484,
    'min_data_in_leaf': 414,
    'task_type': 'GPU',
    'subsample': 0.13534551086578891,
    'max_ctr_complexity': 10,
}

In [None]:
# Shuffled, stratified 10-fold splitter seeded with RS.
cv = StratifiedKFold(
    n_splits=10,
    shuffle=True,
    random_state=RS,
)

In [None]:
# Base learners: one untuned model per boosting library, plus the tuned
# CatBoost and XGBoost configurations.
xgb_default = XGBClassifier(objective='multi:softprob', num_class=4, tree_method='gpu_hist')
lgbm_default = LGBMClassifier(device='gpu')
cat_default = CatBoostClassifier(task_type='GPU')
cat_tuned = CatBoostClassifier(**CAT_TUNED_PARAM)
xgb_tuned = XGBClassifier(**XGB_TUNED_PARAM, tree_method='gpu_hist')

# (name, estimator) pairs in the order they are handed to the stack.
estimators = [
    ('XGB_DEFAULT', xgb_default),
    ('LGBM_DEFAULT', lgbm_default),
    ('CAT_DEFAULT', cat_default),
    ('CAT_TUNED', cat_tuned),
    ('XGB_TUNED', xgb_tuned),
]

# Meta-learner: LightGBM with the tuned parameters.
final = LGBMClassifier(**LGBM_TUNED_PARAM, device='gpu')

In [None]:
# Assemble the stack: base learners from `estimators`, `final` as the
# meta-learner, and `cv` as the splitter. passthrough=True asks scikit-learn to
# give the final estimator the original features as well as the base predictions.
classifier = StackingClassifier(
    estimators=estimators,
    final_estimator=final,
    passthrough=True,
    cv=cv,
    verbose=1,
)

In [None]:
%%time
# Fit the whole stack on the full training arrays.
classifier.fit(X=X, y=y)

In [None]:
# Load the sample submission to use as the output template; the first CSV
# column becomes the index.
submission = pd.read_csv(
    '../input/tabular-playground-series-may-2021/sample_submission.csv',
    index_col=0,
)
submission

In [None]:
# Read the test features, using the first CSV column as the index.
test = pd.read_csv(filepath_or_buffer=test_path, index_col=0)

In [None]:
# The stack was fitted on a plain NumPy array (X was built with `.values`), so
# pass the test features as an array too. Predicting on the DataFrame mixes
# named and unnamed inputs, which triggers feature-name warnings or mismatch
# errors in some estimator versions.
preds = classifier.predict_proba(test.values)

In [None]:
# Copy each column of the probability matrix into its submission column;
# column i of `preds` goes to the i-th class name listed here.
for class_idx, class_column in enumerate(('Class_1', 'Class_2', 'Class_3', 'Class_4')):
    submission[class_column] = preds[:, class_idx]

In [None]:
# Display the submission frame after the class columns have been assigned.
submission

In [None]:
# Write the submission (index included) to the working directory.
submission.to_csv(path_or_buf='submission.csv')