In [None]:
import optuna
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

from sklearn.model_selection import train_test_split,StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import log_loss
from catboost import CatBoostClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

****Loading the datasets****

In [None]:
# Read the competition's training and test tables (train first, then test).
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/tabular-playground-series-may-2021/train.csv",
        "../input/tabular-playground-series-may-2021/test.csv",
    )
)

In [None]:
# Names of every column of `train` except the first and the last one.
cols = list(train.columns[1:-1])

****Label Encoding****

In [None]:
# Replace the `target` column with integer codes produced by a LabelEncoder.
# `le` is kept so the fitted encoder stays available after this cell.
le = LabelEncoder()
encoded = le.fit_transform(train["target"])
train = train.assign(target=encoded)

# Encoded labels as a NumPy array; the bare name on the last line displays it.
apple = train["target"].to_numpy()
apple

In [None]:
# Display the columns listed in `cols` as a raw NumPy matrix.
train.loc[:, cols].to_numpy()

****Correlation Heatmap****

In [None]:
# Pairwise correlations between the columns of `train`, drawn as a heatmap.
corr = train.corr()

# Boolean mask covering the diagonal and the upper triangle, so only the
# lower triangle of the matrix is drawn.
mask = np.triu(np.ones_like(corr, dtype=bool))

plt.figure(figsize=(16, 16), dpi=80)
sns.heatmap(
    corr,
    mask=mask,
    cmap='PuBu',
    robust=True,
    center=0,
    square=True,
    linewidths=.5,
)
plt.title('Correlation', fontsize=15)
plt.show()

In [None]:
# Split `train` into the label column (`y`) and everything else (`X`).
y = train["target"]
X = train.drop(columns=["target"])

# Show both shapes as a quick sanity check.
X.shape, y.shape

****Dimension Reduction****

****XGBoost Classifier****

In [None]:
# 80/20 hold-out split of the training data. `train_test_split` is already
# imported in the notebook's import cell, so it is not re-imported here.
# The hold-out features get their own name (`X_holdout`) so they are no longer
# silently overwritten by the competition test features assigned to `X_test`
# below. NOTE(review): `X` still contains the `id` column at this point.
X_train, X_holdout, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=42)

# Feature columns used by every model: everything except the id and the label.
columns = train.drop(['id', 'target'], axis=1).columns

# Competition test features, selected by `columns` so they are guaranteed to
# match the training features in both content and order.
X_test = test[columns]

In [None]:
# Keyword arguments forwarded to XGBClassifier for every CV fold.
# NOTE(review): no 'n_estimators' is given, so the library default applies;
# if that default is below the early-stopping patience used at fit time,
# early stopping can never trigger - confirm this is intended.
xgb_params = {
    'learning_rate': 0.746463,
    'max_depth': 1,
    'lambda': 25.46112,
    'random_state': 21,
    'objective': 'multi:softprob',
    'eval_metric': 'mlogloss',
}

In [None]:
# 10-fold stratified CV for XGBoost: each fold's model contributes an equal
# share to the averaged test-set probabilities and reports its own
# validation log loss.
kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=13)
preds_1 = np.zeros((X_test.shape[0], 4))  # averaged test probabilities, 4 columns
ll = []  # validation log loss of each fold, in fold order
n = 0

for n, (tr_idx, test_idx) in enumerate(kf.split(train[columns], train['target']), start=1):
    # Carve out this fold's training and validation partitions.
    X_tr, y_tr = train[columns].iloc[tr_idx], train['target'].iloc[tr_idx]
    X_val, y_val = train[columns].iloc[test_idx], train['target'].iloc[test_idx]

    model = XGBClassifier(**xgb_params)
    model.fit(X_tr, y_tr, eval_set=[(X_val, y_val)], early_stopping_rounds=200, verbose=False)

    # Add this fold's share of the fold-averaged test prediction.
    preds_1 += model.predict_proba(X_test) / kf.n_splits

    ll.append(log_loss(y_val, model.predict_proba(X_val)))
    print(n, ll[-1])

In [None]:
# Submission-shaped frame for the XGBoost predictions: the test ids first,
# followed by one probability column per class.
df_kfold_xgb = pd.DataFrame(preds_1, columns=['Class_1', 'Class_2', 'Class_3', 'Class_4'])
df_kfold_xgb.insert(0, 'id', test['id'])

****LGBM Classifier****

In [None]:
# Keyword arguments forwarded to LGBMClassifier (CV folds and the stacking base model).
params_lgbm = dict(
    learning_rate=0.08602375,
    max_depth=1,
    min_child_samples=61,
    min_child_weight=0.2569581,
    metric='multi_logloss',
    random_state=42,
    n_estimators=10000,
    objective='multiclass',
)

In [None]:
# 10-fold stratified CV for LightGBM: each fold's model contributes an equal
# share to the averaged test-set probabilities and reports its own
# validation log loss.
preds_2 = np.zeros((X_test.shape[0], 4))  # averaged test probabilities, 4 columns
skf = StratifiedKFold(n_splits=10, random_state=13, shuffle=True)
ll = []  # validation log loss of each fold, in fold order
n = 0

for n, (tr_idx, test_idx) in enumerate(skf.split(train[columns], train['target']), start=1):

    X_tr, X_val = train[columns].iloc[tr_idx], train[columns].iloc[test_idx]
    y_tr, y_val = train['target'].iloc[tr_idx], train['target'].iloc[test_idx]

    model = LGBMClassifier(**params_lgbm)

    model.fit(X_tr, y_tr, eval_set=[(X_val, y_val)], early_stopping_rounds=200, verbose=False)

    # Average over the splitter actually used by this loop (`skf`), not over
    # a `kf` object left behind by a different cell.
    preds_2 += model.predict_proba(X_test) / skf.n_splits
    ll.append(log_loss(y_val, model.predict_proba(X_val)))
    print(n, ll[-1])

In [None]:
# Submission-shaped frame for the LightGBM predictions: the test ids first,
# followed by one probability column per class.
df_kfold_lgbm = pd.DataFrame(preds_2, columns=['Class_1', 'Class_2', 'Class_3', 'Class_4'])
df_kfold_lgbm.insert(0, 'id', test['id'])

****CatBoost Classifier****

In [None]:
# Keyword arguments forwarded to CatBoostClassifier (CV folds and the stacking base model).
params_cb = dict(
    loss_function='MultiClass',
    eval_metric='MultiClass',
    learning_rate=0.0765847,
    reg_lambda=18.7924786,
    subsample=0.537623,
    depth=5,
    min_data_in_leaf=19,
    verbose=False,
    bootstrap_type='Bernoulli',
    random_state=42,
    leaf_estimation_method='Newton',
)

In [None]:
# 10-fold stratified CV for CatBoost: each fold's model contributes an equal
# share to the averaged test-set probabilities and reports its own
# validation log loss.
kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=13)
preds_3 = np.zeros((X_test.shape[0], 4))  # averaged test probabilities, 4 columns
ll = []  # validation log loss of each fold, in fold order
n = 0

for n, (tr_idx, test_idx) in enumerate(kf.split(train[columns], train['target']), start=1):
    # Carve out this fold's training and validation partitions.
    X_tr, y_tr = train[columns].iloc[tr_idx], train['target'].iloc[tr_idx]
    X_val, y_val = train[columns].iloc[test_idx], train['target'].iloc[test_idx]

    model = CatBoostClassifier(**params_cb)
    model.fit(X_tr, y_tr, eval_set=[(X_val, y_val)], early_stopping_rounds=200, verbose=False)

    # Add this fold's share of the fold-averaged test prediction.
    preds_3 += model.predict_proba(X_test) / kf.n_splits

    ll.append(log_loss(y_val, model.predict_proba(X_val)))
    print(n, ll[-1])

In [None]:
# Submission-shaped frame for the CatBoost predictions: the test ids first,
# followed by one probability column per class.
df_kfold = pd.DataFrame(preds_3, columns=['Class_1', 'Class_2', 'Class_3', 'Class_4'])
df_kfold.insert(0, 'id', test['id'])

In [None]:
# Write the CatBoost submission file (without the index column).
# NOTE(review): `to_csv` is given a path here, so `output_3` most likely ends
# up as None rather than the CSV text - confirm before using it anywhere.
output_3 = df_kfold.to_csv('submit_3.csv',index=False)

****Stacking Classifier****

In [None]:
# Base learners of the stack, each built from the parameter dictionaries
# defined earlier in the notebook.
estimators = [
    ('lgbm', LGBMClassifier(**params_lgbm)),
    ('cb', CatBoostClassifier(**params_cb)),
    ('xgb', XGBClassifier(**xgb_params)),
]

# Stack the base learners' class probabilities and let a default-configured
# LGBMClassifier combine them.
clf = StackingClassifier(
    estimators=estimators,
    final_estimator=LGBMClassifier(),
    stack_method='predict_proba',
    n_jobs=-1,
)

In [None]:
# 5-fold stratified CV for the stacking ensemble: each fold's fit contributes
# an equal share to the averaged test-set probabilities and reports its own
# validation log loss.
preds_4 = np.zeros((X_test.shape[0], 4))  # averaged test probabilities, 4 columns
kf = StratifiedKFold(n_splits=5, random_state=42, shuffle=True)
# Start from an empty score list. The original initialised a misspelled `l1`
# and then appended to `ll`, so the printed values were stale scores left in
# `ll` by an earlier cell rather than this model's scores.
ll = []
n = 0

for n, (tr_idx, test_idx) in enumerate(kf.split(train[columns], train['target']), start=1):

    X_tr, X_val = train[columns].iloc[tr_idx], train[columns].iloc[test_idx]
    y_tr, y_val = train['target'].iloc[tr_idx], train['target'].iloc[test_idx]

    # The same stacking estimator object is refitted on every fold.
    model = clf

    model.fit(X_tr, y_tr)

    preds_4 += model.predict_proba(X_test) / kf.n_splits
    ll.append(log_loss(y_val, model.predict_proba(X_val)))
    print(n, ll[-1])

In [None]:
# Submission-shaped frame for the stacking predictions: the test ids first,
# followed by one probability column per class.
df_kfold_stk = pd.DataFrame(preds_4, columns=['Class_1', 'Class_2', 'Class_3', 'Class_4'])
df_kfold_stk.insert(0, 'id', test['id'])

In [None]:
# Write the stacking submission file (without the index column).
# NOTE(review): `to_csv` is given a path here, so `output_4` most likely ends
# up as None rather than the CSV text - confirm before using it anywhere.
output_4 = df_kfold_stk.to_csv('submit_4.csv',index=False)