# What is there in this notebook

## *This notebook will guide you through a starter submission for this competition*

## My approach

> #### 1. Did EDA using Dataprep library and found some insights, data was similar to TPS MAY.
> #### 2. Built the Catboost Model, tuned it and trained it on 10 folds.
> #### 3. Built the LGBM Model, tuned it and trained it on 10 folds.
> #### 4. Built the XGBoost Model, tuned it and trained it on 10 folds.
> #### 5. Built a voting classifier combining all the probabilities.
> #### 6. Built a stacking model of all the models (training on the predicted probabilities and labels)

In [None]:
import pandas as pd 
import numpy as np
from sklearn.model_selection import train_test_split, KFold, StratifiedKFold
import optuna
from sklearn.metrics import log_loss
pd.plotting.register_matplotlib_converters()
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Seaborn defaults with colour-code remapping enabled, then a 10-colour
# viridis palette as the default palette for every following plot.
sns.set(color_codes=True)
pal = sns.color_palette("viridis", 10)
sns.set_palette(pal)

In [None]:
# Load the competition train and test CSVs from the ../input directory.
train = pd.read_csv('../input/tabular-playground-series-jun-2021/train.csv')
test = pd.read_csv('../input/tabular-playground-series-jun-2021/test.csv')

In [None]:
train.info()

In [None]:
from sklearn.preprocessing import LabelEncoder
# Replace the raw target labels with integer codes; `le` is kept so the
# mapping can be inverted later if needed.
le = LabelEncoder()
train['target'] = le.fit_transform(train['target'])

In [None]:
# Missing-value count per column.
train.isnull().sum()

# Plotting + Report with Dataprep

In [None]:
!pip install dataprep

In [None]:
from dataprep.eda import plot, plot_correlation, create_report, plot_missing

In [None]:
# Dataprep overview plots for every column except the row identifier.
plot(train.drop(columns=['id']))

In [None]:
# create_report(train)

## Visualising skewness

In [None]:
# Skewness of every feature column (identifier and target excluded),
# collected into a frame and shown as a bar chart.
feature_cols = train.drop(['id','target'],axis=1).columns
skew = [train[str(col)].skew() for col in feature_cols]

skew_df = pd.DataFrame({'Feature': feature_cols, 'Skewness': skew})
skew_df.plot(kind='bar', figsize=(18,10))

## Insights from Report and Visualisations
> #### 1. The data is left skewed, log transformations or box-cox can be tried
> #### 2. Most of the features are heavily skewed, with zeros making up more than 90% of the values in some of them, so feature selection will be necessary.
> #### 3. Baseline model can overfit because of skewness in data.
> #### 4. Outlier Detection and removal will also be handy to improve score.
> #### 5. No correlation means that there are some unnecessary features.
> #### 6. We can also gain some information through feature engineering, e.g. by trying feature interactions or ratios, and increase correlation.

## Creating a Clustering variable!

In [None]:
# Stack the train and test features (identifier/target removed) so that the
# scaler and the clustering below are fitted on both sets together.
big_df = pd.concat([train.drop(['id','target'],axis=1), test.drop(['id'],axis=1)])

In [None]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
# The scaled array is wrapped in a new DataFrame, which keeps the column names
# but replaces the concatenated index with a fresh default one.
scaled_big_df = pd.DataFrame(sc.fit_transform(big_df), columns=big_df.columns)

In [None]:
from sklearn.cluster import KMeans
# Nine clusters with a fixed seed.
km = KMeans(n_clusters = 9, random_state=13).fit(scaled_big_df)

In [None]:
# Cluster label of every row, stored as an extra column.
scaled_big_df['Cluster'] = km.predict(scaled_big_df)

In [None]:
s_train = scaled_big_df.iloc[:200000,:]
s_train = pd.concat([s_train,pd.get_dummies(s_train['Cluster'])],axis=1)

In [None]:
s_test = scaled_big_df.iloc[200000:,:]
s_test = pd.concat([s_test,pd.get_dummies(s_test['Cluster'])],axis=1)

In [None]:
s_train = s_train.drop(['Cluster',0],axis=1)
s_test = s_test.drop(['Cluster',0],axis=1)

In [None]:
# Model inputs: the scaled features plus cluster dummies, and the encoded target.
X = s_train
y = train['target']

In [None]:
from sklearn.model_selection import train_test_split
# 80/20 hold-out split with a fixed seed; the baseline models and the
# permutation-importance cells below all use these four objects.
X_train, X_test, y_train, y_test  = train_test_split(X,y,train_size=0.8,random_state=42)

# Baseline CATBoost Classifier

In [None]:
from catboost import CatBoostClassifier, Pool
# CatBoost pools for the hold-out split: one to fit on, one to evaluate on.
train_pool = Pool(X_train, label=y_train)
test_pool = Pool(X_test, label=y_test.values)

In [None]:
# Baseline CatBoost classifier: multi-class loss/metric, GPU training, all other
# hyperparameters left at their defaults. The hold-out pool is passed as the
# evaluation set and the training curve is plotted.
model = CatBoostClassifier(
    loss_function='MultiClass',
    eval_metric='MultiClass',
    verbose=False,
    task_type = 'GPU'
)
model.fit(train_pool,plot=True,eval_set=test_pool)

In [None]:
# Multi-class log loss of the baseline on the hold-out rows.
y_pred = model.predict_proba(X_test)
log_loss(y_test,y_pred)

# Feature Selection with Permutation Importance

In [None]:
import eli5
from eli5.sklearn import PermutationImportance
# Permutation importance of the fitted baseline, scored with negative log loss
# on the hold-out rows.
perm = PermutationImportance(model, random_state=13, scoring = 'neg_log_loss')
perm.fit(X_test,y_test)

In [None]:
# Importances paired with the column names, sorted from largest to smallest,
# and drawn as a horizontal bar chart.
feat_importance = pd.DataFrame({'Feature':X_train.columns, 'Importance':perm.feature_importances_}).sort_values(by='Importance',ascending=False)
plt.figure(figsize= (10,15))
sns.barplot(data = feat_importance, y = 'Feature', x= 'Importance',orient='h')

In [None]:
# Columns whose permutation importance is negative are dropped.
# The labels are taken directly from X_train.columns (the same order the
# importances are reported in), so the list stays correct for any number of
# features or cluster dummies instead of relying on hard-coded 83 / 74 offsets.
a = perm.feature_importances_
l = [col for col, importance in zip(X_train.columns, a) if importance < 0]

print('Dropped Features')
print(l)

In [None]:
# Training and test frames without the dropped columns; target and id are
# attached to the training frame (aligned on the row index).
train_new = s_train.drop(l,axis=1)
train_new['target'] = train['target']
train_new['id'] = train['id']
test_new =s_test.drop(l,axis=1)
# Pure feature matrix, used as the default `data` of the Optuna objective `fun`.
X_new = train_new.drop(['id','target'],axis=1)

# Optimizing Catboost Classifier with OPTUNA

In [None]:
def fun(trial,data=X_new,target=y):
    """Optuna objective for the CatBoost classifier.

    Samples one hyperparameter set from ``trial``, fits a GPU CatBoost model on
    an 80/20 split of ``data``/``target`` and returns the multi-class log loss
    on the held-out 20%. The split uses a fixed seed, so every trial is scored
    on the same rows.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the hyperparameters.
    data : DataFrame
        Feature matrix; defaults to ``X_new`` as it exists when this cell runs.
    target : Series
        Labels aligned with ``data``; defaults to ``y``.

    Returns
    -------
    float
        Hold-out log loss (the study minimises it).
    """
    train_x, test_x, train_y, test_y = train_test_split(data, target, test_size=0.2,random_state=42)

    # suggest_float replaces the deprecated suggest_uniform; parameter names
    # and ranges are unchanged, and the sampling order is kept.
    param = {
        'loss_function': 'MultiClass',
        'eval_metric': 'MultiClass',
        'learning_rate' : trial.suggest_float('learning_rate',1e-3,0.1),
        'reg_lambda': trial.suggest_float('reg_lambda',1e-5,30),
        # NOTE(review): the range starts at 0 -- confirm CatBoost accepts
        # sample rates that small, or raise the lower bound.
        'subsample': trial.suggest_float('subsample',0,1),
        'random_strength': trial.suggest_float('random_strength',0,1),
        'depth': trial.suggest_int('depth',5,12),
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf',1,100),
        'num_leaves' : trial.suggest_int('num_leaves',16,64),
        'leaf_estimation_method' : 'Newton',
        'leaf_estimation_iterations': trial.suggest_int('leaf_estimation_iterations',1,10),
        'verbose':False,
        'bootstrap_type': 'Bernoulli',
        'random_state' : trial.suggest_categorical('random_state',[13,2000,7,19]),
        'task_type' : 'GPU',
        'grow_policy' : 'Lossguide'
    }
    model = CatBoostClassifier(**param)

    # The hold-out part doubles as the early-stopping evaluation set.
    model.fit(train_x,train_y,eval_set=[(test_x,test_y)],early_stopping_rounds=200,verbose=False)

    preds = model.predict_proba(test_x)
    return log_loss(test_y, preds)

In [None]:
# Minimise the objective `fun` over 50 trials, then report how many trials
# finished and the parameters of the best one.
study = optuna.create_study(direction='minimize')
study.optimize(fun, n_trials=50)
print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)

# Making Predictions with tuned Model

In [None]:

# Final CatBoost configuration: the tuned values from the study, completed with
# the fixed settings and a budget of 10000 trees.
best_params_cb = {
    **study.best_params,
    'loss_function': 'MultiClass',
    'eval_metric': 'MultiClass',
    'verbose': False,
    'n_estimators': 10000,
    'bootstrap_type': 'Bernoulli',
    'leaf_estimation_method': 'Newton',
    'task_type': 'GPU',
    'grow_policy': 'Lossguide',
}

# Predictions on Kfold

In [None]:
# Empty holder for the stacking features: nine class-probability columns from
# each of the three base models (m1, m2, m3), followed by the target.
stacked_df = pd.DataFrame(
    columns=[f'Class{k}m{m}' for m in (1, 2, 3) for k in range(1, 10)] + ['target']
)

In [None]:
# 10-fold stratified training of the tuned CatBoost model.
# cb_df collects the out-of-fold probabilities (plus the true label) fold after
# fold; preds accumulates the fold-averaged probabilities for the test set.
columns = train_new.drop(['id','target'],axis=1).columns
cb_class_cols = [f'Class{k}m1' for k in range(1, 10)]
cb_df = pd.DataFrame(columns = cb_class_cols + ['target'])
preds = np.zeros((test.shape[0],9))
kf = StratifiedKFold(n_splits = 10 , random_state = 13 , shuffle = True)
ll = []

features, labels = train_new[columns], train_new['target']
for fold_no, (fit_rows, holdout_rows) in enumerate(kf.split(features, labels), start=1):
    X_tr, X_val = features.iloc[fit_rows], features.iloc[holdout_rows]
    y_tr, y_val = labels.iloc[fit_rows], labels.iloc[holdout_rows]

    # The validation fold is also the early-stopping evaluation set.
    model = CatBoostClassifier(**best_params_cb)
    model.fit(X_tr,y_tr,eval_set=[(X_val,y_val)],early_stopping_rounds=100,verbose=False)

    y_pred = model.predict_proba(X_val)
    fold_frame = pd.DataFrame(y_pred,columns=cb_class_cols)
    fold_frame['target'] = list(y_val)
    cb_df = pd.concat([cb_df,fold_frame])

    # Each fold contributes one tenth of the final test prediction.
    preds += model.predict_proba(test_new)/kf.n_splits
    ll.append(log_loss(y_val, y_pred))
    print(fold_no,ll[-1])

In [None]:
cb_df

In [None]:
np.mean(ll)

In [None]:
df_kfold = pd.DataFrame(preds,columns=['Class_1','Class_2','Class_3','Class_4','Class_5','Class_6','Class_7','Class_8','Class_9'])
df_kfold['id']  = test['id']
df_kfold = df_kfold[['id','Class_1','Class_2','Class_3','Class_4','Class_5','Class_6','Class_7','Class_8','Class_9']]

In [None]:
df_kfold

In [None]:
output_3 = df_kfold.to_csv('cb_submit.csv',index=False)

# LGBM

In [None]:
from lightgbm import LGBMClassifier

In [None]:
# Baseline LightGBM classifier (multi-class objective, GPU device, fixed seed)
# fitted on the training part of the hold-out split.
model = LGBMClassifier(random_state= 13, objective= 'multiclass', metric = 'multi_logloss', device_type='gpu').fit(X_train, y_train)

In [None]:
# Permutation importance of the LightGBM baseline, scored with negative log
# loss on the hold-out rows.
perm = PermutationImportance(model, random_state=13, scoring = 'neg_log_loss')
perm.fit(X_test,y_test)

In [None]:
# Importances sorted from largest to smallest, drawn as a horizontal bar chart.
feat_importance = pd.DataFrame({'Feature':X_train.columns, 'Importance':perm.feature_importances_}).sort_values(by='Importance',ascending=False)
plt.figure(figsize= (8,15))
sns.barplot(data = feat_importance, y = 'Feature', x= 'Importance',orient='h')


In [None]:
# Columns whose permutation importance is negative are dropped.
# The labels are taken directly from X_train.columns (the same order the
# importances are reported in), so the list stays correct for any number of
# features or cluster dummies instead of relying on hard-coded 83 / 74 offsets.
a = perm.feature_importances_
l = [col for col, importance in zip(X_train.columns, a) if importance < 0]

print('Dropped Features')
print(l)

In [None]:
# Training and test frames without the dropped columns; target and id are
# attached to the training frame (aligned on the row index).
train_new = s_train.drop(l,axis=1)
train_new['target'] = train['target']
train_new['id'] = train['id']
test_new =s_test.drop(l,axis=1)
# Pure feature matrix, used as the default `data` of the Optuna objective `fun2`.
X_new = train_new.drop(['id','target'],axis=1)

# Tuning with OPTUNA

In [None]:
def fun2(trial, data = X_new, target=y):
    """Optuna objective for the LightGBM classifier.

    Samples one hyperparameter set from ``trial``, fits a GPU LightGBM model on
    an 80/20 split of ``data``/``target`` and returns the multi-class log loss
    on the held-out 20%. The split uses a fixed seed, so every trial is scored
    on the same rows.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the hyperparameters.
    data : DataFrame
        Feature matrix; defaults to ``X_new`` as it exists when this cell runs.
    target : Series
        Labels aligned with ``data``; defaults to ``y``.

    Returns
    -------
    float
        Hold-out log loss (the study minimises it).
    """
    train_x, test_x, train_y, test_y = train_test_split(data,target,train_size=0.8,random_state=42)

    # suggest_float (with log=True where the original used suggest_loguniform)
    # replaces the deprecated suggest_uniform / suggest_loguniform; parameter
    # names and ranges are unchanged, and the sampling order is kept.
    param = {
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-5, 30.0, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-5, 30.0, log=True),
        'colsample_bytree': trial.suggest_categorical('colsample_bytree', [0.3,0.4,0.5,0.6,0.7,0.8,0.9, 1.0]),
        # NOTE(review): no subsample_freq / bagging_freq is set here -- check
        # whether LightGBM actually applies row subsampling without it.
        'subsample': trial.suggest_float('subsample', 0,1),
        'learning_rate': 0.01,
        'max_depth': trial.suggest_int('max_depth', 1,100),
        'num_leaves' : trial.suggest_int('num_leaves', 2, 1000),
        'min_child_samples': trial.suggest_int('min_child_samples', 1, 300),
        'min_child_weight' : trial.suggest_float('min_child_weight' , 1e-5 , 1, log=True),
        # NOTE(review): cat_smooth / cat_l2 are categorical-feature settings;
        # confirm they matter for this all-numeric feature matrix.
        'cat_smooth' : trial.suggest_int('cat_smooth', 1, 100),
        'cat_l2': trial.suggest_int('cat_l2',1,20),
        'metric': 'multi_logloss',
        'random_state' : trial.suggest_categorical('random_state',[13,2000,7,19]),
        'n_estimators': 10000,
        'objective': 'multiclass',
        'device_type':'gpu'
    }
    model = LGBMClassifier(**param)

    # The hold-out part doubles as the early-stopping evaluation set.
    model.fit(train_x,train_y,eval_set=[(test_x,test_y)],early_stopping_rounds=200,verbose=False)

    pred = model.predict_proba(test_x)
    return log_loss(test_y, pred)

In [None]:
# Minimise the objective `fun2` over 20 trials, then report how many trials
# finished and the parameters of the best one.
study_2 = optuna.create_study(direction='minimize')
study_2.optimize(fun2, n_trials=20)
print('Number of finished trials:', len(study_2.trials))
print('Best trial:', study_2.best_trial.params)

In [None]:

# Final LightGBM configuration: the tuned values from the study, completed with
# the fixed settings that `fun2` used during the search.
best_params_lgbm =  study_2.best_params
best_params_lgbm['objective'] = 'multiclass'
best_params_lgbm['metric'] = 'multi_logloss'
best_params_lgbm['learning_rate'] = 0.01
best_params_lgbm['n_estimators'] = 10000
# This line used to read `best_params_lgbm['device_type'] : 'gpu'`, which is a
# bare annotation and never stored the key, so the final models did not get the
# GPU setting the search ran with.
best_params_lgbm['device_type'] = 'gpu'

# LGBM Kfold Predictions

In [None]:
# 10-fold stratified training of the tuned LightGBM model.
# lgbm_df collects the out-of-fold probabilities (plus the true label) fold
# after fold; preds_2 accumulates the fold-averaged test probabilities.
columns = train_new.drop(['id','target'],axis=1).columns
preds_2 = np.zeros((test.shape[0],9))
lgbm_class_cols = [f'Class{k}m2' for k in range(1, 10)]
lgbm_df = pd.DataFrame(columns  = lgbm_class_cols + ['target'])
kf = StratifiedKFold(n_splits = 10 , random_state = 13 , shuffle = True)
ll = []

features, labels = train_new[columns], train_new['target']
for fold_no, (fit_rows, holdout_rows) in enumerate(kf.split(features, labels), start=1):
    X_tr, X_val = features.iloc[fit_rows], features.iloc[holdout_rows]
    y_tr, y_val = labels.iloc[fit_rows], labels.iloc[holdout_rows]

    # The validation fold is also the early-stopping evaluation set.
    model = LGBMClassifier(**best_params_lgbm)
    model.fit(X_tr,y_tr,eval_set=[(X_val,y_val)],early_stopping_rounds=100,verbose=False)

    y_pred = model.predict_proba(X_val)
    fold_frame = pd.DataFrame(y_pred,columns = lgbm_class_cols)
    fold_frame['target'] = list(y_val)
    lgbm_df = pd.concat([lgbm_df,fold_frame])

    # Each fold contributes one tenth of the final test prediction.
    preds_2 += model.predict_proba(test_new)/kf.n_splits
    ll.append(log_loss(y_val, y_pred))
    print(fold_no,ll[-1])

In [None]:
lgbm_df

In [None]:
np.mean(ll)

In [None]:
df_kfold_lgbm = pd.DataFrame(preds_2,columns=['Class_1','Class_2','Class_3','Class_4','Class_5','Class_6','Class_7','Class_8','Class_9'])
df_kfold_lgbm['id']  = test['id']
df_kfold_lgbm = df_kfold_lgbm[['id','Class_1','Class_2','Class_3','Class_4','Class_5','Class_6','Class_7','Class_8','Class_9']]

In [None]:
df_kfold_lgbm

In [None]:
output_5 = df_kfold_lgbm.to_csv('lgbm_submit.csv',index=False)

# XGBoost

In [None]:
from xgboost import XGBClassifier

# Feature Selection with Permutation Importance

In [None]:
# Baseline XGBoost classifier on GPU, followed by its permutation importance
# (negative log loss on the hold-out rows).
# The seed keyword was misspelt as `random_State`; the estimator's parameter is
# `random_state`.
model = XGBClassifier(random_state=13, tree_method ='gpu_hist',predictor= 'gpu_predictor').fit(X_train, y_train)
perm = PermutationImportance(model, random_state=13, scoring = 'neg_log_loss')
perm.fit(X_test,y_test)

In [None]:
# Importances paired with the column names, sorted from largest to smallest,
# and drawn as a horizontal bar chart.
feat_importance = pd.DataFrame({'Feature':X_train.columns, 'Importance':perm.feature_importances_}).sort_values(by='Importance',ascending=False)
plt.figure(figsize= (8,15))
sns.barplot(data = feat_importance, y = 'Feature', x= 'Importance',orient='h')


In [None]:
# Columns whose permutation importance is negative are dropped.
# The labels are taken directly from X_train.columns (the same order the
# importances are reported in), so the list stays correct for any number of
# features or cluster dummies instead of relying on hard-coded 83 / 74 offsets.
a = perm.feature_importances_
l = [col for col, importance in zip(X_train.columns, a) if importance < 0]

print('Dropped Features')
print(l)

In [None]:
# Training and test frames without the dropped columns; target and id are
# attached to the training frame (aligned on the row index).
train_new = s_train.drop(l,axis=1)
train_new['target'] = train['target']
train_new['id'] = train['id']
test_new =s_test.drop(l,axis=1)
# Pure feature matrix, used as the default `data` of the Optuna objective `fun3`.
X_new = train_new.drop(['id','target'],axis=1)

# Tuning with OPTUNA

In [None]:
def fun3(trial, data = X_new, target = y):
    """Optuna objective for the XGBoost classifier.

    Samples one hyperparameter set from ``trial``, fits a GPU XGBoost model on
    an 80/20 split of ``data``/``target`` and returns the multi-class log loss
    on the held-out 20%. The split uses a fixed seed, so every trial is scored
    on the same rows.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the hyperparameters.
    data : DataFrame
        Feature matrix; defaults to ``X_new`` as it exists when this cell runs.
    target : Series
        Labels aligned with ``data``; defaults to ``y``.

    Returns
    -------
    float
        Hold-out log loss (the study minimises it).
    """
    train_x, test_x, train_y, test_y = train_test_split(data,target,train_size=0.8,random_state=42)

    # suggest_float replaces the deprecated suggest_uniform; parameter names
    # and ranges are unchanged, and the sampling order is kept.
    # NOTE(review): learning_rate, subsample and colsample_bytree all start at
    # 0 -- confirm XGBoost accepts values that small, or raise the lower bounds.
    param = {
        'learning_rate' : trial.suggest_float('learning_rate',0,1),
        'gamma' : trial.suggest_float('gamma',0,100),
        'max_depth': trial.suggest_int('max_depth', 1,100),
        'min_child_weight' : trial.suggest_float('min_child_weight', 0,100),
        'max_delta_step' : trial.suggest_float('max_delta_step',1,10),
        'subsample' : trial.suggest_float('subsample',0,1),
        'colsample_bytree' : trial.suggest_float('colsample_bytree',0,1),
        'lambda' : trial.suggest_float('lambda',1e-5,30),
        'alpha' : trial.suggest_float('alpha',1e-5,30),
        'tree_method' :'gpu_hist',
        'grow_policy':'lossguide',
        'max_leaves': trial.suggest_int('max_leaves',16,64),
        'random_state' : trial.suggest_categorical('random_state',[13,2000,7,19]),
        'objective':'multi:softprob',
        'eval_metric':'mlogloss',
        'predictor':'gpu_predictor'
    }
    model = XGBClassifier(**param)

    # The hold-out part doubles as the early-stopping evaluation set.
    model.fit(train_x,train_y,eval_set=[(test_x,test_y)],early_stopping_rounds=200,verbose=False)
    pred_y = model.predict_proba(test_x)

    return log_loss(test_y, pred_y)
    

In [None]:
# Minimise the objective `fun3` over 50 trials, then report how many trials
# finished and the parameters of the best one.
study_3 = optuna.create_study(direction='minimize')
study_3.optimize(fun3, n_trials=50)
print('Number of finished trials:', len(study_3.trials))
print('Best trial:', study_3.best_trial.params)

In [None]:
# Final XGBoost configuration: the tuned values from the study, completed with
# the fixed settings and a budget of 10000 trees.
best_params_xgb = {
    **study_3.best_params,
    'objective': 'multi:softprob',
    'eval_metric': 'mlogloss',
    'grow_policy': 'lossguide',
    'n_estimators': 10000,
    'tree_method': 'gpu_hist',
    'predictor': 'gpu_predictor',
}

# XGBoost KFOLD Predictions 

In [None]:
# 10-fold stratified training of the tuned XGBoost model.
# xgb_df collects the out-of-fold probabilities (plus the true label) fold
# after fold; preds_3 accumulates the fold-averaged test probabilities.
columns = train_new.drop(['id','target'],axis=1).columns
preds_3 = np.zeros((test.shape[0],9))
kf = StratifiedKFold(n_splits = 10 , random_state = 13 , shuffle = True)
xgb_class_cols = [f'Class{k}m3' for k in range(1, 10)]
xgb_df = pd.DataFrame(columns = xgb_class_cols + ['target'])
ll = []

features, labels = train_new[columns], train_new['target']
for fold_no, (fit_rows, holdout_rows) in enumerate(kf.split(features, labels), start=1):
    X_tr, X_val = features.iloc[fit_rows], features.iloc[holdout_rows]
    y_tr, y_val = labels.iloc[fit_rows], labels.iloc[holdout_rows]

    # The validation fold is also the early-stopping evaluation set.
    model = XGBClassifier(**best_params_xgb)
    model.fit(X_tr,y_tr,eval_set=[(X_val,y_val)],early_stopping_rounds=100,verbose = False)

    y_pred = model.predict_proba(X_val)
    fold_frame = pd.DataFrame(y_pred,columns= xgb_class_cols)
    fold_frame['target'] = list(y_val)
    xgb_df = pd.concat([xgb_df,fold_frame])

    # Each fold contributes one tenth of the final test prediction.
    preds_3 += model.predict_proba(test_new)/kf.n_splits
    ll.append(log_loss(y_val, model.predict_proba(X_val)))
    print(fold_no,ll[-1])

In [None]:
xgb_df

In [None]:
np.mean(ll)

In [None]:
df_kfold_xgb = pd.DataFrame(preds_3,columns=['Class_1','Class_2','Class_3','Class_4','Class_5','Class_6','Class_7','Class_8','Class_9'])
df_kfold_xgb['id']  = test['id']
df_kfold_xgb = df_kfold_xgb[['id','Class_1','Class_2','Class_3','Class_4','Class_5','Class_6','Class_7','Class_8','Class_9']]

In [None]:
df_kfold_xgb

In [None]:
output_6 = df_kfold_xgb.to_csv('xgb_submit.csv',index=False)

# Voting Classifier (Catboost+LGBM+XGBoost)

In [None]:
preds_combined = (preds+preds_2+preds_3)/3
df_combined = pd.DataFrame(preds_combined,columns=['Class_1','Class_2','Class_3','Class_4','Class_5','Class_6','Class_7','Class_8','Class_9'])
df_combined['id'] = test['id']
df_combined = df_combined[['id','Class_1','Class_2','Class_3','Class_4','Class_5','Class_6','Class_7','Class_8','Class_9']]

In [None]:
df_combined

In [None]:
final_output = df_combined.to_csv('blend_submit.csv',index=False)

# Stacked Model

In [None]:
# Assemble the inputs of the stacking model. Every base model contributes nine
# probability columns named Class<k>m<model>; the column-by-column copies are
# driven by loops instead of 54 hand-written assignments (same keys, same order).
class_numbers = range(1, 10)
oof_sources = {'m1': cb_df, 'm2': lgbm_df, 'm3': xgb_df}
test_sources = {'m1': df_kfold, 'm2': df_kfold_lgbm, 'm3': df_kfold_xgb}

# Training part: out-of-fold probabilities of each base model.
for suffix, oof_frame in oof_sources.items():
    for k in class_numbers:
        stacked_df[f'Class{k}{suffix}'] = oof_frame[f'Class{k}{suffix}']

# Target taken from the CatBoost frame.
# NOTE(review): assumes the three out-of-fold frames are row-aligned, i.e. the
# three K-fold loops produced their folds in the same order -- confirm.
stacked_df['target'] = cb_df['target']

# Test part: fold-averaged test probabilities under the same column names.
test_stacked_df = pd.DataFrame(
    columns=[f'Class{k}{suffix}' for suffix in test_sources for k in class_numbers]
)
for suffix, averaged_frame in test_sources.items():
    for k in class_numbers:
        test_stacked_df[f'Class{k}{suffix}'] = averaged_frame[f'Class_{k}']




In [None]:
stacked_df

In [None]:
# Rewrite the target column as plain Python ints.
stacked_df['target'] = [int(label) for label in stacked_df['target']]

In [None]:
# 10-fold stratified training of the stacking model: a default GPU CatBoost
# classifier fitted on the base models' probabilities. preds_stacked
# accumulates the fold-averaged probabilities for the test set.
preds_stacked = np.zeros((test.shape[0],9))
columns = [f'Class{k}m{m}' for m in (1, 2, 3) for k in range(1, 10)]
kf = StratifiedKFold(n_splits = 10 , random_state = 13 , shuffle = True)
ll = []

meta_features, meta_labels = stacked_df[columns], stacked_df['target']
for fold_no, (fit_rows, holdout_rows) in enumerate(kf.split(meta_features, meta_labels), start=1):
    X_tr, X_val = meta_features.iloc[fit_rows], meta_features.iloc[holdout_rows]
    y_tr, y_val = meta_labels.iloc[fit_rows], meta_labels.iloc[holdout_rows]

    # The validation fold is also the early-stopping evaluation set.
    model = CatBoostClassifier(loss_function='MultiClass',eval_metric='MultiClass',verbose=False,task_type = 'GPU')
    model.fit(X_tr,y_tr,eval_set=[(X_val,y_val)],early_stopping_rounds=200,verbose=False)
    y_pred = model.predict_proba(X_val)

    # Each fold contributes one tenth of the final test prediction.
    preds_stacked += model.predict_proba(test_stacked_df)/kf.n_splits
    ll.append(log_loss(y_val, y_pred))
    print(fold_no,ll[-1])

In [None]:
np.mean(ll)

In [None]:
df_kfold_st = pd.DataFrame(preds_stacked,columns=['Class_1','Class_2','Class_3','Class_4','Class_5','Class_6','Class_7','Class_8','Class_9'])
df_kfold_st['id']  = test['id']
df_kfold_st = df_kfold_st[['id','Class_1','Class_2','Class_3','Class_4','Class_5','Class_6','Class_7','Class_8','Class_9']]

In [None]:
df_kfold_st

In [None]:
stacked_submit = df_kfold_st.to_csv('stacked_submit.csv',index=False)

## Thanks, and don't forget to upvote, it will motivate me!!