# Importing Packages

In [None]:
import pandas as pd
import numpy as np
import missingno
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_squared_log_error
from sklearn.model_selection import train_test_split,KFold, GroupKFold, StratifiedKFold
import warnings

warnings.filterwarnings("ignore")

# Importing Data

In [None]:
# Load the competition's training and test tables (in that order) into
# `df` and `test`.
df, test = map(
    pd.read_csv,
    (
        "../input/tabular-playground-series-may-2021/train.csv",
        "../input/tabular-playground-series-may-2021/test.csv",
    ),
)

In [None]:
# Preview the first rows of the training table.
df.head()

In [None]:
# Count the rows per target label to see how balanced the classes are.
df['target'].value_counts()

In [None]:
# Replace the raw labels in `target` with integer codes (0 .. n_classes - 1,
# assigned in sorted label order). The fitted encoder stays available as `le`
# so codes can be mapped back to label names via `le.classes_`.
le = LabelEncoder().fit(df['target'])
df['target'] = le.transform(df['target'])

In [None]:
# Print column dtypes, non-null counts and memory usage of the training table.
df.info()

In [None]:
# Show pandas' default descriptive statistics for the training table.
df.describe()

In [None]:
# List every column name of the training table (the `target` column included).
df.columns

In [None]:
# Feature columns used for both training and test-time inference.
# The label is excluded, and so is `id`: a row identifier carries no signal and
# would otherwise be fed to the model as a numeric feature.
# NOTE(review): assumes the identifier column is named "id" - confirm against
# train.csv. If the column is absent this filter is a no-op.
cols = [col for col in df.columns if col not in ("id", "target")]

# Training

In [None]:
# K-fold cross-validated CatBoost training.
#
# For each of the `n_splits` folds a fresh CatBoostClassifier is fitted on the
# training part, monitored on the held-out part, and used to predict class
# probabilities for the test set. The per-fold test probabilities are summed
# in `test_preds` and divided by `n_splits` at the end, so `test_preds`
# finishes as the fold-averaged probability matrix (rows: test samples,
# columns: classes).
#
# NOTE(review): the reported "RMSE" is computed on the integer class codes
# returned by `model.predict`. It is kept as a rough diagnostic only; a
# probability-based metric on `predict_proba` would be more informative.
test_preds = None
train_rmse = 0
val_rmse = 0
n_splits = 10

# Materialise the arrays once instead of rebuilding them on every fold.
features = df[cols].values
labels = df['target'].values
test_features = test[cols].values

# Plain KFold ignores the labels passed to split(); they are passed only to
# keep the call signature uniform with the stratified splitters.
kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)
for fold, (tr_index, val_index) in enumerate(kf.split(features, labels)):

    print("-" * 50)
    print(f"Fold {fold + 1}")

    x_train, x_val = features[tr_index], features[val_index]
    y_train, y_val = labels[tr_index], labels[val_index]

    eval_set = [(x_val, y_val)]

    # od_type/od_wait configure CatBoost's overfitting detector on the eval
    # set; use_best_model keeps the best iteration found on that eval set.
    model = CatBoostClassifier(depth=4,
            max_ctr_complexity=15,
            iterations=10000,
            od_wait=1000, od_type='Iter',
            learning_rate=0.01,
            min_data_in_leaf=1,
            use_best_model=True,
            loss_function='MultiClass')
    model.fit(x_train, y_train, eval_set=eval_set, verbose=1000)

    # RMSE is taken as sqrt(MSE) rather than via the `squared=False` keyword,
    # which newer scikit-learn releases no longer accept.
    train_preds = model.predict(x_train)
    fold_train_rmse = np.sqrt(mean_squared_error(y_train, train_preds))
    train_rmse += fold_train_rmse
    print("Training RMSE : " , fold_train_rmse)

    val_preds = model.predict(x_val)
    fold_val_rmse = np.sqrt(mean_squared_error(y_val, val_preds))
    val_rmse += fold_val_rmse
    print("Validation RMSE : " , fold_val_rmse)

    # Accumulate this fold's test-set class probabilities.
    fold_test_proba = model.predict_proba(test_features)
    if test_preds is None:
        test_preds = fold_test_proba
    else:
        test_preds += fold_test_proba

print("-" * 50)
print("Average Training RMSE : " , train_rmse / n_splits)
print("Average Validation RMSE : " , val_rmse / n_splits)

# Turn the summed probabilities into the mean over folds.
test_preds /= n_splits

In [None]:
# Display the fold-averaged class-probability matrix predicted for the test set.
test_preds

In [None]:
# Fill the sample submission with the predicted probabilities: column k of
# `test_preds` goes into the k-th class column listed below. Ends with a
# preview of the resulting table.
sub = pd.read_csv("../input/tabular-playground-series-may-2021/sample_submission.csv")
for class_idx, class_col in enumerate(('Class_1', 'Class_2', 'Class_3', 'Class_4')):
    sub[class_col] = test_preds[:, class_idx]
sub.head()

In [None]:
# Write the submission table to CatBoost.csv, omitting the DataFrame index.
sub.to_csv("CatBoost.csv",index=False)