# Intro

Welcome to the [Tabular Playground Series - May 2021](https://www.kaggle.com/c/tabular-playground-series-may-2021) competition.

![](https://storage.googleapis.com/kaggle-competitions/kaggle/26479/logos/header.png)

<span style="color: royalblue;">Please upvote the notebook if it helps you. Feel free to leave a comment in the comments section. Thank you.</span>

# Libraries

In [None]:
import numpy as np
import pandas as pd
import scipy.special
import matplotlib.pyplot as plt
import os
import random

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import log_loss
from xgboost import XGBClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix
from mlxtend.plotting import plot_confusion_matrix

import warnings
# Suppress every warning for the rest of the session (presumably to keep the
# notebook output uncluttered). NOTE(review): this also hides deprecation and
# convergence warnings from the libraries used below.
warnings.filterwarnings("ignore")

# Path

In [None]:
# Directory that holds the competition input files (note the trailing slash).
path = '/kaggle/input/tabular-playground-series-may-2021/'
# List the directory contents; as the last expression it is shown as cell output.
os.listdir(path)

# Load Data

In [None]:
# Read the three competition CSVs from the input directory.
# train/test use their first CSV column as the DataFrame index; the sample
# submission keeps its first column as a regular column.
train_data = pd.read_csv(os.path.join(path, 'train.csv'), index_col=0)
test_data = pd.read_csv(os.path.join(path, 'test.csv'), index_col=0)
samp_subm = pd.read_csv(os.path.join(path, 'sample_submission.csv'))

# Overview

In [None]:
# Report how many rows the training and test sets contain.
print('Number train samples:', train_data.shape[0])
print('Number test samples:', test_data.shape[0])

In [None]:
# Count the rows per class label, ordered by label rather than by frequency.
train_data['target'].value_counts().sort_index()

In [None]:
# Preview the first rows of the training data.
train_data.head()

# Encode Target

In [None]:
def encode_target(s: str) -> int:
    """Convert a class label such as ``'Class_3'`` into a zero-based class index.

    The number after the last underscore is parsed and shifted down by one, so
    ``'Class_1'`` becomes ``0``. XGBoost's multi-class objectives expect labels
    in ``0 .. num_class - 1``; recent ``XGBClassifier`` versions reject labels
    that do not start at 0.

    Args:
        s: Label of the form ``'<prefix>_<number>'``.

    Returns:
        The numeric suffix minus one.

    Raises:
        IndexError: If ``s`` contains no underscore.
        ValueError: If the suffix is not an integer.
    """
    suffix = s.rsplit('_', 1)[1]
    return int(suffix) - 1

# Replace the string class labels with the integer codes from encode_target.
train_data['target'] = train_data['target'].map(encode_target)

In [None]:
# Preview the first rows again to check the converted target column.
train_data.head()

# Define Train, Val And Test Data

In [None]:
# Separate the features from the label by name rather than by position, so the
# selection stays correct even if 'target' is not the last column.
X = train_data.drop(columns='target')
y = train_data['target']
# Hold out 33% of the rows for validation; the fixed seed makes the split
# reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.33, random_state=2021
)
# Give the test features the same columns, in the same order, as the training
# features; a missing column fails here instead of at prediction time.
X_test = test_data[X.columns]

# Model

In [None]:
# Gradient-boosted trees with a soft-probability multi-class objective, scored
# with multi-class log loss during training.
# num_class is derived from the training labels instead of being hard-coded:
# the previous value of 3 did not match the four Class_* columns written to
# the submission.
model = XGBClassifier(objective='multi:softprob',
                      eval_metric='mlogloss',
                      n_estimators=50,
                      num_class=y_train.nunique())
model.fit(X_train, y_train)

In [None]:
# Predicted class probabilities for the held-out validation rows.
y_val_pred = model.predict_proba(X_val)

In [None]:
# Multi-class log loss on the validation split. The model's class order is
# passed explicitly so each probability column is matched to its label even if
# a class happens to be absent from y_val.
print('Validation Score:', log_loss(y_val, y_val_pred, labels=model.classes_))

In [None]:
# Predicted class probabilities for the test set. Despite the name, y_test
# holds model predictions, not true labels.
y_test = model.predict_proba(X_test)

In [None]:
# One submission column per predicted class: Class_1 ... Class_k, where k is
# the number of probability columns returned by the model.
targets = ['Class_' + str(i) for i in range(1, y_test.shape[1] + 1)]
# `columns` must be the flat list of names; wrapping it in another list builds
# MultiIndex columns. Reusing the submission's index keeps the rows aligned
# when the frame is assigned.
samp_subm[targets] = pd.DataFrame(y_test, columns=targets, index=samp_subm.index)

# Export

In [None]:
# Write the submission file; index=False leaves the DataFrame's row index out
# of the CSV.
samp_subm.to_csv('submission.csv', index=False)