# Importing the required libraries

In [None]:
# Libraries to handle dataset
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

from imblearn.over_sampling import SMOTE

from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

# Silence every warning for the rest of the session
import warnings
warnings.filterwarnings(action='ignore')

# Lift pandas' display limits so no columns or rows are truncated in cell output
pd.options.display.max_columns = None
pd.options.display.max_rows = None

# Reading Datasets

In [None]:
# Training set: the ID column becomes the row index
train = pd.read_csv(
    '../input/santander-customer-satisfaction/train.csv',
    index_col=['ID'],
)

# Test set: the first column (position 0) becomes the row index
test = pd.read_csv(
    "../input/santander-customer-satisfaction/test.csv",
    index_col=0,
)

# Sample submission file, read with the default integer index
samp_sub = pd.read_csv(
    '../input/santander-customer-satisfaction/sample_submission.csv',
)

In [None]:
# Preview the first rows of the training data
train.head()

In [None]:
train['TARGET'].unique()      # Distinct values present in the response column

In [None]:
train.shape                   # (rows, columns) of the training data

In [None]:
# Summarise the training frame itself (index range, column dtypes, memory use).
# Calling info() on train.isna() would only describe the all-boolean mask that
# isna() returns, which says nothing about the real dtypes or missing values.
train.info()

In [None]:
train.dtypes                   # Data type of each column

In [None]:
# Disabled cell: would display only the columns whose dtype is not object
#train.select_dtypes(exclude='object')     # Excluding the columns having object datatype

In [None]:
train.shape                   # Re-check (rows, columns)

In [None]:
train.isna().sum()            # Count of missing (NaN) values per column

# Splitting features and response columns

In [None]:
# Every column except the last is a feature; the last column is the response
X_train, Y_train = train.iloc[:, :-1], train.iloc[:, -1]

# Standardizing the dataset

In [None]:
# Learn the scaling statistics from the training features only
scaler = StandardScaler().fit(X_train)

# Apply that same fitted scaling to the training and the test features
Xscaled = scaler.transform(X_train)
testscaled = scaler.transform(test)

In [None]:
testscaled                    # Display the scaled test features

In [None]:
Y_train.value_counts()        # Number of rows for each response value

In [None]:
Y_train.hist()                # Histogram of the response before resampling

In [None]:
# Oversample the scaled training data with SMOTE, which synthesises additional
# samples for the under-represented response class.
# A fixed random_state makes the synthetic samples (and therefore the model
# trained on them) reproducible between runs; 2021 is the same seed the
# XGBClassifier below is created with.
oversample = SMOTE(random_state=2021)
X, y = oversample.fit_resample(Xscaled, Y_train)

In [None]:
X.shape, y.shape              # Shapes of the resampled features and response

In [None]:
y.hist()                      # Histogram of the response after resampling

In [None]:
y.value_counts()              # Number of rows for each response value after resampling

# MODEL

In [None]:
# XGBoost classifier with a fixed seed; the eval_metric override in the trailing
# comment is disabled, so the library default is used
XGB_clf = XGBClassifier(random_state=2021) #eval_metric='mlogloss'

In [None]:
# Train on the resampled features X and response y
XGB_clf.fit(X, y)

In [None]:
# The Santander Customer Satisfaction competition is scored on ROC AUC, which
# depends on how predictions are ranked. Hard 0/1 labels from predict() discard
# that ranking, so keep the predicted probability instead.
# Column 1 of predict_proba is the second entry of XGB_clf.classes_
# (TARGET == 1, assuming a 0/1 response -- confirm with XGB_clf.classes_).
y_pred = XGB_clf.predict_proba(testscaled)[:, 1]

In [None]:
y_pred                        # Display the predictions for the test set

In [None]:
samp_sub.head()               # Preview the expected submission layout

# Creating Submission File

In [None]:
# Take the IDs from the test frame's index (test.csv was read with its first
# column as the index) so each prediction is paired with the row it was computed
# from, instead of relying on sample_submission.csv listing the IDs in the same
# order as test.csv.
submit = pd.DataFrame({"ID": test.index, "TARGET": y_pred})

In [None]:
submit.shape                  # (rows, columns) of the submission frame

In [None]:
# Write the submission to the working directory; index=False leaves out the
# DataFrame's own row index so the file contains only the ID and TARGET columns
submit.to_csv("XGBoost_Classifier_SMOTE.csv", index=False)