## Tabular Playground - October 2021
> Models based on the [scikit-learn](https://scikit-learn.org/stable/user_guide.html) Python package

In [None]:
# Basic Data Preprocessing
import numpy as np
import pandas as pd 

# Data Visualization
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

# Data Preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import RobustScaler


#Modelling
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier

# Model Evaluation
from sklearn.metrics import confusion_matrix,ConfusionMatrixDisplay,accuracy_score
from sklearn import metrics

In [None]:
# Load the competition CSVs.
# train.csv: the leading ID column is discarded right after reading.
# test.csv: kept whole here; its ID column is split off in a later cell.
train_df = pd.read_csv('../input/tabular-playground-series-oct-2021/train.csv')
train_df = train_df.iloc[:, 1:]
test_df = pd.read_csv('../input/tabular-playground-series-oct-2021/test.csv')

In [None]:
# List the distinct dtypes that occur among the training columns
train_df.dtypes.unique()

In [None]:
# Summary statistics for the training frame (this is describe(), not head();
# include='all' asks pandas to summarise every column regardless of dtype)
train_df.describe(include='all')

In [None]:
# Split the scoring data by column position.
# First column -> one-column DataFrame of IDs, reused when building the submission.
test_df_id = test_df.iloc[:, :1]
# Remaining columns -> feature frame without the ID.
test_df_X = test_df.iloc[:, 1:]

In [None]:
# Shapes (rows, columns) of the train and test dataframes.
# train_df has already lost its first (ID) column at load time; test_df still has it.
train_df.shape, test_df.shape

In [None]:
# Total number of missing cells in each frame (train, test)
train_df.isna().to_numpy().sum(), test_df.isna().to_numpy().sum()

In [None]:
# Separate the target from the training features.
train_df_Y = train_df.target
# Drop the label by name instead of slicing a hard-coded number of columns:
# a positional slice silently leaks `target` into the features (or drops real
# features) as soon as the column count or the position of `target` differs.
train_df_X = train_df.drop(columns='target')

# Split the features by dtype: int64 columns are treated as categorical,
# float64 columns as continuous.
categorical_variables_train = train_df_X.select_dtypes("int64")
continuous_variables_train = train_df_X.select_dtypes("float64")

In [None]:
# Scale and transform dataset
def data_scaler_fit(option, df):
    """Fit a feature scaler on ``df`` and return the fitted scaler.

    Parameters
    ----------
    option : int
        ``1`` selects ``StandardScaler``; ``2`` selects ``RobustScaler``.
    df : array-like
        Data handed unchanged to the scaler's ``fit`` method.

    Returns
    -------
    The fitted scaler object (whatever the scaler's ``fit`` returns).

    Raises
    ------
    ValueError
        If ``option`` is neither 1 nor 2. Previously this case fell through
        to the ``return`` and surfaced as an ``UnboundLocalError``.
    """
    # Validate and pick the scaler first so a bad option fails before any
    # fitting work is attempted.
    if option == 1:
        scaler = StandardScaler()
    elif option == 2:
        scaler = RobustScaler()
    else:
        raise ValueError(
            "Unknown scaler option " + repr(option)
            + "; expected 1 (StandardScaler) or 2 (RobustScaler)"
        )
    return scaler.fit(df)

In [None]:
# Fit the scaler (option 1 = StandardScaler) on the continuous training columns only
transformer = data_scaler_fit(option=1, df=continuous_variables_train)

In [None]:
# Rebuild the training features as one NumPy array: scaled continuous columns
# first, then the integer (categorical) columns unchanged.
train_df_X = np.concatenate(
    [
        transformer.transform(continuous_variables_train),
        categorical_variables_train.to_numpy(),
    ],
    axis=1,
)
# Build the scoring matrix with the same column layout. The columns are
# selected by name from the untouched `test_df` rather than from `test_df_X`:
# this cell rebinds `test_df_X` to an ndarray, so reading from it would make
# the cell fail on a second run. Selecting by the training column names also
# keeps the ID column out of the features.
test_df_X = np.concatenate(
    [
        transformer.transform(test_df[continuous_variables_train.columns]),
        test_df[categorical_variables_train.columns].to_numpy(),
    ],
    axis=1,
)

# Divide into train and test: 30% of the labelled rows are held out for
# validation, with a fixed seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    train_df_X, train_df_Y, test_size=0.30, random_state=45
)

In [None]:
# Define Different training Models
def training_models(model_type, X_train, y_train):
    """Build the classifier named by ``model_type``, fit it, and return it.

    Parameters
    ----------
    model_type : str
        One of ``'SGD'`` (stochastic gradient descent), ``'MLP'``
        (multi-layer perceptron), ``'DTC'`` (decision tree) or ``'RFC'``
        (random forest).
    X_train : array-like
        Training features, passed unchanged to the model's ``fit``.
    y_train : array-like
        Training labels, passed unchanged to the model's ``fit``.

    Returns
    -------
    The fitted estimator.

    Raises
    ------
    ValueError
        If ``model_type`` is not one of the supported codes. Previously this
        surfaced as an ``UnboundLocalError`` at the ``fit`` call.
    """
    if model_type == 'SGD':
        # Stochastic Gradient Descent with logistic loss.
        # NOTE(review): scikit-learn >= 1.1 spells this loss "log_loss" and
        # later releases reject "log"; kept as-is so older installs still
        # work. Confirm against the installed version.
        model = SGDClassifier(loss="log", penalty="l2", max_iter=100)
    elif model_type == 'MLP':
        # Multi-layer Perceptron
        model = MLPClassifier(alpha=1e-5, learning_rate='adaptive', warm_start=True,
                              early_stopping=True, max_iter=300, random_state=1)
    elif model_type == 'DTC':
        # Decision Tree. For a classification tree 'sqrt' selects the same
        # number of features as the former 'auto' value, which newer
        # scikit-learn releases no longer accept.
        model = DecisionTreeClassifier(max_depth=10, max_features='sqrt', random_state=1)
    elif model_type == 'RFC':
        # Random Forest
        model = RandomForestClassifier(max_depth=10, warm_start=True, random_state=1)
    else:
        raise ValueError(
            "Unknown model_type " + repr(model_type)
            + "; expected one of 'SGD', 'MLP', 'DTC', 'RFC'"
        )

    model.fit(X_train, y_train)
    return model

In [None]:
# Train the multi-layer perceptron on the training split (labels as a plain ndarray)
model = training_models(
    model_type='MLP',
    X_train=X_train,
    y_train=y_train.to_numpy(),
)

In [None]:
# Predicted class labels for the hold-out split, as a one-column DataFrame
target_predict = pd.DataFrame(
    data=model.predict(X_test),
    columns=['pred_target'],
)

In [None]:
# Confusion matrix on the hold-out split; normalize='true' scales each row
# by the number of true samples of that class.
conf_metrix = confusion_matrix(
    y_true=y_test,
    y_pred=target_predict.to_numpy(),
    labels=model.classes_,
    normalize='true',
)
disp = ConfusionMatrixDisplay(
    confusion_matrix=conf_metrix,
    display_labels=model.classes_,
)
disp.plot()
plt.show()

In [None]:
# Hold-out accuracy, reported as a percentage
print(f"Accuracy: {accuracy_score(y_test, target_predict.to_numpy()) * 100}%")

### Predict and Submit to leaderboard

In [None]:
# Predict on actual test dataset.
# Use predict_proba instead of predict so that `probability` really holds a
# class probability rather than hard class labels.
# NOTE(review): this assumes the leaderboard scores predicted probabilities
# (e.g. ROC AUC) - confirm against the competition rules.
# predict_proba columns follow model.classes_; column 1 is taken as the
# positive class, which assumes a binary target whose positive label sorts
# last - TODO confirm.
probability = pd.DataFrame(
    model.predict_proba(test_df_X)[:, 1],
    columns=['target'],
)

In [None]:
# Assemble the submission: ID column next to the predicted `target` column,
# then write it out without the DataFrame index.
submission = pd.concat(
    objs=[test_df_id, probability],
    axis='columns',
)
submission.to_csv(path_or_buf='submission.csv', index=False)