In [None]:
import numpy as np 
import pandas as pd
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler

# Read in Data

In [None]:
# Load the three competition CSVs in one pass; every file uses its `id`
# column as the DataFrame index.
train_data, test_data, submission = (
    pd.read_csv(csv_path, index_col='id')
    for csv_path in (
        "../input/tabular-playground-series-mar-2021/train.csv",
        "../input/tabular-playground-series-mar-2021/test.csv",
        '../input/tabular-playground-series-mar-2021/sample_submission.csv',
    )
)

# Set Up Categorical and Numerical Columns

In [None]:
# Split the label off the feature frame. `pop` removes the column from
# `train_data` in place, so running this cell a second time used to fail
# with KeyError('target'); the guard makes the cell safe to re-run.
if 'target' in train_data.columns:
    y = train_data.pop('target')
elif 'y' not in globals():
    # Neither the column nor a previously extracted label is available:
    # fail the same way the unguarded pop would have.
    raise KeyError('target')

In [None]:
# Numeric feature columns (these are later handed to the scaler).
# Selecting by the generic 'number' kind, rather than by the exact dtype
# names 'int64'/'float64', also picks up other widths such as int32 or
# float32. Such columns would otherwise appear in neither column list and
# never reach the model.
num_cols = train_data.select_dtypes(include='number').columns.tolist()
num_cols

In [None]:
# Categorical features: the columns whose dtype is plain object ('O').
cat_cols = [name for name, kind in train_data.dtypes.items() if kind == 'O']
cat_cols

# Pipeline Setup

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

In [None]:
# One-hot encode the categorical columns; categories not seen during fit
# are ignored at transform time instead of raising.
cat_transform = OneHotEncoder(handle_unknown='ignore')

# Standardise the numeric columns.
num_transform = StandardScaler()

# Route each column group to its transformer: (name, transformer, columns).
column_steps = [
    ('num', num_transform, num_cols),
    ('cat', cat_transform, cat_cols),
]
preprocessor = ColumnTransformer(transformers=column_steps)

In [None]:
from xgboost import XGBClassifier
# Classifier created with no arguments, i.e. with whatever defaults the
# installed library provides; the grid search further down varies some of
# them through its `model__*` keys.
# NOTE(review): the variable shares its name with the `xgboost` package, so a
# later `import xgboost` in this notebook would rebind it.
xgboost =  XGBClassifier()

In [None]:
# Chain preprocessing and the classifier into a single estimator. The step
# names ('preproc', 'model') are the prefixes used to address step
# parameters, e.g. `model__max_depth`.
pipeline_steps = [
    ('preproc', preprocessor),
    ('model', xgboost),
]
my_pipeline = Pipeline(steps=pipeline_steps)

In [None]:
from sklearn.model_selection import GridSearchCV

# Full search space: two values for each of four parameters, i.e.
# 2 * 2 * 2 * 2 = 16 candidate combinations. The `model__` prefix targets
# the pipeline step named 'model'.
# NOTE(review): `eta`, `lambda` and `alpha` look like the native booster
# spellings (the scikit-learn wrapper documents learning_rate / reg_lambda /
# reg_alpha) - confirm the installed XGBClassifier accepts them.
full_search_space = {
    'model__eta': [0.3, 0.6],
    'model__max_depth': [6, 12],
    'model__lambda': [1, 5],
    'model__alpha': [0, 7],
}
params = [full_search_space]

# Single-candidate grid; not referenced by the other visible cells.
params1 = [{'model__eta': [0.3]}]

In [None]:
# Exhaustive search over `params` with 3-fold cross-validation, ranking the
# candidates by ROC AUC. n_jobs=-1 requests all available cores and
# verbose=1 prints progress messages while fitting.
grid = GridSearchCV(
    estimator=my_pipeline,
    param_grid=params,
    scoring='roc_auc',
    cv=3,
    n_jobs=-1,
    verbose=1,
)

In [None]:
# Run the search: each of the 16 combinations in `params` is evaluated on
# each of the 3 folds (48 pipeline fits), which makes this the slow cell.
grid.fit(train_data,y)

In [None]:
# Show the pipeline configured with the best-scoring parameter combination.
grid.best_estimator_

In [None]:
# Cross-validated score of the best candidate; the metric is ROC AUC, as set
# by the `scoring` argument of the search.
grid.best_score_

In [None]:
# Parameter values of the best-scoring candidate.
grid.best_params_

In [None]:
# Quick sanity check of the prediction output on the first few test rows.
# Scoring the whole test set here would only duplicate the identical call in
# the following cell, whose result is the one actually kept.
grid.predict_proba(test_data.head())

In [None]:
# Class-probability predictions for the complete test set; the result is
# indexed below as a 2-D array (rows = test rows, columns = classes).
pred = grid.predict_proba(test_data)
pred

In [None]:
# Keep only the second probability column.
# NOTE(review): presumably this is the positive class (label 1) - checking
# `grid.classes_` would confirm the column order.
y_pred = pred[:,1]
y_pred

In [None]:
# Attach the predictions by `id` instead of by row position. `y_pred` follows
# the row order of `test_data`, so a plain array assignment would silently
# pair scores with the wrong ids if sample_submission.csv were ordered
# differently. Assigning a Series makes pandas align on the shared `id` index.
submission['target'] = pd.Series(y_pred, index=test_data.index)

# Every id in the submission file must have received a prediction.
assert submission['target'].notna().all(), "some submission ids have no matching test prediction"

# Write the submission file; the `id` index is saved as the first column.
submission.to_csv('gs_xgboost.csv')