In [None]:
# import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Fix NumPy's global random state so every np.random draw further down the
# notebook is repeatable from a fresh kernel.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

In [None]:
# Share of the training rows held out for validation; the remaining
# (1 - VAL_SIZE) share is what the model is fitted on.
VAL_SIZE = 0.9

# XGBoost hyper-parameters, passed to XGBClassifier in the model cell.
LEARNING_RATE = 0.1
N_ESTIMATORS = 25_000
MAX_DEPTH = 9
N_JOBS = 16
TREE_METHOD = 'hist'
VERBOSITY = 1

In [None]:
# Read the three competition CSV files, in order: the labelled training rows,
# the unlabelled test rows and the sample submission used as output template.
train, test, sub = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/tabular-playground-series-nov-2021/train.csv',
        '../input/tabular-playground-series-nov-2021/test.csv',
        '../input/tabular-playground-series-nov-2021/sample_submission.csv',
    )
)

In [None]:
# Preview the first five training rows as a quick sanity check of the load.
train.head(n=5)

In [None]:
# separate train and validation sets
# `bids` holds the id of every training row. A random (1 - VAL_SIZE) share of
# them is drawn without replacement for fitting; all remaining ids form the
# validation set.
bids = train['id'].values
# Round before converting to int: 1 - VAL_SIZE is not exact in floating point
# (1 - 0.9 == 0.0999...), so plain truncation can drop one row from the sample.
n_train_rows = int(round((1 - VAL_SIZE) * len(train)))
train_bids = np.random.choice(bids, replace=False, size=n_train_rows)
# Vectorised membership test. The previous per-element `x not in train_bids`
# scanned the whole array once for every id (quadratic in the number of rows).
# Boolean masking keeps the ids in their original order.
valid_bids = bids[~np.isin(bids, train_bids)]

In [None]:
# Build the feature matrices and label vectors for both splits.
# Each membership mask is computed once and reused for features and labels.
is_train_row = train['id'].isin(train_bids)
is_valid_row = train['id'].isin(valid_bids)
train_part = train[is_train_row]
valid_part = train[is_valid_row]

# 'id' is only a row key and 'target' is the label, so neither is a feature.
non_feature_cols = ['id', 'target']
X_train = train_part.drop(columns=non_feature_cols)
y_train = train_part['target'].values
X_valid = valid_part.drop(columns=non_feature_cols)
y_valid = valid_part['target'].values

In [None]:
# define the model
from xgboost import XGBClassifier

# Gather the hyper-parameters from the configuration cell in one mapping so
# the full model setup can be read (and printed) in a single place.
xgb_params = dict(
    learning_rate=LEARNING_RATE,
    n_estimators=N_ESTIMATORS,
    max_depth=MAX_DEPTH,
    n_jobs=N_JOBS,
    tree_method=TREE_METHOD,
    verbosity=VERBOSITY,
)
xgb = XGBClassifier(**xgb_params)

In [None]:
# fit the model on the train set
# `fit` returns the estimator it was called on, so `model_xgb` and `xgb`
# refer to the same fitted object.
model_xgb = xgb.fit(X=X_train, y=y_train)

In [None]:
# predict on the validation set
# predict_proba yields one column per class; column 1 is kept as the score
# for the positive class.
valid_class_proba = model_xgb.predict_proba(X_valid)
y_valid_pred = valid_class_proba[:, 1]

In [None]:
# score on the validation set
# ROC AUC of the predicted positive-class probabilities against the true labels.
from sklearn.metrics import roc_auc_score
roc_auc_score(y_true=y_valid, y_score=y_valid_pred)

In [None]:
# predict on the test set
# Keep the test features as a DataFrame (no `.values`), the same container
# type the model was fitted and validated on, so columns stay labelled.
X_test = test.drop(['id'], axis = 1)
# Submit the positive-class probability (column 1), consistent with how the
# validation cell scores the model with ROC AUC. `predict` would emit hard
# class labels, which throws away the ranking information that AUC measures.
submission_xgb = xgb.predict_proba(X_test)[:, 1]

In [None]:
# submit
# Overwrite the template's 'target' column with the model output, then write
# the frame to disk without the pandas row index.
sub['target'] = submission_xgb
sub.to_csv(path_or_buf='./submission.csv', index=False)