In [None]:
# Some packages may need to be installed first (e.g. `%pip install xgboost optuna`).
import numpy as np
import optuna
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder, LabelEncoder
from xgboost import XGBClassifier

In [None]:
# Load the train and test tables from their .csv files (train first, then test).
train_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in ('PATH_TO_TRAIN_DATASET', 'PATH_TO_TEST_DATASET')
)

In [None]:
# Separate the label from the features.
# The target column must not stay in the feature matrix: keeping it leaks the
# answer into training and makes the training features differ from what is
# available at prediction time. `drop` returns a new frame, so `train_data`
# itself is left untouched.
y_train = train_data['YOUR_TARGET']
x_train = train_data.drop(columns=['YOUR_TARGET'])

In [None]:
# Untuned reference model: fixed, hand-picked hyperparameters.
baseline_params = {
    'learning_rate': 0.2,
    'n_estimators': 1000,
    'max_depth': 10,
    'min_child_weight': 4,
    'gamma': 0,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'objective': 'binary:logistic',
    'nthread': 4,
    'scale_pos_weight': 1,
    'seed': 27,
    'eval_metric': 'auc',
}
model = XGBClassifier(**baseline_params)

# Fit the baseline on the full training set.
model.fit(x_train, y_train)

In [None]:
# Predict the test data with the baseline model.
# Select exactly the training feature columns, in training order, so that
# extra or re-ordered columns in the test file do not break the prediction.
pred = model.predict(test_data[x_train.columns])

In [None]:
# Tuning XGB model using Optuna
    
def objective(trial, X, y):
    """Optuna objective: mean 10-fold cross-validated ROC-AUC of an XGBoost classifier.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        X: Feature matrix handed to ``cross_val_score``.
        y: Target values aligned with ``X``.

    Returns:
        The mean ROC-AUC over the 10 folds (the value the study maximizes).
    """
    params = {
        # Fixed settings (not tuned).
        'objective': 'binary:logistic',
        'eval_metric': 'auc',
        'booster': 'gbtree',
        'n_estimators': 1000,
        # Search space.
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'max_depth': trial.suggest_int('max_depth', 3, 11),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'subsample': trial.suggest_float('subsample', 0.4, 1.0),
        'gamma': trial.suggest_float('gamma', 0, 1),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'alpha': trial.suggest_float('alpha', 0, 10),
        'lambda': trial.suggest_float('lambda', 0, 10),
        # Lower bound is 1: a weight of 0 would cancel the positive class
        # entirely and waste the trial on a degenerate model.
        'scale_pos_weight': trial.suggest_int('scale_pos_weight', 1, 10),
    }

    xgb_model = XGBClassifier(**params)

    # cross_val_score clones and fits the estimator on every fold, so no
    # separate fit is needed here; only the X, y passed in are used.
    cv_scores = cross_val_score(xgb_model, X, y, cv=10, n_jobs=-1, scoring="roc_auc")
    return np.mean(cv_scores)

# Build a study whose goal is to maximize the objective's return value.
study = optuna.create_study(direction='maximize')


def _score_trial(trial):
    """Evaluate a single trial against the training data."""
    return objective(trial, x_train, y_train)


# Run the search: 100 trials, with garbage collection after each one.
study.optimize(_score_trial, n_trials=100, gc_after_trial=True)

# Show the best hyperparameters found.
print(study.best_params)


In [None]:
# Train the final model with the best hyperparameters found by the Optuna study.
# `study.best_params` only holds the sampled values, so the settings that were
# held fixed during tuning are added back here. (A set literal such as
# {'...'} cannot be unpacked with **, so a real mapping is required.)
params = {
    'objective': 'binary:logistic',
    'eval_metric': 'auc',
    'booster': 'gbtree',
    'n_estimators': 1000,
    **study.best_params,
}
model = XGBClassifier(**params)

model.fit(x_train, y_train)

In [None]:
# Predict the test data with the tuned model.
# Select exactly the training feature columns, in training order, so that
# extra or re-ordered columns in the test file do not break the prediction.
pred = model.predict(test_data[x_train.columns])

In [None]:
# Save the prediction results to a tabular ('.csv') file.
# `model.predict` returns a NumPy array, which has no `to_csv` method, so the
# predictions are wrapped in a Series that reuses the test rows' index.
pred_series = pd.Series(pred, index=test_data.index, name='prediction')
pred_series.to_csv('PATH_TO_SAVE_THE_PREDICTION')