Reference tutorial (Optuna for XGBoost hyperparameter tuning): https://practicaldatascience.co.uk/machine-learning/how-to-use-optuna-for-xgboost-hyperparameter-tuning

In [5]:
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import xgboost as xgb
import optuna
from sklearn.metrics import accuracy_score

# Load the MNIST training and test tables from local CSV files.
mnist_train = pd.read_csv("/home/rainer/Downloads/ML_datasets/mnist_train.csv")
mnist_test = pd.read_csv("/home/rainer/Downloads/ML_datasets/mnist_test.csv")

# Features are every column except 'label'; the target is kept as a
# one-column DataFrame (double brackets), not a Series.
X = mnist_train.drop(columns='label')
Y = mnist_train[['label']]

In [6]:
def objective(trial):
    """Score one Optuna trial by the hold-out accuracy of an XGBoost classifier.

    The notebook-level ``X`` / ``Y`` frames are split 85/15 with a fixed
    ``random_state``, so every trial is evaluated on the same partition.
    Hyperparameters are drawn from ``trial``. The hold-out part is used both
    as the ``eval_set`` passed to ``fit`` and as the data the returned
    accuracy is measured on.

    Parameters
    ----------
    trial
        Optuna trial object providing the ``suggest_*`` sampling calls.

    Returns
    -------
    The value of ``accuracy_score`` for the fitted model on the hold-out split.
    """
    X_fit, X_holdout, y_fit, y_holdout = train_test_split(
        X, Y, test_size=0.15, random_state=42
    )

    # Sample the search space. The suggest_* calls stay in their original
    # order so the trial's parameter listing keeps the same layout.
    lambda_ = trial.suggest_float('lambda', 1e-1, 10.0)
    alpha = trial.suggest_float('alpha', 1e-3, 1e-2)
    colsample_bytree = trial.suggest_categorical('colsample_bytree', [0.6, 0.7, 0.8])
    subsample = trial.suggest_categorical('subsample', [0.9, 1.0])
    learning_rate = trial.suggest_float('learning_rate', 0.3, 0.5)
    max_depth = trial.suggest_int('max_depth', 5, 7)
    min_child_weight = trial.suggest_int('min_child_weight', 1, 5)

    param = {
        'tree_method': 'gpu_hist',  # train on the GPU to speed up fitting
        'lambda': lambda_,
        'alpha': alpha,
        'colsample_bytree': colsample_bytree,
        'subsample': subsample,
        'learning_rate': learning_rate,  # equal to eta
        'n_estimators': 500,  # equal to num_boost_round
        'max_depth': max_depth,
        'min_child_weight': min_child_weight,
        'early_stopping_rounds': 100,
    }

    classifier = xgb.XGBClassifier(**param)
    classifier.fit(
        X_fit,
        y_fit,
        eval_set=[(X_holdout, y_holdout)],
        verbose=False,
    )

    holdout_predictions = classifier.predict(X_holdout)
    return accuracy_score(y_holdout, holdout_predictions)

In [7]:
# Run the hyperparameter search, maximising the accuracy returned by `objective`.
# The sampler is seeded explicitly so the sequence of suggested hyperparameters
# is repeatable after a kernel restart. TPESampler is the sampler Optuna uses
# when none is given, so only the seed is new here.
# NOTE(review): GPU training itself may still introduce small run-to-run
# differences in the scores — confirm if exact reproducibility is required.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=100)

# Report the best trial found by the study.
print('Number of finished trials: {}'.format(len(study.trials)))
print('Best trial:')
trial = study.best_trial

print('  Value: {}'.format(trial.value))
print('  Params: ')

for key, value in trial.params.items():
    print('    {}: {}'.format(key, value))


[32m[I 2023-04-27 19:24:29,183][0m A new study created in memory with name: no-name-f499dd24-1eb3-445b-9797-0561c0572c36[0m
[32m[I 2023-04-27 19:25:36,054][0m Trial 0 finished with value: 0.9742857142857143 and parameters: {'lambda': 5.369581619684934, 'alpha': 0.002763047467677191, 'colsample_bytree': 0.6, 'subsample': 0.9, 'learning_rate': 0.3429505836613483, 'max_depth': 6, 'min_child_weight': 5}. Best is trial 0 with value: 0.9742857142857143.[0m
[32m[I 2023-04-27 19:26:47,860][0m Trial 1 finished with value: 0.9761904761904762 and parameters: {'lambda': 2.564789696047095, 'alpha': 0.005401286848938813, 'colsample_bytree': 0.6, 'subsample': 1.0, 'learning_rate': 0.36622679180352835, 'max_depth': 6, 'min_child_weight': 1}. Best is trial 1 with value: 0.9761904761904762.[0m
[32m[I 2023-04-27 19:27:46,405][0m Trial 2 finished with value: 0.9750793650793651 and parameters: {'lambda': 7.568284344511005, 'alpha': 0.0046666537940922015, 'colsample_bytree': 0.6, 'subsample': 1.0

Number of finished trials: 100
Best trial:
  Value: 0.9774603174603175
  Params: 
    lambda: 1.2913080747163286
    alpha: 0.004505508810564352
    colsample_bytree: 0.7
    subsample: 1.0
    learning_rate: 0.44672403589298415
    max_depth: 5
    min_child_weight: 1


In [18]:
# Settings for the final model: the values printed for the best trial of the
# search, copied by hand, with the number of estimators set to 2000.
modified_param = {
    "tree_method": "gpu_hist",
    # Values reported for the best trial.
    "lambda": 1.2913080747163286,
    "alpha": 0.004505508810564352,
    "colsample_bytree": 0.7,
    "subsample": 1.0,
    "learning_rate": 0.44672403589298415,  # equal to eta
    # Fixed tree budget for the final fit.
    "n_estimators": 2000,  # equal to num_boost_round
    "max_depth": 5,
    "min_child_weight": 1,
}

In [19]:
# Fit the final classifier on the complete training data (no hold-out split),
# using the settings collected in `modified_param`.
model = xgb.XGBClassifier(**modified_param)
model.fit(X=X, y=Y)



In [20]:
# Predict a label for every row of the test table.
predictions = model.predict(mnist_test)

# Assemble the submission table in one step: a 1-based ImageId column
# followed by the predicted Label cast to int.
predictionsDf = pd.DataFrame(
    {
        'ImageId': range(1, len(predictions) + 1),
        'Label': predictions,
    }
).astype({'Label': int})

In [21]:
# Show the submission table as the cell output (ImageId and Label columns).
predictionsDf

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,9
4,5,3
...,...,...
27995,27996,9
27996,27997,7
27997,27998,3
27998,27999,9


In [22]:
# Write the submission table to disk without the DataFrame index column.
submission_path = '/home/rainer/Downloads/ML_datasets/mnist_test_prediction_optuna.csv'
predictionsDf.to_csv(path_or_buf=submission_path, index=False)