In [1]:
from sklearn import datasets
import pandas as pd
import numpy as np
# from summarytools import dfSummary
import xgboost as xgb
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.metrics import accuracy_score, classification_report

In [2]:
iris = datasets.load_iris()

In [3]:
X = iris.data
y = iris.target

## EDA

In [4]:
# dfSummary(df)

## Split data

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [6]:
# Convert data to DMatrix format
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

## Train the model

In [7]:
xgb_clf = xgb.XGBClassifier(
    objective='multi:softmax',
    eval_metric='merror',
    use_label_encoder=False
)

In [8]:
param_grid = {
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.1, 0.2],
    'n_estimators': [100, 200, 300],
    'colsample_bytree': [0.3, 0.7]
}


In [9]:
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)


In [10]:
grid_search = GridSearchCV(
    estimator=xgb_clf,
    param_grid=param_grid,
    scoring='accuracy',
    n_jobs=-1,
    cv=kfold,
    verbose=1)

In [13]:
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 54 candidates, totalling 270 fits


Parameters: { "use_label_encoder" } are not used.



## Do predictions

In [14]:
best_model = grid_search.best_estimator_

In [16]:
# Make predictions
y_pred = best_model.predict(X_test)

# Evaluate predictions
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

# Print a detailed classification report
print(classification_report(y_test, y_pred, target_names=iris.target_names))


Accuracy: 1.00
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        19
  versicolor       1.00      1.00      1.00        13
   virginica       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45



In [17]:
best_model.feature_importances_

array([0.12888247, 0.07263289, 0.33482382, 0.4636608 ], dtype=float32)