In [1]:
import numpy as np
import xgboost as xgb  # For the XGBoost model
from sklearn.datasets import load_wine  # For loading the Wine dataset
from sklearn.model_selection import train_test_split  # For splitting data
from sklearn.metrics import accuracy_score, classification_report  # For evaluation metrics

# Tunable settings for the experiment, kept together so they are easy to find.
TEST_SIZE = 0.2    # fraction of samples held out for testing (80/20 split)
RANDOM_STATE = 42  # fixed seed so the train/test split is reproducible

# Step 1: Load the Wine dataset
# The dataset ships with scikit-learn and is based on the UCI repository.
wine = load_wine()
X = wine.data  # Feature matrix
y = wine.target  # Class labels

# Step 2: Explore the dataset (optional, for understanding)
print("Number of samples:", X.shape[0])
print("Number of features:", X.shape[1])
print("Target classes:", np.unique(y))

# Step 3: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# Step 4: Create and train the XGBoost model
# Default hyperparameters are used for simplicity.
# NOTE: `use_label_encoder` is intentionally NOT passed. Recent XGBoost
# releases ignore it and emit
#   Parameters: { "use_label_encoder" } are not used.
# on every fit, so supplying it only adds noise to the output.
model = xgb.XGBClassifier(
    eval_metric='mlogloss'  # Multiclass log loss for evaluation
)

model.fit(X_train, y_train)

# Step 5: Make predictions on the test set
y_pred = model.predict(X_test)

# Step 6: Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Detailed per-class precision / recall / F1
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=wine.target_names))

# Optional: Feature importance (for insight into the model)
importances = model.feature_importances_
feature_names = wine.feature_names
print("Feature Importances:")
for name, importance in zip(feature_names, importances):
    print(f"{name}: {importance:.4f}")

Number of samples: 178
Number of features: 13
Target classes: [0 1 2]
Accuracy: 0.94
Classification Report:
              precision    recall  f1-score   support

     class_0       1.00      0.93      0.96        14
     class_1       0.88      1.00      0.93        14
     class_2       1.00      0.88      0.93         8

    accuracy                           0.94        36
   macro avg       0.96      0.93      0.94        36
weighted avg       0.95      0.94      0.94        36

Feature Importances:
alcohol: 0.0114
malic_acid: 0.0135
ash: 0.0078
alcalinity_of_ash: 0.0040
magnesium: 0.0287
total_phenols: 0.0041
flavanoids: 0.0974
nonflavanoid_phenols: 0.0000
proanthocyanins: 0.0095
color_intensity: 0.1453
hue: 0.0086
od280/od315_of_diluted_wines: 0.5520
proline: 0.1178


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
