In [1]:
# Dependencies — install once into the kernel's environment before running:
#   %pip install xgboost pandas scikit-learn matplotlib

In [2]:
import numpy as np
import xgboost as xgb  # For the XGBoost model
from sklearn.datasets import load_wine  # For loading the Wine dataset
from sklearn.model_selection import train_test_split  # For splitting data
from sklearn.metrics import accuracy_score, classification_report  # For evaluation metrics
from sklearn.tree import DecisionTreeClassifier  # For Decision Tree
from sklearn.naive_bayes import GaussianNB  # For Naive Bayes

# Step 1: Load the Wine dataset and pull out the feature matrix and labels
wine = load_wine()
X, y = wine.data, wine.target  # X: 13 feature columns; y: class labels 0, 1, 2

# Step 2: Hold out 20% of the samples for testing (fixed seed => repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 3: Function to evaluate a model (for reusability)
def evaluate_model(model, model_name, *, train=None, test=None, target_names=None):
    """Fit ``model``, print its held-out metrics, and return the accuracy.

    Parameters
    ----------
    model : estimator exposing ``fit(X, y)`` and ``predict(X)``
        Fitted in place on the training data.
    model_name : str
        Label used in the header of the printed report.
    train : tuple of (X, y), optional
        Training features and labels. When omitted, the notebook-level
        ``X_train`` / ``y_train`` are used.
    test : tuple of (X, y), optional
        Evaluation features and labels. When omitted, the notebook-level
        ``X_test`` / ``y_test`` are used.
    target_names : sequence of str, optional
        Class names shown in the classification report. When omitted,
        ``wine.target_names`` is used.

    Returns
    -------
    float
        Accuracy of ``model`` on the evaluation data, so callers can compare
        models programmatically instead of reading the printed text.
    """
    # Fall back to the notebook globals only when no explicit data is given,
    # so existing two-argument calls behave exactly as before.
    fit_X, fit_y = (X_train, y_train) if train is None else train
    eval_X, eval_y = (X_test, y_test) if test is None else test
    class_names = wine.target_names if target_names is None else target_names

    model.fit(fit_X, fit_y)  # Train the model
    y_pred = model.predict(eval_X)  # Make predictions
    accuracy = accuracy_score(eval_y, y_pred)  # Fraction of correct predictions

    print(f"\n--- {model_name} Evaluation ---")
    print(f"Accuracy: {accuracy:.2f}")
    print("Classification Report:")
    print(classification_report(eval_y, y_pred, target_names=class_names))
    return accuracy

# Step 4: Train and evaluate XGBoost model
xgb_model = xgb.XGBClassifier(
    eval_metric='mlogloss',  # Multiclass log loss for evaluation
    random_state=42,  # Same fixed seed as the split and the Decision Tree
)
evaluate_model(xgb_model, "XGBoost")

# Step 5: Train and evaluate Decision Tree model
# Tree depth is capped at 5 (not the library default) and the seed is fixed.
dt_model = DecisionTreeClassifier(max_depth=5, random_state=42)
evaluate_model(dt_model, "Decision Tree")

# Step 6: Train and evaluate Naive Bayes model
nb_model = GaussianNB()  # Gaussian Naive Bayes for continuous features
evaluate_model(nb_model, "Naive Bayes")

# Optional: list the per-feature importances reported by the fitted Decision Tree.
# The hasattr guard skips this report for an estimator without that attribute.
if hasattr(dt_model, 'feature_importances_'):
    feature_names = wine.feature_names
    importances = dt_model.feature_importances_

    print("\nDecision Tree Feature Importances:")
    # One "<feature>: <importance>" line per feature, in dataset column order.
    for name, importance in zip(feature_names, importances):
        print(f"{name}: {importance:.4f}")


--- XGBoost Evaluation ---
Accuracy: 0.94
Classification Report:
              precision    recall  f1-score   support

     class_0       1.00      0.93      0.96        14
     class_1       0.88      1.00      0.93        14
     class_2       1.00      0.88      0.93         8

    accuracy                           0.94        36
   macro avg       0.96      0.93      0.94        36
weighted avg       0.95      0.94      0.94        36


--- Decision Tree Evaluation ---
Accuracy: 0.94
Classification Report:
              precision    recall  f1-score   support

     class_0       0.93      0.93      0.93        14
     class_1       0.93      1.00      0.97        14
     class_2       1.00      0.88      0.93         8

    accuracy                           0.94        36
   macro avg       0.95      0.93      0.94        36
weighted avg       0.95      0.94      0.94        36


--- Naive Bayes Evaluation ---
Accuracy: 1.00
Classification Report:
              precision    rec

In [5]:
# Display the raw feature matrix (wine.data) that was assigned to X earlier.
X

array([[1.423e+01, 1.710e+00, 2.430e+00, ..., 1.040e+00, 3.920e+00,
        1.065e+03],
       [1.320e+01, 1.780e+00, 2.140e+00, ..., 1.050e+00, 3.400e+00,
        1.050e+03],
       [1.316e+01, 2.360e+00, 2.670e+00, ..., 1.030e+00, 3.170e+00,
        1.185e+03],
       ...,
       [1.327e+01, 4.280e+00, 2.260e+00, ..., 5.900e-01, 1.560e+00,
        8.350e+02],
       [1.317e+01, 2.590e+00, 2.370e+00, ..., 6.000e-01, 1.620e+00,
        8.400e+02],
       [1.413e+01, 4.100e+00, 2.740e+00, ..., 6.100e-01, 1.600e+00,
        5.600e+02]], shape=(178, 13))