In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Red wine quality: multi-class XGBoost baseline.
# Pipeline: load CSV -> encode target -> train/test split -> scale -> fit -> report.

# Load the red wine dataset (semicolon-delimited CSV, path relative to the working directory)
red_wine = pd.read_csv('winequality-red.csv', delimiter=';')

# Prepare the data: every column except 'quality' is a feature, 'quality' is the target
X_red = red_wine.drop(columns=['quality'])
y_red = red_wine['quality']

# Encode the target variable as consecutive integer class ids (0..K-1);
# label_encoder_red.classes_ keeps the original quality grades for reporting.
label_encoder_red = LabelEncoder()
y_red = label_encoder_red.fit_transform(y_red)

# Split the data into training and test sets (80/20, fixed seed for reproducibility).
# NOTE(review): the split is not stratified, so rare quality grades may get very few
# (or zero) test rows; consider stratify=y_red if per-class metrics matter.
X_train_red, X_test_red, y_train_red, y_test_red = train_test_split(
    X_red, y_red, test_size=0.2, random_state=42
)

# Standardize the data: fit the scaler on the training rows only, then apply the
# same transform to the test rows so no test-set statistics leak into training.
scaler_red = StandardScaler()
X_train_red = scaler_red.fit_transform(X_train_red)
X_test_red = scaler_red.transform(X_test_red)

# Train the XGBoost classifier
xgb_red = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', random_state=42)
xgb_red.fit(X_train_red, y_train_red)

# Predict and evaluate
y_pred_red = xgb_red.predict(X_test_red)
accuracy_red = accuracy_score(y_test_red, y_pred_red)
print(f"Red Wine XGBoost Classifier Accuracy: {accuracy_red}")

# Report over the full set of encoded classes. Passing `labels` explicitly keeps the
# report and the confusion matrix aligned with `target_names` even when a rare grade
# is absent from both the test labels and the predictions (without it,
# classification_report raises a ValueError on the target_names length mismatch).
unique_classes_red = list(label_encoder_red.classes_)
class_ids_red = list(range(len(unique_classes_red)))
class_names_red = [str(cls) for cls in unique_classes_red]

# zero_division=0 reports 0.0 for classes that were never predicted instead of
# emitting an UndefinedMetricWarning for each of them.
print(classification_report(
    y_test_red,
    y_pred_red,
    labels=class_ids_red,
    target_names=class_names_red,
    zero_division=0,
))
print(confusion_matrix(y_test_red, y_pred_red, labels=class_ids_red))


Red Wine XGBoost Classifier Accuracy: 0.696875
              precision    recall  f1-score   support

           3       0.00      0.00      0.00         1
           4       0.00      0.00      0.00        10
           5       0.75      0.80      0.78       130
           6       0.68      0.73      0.70       132
           7       0.64      0.55      0.59        42
           8       0.00      0.00      0.00         5

    accuracy                           0.70       320
   macro avg       0.34      0.35      0.34       320
weighted avg       0.67      0.70      0.68       320

[[  0   0   1   0   0   0]
 [  0   0   7   3   0   0]
 [  0   1 104  24   1   0]
 [  0   1  25  96   9   1]
 [  0   0   1  17  23   1]
 [  0   0   0   2   3   0]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# White wine quality: multi-class XGBoost model with hand-picked hyperparameters.
# Pipeline: load CSV -> encode target -> train/test split -> scale -> fit -> report.

# Load the white wine dataset (semicolon-delimited CSV, path relative to the working directory)
white_wine = pd.read_csv('winequality-white.csv', delimiter=';')

# Prepare the data: every column except 'quality' is a feature, 'quality' is the target
X_white = white_wine.drop(columns=['quality'])
y_white = white_wine['quality']

# Encode the target variable before splitting so the encoder sees every quality
# grade present in the file, not only those that land in the training split.
label_encoder_white = LabelEncoder()
y_white = label_encoder_white.fit_transform(y_white)

# Split the data into training and test sets (80/20, fixed seed for reproducibility).
# NOTE(review): the split is not stratified, so the rarest quality grades can end up
# with no test rows at all; consider stratify=y_white if per-class metrics matter.
X_train_white, X_test_white, y_train_white, y_test_white = train_test_split(
    X_white, y_white, test_size=0.2, random_state=42
)

# Standardize the data: fit the scaler on the training rows only, then apply the
# same transform to the test rows so no test-set statistics leak into training.
scaler_white = StandardScaler()
X_train_white = scaler_white.fit_transform(X_train_white)
X_test_white = scaler_white.transform(X_test_white)

# Train the XGBoost classifier with parameter tuning.
# The scikit-learn wrapper's own parameter names are used throughout
# (learning_rate / reg_alpha rather than the native-booster aliases eta / alpha),
# so the settings are visible to get_params()/set_params() and match reg_lambda.
xgb_white = XGBClassifier(
    use_label_encoder=False,
    eval_metric='mlogloss',
    random_state=42,
    learning_rate=0.1,
    max_depth=5,
    min_child_weight=3,
    subsample=0.8,
    colsample_bytree=0.8,
    reg_lambda=1.0,
    reg_alpha=0.0
)
xgb_white.fit(X_train_white, y_train_white)

# Predict and evaluate
y_pred_white = xgb_white.predict(X_test_white)
accuracy_white = accuracy_score(y_test_white, y_pred_white)
print(f"White Wine XGBoost Classifier Accuracy: {accuracy_white}")

# Generate classification report over every encoded class, including any grade
# that has no rows in the test split, so the report rows and confusion-matrix
# axes always line up with the original quality grades.
unique_classes_white = list(label_encoder_white.classes_)
class_ids_white = list(range(len(unique_classes_white)))
class_names_white = [str(cls) for cls in unique_classes_white]

# zero_division=0 reports 0.0 for classes with no predictions or no support instead
# of emitting an UndefinedMetricWarning for each of them.
print(classification_report(
    y_test_white,
    y_pred_white,
    labels=class_ids_white,
    target_names=class_names_white,
    zero_division=0,
))
print(confusion_matrix(y_test_white, y_pred_white, labels=class_ids_white))


White Wine XGBoost Classifier Accuracy: 0.6153061224489796
              precision    recall  f1-score   support

           3       0.00      0.00      0.00         5
           4       0.73      0.32      0.44        25
           5       0.62      0.60      0.61       291
           6       0.59      0.74      0.66       432
           7       0.68      0.47      0.56       192
           8       0.69      0.26      0.37        35
           9       0.00      0.00      0.00         0

   micro avg       0.62      0.62      0.62       980
   macro avg       0.47      0.34      0.38       980
weighted avg       0.62      0.62      0.61       980

[[  0   0   3   2   0   0   0]
 [  0   8  12   5   0   0   0]
 [  0   2 176 108   5   0   0]
 [  0   0  87 319  26   0   0]
 [  0   1   6  90  91   4   0]
 [  0   0   0  15  11   9   0]
 [  0   0   0   0   0   0   0]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
