In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.impute import SimpleImputer

# Load Wine Quality dataset
wine_df = pd.read_csv('Wine_Quality.csv')

# Drop non-numeric 'type' column
wine_df.drop('type', axis=1, inplace=True)

# Separate features and target
X = wine_df.drop('quality', axis=1)
y = wine_df['quality']

# Impute missing values if any
imputer = SimpleImputer(strategy='mean')
X_imputed = pd.DataFrame(imputer.fit_transform(X), columns=X.columns)

# Train and evaluate
for ratio in [0.2, 0.3, 0.4]:
    X_train, X_test, y_train, y_test = train_test_split(X_imputed, y, test_size=ratio, random_state=42)
    model = GaussianNB()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    print(f"\n🔹Train-Test Split: {int((1-ratio)*100)}-{int(ratio*100)}")
    print("Accuracy:", round(accuracy_score(y_test, y_pred), 4))
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
    print("Classification Report:\n", classification_report(y_test, y_pred))


🔹Train-Test Split: 80-20
Accuracy: 0.4715
Confusion Matrix:
 [[  0   0   1   1   0   0   0]
 [  2   7  19  14   4   0   0]
 [  6  19 237 144  14   0   0]
 [  2  15 159 244 153   3   3]
 [  1   2  19  71 124   3   1]
 [  0   0   0   9  21   1   1]
 [  0   0   0   0   0   0   0]]
Classification Report:
               precision    recall  f1-score   support

           3       0.00      0.00      0.00         2
           4       0.16      0.15      0.16        46
           5       0.54      0.56      0.55       420
           6       0.51      0.42      0.46       579
           7       0.39      0.56      0.46       221
           8       0.14      0.03      0.05        32
           9       0.00      0.00      0.00         0

    accuracy                           0.47      1300
   macro avg       0.25      0.25      0.24      1300
weighted avg       0.48      0.47      0.47      1300


🔹Train-Test Split: 70-30
Accuracy: 0.439
Confusion Matrix:
 [[  1   0   3   2   0   0   0]
 [  3  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
