In [3]:
# Import necessary libraries
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
import pandas as pd
import numpy as np


In [11]:

# Load the Wine dataset bundled with scikit-learn (multi-feature dataset)
data = load_wine()
X = data.data  # Feature matrix (13 features)
y = data.target  # Target variable (3 classes)

# MultinomialNB rejects negative feature values. Shift (rather than clip) any
# feature whose minimum is below zero: clipping with np.maximum(X, 0) would
# collapse every negative value to 0 and discard information, whereas a
# per-feature shift preserves the differences between samples. Features that
# are already non-negative get a shift of exactly 0 and are left unchanged.
feature_shift = np.minimum(X.min(axis=0), 0)
X = X - feature_shift
assert (X >= 0).all(), "MultinomialNB requires non-negative feature values"

# Split the data into training (70%) and testing (30%) sets; the fixed
# random_state keeps the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
print(f"Training set size: {X_train.shape[0]}, Test set size: {X_test.shape[0]}")

# Tabular view of the (shifted) features plus the class label, for inspection
df = pd.DataFrame(X, columns=data.feature_names)
df['target'] = y
print(df.head())




Training set size: 124, Test set size: 54
   alcohol  malic_acid   ash  alcalinity_of_ash  magnesium  total_phenols  \
0    14.23        1.71  2.43               15.6      127.0           2.80   
1    13.20        1.78  2.14               11.2      100.0           2.65   
2    13.16        2.36  2.67               18.6      101.0           2.80   
3    14.37        1.95  2.50               16.8      113.0           3.85   
4    13.24        2.59  2.87               21.0      118.0           2.80   

   flavanoids  nonflavanoid_phenols  proanthocyanins  color_intensity   hue  \
0        3.06                  0.28             2.29             5.64  1.04   
1        2.76                  0.26             1.28             4.38  1.05   
2        3.24                  0.30             2.81             5.68  1.03   
3        3.49                  0.24             2.18             7.80  0.86   
4        2.69                  0.39             1.82             4.32  1.04   

   od280/od315_of_di

In [5]:
# Build a multinomial Naive Bayes model and train it on the training split.
# fit() returns the estimator itself, so construction and training are chained.
# NOTE(review): MultinomialNB targets discrete/count-based features, while the
# Wine columns printed earlier look like continuous measurements — confirm that
# this model choice is intended.
nb_classifier = MultinomialNB().fit(X_train, y_train)

# Predicted class labels for the held-out test split
y_pred = nb_classifier.predict(X_test)



In [7]:
# Score the model: fraction of test samples whose predicted label matches
# the true label, reported to two decimal places.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy:.2f}")



Accuracy: 0.89


In [None]:
# Per-class probability estimates for every sample in the test split
probabilities = nb_classifier.predict_proba(X_test)

# Display the first five rows, with one column per class name
print("\nSample Predicted Probabilities for first 5 test instances:")
print(
    pd.DataFrame(data=probabilities, columns=data.target_names).head(5)
)


Sample Predicted Probabilities for first 5 test instances:
        class_0       class_1       class_2
0  8.683619e-01  8.330123e-03  1.233080e-01
1  1.000000e+00  2.397809e-13  7.490106e-09
2  8.919091e-08  1.334223e-01  8.665776e-01
3  1.000000e+00  2.120124e-24  6.818387e-18
4  1.446198e-06  8.690840e-01  1.309145e-01
