# Experiment 11

## Problem Statement

To solve classification problems using Naïve Bayes.

## Code

In [9]:
# importing required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB  
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

In [10]:
# loading the dataset
dataset = pd.read_csv('wine_dataset.csv')
print(dataset.head().to_markdown())

|    |   fixed_acidity |   volatile_acidity |   citric_acid |   residual_sugar |   chlorides |   free_sulfur_dioxide |   total_sulfur_dioxide |   density |   pH |   sulphates |   alcohol |   quality | style   |
|---:|----------------:|-------------------:|--------------:|-----------------:|------------:|----------------------:|-----------------------:|----------:|-----:|------------:|----------:|----------:|:--------|
|  0 |             7.4 |               0.7  |          0    |              1.9 |       0.076 |                    11 |                     34 |    0.9978 | 3.51 |        0.56 |       9.4 |         5 | red     |
|  1 |             7.8 |               0.88 |          0    |              2.6 |       0.098 |                    25 |                     67 |    0.9968 | 3.2  |        0.68 |       9.8 |         5 | red     |
|  2 |             7.8 |               0.76 |          0.04 |              2.3 |       0.092 |                    15 |                     54 |    0.997  | 

In [11]:
# splitting the dataset into dependent and independent variables
X = dataset.drop('style', axis=1)  # Independent variables
y = dataset['style']               # Dependent variable

In [12]:
# print number of unique values in y
print(f"Number of unique values in y: {y.nunique()}")

Number of unique values in y: 2


In [13]:
# splitting the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [14]:
# training the model
model = GaussianNB()
model.fit(X_train, y_train)

In [15]:
# evaluating the model
predictions = model.predict(X_test)

# confusion Matrix
conf_matrix = confusion_matrix(y_test, predictions)
print("Confusion Matrix:\n", conf_matrix)

# true positive, false positive, false negative, true negative
for i in range(len(conf_matrix)):
    TP = conf_matrix[i, i]
    FP = conf_matrix[:, i].sum() - TP
    FN = conf_matrix[i, :].sum() - TP
    TN = conf_matrix.sum() - (TP + FP + FN)

    print('Class {} -- TP {}, FP {}, FN {}, TN {}'.format(i, TP, FP, FN, TN))

# accuracy
accuracy = accuracy_score(y_test, predictions)
print("Accuracy:", accuracy)

# Precision, Recall, and F1 Score
precision = precision_score(y_test, predictions, average='weighted')
recall = recall_score(y_test, predictions, average='weighted')
f1 = f1_score(y_test, predictions, average='weighted')

print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

Confusion Matrix:
 [[317   6]
 [ 24 953]]
Class 0 -- TP 317, FP 24, FN 6, TN 953
Class 1 -- TP 953, FP 6, FN 24, TN 317
Accuracy: 0.9769230769230769
Precision: 0.9778109575849158
Recall: 0.9769230769230769
F1 Score: 0.9771285874585437
