In [10]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [11]:
# Read the UCI Iris CSV straight from the archive; the column labels are
# passed in explicitly through `names`.
names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
dataset = pd.read_csv(url, names=names)

In [12]:
# Quick look at the data: first rows, summary statistics, and the distinct labels.
# Each heading and its payload go through one print call, separated by a newline.
print("Dataset Information:", dataset.head(), sep="\n")
print("\nDataset Description:", dataset.describe(), sep="\n")
print("\nClasses:", dataset['class'].unique(), sep="\n")

Dataset Information:
   sepal-length  sepal-width  petal-length  petal-width        class
0           5.1          3.5           1.4          0.2  Iris-setosa
1           4.9          3.0           1.4          0.2  Iris-setosa
2           4.7          3.2           1.3          0.2  Iris-setosa
3           4.6          3.1           1.5          0.2  Iris-setosa
4           5.0          3.6           1.4          0.2  Iris-setosa

Dataset Description:
       sepal-length  sepal-width  petal-length  petal-width
count    150.000000   150.000000    150.000000   150.000000
mean       5.843333     3.054000      3.758667     1.198667
std        0.828066     0.433594      1.764420     0.763161
min        4.300000     2.000000      1.000000     0.100000
25%        5.100000     2.800000      1.600000     0.300000
50%        5.800000     3.000000      4.350000     1.300000
75%        6.400000     3.300000      5.100000     1.800000
max        7.900000     4.400000      6.900000     2.500000

Classes:
['Iris-setosa' 'Iris-versicolor' 'Iris-virginica']

In [13]:
# Data preprocessing
# Select the target by its column label instead of a hard-coded position so the
# feature matrix and the label vector cannot drift apart if columns change, and
# use to_numpy() so both are plain NumPy arrays regardless of the pandas dtype.
X = dataset.drop(columns='class').to_numpy()
y = dataset['class'].to_numpy()

In [14]:
# Hold out 20% of the rows for evaluation; random_state fixes the shuffle so
# the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [15]:
# Naive Bayes implementation
class NaiveBayes:
    """Gaussian Naive Bayes classifier.

    Each feature is modelled as an independent normal distribution per class.
    Prediction picks the class with the highest log-posterior, so long feature
    vectors do not underflow to zero, and a small variance floor keeps constant
    features from producing division-by-zero NaNs.

    Attributes:
        var_smoothing: Fraction of the largest overall feature variance that is
            added to every per-class variance during ``fit``.
        classes: Sorted unique labels seen by ``fit`` (only set after ``fit``).
        class_probs: Maps label -> prior probability (relative class frequency).
        mean: Maps label -> per-feature mean array.
        var: Maps label -> per-feature variance array (smoothing included).
    """

    def __init__(self, var_smoothing=1e-9):
        """Create an unfitted classifier.

        Args:
            var_smoothing: Non-negative factor controlling the variance floor;
                see the class docstring.
        """
        self.var_smoothing = var_smoothing
        self.class_probs = {}
        self.mean = {}
        self.var = {}

    def fit(self, X, y):
        """Estimate class priors and per-class feature means/variances.

        Args:
            X: Array-like of shape (n_samples, n_features) with numeric values.
            y: Array-like of n_samples class labels.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)

        # Start from a clean slate so refitting on different labels does not
        # leave statistics of classes that are no longer present.
        self.class_probs = {}
        self.mean = {}
        self.var = {}
        self.classes = np.unique(y)

        # Variance floor: a feature that is constant within a class would
        # otherwise have variance 0 and turn the density into 0/0 = NaN.
        largest_var = float(np.max(np.var(X, axis=0))) if len(X) else 0.0
        epsilon = self.var_smoothing * (largest_var if largest_var > 0 else 1.0)

        for cls in self.classes:
            X_cls = X[y == cls]
            self.class_probs[cls] = len(X_cls) / len(X)
            self.mean[cls] = np.mean(X_cls, axis=0)
            self.var[cls] = np.var(X_cls, axis=0) + epsilon

    def _log_likelihood(self, x, mean, var):
        """Return the summed per-feature Gaussian log-density of ``x``."""
        return np.sum(-0.5 * np.log(2 * np.pi * var) - (x - mean) ** 2 / (2 * var))

    def calculate_probability(self, x, mean, var):
        """Return the joint Gaussian density of ``x`` (product over features).

        Kept for callers that want the raw likelihood. The product can
        underflow to 0.0 for many features, which is why prediction uses the
        log-domain helper instead.
        """
        exponent = np.exp(-(x - mean) ** 2 / (2 * var))
        return np.prod((1 / (np.sqrt(2 * np.pi * var))) * exponent)

    def predict_single(self, x):
        """Return the most probable class label for one sample ``x``."""
        x = np.asarray(x, dtype=float)
        # Log-prior plus log-likelihood; argmax is the same as for the product
        # of probabilities but stays finite.
        log_posteriors = {
            cls: np.log(self.class_probs[cls])
            + self._log_likelihood(x, self.mean[cls], self.var[cls])
            for cls in self.classes
        }
        return max(log_posteriors, key=log_posteriors.get)

    def predict(self, X):
        """Return a list with the predicted label for every row of ``X``."""
        return [self.predict_single(x) for x in X]

In [16]:
# Build the classifier and estimate its parameters from the training split
nb_classifier = NaiveBayes()
nb_classifier.fit(X=X_train, y=y_train)

In [17]:
# Predict a label for every held-out sample
y_pred = nb_classifier.predict(X=X_test)

In [18]:
# Per-class precision, recall and F1 for the test-set predictions
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")


Classification Report:
                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      1.00      1.00         9
 Iris-virginica       1.00      1.00      1.00        11

       accuracy                           1.00        30
      macro avg       1.00      1.00      1.00        30
   weighted avg       1.00      1.00      1.00        30

