In [None]:
# Prints an identification banner (presumably the author's name and ID).
print("Tejasri - 24BAD124")

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

In [None]:
# Load the Iris dataset bundled with scikit-learn. The returned object exposes
# the .data, .target, .feature_names and .target_names attributes used below.
iris = load_iris()


In [None]:
# Feature matrix and integer class labels.
X, y = iris.data, iris.target
# Human-readable names for the feature columns and for the classes.
feature_names, class_names = iris.feature_names, iris.target_names


In [None]:
# Tabular view of the data: one column per feature plus the integer label.
df = pd.DataFrame(X, columns=feature_names).assign(species=y)


In [None]:
# Quick sanity check: dimensions plus a preview of the leading rows.
dataset_shape = df.shape
print("Dataset Shape:", dataset_shape)
print("\nFirst 5 rows:", df.head(), sep="\n")


In [None]:
# Number of null entries in each column.
missing_per_column = df.isnull().sum()
print("\nMissing Values:", missing_per_column, sep="\n")

In [None]:
# Standardise every feature column.
# NOTE: the statistics are learned from all rows of X, including rows that are
# later held out for testing.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

In [None]:
# Split the *raw* features first, then learn the scaling statistics from the
# training rows only. Splitting an array that was standardised on all rows
# would let the held-out rows influence the preprocessing (data leakage) and
# make the reported test metrics optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42, stratify=y
)

# A dedicated scaler for the modelling pipeline: fitted on the training
# partition, then applied unchanged to both partitions.
train_scaler = StandardScaler().fit(X_train_raw)
X_train = train_scaler.transform(X_train_raw)
X_test = train_scaler.transform(X_test_raw)

In [None]:
# Fit Gaussian Naive Bayes on the training partition (fit returns the
# estimator itself), then label the held-out rows.
gnb = GaussianNB().fit(X_train, y_train)
y_pred = gnb.predict(X_test)

In [None]:
# Overall accuracy, plus precision / recall / F1 using the 'weighted' average
# across the classes.
accuracy = accuracy_score(y_test, y_pred)
precision, recall, f1 = (
    metric(y_test, y_pred, average='weighted')
    for metric in (precision_score, recall_score, f1_score)
)

In [None]:
# Summary of the Gaussian NB scores, formatted to four decimal places.
summary_lines = [
    "\nMODEL PERFORMANCE (Gaussian NB)",
    "--------------------------------",
    f"Accuracy  : {accuracy:.4f}",
    f"Precision : {precision:.4f}",
    f"Recall    : {recall:.4f}",
    f"F1 Score  : {f1:.4f}",
]
print("\n".join(summary_lines))


In [None]:
# Per-class precision / recall / F1 table for the Gaussian NB predictions.
gnb_report = classification_report(y_test, y_pred, target_names=class_names)
print("\nClassification Report:", gnb_report, sep="\n")


In [None]:
# Counts of (actual, predicted) label pairs on the test partition.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)


In [None]:
# Annotated heatmap of the confusion matrix, with class names on both axes.
fig, ax = plt.subplots()
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
ax.set_title("Confusion Matrix – Gaussian Naïve Bayes")
plt.show()

# Side-by-side table of true vs. predicted class names for the test rows
# (integer labels are mapped to names by indexing class_names).
label_columns = {
    'Actual': class_names[y_test],
    'Predicted': class_names[y_pred],
}
comparison = pd.DataFrame(label_columns)

In [None]:
# Preview the first ten actual/predicted pairs.
print("\nPrediction Comparison (First 10):", comparison.head(10), sep="\n")

In [None]:
# Per-class probability estimates from the fitted Gaussian NB model, one row
# per test sample.
y_prob = gnb.predict_proba(X_test)


In [None]:
# Probability table with one column per class name.
prob_df = pd.DataFrame(data=y_prob, columns=class_names)
print("\nClass Probabilities (First 5 Samples):", prob_df.head(), sep="\n")


In [None]:
# Baseline for comparison: logistic regression trained and evaluated on the
# same train/test partitions as the Gaussian NB model.
lr = LogisticRegression(max_iter=200).fit(X_train, y_train)
lr_pred = lr.predict(X_test)


In [None]:
# Per-class precision / recall / F1 table for the logistic-regression baseline.
lr_report = classification_report(y_test, lr_pred, target_names=class_names)
print("\nLOGISTIC REGRESSION PERFORMANCE")
print("--------------------------------", lr_report, sep="\n")


In [None]:
# Keep only feature columns 2 and 3 (plotted later as petal length / petal
# width) so the decision regions can be drawn in a plane; labels are unchanged.
X_2d, y_2d = X_scaled[:, 2:4], y


In [None]:
# Stratified 75/25 split of the two-feature data, using the same seed as the
# full-feature split.
split_2d = train_test_split(
    X_2d, y_2d, test_size=0.25, random_state=42, stratify=y_2d
)
X_train_2d, X_test_2d, y_train_2d, y_test_2d = split_2d

In [None]:
# Gaussian NB restricted to the two selected features (used only for plotting).
gnb_2d = GaussianNB().fit(X_train_2d, y_train_2d)
# Trailing expression so the cell still displays the fitted estimator.
gnb_2d


In [None]:
# Grid bounds: pad the observed range of each of the two features by one unit.
x_min, y_min = X_2d.min(axis=0) - 1
x_max, y_max = X_2d.max(axis=0) + 1

# 200 x 200 lattice of points spanning the padded feature ranges.
x_axis = np.linspace(x_min, x_max, 200)
y_axis = np.linspace(y_min, y_max, 200)
xx, yy = np.meshgrid(x_axis, y_axis)

# Classify every lattice point, then fold the labels back into the grid shape.
grid_points = np.column_stack((xx.ravel(), yy.ravel()))
Z = gnb_2d.predict(grid_points).reshape(xx.shape)

In [None]:
# Decision regions of the two-feature Gaussian NB model with the observations
# drawn on top.
fig, ax = plt.subplots()

# One colour per class, shared by the filled regions and the scatter points so
# that a region and the points of the class it predicts look alike.
n_classes = len(class_names)
class_colors = list(sns.color_palette('Set1', n_colors=n_classes))

# Z holds integer class labels; half-integer boundaries give exactly one
# filled band per label instead of matplotlib's automatically chosen levels.
class_levels = np.arange(n_classes + 1) - 0.5
ax.contourf(xx, yy, Z, levels=class_levels, colors=class_colors, alpha=0.3)

# Use species names (not 0/1/2) as hue so the legend is self-explanatory;
# hue_order pins the name -> colour mapping to the class index order.
sns.scatterplot(
    x=X_2d[:, 0],
    y=X_2d[:, 1],
    hue=class_names[y_2d],
    hue_order=list(class_names),
    palette=class_colors,
    ax=ax,
)
ax.set_xlabel("Petal Length (scaled)")
ax.set_ylabel("Petal Width (scaled)")
ax.set_title("Decision Boundary – Gaussian Naïve Bayes")
plt.show()

In [None]:
# Distribution of the Gaussian NB predicted probabilities, one curve per class.
fig, ax = plt.subplots()
for i, cls in enumerate(class_names):
    # Probabilities are bounded, so do not evaluate the density outside [0, 1].
    sns.kdeplot(y_prob[:, i], label=cls, clip=(0, 1), ax=ax)
ax.set_xlabel("Predicted Probability")
ax.set_title("Class Probability Distribution – Gaussian NB")
# The curves carry labels, but a legend must be requested explicitly for them
# to be shown; without it the three curves cannot be told apart.
ax.legend(title="Class")
plt.show()
