<a href="https://colab.research.google.com/github/sangitaGIT011/Myproj/blob/main/Welcome_To_Colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
# ------------------------------------------------------------
# Final ML Classification Comparison
# ------------------------------------------------------------

# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import warnings
warnings.filterwarnings('ignore')

# ------------------------------------------------------------
# 1. Load and clean dataset
# ------------------------------------------------------------
# The bundled Iris data is used as an example; any already-cleaned
# dataset can be substituted here.
from sklearn.datasets import load_iris
iris = load_iris()

# Build one frame: the feature columns first, then the class label
# appended as the last column, 'species'.
df = pd.DataFrame(iris.data, columns=iris.feature_names).assign(species=iris.target)

# Preview the first rows of the frame
print("Dataset Information:")
print(df.head(), "\n")

# Features are every column except the label; the label is the target
X = df.drop(columns='species')
y = df['species']

# Hold out 30% of the rows for evaluation; the fixed random_state keeps
# the split the same on every run
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# ------------------------------------------------------------
# 2. Naïve Bayes Classifier
# ------------------------------------------------------------
print("=== Naïve Bayes Classification ===")

# fit() hands back the estimator itself, so building and training chain
nb = GaussianNB().fit(X_train, y_train)
nb_pred = nb.predict(X_test)

# Score the predictions made on the held-out rows
nb_acc = accuracy_score(y_test, nb_pred)
nb_report = classification_report(y_test, nb_pred)

print("Accuracy:", round(nb_acc, 3))
print("Classification Report:\n", nb_report)

# ------------------------------------------------------------
# 3. Decision Tree Classifier
# ------------------------------------------------------------
print("\n=== Decision Tree Classification ===")

# Seeded so the fitted tree is the same on every run; fit() returns the
# estimator itself, so building and training chain
dt = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
dt_pred = dt.predict(X_test)

# Score the predictions made on the held-out rows
dt_acc = accuracy_score(y_test, dt_pred)
dt_report = classification_report(y_test, dt_pred)

print("Accuracy:", round(dt_acc, 3))
print("Classification Report:\n", dt_report)

## Visualize the Decision Tree
#plt.figure(figsize=(10,6))
#plot_tree(dt, filled=True, feature_names=iris.feature_names, class_names=iris.target_names)
#plt.title("Decision Tree Visualization")
#plt.show()

# ------------------------------------------------------------
# 4. Random Forest Classifier
# ------------------------------------------------------------
print("\n=== Random Forest Classification ===")

# 100 trees, seeded for repeatable results; fit() returns the estimator
# itself, so building and training chain
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
rf_pred = rf.predict(X_test)

# Score the predictions made on the held-out rows
rf_acc = accuracy_score(y_test, rf_pred)
rf_report = classification_report(y_test, rf_pred)

print("Accuracy:", round(rf_acc, 3))
print("Classification Report:\n", rf_report)

# #Feature Importance Plot
#plt.figure(figsize=(7,5))
#sns.barplot(x=rf.feature_importances_, y=iris.feature_names, palette="viridis")
#plt.title("Feature Importance (Random Forest)")
#plt.show()

# ------------------------------------------------------------
# 5. Compare Model Accuracies
# ------------------------------------------------------------
# One (model name, test accuracy) record per classifier, in the order
# the models were trained above.
accuracy_results = pd.DataFrame(
    [
        ('Naïve Bayes', nb_acc),
        ('Decision Tree', dt_acc),
        ('Random Forest', rf_acc),
    ],
    columns=['Model', 'Accuracy'],
)

print("\n=== Model Accuracy Comparison ===")
print(accuracy_results)

## Bar plot for model comparison
#plt.figure(figsize=(7,5))
#sns.barplot(data=accuracy_results, x='Model', y='Accuracy', palette=['skyblue','lightgreen','gold'])
#plt.title("Model Accuracy Comparison")
#plt.ylim(0, 1)
#plt.show()

# ------------------------------------------------------------
# 6. Confusion Matrices
# ------------------------------------------------------------
#fig, axes = plt.subplots(1, 3, figsize=(15, 4))

#sns.heatmap(confusion_matrix(y_test, nb_pred), annot=True, fmt="d", cmap="Blues", ax=axes[0])
#axes[0].set_title("Naïve Bayes Confusion Matrix")

#sns.heatmap(confusion_matrix(y_test, dt_pred), annot=True, fmt="d", cmap="Greens", ax=axes[1])
#axes[1].set_title("Decision Tree Confusion Matrix")

#sns.heatmap(confusion_matrix(y_test, rf_pred), annot=True, fmt="d", cmap="Oranges", ax=axes[2])
#axes[2].set_title("Random Forest Confusion Matrix")

#plt.show()

# ------------------------------------------------------------
# 7. Conclusion
# ------------------------------------------------------------
# Series.idxmax() reports only the first row holding the maximum, so two
# models with equal accuracy would silently be announced as one winner.
# Collect every row that reaches the top score instead. Exact equality is
# used because the scores being compared come from the same table, and
# the maximum is itself one of those values.
top_accuracy = accuracy_results['Accuracy'].max()
top_models = accuracy_results[accuracy_results['Accuracy'] == top_accuracy]

# First top-scoring row: the same row idxmax() would select, kept under
# the original name for any later code that reads `best_model`.
best_model = top_models.iloc[0]

print("\nBest Performing Model:")
if len(top_models) == 1:
    print(best_model)
else:
    # A tie: list every co-leader rather than crowning one arbitrarily
    print(f"{len(top_models)}-way tie at accuracy {top_accuracy:.3f}:")
    print(top_models.to_string(index=False))


Dataset Information:
   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0                5.1               3.5                1.4               0.2   
1                4.9               3.0                1.4               0.2   
2                4.7               3.2                1.3               0.2   
3                4.6               3.1                1.5               0.2   
4                5.0               3.6                1.4               0.2   

   species  
0        0  
1        0  
2        0  
3        0  
4        0   

=== Naïve Bayes Classification ===
Accuracy: 0.978
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      0.92      0.96        13
           2       0.93      1.00      0.96        13

    accuracy                           0.98        45
   macro avg       0.98      0.97      0.97        45
weighted avg       0.98