In [1]:
# classification_iris.py
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# ----------------------------
# Load preprocessed data
# ----------------------------
data_path = "iris_processed.csv"  # From Task 1
df = pd.read_csv(data_path)
print("Data shape:", df.shape)

# Features and labels
# The species is stored as three one-hot columns; every remaining column is
# used as a feature.
label_columns = ["species_0", "species_1", "species_2"]
X = df.drop(columns=label_columns)
# idxmax(axis=1) yields, for each row, the name of the one-hot column holding
# the largest value, so the class labels are the column names themselves.
y = df[label_columns].idxmax(axis=1)

# ----------------------------
# Train/test split
# ----------------------------
# 30% of the rows are held out for testing; the split is stratified on the
# label and seeded so repeated runs produce the same partition.
split_options = {"test_size": 0.3, "random_state": 42, "stratify": y}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# ----------------------------
# Decision Tree Classifier
# ----------------------------
# Fixed random_state keeps the fitted tree reproducible between runs.
dt_clf = DecisionTreeClassifier(random_state=42)
dt_clf.fit(X_train, y_train)
y_pred_dt = dt_clf.predict(X_test)

# Metrics for Decision Tree
# Precision, recall and F1 use average="macro": the unweighted mean of the
# per-class scores.
dt_metrics = {
    "Accuracy": accuracy_score(y_test, y_pred_dt),
    "Precision": precision_score(y_test, y_pred_dt, average="macro"),
    "Recall": recall_score(y_test, y_pred_dt, average="macro"),
    "F1-score": f1_score(y_test, y_pred_dt, average="macro"),
}

print("\nDecision Tree Metrics:")
print(pd.DataFrame([dt_metrics]))

# Visualize Decision Tree
# Pass plain Python lists for the names: at least one scikit-learn release
# (1.3.0) validates feature_names/class_names as `list` and rejects a pandas
# Index or NumPy array, while lists are accepted by every release.
tree_feature_names = list(X.columns)
tree_class_names = [str(label) for label in dt_clf.classes_]

plt.figure(figsize=(12, 8))
plot_tree(
    dt_clf,
    feature_names=tree_feature_names,
    class_names=tree_class_names,
    filled=True,
)
plt.savefig("decision_tree.png")
plt.close()  # release the figure once it has been written to disk
print("Decision tree saved as decision_tree.png")

# ----------------------------
# KNN Classifier (k=5)
# ----------------------------
# fit() returns the estimator, so construction and training are chained.
knn_clf = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
y_pred_knn = knn_clf.predict(X_test)

# Metrics for KNN
# Accuracy is computed directly; the remaining scores share the same call
# shape (macro-averaged over the classes), so they are filled in from a table.
knn_metrics = {"Accuracy": accuracy_score(y_test, y_pred_knn)}
macro_scorers = (
    ("Precision", precision_score),
    ("Recall", recall_score),
    ("F1-score", f1_score),
)
for metric_name, scorer in macro_scorers:
    knn_metrics[metric_name] = scorer(y_test, y_pred_knn, average="macro")

knn_report = pd.DataFrame([knn_metrics])
print("\nKNN (k=5) Metrics:")
print(knn_report)

# ----------------------------
# Compare models
# ----------------------------
# One row per model, one column per metric; the row labels are written to the
# CSV as its first column.
model_names = ["Decision Tree", "KNN (k=5)"]
model_scores = [dt_metrics, knn_metrics]
comparison_df = pd.DataFrame(model_scores, index=model_names)

comparison_path = "classification_comparison.csv"
comparison_df.to_csv(comparison_path, index=True)
print("\nComparison saved as classification_comparison.csv")


Data shape: (150, 7)

Decision Tree Metrics:
   Accuracy  Precision    Recall  F1-score
0  0.933333   0.944444  0.933333   0.93266
Decision tree saved as decision_tree.png

KNN (k=5) Metrics:
   Accuracy  Precision    Recall  F1-score
0  0.933333   0.944444  0.933333   0.93266

Comparison saved as classification_comparison.csv
