In [1]:
# ============================================
# Task 1: Classical ML with Scikit-learn
# Dataset: Iris Species Dataset
# Goal: Predict iris species using a Decision Tree Classifier
# ============================================

# 1️⃣ Import necessary libraries
from sklearn import datasets                          # Load built-in datasets such as Iris
from sklearn.model_selection import train_test_split  # Split data into training/testing sets
from sklearn.preprocessing import StandardScaler, LabelEncoder  # Scale features and encode labels
from sklearn.tree import DecisionTreeClassifier       # Decision Tree algorithm
from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report
import numpy as np                                    # For handling arrays
import pandas as pd                                   # For handling data in tabular form

# 2️⃣ Load the Iris dataset
iris = datasets.load_iris()

# Tabular view of the data: one column per feature name, with the target
# values appended as a trailing 'species' column.
df = pd.DataFrame(iris.data, columns=iris.feature_names).assign(species=iris.target)

# 3️⃣ Check for missing values
# (Although Iris dataset has no missing values, we handle it generally.)
if df.isnull().values.any():
    # Rows without a label are unusable for supervised learning, and averaging
    # class ids would invent a fractional, meaningless class, so drop those
    # rows instead of imputing the target column.
    df = df.dropna(subset=['species']).reset_index(drop=True)
    # Impute only the feature columns, each with its own column mean. Passing
    # a Series to fillna fills per column and leaves columns that are absent
    # from the Series (here: 'species') untouched.
    # NOTE: the means are computed over all rows, i.e. before the train/test
    # split further down, so test rows influence the imputed values.
    feature_means = df.drop(columns='species').mean()
    df = df.fillna(feature_means)

# 4️⃣ Encode labels (if they were text — here they are numeric already)
# This step is shown for completeness
# Learn the label -> integer mapping first, then apply it to the same column.
label_encoder = LabelEncoder().fit(df['species'])
df['species'] = label_encoder.transform(df['species'])

# 5️⃣ Separate features (X) and target (y)
# 'species' is the last column of df, so selecting by name yields the same
# arrays as slicing by position.
X = df.drop(columns='species').to_numpy()  # Feature columns
y = df['species'].to_numpy()               # Target column

# 6️⃣ Split the dataset into training and testing sets
# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
# NOTE(review): the split is not stratified, so class counts in the test set
# can be uneven; consider stratify=y if balanced evaluation matters.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.3)

# 7️⃣ (Optional but good practice) Standardize the features
# Fit the scaler on the training rows only, then apply those same statistics
# to both partitions so nothing is learned from the test set.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# 8️⃣ Instantiate and train the Decision Tree Classifier
# You can tune hyperparameters like criterion='entropy' or max_depth=3
# Default hyperparameters with a fixed seed; fit() returns the estimator itself,
# so construction and training can be chained.
clf = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# 9️⃣ Make predictions on the test set
y_pred = clf.predict(X_test)

# 🔟 Evaluate the model using multiple metrics
accuracy = accuracy_score(y_test, y_pred)
# Precision and recall share the same call shape: macro averaging gives every
# class equal weight regardless of how many test samples it has.
precision, recall = (
    scorer(y_test, y_pred, average='macro')
    for scorer in (precision_score, recall_score)
)

# One print call; sep="\n" puts each entry on its own line.
print(
    "Model Performance Metrics:",
    "----------------------------",
    f"Accuracy:  {accuracy:.4f}",
    f"Precision: {precision:.4f}",
    f"Recall:    {recall:.4f}",
    sep="\n",
)

# 1️⃣1️⃣ Detailed classification report
print("\nClassification Report:")
# Per-class precision/recall/F1, with rows labelled by species name instead of class id.
report_text = classification_report(y_test, y_pred, target_names=iris.target_names)
print(report_text)


Model Performance Metrics:
----------------------------
Accuracy:  1.0000
Precision: 1.0000
Recall:    1.0000

Classification Report:
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        19
  versicolor       1.00      1.00      1.00        13
   virginica       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45

