In [1]:
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import precision_score, recall_score, f1_score


# Load the Iris dataset and pull out the feature matrix and class labels.
iris_data = load_iris()
features = iris_data.data
labels = iris_data.target

# Hold out 30% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
train_X, test_X, train_y, test_y = train_test_split(
    features,
    labels,
    test_size=0.3,
    random_state=42,
)

# Every metric is macro-averaged and reports 0 (instead of warning) when a
# class has no predicted/true samples, so the options are defined once.
metric_options = {"average": "macro", "zero_division": 0}

# One dict per tree depth; becomes one row of the results table.
evaluation_results = []

# Fit one decision tree per maximum depth (1 through 5) and score it on the
# held-out test set.
for depth in range(1, 6):
    model = DecisionTreeClassifier(
        max_depth=depth,
        min_samples_leaf=2,
        min_samples_split=5,
        random_state=42,
    )
    model.fit(train_X, train_y)
    predictions = model.predict(test_X)

    macro_precision = precision_score(test_y, predictions, **metric_options)
    macro_recall = recall_score(test_y, predictions, **metric_options)
    macro_f1 = f1_score(test_y, predictions, **metric_options)

    # Key order here determines the column order of the displayed table.
    row = {
        "Depth": depth,
        "Recall": round(macro_recall, 3),
        "Precision": round(macro_precision, 3),
        "F1 Score": round(macro_f1, 3),
    }
    evaluation_results.append(row)

# Tabulate the per-depth scores and render them without the index column.
results_df = pd.DataFrame(evaluation_results)
print("\nPerformance Comparison for Different Depths:")
styled_results = results_df.style.set_caption("Model Performance by Depth")
display(styled_results.hide(axis="index"))

# Find the best and worst performing models.
# idxmax()/idxmin() return index *labels*, so the matching rows are fetched
# with the label-based .loc accessor (positional .iloc only happens to agree
# while the frame keeps its default 0..n-1 index). On ties, the first matching
# row is selected.
best_recall = results_df.loc[results_df['Recall'].idxmax()]
worst_precision = results_df.loc[results_df['Precision'].idxmin()]
best_f1 = results_df.loc[results_df['F1 Score'].idxmax()]

# Print conclusions.
# Extracting a row from a frame with mixed int/float columns upcasts every
# value to float, so Depth is cast back to int to print "3" rather than "3.0".
print("\nSummary of Results:")
print(f"1. Model with the highest recall: Depth {int(best_recall['Depth'])} (Recall = {best_recall['Recall']})")
print(f"2. Model with the lowest precision: Depth {int(worst_precision['Depth'])} (Precision = {worst_precision['Precision']})")
print(f"3. Model with the best F1 score: Depth {int(best_f1['Depth'])} (F1 Score = {best_f1['F1 Score']})")



Performance Comparison for Different Depths:


Depth,Recall,Precision,F1 Score
1,0.667,0.5,0.556
2,0.974,0.976,0.974
3,1.0,1.0,1.0
4,1.0,1.0,1.0
5,1.0,1.0,1.0



Summary of Results:
1. Model with the highest recall: Depth 3.0 (Recall = 1.0)
2. Model with the lowest precision: Depth 1.0 (Precision = 0.5)
3. Model with the best F1 score: Depth 3.0 (F1 Score = 1.0)
