In [1]:
# 1. Load the Dataset
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import pandas as pd

# Request the dataset with as_frame=True and keep the features/labels
# under the conventional X / y names.
iris = load_iris(as_frame=True)
X, y = iris.data, iris.target

# 2. Split Data
# Hold out 30% of the rows for testing. stratify=y is passed so the split
# is stratified on the labels, and the fixed random_state makes it
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

# 3. Import Models and Metrics
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def train_evaluate_model(model, X_train, y_train, X_test, y_test):
    """Fit ``model`` on the training split and score it on the test split.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``. It is
            fitted in place.
        X_train: Training features passed to ``model.fit``.
        y_train: Training labels passed to ``model.fit``.
        X_test: Held-out features passed to ``model.predict``.
        y_test: Held-out labels the predictions are compared against.

    Returns:
        A dict with the keys ``model_name`` (the estimator's class name),
        ``accuracy``, ``precision``, ``recall``, ``f1_score`` and
        ``model_object`` (the same ``model`` instance, now fitted).
    """
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    # Multiclass target: precision/recall/F1 are aggregated with the
    # 'weighted' average, and zero_division=0 is forwarded to each of them.
    averaging = {'average': 'weighted', 'zero_division': 0}

    summary = {'model_name': type(model).__name__}
    summary['accuracy'] = accuracy_score(y_test, predictions)
    summary['precision'] = precision_score(y_test, predictions, **averaging)
    summary['recall'] = recall_score(y_test, predictions, **averaging)
    summary['f1_score'] = f1_score(y_test, predictions, **averaging)
    # Keep the trained model object so the caller can reuse it.
    summary['model_object'] = model
    return summary

# 4. Train and Evaluate 3 Models
# Candidate estimators; each one is fitted and scored on the same split.
models_to_train = [
    LogisticRegression(max_iter=200, random_state=42),
    SVC(kernel='linear', C=1.0, random_state=42),
    RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42),
]

results = []
for model in models_to_train:
    results.append(
        train_evaluate_model(model, X_train, y_train, X_test, y_test)
    )

# 5. Review Results and Select Best Model
# One row per model; the columns follow the keys of each result dict.
results_df = pd.DataFrame(results)
print(results_df)

# Example: pick the row with the highest F1 score (idxmax returns the
# first such row when several tie).
# NOTE: the scores compared here were computed on the test split, so the
# winner is selected on the same data its score is reported on.
best_idx = results_df['f1_score'].idxmax()
best_model_data = results_df.loc[best_idx]
best_model = best_model_data['model_object']
print(f"\nBest Model: {best_model_data['model_name']} with F1-Score: {best_model_data['f1_score']:.4f}")

               model_name  accuracy  precision    recall  f1_score  \
0      LogisticRegression  0.933333   0.934524  0.933333  0.933259   
1                     SVC  1.000000   1.000000  1.000000  1.000000   
2  RandomForestClassifier  0.888889   0.898148  0.888889  0.887767   

                                        model_object  
0  LogisticRegression(max_iter=200, random_state=42)  
1              SVC(kernel='linear', random_state=42)  
2  (DecisionTreeClassifier(max_depth=5, max_featu...  

Best Model: SVC with F1-Score: 1.0000
