# Classification Problem with Complete Metrics Analysis

This notebook demonstrates a complete classification workflow using multiple algorithms and shows all relevant classification metrics.

In [None]:
# Import necessary libraries
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, classification_report, roc_auc_score,
    roc_curve, precision_recall_curve, average_precision_score,
    matthews_corrcoef, cohen_kappa_score, log_loss
)
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.multiclass import OneVsRestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.svm import SVC
warnings.filterwarnings('ignore')

# Set plotting style. Newer matplotlib releases ship the seaborn styles under
# the 'seaborn-v0_8' name, while older releases only know 'seaborn'.
try:
    plt.style.use('seaborn-v0_8')
except OSError:
    # matplotlib raises OSError for an unknown style name; catching only that
    # keeps unrelated failures (and KeyboardInterrupt) visible.
    plt.style.use('seaborn')
sns.set_palette("husl")
# Seed NumPy's global RNG so any NumPy-based randomness below is repeatable.
np.random.seed(42)

## 1. Data Loading and Exploration

In [None]:
# Load the Iris dataset and unpack the pieces reused throughout the notebook
iris = load_iris()
X, y = iris.data, iris.target
feature_names, target_names = iris.feature_names, iris.target_names

# Tabular copy of the data: numeric class code plus a readable species label
df = pd.DataFrame(X, columns=feature_names)
df['target'] = y
df['species'] = df['target'].map({code: target_names[code] for code in range(3)})

print("Dataset Shape:", df.shape)
print("\nFeatures:", feature_names)
print("\nTarget Classes:", target_names)
print("\nClass Distribution:", df['species'].value_counts(), sep="\n")

In [None]:
# Display first few rows as a quick sanity check of the feature columns and
# the `target` / `species` columns added to `df`
df.head()

In [None]:
# Basic statistics for the columns of `df`.
# Note: `target` holds integer class codes, so its summary statistics are not
# meaningful measurements.
df.describe()

In [None]:
# Visualize the data: one panel per feature with the species overlaid
fig, axes = plt.subplots(2, 2, figsize=(15, 12))

# Walk the 2x2 grid in row-major order, pairing each panel with one feature
for ax, feature in zip(axes.ravel(), feature_names):
    # Semi-transparent histograms so overlapping species stay visible
    for species in target_names:
        values = df.loc[df['species'] == species, feature]
        ax.hist(values, alpha=0.7, label=species, bins=15)

    ax.set_title(f'Distribution of {feature}')
    ax.set_xlabel(feature)
    ax.set_ylabel('Frequency')
    ax.legend()

plt.tight_layout()
plt.show()

In [None]:
# Correlation matrix of the input features (label columns are excluded)
correlation_matrix = df[feature_names].corr()

fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0, ax=ax)
ax.set_title('Feature Correlation Matrix')
plt.show()

## 2. Data Preparation

In [None]:
# Split the data: 70/30 hold-out, stratified so each split keeps the class
# proportions of `y`; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

print("Training set shape:", X_train.shape)
print("Test set shape:", X_test.shape)

# Report per-class counts for both splits. Class names are looked up by the
# label value returned from np.unique (not by enumeration position), so the
# names stay correct even if a class were absent from a split.
for split_label, split_targets in (("Training", y_train), ("Test", y_test)):
    print(f"\n{split_label} set class distribution:")
    unique, counts = np.unique(split_targets, return_counts=True)
    for class_id, count in zip(unique, counts):
        print(f"{target_names[class_id]}: {count}")

In [None]:
# Standardize the features: the scaler is fitted on the training split only,
# then the same fitted transformation is applied to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("Feature scaling completed")
for caption, values in (
    ("Original feature means:", X_train.mean(axis=0)),
    ("Scaled feature means:", X_train_scaled.mean(axis=0)),
    ("Scaled feature std:", X_train_scaled.std(axis=0)),
):
    print(caption, values)

## 3. Model Training

In [None]:
# Candidate classifiers, keyed by the display name used in every later table
# and plot
classifiers = {
    'Logistic Regression': LogisticRegression(max_iter=1000, random_state=42),
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'SVM': SVC(probability=True, random_state=42),
    'K-Nearest Neighbors': KNeighborsClassifier(n_neighbors=5),
    'Naive Bayes': GaussianNB(),
}

# Fit each estimator on the standardized training split. fit() returns the
# estimator itself, so trained_models refers to the same objects as classifiers.
trained_models = {}
for name in classifiers:
    print(f"Training {name}...")
    trained_models[name] = classifiers[name].fit(X_train_scaled, y_train)

print("\nAll models trained successfully!")

## 4. Model Predictions

In [None]:
# Hard class predictions on the standardized test split, keyed by model name
predictions = {
    name: model.predict(X_test_scaled)
    for name, model in trained_models.items()
}

# Per-class probability estimates for the same samples, keyed by model name
probabilities = {
    name: model.predict_proba(X_test_scaled)
    for name, model in trained_models.items()
}

print("Predictions generated for all models")

## 5. Comprehensive Metrics Evaluation

In [None]:
def calculate_all_metrics(y_true, y_pred, y_proba, model_name):
    """
    Calculate comprehensive classification metrics for one model.

    Parameters
    ----------
    y_true : array-like
        Ground-truth class labels.
    y_pred : array-like
        Hard class predictions aligned with ``y_true``.
    y_proba : array-like
        Per-class probability estimates, passed unchanged to ``log_loss`` and
        ``roc_auc_score``.
    model_name : str
        Display name of the model. Currently unused; kept so existing callers
        keep working.

    Returns
    -------
    dict
        Maps metric name to value, in a fixed insertion order: accuracy;
        precision, recall and F1 under macro/micro/weighted averaging; MCC;
        Cohen's kappa; log loss; one-vs-rest ROC AUC (macro and weighted).
        The two ROC AUC entries are ``nan`` when ``roc_auc_score`` raises
        ``ValueError``. A float ``nan`` is used rather than a string so that
        tables built from the result stay numeric and can be rounded.
    """
    metrics = {'Accuracy': accuracy_score(y_true, y_pred)}

    # Precision / recall / F1 under each averaging scheme. The nested loop
    # reproduces the key order: all precision entries, then recall, then F1.
    scorers = (
        ('Precision', precision_score),
        ('Recall', recall_score),
        ('F1 Score', f1_score),
    )
    for label, scorer in scorers:
        for average in ('macro', 'micro', 'weighted'):
            key = f'{label} ({average.capitalize()})'
            metrics[key] = scorer(y_true, y_pred, average=average)

    # Agreement-style and probabilistic metrics
    metrics['Matthews Correlation Coefficient'] = matthews_corrcoef(y_true, y_pred)
    metrics['Cohen\'s Kappa'] = cohen_kappa_score(y_true, y_pred)
    metrics['Log Loss'] = log_loss(y_true, y_proba)

    # Multi-class ROC AUC (one-vs-rest). Only ValueError is treated as
    # "metric unavailable"; any other exception indicates a real bug and
    # should propagate instead of being silently hidden.
    try:
        auc_macro = roc_auc_score(y_true, y_proba, multi_class='ovr', average='macro')
        auc_weighted = roc_auc_score(y_true, y_proba, multi_class='ovr', average='weighted')
    except ValueError:
        auc_macro = auc_weighted = float('nan')
    metrics['ROC AUC (OvR Macro)'] = auc_macro
    metrics['ROC AUC (OvR Weighted)'] = auc_weighted

    return metrics

# Score every trained model on the held-out test split
all_metrics = {
    name: calculate_all_metrics(y_test, predictions[name], probabilities[name], name)
    for name in trained_models
}

# Transpose so that each row is a model and each column a metric
metrics_df = pd.DataFrame(all_metrics).transpose()
print("Comprehensive Metrics for All Models:", "=" * 60, sep="\n")
print(metrics_df.round(4))

In [None]:
# Visualize metrics comparison: one bar chart per headline metric.
models = list(all_metrics.keys())

# (key in all_metrics, y-axis label, bar colour) for each panel, listed in
# row-major order of the 2x2 grid. Driving the four panels from this table
# replaces four copy-pasted plotting sections.
panels = [
    ('Accuracy', 'Accuracy', 'skyblue'),
    ('F1 Score (Macro)', 'F1 Score', 'lightcoral'),
    ('Precision (Macro)', 'Precision', 'lightgreen'),
    ('Recall (Macro)', 'Recall', 'gold'),
]

fig, axes = plt.subplots(2, 2, figsize=(16, 12))

for ax, (metric_key, axis_label, bar_color) in zip(axes.ravel(), panels):
    values = [all_metrics[model][metric_key] for model in models]
    ax.bar(models, values, color=bar_color)
    ax.set_title(f'{metric_key} Comparison')
    ax.set_ylabel(axis_label)
    ax.tick_params(axis='x', rotation=45)
    # Print each score just above its bar
    for position, value in enumerate(values):
        ax.text(position, value + 0.01, f'{value:.3f}', ha='center')

plt.tight_layout()
plt.show()

## 6. Detailed Classification Reports

In [None]:
# Per-class precision / recall / F1 breakdown for each model
for name in trained_models:
    report = classification_report(y_test, predictions[name], target_names=target_names)
    print(f"\nClassification Report for {name}:")
    print("=" * 50)
    print(report)

## 7. Confusion Matrix Analysis

In [None]:
# Plot confusion matrices for all models.
# The grid is sized from the number of models (three panels per row) instead
# of assuming exactly five models in a fixed 2x3 layout.
n_cols = 3
n_rows = max(1, int(np.ceil(len(trained_models) / n_cols)))
fig, axes = plt.subplots(n_rows, n_cols, figsize=(6 * n_cols, 6 * n_rows), squeeze=False)
axes = axes.ravel()

# Passing the full label list keeps every matrix the same size as the tick
# labels even if some class never appears in y_test or in the predictions.
class_labels = np.arange(len(target_names))

for ax, name in zip(axes, trained_models):
    cm = confusion_matrix(y_test, predictions[name], labels=class_labels)

    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=target_names, yticklabels=target_names, ax=ax)
    ax.set_title(f'Confusion Matrix - {name}')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')

# Remove any grid cells that did not receive a model
for unused_ax in axes[len(trained_models):]:
    fig.delaxes(unused_ax)
plt.tight_layout()
plt.show()

## 8. ROC Curves and AUC Analysis

In [None]:
# Plot ROC curves for multiclass classification (One-vs-Rest).
# The class list is derived from the labels, so n_classes and the binarized
# columns always agree (previously the classes were hard-coded as [0, 1, 2]).
classes = np.unique(y)
n_classes = len(classes)
y_test_binarized = label_binarize(y_test, classes=classes)
# NOTE: the per-class indexing below assumes at least three classes and that
# each model's predict_proba columns follow the same sorted label order.

# Size the grid from the number of models (three panels per row)
n_cols = 3
n_rows = max(1, int(np.ceil(len(trained_models) / n_cols)))
fig, axes = plt.subplots(n_rows, n_cols, figsize=(6 * n_cols, 6 * n_rows), squeeze=False)
axes = axes.ravel()
colors = ['blue', 'red', 'green']

for ax, name in zip(axes, trained_models):
    y_proba = probabilities[name]

    for class_idx in range(n_classes):
        # One-vs-rest: this class is the positive label, all others negative
        y_true_class = y_test_binarized[:, class_idx]
        y_score_class = y_proba[:, class_idx]
        fpr, tpr, _ = roc_curve(y_true_class, y_score_class)
        class_auc = roc_auc_score(y_true_class, y_score_class)

        # Cycle through the palette so no class is silently dropped
        ax.plot(fpr, tpr, color=colors[class_idx % len(colors)], lw=2,
                label=f'{target_names[class_idx]} (AUC = {class_auc:.2f})')

    # Diagonal = performance of a random classifier
    ax.plot([0, 1], [0, 1], 'k--', lw=2)
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title(f'ROC Curves - {name}')
    ax.legend(loc="lower right")

# Remove any grid cells that did not receive a model
for unused_ax in axes[len(trained_models):]:
    fig.delaxes(unused_ax)
plt.tight_layout()
plt.show()

## 9. Precision-Recall Curves

In [None]:
# Plot Precision-Recall curves (one-vs-rest, one panel per model).
# Reuses n_classes and y_test_binarized from the ROC cell above.
# The grid is sized from the number of models (three panels per row) instead
# of assuming exactly five models in a fixed 2x3 layout.
n_cols = 3
n_rows = max(1, int(np.ceil(len(trained_models) / n_cols)))
fig, axes = plt.subplots(n_rows, n_cols, figsize=(6 * n_cols, 6 * n_rows), squeeze=False)
axes = axes.ravel()
colors = ['blue', 'red', 'green']

for ax, name in zip(axes, trained_models):
    y_proba = probabilities[name]

    for class_idx in range(n_classes):
        # One-vs-rest: this class is the positive label, all others negative
        y_true_class = y_test_binarized[:, class_idx]
        y_score_class = y_proba[:, class_idx]
        precision, recall, _ = precision_recall_curve(y_true_class, y_score_class)
        avg_precision = average_precision_score(y_true_class, y_score_class)

        # Cycle through the palette so no class is silently dropped
        ax.plot(recall, precision, color=colors[class_idx % len(colors)], lw=2,
                label=f'{target_names[class_idx]} (AP = {avg_precision:.2f})')

    ax.set_xlabel('Recall')
    ax.set_ylabel('Precision')
    ax.set_title(f'Precision-Recall Curves - {name}')
    ax.legend(loc="lower left")
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])

# Remove any grid cells that did not receive a model
for unused_ax in axes[len(trained_models):]:
    fig.delaxes(unused_ax)
plt.tight_layout()
plt.show()

## 10. Cross-Validation Analysis

In [None]:
# Perform cross-validation for all models
cv_results = {}
cv_folds = 5
skf = StratifiedKFold(n_splits=cv_folds, shuffle=True, random_state=42)

print(f"Performing {cv_folds}-fold cross-validation...")
print("=" * 50)

for name, model in classifiers.items():
    # Scale inside each fold. Cross-validating on X_train_scaled would leak
    # information: that scaler was fitted on all training rows, including the
    # rows each fold holds out for validation. Wrapping the scaler and model
    # in a pipeline and passing the raw X_train makes every fold fit its own
    # scaler on that fold's training portion only. cross_val_score clones the
    # pipeline, so the already-fitted models in `classifiers` are not modified.
    fold_pipeline = make_pipeline(StandardScaler(), model)
    cv_scores = cross_val_score(fold_pipeline, X_train, y_train, cv=skf, scoring='accuracy')
    cv_results[name] = {
        'scores': cv_scores,
        'mean': cv_scores.mean(),
        'std': cv_scores.std()
    }

    print(f"{name}:")
    print(f"  CV Scores: {cv_scores.round(4)}")
    # The +/- figure is two standard deviations of the fold scores
    print(f"  Mean CV Score: {cv_scores.mean():.4f} (+/- {cv_scores.std() * 2:.4f})")
    print()

# Plot cross-validation results (error bars show one standard deviation)
fig, ax = plt.subplots(figsize=(12, 8))

model_names = list(cv_results.keys())
means = [cv_results[name]['mean'] for name in model_names]
stds = [cv_results[name]['std'] for name in model_names]

x_pos = np.arange(len(model_names))
bars = ax.bar(x_pos, means, yerr=stds, capsize=5, color='lightblue', alpha=0.7)
ax.set_xlabel('Models')
ax.set_ylabel('Cross-Validation Accuracy')
ax.set_title('Cross-Validation Results Comparison')
ax.set_xticks(x_pos)
ax.set_xticklabels(model_names, rotation=45, ha='right')

# Add value labels just above the top of each error bar
for i, (mean, std) in enumerate(zip(means, stds)):
    ax.text(i, mean + std + 0.01, f'{mean:.3f}', ha='center', va='bottom')

plt.tight_layout()
plt.show()

## 11. Feature Importance Analysis

In [None]:
# Feature importance for the two models that expose it directly
fig, (ax_rf, ax_lr) = plt.subplots(1, 2, figsize=(15, 6))

# Left panel: the Random Forest's feature_importances_ attribute
rf_model = trained_models['Random Forest']
rf_importance = rf_model.feature_importances_
ax_rf.bar(feature_names, rf_importance, color='lightgreen')
ax_rf.set_title('Feature Importance - Random Forest')
ax_rf.set_ylabel('Importance')
ax_rf.tick_params(axis='x', rotation=45)

# Right panel: Logistic Regression coefficient magnitudes, averaged over the
# rows of coef_ so that each feature gets a single value
lr_model = trained_models['Logistic Regression']
lr_coef = np.mean(np.abs(lr_model.coef_), axis=0)
ax_lr.bar(feature_names, lr_coef, color='lightcoral')
ax_lr.set_title('Feature Coefficients - Logistic Regression')
ax_lr.set_ylabel('Absolute Coefficient Value')
ax_lr.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

## 12. Model Performance Summary

In [None]:
# Create a comprehensive summary: one record per model, combining the test-set
# metrics with the cross-validation statistics
summary_data = [
    {
        'Model': name,
        'Test Accuracy': all_metrics[name]['Accuracy'],
        'CV Mean': cv_results[name]['mean'],
        'CV Std': cv_results[name]['std'],
        'F1 Score': all_metrics[name]['F1 Score (Macro)'],
        'Precision': all_metrics[name]['Precision (Macro)'],
        'Recall': all_metrics[name]['Recall (Macro)'],
        'ROC AUC': all_metrics[name]['ROC AUC (OvR Macro)'],
        'MCC': all_metrics[name]['Matthews Correlation Coefficient'],
        'Cohen\'s Kappa': all_metrics[name]['Cohen\'s Kappa']
    }
    for name in trained_models
]

summary_df = pd.DataFrame(summary_data)
print("Model Performance Summary:")
print("=" * 80)
print(summary_df.round(4).to_string(index=False))

# Find best performing model by test accuracy (the first one wins on ties).
# idxmax() returns an index *label*, so the row is fetched with .loc; using
# positional .iloc only worked while the index was the default RangeIndex.
best_model_idx = summary_df['Test Accuracy'].idxmax()
best_row = summary_df.loc[best_model_idx]
best_model = best_row['Model']
best_accuracy = best_row['Test Accuracy']

print(f"\n\nBest Performing Model: {best_model}")
print(f"Best Test Accuracy: {best_accuracy:.4f}")

## 13. Prediction Examples

In [None]:
# Walk through a few individual test samples using the best model
best_model_obj = trained_models[best_model]
sample_indices = [0, 15, 30]  # positions within the test split

print(f"Prediction Examples using {best_model}:")
print("=" * 60)

for idx in sample_indices:
    # Skip positions that fall outside the test split
    if idx >= len(X_test):
        continue

    # Slice (rather than index) to keep the 2-D shape the estimator expects
    sample_features = X_test_scaled[idx:idx+1]
    prediction = best_model_obj.predict(sample_features)[0]
    probabilities_sample = best_model_obj.predict_proba(sample_features)[0]
    actual = y_test[idx]

    print(f"\nSample {idx + 1}:")
    # Features are shown on the original (unscaled) measurement scale
    print(f"  Features: {X_test[idx].round(2)}")
    print(f"  Actual: {target_names[actual]}")
    print(f"  Predicted: {target_names[prediction]}")
    print(f"  Prediction Probabilities:")
    for class_name, prob in zip(target_names, probabilities_sample):
        print(f"    {class_name}: {prob:.4f}")
    print(f"  Correct: {'✓' if prediction == actual else '✗'}")

## Conclusion

This notebook has demonstrated a comprehensive classification analysis including:

1. **Data Exploration**: Visualized feature distributions and correlations
2. **Model Training**: Trained 5 different classification algorithms
3. **Comprehensive Metrics**: Calculated accuracy, precision, recall, F1-score, ROC AUC, MCC, Cohen's Kappa, and more
4. **Visualization**: Created confusion matrices, ROC curves, and precision-recall curves
5. **Cross-Validation**: Performed stratified k-fold cross-validation
6. **Feature Importance**: Analyzed which features are most important for classification
7. **Model Comparison**: Compared all models across multiple metrics

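The analysis shows that all models perform exceptionally well on the Iris dataset, which is expected because it is a small, clean dataset whose classes are largely well separated. The Random Forest and SVM models show particularly strong performance across all metrics, although with only 45 test samples a single misclassification moves accuracy by about two percentage points, so small differences between models should not be over-interpreted; see the summary table in Section 12 for the exact figures.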