In [22]:
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.utils.class_weight import compute_class_weight
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification

# Synthetic stand-in data (swap in the "Thermal Dogs" dataset here):
# 1000 samples, 20 features, two classes generated with 0.9 / 0.1 weights.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_classes=2,
    weights=[0.9, 0.1],
    random_state=42,
)

# Classifier under evaluation, created with a fixed random seed.
model = RandomForestClassifier(random_state=42)


def _take_rows(data, indices):
    """Return the rows of *data* located at the integer positions *indices*."""
    if hasattr(data, "iloc"):
        # pandas objects: plain ``[]`` would look up labels (Series) or
        # columns (DataFrame) instead of row positions.
        return data.iloc[indices]
    if isinstance(data, (list, tuple)):
        # Plain sequences do not support index-array selection.
        data = np.asarray(data)
    return data[indices]


def stratified_cross_validation_for_imbalanced(model, X, y, n_splits=5):
    """Evaluate *model* with stratified K-fold CV using balanced class weights.

    For every fold a fresh clone of ``model`` is trained, so the estimator
    passed in is neither refitted nor has its ``class_weight`` overwritten.
    When the estimator exposes a ``class_weight`` parameter, 'balanced'
    weights are computed from that fold's training labels only (the
    validation labels never influence training) and are keyed by the actual
    class labels, so labels need not be ``0..k-1``.

    Per-fold and overall metrics are printed to stdout.

    Args:
        model: Scikit-learn compatible classifier (must be clonable and
            provide ``fit`` / ``predict``).
        X: Feature matrix; NumPy array, pandas DataFrame or nested sequence.
        y: Target labels; NumPy array, pandas Series or sequence.
        n_splits: Number of stratified folds.

    Returns:
        dict mapping ``'accuracy'``, ``'precision'``, ``'recall'`` and
        ``'f1_score'`` to a ``(mean, std)`` tuple computed across folds.
        Precision, recall and F1 use ``average='weighted'``.
    """
    # Local import keeps this block self-contained with respect to the
    # file-level import list.
    from sklearn.base import clone

    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

    # One list of per-fold values for every reported metric.
    fold_scores = {"accuracy": [], "precision": [], "recall": [], "f1_score": []}

    for fold, (train_index, val_index) in enumerate(skf.split(X, y)):
        # Splitting data into training and validation sets
        X_train, X_val = _take_rows(X, train_index), _take_rows(X, val_index)
        y_train, y_val = _take_rows(y, train_index), _take_rows(y, val_index)

        # Work on an unfitted copy so the caller's estimator stays untouched.
        fold_model = clone(model)

        # Setting class weights for the model, if it supports them
        if "class_weight" in fold_model.get_params(deep=False):
            classes = np.unique(y_train)
            weights = compute_class_weight("balanced", classes=classes, y=y_train)
            # Key the weights by the real class labels, not by position.
            fold_model.set_params(class_weight=dict(zip(classes.tolist(), weights)))

        # Train on the training split, predict on the validation split
        fold_model.fit(X_train, y_train)
        y_pred = fold_model.predict(X_val)

        # Calculating metrics with weighted average
        accuracy = accuracy_score(y_val, y_pred)
        precision = precision_score(y_val, y_pred, average='weighted')
        recall = recall_score(y_val, y_pred, average='weighted')
        f1 = f1_score(y_val, y_pred, average='weighted')

        fold_scores["accuracy"].append(accuracy)
        fold_scores["precision"].append(precision)
        fold_scores["recall"].append(recall)
        fold_scores["f1_score"].append(f1)

        # Print metrics for each fold
        print(f"Fold {fold+1}: Accuracy = {accuracy:.4f}, Precision = {precision:.4f}, Recall = {recall:.4f}, F1 Score = {f1:.4f}")

    # Mean and (population) standard deviation of every metric across folds
    summary = {
        name: (np.mean(values), np.std(values))
        for name, values in fold_scores.items()
    }

    # Printing overall metrics
    print("\nOverall Metrics:")
    display_names = (
        ("Accuracy", "accuracy"),
        ("Precision", "precision"),
        ("Recall", "recall"),
        ("F1 Score", "f1_score"),
    )
    for label, key in display_names:
        mean, std = summary[key]
        print(f"{label}: Mean = {mean:.3f}, Std = {std:.3f}")

    return summary




# Run the 5-fold stratified evaluation on the example data
results = stratified_cross_validation_for_imbalanced(
    model,
    X,
    y,
    n_splits=5,
)

# Show the returned summary dictionary
print(results)

Fold 1: Accuracy = 0.9500, Precision = 0.9481, Recall = 0.9500, F1 Score = 0.9488
Fold 2: Accuracy = 0.9450, Precision = 0.9433, Recall = 0.9450, F1 Score = 0.9377
Fold 3: Accuracy = 0.9450, Precision = 0.9422, Recall = 0.9450, F1 Score = 0.9431
Fold 4: Accuracy = 0.9500, Precision = 0.9473, Recall = 0.9500, F1 Score = 0.9462
Fold 5: Accuracy = 0.9600, Precision = 0.9591, Recall = 0.9600, F1 Score = 0.9570

Overall Metrics:
Accuracy: Mean = 0.950, Std = 0.005
Precision: Mean = 0.948, Std = 0.006
Recall: Mean = 0.950, Std = 0.005
F1 Score: Mean = 0.947, Std = 0.006
{'accuracy': (0.95, 0.0054772255750516665), 'precision': (0.9480147027147027, 0.005996923228832872), 'recall': (0.95, 0.0054772255750516665), 'f1_score': (0.9465728196590364, 0.006399168831426605)}
