In [1]:
# model_testing.ipynb

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (classification_report, confusion_matrix, 
                            accuracy_score, precision_recall_fscore_support,
                            roc_curve, auc, roc_auc_score)
from sklearn.preprocessing import label_binarize
import pickle
import os
import cv2
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

class CricketModelTester:
    """
    Comprehensive testing suite for cricket object detection model.
    """
    
    def __init__(self, model_path, scaler_path, pca_path=None, feature_selector_path=None):
        """
        Load trained model and preprocessors.
        """
        print("Loading models and preprocessors...")
        
        # Load model
        with open(model_path, 'rb') as f:
            self.model = pickle.load(f)
        print(f"✓ Loaded model: {type(self.model).__name__}")
        
        # Load scaler
        with open(scaler_path, 'rb') as f:
            self.scaler = pickle.load(f)
        print(f"✓ Loaded scaler: {type(self.scaler).__name__}")
        
        # Load PCA (optional)
        self.pca = None
        if pca_path and os.path.exists(pca_path):
            with open(pca_path, 'rb') as f:
                self.pca = pickle.load(f)
            print(f"✓ Loaded PCA: {self.pca.n_components_} components")
        
        # Load feature selector (optional)
        self.feature_selector = None
        if feature_selector_path and os.path.exists(feature_selector_path):
            with open(feature_selector_path, 'rb') as f:
                self.feature_selector = pickle.load(f)
            print(f"✓ Loaded feature selector")
        
        print("\n" + "="*70)
        print("Model Testing Suite Ready!")
        print("="*70 + "\n")
    
    def load_test_data(self, test_file_path):
        """
        Load and prepare test data.
        """
        print(f"Loading test data from: {test_file_path}")
        
        df_test = pd.read_csv(test_file_path)
        df_clean = df_test.dropna()
        
        # Separate metadata and features
        metadata_cols = ['image', 'tile_i', 'tile_j', 'tile_number', 'augmentation']
        metadata_cols = [col for col in metadata_cols if col in df_clean.columns]
        
        self.metadata = df_clean[metadata_cols].copy()
        
        # Check if labels exist
        if 'y' in df_clean.columns:
            self.y_true = df_clean['y'].values
            X = df_clean.drop(metadata_cols + ['y'], axis=1)
            has_labels = True
        else:
            self.y_true = None
            X = df_clean.drop(metadata_cols, axis=1)
            has_labels = False
        
        print(f"✓ Test data shape: {X.shape}")
        print(f"✓ Has ground truth labels: {has_labels}")
        
        return X, has_labels
    
    def preprocess_features(self, X):
        """
        Apply same preprocessing as training.
        """
        print("\nApplying preprocessing...")
        
        # Feature selection
        if self.feature_selector:
            if isinstance(self.feature_selector, tuple):
                # Hybrid selector
                X = self.feature_selector[0].transform(X)
                X = self.feature_selector[1].transform(X)
                print(f"✓ Applied hybrid feature selection")
            else:
                X = self.feature_selector.transform(X)
                print(f"✓ Applied feature selection")
            print(f"  Features after selection: {X.shape[1]}")
        
        # Scaling
        X_scaled = self.scaler.transform(X)
        print(f"✓ Applied scaling")
        
        # PCA
        if self.pca:
            X_final = self.pca.transform(X_scaled)
            print(f"✓ Applied PCA: {X_final.shape[1]} components")
        else:
            X_final = X_scaled
        
        return X_final
    
    def predict(self, X_preprocessed):
        """
        Make predictions on preprocessed data.
        """
        print("\nMaking predictions...")
        
        # Predictions
        y_pred = self.model.predict(X_preprocessed)
        
        # Probabilities
        if hasattr(self.model, 'predict_proba'):
            y_proba = self.model.predict_proba(X_preprocessed)
        else:
            y_proba = None
        
        print(f"✓ Predictions complete")
        print(f"  Predicted classes: {np.unique(y_pred)}")
        
        return y_pred, y_proba
    
    def evaluate_performance(self, y_true, y_pred, y_proba=None):
        """
        Print accuracy, classification report, confusion matrix, class
        distribution and (when possible) ROC-AUC.

        Args:
            y_true: Ground-truth labels (array-like).
            y_pred: Predicted labels (array-like, same length as ``y_true``).
            y_proba: Optional per-class probability array; ROC-AUC is only
                attempted when it is given and ``y_true`` has more than one
                class.

        Returns:
            tuple: ``(accuracy, cm)`` - the overall accuracy and the
            confusion matrix.

        Notes:
            The report's labels are the union of the classes in ``y_true``
            and ``y_pred``. Building the names from ``y_true`` alone makes
            ``classification_report`` raise when the model predicts a class
            that is absent from the ground truth.
        """
        y_true = np.asarray(y_true)
        y_pred = np.asarray(y_pred)

        print("\n" + "="*70)
        print("PERFORMANCE EVALUATION")
        print("="*70)

        # Overall metrics
        accuracy = accuracy_score(y_true, y_pred)
        print(f"\nOverall Accuracy: {accuracy:.4f}")

        # Sorted union of every label seen in either array.
        labels = np.union1d(y_true, y_pred)

        # Per-class metrics
        print("\n" + "-"*70)
        print("CLASSIFICATION REPORT")
        print("-"*70)
        print(classification_report(y_true, y_pred,
                                    labels=labels,
                                    target_names=[f'Class {i}' for i in labels]))

        # Confusion Matrix (same label order as the report)
        cm = confusion_matrix(y_true, y_pred, labels=labels)
        print("\n" + "-"*70)
        print("CONFUSION MATRIX")
        print("-"*70)
        print(cm)

        # Class distribution
        print("\n" + "-"*70)
        print("CLASS DISTRIBUTION")
        print("-"*70)
        print("True labels:")
        print(pd.Series(y_true).value_counts().sort_index())
        print("\nPredicted labels:")
        print(pd.Series(y_pred).value_counts().sort_index())

        # ROC-AUC (if probabilities available)
        n_true_classes = len(np.unique(y_true))
        if y_proba is not None and n_true_classes > 1:
            print("\n" + "-"*70)
            print("ROC-AUC SCORES")
            print("-"*70)
            try:
                if n_true_classes == 2:
                    # Binary classification
                    roc_auc = roc_auc_score(y_true, y_proba[:, 1])
                    print(f"ROC-AUC: {roc_auc:.4f}")
                else:
                    # Multi-class
                    roc_auc = roc_auc_score(y_true, y_proba,
                                            multi_class='ovr', average='weighted')
                    print(f"ROC-AUC (weighted): {roc_auc:.4f}")
            except (ValueError, IndexError, TypeError) as e:
                # Best effort: a shape or class mismatch between y_true and
                # y_proba must not abort the rest of the evaluation.
                print(f"Could not compute ROC-AUC: {e}")

        print("="*70 + "\n")

        return accuracy, cm
    
    def visualize_confusion_matrix(self, cm, class_names=None, save_path=None):
        """
        Plot the confusion matrix twice: row-normalized and as raw counts.

        Args:
            cm: Square confusion matrix (rows are true labels, columns are
                predicted labels).
            class_names: Optional tick labels. When missing, or when the
                length does not match ``cm``, generic ``Class i`` names are
                used so the plot still renders.
            save_path: Optional file path for the normalized figure. The
                counts figure is written next to it with ``_counts`` inserted
                before the extension.

        Notes:
            * Rows with no true samples are shown as 0% instead of NaN.
            * The counts path is derived with ``os.path.splitext``, so a
              non-``.png`` path no longer overwrites the normalized figure
              and ``pathlib.Path`` values are accepted.
        """
        cm = np.asarray(cm)

        if class_names is not None and len(class_names) != len(cm):
            print(f"⚠️  class_names has {len(class_names)} entries but the confusion "
                  f"matrix has {len(cm)} classes - using generic names")
            class_names = None
        if class_names is None:
            class_names = [f'Class {i}' for i in range(len(cm))]

        plt.figure(figsize=(10, 8))

        # Normalize each row by its total, leaving all-zero rows at 0.
        counts = cm.astype('float')
        row_totals = counts.sum(axis=1, keepdims=True)
        cm_norm = np.divide(counts, row_totals,
                            out=np.zeros_like(counts), where=row_totals != 0)

        # Plot
        sns.heatmap(cm_norm, annot=True, fmt='.2%', cmap='Blues',
                    xticklabels=class_names, yticklabels=class_names,
                    cbar_kws={'label': 'Percentage'})

        plt.title('Confusion Matrix (Normalized)', fontsize=16, fontweight='bold')
        plt.ylabel('True Label', fontsize=12)
        plt.xlabel('Predicted Label', fontsize=12)
        plt.tight_layout()

        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches='tight')
            print(f"✓ Confusion matrix saved to {save_path}")

        plt.show()

        # Also plot counts
        plt.figure(figsize=(10, 8))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Greens',
                    xticklabels=class_names, yticklabels=class_names,
                    cbar_kws={'label': 'Count'})

        plt.title('Confusion Matrix (Counts)', fontsize=16, fontweight='bold')
        plt.ylabel('True Label', fontsize=12)
        plt.xlabel('Predicted Label', fontsize=12)
        plt.tight_layout()

        if save_path:
            # Insert the suffix before the extension, whatever it is.
            root, ext = os.path.splitext(str(save_path))
            count_path = f"{root}_counts{ext}"
            plt.savefig(count_path, dpi=300, bbox_inches='tight')

        plt.show()
    
    def visualize_roc_curves(self, y_true, y_proba, save_path=None):
        """
        Plot one-vs-rest ROC curves, one per probability column.

        Args:
            y_true: Ground-truth labels.
            y_proba: Probability array with one column per class.
            save_path: Optional file path where the figure is saved.

        Notes:
            * Column ``i`` of ``y_proba`` is matched to ``model.classes_[i]``
              when the loaded model exposes ``classes_`` with a matching
              length; otherwise labels are assumed to be ``0..n_classes-1``
              (the previous behaviour).
            * ``label_binarize`` returns a single column for two classes. It
              is expanded to two columns here so the binary case no longer
              fails with IndexError.
        """
        n_classes = y_proba.shape[1]

        # Prefer the model's own class order when it is available.
        classes = getattr(getattr(self, 'model', None), 'classes_', None)
        if classes is None or len(classes) != n_classes:
            classes = np.arange(n_classes)

        # Binarize labels
        y_true_bin = label_binarize(y_true, classes=classes)
        if n_classes == 2 and y_true_bin.shape[1] == 1:
            # Single column marks the second class; add the complement column.
            y_true_bin = np.hstack([1 - y_true_bin, y_true_bin])

        # Compute ROC curve and AUC for each class
        fpr = dict()
        tpr = dict()
        roc_auc = dict()

        for i in range(n_classes):
            fpr[i], tpr[i], _ = roc_curve(y_true_bin[:, i], y_proba[:, i])
            roc_auc[i] = auc(fpr[i], tpr[i])

        # Plot
        plt.figure(figsize=(10, 8))

        colors = ['blue', 'red', 'green', 'orange', 'purple', 'brown', 'pink', 'gray']
        for i in range(n_classes):
            plt.plot(fpr[i], tpr[i], color=colors[i % len(colors)],
                     lw=2, label=f'Class {classes[i]} (AUC = {roc_auc[i]:.3f})')

        plt.plot([0, 1], [0, 1], 'k--', lw=2, label='Random Classifier')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate', fontsize=12)
        plt.ylabel('True Positive Rate', fontsize=12)
        plt.title('ROC Curves - Multi-Class', fontsize=16, fontweight='bold')
        plt.legend(loc='lower right', fontsize=10)
        plt.grid(alpha=0.3)
        plt.tight_layout()

        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches='tight')
            print(f"✓ ROC curves saved to {save_path}")

        plt.show()
    
    def visualize_prediction_distribution(self, y_true, y_pred, save_path=None):
        """
        Draw side-by-side bar charts of true and predicted class counts.

        The left panel shows the true labels and the right panel the
        predictions. Each bar is annotated with its count. The figure is
        saved to ``save_path`` when one is given, then shown.
        """
        fig, axes = plt.subplots(1, 2, figsize=(15, 5))

        # (axis, labels, bar colour, panel title) for each of the two panels.
        panels = (
            (axes[0], y_true, 'skyblue', 'True Label Distribution'),
            (axes[1], y_pred, 'lightcoral', 'Predicted Label Distribution'),
        )

        for ax, labels, bar_color, title in panels:
            counts = pd.Series(labels).value_counts().sort_index()
            ax.bar(counts.index.astype(str), counts.values,
                   color=bar_color, edgecolor='black')
            ax.set_title(title, fontsize=14, fontweight='bold')
            ax.set_xlabel('Class', fontsize=12)
            ax.set_ylabel('Count', fontsize=12)
            ax.grid(axis='y', alpha=0.3)

            # Write each count just above its bar (1% of the tallest bar).
            for position, height in enumerate(counts.values):
                ax.text(position, height + max(counts.values)*0.01, str(height),
                        ha='center', va='bottom', fontweight='bold')

        plt.tight_layout()

        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches='tight')
            print(f"✓ Distribution plot saved to {save_path}")

        plt.show()
    
    def per_class_analysis(self, y_true, y_pred, class_names=None):
        """
        Compute, print and plot precision, recall and F1 for every class.

        Args:
            y_true: Ground-truth labels.
            y_pred: Predicted labels.
            class_names: Optional display names. If the length equals the
                number of evaluated classes, names are used positionally (the
                previous behaviour). Otherwise an integer label ``k`` within
                range is shown as ``class_names[k]`` and any other label
                falls back to ``Class <label>``.

        Returns:
            pandas.DataFrame with columns ``Class``, ``Precision``,
            ``Recall``, ``F1-Score`` and ``Support``, one row per class.

        Notes:
            The evaluated classes are the union of the labels in ``y_true``
            and ``y_pred``, and names are aligned to that set. The DataFrame
            is therefore built without a length mismatch when a class is
            missing from the test set or appears only in the predictions.
        """
        y_true = np.asarray(y_true)
        y_pred = np.asarray(y_pred)
        labels = np.union1d(y_true, y_pred)

        def _display_name(position, label):
            # Resolve the display name for one evaluated label.
            if class_names is None:
                return f'Class {label}'
            if len(class_names) == len(labels):
                return class_names[position]
            try:
                index = int(label)
            except (TypeError, ValueError):
                return f'Class {label}'
            if index == label and 0 <= index < len(class_names):
                return class_names[index]
            return f'Class {label}'

        display_names = [_display_name(pos, label) for pos, label in enumerate(labels)]

        print("\n" + "="*70)
        print("PER-CLASS DETAILED ANALYSIS")
        print("="*70)

        # Get precision, recall, f1 for each class
        precision, recall, f1, support = precision_recall_fscore_support(
            y_true, y_pred, labels=labels, average=None
        )

        # Create DataFrame
        results_df = pd.DataFrame({
            'Class': display_names,
            'Precision': precision,
            'Recall': recall,
            'F1-Score': f1,
            'Support': support
        })

        print("\n", results_df.to_string(index=False))

        # Visualize: one panel per metric, all with the same layout.
        fig, axes = plt.subplots(1, 3, figsize=(18, 5))

        x = np.arange(len(display_names))
        width = 0.6

        panels = (
            ('Precision', precision, 'skyblue'),
            ('Recall', recall, 'lightcoral'),
            ('F1-Score', f1, 'lightgreen'),
        )

        for ax, (metric_name, values, bar_color) in zip(axes, panels):
            ax.bar(x, values, width, label=metric_name, color=bar_color, edgecolor='black')
            ax.set_ylabel('Score', fontsize=12)
            ax.set_title(f'{metric_name} by Class', fontsize=14, fontweight='bold')
            ax.set_xticks(x)
            ax.set_xticklabels(display_names, rotation=45, ha='right')
            ax.set_ylim([0, 1.1])
            ax.grid(axis='y', alpha=0.3)

            for i, v in enumerate(values):
                ax.text(i, v + 0.02, f'{v:.3f}', ha='center', va='bottom', fontweight='bold')

        plt.tight_layout()
        plt.show()

        return results_df
    
    def analyze_misclassifications(self, y_true, y_pred, top_n=10):
        """
        Analyze which classes are most often confused.
        """
        print("\n" + "="*70)
        print("MISCLASSIFICATION ANALYSIS")
        print("="*70)
        
        # Find misclassified samples
        misclassified_mask = y_true != y_pred
        n_misclassified = np.sum(misclassified_mask)
        
        print(f"\nTotal misclassified samples: {n_misclassified} ({n_misclassified/len(y_true)*100:.2f}%)")
        
        # Confusion pairs
        confusion_pairs = []
        for true_label, pred_label in zip(y_true[misclassified_mask], 
                                         y_pred[misclassified_mask]):
            confusion_pairs.append((true_label, pred_label))
        
        # Count confusion pairs
        from collections import Counter
        pair_counts = Counter(confusion_pairs)
        
        print(f"\nTop {top_n} most common misclassifications:")
        print("-" * 70)
        print(f"{'True Class':<12} {'Predicted Class':<18} {'Count':<10} {'% of Errors'}")
        print("-" * 70)
        
        for (true_cls, pred_cls), count in pair_counts.most_common(top_n):
            percentage = count / n_misclassified * 100
            print(f"{true_cls:<12} {pred_cls:<18} {count:<10} {percentage:>6.2f}%")
        
        print("="*70)
    
    def save_predictions(self, y_pred, y_proba=None, output_path='test_predictions.csv'):
        """
        Save predictions to CSV file.
        """
        results_df = self.metadata.copy()
        results_df['prediction'] = y_pred
        
        if self.y_true is not None:
            results_df['true_label'] = self.y_true
            results_df['correct'] = (self.y_true == y_pred)
        
        if y_proba is not None:
            # Add probability for each class
            for i in range(y_proba.shape[1]):
                results_df[f'prob_class_{i}'] = y_proba[:, i]
            
            # Add confidence (max probability)
            results_df['confidence'] = np.max(y_proba, axis=1)
        
        results_df.to_csv(output_path, index=False)
        print(f"\n✓ Predictions saved to {output_path}")
        
        return results_df
    
    def test_pipeline(self, test_file_path, output_dir='../outputs/test_results',
                     class_names=None, visualize=True):
        """
        Complete testing pipeline.
        """
        # Create output directory
        os.makedirs(output_dir, exist_ok=True)
        
        print("="*70)
        print("STARTING TEST PIPELINE")
        print("="*70)
        
        # Load test data
        X_test, has_labels = self.load_test_data(test_file_path)
        
        # Preprocess
        X_preprocessed = self.preprocess_features(X_test)
        
        # Predict
        y_pred, y_proba = self.predict(X_preprocessed)
        
        # Save predictions
        results_df = self.save_predictions(
            y_pred, y_proba, 
            output_path=os.path.join(output_dir, 'predictions_pexels_5.csv')
        )
        
        # If ground truth available, evaluate
        if has_labels and self.y_true is not None:
            # Evaluate
            accuracy, cm = self.evaluate_performance(self.y_true, y_pred, y_proba)
            
            if visualize:
                # Confusion matrix
                self.visualize_confusion_matrix(
                    cm, class_names=class_names,
                    save_path=os.path.join(output_dir, 'confusion_matrix.png')
                )
                
                # ROC curves
                if y_proba is not None:
                    self.visualize_roc_curves(
                        self.y_true, y_proba,
                        save_path=os.path.join(output_dir, 'roc_curves.png')
                    )
                
                # Distribution comparison
                self.visualize_prediction_distribution(
                    self.y_true, y_pred,
                    save_path=os.path.join(output_dir, 'distribution.png')
                )
                
                # Per-class analysis
                per_class_df = self.per_class_analysis(self.y_true, y_pred, class_names)
                per_class_df.to_csv(
                    os.path.join(output_dir, 'per_class_metrics.csv'), 
                    index=False
                )
                
                # Misclassification analysis
                self.analyze_misclassifications(self.y_true, y_pred)
        else:
            print("\n⚠️  No ground truth labels available - skipping evaluation")
            print("   Only predictions have been saved.")
        
        print("\n" + "="*70)
        print("TEST PIPELINE COMPLETE")
        print("="*70)
        
        return results_df


# ============================================================================
# USAGE EXAMPLES
# ============================================================================

# Notebook banner.
print("\n" + "="*70)
print("CRICKET OBJECT DETECTION - MODEL TESTING")
print("="*70 + "\n")

# Example 1: Test with ground truth labels
# NOTE(review): evaluation only runs when the CSV contains a 'y' column
# (see load_test_data); without it the pipeline saves predictions only.
print("="*70)
print("EXAMPLE 1: Testing with Ground Truth Labels")
print("="*70)

# Load the pickled model and preprocessors. Paths are relative to the
# notebook's working directory.
tester = CricketModelTester(
    model_path='../models/best_model.pkl',
    scaler_path='../models/scaler.pkl',
    pca_path='../models/pca_model.pkl',
    feature_selector_path='../models/feature_selector.pkl'
)

# Define class names (customize based on your labels)
# NOTE(review): presumably index i names label i - confirm against training.
class_names = ['Background', 'Ball', 'Bat', 'Stadium']

# Run test pipeline: load -> preprocess -> predict -> save (-> evaluate when
# labels are present). `results` holds the saved predictions DataFrame.
results = tester.test_pipeline(
    test_file_path='enhanced_features_pexels_5.csv',
    output_dir='../outputs/test_results',
    class_names=class_names,
    visualize=True
)

print("\n✓ Testing complete! Check outputs in ../outputs/test_results/")

# Example 2: Test without ground truth (inference only)
# print("\n" + "="*70)
# print("EXAMPLE 2: Inference on Unlabeled Data")
# print("="*70)

# tester2 = CricketModelTester(
#     model_path='../models/best_model.pkl',
#     scaler_path='../models/scaler.pkl',
#     pca_path='../models/pca_model.pkl',
#     feature_selector_path='../models/feature_selector.pkl'
# )

# results_inference = tester2.test_pipeline(
#     test_file_path='unlabeled_features.csv',
#     output_dir='../outputs/inference_results',
#     visualize=False
# )

# print("\n✓ Inference complete! Check predictions in ../outputs/inference_results/")


CRICKET OBJECT DETECTION - MODEL TESTING

EXAMPLE 1: Testing with Ground Truth Labels
Loading models and preprocessors...
✓ Loaded model: SVC
✓ Loaded scaler: RobustScaler
✓ Loaded PCA: 109 components
✓ Loaded feature selector

Model Testing Suite Ready!

STARTING TEST PIPELINE
Loading test data from: enhanced_features_pexels_5.csv
✓ Test data shape: (256, 3290)
✓ Has ground truth labels: False

Applying preprocessing...
✓ Applied hybrid feature selection
  Features after selection: 250
✓ Applied scaling
✓ Applied PCA: 109 components

Making predictions...
✓ Predictions complete
  Predicted classes: [0 1 2 3]

✓ Predictions saved to ../outputs/test_results\predictions_pexels_5.csv

⚠️  No ground truth labels available - skipping evaluation
   Only predictions have been saved.

TEST PIPELINE COMPLETE

✓ Testing complete! Check outputs in ../outputs/test_results/
