# Testing MobileNet Models on Rust Dataset

This notebook tests all three MobileNet variants (V2, V3-Large, V3-Small) on the test images from the Rust_Dataset.

In [None]:
# Import necessary libraries
import os
import time
import numpy as np
import torch
import torch.nn as nn
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_recall_fscore_support
import matplotlib.pyplot as plt
import seaborn as sns

# Report the runtime environment and choose the device every later cell runs on.
cuda_available = torch.cuda.is_available()  # queried once, reused below

print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {cuda_available}")

# Prefer the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device("cuda") if cuda_available else torch.device("cpu")
print(f"Using device: {device}")

In [None]:
# Configuration
IMG_SIZE = 640          # every test image is resized to IMG_SIZE x IMG_SIZE
NUM_CLASSES = 4         # number of output classes passed to load_model
test_dir = "../Rust_Dataset/test"

# Define transforms for test data
# NOTE(review): Normalize uses the per-channel mean/std commonly used with
# torchvision's ImageNet-pretrained models; this is presumably the same
# preprocessing as used during training -- confirm against the training notebook.
test_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Load test dataset
# batch_size=1 and shuffle=False: images are evaluated one at a time, in a fixed order.
test_dataset = datasets.ImageFolder(test_dir, transform=test_transform)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

# Get class names
class_names = test_dataset.classes

# ImageFolder derives label indices from the sub-folder names it finds. A
# missing or extra folder would shift every index and silently corrupt all
# metrics computed later, so fail loudly here instead.
if len(class_names) != NUM_CLASSES:
    raise ValueError(
        f"Expected {NUM_CLASSES} class folders in {test_dir!r}, "
        f"found {len(class_names)}: {class_names}"
    )

print(f"Classes: {class_names}")
print(f"Number of test images: {len(test_dataset)}")

In [None]:
# Function to load model
def load_model(model_name, model_path, num_classes=4):
    """Build a MobileNet classifier and restore its saved weights.

    Args:
        model_name: One of 'mobilenet_v2', 'mobilenet_v3_large' or
            'mobilenet_v3_small'.
        model_path: File whose ``torch.load`` result is passed to
            ``load_state_dict``.
        num_classes: Output size of the replaced final linear layer.

    Returns:
        The model, moved to the notebook-level ``device`` and put in eval mode.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    # weights=None: the strict load_state_dict below overwrites every
    # parameter and buffer, so fetching ImageNet weights first would only add
    # a network download whose result is thrown away.
    if model_name == 'mobilenet_v2':
        model = models.mobilenet_v2(weights=None)
        model.classifier[1] = nn.Linear(model.last_channel, num_classes)
    elif model_name in ('mobilenet_v3_large', 'mobilenet_v3_small'):
        # Both V3 variants keep their final linear layer at classifier[3].
        model = getattr(models, model_name)(weights=None)
        num_ftrs = model.classifier[3].in_features
        model.classifier[3] = nn.Linear(num_ftrs, num_classes)
    else:
        raise ValueError(f"Unknown model: {model_name}")

    # Load trained weights
    # NOTE(review): torch.load unpickles the file -- only load checkpoints from
    # a trusted source (consider weights_only=True where the installed PyTorch
    # supports it).
    state_dict = torch.load(model_path, map_location=device)
    model.load_state_dict(state_dict)
    model = model.to(device)
    model.eval()

    return model

print("Model loading function defined")

In [None]:
# Function to evaluate model
def evaluate_model(model, test_loader, device):
    """Run the model over a loader and collect predictions and timing.

    Args:
        model: Module called as ``model(inputs)``; its output is reduced with
            ``torch.max(outputs, 1)`` to obtain the predicted class index.
        test_loader: Iterable yielding ``(inputs, labels)`` tensor batches.
        device: Device the inputs are moved to before the forward pass.

    Returns:
        Tuple ``(predictions, labels, inference_time)`` where the first two are
        NumPy arrays with one entry per image and ``inference_time`` is the
        mean forward-pass time in milliseconds per image (0.0 when the loader
        yields no images).
    """
    model.eval()
    all_preds = []
    all_labels = []
    forward_seconds = 0.0

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)

            # Time only the forward pass: data loading / preprocessing happens
            # in the loader before this point and is not model inference.
            # CUDA kernels are launched asynchronously, so synchronise on both
            # sides of the timed region to measure the real execution time.
            if inputs.is_cuda:
                torch.cuda.synchronize(inputs.device)
            tick = time.perf_counter()

            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)

            if inputs.is_cuda:
                torch.cuda.synchronize(inputs.device)
            forward_seconds += time.perf_counter() - tick

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Divide by the number of images actually processed; guard the empty case
    # instead of raising ZeroDivisionError.
    num_images = len(all_labels)
    if num_images:
        inference_time = (forward_seconds / num_images) * 1000  # ms per image
    else:
        inference_time = 0.0

    return np.array(all_preds), np.array(all_labels), inference_time

print("Evaluation function defined")

## Test All Models

Now we'll test all three MobileNet variants on the test dataset.

In [None]:
# Define models to test
# Each entry is (architecture key for load_model, checkpoint file, display label).
models_to_test = [
    ('mobilenet_v2', 'rust_mobilenetv2.pth', 'MobileNet V2'),
    ('mobilenet_v3_large', 'rust_mobilenetv3_large.pth', 'MobileNet V3 Large'),
    ('mobilenet_v3_small', 'rust_mobilenetv3_small.pth', 'MobileNet V3 Small')
]

# Store results: display label -> metrics dict, or None when the model failed.
results = {}

separator = '=' * 60

# Test each model
for model_name, model_path, display_name in models_to_test:
    print(f"\n{separator}")
    print(f"Testing {display_name}")
    print(separator)

    try:
        # Load model
        model = load_model(model_name, model_path, NUM_CLASSES)
        print(f"✓ Model loaded successfully from {model_path}")

        # Evaluate
        predictions, true_labels, inf_time = evaluate_model(model, test_loader, device)

        # Calculate metrics (precision / recall / F1 are support-weighted averages)
        accuracy = accuracy_score(true_labels, predictions)
        precision, recall, f1, support = precision_recall_fscore_support(
            true_labels, predictions, average='weighted', zero_division=0
        )

        # Store results
        summary = {
            'accuracy': accuracy,
            'precision': precision,
            'recall': recall,
            'f1': f1,
            'inference_time': inf_time,
            'predictions': predictions,
            'true_labels': true_labels,
            'model_params': sum(p.numel() for p in model.parameters())
        }
        results[display_name] = summary

        print("\nResults:")
        print(f"  Accuracy:        {summary['accuracy']:.4f}")
        print(f"  Precision:       {summary['precision']:.4f}")
        print(f"  Recall:          {summary['recall']:.4f}")
        print(f"  F1 Score:        {summary['f1']:.4f}")
        print(f"  Inference Time:  {summary['inference_time']:.2f} ms/image")
        print(f"  Total Parameters: {summary['model_params']:,}")

    except Exception as e:
        # Best effort: a failing model is recorded as None so that the
        # remaining models are still evaluated.
        print(f"✗ Error loading or testing {display_name}: {e}")
        results[display_name] = None

print(f"\n{separator}")
print("Testing completed!")
print(separator)

## Detailed Classification Reports

View detailed metrics for each class.

In [None]:
# Print detailed classification reports
# Pass the full label set explicitly: without it the report only covers classes
# that occur in the labels or predictions, and raises when that count differs
# from len(class_names).
report_labels = list(range(len(class_names)))

for model_name, result in results.items():
    if result is None:
        # Model failed to load or evaluate; nothing to report.
        continue

    print(f"\n{'='*60}")
    print(f"Classification Report - {model_name}")
    print(f"{'='*60}")
    print(classification_report(
        result['true_labels'],
        result['predictions'],
        labels=report_labels,
        target_names=class_names,
        digits=4,
        zero_division=0,  # same policy as the summary metrics computed earlier
    ))

## Confusion Matrices

Visualize the confusion matrices for each model.

In [None]:
# Plot confusion matrices
# Only models that were evaluated successfully get a panel.
evaluated_models = [(name, res) for name, res in results.items() if res is not None]
num_models = len(evaluated_models)

if num_models == 0:
    # plt.subplots(1, 0) would raise; report the situation instead.
    print("No models were evaluated successfully; nothing to plot.")
else:
    # squeeze=False always returns a 2-D array of axes, so a single model
    # needs no special-casing.
    fig, axes = plt.subplots(1, num_models, figsize=(6*num_models, 5), squeeze=False)

    # Fix the label set so the matrix always has one row/column per entry of
    # class_names, even if a class never appears in labels or predictions.
    cm_labels = list(range(len(class_names)))

    for ax, (model_name, result) in zip(axes[0], evaluated_models):
        cm = confusion_matrix(result['true_labels'], result['predictions'], labels=cm_labels)

        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                    xticklabels=class_names, yticklabels=class_names,
                    ax=ax)
        ax.set_title(f'{model_name}\nConfusion Matrix')
        ax.set_ylabel('True Label')
        ax.set_xlabel('Predicted Label')

    plt.tight_layout()
    plt.show()

## Model Comparison

Compare all models across different metrics.

In [None]:
# Create comparison table
import pandas as pd

# One row per successfully evaluated model. Every value is stored as a
# pre-formatted string so the printed table has fixed precision.
comparison_data = [
    {
        'Model': model_name,
        'Accuracy': format(result['accuracy'], '.4f'),
        'Precision': format(result['precision'], '.4f'),
        'Recall': format(result['recall'], '.4f'),
        'F1 Score': format(result['f1'], '.4f'),
        'Inference Time (ms)': format(result['inference_time'], '.2f'),
        'Parameters': format(result['model_params'], ','),
    }
    for model_name, result in results.items()
    if result is not None
]

comparison_df = pd.DataFrame(comparison_data)

table_rule = "=" * 100
print("\n" + table_rule)
print("MODEL COMPARISON SUMMARY")
print(table_rule)
print(comparison_df.to_string(index=False))
print(table_rule)

In [None]:
# Visualize comparison
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
(ax_acc, ax_scores), (ax_time, ax_params) = axes


def _column(key, convert=float):
    """Return one column of comparison_data, converting each stored string."""
    return [convert(row[key]) for row in comparison_data]


def _label_bars(ax, values, lift, spec, **text_kwargs):
    """Write each bar's value centred above it, `lift` data units higher."""
    for position, value in enumerate(values):
        ax.text(position, value + lift, format(value, spec),
                ha='center', va='bottom', **text_kwargs)


# Prepare data (numeric values are parsed back from the formatted table strings)
model_names = [row['Model'] for row in comparison_data]
accuracies = _column('Accuracy')
precisions = _column('Precision')
recalls = _column('Recall')
f1_scores = _column('F1 Score')
inf_times = _column('Inference Time (ms)')

# Accuracy comparison
ax_acc.bar(model_names, accuracies, color=['#1f77b4', '#ff7f0e', '#2ca02c'])
ax_acc.set_title('Accuracy Comparison', fontsize=12, fontweight='bold')
ax_acc.set_ylabel('Accuracy')
ax_acc.set_ylim([0, 1])
ax_acc.grid(axis='y', alpha=0.3)
_label_bars(ax_acc, accuracies, 0.02, '.4f')

# Precision, Recall, F1 comparison (three bars side by side per model)
x = np.arange(len(model_names))
width = 0.25
ax_scores.bar(x - width, precisions, width, label='Precision', color='#1f77b4')
ax_scores.bar(x, recalls, width, label='Recall', color='#ff7f0e')
ax_scores.bar(x + width, f1_scores, width, label='F1 Score', color='#2ca02c')
ax_scores.set_title('Precision, Recall, F1 Score', fontsize=12, fontweight='bold')
ax_scores.set_ylabel('Score')
ax_scores.set_xticks(x)
ax_scores.set_xticklabels(model_names, rotation=15, ha='right')
ax_scores.set_ylim([0, 1])
ax_scores.legend()
ax_scores.grid(axis='y', alpha=0.3)

# Inference time comparison
ax_time.bar(model_names, inf_times, color=['#d62728', '#9467bd', '#8c564b'])
ax_time.set_title('Inference Time Comparison', fontsize=12, fontweight='bold')
ax_time.set_ylabel('Time (ms/image)')
ax_time.grid(axis='y', alpha=0.3)
_label_bars(ax_time, inf_times, 0.1, '.2f')

# Parameters comparison (strip the thousands separators before parsing)
params = _column('Parameters', lambda text: int(text.replace(',', '')))
ax_params.bar(model_names, params, color=['#e377c2', '#7f7f7f', '#bcbd22'])
ax_params.set_title('Model Parameters', fontsize=12, fontweight='bold')
ax_params.set_ylabel('Number of Parameters')
ax_params.grid(axis='y', alpha=0.3)
ax_params.ticklabel_format(style='plain', axis='y')
# The label offset is 2% of the tallest bar; default=0 keeps the empty case harmless.
_label_bars(ax_params, params, max(params, default=0) * 0.02, ',', fontsize=8)

plt.tight_layout()
plt.show()

## Best Model Summary

Identify the best performing model for each metric.

In [None]:
# Find best models for each metric
metrics = ['accuracy', 'precision', 'recall', 'f1']
metric_names = ['Accuracy', 'Precision', 'Recall', 'F1 Score']

# Only successfully evaluated models can win a category. Filtering once
# replaces the per-call None sentinels and keeps max()/min() from ever being
# handed an empty sequence.
valid_results = {name: res for name, res in results.items() if res is not None}

print("\n" + "="*60)
print("BEST MODELS BY METRIC")
print("="*60)

if valid_results:
    # Higher is better for the quality metrics.
    for metric, metric_name in zip(metrics, metric_names):
        best_model = max(valid_results.items(), key=lambda item: item[1][metric])
        print(f"\n{metric_name}:")
        print(f"  Best Model: {best_model[0]}")
        print(f"  Score: {best_model[1][metric]:.4f}")

    # Best inference time (lowest)
    best_inf_model = min(valid_results.items(), key=lambda item: item[1]['inference_time'])
    print(f"\nFastest Inference:")
    print(f"  Best Model: {best_inf_model[0]}")
    print(f"  Time: {best_inf_model[1]['inference_time']:.2f} ms/image")

    # Smallest model (fewest parameters)
    best_param_model = min(valid_results.items(), key=lambda item: item[1]['model_params'])
    print(f"\nSmallest Model:")
    print(f"  Best Model: {best_param_model[0]}")
    print(f"  Parameters: {best_param_model[1]['model_params']:,}")
else:
    print("\nNo models were evaluated successfully.")

print("="*60)