CUSTOM IMPLEMENTATION


In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# ResNet Model Evaluation on ImageNet Test Set Sample with Custom INT8 Quantization

import torch
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
import torchvision.datasets as datasets
import matplotlib.pyplot as plt
import numpy as np
import time
import random
import os
from torch.utils.data import Subset, DataLoader, Dataset
from PIL import Image

# Check if GPU is available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# To test on CPU also.
quantization_device = torch.device("cpu")

# This is the standard preprocessing for models pretrained on ImageNet
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Path to the extracted ImageNet data
imagenet_path = "/content/drive/My Drive/assignments_dl_cs7150/project/imagenet_data/dataset_dl/"
print(f"Loading from: {imagenet_path}")

all_image_files = []
class_folders = [f for f in os.listdir(imagenet_path) if os.path.isdir(os.path.join(imagenet_path, f))]
print(f"Found {len(class_folders)} class folders (00000 to 00108)")

for folder in class_folders:
    folder_path = os.path.join(imagenet_path, folder)
    files = [os.path.join(folder_path, f) for f in os.listdir(folder_path) if f.endswith('.jpg')]
    all_image_files.extend(files)

print(f"Found a total of {len(all_image_files)} images")

# Randomly sample 50 images
num_samples = 50
if len(all_image_files) > num_samples:
    random.shuffle(all_image_files)
    sampled_images = all_image_files[:num_samples]
else:
    sampled_images = all_image_files

print(f"Randomly sampled {len(sampled_images)} images for testing")

# Create a simple mapping from folder name to class index
folder_to_idx = {folder: idx for idx, folder in enumerate(sorted(class_folders))}


Using device: cpu
Loading from: /content/drive/My Drive/assignments_dl_cs7150/project/imagenet_data/dataset_dl/
Found 109 class folders (00000 to 00108)
Found a total of 5450 images
Randomly sampled 50 images for testing


In [3]:
# Custom dataset for the sampled images
class SampledImageNetDataset(Dataset):
    def __init__(self, image_files, transform=None):
        self.image_files = image_files
        self.transform = transform

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_path = self.image_files[idx]

        # Get the class folder from the path
        folder_name = os.path.basename(os.path.dirname(img_path))
        class_idx = folder_to_idx[folder_name]

        # Load and transform the image
        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)

        return image, class_idx

# Create the dataset and dataloader
sample_dataset = SampledImageNetDataset(sampled_images, transform=preprocess)
sample_loader = DataLoader(sample_dataset, batch_size=1, shuffle=False)

print(f"Created dataset with {len(sample_dataset)} images")
print(f"Number of classes represented: {len(set(folder_to_idx.values()))}")

# Create class mapping for visualization (folder name to index)
idx_to_class = {v: k for k, v in folder_to_idx.items()}

# Print a few examples
print("\nSample images:")
for i in range(min(5, len(sampled_images))):
    img_path = sampled_images[i]
    folder = os.path.basename(os.path.dirname(img_path))
    print(f"{i+1}. {img_path} (Class: {folder})")


Created dataset with 50 images
Number of classes represented: 109

Sample images:
1. /content/drive/My Drive/assignments_dl_cs7150/project/imagenet_data/dataset_dl/00074/324421257216414.jpg (Class: 00074)
2. /content/drive/My Drive/assignments_dl_cs7150/project/imagenet_data/dataset_dl/00040/799694495669414.jpg (Class: 00040)
3. /content/drive/My Drive/assignments_dl_cs7150/project/imagenet_data/dataset_dl/00035/08141709225444.jpg (Class: 00035)
4. /content/drive/My Drive/assignments_dl_cs7150/project/imagenet_data/dataset_dl/00039/899020713517552.jpg (Class: 00039)
5. /content/drive/My Drive/assignments_dl_cs7150/project/imagenet_data/dataset_dl/00067/291797602056391.jpg (Class: 00067)


In [4]:
# Custom INT8 quantization functions
def clamp(params_q: np.array, lower_bound: int, upper_bound: int) -> np.array:
    """Clamp values to specified bounds"""
    params_q[params_q < lower_bound] = lower_bound
    params_q[params_q > upper_bound] = upper_bound
    return params_q

def asymmetric_quantization(params: np.array, bits: int) -> tuple:
    """Quantize parameters using min-max asymmetric quantization"""
    alpha = np.max(params)
    beta = np.min(params)
    scale = (alpha - beta) / (2**bits-1)
    zero = -1*np.round(beta / scale)
    lower_bound, upper_bound = 0, 2**bits-1
    quantized = clamp(np.round(params / scale + zero), lower_bound, upper_bound).astype(np.int32)
    return quantized, scale, zero

def asymmetric_quantization_percentile(params: np.array, bits: int, percentile: float = 99.99) -> tuple:
    """Quantize parameters using percentile-based asymmetric quantization"""
    # find the percentile value
    alpha = np.percentile(params, percentile)
    beta = np.percentile(params, 100-percentile)
    scale = (alpha - beta) / (2**bits-1)
    zero = -1*np.round(beta / scale)
    lower_bound, upper_bound = 0, 2**bits-1
    quantized = clamp(np.round(params / scale + zero), lower_bound, upper_bound).astype(np.int32)
    return quantized, scale, zero

def asymmetric_dequantize(params_q: np.array, scale: float, zero: int) -> np.array:
    """Dequantize parameters using scale and zero point"""
    return (params_q - zero) * scale

def quantization_error(params: np.array, params_q: np.array):
    """Calculate the MSE between original and quantized parameters"""
    return np.mean((params - params_q)**2)

# Custom quantized Conv2d module
class QuantizedConv2d(torch.nn.Module):
    def __init__(self, conv_module, bits=8, percentile=False, percentile_value=99.99):
        super(QuantizedConv2d, self).__init__()

        # Store original module properties
        self.in_channels = conv_module.in_channels
        self.out_channels = conv_module.out_channels
        self.kernel_size = conv_module.kernel_size
        self.stride = conv_module.stride
        self.padding = conv_module.padding
        self.dilation = conv_module.dilation
        self.groups = conv_module.groups
        self.bias = conv_module.bias is not None

        # Quantization parameters
        self.bits = bits
        self.percentile = percentile
        self.percentile_value = percentile_value

        # Get weight data as numpy array
        weight_data = conv_module.weight.data.cpu().numpy()

        # Quantize weights
        if percentile:
            self.weight_q, self.weight_scale, self.weight_zero = \
                asymmetric_quantization_percentile(weight_data, bits, percentile_value)
        else:
            self.weight_q, self.weight_scale, self.weight_zero = \
                asymmetric_quantization(weight_data, bits)

        # Store weight shape for later reshaping
        self.weight_shape = weight_data.shape

        # If bias exists, quantize it as well
        if self.bias:
            bias_data = conv_module.bias.data.cpu().numpy()
            if percentile:
                self.bias_q, self.bias_scale, self.bias_zero = \
                    asymmetric_quantization_percentile(bias_data, bits, percentile_value)
            else:
                self.bias_q, self.bias_scale, self.bias_zero = \
                    asymmetric_quantization(bias_data, bits)
            self.bias_shape = bias_data.shape

        # Pre-compute dequantized weights for efficiency
        # This avoids repeated dequantization during inference
        weight_dequant = asymmetric_dequantize(self.weight_q, self.weight_scale, self.weight_zero)
        self.weight_dequant = torch.from_numpy(weight_dequant.reshape(self.weight_shape)).float()

        if self.bias:
            bias_dequant = asymmetric_dequantize(self.bias_q, self.bias_scale, self.bias_zero)
            self.bias_dequant = torch.from_numpy(bias_dequant).float()
        else:
            self.bias_dequant = None

    def forward(self, x):
        # Use pre-computed dequantized weights and bias
        # This is much faster than dequantizing on every forward pass
        return torch.nn.functional.conv2d(
            x, self.weight_dequant.to(x.device),
            self.bias_dequant.to(x.device) if self.bias_dequant is not None else None,
            stride=self.stride,
            padding=self.padding,
            dilation=self.dilation,
            groups=self.groups
        )

# Custom quantized Linear module
class QuantizedLinear(torch.nn.Module):
    def __init__(self, linear_module, bits=8, percentile=False, percentile_value=99.99):
        super(QuantizedLinear, self).__init__()

        # Store original module properties
        self.in_features = linear_module.in_features
        self.out_features = linear_module.out_features
        self.bias = linear_module.bias is not None

        # Quantization parameters
        self.bits = bits
        self.percentile = percentile
        self.percentile_value = percentile_value

        # Get weight data as numpy array
        weight_data = linear_module.weight.data.cpu().numpy()

        # Quantize weights
        if percentile:
            self.weight_q, self.weight_scale, self.weight_zero = \
                asymmetric_quantization_percentile(weight_data, bits, percentile_value)
        else:
            self.weight_q, self.weight_scale, self.weight_zero = \
                asymmetric_quantization(weight_data, bits)

        # Store weight shape for later reshaping
        self.weight_shape = weight_data.shape

        # If bias exists, quantize it as well
        if self.bias:
            bias_data = linear_module.bias.data.cpu().numpy()
            if percentile:
                self.bias_q, self.bias_scale, self.bias_zero = \
                    asymmetric_quantization_percentile(bias_data, bits, percentile_value)
            else:
                self.bias_q, self.bias_scale, self.bias_zero = \
                    asymmetric_quantization(bias_data, bits)
            self.bias_shape = bias_data.shape

        # Pre-compute dequantized weights and bias for efficiency
        weight_dequant = asymmetric_dequantize(self.weight_q, self.weight_scale, self.weight_zero)
        self.weight_dequant = torch.from_numpy(weight_dequant.reshape(self.weight_shape)).float()

        if self.bias:
            bias_dequant = asymmetric_dequantize(self.bias_q, self.bias_scale, self.bias_zero)
            self.bias_dequant = torch.from_numpy(bias_dequant).float()
        else:
            self.bias_dequant = None

    def forward(self, x):
        # Use pre-computed dequantized weights and bias
        return torch.nn.functional.linear(
            x,
            self.weight_dequant.to(x.device),
            self.bias_dequant.to(x.device) if self.bias_dequant is not None else None
        )

# Function to recursively replace modules with quantized versions
def quantize_model(model, bits=8, use_percentile=False, percentile_value=99.99):
    """Replace Conv2d and Linear layers with quantized versions"""
    for name, module in model.named_children():
        if len(list(module.children())) > 0:
            # Recursively quantize submodules if they exist
            quantize_model(module, bits, use_percentile, percentile_value)
        else:
            # Quantize if module is Conv2d or Linear
            if isinstance(module, torch.nn.Conv2d):
                setattr(model, name, QuantizedConv2d(module, bits, use_percentile, percentile_value))
            elif isinstance(module, torch.nn.Linear):
                setattr(model, name, QuantizedLinear(module, bits, use_percentile, percentile_value))

    return model



In [5]:
# Function to load ResNet-18 model
def load_model(quantize=False, quantization_type='standard', bits=8):
    # Load the PyTorch model
    model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

    if not quantize:
        # Regular non-quantized model
        model = model.to(device)
        model.eval()
        return model

    # For quantization, work with the model on CPU
    model = model.to('cpu')
    model.eval()

    if quantization_type == 'standard':
        # Use standard min-max quantization
        quantized_model = quantize_model(model, bits=bits, use_percentile=False)
        return quantized_model

    elif quantization_type == 'percentile':
        # Use percentile-based quantization
        quantized_model = quantize_model(model, bits=bits, use_percentile=True, percentile_value=99.99)
        return quantized_model

    # Return original model if no valid quantization type
    return model

# Function to measure FPS
def measure_fps(model, input_tensor, is_quantized=False):
    # Always use the same device for fair comparison
    # Using CPU for all models since the quantized implementation
    # doesn't have GPU acceleration anyway
    input_tensor = input_tensor.to('cpu')
    model = model.to('cpu')

    # Warm-up run (multiple iterations to ensure any caching effects are stabilized)
    with torch.no_grad():
        for _ in range(5):
            model(input_tensor)

    # Actual timing - increased number of runs for better statistics
    start_time = time.time()
    num_runs = 20
    with torch.no_grad():
        for _ in range(num_runs):
            model(input_tensor)

    end_time = time.time()
    time_per_image = (end_time - start_time) / num_runs
    fps = 1.0 / time_per_image

    return fps

# Function to evaluate model on the test set
def evaluate_model(model, data_loader, is_quantized=False):
    if is_quantized:
        model = model.to('cpu')
        eval_device = 'cpu'
    else:
        model = model.to(device)
        eval_device = device

    correct = 0
    total = 0

    # For top-1 and top-5 accuracy
    top1_correct = 0
    top5_correct = 0

    with torch.no_grad():
        for images, labels in data_loader:
            images, labels = images.to(eval_device), labels.to(eval_device)
            outputs = model(images)

            # Top-1 accuracy
            _, predicted = torch.max(outputs, 1)
            top1_correct += (predicted == labels).sum().item()

            # Top-5 accuracy
            _, top5_preds = torch.topk(outputs, 5, dim=1)
            for i in range(labels.size(0)):
                if labels[i] in top5_preds[i]:
                    top5_correct += 1

            total += labels.size(0)

    top1_accuracy = 100 * top1_correct / total
    top5_accuracy = 100 * top5_correct / total

    return top1_accuracy, top5_accuracy

In [6]:
# A batch-wise activation quantization wrapper for performance simulation
class ActivationQuantizer(torch.nn.Module):
    def __init__(self, wrapped_model, bits=8, use_percentile=False, percentile_value=99.99):
        super(ActivationQuantizer, self).__init__()
        self.wrapped_model = wrapped_model
        self.bits = bits
        self.use_percentile = use_percentile
        self.percentile_value = percentile_value

        # Store activation scales for each layer (to be calibrated)
        self.activation_scales = {}
        self.activation_zeros = {}
        self.calibrated = False

    def calibrate(self, dataloader, num_batches=10):
        """Calibrate activation quantization parameters using sample data"""
        self.wrapped_model.eval()
        activation_ranges = {}

        # Register forward hooks to capture activations
        hooks = []

        def get_activation_hook(name):
            def hook(module, input, output):
                # Store activations for this layer
                if name not in activation_ranges:
                    activation_ranges[name] = {"min": float('inf'), "max": float('-inf')}

                # Update min/max values
                act_min = output.min().item()
                act_max = output.max().item()

                activation_ranges[name]["min"] = min(activation_ranges[name]["min"], act_min)
                activation_ranges[name]["max"] = max(activation_ranges[name]["max"], act_max)
            return hook

        # Register hooks on all Conv2d and Linear layers
        for name, module in self.wrapped_model.named_modules():
            if isinstance(module, (QuantizedConv2d, QuantizedLinear)):
                hooks.append(module.register_forward_hook(get_activation_hook(name)))

        # Run calibration data through the model
        with torch.no_grad():
            for batch_idx, (inputs, _) in enumerate(dataloader):
                if batch_idx >= num_batches:
                    break
                inputs = inputs.to('cpu')
                _ = self.wrapped_model(inputs)

        # Remove the hooks
        for hook in hooks:
            hook.remove()

        # Compute quantization parameters for each layer
        for name, ranges in activation_ranges.items():
            alpha = ranges["max"]
            beta = ranges["min"]

            if self.use_percentile:
                # For percentile-based, more complex statistics are needed
                # simplifying here and just using the min/max
                pass

            scale = (alpha - beta) / (2**self.bits - 1)
            zero = -1 * np.round(beta / scale) if scale != 0 else 0

            self.activation_scales[name] = scale
            self.activation_zeros[name] = zero

        self.calibrated = True
        print(f"Calibrated activation quantization for {len(activation_ranges)} layers")

    def forward(self, x):
        # Forward through the wrapped model
        return self.wrapped_model(x)

In [7]:
# Function to evaluate models
def evaluate_batched(model, dataloader, is_quantized=False, batch_size=8):
    """Process data in batches for more efficient evaluation"""
    if is_quantized:
        model = model.to('cpu')
        eval_device = 'cpu'
    else:
        model = model.to(device)
        eval_device = device

    # Create a new dataloader with the desired batch size
    batch_dataloader = DataLoader(
        dataloader.dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=0
    )

    top1_correct = 0
    top5_correct = 0
    total = 0

    model.eval()
    with torch.no_grad():
        for images, labels in batch_dataloader:
            images, labels = images.to(eval_device), labels.to(eval_device)
            outputs = model(images)

            # Top-1 accuracy
            _, predicted = torch.max(outputs, 1)
            top1_correct += (predicted == labels).sum().item()

            # Top-5 accuracy
            _, top5_preds = torch.topk(outputs, 5, dim=1)
            for i in range(labels.size(0)):
                if labels[i] in top5_preds[i]:
                    top5_correct += 1

            total += labels.size(0)

    top1_accuracy = 100 * top1_correct / total
    top5_accuracy = 100 * top5_correct / total

    return top1_accuracy, top5_accuracy

# Function to visualize predictions
def visualize_predictions(model_obj, dataset, loader, num_samples=5, is_quantized=False):
    if is_quantized:
        model_obj = model_obj.to('cpu')
        vis_device = 'cpu'
    else:
        model_obj = model_obj.to(device)
        vis_device = device

    model_obj.eval()
    fig, axes = plt.subplots(1, num_samples, figsize=(20, 4))

    with torch.no_grad():
        for i, (images, labels) in enumerate(loader):
            if i >= num_samples:
                break

            images, labels = images.to(vis_device), labels.to(vis_device)
            outputs = model_obj(images)

            # Get predicted class
            _, predicted = torch.max(outputs, 1)

            # Get class names (folder names)
            true_class_idx = labels.item()
            pred_class_idx = predicted.item()

            true_class = idx_to_class[true_class_idx]
            pred_class = idx_to_class.get(pred_class_idx, f"Unknown ({pred_class_idx})")

            # Get image
            img = images[0].cpu().numpy().transpose((1, 2, 0))
            # Denormalize
            img = img * np.array([0.229, 0.224, 0.225]) + np.array([0.485, 0.456, 0.406])
            img = np.clip(img, 0, 1)

            # Plot
            axes[i].imshow(img)
            axes[i].set_title(f"True: {true_class}\nPred: {pred_class}")
            axes[i].axis('off')

    plt.tight_layout()
    plt.savefig('sample_predictions_custom_quantized.png')
    plt.show()

# Calculate model size
def calculate_model_size(model, is_quantized=False, bits=32):
    if is_quantized:
        # For quantized models, consider bit width
        size_in_bytes = 0
        for name, module in model.named_modules():
            if hasattr(module, 'weight_q') and hasattr(module, 'bits'):
                # Count weight parameters
                size_in_bytes += np.prod(module.weight_q.shape) * module.bits / 8

                # Count bias parameters if they exist
                if hasattr(module, 'bias_q'):
                    size_in_bytes += np.prod(module.bias_q.shape) * module.bits / 8
            elif hasattr(module, 'weight'):
                # For non-quantized parts within the model
                size_in_bytes += module.weight.numel() * 4  # float32 = 4 bytes
                if hasattr(module, 'bias') and module.bias is not None:
                    size_in_bytes += module.bias.numel() * 4
    else:
        # For non-quantized models, count parameters assuming float32
        size_in_bytes = sum(p.numel() for p in model.parameters()) * 4  # float32 = 4 bytes

    return size_in_bytes / (1024 * 1024)  # Convert to MB

In [10]:
# Function to run multiple evaluations and calculate averages
def run_multiple_evaluations(num_runs=5):
    # Store results for each run
    run_results = {
        'models': ['ResNet18 Original', 'ResNet18 INT8 Min-Max', 'ResNet18 INT8 Percentile'],
        'runs': [],
        'avg_top1_accuracy': [],
        'avg_top5_accuracy': [],
        'avg_model_size_mb': [],
        'avg_fps': [],  # Add this line to initialize the avg_fps list
    }
    # Create dummy input for FPS measurement
    dummy_input = torch.randn(1, 3, 224, 224)

    for run in range(num_runs):
        print(f"\n--- Starting Run {run+1}/{num_runs} ---")
        run_data = {
            'top1_accuracy': [],
            'top5_accuracy': [],
            'model_size_mb': [],
            'fps': []
        }

        # Evaluate original PyTorch ResNet-18
        print(f"\nRun {run+1}: Evaluating original PyTorch ResNet-18...")
        resnet18_original = load_model(quantize=False)

        model_size_original = calculate_model_size(resnet18_original)
        fps_original = measure_fps(resnet18_original, dummy_input, is_quantized=False)
        top1_acc_original, top5_acc_original = evaluate_model(resnet18_original, sample_loader, is_quantized=False)

        run_data['top1_accuracy'].append(top1_acc_original)
        run_data['top5_accuracy'].append(top5_acc_original)

        run_data['model_size_mb'].append(model_size_original)
        run_data['fps'].append(fps_original)

        # Evaluate standard INT8 quantized model
        print(f"\nRun {run+1}: Evaluating ResNet-18 with Custom INT8 Quantization (Min-Max)...")
        resnet18_int8_standard = load_model(quantize=True, quantization_type='standard', bits=8)

        # Apply activation quantization wrapper and calibrate
        act_quantized_model = ActivationQuantizer(resnet18_int8_standard, bits=8, use_percentile=False)
        act_quantized_model.calibrate(DataLoader(sample_dataset, batch_size=2, shuffle=False))

        model_size_int8_standard = calculate_model_size(resnet18_int8_standard, is_quantized=True, bits=8)
        fps_int8_standard = measure_fps(act_quantized_model, dummy_input, is_quantized=True)
        top1_acc_int8_standard, top5_acc_int8_standard = evaluate_batched(
            act_quantized_model, sample_loader, is_quantized=True, batch_size=4
        )

        run_data['top1_accuracy'].append(top1_acc_int8_standard)
        run_data['top5_accuracy'].append(top5_acc_int8_standard)
        run_data['model_size_mb'].append(model_size_int8_standard)
        run_data['fps'].append(fps_int8_standard)

        # Evaluate percentile INT8 quantized model
        print(f"\nRun {run+1}: Evaluating ResNet-18 with Custom INT8 Quantization (Percentile)...")
        resnet18_int8_percentile = load_model(quantize=True, quantization_type='percentile', bits=8)

        # Apply activation quantization wrapper and calibrate
        act_quantized_model_percentile = ActivationQuantizer(resnet18_int8_percentile, bits=8, use_percentile=True, percentile_value=99.99)
        act_quantized_model_percentile.calibrate(DataLoader(sample_dataset, batch_size=2, shuffle=False))

        model_size_int8_percentile = calculate_model_size(resnet18_int8_percentile, is_quantized=True, bits=8)
        fps_int8_percentile = measure_fps(act_quantized_model_percentile, dummy_input, is_quantized=True)
        top1_acc_int8_percentile, top5_acc_int8_percentile = evaluate_batched(
            act_quantized_model_percentile, sample_loader, is_quantized=True, batch_size=4
        )

        run_data['top1_accuracy'].append(top1_acc_int8_percentile)
        run_data['top5_accuracy'].append(top5_acc_int8_percentile)

        run_data['model_size_mb'].append(model_size_int8_percentile)
        run_data['fps'].append(fps_int8_percentile)

        # Store this run's data
        run_results['runs'].append(run_data)

        print(f"\n--- Run {run+1} Summary ---")
        print(f"{'Model':<30} {'Top-1 Acc (%)':<15} {'Top-5 Acc (%)':<15} {'Size (MB)':<12} {'FPS':<10}")
        print("-" * 85)

        for i, model_name in enumerate(run_results['models']):
            print(f"{model_name:<30} {run_data['top1_accuracy'][i]:<15.2f} {run_data['top5_accuracy'][i]:<15.2f} {run_data['model_size_mb'][i]:<12.2f} {run_data['fps'][i]:<10.2f}")

    # Calculate averages across all runs
    for metric in ['top1_accuracy', 'top5_accuracy', 'model_size_mb', 'fps']:
        for model_idx in range(len(run_results['models'])):
            values = [run_results['runs'][run_idx][metric][model_idx] for run_idx in range(num_runs)]
            run_results[f'avg_{metric}'].append(np.mean(values))

    # Calculate improvement/loss metrics
    original_model_idx = 0  # Index of the original model in results

    # Calculate FPS improvement for each quantized model compared to original
    fps_improvements = []
    for i in range(1, len(run_results['models'])):
        fps_improvement = (run_results['avg_fps'][i] / run_results['avg_fps'][original_model_idx] - 1) * 100
        fps_improvements.append(fps_improvement)

    # Calculate accuracy loss for each quantized model compared to original
    top1_acc_losses = []
    top5_acc_losses = []
    for i in range(1, len(run_results['models'])):
        top1_loss = run_results['avg_top1_accuracy'][original_model_idx] - run_results['avg_top1_accuracy'][i]
        top5_loss = run_results['avg_top5_accuracy'][original_model_idx] - run_results['avg_top5_accuracy'][i]
        top1_acc_losses.append(top1_loss)
        top5_acc_losses.append(top5_loss)

    # Calculate model size reduction
    size_reductions = []
    for i in range(1, len(run_results['models'])):
        size_reduction = (1 - run_results['avg_model_size_mb'][i] / run_results['avg_model_size_mb'][original_model_idx]) * 100
        size_reductions.append(size_reduction)

    # Print final average results
    print("\n\n====== FINAL RESULTS (AVERAGED OVER 5 RUNS) ======")
    print(f"{'Model':<30} {'Top-1 Acc (%)':<15} {'Top-5 Acc (%)':<15} {'Size (MB)':<12} {'FPS':<10}")
    print("-" * 85)

    for i, model_name in enumerate(run_results['models']):
        print(f"{model_name:<30} {run_results['avg_top1_accuracy'][i]:<15.2f} {run_results['avg_top5_accuracy'][i]:<15.2f} {run_results['avg_model_size_mb'][i]:<12.2f} {run_results['avg_fps'][i]:<10.2f}")

    # Print improvements/losses
    print("\n====== IMPROVEMENTS AND LOSSES ======")
    for i, model_name in enumerate(run_results['models'][1:], start=1):
        print(f"\n{model_name} compared to Original:")
        print(f"  FPS Improvement: {fps_improvements[i-1]:+.2f}%")
        print(f"  Top-1 Accuracy Loss: {top1_acc_losses[i-1]:+.2f}%")
        print(f"  Top-5 Accuracy Loss: {top5_acc_losses[i-1]:+.2f}%")
        print(f"  Model Size Reduction: {size_reductions[i-1]:.2f}%")

    # Plot the results
    plot_comparison_results(run_results)

    return run_results

In [12]:
def plot_comparison_results(results):
    # Create a figure with subplots
    fig, axs = plt.subplots(1, 3, figsize=(18, 8))

    # Get the data
    models = results['models']
    avg_top1 = results['avg_top1_accuracy']
    avg_top5 = results['avg_top5_accuracy']
    avg_size = results['avg_model_size_mb']
    avg_fps = results['avg_fps']

    # Accuracy plot (combining top-1 and top-5)
    bar_width = 0.35
    x = np.arange(len(models))
    axs[0].bar(x - bar_width/2, avg_top1, bar_width, color='skyblue', label='Top-1')
    axs[0].bar(x + bar_width/2, avg_top5, bar_width, color='lightgreen', label='Top-5')
    axs[0].set_xlabel('Model')
    axs[0].set_ylabel('Accuracy (%)')
    axs[0].set_title('Average Test Accuracy')
    axs[0].set_xticks(x)
    axs[0].set_xticklabels(models, rotation=15, ha='right')

    # Set y-axis limits based on data - adjust for accuracy plot

    min_acc = min(min(avg_top1), min(avg_top5)) - 5
    max_acc = max(max(avg_top1), max(avg_top5)) + 5

    min_acc = max(0, min_acc)
    max_acc = min(100, max_acc)
    axs[0].set_ylim([min_acc, max_acc])

    axs[0].legend()
    for i, v in enumerate(avg_top1):
        axs[0].text(i - bar_width/2, v + 1, f"{v:.2f}%", ha='center')
    for i, v in enumerate(avg_top5):
        axs[0].text(i + bar_width/2, v + 1, f"{v:.2f}%", ha='center')

    # Model size plot - Use numeric x positions for consistency
    axs[1].bar(x, avg_size, color='salmon')
    axs[1].set_xlabel('Model')
    axs[1].set_ylabel('Size (MB)')
    axs[1].set_title('Average Model Size (MB)')
    axs[1].set_xticks(x)
    axs[1].set_xticklabels(models, rotation=15, ha='right')

    # Set y-axis limits based on data for model size
    min_size = 0
    max_size = max(avg_size) * 1.2  # Add 20% headroom
    axs[1].set_ylim([min_size, max_size])

    for i, v in enumerate(avg_size):
        axs[1].text(i, v + (max_size - min_size) * 0.02, f"{v:.2f}MB", ha='center')

    # FPS plot - Use numeric x positions for consistency
    axs[2].bar(x, avg_fps, color='gold')
    axs[2].set_xlabel('Model')
    axs[2].set_ylabel('FPS')
    axs[2].set_title('Average Frames Per Second')
    axs[2].set_xticks(x)
    axs[2].set_xticklabels(models, rotation=15, ha='right')

    # Set y-axis limits based on data for FPS
    min_fps = 0  #
    max_fps = max(avg_fps) * 1.2
    axs[2].set_ylim([min_fps, max_fps])

    for i, v in enumerate(avg_fps):
        axs[2].text(i, v + (max_fps - min_fps) * 0.02, f"{v:.2f}", ha='center')

    plt.tight_layout()
    plt.savefig('resnet18_custom_int8_comparison_avg.png', dpi=300)
    plt.show()

# Run multiple evaluations and get the average metrics
print("Starting multiple evaluation runs...")
all_results = run_multiple_evaluations(num_runs=5)
print("Evaluation complete!")
print("Results are saved as 'resnet18_custom_int8_comparison_avg.png'")

print("\nVisualizing sample predictions using models from final run:")

# Load models for visualization
resnet18_original = load_model(quantize=False)

resnet18_int8_standard = load_model(quantize=True, quantization_type='standard', bits=8)
act_quantized_standard = ActivationQuantizer(resnet18_int8_standard, bits=8, use_percentile=False)
act_quantized_standard.calibrate(DataLoader(sample_dataset, batch_size=2, shuffle=False))

resnet18_int8_percentile = load_model(quantize=True, quantization_type='percentile', bits=8)
act_quantized_percentile = ActivationQuantizer(resnet18_int8_percentile, bits=8, use_percentile=True, percentile_value=99.99)
act_quantized_percentile.calibrate(DataLoader(sample_dataset, batch_size=2, shuffle=False))

print("\nVisualizing sample predictions using Custom INT8 Min-Max Quantized ResNet-18:")
visualize_predictions(act_quantized_standard, sample_dataset, sample_loader, is_quantized=True)

print("\nVisualizing sample predictions using Custom INT8 Percentile Quantized ResNet-18:")
visualize_predictions(act_quantized_percentile, sample_dataset, sample_loader, is_quantized=True)

print("\nVisualizing sample predictions using original ResNet-18 for comparison:")
visualize_predictions(resnet18_original, sample_dataset, sample_loader)

Output hidden; open in https://colab.research.google.com to view.