# Static_Quantization

> We will be implementing Static_Quantization model compression here

In [1]:
#| default_exp quantization_fastai

In [2]:
#| hide
from nbdev.showdoc import *

In [3]:
#| export
from fastai.vision.all import *  # Import necessary libraries
import os
import sys
import torch
import time
import numpy as np
import pandas as pd
from torch import nn
from torch.utils.data import Dataset, DataLoader, Subset
import logging
from skimage.metrics import structural_similarity as ssim
from tqdm import tqdm
import torch.quantization
sys.path.append('/root/hsi-compression/models/')
from cae1d import ConvolutionalAutoencoder1D
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"


In [4]:
#| export

filepath = "/root/hsi-compression/results/weights/cae1d_8bpppc.pth.tar"

def load_model_with_weights(filepath):
    model = ConvolutionalAutoencoder1D()
    checkpoint = torch.load(filepath, map_location='cpu')
    model.load_state_dict(checkpoint)
    model.eval()
    print("Model loaded successfully.")
    return model




In [5]:
#!export

# Setup logging to help with debugging
logging.basicConfig(level=logging.ERROR, format='%(asctime)s - %(levelname)s - %(message)s')

# Base directory for .npy files
base_directory = '/root/hsi-compression/datasets/hyspecnet-11k/patches/'

def load_paths(csv_file):
    df = pd.read_csv(csv_file, header=None)
    file_paths = [os.path.join(base_directory, x.strip()) for x in df[0]]
    print("Paths loaded successfully.")
    return file_paths

class NPYDataset(Dataset):
    def __init__(self, file_paths, transform=None):
        self.file_paths = file_paths
        self.transform = transform

    def __len__(self):
        return len(self.file_paths)

    def __getitem__(self, idx):
        file_path = self.file_paths[idx]
        sample = np.load(file_path)
        if self.transform:
            sample = self.transform(sample)
        sample = torch.from_numpy(sample).float()
        return sample, sample

def transform_sample(sample):
    return (sample - np.mean(sample)) / np.std(sample)

csv_file_path = '/root/hsi-compression/datasets/hyspecnet-11k/splits/easy/test.csv'
file_paths = load_paths(csv_file_path)

dataset = NPYDataset(file_paths, transform=transform_sample)
print("Dataset initialized successfully.")



Paths loaded successfully.
Dataset initialized successfully.


In [6]:
#|export

def apply_static_quantization(model, dataloader):
    model.eval().to('cpu')
    model.qconfig = torch.quantization.get_default_qconfig('fbgemm')
    torch.quantization.prepare(model, inplace=True)
    print("Model prepared for quantization.")

    with torch.no_grad():
        for inputs, _ in tqdm(dataloader, desc="Calibrating"):
            inputs = inputs.to('cpu')
            model(inputs)
    
    quantized_model = torch.quantization.convert(model)
    print("Static quantization applied successfully.")
    return quantized_model

In [7]:
#| export
def evaluate_model(model, dataloader):
    model.eval()
    losses = []
    criterion = torch.nn.MSELoss()
    with torch.no_grad():
        for inputs, labels in tqdm(dataloader, desc="Evaluating model", leave=False):
            inputs, labels = inputs.to('cpu'), labels.to('cpu')
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            losses.append(loss.item())
    mean_loss = sum(losses) / len(losses)
    print(f"Model evaluation completed. Mean Loss: {mean_loss:.4f}")
    return mean_loss


In [8]:
#| export

def evaluate_ssim(model, dataloader):
    model.eval()
    ssim_scores = []
    with torch.no_grad():
        for data, target in dataloader:
            data, target = data.to('cpu'), target.to('cpu')
            output = model(data)
            output_np = output.detach().numpy()
            target_np = target.numpy()
            for o, t in zip(output_np, target_np):
                score = ssim(o, t, data_range=t.max() - t.min())
                ssim_scores.append(score)
    average_ssim = np.mean(ssim_scores)
    print(f"SSIM evaluation completed. Average SSIM: {average_ssim:.4f}")
    return average_ssim

In [9]:
#| export

def evaluate_latency(model, dataloader, num_iterations=10):
    start_time = time.time()
    model.eval()
    with torch.no_grad():
        for i, (data, _) in enumerate(dataloader):
            if i >= num_iterations:
                break
            data = data.to('cpu')
            _ = model(data)
    total_time = time.time() - start_time
    average_time_per_batch = total_time / num_iterations
    print(f"Latency evaluation completed. Average Time per Batch: {average_time_per_batch:.4f} seconds")
    return average_time_per_batch

In [10]:
#| export
def fix_conv_padding(model):
    for layer in model.modules():
        if isinstance(layer, nn.Conv1d):
            if isinstance(layer.padding, str) and layer.padding == 'same':
                kernel_size = layer.kernel_size[0]
                stride = layer.stride[0]
                padding = (kernel_size - stride) // 2
                layer.padding = (padding,)
    print("Padding fixed in model.")
    return model

In [11]:
# Clear GPU memory
#torch.cuda.empty_cache()
#import gc
#import torch

#gc.collect()
#torch.cuda.empty_cache()

In [12]:
#| eval: false
# Load and initialize the model
filepath = "/root/hsi-compression/results/weights/cae1d_8bpppc.pth.tar"
model = load_model_with_weights(filepath)
model = fix_conv_padding(model)

subset_size = int(len(dataset) * 0.5)
indices = torch.randperm(len(dataset))[:subset_size]
subset = Subset(dataset, indices)
dataloader = DataLoader(subset, batch_size=1, shuffle=True, num_workers=0)
print("DataLoader initialized successfully.")


Model loaded successfully.
Padding fixed in model.
DataLoader initialized successfully.


  checkpoint = torch.load(filepath, map_location='cpu')


In [13]:
#| eval: false

print("Evaluating Original Model...")
original_loss = evaluate_model(model, dataloader)

Evaluating Original Model...


                                                                                                                                       

Model evaluation completed. Mean Loss: 0.6047




In [14]:
#| eval: false

print("Applying Static Quantization...")
quantized_model = apply_static_quantization(model, dataloader)



Applying Static Quantization...
Model prepared for quantization.


Calibrating: 100%|███████████████████████████████████████████████████████████████████████████████████| 122/122 [04:19<00:00,  2.13s/it]


Static quantization applied successfully.


In [15]:
#| eval: false

print("Evaluating Quantized Model...")
quantized_loss = evaluate_model(quantized_model, dataloader)

Evaluating Quantized Model...


                                                                                                                                       

NotImplementedError: Could not run 'quantized::conv1d' with arguments from the 'CPU' backend. This could be because the operator doesn't exist for this backend, or was omitted during the selective/custom build process (if using custom build). If you are a Facebook employee using PyTorch on mobile, please visit https://fburl.com/ptmfixes for possible resolutions. 'quantized::conv1d' is only available for these backends: [Meta, QuantizedCPU, QuantizedCUDA, BackendSelect, Python, FuncTorchDynamicLayerBackMode, Functionalize, Named, Conjugate, Negative, ZeroTensor, ADInplaceOrView, AutogradOther, AutogradCPU, AutogradCUDA, AutogradXLA, AutogradMPS, AutogradXPU, AutogradHPU, AutogradLazy, AutogradMeta, Tracer, AutocastCPU, AutocastXPU, AutocastMPS, AutocastCUDA, FuncTorchBatched, BatchedNestedTensor, FuncTorchVmapMode, Batched, VmapMode, FuncTorchGradWrapper, PythonTLSSnapshot, FuncTorchDynamicLayerFrontMode, PreDispatch, PythonDispatcher].

Meta: registered at ../aten/src/ATen/core/MetaFallbackKernel.cpp:23 [backend fallback]
QuantizedCPU: registered at ../aten/src/ATen/native/quantized/cpu/qconv.cpp:1972 [kernel]
QuantizedCUDA: registered at ../aten/src/ATen/native/quantized/cudnn/Conv.cpp:391 [kernel]
BackendSelect: fallthrough registered at ../aten/src/ATen/core/BackendSelectFallbackKernel.cpp:3 [backend fallback]
Python: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:153 [backend fallback]
FuncTorchDynamicLayerBackMode: registered at ../aten/src/ATen/functorch/DynamicLayer.cpp:497 [backend fallback]
Functionalize: registered at ../aten/src/ATen/FunctionalizeFallbackKernel.cpp:349 [backend fallback]
Named: registered at ../aten/src/ATen/core/NamedRegistrations.cpp:7 [backend fallback]
Conjugate: registered at ../aten/src/ATen/ConjugateFallback.cpp:17 [backend fallback]
Negative: registered at ../aten/src/ATen/native/NegateFallback.cpp:18 [backend fallback]
ZeroTensor: registered at ../aten/src/ATen/ZeroTensorFallback.cpp:86 [backend fallback]
ADInplaceOrView: fallthrough registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:96 [backend fallback]
AutogradOther: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:63 [backend fallback]
AutogradCPU: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:67 [backend fallback]
AutogradCUDA: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:75 [backend fallback]
AutogradXLA: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:79 [backend fallback]
AutogradMPS: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:87 [backend fallback]
AutogradXPU: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:71 [backend fallback]
AutogradHPU: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:100 [backend fallback]
AutogradLazy: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:83 [backend fallback]
AutogradMeta: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:91 [backend fallback]
Tracer: registered at ../torch/csrc/autograd/TraceTypeManual.cpp:294 [backend fallback]
AutocastCPU: fallthrough registered at ../aten/src/ATen/autocast_mode.cpp:321 [backend fallback]
AutocastXPU: fallthrough registered at ../aten/src/ATen/autocast_mode.cpp:463 [backend fallback]
AutocastMPS: fallthrough registered at ../aten/src/ATen/autocast_mode.cpp:209 [backend fallback]
AutocastCUDA: fallthrough registered at ../aten/src/ATen/autocast_mode.cpp:165 [backend fallback]
FuncTorchBatched: registered at ../aten/src/ATen/functorch/LegacyBatchingRegistrations.cpp:731 [backend fallback]
BatchedNestedTensor: registered at ../aten/src/ATen/functorch/LegacyBatchingRegistrations.cpp:758 [backend fallback]
FuncTorchVmapMode: fallthrough registered at ../aten/src/ATen/functorch/VmapModeRegistrations.cpp:27 [backend fallback]
Batched: registered at ../aten/src/ATen/LegacyBatchingRegistrations.cpp:1075 [backend fallback]
VmapMode: fallthrough registered at ../aten/src/ATen/VmapModeRegistrations.cpp:33 [backend fallback]
FuncTorchGradWrapper: registered at ../aten/src/ATen/functorch/TensorWrapper.cpp:207 [backend fallback]
PythonTLSSnapshot: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:161 [backend fallback]
FuncTorchDynamicLayerFrontMode: registered at ../aten/src/ATen/functorch/DynamicLayer.cpp:493 [backend fallback]
PreDispatch: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:165 [backend fallback]
PythonDispatcher: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:157 [backend fallback]


In [None]:
#| eval: false

print("Calculating SSIM Scores...")
original_ssim = evaluate_ssim(model, dataloader)
quantized_ssim = evaluate_ssim(quantized_model, dataloader)

print(f"Original SSIM: {original_ssim:.4f}, Quantized SSIM: {quantized_ssim:.4f}")

In [None]:
#| eval: false

print("Measuring Latency...")
original_latency = evaluate_latency(model, dataloader)
quantized_latency = evaluate_latency(quantized_model, dataloader)

print(f"Original Latency: {original_latency:.4f} seconds per batch")
print(f"Quantized Latency: {quantized_latency:.4f} seconds per batch")

In [16]:
#| eval: false

# Model size evaluation
def save_model(model, filepath):
    torch.save(model.state_dict(), filepath)
    print(f"Model saved to {filepath}")

def get_model_size(filepath):
    size_bytes = os.path.getsize(filepath)
    size_mb = size_bytes / (1024 * 1024)
    print(f"Model size: {size_mb:.2f} MB")
    return size_mb

original_model_path = "/root/hsi-compression/results/weights/cae1d_8bpppc.pth.tar"
quantized_model_path = "/root/hsi-compression/compressed_model/static_quant_model.pth"

save_model(model, original_model_path)
save_model(quantized_model, quantized_model_path)

original_size = get_model_size(original_model_path)
quantized_size = get_model_size(quantized_model_path)

print(f"Original Model Size: {original_size:.2f} MB")
print(f"Quantized Model Size: {quantized_size:.2f} MB")

Model saved to /root/hsi-compression/results/weights/cae1d_8bpppc.pth.tar
Model saved to /root/hsi-compression/compressed_model/static_quant_model.pth
Model size: 0.36 MB
Model size: 0.08 MB
Original Model Size: 0.36 MB
Quantized Model Size: 0.08 MB


In [None]:
#| export
def foo(): pass

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()