# Weight_Quantization

> We will be implementing Weight_Quantization model compression here

In [1]:
#| default_exp quantization_fastai

In [2]:
#| hide
from nbdev.showdoc import *

In [3]:
#|export
from fastai.vision.all import *  # Import necessary libraries
import os
import sys
import torch
import time
import numpy as np
import pandas as pd
from torch import nn
from torch.utils.data import Dataset, DataLoader, Subset
import logging
from skimage.metrics import structural_similarity as ssim
from tqdm import tqdm, trange
import torch.quantization
sys.path.append('/root/hsi-compression/models/')
from cae1d import ConvolutionalAutoencoder1D


In [4]:

#|export

filepath = "/root/hsi-compression/results/weights/cae1d_8bpppc.pth.tar"

def load_model_with_weights(filepath, quantized=False):
    model = ConvolutionalAutoencoder1D()
    if quantized:
        qconfig = torch.quantization.get_default_qconfig('fbgemm')
        model.qconfig = qconfig
        model = torch.quantization.prepare(model, inplace=False)
    
    checkpoint = torch.load(filepath, map_location='cpu', weights_only=True)  # Ensure loading to CPU
    if quantized:
        model = torch.quantization.convert(model, inplace=False)  # Convert to quantized version

    model_state_dict = {k: v for k, v in checkpoint.items() if 'activation_post_process' not in k}
    model.load_state_dict(model_state_dict, strict=False)
    model.to('cpu')  # Explicitly move the model to CPU
    model.eval()
    return model




In [5]:
#|export
# Setup logging to help with debugging
logging.basicConfig(level=logging.ERROR, format='%(asctime)s - %(levelname)s - %(message)s')

# Base directory where the .npy files are stored
base_directory = '/root/hsi-compression/datasets/hyspecnet-11k/patches/'

# Function to load paths from a CSV file without headers
def load_paths(csv_file):
    df = pd.read_csv(csv_file, header=None)
    file_paths = [os.path.join(base_directory, x.strip()) for x in df[0]]
    return file_paths

class NPYDataset(Dataset):
    def __init__(self, file_paths, transform=None):
        self.file_paths = file_paths
        self.transform = transform

    def __len__(self):
        return len(self.file_paths)

    def __getitem__(self, idx):
        file_path = self.file_paths[idx]
        sample = np.load(file_path)
        if self.transform:
            sample = self.transform(sample)
        sample = torch.from_numpy(sample).float()
        return sample, sample  # Return sample as both input and target

# Define transformation if needed (e.g., normalization)
def transform_sample(sample):
    return (sample - np.mean(sample)) / np.std(sample)

csv_file_path = '/root/hsi-compression/datasets/hyspecnet-11k/splits/easy/test.csv'
file_paths = load_paths(csv_file_path) 

# Initialize the dataset and DataLoader
dataset = NPYDataset(file_paths, transform=transform_sample)
dataloader = DataLoader(dataset, batch_size=8, shuffle=True, num_workers=4)



In [13]:
#| export

def apply_weight_only_quantization(model):
    model.cpu().eval()
    model.qconfig = torch.quantization.get_default_qconfig('fbgemm')
    torch.quantization.prepare(model, inplace=True)
    quantized_model = torch.quantization.convert(model)
    return quantized_model




In [7]:
#| export

def evaluate_model(model, dataloader):
    model.eval()
    losses = []
    criterion = torch.nn.MSELoss()  # Assuming a mean squared error loss for reconstruction
    total = 0
    with torch.no_grad():
        for inputs, labels in tqdm(dataloader, desc="Evaluating model", leave=False):
            outputs = model(inputs)
            loss = criterion(outputs, labels)  # Ensure labels are of the same shape as outputs
            losses.append(loss.item())
            total += labels.size(0)
    mean_loss = sum(losses) / len(losses)
    return mean_loss


In [8]:
#| export

def evaluate_ssim(model, dataloader):
    model.eval()
    ssim_scores = []
    with torch.no_grad():
        for data, target in dataloader:
            output = model(data)
            output_np = output.cpu().detach().numpy()
            target_np = target.cpu().detach().numpy()
            for o, t in zip(output_np, target_np):
                score = ssim(o, t, data_range=t.max() - t.min())
                ssim_scores.append(score)
    average_ssim = np.mean(ssim_scores)
    return average_ssim


In [9]:
#| export

def evaluate_latency(model, dataloader, num_iterations=100):
    start_time = time.time()
    model.eval()
    with torch.no_grad():
        for i, (data, _) in enumerate(dataloader):
            if i >= num_iterations:
                break
            _ = model(data)
    end_time = time.time()
    total_time = end_time - start_time
    average_time_per_batch = total_time / num_iterations
    return average_time_per_batch


In [15]:
def adjust_padding(model):
    for name, module in model.named_modules():
        if isinstance(module, nn.Conv1d):  # Adjust for Conv1d layers, similarly for Conv2d if needed
            if module.padding == 'same':  # This assumes 'same' is a placeholder for your padding strategy
                # Calculate the padding based on the kernel size
                kernel_size = module.kernel_size[0] if isinstance(module.kernel_size, tuple) else module.kernel_size
                stride = module.stride[0] if isinstance(module.stride, tuple) else module.stride
                dilation = module.dilation[0] if isinstance(module.dilation, tuple) else module.dilation

                padding = ((kernel_size - 1) * dilation) // 2
                module.padding = (padding,)  # Set as a tuple

    return model

In [10]:
# size evaluation

def save_model(model, filepath):
    torch.save(model.state_dict(), filepath)

def get_model_size(filepath):
    size_bytes = os.path.getsize(filepath)
    return size_bytes / (1024 * 1024)  # Convert bytes to megabytes

In [11]:

# Append the directory containing the cae1d.py file, not the file itself
sys.path.append('/root/hsi-compression/models/')

from cae1d import ConvolutionalAutoencoder1D

# Load your model
filepath = "/root/hsi-compression/results/weights/cae1d_8bpppc.pth.tar"
model = load_model_with_weights(filepath)  # Make sure this function is correctly defined

subset_size = int(len(dataset) * 0.5)
indices = torch.randperm(len(dataset))[:subset_size]
subset = Subset(dataset, indices)
dataloader = DataLoader(subset, batch_size=16, shuffle=True, num_workers=0)
print("DataLoader initialized successfully.")

# Now evaluate the model
print("Original Model Accuracy:", evaluate_model(model, dataloader))



DataLoader initialized successfully.


                                                                                                                                       

Original Model Accuracy: 0.6068862453103065




In [20]:
torch.backends.quantized.engine = 'fbgemm'
print("Current quantization engine:", torch.backends.quantized.engine)

Current quantization engine: fbgemm


In [24]:
# If you have a quantized model
model = adjust_padding(model)
quantized_model = apply_weight_only_quantization(model)  # Ensure this function is correctly defined and imported
print("Quantized Model Accuracy:", evaluate_model(quantized_model, dataloader))


                                                                                                                                       

NotImplementedError: Could not run 'quantized::conv1d' with arguments from the 'CPU' backend. This could be because the operator doesn't exist for this backend, or was omitted during the selective/custom build process (if using custom build). If you are a Facebook employee using PyTorch on mobile, please visit https://fburl.com/ptmfixes for possible resolutions. 'quantized::conv1d' is only available for these backends: [Meta, QuantizedCPU, QuantizedCUDA, BackendSelect, Python, FuncTorchDynamicLayerBackMode, Functionalize, Named, Conjugate, Negative, ZeroTensor, ADInplaceOrView, AutogradOther, AutogradCPU, AutogradCUDA, AutogradXLA, AutogradMPS, AutogradXPU, AutogradHPU, AutogradLazy, AutogradMeta, Tracer, AutocastCPU, AutocastXPU, AutocastMPS, AutocastCUDA, FuncTorchBatched, BatchedNestedTensor, FuncTorchVmapMode, Batched, VmapMode, FuncTorchGradWrapper, PythonTLSSnapshot, FuncTorchDynamicLayerFrontMode, PreDispatch, PythonDispatcher].

Meta: registered at ../aten/src/ATen/core/MetaFallbackKernel.cpp:23 [backend fallback]
QuantizedCPU: registered at ../aten/src/ATen/native/quantized/cpu/qconv.cpp:1972 [kernel]
QuantizedCUDA: registered at ../aten/src/ATen/native/quantized/cudnn/Conv.cpp:391 [kernel]
BackendSelect: fallthrough registered at ../aten/src/ATen/core/BackendSelectFallbackKernel.cpp:3 [backend fallback]
Python: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:153 [backend fallback]
FuncTorchDynamicLayerBackMode: registered at ../aten/src/ATen/functorch/DynamicLayer.cpp:497 [backend fallback]
Functionalize: registered at ../aten/src/ATen/FunctionalizeFallbackKernel.cpp:349 [backend fallback]
Named: registered at ../aten/src/ATen/core/NamedRegistrations.cpp:7 [backend fallback]
Conjugate: registered at ../aten/src/ATen/ConjugateFallback.cpp:17 [backend fallback]
Negative: registered at ../aten/src/ATen/native/NegateFallback.cpp:18 [backend fallback]
ZeroTensor: registered at ../aten/src/ATen/ZeroTensorFallback.cpp:86 [backend fallback]
ADInplaceOrView: fallthrough registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:96 [backend fallback]
AutogradOther: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:63 [backend fallback]
AutogradCPU: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:67 [backend fallback]
AutogradCUDA: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:75 [backend fallback]
AutogradXLA: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:79 [backend fallback]
AutogradMPS: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:87 [backend fallback]
AutogradXPU: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:71 [backend fallback]
AutogradHPU: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:100 [backend fallback]
AutogradLazy: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:83 [backend fallback]
AutogradMeta: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:91 [backend fallback]
Tracer: registered at ../torch/csrc/autograd/TraceTypeManual.cpp:294 [backend fallback]
AutocastCPU: fallthrough registered at ../aten/src/ATen/autocast_mode.cpp:321 [backend fallback]
AutocastXPU: fallthrough registered at ../aten/src/ATen/autocast_mode.cpp:463 [backend fallback]
AutocastMPS: fallthrough registered at ../aten/src/ATen/autocast_mode.cpp:209 [backend fallback]
AutocastCUDA: fallthrough registered at ../aten/src/ATen/autocast_mode.cpp:165 [backend fallback]
FuncTorchBatched: registered at ../aten/src/ATen/functorch/LegacyBatchingRegistrations.cpp:731 [backend fallback]
BatchedNestedTensor: registered at ../aten/src/ATen/functorch/LegacyBatchingRegistrations.cpp:758 [backend fallback]
FuncTorchVmapMode: fallthrough registered at ../aten/src/ATen/functorch/VmapModeRegistrations.cpp:27 [backend fallback]
Batched: registered at ../aten/src/ATen/LegacyBatchingRegistrations.cpp:1075 [backend fallback]
VmapMode: fallthrough registered at ../aten/src/ATen/VmapModeRegistrations.cpp:33 [backend fallback]
FuncTorchGradWrapper: registered at ../aten/src/ATen/functorch/TensorWrapper.cpp:207 [backend fallback]
PythonTLSSnapshot: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:161 [backend fallback]
FuncTorchDynamicLayerFrontMode: registered at ../aten/src/ATen/functorch/DynamicLayer.cpp:493 [backend fallback]
PreDispatch: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:165 [backend fallback]
PythonDispatcher: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:157 [backend fallback]


In [18]:
import torch
print(torch.__version__)


2.5.1+cu124


In [19]:
import torch
print(torch.backends.quantized.supported_engines)
print(torch.backends.quantized.engine)


['qnnpack', 'none', 'onednn', 'x86', 'fbgemm']
x86


In [None]:
original_ssim = evaluate_ssim(model, dataloader)
quantized_ssim = evaluate_ssim(quantized_model, dataloader)
print(f"Original SSIM: {original_ssim:.4f}, Quantized SSIM: {quantized_ssim:.4f}")


In [None]:
# Measure latency
original_latency = evaluate_latency(model, dataloader)
quantized_latency = evaluate_latency(quantized_model, dataloader)
print(f"Original Model Latency: {original_latency:.4f} seconds per batch")
print(f"Quantized Model Latency: {quantized_latency:.4f} seconds per batch")

In [None]:
# Paths for original and quantized models
original_model_path = "/root/hsi-compression/results/weights/cae1d_8bpppc.pth.tar"
quantized_model_path = "/root/hsi-compression/compressed_model/weight_only_quant_model.pth"

# Assuming 'model' is your original loaded model
save_model(model, original_model_path)

save_model(quantized_model, quantized_model_path)  

# Calculate and print the model sizes
original_size = get_model_size(original_model_path)
quantized_size = get_model_size(quantized_model_path)  
print(f"Original Model Size: {original_size:.2f} MB")
print(f"Quantized Model Size: {quantized_size:.2f} MB")

In [None]:
#| export
def foo(): pass

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()