# Mixed_Precision_Training_Quantization

> This notebook applies mixed-precision training to a pretrained model as a model-compression technique and compares the result with the original model.

In [1]:
#| default_exp quantization_fastai

In [2]:
#| hide
from nbdev.showdoc import *

In [3]:
#| export
import subprocess
import sys

def install(package):
    """Install `package` with pip, using the interpreter that runs this code.

    Raises `subprocess.CalledProcessError` if the pip command exits with a
    non-zero status.
    """
    pip_command = [sys.executable, "-m", "pip", "install", package]
    subprocess.check_call(pip_command)

# List of required packages (pip distribution names)
required_packages = [
    "fastai",  # fastai includes torch, torchvision, and fastprogress
    "numpy",
    "pandas",
    "scikit-image",
    "tqdm",
]
# Distributions whose importable module name differs from the pip name.
# Without this mapping `__import__("scikit-image")` always fails, so pip
# would be invoked on every run even when the package is already installed.
_import_names = {"scikit-image": "skimage"}

# Install only the packages that cannot be imported
for package in required_packages:
    try:
        __import__(_import_names.get(package, package))
    except ImportError:
        install(package)



[0m

In [4]:
#| export
from fastai.vision.all import *  # Import necessary libraries
import os
import sys
import torch
import time
import numpy as np
import pandas as pd
from torch import nn
from torch.utils.data import Dataset, DataLoader, Subset
import logging
from skimage.metrics import structural_similarity as ssim
from tqdm import tqdm, trange
import torch.quantization
import torch.multiprocessing as mp
sys.path.append('/root/hsi-compression/models/')
from cae1d import ConvolutionalAutoencoder1D
from torch.cuda.amp import GradScaler, autocast

# Configure the CUDA caching allocator through its environment variable.
# NOTE(review): torch is already imported at this point; presumably the setting
# still applies because it is in place before the first CUDA allocation — confirm.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True,max_split_size_mb:64"
device = 'cuda' if torch.cuda.is_available() else 'cpu'  # Ensure device is set to GPU if available
# Use the 'spawn' start method for torch.multiprocessing; force=True replaces
# any start method that was set earlier in this process.
mp.set_start_method('spawn', force=True)



In [5]:
#| export

# Path of the weights file for ConvolutionalAutoencoder1D; it is handed to
# load_model_with_weights later in the notebook.
filepath = "/root/hsi-compression/results/weights/cae1d_8bpppc.pth.tar"

def load_model_with_weights(filepath):
    """Build a ConvolutionalAutoencoder1D on `device` and load its weights.

    The file at `filepath` is read with `torch.load(..., weights_only=True)`
    and passed directly to `load_state_dict`, so it must contain a plain state
    dict whose keys match the model. The model is returned in eval mode.
    """
    net = ConvolutionalAutoencoder1D().to(device)
    state_dict = torch.load(filepath, map_location=device, weights_only=True)
    net.load_state_dict(state_dict)
    net.eval()
    return net



In [6]:
#| export

# Setup logging to help with debugging.
# With level=logging.ERROR only ERROR and CRITICAL records are emitted.
logging.basicConfig(level=logging.ERROR, format='%(asctime)s - %(levelname)s - %(message)s')

# Base directory where the .npy files are stored; load_paths joins each CSV
# entry onto this directory.
base_directory = '/root/hsi-compression/datasets/hyspecnet-11k/patches/'

# Utility functions
def load_paths(csv_file):
    """Return the file paths listed in the first column of a header-less CSV.

    Each entry is stripped of surrounding whitespace and joined onto
    `base_directory`.
    """
    first_column = pd.read_csv(csv_file, header=None)[0]
    return [os.path.join(base_directory, entry.strip()) for entry in first_column]

def transform_sample(sample):
    """Standardise `sample` to zero mean and unit standard deviation.

    The mean and standard deviation are computed over the whole array.
    A constant sample has zero standard deviation; dividing by it would fill
    the result with NaN, so in that case the centred (all-zero) array is
    returned instead.
    """
    centered = sample - np.mean(sample)
    std = np.std(sample)
    if std == 0:
        # Constant input: skip the division to avoid 0/0 -> NaN.
        return centered
    return centered / std

# Dataset class
class NPYDataset(Dataset):
    """Dataset over a list of .npy files, yielding (input, target) pairs.

    Both elements of a pair are built from the same loaded array, so the
    target is a copy of the input. Tensors are converted to float and moved
    to `device` inside `__getitem__`.
    """

    def __init__(self, file_paths, transform=None):
        # Optional callable applied to each loaded array before conversion.
        self.transform = transform
        self.file_paths = file_paths

    def __len__(self):
        return len(self.file_paths)

    def __getitem__(self, idx):
        array = np.load(self.file_paths[idx])
        if self.transform:
            array = self.transform(array)
        # Two independent conversions, one per returned tensor.
        features = torch.from_numpy(array).float().to(device)
        target = torch.from_numpy(array).float().to(device)
        return features, target

csv_file_path = '/root/hsi-compression/datasets/hyspecnet-11k/splits/easy/test.csv'
file_paths = load_paths(csv_file_path)

# Initialize the dataset and DataLoader
dataset = NPYDataset(file_paths, transform=transform_sample)
# Work on a random half of the split.
subset_size = int(len(dataset) * 0.5)
indices = torch.randperm(len(dataset))[:subset_size]
subset = Subset(dataset, indices)

# Create DataLoader using the subset.
# NPYDataset.__getitem__ already places every tensor on `device`, so the
# loader must not pin memory: only CPU tensors can be pinned, and asking the
# loader to pin CUDA tensors raises a RuntimeError when batches are fetched.
# NOTE(review): with a CUDA device the worker processes also return CUDA
# tensors, which PyTorch discourages — consider moving the transfer out of the
# dataset and into the training/evaluation loops.
dataloader = DataLoader(
    subset,
    batch_size=1,
    shuffle=True,
    num_workers=4,
    pin_memory=False,
)


In [7]:
#| export

# Mixed precision training
def mixed_precision_training(model, dataloader, epochs=3, lr=0.001):
    """Fine-tune `model` as an autoencoder using automatic mixed precision.

    The model is trained in place with Adam and an MSE reconstruction loss
    (each batch's input is also its target). The object that is returned is
    the same object that was passed in, switched back to eval mode.

    Args:
        model: module to train; it is modified in place.
        dataloader: iterable of `(inputs, _)` batches; the second element is ignored.
        epochs: number of passes over `dataloader`.
        lr: Adam learning rate.

    Returns:
        The trained `model`.
    """
    # Autocast and gradient scaling are only meaningful on CUDA; on CPU both
    # are disabled so the loop runs in plain full precision.
    use_amp = device == 'cuda'
    scaler = GradScaler(enabled=use_amp)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = torch.nn.MSELoss().to(device)
    model.train()
    for epoch in range(epochs):
        for inputs, _ in tqdm(dataloader, desc=f"Epoch {epoch+1}"):
            # No-op when the batch is already on `device`.
            inputs = inputs.to(device)
            optimizer.zero_grad()
            # torch.cuda.amp.autocast accepts `enabled`, not `device`.
            with autocast(enabled=use_amp):
                outputs = model(inputs)
                loss = criterion(outputs, inputs)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
    model.eval()
    return model

In [8]:
#| export

def evaluate_model(model, dataloader):
    """Return the mean per-batch MSE loss of `model` over `dataloader`.

    The model is put in eval mode and run without gradient tracking. Each
    batch is an `(inputs, labels)` pair and the loss compares `model(inputs)`
    with `labels`. Lower values are better; this is a loss, not an accuracy.

    Returns:
        The average of the per-batch losses, or NaN when `dataloader` yields
        no batches.
    """
    model.eval()
    criterion = torch.nn.MSELoss().to(device)  # Ensure loss computation is on GPU
    losses = []
    with torch.no_grad():
        for inputs, labels in tqdm(dataloader, desc="Evaluating model", leave=False):
            outputs = model(inputs)
            # Labels must have the same shape as the outputs.
            losses.append(criterion(outputs, labels).item())
    if not losses:
        # Nothing was evaluated; avoid dividing by zero.
        return float("nan")
    return sum(losses) / len(losses)


In [9]:
#| export

def evaluate_ssim(model, dataloader):
    """Return the mean SSIM between model outputs and targets.

    The model is run in eval mode without gradient tracking. Every item of
    every batch is scored separately, using the target's value range
    (max - min) as `data_range`, and the scores are averaged with `np.mean`.
    """
    model.eval()
    scores = []
    with torch.no_grad():
        for batch, reference in dataloader:
            # SSIM is computed on NumPy arrays, so bring both sides to the CPU.
            reconstructed = model(batch).cpu().detach().numpy()
            expected = reference.cpu().detach().numpy()
            scores.extend(
                ssim(rec, ref, data_range=ref.max() - ref.min())
                for rec, ref in zip(reconstructed, expected)
            )
    return np.mean(scores)


In [10]:
#| export

def evaluate_latency(model, dataloader, num_iterations=100):
    """Return the average wall-clock seconds per batch for `model`.

    At most `num_iterations` batches are drawn from `dataloader`; the measured
    time covers fetching each batch and the forward pass. The total is divided
    by the number of batches that actually ran, so a dataloader shorter than
    `num_iterations` does not skew the average.

    Args:
        model: module to time; it is switched to eval mode.
        dataloader: iterable of `(data, _)` batches.
        num_iterations: upper bound on the number of batches to time.

    Returns:
        Seconds per batch, or 0.0 when no batch was processed.
    """
    model.eval()
    if num_iterations <= 0:
        return 0.0

    # CUDA kernels are launched asynchronously; synchronising before reading
    # the clock makes the timer cover the work that was actually executed.
    sync = torch.cuda.synchronize if torch.cuda.is_available() else None
    batches_timed = 0
    with torch.no_grad():
        if sync is not None:
            sync()
        start_time = time.perf_counter()
        for data, _ in dataloader:
            _ = model(data)
            batches_timed += 1
            # Stop before fetching a batch that would not be timed.
            if batches_timed >= num_iterations:
                break
        if sync is not None:
            sync()
        total_time = time.perf_counter() - start_time

    if batches_timed == 0:
        return 0.0
    return total_time / batches_timed


In [11]:
# Clear GPU memory
import gc
import torch

# Collect unreachable Python objects first, then ask PyTorch to release the
# CUDA memory it is holding in its cache.
gc.collect()
torch.cuda.empty_cache()

In [12]:
#| export
def save_model(model, filepath):
    """Write the model's state dict (not the whole module) to `filepath`."""
    state = model.state_dict()
    torch.save(state, filepath)

def get_model_size(filepath):
    """Return the size of the file at `filepath` in megabytes (1 MB = 1024 * 1024 bytes)."""
    bytes_per_megabyte = 1024 * 1024
    return os.path.getsize(filepath) / bytes_per_megabyte

In [13]:
#| eval: false

# Append the directory containing the cae1d.py file, not the file itself
sys.path.append('/root/hsi-compression/models/')
from cae1d import ConvolutionalAutoencoder1D

# Load your model
filepath = "/root/hsi-compression/results/weights/cae1d_8bpppc.pth.tar"
model = load_model_with_weights(filepath)  # Make sure this function is correctly defined

# Now evaluate the model.
# evaluate_model returns the mean MSE reconstruction loss (lower is better),
# so the value is labelled as a loss rather than an accuracy.
print("Original Model MSE Loss:", evaluate_model(model, dataloader))

RuntimeError: Error(s) in loading state_dict for ConvolutionalAutoencoder1D:
	Unexpected key(s) in state_dict: "encoder.0.activation_post_process.eps", "encoder.0.activation_post_process.histogram", "encoder.0.activation_post_process.min_val", "encoder.0.activation_post_process.max_val", "encoder.1.activation_post_process.eps", "encoder.1.activation_post_process.histogram", "encoder.1.activation_post_process.min_val", "encoder.1.activation_post_process.max_val", "encoder.3.activation_post_process.eps", "encoder.3.activation_post_process.histogram", "encoder.3.activation_post_process.min_val", "encoder.3.activation_post_process.max_val", "encoder.4.activation_post_process.eps", "encoder.4.activation_post_process.histogram", "encoder.4.activation_post_process.min_val", "encoder.4.activation_post_process.max_val", "encoder.6.activation_post_process.eps", "encoder.6.activation_post_process.histogram", "encoder.6.activation_post_process.min_val", "encoder.6.activation_post_process.max_val", "encoder.7.activation_post_process.eps", "encoder.7.activation_post_process.histogram", "encoder.7.activation_post_process.min_val", "encoder.7.activation_post_process.max_val", "encoder.8.activation_post_process.eps", "encoder.8.activation_post_process.histogram", "encoder.8.activation_post_process.min_val", "encoder.8.activation_post_process.max_val", "encoder.9.activation_post_process.eps", "encoder.9.activation_post_process.histogram", "encoder.9.activation_post_process.min_val", "encoder.9.activation_post_process.max_val", "decoder.0.activation_post_process.eps", "decoder.0.activation_post_process.histogram", "decoder.0.activation_post_process.min_val", "decoder.0.activation_post_process.max_val", "decoder.1.activation_post_process.eps", "decoder.1.activation_post_process.histogram", "decoder.1.activation_post_process.min_val", "decoder.1.activation_post_process.max_val", "decoder.2.activation_post_process.eps", "decoder.2.activation_post_process.histogram", "decoder.2.activation_post_process.min_val", "decoder.2.activation_post_process.max_val", 
"decoder.3.activation_post_process.eps", "decoder.3.activation_post_process.histogram", "decoder.3.activation_post_process.min_val", "decoder.3.activation_post_process.max_val", "decoder.5.activation_post_process.eps", "decoder.5.activation_post_process.histogram", "decoder.5.activation_post_process.min_val", "decoder.5.activation_post_process.max_val", "decoder.6.activation_post_process.eps", "decoder.6.activation_post_process.histogram", "decoder.6.activation_post_process.min_val", "decoder.6.activation_post_process.max_val", "decoder.8.activation_post_process.eps", "decoder.8.activation_post_process.histogram", "decoder.8.activation_post_process.min_val", "decoder.8.activation_post_process.max_val". 

In [None]:
#| eval: false

# mixed_precision_training trains the module it receives in place and returns
# that same object. Train a deep copy so `model` keeps the original weights and
# the comparisons in the following cells are between two distinct models.
import copy

quantized_model = mixed_precision_training(copy.deepcopy(model), dataloader)
# evaluate_model returns the mean MSE reconstruction loss (lower is better).
print("Quantized Model MSE Loss:", evaluate_model(quantized_model, dataloader))





In [None]:
#| eval: false

# Compare reconstruction quality: mean SSIM over the dataloader for each model.
original_ssim = evaluate_ssim(model, dataloader)
quantized_ssim = evaluate_ssim(quantized_model, dataloader)
print(f"Original SSIM: {original_ssim:.4f}, Quantized SSIM: {quantized_ssim:.4f}")



In [None]:
#| eval: false
# Measure latency
# evaluate_latency is called with its default num_iterations (100) for each model.
original_latency = evaluate_latency(model, dataloader)
quantized_latency = evaluate_latency(quantized_model, dataloader)
print(f"Original Model Latency: {original_latency:.4f} seconds per batch")
print(f"Quantized Model Latency: {quantized_latency:.4f} seconds per batch")



In [None]:
#| eval: false

# Size calculation
# Paths for original and quantized models
original_model_path = "/root/hsi-compression/results/weights/cae1d_8bpppc.pth.tar"
quantized_model_path = "/root/hsi-compression/compressed_model/mixed_precision_model.pth"
# The state dict of `model` is written to its own file for the comparison.
# Saving to original_model_path would overwrite the pretrained checkpoint on
# disk with whatever weights `model` holds at this point.
original_model_copy_path = "/root/hsi-compression/compressed_model/original_model.pth"

# Make sure the output directory exists before writing into it.
os.makedirs(os.path.dirname(quantized_model_path), exist_ok=True)

# Assuming 'model' is your original loaded model
save_model(model, original_model_copy_path)

save_model(quantized_model, quantized_model_path)

# Calculate and print the model sizes.
# NOTE(review): autocast changes the precision used for computation; the
# parameters are presumably still stored in float32, so both files are expected
# to be about the same size — confirm.
original_size = get_model_size(original_model_copy_path)
quantized_size = get_model_size(quantized_model_path)
print(f"Original Model Size: {original_size:.2f} MB")
print(f"Quantized Model Size: {quantized_size:.2f} MB")

In [None]:
#| eval: false

In [None]:
#| export
def foo():
    """Placeholder function that does nothing and returns None."""
    return None

In [None]:
#| hide
# Run nbdev's export step for this project.
import nbdev; nbdev.nbdev_export()