In [1]:
import sys
import os
import torch
import numpy as np
from torch.utils.data import TensorDataset, DataLoader
from models.lenet import LeNet
from models.utils import PyTorchNeuralNetworkWrapper
from sws_compressor import SoftWeightSharingCompressor
from dataset import get_mnist

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
print("Starting pretraining and compression process...")

# Load the MNIST splits from the project helper.
print("Loading MNIST dataset...")
X_train, X_test, y_train, y_test = get_mnist()

# Optional caps on the number of samples, for quick experiments.
# `None` keeps the full split (slicing with [:None] returns every row), which
# matches what this cell did before: its "smaller subset" step only assigned
# each array to itself and therefore never reduced anything.
TRAIN_SUBSET = None
TEST_SUBSET = None
X_train, y_train = X_train[:TRAIN_SUBSET], y_train[:TRAIN_SUBSET]
X_test, y_test = X_test[:TEST_SUBSET], y_test[:TEST_SUBSET]

print(f"Training data shape: {X_train.shape}")
print(f"Test data shape: {X_test.shape}")

Starting pretraining and compression process...
Loading MNIST dataset...
Training data shape: (60000, 784)
Test data shape: (10000, 784)


In [3]:
# Build a fresh LeNet; it is trained in the next cell through the wrapper.
print("Initializing LeNet model...")
model = LeNet()

# Hyper-parameters handed to the sklearn-style wrapper, collected in one
# place so they are easy to tweak.
print("Creating sklearn-compatible wrapper...")
wrapper_settings = dict(
    epochs=20,
    batch_size=64,
    learning_rate=5e-4,
    verbose=True,
)
clf = PyTorchNeuralNetworkWrapper(model=model, **wrapper_settings)

Initializing LeNet model...
Creating sklearn-compatible wrapper...


In [None]:
# Checkpoint file written at the end of this cell and read back by the
# compression cell.
pretrained_model_path = "pretrained_lenet_model.pth"

print("Starting pretraining...")
# Fit the wrapped LeNet on the training split.
clf.fit(X_train, y_train)

# Score on both splits; `test_score` is reused later to report the
# accuracy drop after compression.
train_score, test_score = clf.score(X_train, y_train), clf.score(X_test, y_test)
print(f"Pretrained model - Train accuracy: {train_score:.4f}")
print(f"Pretrained model - Test accuracy: {test_score:.4f}")

# Persist the pretrained weights through the wrapper's own save routine.
clf.save_model(pretrained_model_path)
print(f"Pretrained model saved to {pretrained_model_path}")

In [None]:


# Compress the pretrained LeNet with soft weight sharing (SWS), evaluate the
# result on the held-out test split, save it, and print a compression report.
# Relies on names from earlier cells: `model`, `X_train`, `y_train`, `X_test`,
# `y_test`, `test_score` and `pretrained_model_path`.
print("Starting SWS compression...")

# Seed the RNGs used here so the sampled prior hyper-parameters and the
# retraining run can be reproduced after a kernel restart.
SWS_SEED = 0
sws_rng = np.random.default_rng(SWS_SEED)
torch.manual_seed(SWS_SEED)


def as_image_loader(features, labels, device, batch_size=64, shuffle=False):
    """Wrap arrays in a DataLoader whose tensors already live on `device`.

    Inputs with shape (N, 784) are reshaped to (N, 1, 28, 28); any other
    shape is passed through unchanged.

    Args:
        features: array-like of inputs, converted to float32.
        labels: array-like of class indices, converted to int64.
        device: torch device (or device string) the tensors are moved to.
        batch_size: number of samples per batch.
        shuffle: whether the loader reshuffles every epoch.

    Returns:
        A DataLoader over a TensorDataset of (features, labels).
    """
    feature_tensor = torch.as_tensor(features, dtype=torch.float32)
    label_tensor = torch.as_tensor(labels, dtype=torch.long)
    if feature_tensor.dim() == 2 and feature_tensor.shape[1] == 784:  # flattened MNIST
        feature_tensor = feature_tensor.reshape(-1, 1, 28, 28)
    dataset = TensorDataset(feature_tensor.to(device), label_tensor.to(device))
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)


# Create a new instance of the model to load the pretrained weights
compressed_model = LeNet()
device = "cuda" if torch.cuda.is_available() else "cpu"
compressed_model = compressed_model.to(device)

# Load the pretrained weights.
# NOTE(security): weights_only=False un-pickles arbitrary Python objects, so
# only load checkpoints that this notebook produced itself.
model_data = torch.load(pretrained_model_path, map_location=device, weights_only=False)
compressed_model.load_state_dict(model_data["model_state_dict"])
compressed_model.eval()

# Retrain on the TRAINING split. Handing the test split to the compressor (as
# this cell used to do) tunes the weights on the very data that is used for
# evaluation below and makes the reported accuracy meaningless.
retrain_loader = as_image_loader(X_train, y_train, device, shuffle=True)
test_loader = as_image_loader(X_test, y_test, device)

# Hyper-parameters passed as `other_var_prior_alpha` / `other_var_prior_beta`
# are drawn uniformly from [0.5, 2.0), now from the seeded generator, and
# printed so every run records the values it actually used.
other_var_prior_alpha = float(sws_rng.uniform(0.5, 2.0))
other_var_prior_beta = float(sws_rng.uniform(0.5, 2.0))
print(
    f"Sampled prior hyper-parameters (seed={SWS_SEED}): "
    f"alpha={other_var_prior_alpha:.4f}, beta={other_var_prior_beta:.4f}"
)

# Initialize the SWS compressor
sws_compressor = SoftWeightSharingCompressor(
    tau=5e-3,
    n_components=17,
    zero_component_prior_alpha=2.0,  # Beta(2.0, 2.0) prior on the zero component
    zero_component_prior_beta=2.0,
    other_var_prior_alpha=other_var_prior_alpha,
    other_var_prior_beta=other_var_prior_beta,
    lr_weights=1e-4,
    lr_mixture=5e-4,
    prune_threshold=1e-4,
    device=device
)

print("Compressing model with SWS...")
# Apply compression
compressed_state_dict, code_book, mixture_params = sws_compressor(
    compressed_model, retrain_loader, epochs=10
)

# Load the compressed weights back into the model
compressed_model.load_state_dict(compressed_state_dict)

# Evaluate the compressed model batch by batch on the held-out test split.
compressed_model.eval()
n_correct = 0
n_seen = 0
with torch.no_grad():
    for batch_inputs, batch_labels in test_loader:
        batch_predictions = compressed_model(batch_inputs).argmax(dim=1)
        n_correct += (batch_predictions == batch_labels).sum().item()
        n_seen += batch_labels.numel()
compressed_accuracy = n_correct / n_seen if n_seen else float("nan")

print(f"Compressed model - Test accuracy: {compressed_accuracy:.4f}")

# Basic statistics. The quotient printed below is "parameters per codebook
# entry", not a storage compression ratio, so it is labelled as such; the
# bit-level ratio comes from the detailed report further down.
original_params = sum(p.numel() for p in model.parameters())
compressed_params = len(code_book['centres'])

print("\nCompression Results:")
print(f"Original number of parameters: {original_params}")
print(f"Number of unique weights after compression: {compressed_params}")
if compressed_params:
    print(f"Parameters per codebook entry: {original_params / compressed_params:.2f}")
print(f"Accuracy drop: {test_score - compressed_accuracy:.4f}")

# Save the compressed model before the (optional) detailed report so that a
# failure in the reporting code cannot lose the compression result.
compressed_model_path = "compressed_lenet_model.pth"
torch.save({
    'state_dict': compressed_state_dict,
    'code_book': code_book,
    'original_accuracy': float(test_score),
    'compressed_accuracy': float(compressed_accuracy)
}, compressed_model_path)
print(f"Compressed model saved to {compressed_model_path}")

# Generate detailed compression report.
# NOTE(review): `Prior` and `compression_report` are not imported by any cell
# visible in this notebook; add the matching import to the first cell,
# otherwise the lines below raise NameError on a fresh kernel.
print("\nGenerating detailed compression report...")
prior = Prior(mixture_params['mu'], mixture_params['sigma'], mixture_params['pi'])
compression_details = compression_report(
    model=compressed_model,  # Use the compressed model
    prior=prior,
    dataset="MNIST",  # Dataset name
    use_huffman=True,
    pbits_fc=5,
    pbits_conv=8,
    skip_last_matrix=False,
    assign_mode="ml"  # Should match the quantization method used in compression
)

print("Detailed Compression Report:")
print(f"  Original bits: {compression_details['orig_bits']:,}")
print(f"  Compressed bits: {compression_details['compressed_bits']:,}")
print(f"  Compression Ratio: {compression_details['CR']:.2f}x")
print(f"  Non-zero elements: {compression_details['nnz']:,}")
print("  Layer-by-layer breakdown:")
for layer_info in compression_details['layers']:
    # Compressed size of a layer is the sum of its four reported bit counts.
    compressed_layer_bits = sum(
        layer_info[key] for key in ('bits_IR', 'bits_IC', 'bits_A', 'bits_codebook')
    )
    print(f"    {layer_info['layer']} {layer_info['shape']}: {layer_info['orig_bits']} -> {compressed_layer_bits} bits")

print("Process completed successfully!")


In [4]:
# NOTE(review): no cell visible in this notebook defines `main`, so on a fresh
# kernel this call would raise NameError. The captured output beneath this cell
# reports data shapes (5000, 784) / (1000, 784), which differ from the loading
# cell's output, so it presumably stems from an earlier version of the
# notebook -- confirm, then either define `main` or remove this cell.
if __name__ == "__main__":
    main()

Starting pretraining and compression process...
Loading MNIST dataset...
Training data shape: (5000, 784)
Test data shape: (1000, 784)
Initializing LeNet model...
Creating sklearn-compatible wrapper...
Starting pretraining...


Training: 100%|██████████| 5/5 [00:05<00:00,  1.14s/it, loss=0.0406]


Pretrained model - Train accuracy: 0.9962
Pretrained model - Test accuracy: 0.9510
Pretrained model saved to pretrained_lenet_model.pth
Starting SWS compression...
Compressing model with SWS...


Retraining: 100%|██████████| 10/10 [00:09<00:00,  1.02it/s, Epoch=10, $\mathcal{L}_E$=0.109, $\mathcal{L}_C$=-7.5e+5]

Compressed model - Test accuracy: 0.0850

Compression Results:
Original number of parameters: 431080
Number of unique weights after compression: 8
Compression ratio: 53885.00x
Accuracy drop: 0.8660
Compressed model saved to compressed_lenet_model.pth
Process completed successfully!





# LeNet-300-100 Experiment
## The exact same baseline as in the paper

In [None]:
# Baseline run on the `lenet_300_100` preset: `keras` complexity mode, fixed
# tau of 5e-3 and MAP quantization assignment. `--auto-tau-ratio 0` presumably
# switches automatic tau selection off -- confirm in run_sws.py.
!python run_sws.py --preset lenet_300_100 \
  --complexity-mode keras --tau 5e-3 \
  --quant-assign map \
  --auto-tau-ratio 0

# LeNet-300-100 (ours: robust ML assignment + automatic $\tau$)

In [None]:
# "Ours" run on the `lenet_300_100` preset: 30 pretraining and 30 retraining
# epochs, 17 mixture components, `epoch` complexity mode, ML quantization
# assignment, seed 1, run name `pt_lenet300_ml` with `--save-dir runs`.
# NOTE(review): the heading advertises automatic tau, but this command passes
# a fixed `--tau 3e-5` and no `--auto-tau-ratio` -- confirm which is intended.
!python run_sws.py --preset lenet_300_100 \
  --pretrain-epochs 30 --retrain-epochs 30 \
  --pi0 0.95 --num-components 17 \
  --lr-w 5e-4 --lr-theta-means 1e-4 --lr-theta-gammas 3e-3 --lr-theta-rhos 3e-3 \
  --weight-decay 0.0 \
  --complexity-mode epoch --tau 3e-5 --tau-warmup-epochs 5 \
  --gamma-alpha 50 --gamma-beta 0.1 \
  --gamma-alpha-zero 100 --gamma-beta-zero 0.5 \
  --merge-kl-thresh 0.0 --quant-skip-last \
  --quant-assign ml \
  --log-mixture-every 1 --cr-every 5 \
  --run-name pt_lenet300_ml --save-dir runs --seed 1

# LeNet-Caffe experiment
## Baseline

In [None]:
# Baseline run on the `lenet5` preset, using the same flags as the
# LeNet-300-100 baseline: `keras` complexity mode, fixed tau of 5e-3, MAP
# quantization assignment and `--auto-tau-ratio 0`.
!python run_sws.py --preset lenet5 \
  --complexity-mode keras --tau 5e-3 \
  --quant-assign map \
  --auto-tau-ratio 0


# LeNet-Caffe (ours: robust ML assignment + automatic $\tau$)

In [None]:
# "Ours" run on the `lenet5` preset: 100 pretraining and 60 retraining epochs,
# 17 mixture components, `epoch` complexity mode, ML quantization assignment,
# seed 1, run name `pt_lenet5_ml_safe` with `--save-dir runs`.
# NOTE(review): the heading advertises automatic tau, but this command passes
# a fixed `--tau 3e-5` and no `--auto-tau-ratio` -- confirm which is intended.
!python run_sws.py --preset lenet5 \
  --pretrain-epochs 100 --retrain-epochs 60 \
  --pi0 0.95 --num-components 17 \
  --lr-w 5e-4 --lr-theta-means 1e-4 --lr-theta-gammas 3e-3 --lr-theta-rhos 3e-3 \
  --weight-decay 0.0 \
  --complexity-mode epoch --tau 3e-5 --tau-warmup-epochs 5 \
  --gamma-alpha 50 --gamma-beta 0.1 \
  --gamma-alpha-zero 100 --gamma-beta-zero 0.5 \
  --merge-kl-thresh 0.0 --quant-skip-last \
  --quant-assign ml \
  --log-mixture-every 1 --cr-every 5 \
  --run-name pt_lenet5_ml_safe --save-dir runs --seed 1

# ResNet (light) Experiment
## The same as in the paper

In [None]:
# Baseline run on the `wrn_16_4` preset: `keras` complexity mode, fixed tau of
# 5e-3, MAP quantization assignment and `--auto-tau-ratio 0`. It also passes
# `--log-mixture-every 1` and `--make-gif`; run name `wrn_map_keras_tau5e-3`.
!python run_sws.py --preset wrn_16_4 \
  --complexity-mode keras --tau 5e-3 \
  --quant-assign map --auto-tau-ratio 0 \
  --log-mixture-every 1 --make-gif \
  --run-name wrn_map_keras_tau5e-3


# ResNet (light) (ours: robust ML assignment + automatic $\tau$)

In [None]:
# "Ours" run on the `wrn_16_4` preset: `epoch` complexity mode with
# `--auto-tau-ratio 0.1` instead of a fixed tau, 10 tau warm-up epochs, ML
# quantization assignment and a merge KL threshold of 1e-6. GIF output is
# requested via the `--make-gif` / `--gif-*` flags; run name `wrn_ml_autoTau`.
!python run_sws.py --preset wrn_16_4 \
  --complexity-mode epoch --auto-tau-ratio 0.1 \
  --tau-warmup-epochs 10 \
  --quant-assign ml --merge-kl-thresh 1e-6 \
  --log-mixture-every 1 --cr-every 2 \
  --make-gif --gif-fps 2 --gif-sample 50000 \
  --run-name wrn_ml_autoTau


# Collapse demo

In [None]:
# Collapse demo, baseline settings on the `lenet_300_100` preset (`keras`
# complexity mode, fixed tau of 5e-3, MAP assignment, `--auto-tau-ratio 0`).
# GIF output is requested with both axes limited to [-1.2, 1.2]; run name
# `lenet300_map_keras_tau5e-3_gif`.
!python run_sws.py --preset lenet_300_100 \
  --complexity-mode keras --tau 5e-3 \
  --quant-assign map --auto-tau-ratio 0 \
  --log-mixture-every 1 \
  --make-gif --gif-fps 2 --gif-sample 50000 \
  --gif-xmin -1.2 --gif-xmax 1.2 --gif-ymin -1.2 --gif-ymax 1.2 \
  --run-name lenet300_map_keras_tau5e-3_gif


## Ours (robust)

In [None]:
# Counterpart to the collapse demo on the `lenet_300_100` preset: `epoch`
# complexity mode, `--auto-tau-ratio 0.1`, 10 tau warm-up epochs, ML
# assignment and a merge KL threshold of 1e-6. It uses the same GIF axis
# limits ([-1.2, 1.2]); run name `lenet300_ml_autoTau_gif`.
!python run_sws.py --preset lenet_300_100 \
  --complexity-mode epoch --auto-tau-ratio 0.1 \
  --tau-warmup-epochs 10 \
  --quant-assign ml --merge-kl-thresh 1e-6 \
  --log-mixture-every 1 \
  --make-gif --gif-fps 2 --gif-sample 50000 \
  --gif-xmin -1.2 --gif-xmax 1.2 --gif-ymin -1.2 --gif-ymax 1.2 \
  --run-name lenet300_ml_autoTau_gif


In [None]:
# Variant on the `lenet_300_100` preset with a fixed tau of 8e-5 (no
# `--auto-tau-ratio`), `epoch` complexity mode, 10 tau warm-up epochs, ML
# assignment and a merge KL threshold of 1e-6. GIF axis limits are
# [-1.2, 1.2]; run name `banded_ml_tau8e-5`.
!python run_sws.py --preset lenet_300_100 \
  --complexity-mode epoch --tau 8e-5 --tau-warmup-epochs 10 \
  --quant-assign ml --merge-kl-thresh 1e-6 \
  --log-mixture-every 1 \
  --make-gif --gif-fps 2 --gif-sample 50000 \
  --gif-xmin -1.2 --gif-xmax 1.2 --gif-ymin -1.2 --gif-ymax 1.2 \
  --run-name banded_ml_tau8e-5
