## HPML final project: Binary Neural Network analysis

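BNCVL code: a convolutional layer whose weights and bias are binarized to 0/1 on every forward pass, followed by per-sample normalization and an activation.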

In [6]:
from typing import Callable, Optional

import torch
import torch.nn as nn
import torch.nn.functional as F


In [7]:
class BNCVL(nn.Module):
    """Convolutional layer whose weights and bias are binarized to {0, 1}.

    Full-precision parameters are stored and updated by the optimizer; every
    forward pass convolves with their binarized versions, normalizes the
    result per sample and applies the activation.

    Args:
        in_channels: Number of channels of the input tensor.
        out_channels: Number of feature maps produced by the convolution.
        kernel_size: Side length of the square kernel. The convolution uses
            stride 1 and padding ``kernel_size // 2``, which keeps the spatial
            size unchanged for odd kernel sizes.
        activation: Callable applied to the normalized output. ``None`` (the
            default) gives this layer its own ``nn.ReLU()``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, activation: Optional[Callable] = None):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size

        # 32-bit float convolution weights and bias (the latent parameters
        # that are binarized on the fly in forward()).
        self.weight = nn.Parameter(
            torch.randn(out_channels, in_channels, kernel_size, kernel_size)
        )
        self.bias = nn.Parameter(torch.zeros(out_channels))

        # Build the default activation here rather than in the signature: a
        # module instance used as a default argument is created once and would
        # be registered as the *same* submodule in every layer.
        self.activation = nn.ReLU() if activation is None else activation

    def no_grad(self, x):
        """Return ``x`` detached from the autograd graph.

        Used in ``forward`` to build the straight-through estimator.
        """
        return x.detach()

    def quantize(self, x):
        """Binarize ``x`` against its own global mean.

        Returns a float tensor holding 1.0 where ``x > mean(x)`` and 0.0
        elsewhere. The comparison is strict, so e.g. the zero-initialized
        bias quantizes to all zeros.
        """
        threshold = x.mean()
        return (x > threshold).float()

    def normalize(self, z):
        """Standardize each sample of a batched 4-D tensor.

        Mean and standard deviation are computed jointly over dims 1, 2 and 3
        (all channels and spatial positions of a sample), not per feature map.
        """
        mean = z.mean(dim=(1, 2, 3), keepdim=True)
        # torch's default std estimator; the epsilon guards against division by zero.
        std = z.std(dim=(1, 2, 3), keepdim=True) + 1e-8
        return (z - mean) / std

    def forward(self, x):
        """Convolve ``x`` with binarized parameters, then normalize and activate.

        In training mode the binarized values are used in the forward pass
        while gradients are routed to the full-precision parameters
        (straight-through estimator). In eval mode the parameters are simply
        binarized.
        """
        if self.training:
            # Quantization during training (STE approximation): the value
            # equals quantize(p), the gradient is that of the identity w.r.t. p.
            w_q = self.weight + self.no_grad(self.quantize(self.weight) - self.weight)
            b_q = self.bias + self.no_grad(self.quantize(self.bias) - self.bias)
        else:
            # Quantization during inference
            w_q = self.quantize(self.weight)
            b_q = self.quantize(self.bias)

        # Perform convolution with quantized weights
        z = F.conv2d(x, w_q, b_q, stride=1, padding=self.kernel_size // 2)

        # Normalize and activate
        z = self.normalize(z)
        return self.activation(z)

In [8]:
# ====== TEST BLOCK ======

def test_bncvl():
    """Smoke-test BNCVL on a random batch.

    Checks the output shape, that quantized weights contain only 0/1, and
    that gradients reach both the input and the full-precision parameters
    (the layer is in training mode by default, so the STE path is exercised).
    Each check is asserted as well as printed.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Using device: {device}")

    # Create layer and move to device
    layer = BNCVL(in_channels=3, out_channels=8, kernel_size=3).to(device)

    # Create dummy input (batch=4, channels=3, height=32, width=32)
    x = torch.randn(4, 3, 32, 32, device=device, requires_grad=True)

    # Forward pass
    y = layer(x)
    print(f"Output shape: {y.shape}")
    assert y.shape == (4, 8, 32, 32), f"unexpected output shape {tuple(y.shape)}"

    # Check quantization (values should be 0/1 in quantized weights)
    with torch.no_grad():
        w_q = layer.quantize(layer.weight)
        print(f"Unique quantized weight values: {w_q.unique()}")
    assert set(w_q.unique().tolist()) <= {0.0, 1.0}, "quantized weights must be 0 or 1"

    # Backward test (gradient should flow through STE)
    loss = y.mean()
    loss.backward()

    # Fail with a clear message instead of an AttributeError on a None grad.
    assert x.grad is not None, "no gradient reached the input"
    assert layer.weight.grad is not None, "no gradient reached the weights"
    assert layer.bias.grad is not None, "no gradient reached the bias"
    assert torch.isfinite(x.grad).all(), "non-finite gradient on input"
    assert torch.isfinite(layer.weight.grad).all(), "non-finite gradient on weights"

    print(f"Gradient on input: {x.grad.abs().mean().item():.6f}")
    print(f"Gradient on weights: {layer.weight.grad.abs().mean().item():.6f}")

    print("Forward and backward pass successful.")

# Run the smoke test right away so its printed results appear as this cell's output.
test_bncvl()

Using device: cpu
Output shape: torch.Size([4, 8, 32, 32])
Unique quantized weight values: tensor([0., 1.])
Gradient on input: 0.000014
Gradient on weights: 0.000227
Forward and backward pass successful.
