# SNN Weight & Threshold Balancing - Convolutional

In [1]:
import torch
from torchvision.transforms import ToTensor#, Compose, Normalize
from torchvision.datasets import MNIST

import snntorch as snn
import snntorch.functional as SF

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cpu


In [2]:
# data alread 0-1 normalised, simply convert to tensor
transform_data = ToTensor()

# Load the data
batch_size = 100
train_dataset = MNIST(root = './mnist/', train = True, download = True, transform=transform_data)
test_dataset = MNIST(root = './mnist/', train = False, download = True, transform=transform_data)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size = batch_size, shuffle = True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size = batch_size, shuffle = False)

## Convolutional ReLU Neural Network

In [6]:
# convolutional neural network
class Conv_Net(torch.nn.Module):
    def __init__(self, n_x: int, n_h: list, n_y: list, kernel_size: int=5):
        super().__init__()

        self.in_layer = torch.nn.Conv2d(n_x, n_h[0], kernel_size, bias=False)
        self.h1_layer = torch.nn.Conv2d(n_h[0], n_h[1], kernel_size, bias=False)
        self.h2_layer = torch.nn.Linear(n_y[0], n_y[1], bias=False)
        self.pooling = torch.nn.AvgPool2d(2)
        self.dropout = torch.nn.Dropout()
        self.activator = torch.nn.ReLU()

    def forward(self, x):
        # Flatten images
        # x = x.view(x.size(0), -1)
        
        inp = self.dropout(self.activator(self.pooling(self.in_layer(x))))
        h1 = self.dropout(self.activator(self.pooling(self.h1_layer(inp))))
        
        # vectorise image
        h1 = h1.view(h1.size(0), -1)
        y = self.activator(self.h2_layer(h1))

        return y
    
    def save_parameters(self, path: str):
        torch.save(self.in_layer, path + "0.pt")
        torch.save(self.h1_layer, path + "1.pt")
        torch.save(self.h2_layer, path + "2.pt")

    def load_parameters(self, path: str):
        self.in_layer = torch.load(path + "0.pt", weights_only=False)
        self.h1_layer = torch.load(path + "1.pt", weights_only=False)
        self.h2_layer = torch.load(path + "2.pt", weights_only=False)

In [9]:
def init_weights(m):
    if isinstance(m, torch.nn.Linear) or isinstance(m, torch.nn.Conv2d):
        torch.nn.init.uniform_(m.weight, -0.1, 0.1)

conv_net = Conv_Net(1, [12, 64], [1024, 10]).to(device)
conv_net.apply(init_weights)
optimiser = torch.optim.Adam(conv_net.parameters())

# optimiser used in the original paper seems to kill the gradients, so we're just going to use adam
# optimiser = torch.optim.SGD(conv_net.parameters(), lr=.01, momentum=0.5)

conv_net.train()

Conv_Net(
  (in_layer): Conv2d(1, 12, kernel_size=(5, 5), stride=(1, 1), bias=False)
  (h1_layer): Conv2d(12, 64, kernel_size=(5, 5), stride=(1, 1), bias=False)
  (h2_layer): Linear(in_features=1024, out_features=10, bias=False)
  (pooling): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (dropout): Dropout(p=0.5, inplace=False)
  (activator): ReLU()
)

In [10]:
# Training model
num_epochs = 15
for epoch in range(num_epochs):
    # Go trough all samples in train dataset
    for i, (images, labels) in enumerate(train_loader):
        # Get from dataloader and send to device
        images = images.to(device)
        labels = labels.to(device)
        # Forward pass
        outputs = conv_net(images)
        # Compute loss
        loss = torch.nn.functional.cross_entropy(outputs, labels)
        # Backward and optimize
        optimiser.zero_grad()
        loss.backward()
        optimiser.step()
        # Display
        if (i+1) % 100 == 0:
            print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}')

Epoch [1/15], Step [100/600], Loss: 0.7449
Epoch [1/15], Step [200/600], Loss: 0.5508
Epoch [1/15], Step [300/600], Loss: 0.3478
Epoch [1/15], Step [400/600], Loss: 0.3962
Epoch [1/15], Step [500/600], Loss: 0.3962
Epoch [1/15], Step [600/600], Loss: 0.1065
Epoch [2/15], Step [100/600], Loss: 0.3321
Epoch [2/15], Step [200/600], Loss: 0.1846
Epoch [2/15], Step [300/600], Loss: 0.2293
Epoch [2/15], Step [400/600], Loss: 0.2776
Epoch [2/15], Step [500/600], Loss: 0.1158
Epoch [2/15], Step [600/600], Loss: 0.1491
Epoch [3/15], Step [100/600], Loss: 0.1828
Epoch [3/15], Step [200/600], Loss: 0.1167
Epoch [3/15], Step [300/600], Loss: 0.0789
Epoch [3/15], Step [400/600], Loss: 0.1116
Epoch [3/15], Step [500/600], Loss: 0.1525
Epoch [3/15], Step [600/600], Loss: 0.1324
Epoch [4/15], Step [100/600], Loss: 0.1401
Epoch [4/15], Step [200/600], Loss: 0.0405
Epoch [4/15], Step [300/600], Loss: 0.1163
Epoch [4/15], Step [400/600], Loss: 0.2690
Epoch [4/15], Step [500/600], Loss: 0.1170
Epoch [4/15

In [11]:
# Evaluate model accuracy on test after training
# Set model in eval mode!
conv_net.eval()
# Evaluate
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        # Get images and labels from test loader
        images = images.to(device)
        labels = labels.to(device)
        # Forward pass and predict class using max
        outputs = conv_net(images)
        _, predicted = torch.max(outputs.data, 1)
        # Check if predicted class matches label
        # and count number of correct predictions
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
# Compute final accuracy and display
accuracy = correct/total
print(f'Evaluation after training, test accuracy: {accuracy:.4f}')

Evaluation after training, test accuracy: 0.9894


In [12]:
conv_net.save_parameters("params/conv")

## Convolutional Spiking Neural Network

In [3]:
# to generate poisson spike trains
def to_poisson_spikes(data, steps: int, max_rate: int=200):

    # Rescale factor for Poisson distribution
    rescale_factor = max_rate / steps
    rand_vals = torch.rand(steps, *data.shape, device=data.device)

    # Compare against intensity to generate spikes
    spikes = (rand_vals < data * rescale_factor).float()
    
    return spikes

# fully connected neural network
class Conv_SNN(torch.nn.Module):
    def __init__(self, n_x: int, n_h: list, n_y: list, kernel_size: int=5, beta: float=0, threshold: float=1, steps: int=100, rate: int=200):
        super().__init__()

        self.in_layer = torch.nn.Conv2d(n_x, n_h[0], kernel_size, bias=False)
        self.h1_layer = torch.nn.Conv2d(n_h[0], n_h[1], kernel_size, bias=False)
        self.h2_layer = torch.nn.Linear(n_y[0], n_y[1], bias=False)
        self.pooling = torch.nn.AvgPool2d(2)
        self.in_active = snn.Leaky(beta=beta, threshold=threshold)
        self.h1_active = snn.Leaky(beta=beta, threshold=threshold)
        self.h2_active = snn.Leaky(beta=beta, threshold=threshold)

        self.steps = steps
        self.rate = rate

    def forward(self, x):
        # Flatten images
        # x = x.view(x.size(0), -1)
        x = to_poisson_spikes(x, self.steps, self.rate)
        # x = snn.spikegen.rate(x, self.steps)

        memin = self.in_active.reset_mem()
        memh1 = self.h1_active.reset_mem()
        memh2 = self.h2_active.reset_mem()

        out_spikes = []
        memh2_mem = []

        for step in x:
            curin = self.pooling(self.in_layer(step))
            spkin, memin = self.in_active(curin, memin)
            curh1 = self.pooling(self.h1_layer(spkin))
            spkh1, memh1 = self.h1_active(curh1, memh1)

            # vectorise spike image
            spkh1 = spkh1.view(spkh1.size(0), -1)
            
            curh2 = self.h2_layer(spkh1)
            spkh2, memh2 = self.h2_active(curh2, memh2)

            out_spikes.append(spkh2)
            memh2_mem.append(memh2)

        return torch.stack(out_spikes), torch.stack(memh2_mem)

    def save_parameters(self, path: str):
        torch.save(self.in_layer, path + "0.pt")
        torch.save(self.h1_layer, path + "1.pt")
        torch.save(self.h2_layer, path + "2.pt")

    def load_parameters(self, path: str):
        self.in_layer = torch.load(path + "0.pt", weights_only=False)
        self.h1_layer = torch.load(path + "1.pt", weights_only=False)
        self.h2_layer = torch.load(path + "2.pt", weights_only=False)

In [19]:
step_count = 200
conv_snn = Conv_SNN(1, [12, 64], [1024, 10], 5, beta=1, threshold=4, steps=step_count, rate=400).to(device)
conv_snn.load_parameters("params/conv")
conv_snn.eval()

Conv_SNN(
  (in_layer): Conv2d(1, 12, kernel_size=(5, 5), stride=(1, 1), bias=False)
  (h1_layer): Conv2d(12, 64, kernel_size=(5, 5), stride=(1, 1), bias=False)
  (h2_layer): Linear(in_features=1024, out_features=10, bias=False)
  (pooling): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (in_active): Leaky()
  (h1_active): Leaky()
  (h2_active): Leaky()
)

In [20]:
with torch.no_grad():
    total = 0
    correct = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        spk_out, _ = conv_snn(images)

        correct += SF.accuracy_rate(spk_out, labels) * spk_out.size(1)
        total += spk_out.size(1)

accuracy = correct/total
print(f'Evaluation after training, test accuracy: {accuracy:.4f}')

Evaluation after training, test accuracy: 0.9885


## Convolutional Neural Network Model Normalisation

In [4]:
def model_norm(model: torch.nn.Module, include_last: bool = False):
    """
    takes any pytorch module or network and does model normalisation across all layers.
    by default, the last layer is not normalised.
    """
    param_geny = (param for param in model.parameters() if param.requires_grad)
    param_list = list(param_geny)
    param_geny = iter(param_list)  # reset generator

    param_count = len(param_list)
    layers = param_count if include_last else param_count - 1
    layer_scales = []

    for _ in range(layers):
        neurons = next(param_geny)
        max_pos_in = 0

        if neurons.dim() >= 2:  # if layer is linear/conv
            for neuron in neurons:
                # sum over input channels
                input_sum = torch.sum(torch.clamp(neuron, min=0))
                max_pos_in = max(max_pos_in, input_sum.item())

        if max_pos_in > 0:
            neurons.data /= max_pos_in  # as in fc net

        layer_scales.append(max_pos_in)

    return layer_scales

In [6]:
# to load and rescale from scratch
step_count = 200
conv_snn = Conv_SNN(1, [12, 64], [1024, 10], 5, beta=1).to(device)
conv_snn.load_parameters("params/conv")
conv_snn.eval()

scaling_factors = model_norm(conv_snn, False)
print(scaling_factors)
conv_snn.save_parameters("params/conv_model_norm")

[3.0468525886535645, 17.50514030456543]


In [21]:
# to load rescaled weights from file
max_rate = 500
threshold = 1
simulation_time = 0.5

conv_snn = Conv_SNN(1, [12, 64], [1024, 10], 5, beta=1, threshold=threshold, steps=int(max_rate * simulation_time), rate=max_rate).to(device)
conv_snn.load_parameters("params/conv_model_norm")
conv_snn.eval()

Conv_SNN(
  (in_layer): Conv2d(1, 12, kernel_size=(5, 5), stride=(1, 1), bias=False)
  (h1_layer): Conv2d(12, 64, kernel_size=(5, 5), stride=(1, 1), bias=False)
  (h2_layer): Linear(in_features=1024, out_features=10, bias=False)
  (pooling): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (in_active): Leaky()
  (h1_active): Leaky()
  (h2_active): Leaky()
)

In [13]:
with torch.no_grad():
    total = 0
    correct = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        spk_out, _ = conv_snn(images)

        correct += SF.accuracy_rate(spk_out, labels) * spk_out.size(1)
        total += spk_out.size(1)

accuracy = correct/total
print(f'Evaluation after training, test accuracy: {accuracy:.4f}')

Evaluation after training, test accuracy: 0.9882


## Convolutional Neural Network Data Normalisation

In [4]:
# counting cnn for data normalisation
class Conv_Count_Net(torch.nn.Module):
    def __init__(self, n_x: int, n_h: list, n_y: list, kernel_size: int=5, image_size: list=(28, 28)):
        super().__init__()

        self.in_layer = torch.nn.Conv2d(n_x, n_h[0], kernel_size, bias=False)
        self.h1_layer = torch.nn.Conv2d(n_h[0], n_h[1], kernel_size, bias=False)
        self.h2_layer = torch.nn.Linear(n_y[0], n_y[1], bias=False)
        self.pooling = torch.nn.AvgPool2d(2)
        self.activator = torch.nn.ReLU()

        # to store maximum activations
        size_in_x = (image_size[0] - kernel_size + 1) // 2
        size_in_y = (image_size[1] - kernel_size + 1) // 2
        self.maxin_act = torch.zeros([n_h[0], size_in_x, size_in_y])
        size_h1_x = (size_in_x - kernel_size + 1) // 2
        size_h1_y = (size_in_y - kernel_size + 1) // 2
        self.maxh1_act = torch.zeros([n_h[1], size_h1_x, size_h1_y])
        self.maxh2_act = torch.zeros([n_y[1]])

    def forward(self, x):
        
        inp = self.activator(self.pooling(self.in_layer(x)))
        self.maxin_act = torch.maximum(self.maxin_act, inp)
        h1 = self.activator(self.pooling(self.h1_layer(inp)))
        self.maxh1_act = torch.maximum(self.maxh1_act, h1)
        
        # vectorise image
        h1 = h1.view(h1.size(0), -1)
        y = self.activator(self.h2_layer(h1))
        self.maxh2_act = torch.maximum(self.maxh2_act, y)

        return y
    
    def save_parameters(self, path: str):
        torch.save(self.in_layer, path + "0.pt")
        torch.save(self.h1_layer, path + "1.pt")
        torch.save(self.h2_layer, path + "2.pt")

    def load_parameters(self, path: str):
        self.in_layer = torch.load(path + "0.pt", weights_only=False)
        self.h1_layer = torch.load(path + "1.pt", weights_only=False)
        self.h2_layer = torch.load(path + "2.pt", weights_only=False)

In [5]:
# get maximum neuron activations
count_loader = torch.utils.data.DataLoader(train_dataset, batch_size = 1, shuffle = False)
conv_count_net = Conv_Count_Net(1, [12, 64], [1024, 10], 5).to(device)
conv_count_net.load_parameters("params/conv")
conv_count_net.eval()

with torch.no_grad():

    for images, labels in count_loader:

        images = images.to(device)
        labels = labels.to(device)

        _ = conv_count_net(images)

max_activations = [torch.max(conv_count_net.maxin_act), torch.max(conv_count_net.maxh1_act), torch.max(conv_count_net.maxh2_act)]

In [13]:
# algorithm for data normalisation
def data_norm(model: torch.nn.Module, activations: list[torch.Tensor], include_last: bool=True):
    """
    takes a pytorch module or network and does data normalisation.
    requires the list of maximum activations from each layer.
    unlike model normalisation, also normalises the last layer by default.
    """
    param_geny = (param for param in model.parameters() if param.requires_grad)
    param_list = list(param_geny)
    param_geny = iter(param_list)  # reset generator

    param_count = len(param_list)
    layers = param_count if include_last else param_count - 1
    layer_scales = []

    previous_factor = 1

    for i in range(layers):
        neurons = next(param_geny)
        max_weight = 0

        if neurons.dim() >= 2:

            for neuron in neurons:
                # grab maximum single weight across input connections
                max_weight = max(max_weight, torch.max(neuron))

        if max_weight > 0:
            print(activations[i])
            scale_factor = max(max_weight, activations[i])
            applied_factor = scale_factor / previous_factor

        # rescale all weights wrt applied factor
        neurons.data = neurons / applied_factor # without .data this becomes out-of-place for some reason
        previous_factor = scale_factor
        layer_scales.append(applied_factor)
    
    return layer_scales

In [14]:
print(conv_count_net.maxh1_act.size())

torch.Size([1, 64, 4, 4])


In [16]:
# to load and rescale from scratch
conv_snn = Conv_SNN(1, [12, 64], [1024, 10], 5, beta=1).to(device)
conv_snn.load_parameters("params/conv")
conv_snn.eval()

scaling_factors = data_norm(conv_snn, max_activations)
print(scaling_factors)
conv_snn.save_parameters("params/conv_data_norm")

tensor(2.0191)
tensor(5.0003)
tensor(27.3178)
[tensor(2.0191), tensor(2.4765), tensor(5.4632)]


In [26]:
# to load rescaled weights from file
max_rate = 400
threshold = 1
simulation_time = 0.5

conv_snn = Conv_SNN(1, [12, 64], [1024, 10], 5, beta=1, threshold=threshold, steps=int(max_rate * simulation_time), rate=max_rate).to(device)
conv_snn.load_parameters("params/conv_data_norm")
conv_snn.eval()

Conv_SNN(
  (in_layer): Conv2d(1, 12, kernel_size=(5, 5), stride=(1, 1), bias=False)
  (h1_layer): Conv2d(12, 64, kernel_size=(5, 5), stride=(1, 1), bias=False)
  (h2_layer): Linear(in_features=1024, out_features=10, bias=False)
  (pooling): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (in_active): Leaky()
  (h1_active): Leaky()
  (h2_active): Leaky()
)

In [18]:
with torch.no_grad():
    total = 0
    correct = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        spk_out, _ = conv_snn(images)

        correct += SF.accuracy_rate(spk_out, labels) * spk_out.size(1)
        total += spk_out.size(1)

accuracy = correct/total
print(f'Evaluation after training, test accuracy: {accuracy:.4f}')

Evaluation after training, test accuracy: 0.9881
