In [1]:
import torch
import torch.nn as nn
from sinabs.backend.dynapcnn import DynapcnnNetwork
from sinabs.layers import Merge, IAFSqueeze, SumPool2d
from sinabs.activation.surrogate_gradient_fn import PeriodicExponential
import sinabs.layers as sl

from torch.nn import CrossEntropyLoss
from torch.optim import Adam

from tonic.datasets.nmnist import NMNIST
from tonic.transforms import ToFrame
from torch.utils.data import DataLoader
import numpy as np
from tqdm.notebook import tqdm
from statistics import mode

In [2]:
torch.manual_seed(0)

<torch._C.Generator at 0x7f87e695c5b0>

# Network Module

We need to define a `nn.Module` implementing the Spiking Neural Network (SNN) we want to deploy on chip. The configuration of the network on the chip needs to know in advance the shape of the input data and the batch size that will be used.

In [3]:
channels = 2
height = 34
width = 34
batch_size = 8

input_shape = (channels, height, width)

In [4]:
class SNN(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        # -- chip core A --
        self.conv1 = nn.Conv2d(2, 10, 2, 1, bias=False)
        self.iaf1 = IAFSqueeze(batch_size=batch_size, min_v_mem=-1.0, spike_threshold=1.0, surrogate_grad_fn=PeriodicExponential())
        self.pool1 = nn.AvgPool2d(2,2)
        # -- chip core B --
        self.conv2 = nn.Conv2d(10, 10, 4, 1, bias=False)
        self.iaf2 = IAFSqueeze(batch_size=batch_size, min_v_mem=-1.0, spike_threshold=1.0, surrogate_grad_fn=PeriodicExponential())
        # -- chip core C --
        self.conv3 = nn.Conv2d(10, 1, 2, 1, bias=False)
        self.iaf3 = IAFSqueeze(batch_size=batch_size, min_v_mem=-1.0, spike_threshold=1.0, surrogate_grad_fn=PeriodicExponential())
        # -- chip core D --
        self.fc1 = nn.Linear(144, 200, bias=False)
        self.iaf4 = IAFSqueeze(batch_size=batch_size, min_v_mem=-1.0, spike_threshold=1.0, surrogate_grad_fn=PeriodicExponential())
        # -- chip core E --
        self.fc2 = nn.Linear(200, 10, bias=False)
        self.iaf5 = IAFSqueeze(batch_size=batch_size, min_v_mem=-1.0, spike_threshold=1.0, surrogate_grad_fn=PeriodicExponential())

        # -- layers ignored during deployment --
        self.flat = nn.Flatten()

    def init_weights(self):
        for name, layer in self.named_modules():
            if isinstance(layer, nn.Conv2d) or isinstance(layer, nn.Linear):
                nn.init.xavier_normal_(layer.weight.data)

    def detach_neuron_states(self):
        for name, layer in self.named_modules():
            if name != '':
                if isinstance(layer, sl.StatefulLayer):
                    for name, buffer in layer.named_buffers():
                        buffer.detach_()

    def forward(self, x):
        
        con1_out = self.conv1(x)
        iaf1_out = self.iaf1(con1_out)
        pool1_out = self.pool1(iaf1_out)

        conv2_out = self.conv2(pool1_out)
        iaf2_out = self.iaf2(conv2_out)

        conv3_out = self.conv3(iaf2_out)
        iaf3_out = self.iaf3(conv3_out)

        flat_out = self.flat(iaf3_out)
        
        fc1_out = self.fc1(flat_out)
        iaf4_out = self.iaf4(fc1_out)
        fc2_out = self.fc2(iaf4_out)
        iaf5_out = self.iaf5(fc2_out)

        return iaf5_out

In [5]:
snn = SNN()

Let's train the model to see what kind of accuracy the software model gets:

In [6]:
_ = NMNIST(save_to='./NMNIST', train=True)
_ = NMNIST(save_to='./NMNIST', train=False)

nb_time_steps = 50
to_raster = ToFrame(sensor_size=NMNIST.sensor_size, n_time_bins=nb_time_steps)

snn_train_dataset = NMNIST(save_to='./NMNIST', train=True, transform=to_raster)
snn_test_dataset = NMNIST(save_to='./NMNIST', train=False, transform=to_raster)

sample_data, label = snn_train_dataset[0]
print(f"The transformed array is in shape [Time-Step, Channel, Height, Width] --> {sample_data.shape}")

The transformed array is in shape [Time-Step, Channel, Height, Width] --> (50, 2, 34, 34)


In [7]:
train_indices = [i for i in range(1000)]
test_indices = [i for i in range(100)]

snn_train_dataset_subset = torch.utils.data.Subset(snn_train_dataset, train_indices)
snn_test_subset = torch.utils.data.Subset(snn_train_dataset, test_indices)

snn_train_dataloader = DataLoader(snn_train_dataset_subset, batch_size=batch_size, num_workers=4, drop_last=True, shuffle=True)
snn_test_dataloader = DataLoader(snn_test_subset, batch_size=batch_size, num_workers=4, drop_last=True, shuffle=False)

In [8]:
device = torch.device('cpu')

snn.init_weights()

snn.to(device)

SNN(
  (conv1): Conv2d(2, 10, kernel_size=(2, 2), stride=(1, 1), bias=False)
  (iaf1): IAFSqueeze(spike_threshold=Parameter containing:
  tensor(1.), min_v_mem=Parameter containing:
  tensor(-1.), batch_size=8, num_timesteps=-1)
  (pool1): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (conv2): Conv2d(10, 10, kernel_size=(4, 4), stride=(1, 1), bias=False)
  (iaf2): IAFSqueeze(spike_threshold=Parameter containing:
  tensor(1.), min_v_mem=Parameter containing:
  tensor(-1.), batch_size=8, num_timesteps=-1)
  (conv3): Conv2d(10, 1, kernel_size=(2, 2), stride=(1, 1), bias=False)
  (iaf3): IAFSqueeze(spike_threshold=Parameter containing:
  tensor(1.), min_v_mem=Parameter containing:
  tensor(-1.), batch_size=8, num_timesteps=-1)
  (fc1): Linear(in_features=144, out_features=200, bias=False)
  (iaf4): IAFSqueeze(spike_threshold=Parameter containing:
  tensor(1.), min_v_mem=Parameter containing:
  tensor(-1.), batch_size=8, num_timesteps=-1)
  (fc2): Linear(in_features=200, out_features=10, 

In [9]:
optimizer = Adam(snn.parameters(), lr=1e-4, betas=(0.9, 0.999), eps=1e-8)
loss_fn = CrossEntropyLoss()

training the model...

In [10]:
snn.train()
for e in range(1):
    train_p_bar = tqdm(snn_train_dataloader, total=int(len(snn_train_dataset_subset)/batch_size))

    for X, y in train_p_bar:
        # reshape the input from [Batch, Time, Channel, Height, Width] into [Batch*Time, Channel, Height, Width]
        X = X.reshape(-1, NMNIST.sensor_size[2], NMNIST.sensor_size[0], NMNIST.sensor_size[1]).to(dtype=torch.float, device=device)
        y = y.to(dtype=torch.long, device=device)

        # forward
        pred = snn(X)

        # reshape the output from [Batch*Time,num_classes] into [Batch, Time, num_classes]
        pred = pred.reshape(batch_size, nb_time_steps, -1)

        # accumulate all time-steps output for final prediction
        pred = pred.sum(dim = 1)
        loss = loss_fn(pred, y)

        # gradient update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # detach the neuron states and activations from current computation graph(necessary)
        snn.detach_neuron_states()

        train_p_bar.set_description(f"epoch {e} - BPTT training loss: {round(loss.item(), 2)}")

  0%|          | 0/125 [00:00<?, ?it/s]

testing the model...

In [11]:
correct_predictions = []

snn.eval()
with torch.no_grad():
    test_p_bar = tqdm(snn_test_dataloader, total=int(len(snn_test_dataloader)/batch_size))
    
    for X, y in test_p_bar:
        # reshape the input from [Batch, Time, Channel, Height, Width] into [Batch*Time, Channel, Height, Width]
        X = X.reshape(-1, NMNIST.sensor_size[2], NMNIST.sensor_size[0], NMNIST.sensor_size[1]).to(dtype=torch.float, device=device)
        y = y.to(dtype=torch.long, device=device)

        # forward
        output = snn(X)

        # reshape the output from [Batch*Time,num_classes] into [Batch, Time, num_classes]
        output = output.reshape(batch_size, nb_time_steps, -1)

        # accumulate all time-steps output for final prediction
        output = output.sum(dim=1)

        # calculate accuracy
        pred = output.argmax(dim=1, keepdim=True)

        # compute the total correct predictions
        correct_predictions.append(pred.eq(y.view_as(pred)))

        test_p_bar.set_description(f"Testing Model...")

correct_predictions = torch.cat(correct_predictions)
test_acc = correct_predictions.sum().item()/(len(correct_predictions))*100

print(f'test accuracy: {round(test_acc, 2)}')

  0%|          | 0/1 [00:00<?, ?it/s]

test accuracy: 100.0


# Deploying the Model: Enter the DynapcnnNetwork Class

In the constructor of `DynapcnnNetworkGraph` the SNN passed as argument (defined as a `nn.Module`) will be parsed such that each layer is represented in a computational graph (using `nirtorch.extract_torch_graph`). 

The layers are the `nodes` of the graph, while their connectivity (how the outputs from a layer are sent to other layers) is represented as `edges`, represented in a `list` of `tuples`.

Once the constructor finishes its initialization, the `hw_model.dynapcnn_layers` property is a dictionary where each entry represents the ID of a `DynapcnnLayer` instance (an `int` from `0` to `L`), with this entry containing a `DynapcnnLayer` instance where a subset of the layers in the original SNN has been incorporated into, the core such instance has been assigned to, and the list of `DynapcnnLayer` instances (their IDs) the layer targets.

In [12]:
hw_model = DynapcnnNetwork(
    snn=snn,
    input_shape=input_shape,
    batch_size=batch_size,
    discretize=True
)

Notice in the model bellow how the property <code>DynapcnnLayer</code> in the model has yet to be assigned to a core. This is only done once
<code>DynapcnnNetworkGraph.to()</code> is called.

In [13]:
print(hw_model)

----------------------- [ DynapcnnLayer 0 ] -----------------------

COMPUTATIONAL NODES:

(node 0): Conv2d(2, 10, kernel_size=(2, 2), stride=(1, 1), bias=False)
(node 1): IAFSqueeze(spike_threshold=Parameter containing:
tensor(241.), min_v_mem=Parameter containing:
tensor(-241.), batch_size=8, num_timesteps=-1)
(node 2): SumPool2d(norm_type=1, kernel_size=(2, 2), stride=None, ceil_mode=False)

METADATA:

> network's entry point: True
> convolution's weight re-scaling factor: None
> assigned core index: None
> destination DynapcnnLayers: [1]
> node 2 feeds input to nodes [3]

----------------------- [ DynapcnnLayer 1 ] -----------------------

COMPUTATIONAL NODES:

(node 3): Conv2d(10, 10, kernel_size=(4, 4), stride=(1, 1), bias=False)
(node 4): IAFSqueeze(spike_threshold=Parameter containing:
tensor(1041.), min_v_mem=Parameter containing:
tensor(-1041.), batch_size=8, num_timesteps=-1)

METADATA:

> network's entry point: False
> convolution's weight re-scaling factor: 2.0
> assigned 

The `hw_model.to()` call will figure out into which core each `DynapcnnLayer` instance will be assigned to. Once this assingment is made the instance itself is used to configure the `CNNLayerConfig` instance representing the core's configuration assigned to it.

If the call is sucessfull, the layers comprising the network and their associated metadata will be printed. To deploy the model, we need to provide the device string defining what Speck devkit is being used.

In [14]:
speck_device = "speck2fmodule:0"

In [15]:
hw_model.to(device=speck_device)

Network is valid: 



DynapcnnNetwork()

In [16]:
print(hw_model)

----------------------- [ DynapcnnLayer 0 ] -----------------------

COMPUTATIONAL NODES:

(node 0): Conv2d(2, 10, kernel_size=(2, 2), stride=(1, 1), bias=False)
(node 1): IAFSqueeze(spike_threshold=Parameter containing:
tensor(241.), min_v_mem=Parameter containing:
tensor(-241.), batch_size=8, num_timesteps=-1)
(node 2): SumPool2d(norm_type=1, kernel_size=(2, 2), stride=None, ceil_mode=False)

METADATA:

> network's entry point: True
> convolution's weight re-scaling factor: None
> assigned core index: 0
> destination DynapcnnLayers: [1]
> node 2 feeds input to nodes [3]

----------------------- [ DynapcnnLayer 1 ] -----------------------

COMPUTATIONAL NODES:

(node 3): Conv2d(10, 10, kernel_size=(4, 4), stride=(1, 1), bias=False)
(node 4): IAFSqueeze(spike_threshold=Parameter containing:
tensor(1041.), min_v_mem=Parameter containing:
tensor(-1041.), batch_size=8, num_timesteps=-1)

METADATA:

> network's entry point: False
> convolution's weight re-scaling factor: 2.0
> assigned cor

# Spikes IN/Out of the Chip

Let's try to use our network configured on the chip to forward some data. We'll get a sample from the NMNIST dataset to do that:

In [17]:
event_dataset = NMNIST(save_to='./NMNIST', train=False)
event_subset = torch.utils.data.Subset(event_dataset, test_indices)

In [18]:
targets = np.array(event_dataset.targets)
target_indices = {idx: np.where(targets == idx)[0] for idx in range(10)}

If you have a tensor with data and want to convert it to <code>input_events</code>, you would instantiate a <code>ChipFactory</code> object providing the device string (<code>"speck2fsomethingsomething"</code>) as instantiation argument. For further details consult the [documentation](https://sinabs.readthedocs.io/en/v2.0.0/tutorials/nir_to_speck.html#prepare-dataset).

In [19]:
from sinabs.backend.dynapcnn.chip_factory import ChipFactory

chip_factory = ChipFactory(speck_device)

This object has a method <code>raster_to_events</code> (see more [here](https://sinabs.readthedocs.io/en/v2.0.0/speck/api/dynapcnn/chip_factory.html#sinabs.backend.dynapcnn.chip_factory.ChipFactory.raster_to_events)) that can convert your data to an event list, which is what the chip expects. This method requires a 4 dimensional tensor of spike events with the dimensions <code>[Time, Channel, Height, Width]</code>.

In [20]:
layer_out = hw_model.get_output_core_id()                   # core assigned to the output layer of the model
layer_in = hw_model.get_input_core_id()[-1]                 # core assigned to the input layyer of the model

print(f'output core id: {layer_out}')
print(f'input core id: {layer_in}')

output core id: 3
input core id: 0


In [21]:
print(f'Output layer monitoring: {hw_model.samna_config.cnn_layers[layer_out].monitor_enable}')

Output layer monitoring: True


In [22]:
correct = 0
predictions = []

for (sample, target) in tqdm(event_subset, total=len(event_subset)):
    input_events = chip_factory.xytp_to_events(sample, layer=layer_in, reset_timestamps=True)
    output = hw_model.hw_forward(input_events)
    prediction = mode((event.feature for event in output)) if output else -1
    correct += (prediction == target)
    predictions.append(prediction)

accuracy = correct / len(event_subset)
print(f"Test accuracy on speck: {accuracy:.2%}")

  0%|          | 0/100 [00:00<?, ?it/s]

Test accuracy on speck: 100.00%
