# Import libraries

Installing the required libraries. Including the SNNtorch and importing the wrapper for the IRON utilities to execute the designs, used to make the comparison reported in the paper.

In [1]:
!pip install snntorch



In [2]:
import snntorch as snn
from snntorch import spikeplot as splt
from snntorch import spikegen

import torch
import torch.nn as nn

import numpy as np
import matplotlib.pyplot as plt
import time

In [3]:
import snn_neuron_wrapper as snn_npu

In [4]:
#@title Plotting Settings
def plot_mem(mem, title=False):
  if title:
    plt.title(title)
  plt.plot(mem)
  plt.xlabel("Time step")
  plt.ylabel("Membrane Potential")
  plt.xlim([0, 50])
  plt.ylim([0, 1])
  plt.show()

def plot_input_output_spikes(spk_in, spk_out, title=None):
    # Generate Plots
    fig, ax = plt.subplots(2, figsize=(8,4), sharex=True,
                           gridspec_kw = {'height_ratios': [0.4, 0.4]})

    # Plot input spikes
    splt.raster(spk_in, ax[0], s=400, c="black", marker="|")
    ax[0].set_ylabel("Input Spikes")
    ax[0].set_yticks([])
    if title:
        ax[0].set_title(title)

    # Plot output spikes
    splt.raster(spk_out, ax[1], s=400, c="black", marker="|")
    ax[1].set_ylabel("Output Spikes")
    ax[1].set_yticks([])
    ax[1].set_xlabel("Time step")

    plt.show()


def plot_step_current_response(cur_in, mem_rec, vline1):
  fig, ax = plt.subplots(2, figsize=(8,6),sharex=True)

  # Plot input current
  ax[0].plot(cur_in, c="tab:orange")
  ax[0].set_ylim([0, 0.2])
  ax[0].set_ylabel("Input Current ($I_{in}$)")
  ax[0].set_title("Lapicque's Neuron Model With Step Input")

  # Plot membrane potential
  ax[1].plot(mem_rec)
  ax[1].set_ylim([0, 0.6])
  ax[1].set_ylabel("Membrane Potential ($U_{mem}$)")

  if vline1:
    ax[1].axvline(x=vline1, ymin=0, ymax=2.2, alpha = 0.25, linestyle="dashed", c="black", linewidth=2, zorder=0, clip_on=False)
  plt.xlabel("Time step")

  plt.show()


def plot_current_pulse_response(cur_in, mem_rec, title, vline1=False, vline2=False, ylim_max1=False):

  fig, ax = plt.subplots(2, figsize=(8,6),sharex=True)

  # Plot input current
  ax[0].plot(cur_in, c="tab:orange")
  if not ylim_max1:
    ax[0].set_ylim([0, 0.2])
  else:
    ax[0].set_ylim([0, ylim_max1])
  ax[0].set_ylabel("Input Current ($I_{in}$)")
  ax[0].set_title(title)

  # Plot membrane potential
  ax[1].plot(mem_rec)
  ax[1].set_ylim([0, 1])
  ax[1].set_ylabel("Membrane Potential ($U_{mem}$)")

  if vline1:
    ax[1].axvline(x=vline1, ymin=0, ymax=2.2, alpha = 0.25, linestyle="dashed", c="black", linewidth=2, zorder=0, clip_on=False)
  if vline2:
    ax[1].axvline(x=vline2, ymin=0, ymax=2.2, alpha = 0.25, linestyle="dashed", c="black", linewidth=2, zorder=0, clip_on=False)
  plt.xlabel("Time step")

  plt.show()

def compare_plots(cur1, cur2, cur3, mem1, mem2, mem3, vline1, vline2, vline3, vline4, title):
  # Generate Plots
  fig, ax = plt.subplots(2, figsize=(8,6),sharex=True)

  # Plot input current
  ax[0].plot(cur1)
  ax[0].plot(cur2)
  ax[0].plot(cur3)
  ax[0].set_ylim([0, 0.2])
  ax[0].set_ylabel("Input Current ($I_{in}$)")
  ax[0].set_title(title)

  # Plot membrane potential
  ax[1].plot(mem1)
  ax[1].plot(mem2)
  ax[1].plot(mem3)
  ax[1].set_ylim([0, 1])
  ax[1].set_ylabel("Membrane Potential ($U_{mem}$)")

  ax[1].axvline(x=vline1, ymin=0, ymax=2.2, alpha = 0.25, linestyle="dashed", c="black", linewidth=2, zorder=0, clip_on=False)
  ax[1].axvline(x=vline2, ymin=0, ymax=2.2, alpha = 0.25, linestyle="dashed", c="black", linewidth=2, zorder=0, clip_on=False)
  ax[1].axvline(x=vline3, ymin=0, ymax=2.2, alpha = 0.25, linestyle="dashed", c="black", linewidth=2, zorder=0, clip_on=False)
  ax[1].axvline(x=vline4, ymin=0, ymax=2.2, alpha = 0.25, linestyle="dashed", c="black", linewidth=2, zorder=0, clip_on=False)

  plt.xlabel("Time step")

  plt.show()

def plot_cur_mem_spk(cur, mem, spk, thr_line=False, vline=False, title=False, ylim_max2=1.25):
  # Generate Plots
  fig, ax = plt.subplots(3, figsize=(8,6), sharex=True,
                        gridspec_kw = {'height_ratios': [1, 1, 0.4]})

  # Plot input current
  ax[0].plot(cur, c="tab:orange")
  ax[0].set_ylim([0, 0.4])
  ax[0].set_xlim([0, 200])
  ax[0].set_ylabel("Input Current ($I_{in}$)")
  if title:
    ax[0].set_title(title)

  # Plot membrane potential
  ax[1].plot(mem)
  ax[1].set_ylim([0, ylim_max2])
  ax[1].set_ylabel("Membrane Potential ($U_{mem}$)")
  if thr_line:
    ax[1].axhline(y=thr_line, alpha=0.25, linestyle="dashed", c="black", linewidth=2)
  plt.xlabel("Time step")

  # Plot output spike using spikeplot
  splt.raster(spk, ax[2], s=400, c="black", marker="|")
  if vline:
    ax[2].axvline(x=vline, ymin=0, ymax=6.75, alpha = 0.15, linestyle="dashed", c="black", linewidth=2, zorder=0, clip_on=False)
  plt.ylabel("Output spikes")
  plt.yticks([])

  plt.show()

def plot_spk_mem_spk(spk_in, mem, spk_rec, title):
  # Generate Plots
  fig, ax = plt.subplots(3, figsize=(8,6), sharex=True,
                        gridspec_kw = {'height_ratios': [0.4, 1, 0.4]})

  # Plot input current
  splt.raster(spk_in, ax[0], s=400, c="black", marker="|")
  ax[0].set_ylabel("Input Spikes")
  ax[0].set_title(title)
  plt.yticks([])

  # Plot membrane potential
  ax[1].plot(mem)
  ax[1].set_ylim([0, 1])
  ax[1].set_ylabel("Membrane Potential ($U_{mem}$)")
  ax[1].axhline(y=0.5, alpha=0.25, linestyle="dashed", c="black", linewidth=2)
  plt.xlabel("Time step")

  # Plot output spike using spikeplot
  splt.raster(spk_rec, ax[2], s=400, c="black", marker="|")
  plt.ylabel("Output spikes")
  plt.yticks([])

  plt.show()


def plot_reset_comparison(spk_in, mem_rec, spk_rec, mem_rec0, spk_rec0):
  # Generate Plots to Compare Reset Mechanisms
  fig, ax = plt.subplots(nrows=3, ncols=2, figsize=(10,6), sharex=True,
                        gridspec_kw = {'height_ratios': [0.4, 1, 0.4], 'wspace':0.05})

  # Reset by Subtraction: input spikes
  splt.raster(spk_in, ax[0][0], s=400, c="black", marker="|")
  ax[0][0].set_ylabel("Input Spikes")
  ax[0][0].set_title("Reset by Subtraction")
  ax[0][0].set_yticks([])

  # Reset by Subtraction: membrane potential
  ax[1][0].plot(mem_rec)
  ax[1][0].set_ylim([0, 0.7])
  ax[1][0].set_ylabel("Membrane Potential ($U_{mem}$)")
  ax[1][0].axhline(y=0.5, alpha=0.25, linestyle="dashed", c="black", linewidth=2)

  # Reset by Subtraction: output spikes
  splt.raster(spk_rec, ax[2][0], s=400, c="black", marker="|")
  ax[2][0].set_yticks([])
  ax[2][0].set_xlabel("Time step")
  ax[2][0].set_ylabel("Output Spikes")

  # Reset to Zero: input spikes
  splt.raster(spk_in, ax[0][1], s=400, c="black", marker="|")
  ax[0][1].set_title("Reset to Zero")
  ax[0][1].set_yticks([])

  # Reset to Zero: membrane potential
  ax[1][1].plot(mem_rec0)
  ax[1][1].set_ylim([0, 0.7])
  ax[1][1].axhline(y=0.5, alpha=0.25, linestyle="dashed", c="black", linewidth=2)
  ax[1][1].set_yticks([])
  ax[2][1].set_xlabel("Time step")

  # Reset to Zero: output spikes
  splt.raster(spk_rec0, ax[2][1], s=400, c="black", marker="|")
  ax[2][1].set_yticks([])

  plt.show()

def plot_snn_spikes(spk_in, spk1_rec, spk2_rec, num_outputs, title):
  # Generate Plots
  fig, ax = plt.subplots(3, figsize=(8,7), sharex=True,
                        gridspec_kw = {'height_ratios': [1, 1, 0.4]})

  # Plot input spikes
  splt.raster(spk_in[:,0], ax[0], s=0.03, c="black")
  ax[0].set_ylabel("Input Spikes")
  ax[0].set_title(title)

  # Plot hidden layer spikes
  splt.raster(spk1_rec.reshape(num_steps, -1), ax[1], s = 0.05, c="black")
  ax[1].set_ylabel("Hidden Layer")

  # Plot output spikes
  splt.raster(spk2_rec.reshape(num_steps, -1), ax[2], c="black", marker="|")
  ax[2].set_ylabel("Output Spikes")
  ax[2].set_ylim([0, num_outputs])

  plt.show()



def plot_metrics(metrics):
    # Extract data
    designs = [m['design'] for m in metrics]
    throughputs = [m['throughput'] for m in metrics]
    npu_times = [m['npu_time'] for m in metrics]

    # Assign a different color to each bar using a colormap
    cmap = plt.get_cmap('tab10')
    colors = [cmap(i % 10) for i in range(len(designs))]

    # Plot 1: Throughput
    plt.figure(figsize=(10, 5))
    bars = plt.bar(designs, throughputs, color=colors)
    plt.ylabel('Throughput (spikes/sec)')
    plt.title('Throughput by Design')
    plt.xticks(rotation=45)
    plt.grid(axis='y', linestyle='--', alpha=0.7)

    # Add value labels on top
    for bar in bars:
        yval = bar.get_height()
        plt.text(bar.get_x() + bar.get_width()/2, yval, f'{yval:.0f}', 
                 ha='center', va='bottom', fontsize=12)

    plt.tight_layout()
    plt.show()

    # Plot 2: NPU Time
    plt.figure(figsize=(10, 5))
    bars = plt.bar(designs, npu_times, color=colors)
    plt.ylabel('Response Time (µs)')
    plt.title('Response Time (µs) by Design')
    plt.xticks(rotation=45)
    plt.grid(axis='y', linestyle='--', alpha=0.7)

    # Add value labels on top
    for bar in bars:
        yval = bar.get_height()
        plt.text(bar.get_x() + bar.get_width()/2, yval, f'{yval:.2f}', 
                 ha='center', va='bottom', fontsize=12)

    plt.tight_layout()
    plt.show()


# SNN models

In the following sections, i present the principal results of the paper: starting with a comparision between the SNN neuron implement on NPU both scalar and vectorized (i leave to the reader the possibility to switch from one to another setting the parameter to True).

## Custom Library NPU

### SNN NPU neuron core - vectorized - scalar

The following script run the single core (scalar or vectorized) core on NPU in a cycle, for many input_sizes. The reader can choose the input size and outputsize for the cycle accordingly to the one passed as a paramter to the single core. Note: the input and output size should match.

In [5]:
for in_size in [1024]:
    input_data = torch.ones(in_size, dtype=torch.int32)
    neuron = snn_npu.snn_neuron_npu_singlecore(in1_size = in_size, out_size = in_size, threshold = 5, vectorized = True)
    neuron.to("npu")
    output_data = neuron(input_data)

[MAKE] targetname=denselayer, aie_design_test_to_use=2
rm -rf tmpTrace trace.txt parse*json trace*json
rm -rf build _build inst aie.mlir.prj core_* test.elf denselayer.exe *.exe
[MAKE] targetname=singlecore, aie_design_test_to_use=0
mkdir -p build
python3 /notebooks/SNN-NPU-DL_Framework/OpenHW_deliver/singlecore.py -d npu -i1s 1024 -os 1024 -th 5 -df 0.9 -rs -1 -hr 1 -vt 1 > build/aie.mlir
mkdir -p build
cd build && /home/mliraie/mlir-aie/ironenv/lib/python3.12/site-packages/llvm-aie/bin/clang++ -O2 -std=c++20 --target=aie2-none-unknown-elf -Wno-parentheses -Wno-attributes -Wno-macro-redefined -Wno-empty-body -DNDEBUG -I /home/mliraie/mlir-aie/ironenv/lib/python3.12/site-packages/mlir_aie/include  -c /notebooks/SNN-NPU-DL_Framework/OpenHW_deliver/lif_kernel_singlecore.cc -o scale.o
mkdir -p build
cd build && aiecc.py --aie-generate-xclbin --no-compile-host --xclbin-name=final.xclbin \
    	--no-xchesscc --no-xbridge --peano /home/mliraie/mlir-aie/ironenv/lib/python3.12/site-packages/ll

  Compatibility with CMake < 3.10 will be removed from a future version of
  CMake.

  Update the VERSION argument <min> value.  Or, use the <min>...<max> syntax
  to tell CMake that the project requires at least <min> but has been updated
  to work with policies introduced by <max> or earlier.

[0m


-- Detecting CXX compiler ABI info - done
-- Check for working CXX compiler: /usr/bin/c++ - skipped
-- Detecting CXX compile features
-- Detecting CXX compile features - done
-- Found OpenCV: /usr (found version "4.6.0")
-- Configuring done (0.2s)
-- Generating done (0.0s)
-- Build files have been written to: /notebooks/SNN-NPU-DL_Framework/OpenHW_deliver/_build
cd _build &&  cmake --build . --config Release
gmake[1]: Entering directory '/notebooks/SNN-NPU-DL_Framework/OpenHW_deliver/_build'
gmake[2]: Entering directory '/notebooks/SNN-NPU-DL_Framework/OpenHW_deliver/_build'
gmake[3]: Entering directory '/notebooks/SNN-NPU-DL_Framework/OpenHW_deliver/_build'
gmake[3]: Leaving directory '/notebooks/SNN-NPU-DL_Framework/OpenHW_deliver/_build'
gmake[3]: Entering directory '/notebooks/SNN-NPU-DL_Framework/OpenHW_deliver/_build'
[ 33%] [32mBuilding CXX object CMakeFiles/singlecore.dir/test.cpp.o[0m


[0mopencv library paht: [0m
[0mopencv libs: opencv_calib3d;opencv_core;opencv_dnn;opencv_features2d;opencv_flann;opencv_highgui;opencv_imgcodecs;opencv_imgproc;opencv_ml;opencv_objdetect;opencv_photo;opencv_stitching;opencv_video;opencv_videoio;opencv_alphamat;opencv_aruco;opencv_barcode;opencv_bgsegm;opencv_bioinspired;opencv_ccalib;opencv_cvv;opencv_datasets;opencv_dnn_objdetect;opencv_dnn_superres;opencv_dpm;opencv_face;opencv_freetype;opencv_fuzzy;opencv_hdf;opencv_hfs;opencv_img_hash;opencv_intensity_transform;opencv_line_descriptor;opencv_mcc;opencv_optflow;opencv_phase_unwrapping;opencv_plot;opencv_quality;opencv_rapid;opencv_reg;opencv_rgbd;opencv_saliency;opencv_shape;opencv_stereo;opencv_structured_light;opencv_superres;opencv_surface_matching;opencv_text;opencv_tracking;opencv_videostab;opencv_viz;opencv_wechat_qrcode;opencv_ximgproc;opencv_xobjdetect;opencv_xphoto[0m
   85 | void computeMetrics(auto start_kernel, auto stop_kernel, auto start_data, auto stop_data, int IN

[ 66%] [32mBuilding CXX object CMakeFiles/singlecore.dir/test_utils.cpp.o[0m
[100%] [1m[32mLinking CXX executable singlecore[0m
gmake[3]: Leaving directory '/notebooks/SNN-NPU-DL_Framework/OpenHW_deliver/_build'
[100%] Built target singlecore
gmake[2]: Leaving directory '/notebooks/SNN-NPU-DL_Framework/OpenHW_deliver/_build'
gmake[1]: Leaving directory '/notebooks/SNN-NPU-DL_Framework/OpenHW_deliver/_build'
cp _build/singlecore singlecore.exe 
./singlecore.exe -x build/final.xclbin -i build/insts.bin -k MLIR_AIE --in1_size 1024 --out_size 1024 --threshold 5 --decay_factor 0.9 --hard_reset 1 -v 2 --reset -1 --aie_design 0
Sequence instr count: 75
Loading xclbin: build/final.xclbin
Kernel opcode: MLIR_AIE
Name: MLIR_AIE
Registering xclbin: build/final.xclbin
Getting hardware context.
Getting handle to kernel:MLIR_AIE
Writing data into buffer objects.
Running Kernel.
Execution finished
Correct at neuron 0, time step 0: output 0
Correct at neuron 0, time step 1: output 0
Correct at ne

make: *** [Makefile:100: run] Illegal instruction (core dumped)


### SNN NPU multicore - vectorized - scalar

The following script run the multicore core version (scalar or vectorized) on NPU in a cycle, for many input_sizes. The reader can also here decide which input size to choose. This version will use two aie cores to perform the snn neuron logic, each core can have up to 16 neurons in the vectorized version and one in the scalar version.

In [6]:
for in_size in [4096]:
    input_data = torch.ones(in_size, dtype=torch.int32)
    neuron = snn_npu.snn_neuron_npu_multicore(in1_size = in_size, out_size = in_size, threshold = 2, vectorized = True)
    neuron.to("npu")
    output_data= neuron(input_data)


[MAKE] targetname=denselayer, aie_design_test_to_use=2
rm -rf tmpTrace trace.txt parse*json trace*json
rm -rf build _build inst aie.mlir.prj core_* test.elf denselayer.exe *.exe
[MAKE] targetname=multicore, aie_design_test_to_use=1
mkdir -p build
python3 /notebooks/SNN-NPU-DL_Framework/OpenHW_deliver/multicore.py -d npu -i1s 4096 -os 4096 -th 2 -df 0.9 -rs -1 -hr 1 -vt 1 > build/aie.mlir
mkdir -p build
cd build && /home/mliraie/mlir-aie/ironenv/lib/python3.12/site-packages/llvm-aie/bin/clang++ -O2 -std=c++20 --target=aie2-none-unknown-elf -Wno-parentheses -Wno-attributes -Wno-macro-redefined -Wno-empty-body -DNDEBUG -I /home/mliraie/mlir-aie/ironenv/lib/python3.12/site-packages/mlir_aie/include  -c /notebooks/SNN-NPU-DL_Framework/OpenHW_deliver/lif_kernel_multicore.cc -o scale.o
mkdir -p build
cd build && aiecc.py --aie-generate-xclbin --no-compile-host --xclbin-name=final.xclbin \
    	--no-xchesscc --no-xbridge --peano /home/mliraie/mlir-aie/ironenv/lib/python3.12/site-packages/llvm-

  Compatibility with CMake < 3.10 will be removed from a future version of
  CMake.

  Update the VERSION argument <min> value.  Or, use the <min>...<max> syntax
  to tell CMake that the project requires at least <min> but has been updated
  to work with policies introduced by <max> or earlier.

[0m


-- Detecting CXX compiler ABI info - done
-- Check for working CXX compiler: /usr/bin/c++ - skipped
-- Detecting CXX compile features
-- Detecting CXX compile features - done
-- Found OpenCV: /usr (found version "4.6.0")
-- Configuring done (0.2s)
-- Generating done (0.0s)
-- Build files have been written to: /notebooks/SNN-NPU-DL_Framework/OpenHW_deliver/_build
cd _build &&  cmake --build . --config Release
gmake[1]: Entering directory '/notebooks/SNN-NPU-DL_Framework/OpenHW_deliver/_build'
gmake[2]: Entering directory '/notebooks/SNN-NPU-DL_Framework/OpenHW_deliver/_build'
gmake[3]: Entering directory '/notebooks/SNN-NPU-DL_Framework/OpenHW_deliver/_build'
gmake[3]: Leaving directory '/notebooks/SNN-NPU-DL_Framework/OpenHW_deliver/_build'
gmake[3]: Entering directory '/notebooks/SNN-NPU-DL_Framework/OpenHW_deliver/_build'
[ 33%] [32mBuilding CXX object CMakeFiles/multicore.dir/test.cpp.o[0m


[0mopencv library paht: [0m
[0mopencv libs: opencv_calib3d;opencv_core;opencv_dnn;opencv_features2d;opencv_flann;opencv_highgui;opencv_imgcodecs;opencv_imgproc;opencv_ml;opencv_objdetect;opencv_photo;opencv_stitching;opencv_video;opencv_videoio;opencv_alphamat;opencv_aruco;opencv_barcode;opencv_bgsegm;opencv_bioinspired;opencv_ccalib;opencv_cvv;opencv_datasets;opencv_dnn_objdetect;opencv_dnn_superres;opencv_dpm;opencv_face;opencv_freetype;opencv_fuzzy;opencv_hdf;opencv_hfs;opencv_img_hash;opencv_intensity_transform;opencv_line_descriptor;opencv_mcc;opencv_optflow;opencv_phase_unwrapping;opencv_plot;opencv_quality;opencv_rapid;opencv_reg;opencv_rgbd;opencv_saliency;opencv_shape;opencv_stereo;opencv_structured_light;opencv_superres;opencv_surface_matching;opencv_text;opencv_tracking;opencv_videostab;opencv_viz;opencv_wechat_qrcode;opencv_ximgproc;opencv_xobjdetect;opencv_xphoto[0m
   85 | void computeMetrics(auto start_kernel, auto stop_kernel, auto start_data, auto stop_data, int IN

[ 66%] [32mBuilding CXX object CMakeFiles/multicore.dir/test_utils.cpp.o[0m
[100%] [1m[32mLinking CXX executable multicore[0m
gmake[3]: Leaving directory '/notebooks/SNN-NPU-DL_Framework/OpenHW_deliver/_build'
[100%] Built target multicore
gmake[2]: Leaving directory '/notebooks/SNN-NPU-DL_Framework/OpenHW_deliver/_build'
gmake[1]: Leaving directory '/notebooks/SNN-NPU-DL_Framework/OpenHW_deliver/_build'
cp _build/multicore multicore.exe 
./multicore.exe -x build/final.xclbin -i build/insts.bin -k MLIR_AIE --in1_size 4096 --out_size 4096 --threshold 2 --decay_factor 0.9 --hard_reset 1 -v 2 --reset -1 --aie_design 1
Sequence instr count: 75
Loading xclbin: build/final.xclbin
Kernel opcode: MLIR_AIE
Name: MLIR_AIE
Registering xclbin: build/final.xclbin
Getting hardware context.
Getting handle to kernel:MLIR_AIE
Writing data into buffer objects.
Running Kernel.
Execution finished
Verifying results ...
Neuron0 input:1 output:0
 Correct at neuron 0, time step 0: output 0
Neuron0 input:

## SNNTorch neurons

### Single Leaky neuron

The following script execute a single leaky neuron using the SNNtorch library. It sets a manual seed, then you define the number of steps which is going to be the lenght of the input vector and initiliaze a vector of spikes. The neuron is iterate of its input and its membrane is update accordingly to the dinamycs described in the report.

In [7]:
import torch
import time
import snntorch as snn
from snntorch import spikegen

# Set the random seed for reproducibility
torch.manual_seed(123)

# Define number of simulation steps
num_steps = 1024

# Create a 1-D random spike train (40% chance of spike at each step)
spk_in = spikegen.rate_conv(torch.ones((num_steps, 1)) * 0.2)

# Instantiate a Leaky Integrate-and-Fire neuron
lif = snn.Leaky(beta=0.9, threshold=5)
torch.set_num_threads(1)
# Initialize membrane potential and spike output
mem = torch.zeros(1)
spk_out = torch.zeros(1)
mem_rec = [mem]
spk_rec = [spk_out]

# Measure simulation time
start_time = time.time()
for step in range(num_steps):
    spk_out, mem = lif(spk_in[step], mem)
    mem_rec.append(mem)
    spk_rec.append(spk_out)
end_time = time.time()

# Convert recorded data to tensors
mem_rec = torch.stack(mem_rec)
spk_rec = torch.stack(spk_rec)

# Calculate metrics
total_spikes = num_steps
simulation_time_us = (end_time - start_time) * 1e6  # in microseconds
throughput = total_spikes * 1e6 / simulation_time_us   # spikes per second

print(str(simulation_time_us) + " us")
print(str(throughput) + " spikes/s")

46241.044998168945 us
22144.828258975296 spikes/s


## SNN Net - NPU vs SNN

Lastly, the following script present a comparison between a feedforward neural network running on NPU and the same model of neural network but running on CPU using the SNNtorch framework.

### SNN NPU Network 4-16-4

In [5]:
for in_size in [4096]:
    input_data = torch.ones(in_size, dtype=torch.int32)
    neuron = snn_npu.snn_neuron_npu_denselayer(in1_size = in_size, out_size = in_size, threshold = 5, vectorized = True)
    neuron.to("npu")
    output_data, metrics_loop = neuron(input_data)

[MAKE] targetname=denselayer, aie_design_test_to_use=2
rm -rf tmpTrace trace.txt parse*json trace*json
rm -rf build _build inst aie.mlir.prj core_* test.elf denselayer.exe *.exe
[MAKE] targetname=denselayer, aie_design_test_to_use=2
mkdir -p build
python3 /notebooks/SNN-NPU-DL_Framework/OpenHW_deliver/denselayer.py -d npu -i1s 4096 -os 4096 -th 5 -df 0.9 -rs -1 -hr 1 -vt 0 > build/aie.mlir
mkdir -p build
cd build && /home/mliraie/mlir-aie/ironenv/lib/python3.12/site-packages/llvm-aie/bin/clang++ -O2 -std=c++20 --target=aie2-none-unknown-elf -Wno-parentheses -Wno-attributes -Wno-macro-redefined -Wno-empty-body -DNDEBUG -I /home/mliraie/mlir-aie/ironenv/lib/python3.12/site-packages/mlir_aie/include  -c /notebooks/SNN-NPU-DL_Framework/OpenHW_deliver/lif_kernel_denselayer.cc -o scale.o
mkdir -p build
cd build && aiecc.py --aie-generate-xclbin --no-compile-host --xclbin-name=final.xclbin \
    	--no-xchesscc --no-xbridge --peano /home/mliraie/mlir-aie/ironenv/lib/python3.12/site-packages/ll

  Compatibility with CMake < 3.10 will be removed from a future version of
  CMake.

  Update the VERSION argument <min> value.  Or, use the <min>...<max> syntax
  to tell CMake that the project requires at least <min> but has been updated
  to work with policies introduced by <max> or earlier.

[0m


-- Detecting CXX compiler ABI info - done
-- Check for working CXX compiler: /usr/bin/c++ - skipped
-- Detecting CXX compile features
-- Detecting CXX compile features - done
-- Found OpenCV: /usr (found version "4.6.0")
-- Configuring done (0.2s)
-- Generating done (0.0s)
-- Build files have been written to: /notebooks/SNN-NPU-DL_Framework/OpenHW_deliver/_build
cd _build &&  cmake --build . --config Release
gmake[1]: Entering directory '/notebooks/SNN-NPU-DL_Framework/OpenHW_deliver/_build'
gmake[2]: Entering directory '/notebooks/SNN-NPU-DL_Framework/OpenHW_deliver/_build'
gmake[3]: Entering directory '/notebooks/SNN-NPU-DL_Framework/OpenHW_deliver/_build'
gmake[3]: Leaving directory '/notebooks/SNN-NPU-DL_Framework/OpenHW_deliver/_build'
gmake[3]: Entering directory '/notebooks/SNN-NPU-DL_Framework/OpenHW_deliver/_build'
[ 33%] [32mBuilding CXX object CMakeFiles/denselayer.dir/test.cpp.o[0m


[0mopencv library paht: [0m
[0mopencv libs: opencv_calib3d;opencv_core;opencv_dnn;opencv_features2d;opencv_flann;opencv_highgui;opencv_imgcodecs;opencv_imgproc;opencv_ml;opencv_objdetect;opencv_photo;opencv_stitching;opencv_video;opencv_videoio;opencv_alphamat;opencv_aruco;opencv_barcode;opencv_bgsegm;opencv_bioinspired;opencv_ccalib;opencv_cvv;opencv_datasets;opencv_dnn_objdetect;opencv_dnn_superres;opencv_dpm;opencv_face;opencv_freetype;opencv_fuzzy;opencv_hdf;opencv_hfs;opencv_img_hash;opencv_intensity_transform;opencv_line_descriptor;opencv_mcc;opencv_optflow;opencv_phase_unwrapping;opencv_plot;opencv_quality;opencv_rapid;opencv_reg;opencv_rgbd;opencv_saliency;opencv_shape;opencv_stereo;opencv_structured_light;opencv_superres;opencv_surface_matching;opencv_text;opencv_tracking;opencv_videostab;opencv_viz;opencv_wechat_qrcode;opencv_ximgproc;opencv_xobjdetect;opencv_xphoto[0m
   85 | void computeMetrics(auto start_kernel, auto stop_kernel, auto start_data, auto stop_data, int IN

[ 66%] [32mBuilding CXX object CMakeFiles/denselayer.dir/test_utils.cpp.o[0m
[100%] [1m[32mLinking CXX executable denselayer[0m
gmake[3]: Leaving directory '/notebooks/SNN-NPU-DL_Framework/OpenHW_deliver/_build'
[100%] Built target denselayer
gmake[2]: Leaving directory '/notebooks/SNN-NPU-DL_Framework/OpenHW_deliver/_build'
gmake[1]: Leaving directory '/notebooks/SNN-NPU-DL_Framework/OpenHW_deliver/_build'
cp _build/denselayer denselayer.exe 
./denselayer.exe -x build/final.xclbin -i build/insts.bin -k MLIR_AIE --in1_size 4096 --out_size 4096 --threshold 5 --decay_factor 0.9 --hard_reset 1 -v 2 --reset -1 --aie_design 2
Sequence instr count: 249
Loading xclbin: build/final.xclbin
Kernel opcode: MLIR_AIE
Name: MLIR_AIE
Registering xclbin: build/final.xclbin
Getting hardware context.
Getting handle to kernel:MLIR_AIE
Writing data into buffer objects.
Running Kernel.
Execution finished
Verifying results ...
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1


ValueError: too many values to unpack (expected 2)

### SNNtorch net 4-16-4

In [None]:
import torch.nn as nn
import time

# Define Network
class Net(nn.Module):
    def __init__(self, num_inputs, num_hidden, num_outputs, beta):
        super().__init__()

        # initialize layers
        self.fc1 = nn.Linear(num_inputs, num_hidden)
        # Basically the neuron are acting as activation function for the hidden layer
        self.lif1 = snn.Leaky(beta=beta)
        self.fc2 = nn.Linear(num_hidden, num_outputs)
        self.lif2 = snn.Leaky(beta=beta)

    def forward(self, x, mem1, spk1, mem2):
        cur1 = self.fc1(x)
        spk1, mem1 = self.lif1(cur1, mem1)
        cur2 = self.fc2(spk1)
        spk2, mem2 = self.lif2(cur2, mem2)
        return mem1, spk1, mem2, spk2

In [None]:
spk1_rec = []
spk2_rec = []

num_steps = 200
spk_in = spikegen.rate_conv(torch.rand((200, 4))).unsqueeze(1)

In [None]:
# Build the model
num_inputs = 4
num_hidden = 16
num_outputs = 4
model = Net(num_inputs = num_inputs, num_hidden = num_hidden, num_outputs = num_outputs, beta = 0.9)

# Initialize the membrane
mem1 = torch.zeros_like(torch.tensor(num_hidden))
mem2 = torch.zeros_like(torch.tensor(num_outputs))
spk1 = 0
# Run the model
start = time.time()
for step in range (num_steps):
    mem1, spk1, mem2, spk2 = model.forward(spk_in[step], mem1, spk1, mem2)
    spk1_rec.append(spk1)
    spk2_rec.append(spk2)
end = time.time()

spk1_rec = torch.stack(spk1_rec)
spk2_rec = torch.stack(spk2_rec)

# Metrics
duration_us = (end - start) * 1e6
total_spikes = spk2_rec.sum().item()
throughput = total_spikes / ((end - start))
