Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial version of LavaProfiler user interface #233

Open
wants to merge 48 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
f456afb
- Initial enablement of RefPort and VarPorts
PhilippPlank Nov 12, 2021
2d0ec74
- Initial enablement of RefPort and VarPorts
PhilippPlank Nov 12, 2021
25a3a68
- Initial enablement of RefPort and VarPorts
PhilippPlank Nov 12, 2021
a2d1765
- Initial enablement of RefPort and VarPorts
PhilippPlank Nov 12, 2021
74dfdf6
Merge branch 'lava-nc:main' into main
PhilippPlank Nov 15, 2021
1daa9af
- Enablement of RefPorts and VarPorts - addressed change requests fro…
PhilippPlank Nov 16, 2021
5908401
- Enablement of RefPorts and VarPorts - addressed change requests fro…
PhilippPlank Nov 16, 2021
3df2847
- Enablement of RefPorts and VarPorts - addressed change requests fro…
PhilippPlank Nov 16, 2021
9581fae
Merge branch 'lava-nc:main' into main
PhilippPlank Nov 16, 2021
6e4716a
- Enablement of RefPorts and VarPorts - addressed change requests fro…
PhilippPlank Nov 16, 2021
e22def6
Merge branch 'lava-nc:main' into main
PhilippPlank Nov 17, 2021
bd25a80
Merge branch 'lava-nc:main' into main
PhilippPlank Nov 17, 2021
7edeb1f
Merge branch 'lava-nc:main' into main
PhilippPlank Nov 18, 2021
23fb8d7
Merge branch 'lava-nc:main' into main
PhilippPlank Nov 18, 2021
f6686b4
Merge branch 'lava-nc:main' into main
PhilippPlank Nov 23, 2021
86867c2
Merge branch 'lava-nc:main' into main
PhilippPlank Nov 23, 2021
859a195
Merge branch 'lava-nc:main' into main
PhilippPlank Nov 24, 2021
f7007f8
Merge branch 'lava-nc:main' into main
PhilippPlank Nov 25, 2021
0163202
Merge branch 'lava-nc:main' into main
PhilippPlank Nov 25, 2021
bf934ef
modified connection tutorial for release 0.2.0
PhilippPlank Nov 26, 2021
0eef67e
Merge branch 'lava-nc:main' into main
PhilippPlank Nov 26, 2021
f08a35c
fixed typos
PhilippPlank Nov 26, 2021
ceddf2a
Merge branch 'lava-nc:main' into main
PhilippPlank Nov 26, 2021
f798b0a
Merge branch 'lava-nc:main' into main
PhilippPlank Nov 30, 2021
b6e72bb
Merge branch 'lava-nc:main' into main
PhilippPlank Dec 1, 2021
9c57309
Merge branch 'lava-nc:main' into main
PhilippPlank Dec 3, 2021
7425f7e
Merge branch 'lava-nc:main' into main
PhilippPlank Dec 6, 2021
bc353c7
Merge branch 'lava-nc:main' into main
PhilippPlank Jan 27, 2022
73288a4
Merge branch 'lava-nc:main' into main
PhilippPlank Jan 31, 2022
3f5c2d7
Merge branch 'lava-nc:main' into main
PhilippPlank Feb 9, 2022
e691b1d
Merge branch 'lava-nc:main' into main
PhilippPlank Feb 10, 2022
9ab9a5d
Merge branch 'lava-nc:main' into main
PhilippPlank Feb 10, 2022
a1f0367
Merge branch 'lava-nc:main' into main
PhilippPlank Feb 16, 2022
50f9113
Merge branch 'lava-nc:main' into main
PhilippPlank Feb 16, 2022
e2342f5
Merge branch 'lava-nc:main' into main
PhilippPlank Feb 24, 2022
0b7cedb
Merge branch 'lava-nc:main' into main
PhilippPlank Feb 24, 2022
6a30ad7
Merge branch 'lava-nc:main' into main
PhilippPlank Feb 25, 2022
8e652bc
Merge branch 'lava-nc:main' into main
PhilippPlank Feb 25, 2022
3b3cc6f
Merge branch 'lava-nc:main' into main
PhilippPlank Mar 2, 2022
46b54ad
Merge branch 'lava-nc:main' into main
PhilippPlank Mar 3, 2022
49d1f43
Merge branch 'lava-nc:main' into profiler_nc
PhilippPlank Mar 4, 2022
fa69d5f
Merge branch 'lava-nc:main' into profiler_nc
PhilippPlank Mar 11, 2022
49c8b57
Merge branch 'lava-nc:main' into profiler_nc
PhilippPlank Mar 16, 2022
5e8b577
- Initial commit of the LavaProfiler - mock user interface and unit t…
PhilippPlank Mar 21, 2022
1de39fc
- Initial commit of the LavaProfiler - mock user interface and unit t…
PhilippPlank Mar 21, 2022
beb555a
- Update addressing requested changes
PhilippPlank Mar 23, 2022
c9188b8
- Example of profileable ProcModel
PhilippPlank Mar 23, 2022
59fc2f6
- Example of profileable ProcModel
PhilippPlank Mar 23, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions src/lava/magma/core/process/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,8 +383,6 @@ def load(self, path: str):
"""Loads and de-serializes Process from disk."""
pass

# TODO: (PP) Remove if condition on blocking as soon as non-blocking
# execution is completely implemented
def run(self,
condition: AbstractRunCondition = None,
run_cfg: RunConfig = None):
Expand Down
22 changes: 22 additions & 0 deletions src/lava/proc/lif/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,3 +327,25 @@ def reset_voltage(self, spike_vector: np.ndarray):
"""Reset voltage of all spiking neurons to 0.
"""
self.v[spike_vector != 0] = 0 # Reset voltage to 0 wherever we spiked


@implements(proc=LIF, protocol=LoihiProtocol)
@requires(CPU)
@tag('floating_pt', 'profileable')
class PyLifModelFloatProf(PyLifModelFloat):
    """Profileable implementation of Leaky-Integrate-and-Fire neural process in
    floating point precision. This class implements additional operations
    counters used by the Profiler to calculate power and performance.
    """
    # Operation counter written by run_spk():
    # 0 = inactive, 1 = active, 2 = spiking.
    updates: float = LavaPyType(float, float)
    # Declared for the Profiler but not written anywhere in this class.
    num_passes: int = LavaPyType(int, int)

    def run_spk(self):
        """Spiking activation function for LIF.

        Runs the inherited spiking step and afterwards refreshes the
        operation counter ``updates``.
        """
        super().run_spk()

        # Update operation counters (Loihi 1).
        # The three activity levels are combined in a scratch array and
        # written once; consecutive full-slice assignments would overwrite
        # each other and leave only the last one in effect.
        counts = np.where(self.v == 0, 0, 1)  # inactive -> 0, active -> 1
        # NOTE(review): spiking_activation() is evaluated after
        # super().run_spk(); if the parent step already resets the voltage
        # of spiking neurons, spikes may no longer be visible here - confirm.
        spiking = self.spiking_activation()
        counts = np.where(spiking != 0, 2, counts)  # spiking -> 2
        self.updates[:] = counts
140 changes: 130 additions & 10 deletions src/lava/utils/profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,133 @@
# SPDX-License-Identifier: BSD-3-Clause
# See: https://spdx.org/licenses/

"""
This module will contain a tool to determine power and performance of workloads
for Loihi 1 or Loihi 2 based on software simulations or hardware measurements.

The execution time and energy of a workload will be either measured on hardware
during execution or estimated in simulation. The estimation is based on
elementary hardware operations which are counted during the simulation. Each
elementary operation has a defined execution time and energy cost, which is
used in a performance model to calculate execution time and energy.
"""
import typing as ty
import types
import numpy as np
from lava.magma.core.process.process import AbstractProcess
from lava.magma.core.run_conditions import AbstractRunCondition
from lava.magma.core.run_configs import RunConfig
from lava.magma.runtime.runtime import Runtime
from lava.magma.core.process.message_interface_enum import ActorType
from lava.magma.compiler.compiler import Compiler
from lava.magma.core.resources import (
AbstractComputeResource, Loihi1NeuroCore, Loihi2NeuroCore)


class Profiler:
"""The Profiler is a tool to determine power and performance of workloads
for Loihi 1 or Loihi 2 based on software simulations or hardware
measurements.

The execution time and energy of a workload is either measured on hardware
during execution or estimated in simulation. The estimation is based on
elementary hardware operations which are counted during the simulation.
Each elementary operation has a defined execution time and energy cost,
which is used in a performance model to calculate execution time and energy.
"""

def __init__(self, start: int = 0, end: int = 0,
bin_size: int = 1, buffer_size: int = 1000):
self.start = start
self.end = end
self.bin_size = bin_size
self.buffer_size = buffer_size
self.used_resources: ty.List[AbstractComputeResource] = []

def profile(self, proc: AbstractProcess):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

docstring

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We talked about two API variants. The old one in which the profiler wraps the Proc and probably this new version. Did you get feedback on which one is actually better? I believe our conclusion was that the old one seemed more suitable for what we want to do so you wanted to prepare both side by side to show DR and PS the alternative.

Because one class overwriting a method attribute of another class looks like borderline invasive in terms of unexpected side effects. Python allows it but I'd imagine this would draw the anger of the Python community upon us.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I still think that the user API would be better if the user could simply define a profiler without having to change his line
proc.run(...),
i.e. the interface that PP coded here. But I also see Andreas point: Overwriting the run function feels bad. In particular, what happens if we ever change the general run method of Processes? Then we may forget to change it in the Profiler, and it will stop working.
What does the Profiler actually do under the hood? Does it just count how often the internal run_spk method is called, and how often spikes are being sent? In that case, could we

  1. ask people to just add a decorator @count to count runs in front of the run_spk function:
    class MyProcModel(list):
    @counter
    def run_spk(self, *args, **kwargs):
    ...
    with
    def counter(f):
    def count(*args, **kwargs):
    count.calls += 1
    return f(*args, **kwargs)
    count.calls = 0
    return count
    This decorator would mean that we don't need to rewrite the whole run method.
  2. subscribe to the port of the Process to read its sent spikes?

To me, that would be way less invasive. But I know too little about what's going on under the hood to say if anything like that would work.

Copy link
Contributor Author

@PhilippPlank PhilippPlank Mar 23, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Both versions are slide by slide in the PowerPoint ;)
I still prefer the black box version, but the functionality is the same either way so switching it should be simple.

@phstratmann We need to do quite a few things under the hood, especially modifying the compilation process (the sketch of what we need to do is in form of comments and mock methods in this PR).
We do not really count how often run_spk is called.

proc.run = types.MethodType(self.run, proc)

def get_energy(self) -> np.array:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We probably need something more elaborate instead or besides this method. Yes we want a total time series but in simulation, we also need the ability to get time series for specific Procs or cores or specific contributors to the entire energy.
How are we going to do this?:

"""Returns the energy estimate per time step in µJ."""
...
return 0

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we give the user the option to either receive the whole time series or just the total time / energy? I could imagine that for particularly long runs, we run into memory or runtime problems if we store one value for each time step. If we just accumulate the values, it may often suffice.
In addition, what about a parameter to set the frequency of measurements? We may often not need a value at every time step, but only at every 1000th time step.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, this is just a first example of getting results. We can offer more sophisticated options, including standard plots etc.

def get_power(self) -> np.array:
    """Return the power estimate per time step in µW.

    Placeholder implementation: no estimate is computed yet and the
    constant 0 is returned.
    """
    placeholder_power = 0
    return placeholder_power

def get_execution_time(self) -> np.array:
    """Return the execution time estimate per time step in µs.

    Placeholder implementation: no estimate is computed yet and the
    constant 0 is returned.
    """
    placeholder_time = 0
    return placeholder_time

def run(self, proc: AbstractProcess, condition: AbstractRunCondition = None,
        run_cfg: RunConfig = None):
    """Compile (if necessary) and start ``proc`` with profiling hooks.

    Intended as a drop-in replacement for run(..) of AbstractProcess:
    profile(..) binds this method onto a process instance so that the
    user-facing call ``proc.run(...)`` stays the same. If the process
    has no runtime yet, the ProcModel mapping is computed, passed through
    the profiler hooks, the process is compiled and a Runtime is created
    and initialized. In every case the runtime is then started.

    Parameters
    ----------
    proc : AbstractProcess
        Process instance on which run(..) was originally called.
    condition : AbstractRunCondition
        RunCondition instance specifying for how long to run the process.
    run_cfg : RunConfig
        RunConfig used by the compiler to select a ProcessModel for each
        compiled process.
    """
    # A runtime already exists: nothing to build, just start it.
    if proc._runtime:
        proc._runtime.start(condition)
        return

    compiler = Compiler(loglevel=proc.loglevel)

    # 1. Determine which ProcModel the compiler would pick per process.
    found_procs = compiler._find_processes(proc)
    model_map = compiler._map_proc_to_model(found_procs, run_cfg)

    # 2. Exchange ProcModels for their profileable versions.
    model_map = self._modify_proc_map(model_map)

    # 3. Prepare the ProcModels for profiling.
    self._prepare_proc_models(model_map)

    # 4. Create the executable.
    executable = compiler.compile(proc, run_cfg)

    # 5. Append the profiler sync channels.
    self._set_profiler_sync_channel_builders(executable)

    # 6. Create and initialize the Runtime, then start it.
    proc._runtime = Runtime(executable,
                            ActorType.MultiProcessing,
                            loglevel=proc.loglevel)
    proc._runtime.initialize()
    proc._runtime.start(condition)

def _modify_proc_map(self, proc_map):
"""Check if chosen process models have a profileable version and
exchange the process models accordingly.
Tell the user which Processes will not be profiled, as they lack a
profileable ProcModel."""
...
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we automatically switch process models? I could imagine that people may get confused in some cases. Let's assume the user first runs a process without profiler and the compiler chooses a process model that is not profilable. Then the user runs the same process with profiler. The profiler will automatically switch the process model to one that can be profiled. But these process models may differ - maybe because of a bug, maybe because of other reasons. The user will not have expected to see any different process behavior just because (s)he activated the profiler. Instead, I would expect that I receive an error message if the default process model cannot be profiled.

Copy link
Contributor Author

@PhilippPlank PhilippPlank Mar 23, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My approach would be that each ProcModel has a profileable counterpart ProcModel, which only adds the operation counters.
So for the LIF neuron for example, we would need a profileable ProcModel for "fixed_pt" and "floating_pt" and there is a 1:1 mapping to exchange them if the profiler is present. I do not mess with the compiler, but only afterwards look for ProcModels which can be exchanged.

If there is no such ProcModel, then the user will be informed that this Process is not considered by the Profiler. If no chosen ProcModel has a profileable version and we run on simulation only, then there will be an error stating no Process is considered for the Profiler.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does this mean in terms of code duplication? Does it mean there is a ProcModel class and then an almost identical ProcModel class that just adds counters?

Before we go into a whole lot of implementation, as usual, we should first write down an end-to-end (mock) example of what we are trying to enable and then agree that this is the best way to go. Such decisions are best made not in the abstract, for people who have not thought about the pros and cons deeply before, but using a concrete example.

Do we have such an example already?

If not I suggest you draft one and share it. We are at an important fork in the road, so we should get some wider input.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added an example in proc/lif/models.py

We can inherit the ProcModel and add the code for operation counters.

return proc_map

def _prepare_proc_models(self, proc_map):
    """Prepare each ProcModel for profiling.

    Configure Monitors for ProcModels executing in simulation.
    Recognize if ProcModels execute on Hardware.

    Parameters
    ----------
    proc_map
        Mapping whose values expose ``required_resources``; presumably
        Process -> ProcModel as produced by
        ``Compiler._map_proc_to_model`` - TODO confirm.
    """
    # Both core types are recorded because
    # _set_profiler_sync_channel_builders() checks for either of them.
    neuro_cores = (Loihi1NeuroCore, Loihi2NeuroCore)
    for proc_model in proc_map.values():
        required = proc_model.required_resources
        hw_cores = [core for core in neuro_cores if core in required]
        if hw_cores:
            for core in hw_cores:
                # Record each resource type once, not once per process.
                if core not in self.used_resources:
                    self.used_resources.append(core)
        else:
            # 1. add operation counter Vars to the Process
            # 2. set up Monitors to operation counter Vars
            ...

def _set_profiler_sync_channel_builders(self, executable):
    """Append profiler sync-channel builders for Loihi execution.

    If ``self.used_resources`` contains Loihi1NeuroCore or
    Loihi2NeuroCore, an entry is appended to
    ``executable.sync_channel_builders``. The appended value is still
    the ``...`` placeholder.
    """
    runs_on_loihi = any(
        core in self.used_resources
        for core in (Loihi1NeuroCore, Loihi2NeuroCore)
    )
    if runs_on_loihi:
        executable.sync_channel_builders.append(...)
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import numpy as np
import functools as ft

from lava.magma.compiler.compiler import Compiler
from lava.magma.core.decorator import requires, tag, implements
from lava.magma.core.model.py.model import PyLoihiProcessModel
from lava.magma.core.model.sub.model import AbstractSubProcessModel
Expand Down
3 changes: 3 additions & 0 deletions tests/lava/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Copyright (C) 2022 Intel Corporation
# SPDX-License-Identifier: BSD-3-Clause
# See: https://spdx.org/licenses/
80 changes: 80 additions & 0 deletions tests/lava/utils/test_profiler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# Copyright (C) 2022 Intel Corporation
# SPDX-License-Identifier: BSD-3-Clause
# See: https://spdx.org/licenses/
import unittest

from lava.magma.core.decorator import implements, requires
from lava.magma.core.model.py.model import PyLoihiProcessModel
from lava.magma.core.process.process import AbstractProcess
from lava.magma.core.resources import CPU
from lava.magma.core.run_conditions import RunSteps
from lava.magma.core.sync.domain import SyncDomain
from lava.magma.core.sync.protocols.loihi_protocol import LoihiProtocol
from lava.magma.core.run_configs import RunConfig


# A minimal process
from lava.utils.profiler import Profiler


class P(AbstractProcess):
    """Minimal Process that adds nothing to AbstractProcess."""


# A minimal PyProcModel implementing P
@implements(proc=P, protocol=LoihiProtocol)
@requires(CPU)
class PyProcModel(PyLoihiProcessModel):
    """Minimal Python ProcessModel for P that requires a CPU."""

    def run_spk(self):
        """Print a fixed marker string."""
        marker = "Test"
        print(marker)


# A simple RunConfig selecting always the first found process model
class MyRunCfg(RunConfig):
    """RunConfig that always picks the first ProcessModel offered."""

    def select(self, proc, proc_models):
        """Return the first entry of ``proc_models``; ``proc`` is unused."""
        first_model = proc_models[0]
        return first_model


class TestLavaProfiler(unittest.TestCase):
    """Unit tests for the mock user interface of the Profiler."""

    def test_init(self):
        """Constructor arguments are stored unchanged on the instance."""
        start = 1
        end = 5
        buffer_size = 1000
        bin_size = 1
        profiler = Profiler(start=start, end=end, buffer_size=buffer_size,
                            bin_size=bin_size)

        # Dedicated assert methods report the compared values on failure.
        self.assertIsInstance(profiler, Profiler)
        self.assertEqual(profiler.start, start)
        self.assertEqual(profiler.end, end)
        self.assertEqual(profiler.buffer_size, buffer_size)
        self.assertEqual(profiler.bin_size, bin_size)

    def test_get_energy(self):
        """A profiled process runs and get_energy() yields the mock value 0.
        """
        proc = P()
        profiler = Profiler()

        # The process proc and connected processes should be profiled
        profiler.profile(proc)

        # No connections are made

        simple_sync_domain = SyncDomain("simple", LoihiProtocol(),
                                        [proc])

        # The process should compile and run without error (not doing anything)
        proc.run(RunSteps(num_steps=3, blocking=True),
                 MyRunCfg(custom_sync_domains=[simple_sync_domain]))
        proc.stop()

        energy = profiler.get_energy()

        self.assertEqual(energy, 0)


if __name__ == '__main__':
unittest.main()