Optimize KCFG memory usage (#872)

This optimization works by ensuring that a single object is used for each
group of equal subterms across a KCFG's nodes.

This is implemented by keeping a map from (optimized) subterms to IDs,
and another map from subterm IDs to the actual subterms. Whenever a new
node is added to the KCFG, its subterms are traversed bottom-up, and each
one is replaced with its cached version if one exists, or added to the
cache otherwise.
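
As a rough illustration of the scheme (a minimal, hypothetical sketch that
models terms as plain tuples rather than pyk's actual classes):

```python
# Hypothetical sketch of the two-map caching scheme; terms are modeled as
# nested tuples (label, child, ...), not as pyk's KInner classes.
class Interner:
    def __init__(self) -> None:
        self._ids: dict[tuple, int] = {}  # (optimized) subterm -> ID
        self._terms: list[tuple] = []     # ID -> the single shared subterm object

    def intern(self, term: tuple) -> tuple:
        idx = self._ids.get(term)
        if idx is None:
            idx = len(self._terms)
            self._ids[term] = idx
            self._terms.append(term)
        return self._terms[idx]


def optimize(term: tuple, interner: Interner) -> tuple:
    # Bottom-up traversal: children are cached first, then the rebuilt parent,
    # so equal subtrees from different nodes collapse onto the same objects.
    label, *children = term
    rebuilt = (label, *(optimize(c, interner) for c in children))
    return interner.intern(rebuilt)


interner = Interner()
t1 = optimize(('add', ('x',), ('y',)), interner)
t2 = optimize(('mul', ('x',), ('y',)), interner)
assert t1[1] is t2[1]  # the equal subterm ('x',) is now a single shared object
```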

Memory usage
---------------

For the add-liquidity proof, when running with Kasmer, the KCFG object
uses 50-60 GB of memory without this PR; with it, the KCFG uses only 1 GB.

Educated guess: In general, for proofs whose configurations do not change
much between consecutive nodes (usually configurations of medium or high
complexity), I expect memory usage with this PR to be higher than before
only while the KCFG holds 1-2 nodes, and lower afterwards. For proofs whose
configurations change significantly between nodes (usually simple
configurations, e.g. with only the `<k>` cell), this PR may increase memory
usage.
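
For such cases, the optimization can be disabled through the new
`optimize_memory` flag, which defaults to `True`. A small usage sketch,
mirroring the constructors changed below (import path as used in the tests):

```python
from pyk.kcfg.kcfg import KCFG

cfg = KCFG()                             # structural sharing enabled by default
cfg_plain = KCFG(optimize_memory=False)  # fall back to a plain dict node store

# The flag is also threaded through the (de)serialization helpers:
reloaded = KCFG.from_json(cfg.to_json(), optimize_memory=False)
```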

Runtime
--------

* Running a simple proof (i.e. most of the time is spent in the Booster
backend): 6 min 59 sec with the PR, 6 min 56 sec before the PR
* Loading a small KCFG (29M on disk) from JSON: 5.5 sec with the PR, 4.9
sec before the PR
* Loading a large KCFG (6.1G on disk) from JSON: 1606 sec with the PR,
1425 sec before the PR

---------

Co-authored-by: devops <devops@runtimeverification.com>
Co-authored-by: Tamás Tóth <tothtamas28@users.noreply.github.com>
Co-authored-by: Lisandra <lisandrasilva18@hotmail.com>
Co-authored-by: Noah Watson <107630091+nwatson22@users.noreply.github.com>
Co-authored-by: rv-jenkins <admin@runtimeverification.com>
Co-authored-by: Everett Hildenbrandt <everett.hildenbrandt@gmail.com>
7 people committed Feb 27, 2024
1 parent 6fd119b commit a8a749d
Showing 7 changed files with 236 additions and 14 deletions.
4 changes: 2 additions & 2 deletions docs/conf.py
@@ -9,8 +9,8 @@
project = 'pyk'
author = 'Runtime Verification, Inc'
copyright = '2024, Runtime Verification, Inc'
version = '0.1.653'
release = '0.1.653'
version = '0.1.654'
release = '0.1.654'

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
2 changes: 1 addition & 1 deletion package/version
@@ -1 +1 @@
0.1.653
0.1.654
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "pyk"
version = "0.1.653"
version = "0.1.654"
description = ""
authors = [
"Runtime Verification, Inc. <contact@runtimeverification.com>",
25 changes: 15 additions & 10 deletions src/pyk/kcfg/kcfg.py
@@ -25,7 +25,7 @@
from ..utils import ensure_dir_path

if TYPE_CHECKING:
from collections.abc import Iterable, Mapping
from collections.abc import Iterable, Mapping, MutableMapping
from pathlib import Path
from types import TracebackType
from typing import Any
@@ -203,7 +203,7 @@ def edges(self) -> tuple[KCFG.Edge, ...]:
return tuple(KCFG.Edge(self.source, target, 1, ()) for target in self.targets)

_node_id: int
_nodes: dict[int, Node]
_nodes: MutableMapping[int, Node]

_created_nodes: set[int]
_deleted_nodes: set[int]
@@ -218,9 +218,14 @@ def edges(self) -> tuple[KCFG.Edge, ...]:
_lock: RLock
cfg_dir: Path | None

def __init__(self, cfg_dir: Path | None = None) -> None:
def __init__(self, cfg_dir: Path | None = None, optimize_memory: bool = True) -> None:
self._node_id = 1
self._nodes = {}
if optimize_memory:
from .store import OptimizedNodeStore

self._nodes = OptimizedNodeStore()
else:
self._nodes = {}
self._created_nodes = set()
self._deleted_nodes = set()
self._edges = {}
@@ -293,9 +298,9 @@ def uncovered(self) -> list[Node]:

@staticmethod
def from_claim(
defn: KDefinition, claim: KClaim, cfg_dir: Path | None = None
defn: KDefinition, claim: KClaim, cfg_dir: Path | None = None, optimize_memory: bool = True
) -> tuple[KCFG, NodeIdLike, NodeIdLike]:
cfg = KCFG(cfg_dir=cfg_dir)
cfg = KCFG(cfg_dir=cfg_dir, optimize_memory=optimize_memory)
claim_body = claim.body
claim_body = defn.instantiate_cell_vars(claim_body)
claim_body = rename_generated_vars(claim_body)
@@ -382,8 +387,8 @@ def to_dict(self) -> dict[str, Any]:
return {k: v for k, v in res.items() if v}

@staticmethod
def from_dict(dct: Mapping[str, Any]) -> KCFG:
cfg = KCFG()
def from_dict(dct: Mapping[str, Any], optimize_memory: bool = True) -> KCFG:
cfg = KCFG(optimize_memory=optimize_memory)

max_id = 0
for node_dict in dct.get('nodes') or []:
@@ -440,8 +445,8 @@ def to_json(self) -> str:
return json.dumps(self.to_dict(), sort_keys=True)

@staticmethod
def from_json(s: str) -> KCFG:
return KCFG.from_dict(json.loads(s))
def from_json(s: str, optimize_memory: bool = True) -> KCFG:
return KCFG.from_dict(json.loads(s), optimize_memory=optimize_memory)

def to_rules(self, priority: int = 20) -> list[KRuleLike]:
return [e.to_rule('BASIC-BLOCK', priority=priority) for e in self.edges()]
134 changes: 134 additions & 0 deletions src/pyk/kcfg/store.py
@@ -0,0 +1,134 @@
from __future__ import annotations

import threading
from abc import ABC, abstractmethod
from collections.abc import Hashable, MutableMapping
from dataclasses import dataclass
from typing import TYPE_CHECKING, Generic, TypeVar, final

from ..cterm import CTerm
from ..kast.inner import KApply, KSequence, KToken, KVariable, bottom_up_with_summary
from .kcfg import KCFG

if TYPE_CHECKING:
from collections.abc import Iterator

from ..kast.inner import KInner, KLabel


A = TypeVar('A', bound=Hashable)


class OptimizedNodeStore(MutableMapping[int, KCFG.Node]):
_nodes: dict[int, KCFG.Node]
_optimized_terms: _Cache[_OptInner]
_klabels: _Cache[KLabel]
_terms: list[KInner]

_lock: threading.Lock

def __init__(self) -> None:
self._nodes = {}
self._optimized_terms = _Cache()
self._klabels = _Cache()
self._terms = []

self._lock = threading.Lock()

def __getitem__(self, key: int) -> KCFG.Node:
return self._nodes[key]

def __setitem__(self, key: int, node: KCFG.Node) -> None:
old_cterm = node.cterm
new_config = self._optimize(old_cterm.config)
new_constraints = tuple(self._optimize(c) for c in old_cterm.constraints)
new_node = KCFG.Node(node.id, CTerm(new_config, new_constraints))
self._nodes[key] = new_node

def __delitem__(self, key: int) -> None:
del self._nodes[key]

def __iter__(self) -> Iterator[int]:
return iter(self._nodes)

def __len__(self) -> int:
return len(self._nodes)

def _optimize(self, term: KInner) -> KInner:
def optimizer(to_optimize: KInner, children: list[int]) -> tuple[KInner, int]:
if isinstance(to_optimize, KToken) or isinstance(to_optimize, KVariable):
optimized_id = self._cache(_OptBasic(to_optimize))
elif isinstance(to_optimize, KApply):
klabel_id = self._klabels.cache(to_optimize.label)
optimized_id = self._cache(_OptApply(klabel_id, tuple(children)))
elif isinstance(to_optimize, KSequence):
optimized_id = self._cache(_OptKSequence(tuple(children)))
else:
raise ValueError('Unknown term type: ' + str(type(to_optimize)))
return (self._terms[optimized_id], optimized_id)

with self._lock:
optimized, _ = bottom_up_with_summary(optimizer, term)
return optimized

def _cache(self, term: _OptInner) -> int:
id = self._optimized_terms.cache(term)
assert id <= len(self._terms)
if id == len(self._terms):
self._terms.append(term.build(self._klabels, self._terms))
return id


class _Cache(Generic[A]):
_value_to_id: dict[A, int]
_values: list[A]

def __init__(self) -> None:
self._value_to_id = {}
self._values = []

def cache(self, value: A) -> int:
idx = self._value_to_id.get(value)
if idx is not None:
return idx
idx = len(self._values)
self._value_to_id[value] = idx
self._values.append(value)
return idx

def get(self, idx: int) -> A:
return self._values[idx]


class _OptInner(ABC):
@abstractmethod
def build(self, klabels: _Cache[KLabel], terms: list[KInner]) -> KInner:
...


@final
@dataclass(eq=True, frozen=True)
class _OptBasic(_OptInner):
term: KInner

def build(self, klabels: _Cache[KLabel], terms: list[KInner]) -> KInner:
return self.term


@final
@dataclass(eq=True, frozen=True)
class _OptApply(_OptInner):
label: int
children: tuple[int, ...]

def build(self, klabels: _Cache[KLabel], terms: list[KInner]) -> KInner:
return KApply(klabels.get(self.label), tuple(terms[child] for child in self.children))


@final
@dataclass(eq=True, frozen=True)
class _OptKSequence(_OptInner):
children: tuple[int, ...]

def build(self, klabels: _Cache[KLabel], terms: list[KInner]) -> KInner:
return KSequence(tuple(terms[child] for child in self.children))
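
To illustrate the intended sharing (an illustrative sketch in the spirit of the
tests below, not code from this commit): after two nodes with structurally
equal configurations are stored, the optimized configurations are not merely
equal but the very same object, so common fragments are held in memory once.

```python
from pyk.cterm import CTerm
from pyk.kast.inner import KApply
from pyk.kcfg.kcfg import KCFG
from pyk.kcfg.store import OptimizedNodeStore
from pyk.prelude.utils import token

store = OptimizedNodeStore()
# Two distinct nodes whose configurations are structurally equal.
store[1] = KCFG.Node(1, CTerm(KApply('<cell>', KApply('f', token(1))), ()))
store[2] = KCFG.Node(2, CTerm(KApply('<cell>', KApply('f', token(1))), ()))

# Equal configurations are rebuilt from the cache, so they share one object.
assert store[1].cterm.config is store[2].cterm.config
```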
Empty file added src/tests/unit/kcfg/__init__.py
Empty file.
83 changes: 83 additions & 0 deletions src/tests/unit/kcfg/test_store.py
@@ -0,0 +1,83 @@
from __future__ import annotations

from itertools import count
from typing import TYPE_CHECKING

import pytest

from pyk.cterm import CTerm
from pyk.kast.inner import KApply, KSequence
from pyk.kcfg.kcfg import KCFG
from pyk.kcfg.store import OptimizedNodeStore, _Cache
from pyk.prelude.utils import token

from ..utils import a, b, c, f

if TYPE_CHECKING:
from typing import Final

from pyk.kast import KInner


EQUAL_TEST_DATA: Final[tuple[tuple[KInner, KInner], ...]] = (
(token(1), token(1)),
(token('a'), token('a')),
(a, a),
(f(a), f(a)),
(KSequence([a, b]), KSequence([a, b])),
)


@pytest.mark.parametrize('term1,term2', EQUAL_TEST_DATA, ids=count())
def test_use_cached(term1: KInner, term2: KInner) -> None:
# Given
cached_values: _Cache[KInner] = _Cache()

# When
id1 = cached_values.cache(term1)
id2 = cached_values.cache(term2)

# Then
assert id1 == id2


NOT_EQUAL_TEST_DATA: Final[tuple[tuple[KInner, KInner], ...]] = (
(token(1), token(2)),
(token(1), token('1')),
(a, b),
(f(a), f(b)),
(KSequence([a, b]), KSequence([a, c])),
)


@pytest.mark.parametrize('term1,term2', NOT_EQUAL_TEST_DATA, ids=count())
def test_not_use_cached(term1: KInner, term2: KInner) -> None:
# Given
cached_values: _Cache[KInner] = _Cache()

# When
id1 = cached_values.cache(term1)
id2 = cached_values.cache(term2)

# Then
assert term1 != term2
assert id1 != id2


OPTIMIZE_TEST_DATA: Final[tuple[KInner, ...]] = (
token(1),
token('a'),
a,
f(a),
KSequence([a, token(3)]),
)


def test_optimized_store() -> None:
store = OptimizedNodeStore()

for idx, item in zip(range(0, len(OPTIMIZE_TEST_DATA)), OPTIMIZE_TEST_DATA, strict=True):
store[idx] = KCFG.Node(idx, CTerm(KApply('<cell>', item), ()))

for idx, item in zip(range(0, len(OPTIMIZE_TEST_DATA)), OPTIMIZE_TEST_DATA, strict=True):
assert KCFG.Node(idx, CTerm(KApply('<cell>', item), ())) == store[idx]
