Merge pull request #1337 from tahoe-lafs/4060-my-first-benchmark

A very first benchmark Fixes ticket:4060
tahoe-lafs · Sep 7, 2023 · 20c85a8 · 20c85a8
2 parents 9d018e1 + 496ffcd
commit 20c85a8
Show file tree

Hide file tree

Showing 7 changed files with 185 additions and 140 deletions.
diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py
@@ -0,0 +1,8 @@
+"""pytest-based end-to-end benchmarks of Tahoe-LAFS.
+
+Usage:
+
+$ pytest benchmarks --number-of-nodes=3
+
+It's possible to pass --number-of-nodes multiple times.
+"""
diff --git a/benchmarks/conftest.py b/benchmarks/conftest.py
@@ -0,0 +1,126 @@
+"""
+pytest infrastructure for benchmarks.
+
+The number of nodes is parameterized via a --number-of-nodes CLI option added
+to pytest.
+"""
+
+from shutil import which, rmtree
+from tempfile import mkdtemp
+from contextlib import contextmanager
+from time import time
+
+import pytest
+import pytest_twisted
+
+from twisted.internet import reactor
+from twisted.internet.defer import DeferredList, succeed
+
+from allmydata.util.iputil import allocate_tcp_port
+
+from integration.grid import Client, create_grid, create_flog_gatherer
+
+
+def pytest_addoption(parser):
+    parser.addoption(
+        "--number-of-nodes",
+        action="append",
+        default=[],
+        type=int,
+        help="list of number_of_nodes to benchmark against",
+    )
+    # Required to be compatible with integration.util code that we indirectly
+    # depend on, but also might be useful.
+    parser.addoption(
+        "--force-foolscap",
+        action="store_true",
+        default=False,
+        dest="force_foolscap",
+        help=(
+            "If set, force Foolscap only for the storage protocol. "
+            + "Otherwise HTTP will be used."
+        ),
+    )
+
+
+def pytest_generate_tests(metafunc):
+    # Make number_of_nodes accessible as a parameterized fixture:
+    if "number_of_nodes" in metafunc.fixturenames:
+        metafunc.parametrize(
+            "number_of_nodes",
+            metafunc.config.getoption("number_of_nodes"),
+            scope="session",
+        )
+
+
+def port_allocator():
+    port = allocate_tcp_port()
+    return succeed(port)
+
+
+@pytest.fixture(scope="session")
+def grid(request):
+    """
+    Provides a new Grid with a single Introducer and flog-gathering process.
+
+    Notably does _not_ provide storage servers; use the storage_nodes
+    fixture if your tests need a Grid that can be used for puts / gets.
+    """
+    tmp_path = mkdtemp(prefix="tahoe-benchmark")
+    request.addfinalizer(lambda: rmtree(tmp_path))
+    flog_binary = which("flogtool")
+    flog_gatherer = pytest_twisted.blockon(
+        create_flog_gatherer(reactor, request, tmp_path, flog_binary)
+    )
+    g = pytest_twisted.blockon(
+        create_grid(reactor, request, tmp_path, flog_gatherer, port_allocator)
+    )
+    return g
+
+
+@pytest.fixture(scope="session")
+def storage_nodes(grid, number_of_nodes):
+    nodes_d = []
+    for _ in range(number_of_nodes):
+        nodes_d.append(grid.add_storage_node())
+
+    nodes_status = pytest_twisted.blockon(DeferredList(nodes_d))
+    for ok, value in nodes_status:
+        assert ok, "Storage node creation failed: {}".format(value)
+    return grid.storage_servers
+
+
+@pytest.fixture(scope="session")
+def client_node(request, grid, storage_nodes, number_of_nodes) -> Client:
+    """
+    Create a grid client node with number of shares matching number of nodes.
+    """
+    client_node = pytest_twisted.blockon(
+        grid.add_client(
+            "client_node",
+            needed=number_of_nodes,
+            happy=number_of_nodes,
+            total=number_of_nodes,
+        )
+    )
+    print(f"Client node pid: {client_node.process.transport.pid}")
+    return client_node
+
+
+class Benchmarker:
+    """Keep track of benchmarking results."""
+
+    @contextmanager
+    def record(self, name, **parameters):
+        """Record the timing of running some code, if it succeeds."""
+        start = time()
+        yield
+        elapsed = time() - start
+        # For now we just print the outcome:
+        parameters = " ".join(f"{k}={v}" for (k, v) in parameters.items())
+        print(f"BENCHMARK RESULT: {name} {parameters} elapsed {elapsed} secs")
+
+
+@pytest.fixture(scope="session")
+def tahoe_benchmarker():
+    return Benchmarker()
diff --git a/benchmarks/test_cli.py b/benchmarks/test_cli.py
@@ -0,0 +1,48 @@
+"""Benchmarks for minimal `tahoe` CLI interactions."""
+
+from subprocess import Popen, PIPE
+
+import pytest
+
+from integration.util import cli
+
+
+@pytest.fixture(scope="session")
+def cli_alias(client_node):
+    cli(client_node.process, "create-alias", "cli")
+
+
+def test_get_put_one_file(
+    client_node, cli_alias, tmp_path, tahoe_benchmarker, number_of_nodes
+):
+    """
+    Upload a file with ``tahoe put`` and then download it with ``tahoe get``,
+    measuring the latency of both operations.
+    """
+    file_size = 1000  # parameterize later on
+    file_path = tmp_path / "file"
+    DATA = b"0123456789" * (file_size // 10)
+    with file_path.open("wb") as f:
+        f.write(DATA)
+
+    with tahoe_benchmarker.record(
+        "cli-put-file", file_size=file_size, number_of_nodes=number_of_nodes
+    ):
+        cli(client_node.process, "put", str(file_path), "cli:tostdout")
+
+    with tahoe_benchmarker.record(
+        "cli-get-file", file_size=file_size, number_of_nodes=number_of_nodes
+    ):
+        p = Popen(
+            [
+                "tahoe",
+                "--node-directory",
+                client_node.process.node_dir,
+                "get",
+                "cli:tostdout",
+                "-",
+            ],
+            stdout=PIPE,
+        )
+        assert p.stdout.read() == DATA
+        assert p.wait() == 0
diff --git a/benchmarks/upload_download.py b/benchmarks/upload_download.py
diff --git a/integration/util.py b/integration/util.py
@@ -240,7 +240,7 @@ def _tahoe_runner_optional_coverage(proto, reactor, request, other_args):
     allmydata.scripts.runner` and `other_args`, optionally inserting a
     `--coverage` option if the `request` indicates we should.
     """
-    if request.config.getoption('coverage'):
+    if request.config.getoption('coverage', False):
         args = [sys.executable, '-b', '-m', 'coverage', 'run', '-m', 'allmydata.scripts.runner', '--coverage']
     else:
         args = [sys.executable, '-b', '-m', 'allmydata.scripts.runner']

diff --git a/newsfragments/4060.feature b/newsfragments/4060.feature
@@ -0,0 +1 @@
+Started work on a new end-to-end benchmarking framework.
diff --git a/tox.ini b/tox.ini
@@ -109,7 +109,7 @@ passenv = HOME
 setenv =
 	 # If no positional arguments are given, try to run the checks on the
 	 # entire codebase, including various pieces of supporting code.
-	 DEFAULT_FILES=src integration static misc setup.py
+	 DEFAULT_FILES=src integration benchmarks static misc setup.py
 commands =
          ruff check {posargs:{env:DEFAULT_FILES}}
          python misc/coding_tools/check-umids.py {posargs:{env:DEFAULT_FILES}}