diff --git a/pyproject.toml b/pyproject.toml
index d02ea77..8598627 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "redisbench-admin"
-version = "0.2.1"
+version = "0.2.2"
 description = "Redis benchmark run helper. A wrapper around Redis and Redis Modules benchmark tools ( ftsb_redisearch, memtier_benchmark, redis-benchmark, aibench, etc... )."
 authors = ["filipecosta90 "]
 readme = "README.md"
diff --git a/redisbench_admin/cli.py b/redisbench_admin/cli.py
index 6c005c8..81e9e46 100644
--- a/redisbench_admin/cli.py
+++ b/redisbench_admin/cli.py
@@ -17,7 +17,7 @@
 from redisbench_admin.extract.extract import extract_command_logic
 from redisbench_admin.run_local.args import create_run_local_arguments
 from redisbench_admin.run_local.run_local import run_local_command_logic
-from redisbench_admin.run_remote.args import create_run_remote_arguments
+from redisbench_admin.run_remote.args import create_run_remote_arguments, LOG_LEVEL
 from redisbench_admin.run_remote.run_remote import run_remote_command_logic
@@ -39,7 +39,7 @@ def populate_with_poetry_data():
 # logging settings
 logging.basicConfig(
     format="%(asctime)s %(levelname)-4s %(message)s",
-    level=logging.INFO,
+    level=LOG_LEVEL,
     datefmt="%Y-%m-%d %H:%M:%S",
 )
diff --git a/redisbench_admin/profilers/perf.py b/redisbench_admin/profilers/perf.py
index ec6bcac..8ae7b55 100644
--- a/redisbench_admin/profilers/perf.py
+++ b/redisbench_admin/profilers/perf.py
@@ -71,10 +71,22 @@ def retrieve_perf_version(self):
         self.version_minor = m.group(2)
         return m, self.version_major, self.version_minor

-    def generate_record_command(self, pid, output, frequency=None):
+    def generate_record_command(self, pid, output, frequency=None, call_graph="lbr"):
         self.output = output
         self.pid = pid
-        cmd = [self.perf, "record", "-g", "--pid", "{}".format(pid), "--output", output]
+        cmd = [
+            self.perf,
+            "record",
+            "-e",
+            "cycles:pp",
+            "-g",
+            "--pid",
+            "{}".format(pid),
+            "--output",
+            output,
+            "--call-graph",
+            call_graph,
+        ]
         if frequency:
             cmd += ["--freq", "{}".format(frequency)]
         return cmd
diff --git a/redisbench_admin/profilers/profilers.py b/redisbench_admin/profilers/profilers.py
index 523f07a..60f5739 100644
--- a/redisbench_admin/profilers/profilers.py
+++ b/redisbench_admin/profilers/profilers.py
@@ -8,6 +8,7 @@
 ALLOWED_PROFILERS = "perf:record,ebpf:oncpu,ebpf:offcpu"
 PROFILERS_DEFAULT = "perf:record"
+PROFILE_FREQ_DEFAULT = "99"

 STACKCOLLAPSE_PATH = pkg_resources.resource_filename(
     "redisbench_admin", "profilers/stackcollapse-perf.pl"
diff --git a/redisbench_admin/run_local/args.py b/redisbench_admin/run_local/args.py
index 263f7ae..2532f93 100644
--- a/redisbench_admin/run_local/args.py
+++ b/redisbench_admin/run_local/args.py
@@ -6,10 +6,15 @@
 import os

-from redisbench_admin.profilers.profilers import PROFILERS_DEFAULT, ALLOWED_PROFILERS
+from redisbench_admin.profilers.profilers import (
+    PROFILERS_DEFAULT,
+    ALLOWED_PROFILERS,
+    PROFILE_FREQ_DEFAULT,
+)

 PROFILERS_ENABLED = os.getenv("PROFILE", 0)
 PROFILERS = os.getenv("PROFILERS", PROFILERS_DEFAULT)
+PROFILE_FREQ = os.getenv("PROFILE_FREQ", PROFILE_FREQ_DEFAULT)


 def create_run_local_arguments(parser):
diff --git a/redisbench_admin/run_local/run_local.py b/redisbench_admin/run_local/run_local.py
index 8f53318..69cd3a2 100644
--- a/redisbench_admin/run_local/run_local.py
+++ b/redisbench_admin/run_local/run_local.py
@@ -12,6 +12,7 @@
 import subprocess
 import sys
 import tempfile
+import datetime

 import redis
 import wget
@@ -23,6 +24,7 @@
     prepare_benchmark_parameters,
     get_start_time_vars,
 )
+from redisbench_admin.run_local.args import PROFILE_FREQ
 from redisbench_admin.utils.benchmark_config import (
     prepare_benchmark_definitions,
     extract_benchmark_tool_settings,
@@ -207,15 +209,36 @@ def run_local_command_logic(args):
                         start_time_str=start_time_str,
                     )
                 )
-                profiler_obj.start_profile(redis_process.pid, profile_filename)
+                profiler_obj.start_profile(
+                    redis_process.pid, profile_filename, PROFILE_FREQ
+                )

             # run the benchmark
+            benchmark_start_time = datetime.datetime.now()
             stdout, stderr = run_local_benchmark(benchmark_tool, command)
+            benchmark_end_time = datetime.datetime.now()
+            benchmark_duration_seconds = (
+                benchmark_end_time - benchmark_start_time
+            ).seconds
+
             logging.info("Extracting the benchmark results")
             logging.info("stdout: {}".format(stdout))
             logging.info("stderr: {}".format(stderr))
             if profilers_enabled:
+                expected_min_duration = 60
+                if benchmark_duration_seconds < expected_min_duration:
+                    logging.warning(
+                        "Total benchmark duration ({} secs) was below {} seconds. ".format(
+                            benchmark_duration_seconds, expected_min_duration
+                        )
+                        + "Given the profile frequency {} it means that at most we collected {} samples. ".format(
+                            PROFILE_FREQ, int(PROFILE_FREQ) * benchmark_duration_seconds
+                        )
+                        + "Please increase the benchmark time for more accurate profiles. "
+                        + "If that is not possible, please change the profile frequency to a higher value "
+                        + "via the env variable PROFILE_FREQ. NOTICE THAT THIS INCREASES OVERHEAD!!!"
+                    )
                 for profiler_name, profiler_obj in profilers_map.items():
                     # Collect and fold stacks
                     logging.info(
diff --git a/redisbench_admin/run_remote/args.py b/redisbench_admin/run_remote/args.py
index bbc94d5..13dd9a2 100644
--- a/redisbench_admin/run_remote/args.py
+++ b/redisbench_admin/run_remote/args.py
@@ -3,16 +3,21 @@
 # Copyright (c) 2021., Redis Labs Modules
 # All rights reserved.
 #
-
+import logging
 import os
 import socket

 # environment variables
+PERFORMANCE_RTS_PUSH = bool(os.getenv("PUSH_RTS", False))
 PERFORMANCE_RTS_AUTH = os.getenv("PERFORMANCE_RTS_AUTH", None)
 PERFORMANCE_RTS_HOST = os.getenv("PERFORMANCE_RTS_HOST", 6379)
 PERFORMANCE_RTS_PORT = os.getenv("PERFORMANCE_RTS_PORT", None)
 TERRAFORM_BIN_PATH = os.getenv("TERRAFORM_BIN_PATH", "terraform")
+LOG_LEVEL = logging.INFO
+if os.getenv("VERBOSE", "1") == "0":
+    LOG_LEVEL = logging.WARN
+

 def create_run_remote_arguments(parser):
     parser.add_argument("--module_path", type=str, required=True)
@@ -61,7 +66,7 @@ def create_run_remote_arguments(parser):
     parser.add_argument("--redistimesies_pass", type=str, default=PERFORMANCE_RTS_AUTH)
     parser.add_argument(
         "--push_results_redistimeseries",
-        default=False,
+        default=PERFORMANCE_RTS_PUSH,
         action="store_true",
         help="uploads the results to RedisTimeSeries. Proper credentials are required",
     )
diff --git a/redisbench_admin/run_remote/run_remote.py b/redisbench_admin/run_remote/run_remote.py
index ca1a60b..3b69fdd 100644
--- a/redisbench_admin/run_remote/run_remote.py
+++ b/redisbench_admin/run_remote/run_remote.py
@@ -3,7 +3,7 @@
 # Copyright (c) 2021., Redis Labs Modules
 # All rights reserved.
 #
-
+import datetime
 import json
 import logging
 import os
@@ -53,6 +53,8 @@
 )

 # internal aux vars
+from redisbench_admin.utils.utils import get_ts_metric_name
+
 redisbenchmark_go_link = (
     "https://s3.amazonaws.com/benchmarks.redislabs/"
     "tools/redisgraph-benchmark-go/unstable/"
@@ -63,6 +65,7 @@ local_results_file = "./benchmark-result.out"
 remote_results_file = "/tmp/benchmark-result.out"
 private_key = "/tmp/benchmarks.redislabs.pem"
+min_recommended_benchmark_duration = 60

 # environment variables
 PERFORMANCE_RTS_AUTH = os.getenv("PERFORMANCE_RTS_AUTH", None)
@@ -285,6 +288,7 @@ def run_remote_command_logic(args):
     remote_envs = {}
     dirname = "."
     (
+        prefix,
         testcases_setname,
         tsname_project_total_failures,
         tsname_project_total_success,
@@ -468,6 +472,8 @@ def run_remote_command_logic(args):
             if benchmark_tool == "redis-benchmark":
                 tmp = local_benchmark_output_filename
                 local_benchmark_output_filename = "result.csv"
+
+            benchmark_start_time = datetime.datetime.now()
             # run the benchmark
             _, stdout, _ = run_remote_benchmark(
                 client_public_ip,
@@ -477,6 +483,21 @@
                 username,
                 private_key,
                 remote_results_file,
                 local_benchmark_output_filename,
                 command_str,
             )
+            benchmark_end_time = datetime.datetime.now()
+            benchmark_duration_seconds = (
+                benchmark_end_time - benchmark_start_time
+            ).seconds
+            logging.info(
+                "Benchmark duration {} secs.".format(benchmark_duration_seconds)
+            )
+            if benchmark_duration_seconds < min_recommended_benchmark_duration:
+                logging.warning(
+                    "Benchmark duration of {} secs is below the considered"
+                    " minimum duration for a stable run ({} secs).".format(
+                        benchmark_duration_seconds,
+                        min_recommended_benchmark_duration,
+                    )
+                )
             if benchmark_tool == "redis-benchmark":
                 local_benchmark_output_filename = tmp
@@ -551,6 +572,37 @@ def run_remote_command_logic(args):
                         tf_triggering_env,
                     ),
                 )
+                metric_name = "benchmark_duration"
+                tsname_use_case_duration = get_ts_metric_name(
+                    "by.version",
+                    artifact_version,
+                    tf_github_org,
+                    tf_github_repo,
+                    deployment_type,
+                    test_name,
+                    tf_triggering_env,
+                    metric_name,
+                )
+                labels = get_project_ts_tags(
+                    tf_github_org,
+                    tf_github_repo,
+                    deployment_type,
+                    tf_triggering_env,
+                )
+                labels["version"] = artifact_version
+                labels["test_name"] = str(test_name)
+                labels["metric"] = str(metric_name)
+                logging.info(
+                    "Adding duration {} secs to time-series named {}".format(
+                        benchmark_duration_seconds, tsname_use_case_duration
+                    )
+                )
+                rts.add(
+                    tsname_use_case_duration,
+                    start_time_ms,
+                    benchmark_duration_seconds,
+                    labels=labels,
+                )
             except redis.exceptions.ResponseError as e:
                 logging.warning(
                     "Error while updating secondary data structures {}. ".format(
".format( diff --git a/redisbench_admin/utils/remote.py b/redisbench_admin/utils/remote.py index 03d14d4..8c4593e 100644 --- a/redisbench_admin/utils/remote.py +++ b/redisbench_admin/utils/remote.py @@ -22,6 +22,7 @@ from tqdm import tqdm from redisbench_admin.utils.local import check_dataset_local_requirements +from redisbench_admin.utils.utils import get_ts_metric_name def get_git_root(path): @@ -498,19 +499,15 @@ def extract_perversion_timeseries_from_results( version_tags["version"] = project_version version_tags["test_name"] = str(test_name) version_tags["metric"] = str(metric_name) - - ts_name = ( - "ci.benchmarks.redislabs/by.version/" - "{triggering_env}/{github_org}/{github_repo}/" - "{test_name}/{deployment_type}/{version}/{metric}".format( - version=project_version, - github_org=tf_github_org, - github_repo=tf_github_repo, - deployment_type=deployment_type, - test_name=test_name, - triggering_env=tf_triggering_env, - metric=metric_name, - ) + ts_name = get_ts_metric_name( + "by.version", + project_version, + tf_github_org, + tf_github_repo, + deployment_type, + test_name, + tf_triggering_env, + metric_name, ) branch_time_series_dict[ts_name] = { @@ -559,6 +556,16 @@ def extract_perbranch_timeseries_from_results( pass finally: metric_name = jsonpath[2:] + ts_name = get_ts_metric_name( + "by.branch", + tf_github_branch, + tf_github_org, + tf_github_repo, + deployment_type, + test_name, + tf_triggering_env, + metric_name, + ) find_res = jsonpath_expr.find(results_dict) if find_res is not None and len(find_res) > 0: metric_value = float(find_res[0].value) @@ -569,19 +576,6 @@ def extract_perbranch_timeseries_from_results( branch_tags["branch"] = str(tf_github_branch) branch_tags["test_name"] = str(test_name) branch_tags["metric"] = str(metric_name) - ts_name = ( - "ci.benchmarks.redislabs/by.branch/" - "{triggering_env}/{github_org}/{github_repo}/" - "{test_name}/{deployment_type}/{branch}/{metric}".format( - branch=str(tf_github_branch), - github_org=tf_github_org, - github_repo=tf_github_repo, - deployment_type=deployment_type, - test_name=test_name, - triggering_env=tf_triggering_env, - metric=metric_name, - ) - ) branch_time_series_dict[ts_name] = { "labels": branch_tags.copy(), @@ -595,31 +589,23 @@ def extract_perbranch_timeseries_from_results( def get_overall_dashboard_keynames(tf_github_org, tf_github_repo, tf_triggering_env): - testcases_setname = ( + prefix = ( "ci.benchmarks.redislabs/" - "{triggering_env}/{github_org}/{github_repo}:testcases".format( + + "{triggering_env}/{github_org}/{github_repo}".format( triggering_env=tf_triggering_env, github_org=tf_github_org, github_repo=tf_github_repo, ) ) - tsname_project_total_success = ( - "ci.benchmarks.redislabs/" - "{triggering_env}/{github_org}/{github_repo}:total_success".format( - triggering_env=tf_triggering_env, - github_org=tf_github_org, - github_repo=tf_github_repo, - ) + testcases_setname = "{}:testcases".format(prefix) + tsname_project_total_success = "{}:total_success".format( + prefix, ) - tsname_project_total_failures = ( - "ci.benchmarks.redislabs/" - "{triggering_env}/{github_org}/{github_repo}:total_failures".format( - triggering_env=tf_triggering_env, - github_org=tf_github_org, - github_repo=tf_github_repo, - ) + tsname_project_total_failures = "{}:total_failures".format( + prefix, ) return ( + prefix, testcases_setname, tsname_project_total_failures, tsname_project_total_success, diff --git a/redisbench_admin/utils/utils.py b/redisbench_admin/utils/utils.py index eadd4a3..4cfebfc 100644 --- 
+++ b/redisbench_admin/utils/utils.py
@@ -195,3 +195,30 @@ def read_json_or_csv(
                 col_name = header_array[col_pos]
                 res_dict[col_name].append(col)
         benchmark_config[config_filename] = res_dict
+
+
+def get_ts_metric_name(
+    by,
+    by_value,
+    tf_github_org,
+    tf_github_repo,
+    deployment_type,
+    test_name,
+    tf_triggering_env,
+    metric_name,
+):
+    ts_name = (
+        "ci.benchmarks.redislabs/{by}/"
+        "{triggering_env}/{github_org}/{github_repo}/"
+        "{test_name}/{deployment_type}/{by_value}/{metric}".format(
+            by=by,
+            triggering_env=tf_triggering_env,
+            github_org=tf_github_org,
+            github_repo=tf_github_repo,
+            test_name=test_name,
+            deployment_type=deployment_type,
+            by_value=str(by_value),
+            metric=metric_name,
+        )
+    )
+    return ts_name
diff --git a/tests/test_run_remote.py b/tests/test_run_remote.py
index cd6c912..e9cf8cc 100644
--- a/tests/test_run_remote.py
+++ b/tests/test_run_remote.py
@@ -82,11 +82,13 @@ def test_get_test_s3_bucket_path():

 def test_get_overall_dashboard_keynames():
     (
+        prefix,
         testcases_setname,
         tsname_project_total_failures,
         tsname_project_total_success,
     ) = get_overall_dashboard_keynames("org", "repo", "env")
     assert "ci.benchmarks.redislabs/env/org/repo:testcases" == testcases_setname
+    assert "ci.benchmarks.redislabs/env/org/repo" == prefix
     assert (
         "ci.benchmarks.redislabs/env/org/repo:total_success"
         == tsname_project_total_success