In [None]:
# IPython shell escape: run the `docker-image` make target, discarding both
# stdout and stderr (presumably this builds the container image used by the
# cells below -- confirm against the Makefile).
!make docker-image > /dev/null 2>&1

In [None]:
from pathlib import Path
import pexpect
import os

""" Collector class has helper methods to interact with kermit"""
class Collector: 
    def __init__(self, config: Path):
        self.env = os.environ.copy()
        self.env["INTERACTIVE"] = "i"
        self.env["CONTAINER_CMD"] = f"bash -lc 'KERNMLOPS_CONFIG_FILE={config} make collect-data'"
        self.collect_process : pexpect.spawn | None = None

    def start_collection(self, logfile=None):
        self.collect_process = pexpect.spawn("make", ["docker"], env=self.env, timeout=None, cwd="./", logfile=logfile)
        self.collect_process.expect_exact(["Ctrl+C", "Started benchmark"])

    def _after_run_generate_file_data(exit_code: int) -> dict[str, Path]:
        start_path : Path = Path("./data")
        if exit_code != 0:
            start_path /= "failed"
        else:
            start_path /= "curated"
        list_of_sys_info_files = start_path.glob("system_info/*")
        latest_sys_info = max(list_of_sys_info_files, key=os.path.getctime)
        collect_id = str(latest_sys_info).removeprefix(str(start_path /"system_info/")).split(".")[0]
        list_of_files = start_path.glob(f"*/{collect_id}.*.parquet")
        output = {"system_info" : latest_sys_info}
        for f in list_of_files:
            output[str(f).removeprefix(str(start_path) + "/").split("/")[0]] = f
        return output
        
    
    def wait(self) -> int:
        if self.collect_process is None:
            return
        return Collector._after_run_generate_file_data(self.collect_process.wait())
        
    def stop_collection(self):
        if self.collect_process is None:
            return
        self.collect_process.sendline("END")
        return self.wait()
        


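There are two ways to run kermit:
- With the raw config: start the collector, run your own workload, then stop the collector yourself
- With a pre-programmed benchmark config: start the collector and wait for it to finish on its own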

In [None]:
# subprocess is needed to launch the benchmark application below.
import subprocess

# This creates a raw collector, I suggest looking into this file to learn more
collect = Collector("./config/raw_overrides.yaml")

# The collector's log file stays open for the whole run and is closed
# afterwards, even if starting or stopping the collection raises.
with open("hello.txt", "wb") as w:
    # Start collection
    collect.start_collection(logfile=w)
    print("Collection has started")

    # Run benchmark application, capturing its stdout in blah.txt
    with open("blah.txt", "w") as f:
        bench_test = subprocess.Popen(["cat", "defaults.yaml"], stdout=f)
        bench_test.wait()

    # Stop the Collector
    print("Exit application")
    results = collect.stop_collection()

# Keep the result as the cell's final expression so the notebook displays it.
results

In [None]:
# Collector driven by a simple redis benchmark config.
collect = Collector("./config/start_overrides.yaml")

# Kick off the collection; no log file is attached.
collect.start_collection(logfile=None)

# Block until the collector finishes on its own.
collect.wait()
