In [1]:
# Run the `docker-image` make target, discarding both stdout and stderr.
# NOTE(review): presumably this builds the container image that the later
# `make docker` invocations run in — confirm against the Makefile. Because all
# output is sent to /dev/null, a failure here is silent.
!make docker-image > /dev/null 2>&1

In [2]:
!CONTAINER_CMD="bash -lc 'make install-ycsb" make docker > /dev/null 2>&1

In [3]:
from pathlib import Path
import pexpect
import os
import time

""" Collector class has helper methods to interact with kermit"""
class Collector: 
    def __init__(self, config: Path):
        self.env = os.environ.copy()
        self.env["INTERACTIVE"] = "it"
        self.env["CONTAINER_CMD"] = f"bash -lc 'KERNMLOPS_CONFIG_FILE={config} make collect-data'"
        self.collect_process : pexpect.spawn | None = None

    def start_collection(self, logfile=None):
        self.collect_process = pexpect.spawn("make docker", env=self.env, timeout=None, logfile=logfile)
        self.collect_process.expect_exact(["Started benchmark"])

    def _after_run_generate_file_data() -> dict[str, list[Path]]:
        start_path : Path = Path("./data")
        list_of_collect_id_dirs = start_path.glob("*/*/*")
        latest_collect_id = max(list_of_collect_id_dirs, key=os.path.getctime)
        list_of_files = latest_collect_id.glob("*.*.parquet")
        output = {}
        for f in list_of_files:
            index = str(f).removeprefix(str(f.parent) + "/").split(".")[0]
            if index not in output.keys():
                output[index] = []
            output[index].append(f)
        return output
        
    def wait(self) -> int:
        if self.collect_process is None:
            return
        self.collect_process.expect([pexpect.EOF])
        self.collect_process.wait()
        return Collector._after_run_generate_file_data()
        
    def stop_collection(self):
        if self.collect_process is None:
            return
        self.collect_process.sendline("END")
        return self.wait()

There are two ways to run kermit:
- With the raw config
- With a pre-programmed benchmark config

In [None]:
# Simple iperf3 Test - Docker Friendly Version
#
# Checks that iperf3 is installed (installing it via apt-get if not), starts a
# local iperf3 server on port 5555, verifies a client can reach it, and then
# runs a collection with ./config/iperf_docker.yaml. On success `tcp_df` holds
# the first tcp_v4_rcv parquet file of the run.

import subprocess
import time

# First, let's manually test if iperf3 works
print("Testing iperf3 installation...")

# Install iperf3 if needed
install_result = subprocess.run(["which", "iperf3"], capture_output=True)
if install_result.returncode != 0:
    print("Installing iperf3...")
    subprocess.run(["apt-get", "update"], capture_output=True)
    subprocess.run(["apt-get", "install", "-y", "iperf3"], capture_output=True)
    print("iperf3 installed!")
else:
    print("iperf3 is already installed")

# Kill any existing iperf3 processes
subprocess.run(["pkill", "-f", "iperf3"], capture_output=True)
time.sleep(1)

# Start iperf3 server manually
print("\nStarting iperf3 server on port 5555...")
# The server's output is never read, so send it to DEVNULL: with PIPE the
# server would block once the OS pipe buffer fills up during a long run.
server = subprocess.Popen(
    ["iperf3", "-s", "-p", "5555"],
    stdout=subprocess.DEVNULL,
    stderr=subprocess.DEVNULL,
)

try:
    # Wait for server
    time.sleep(3)

    # Test connection
    print("Testing server connection...")
    test = subprocess.run(
        ["iperf3", "-c", "127.0.0.1", "-p", "5555", "-t", "1"],
        capture_output=True,
        text=True
    )

    if test.returncode == 0:
        print("✓ iperf3 is working!")

        # Now run actual benchmark with kernmlops
        print("\nRunning benchmark with kernmlops...")

        # Use minimal config for Docker
        collect = Collector("./config/iperf_docker.yaml")

        try:
            collect.start_collection(None)
            data = collect.wait()

            import polars as pl
            tcp_df = pl.read_parquet(data["tcp_v4_rcv"][0])
            print(f"\n✓ Success! Captured {len(tcp_df):,} TCP events")

        except Exception as e:
            # Deliberate best-effort: report the failure and keep the notebook running.
            print(f"\n✗ Benchmark failed: {e}")
            print("\nTry using the alternative method below...")

    else:
        print("✗ iperf3 server test failed")
        print(f"Error: {test.stderr}")
finally:
    # Cleanup runs even if the cell is interrupted or an unexpected error
    # escapes; reap the child so no zombie process is left behind.
    server.terminate()
    try:
        server.wait(timeout=5)
    except subprocess.TimeoutExpired:
        server.kill()
        server.wait()
    subprocess.run(["pkill", "-f", "iperf3"], capture_output=True)


Testing iperf3 installation...
iperf3 is already installed

Starting iperf3 server on port 5555...
Testing server connection...
✓ iperf3 is working!

Running benchmark with kernmlops...


In [None]:
# Analyze the tcp_v4_rcv events captured during the iperf3 run.
# Relies on `tcp_df` and `pl` from the iperf3 benchmark cell.

# Number of parallel iperf3 streams assumed when averaging connections.
# NOTE(review): must match the stream count in the iperf config — confirm.
IPERF_PARALLEL_STREAMS = 4

# Analyze by process
print(tcp_df.group_by("comm").len().sort("len", descending=True))
iperf_client = tcp_df.filter(
    pl.col("comm").str.contains("iperf3") & ~pl.col("comm").str.contains("-s")
)

print("\nProcess breakdown:")
print(f"- iperf3 client: {len(iperf_client)} events")

# Analyze port 5555 traffic (the port the iperf3 server in this notebook listens on)
port_5555 = tcp_df.filter((pl.col("dport") == 5555) | (pl.col("sport") == 5555))
print(f"- Port 5555 traffic: {len(port_5555)} events")

# Branch distribution
print("\nTCP state distribution:")
branch_dist = tcp_df.group_by("branch_name").agg([
    pl.len().alias("count"),
    (pl.len() / len(tcp_df) * 100).alias("percentage")
]).sort("count", descending=True)

for row in branch_dist.head(10).iter_rows():
    print(f"- {row[0]}: {row[1]:,} events ({row[2]:.1f}%)")

# Connection analysis
new_connections = tcp_df.filter(pl.col("branch_name") == "new_syn_recv")
print(f"\nNew TCP connections: {len(new_connections)}")
print(f"Connections per stream: ~{len(new_connections) / IPERF_PARALLEL_STREAMS:.0f}")

# Drop analysis over all captured events
all_drops = tcp_df.filter(pl.col("drop_reason") > 0)
if len(all_drops) > 0:
    print(f"\nDropped packets: {len(all_drops)}")
    drop_dist = all_drops.group_by("drop_reason_name").len()
    for row in drop_dist.iter_rows():
        print(f"- {row[0]}: {row[1]} drops")
else:
    print("\nNo dropped packets detected - excellent!")

# Show branch distribution
print("iperf_client group by branch_name")
print(iperf_client.group_by("branch_name").len().sort("len", descending=True))

# Show drop reasons for the iperf3 client events only
drops = iperf_client.filter(pl.col("drop_reason") > 0)
print(drops.group_by("drop_reason_name").len())

In [None]:
import subprocess
import sys


# New TCP Collector
collect = Collector("./config/raw_overrides.yaml")
collect.start_collection()

# Generate some TCP traffic
!nc -l 8080 &  # Listen on port 8080
!echo "testtesttesttesttesttesttesttesttesttest" | nc localhost 8080 
!echo "testtesttesttesttesttesttesttesttesttest" | nc localhost 8080 
!echo "testtesttesttesttesttesttesttesttesttest" | nc localhost 8080 
!echo "testtesttesttesttesttesttesttesttesttest" | nc localhost 8080 
!echo "testtesttesttesttesttesttesttesttesttest" | nc localhost 8080 
!echo "testtesttesttesttesttesttesttesttesttest" | nc localhost 8080 

data = collect.stop_collection()

In [None]:
# Analyze TCP branches for the nc-generated traffic.
# Uses `data` from the preceding collection cell.
import polars as pl
tcp_df = pl.read_parquet(data["tcp_v4_rcv"][0])

# Event counts per process name, busiest first.
print(tcp_df.group_by("comm").len().sort("len", descending=True))

# NOTE(review): substring match — any process whose name contains "nc" is
# included, not only netcat; tighten the pattern if that matters.
nc = tcp_df.filter(pl.col("comm").str.contains("nc"))

# Show branch distribution
print(nc.group_by("branch_name").len().sort("len", descending=True))

# Show drop reasons
drops = nc.filter(pl.col("drop_reason") > 0)
print(drops.group_by("drop_reason_name").len())

In [None]:
# Display the current `tcp_df` DataFrame as this cell's output.
tcp_df

In [None]:
# Run a collection with the redis_never config: block until the benchmark is
# reported as started, then immediately send the stop request and gather the
# resulting parquet files.
collect = Collector("./config/redis_never.yaml")
collect.start_collection(logfile=None)
data = collect.stop_collection()

In [None]:
# Analyze TCP branches for the redis run.
# Uses `data` from the preceding collection cell.
import polars as pl
tcp_df = pl.read_parquet(data["tcp_v4_rcv"][0])

# Event counts per process name, busiest first.
print(tcp_df.group_by("comm").len().sort("len", descending=True))

# Keep only events attributed to a process whose name contains "redis-server".
redis = tcp_df.filter(pl.col("comm").str.contains("redis-server"))

# Show branch distribution
print(redis.group_by("branch_name").len().sort("len", descending=True))

# Show drop reasons
drops = redis.filter(pl.col("drop_reason") > 0)
print(drops.group_by("drop_reason_name").len())

In [None]:
# Collector configured for the XSBench workload.
collect = Collector("./config/xsbench.yaml")

# Launch the run; this returns once the benchmark is reported as started.
print("Starting collection with XSBench workload...")
collect.start_collection(logfile=None)

# Send the stop request and block until the collection process exits.
# NOTE(review): this stops the run rather than waiting for XSBench to finish
# on its own — confirm whether `collect.wait()` was intended here.
print("Running XSBench benchmark...")
data = collect.stop_collection()




In [None]:
# Load the tcp_v4_rcv capture produced by the XSBench run.
# Relies on `pl` (polars) and `data` from earlier cells.
print("\nAnalyzing TCP traffic from XSBench:")
tcp_v4_rcv_path = data["tcp_v4_rcv"][0]
tcp_df = pl.read_parquet(tcp_v4_rcv_path)

# Event counts per branch, most frequent first.
print("\nTCP branch distribution:")
branch_counts = tcp_df.group_by("branch_name").len()
print(branch_counts.sort("len", descending=True))

# Rows with a positive drop_reason, summarised per reason name when present.
drops = tcp_df.filter(pl.col("drop_reason") > 0)
if len(drops) == 0:
    print("\nNo dropped packets detected")
else:
    print("\nDropped packets:")
    print(drops.group_by("drop_reason_name").len())

# Ten busiest processes by event count.
print("\nTCP activity by process:")
per_process = tcp_df.group_by("comm").len()
process_tcp = per_process.sort("len", descending=True).head(10)
print(process_tcp)

# Events attributed to a process whose name contains "XSBench".
xsbench_traffic = tcp_df.filter(pl.col("comm").str.contains("XSBench"))
xsbench_events = len(xsbench_traffic)
if xsbench_events == 0:
    print("\nNo direct TCP traffic from XSBench process detected")
else:
    print(f"\nXSBench generated {xsbench_events} TCP events")