In [11]:
import os
import socket

def assert_cpus_disabled(start, end):
    with open('/sys/devices/system/cpu/online', 'r') as f:
        online_cpus = f.read().strip()
        for cpu in range(start, end+1):
            assert str(cpu) not in online_cpus, f"CPU {cpu} is enabled"
def check_cpu_governor(start, end):
    for cpu in range(start, end+1):
        governor_file = f"/sys/devices/system/cpu/cpu{cpu}/cpufreq/scaling_governor"
        if os.path.exists(governor_file):
            with open(governor_file, 'r') as f:
                governor = f.read().strip()
                assert governor == 'performance', f"CPU {cpu} governor is not set to performance"
        else:
            print(f"CPU {cpu} does not exist or does not have a scaling governor")
def find_consecutive_free_ports(start_port, end_port, consecutive_ports):
    for port in range(start_port, end_port):
        for i in range(consecutive_ports):
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
                result = sock.connect_ex(('localhost', port + i))
                if result == 0:
                    break
                if i == consecutive_ports - 1:
                    return port
    return None

# run ~/cpu.sh
check_cpu_governor(0, 15)
assert_cpus_disabled(16, 31)

# Setup evaluation suite

yusung_set = [
    "bullet",
    "factorial",
    "ffmpeg",
    "fractals",
    "funky-kart",
    "game-of-life",
    "gotemplate",
    "hnset-bench",
    'hydro',
    "jqkungfu",
    "lichess",
    "mandelbrot",
    "ogv",
    "onnxjs",
    "pacalc",
    'parquet',
    "playnox",
    "roslyn",
    "rustpython",
    "sandspiel",
    "sqlgui",
    "sqlpractice",
    "takahirox",
    "tic-tac-toe",
    "timestretch",
    "vaporboy",
    "video",
    "waforth",
    "wasmsh",
    "wheel",
]

jakob_set = [
    "boa",
    "commanderkeen",
    "ffmpeg",
    "fib",
    "figma-startpage",
    "funky-kart",
    "game-of-life",
    "guiicons",
    "handy-tools",
    'heatmap',
    'image-convolute',
    "jsc",
    "kittygame",
    'multiplyDouble',
    "multiplyInt",
    "ogv",
    "pathfinding",
    "riconpacker",
    "rtexviewer",
    "sandspiel",
    "sqlgui",
    "video",
    'visual6502remix',
]

other_set = [
    'livesplit',
    'noisereduction',
    'rfxgen',
    'rguilayout',
    'rguistyler',
    'rtexpacker',
    # 'skeletal', messes up the testing framework...
    'uarm',
    'vim-wasm',
    'virtualkc',
]

union = list(set(yusung_set) | set(jakob_set) | set(other_set))
intersection = list(set(yusung_set) & set(jakob_set))
print('yusung_set: ', len(yusung_set))
print('jakob_set: ', len(jakob_set))
print('other: ', len(other_set) + 1) # because skeletal is not included in the union
print('union: ', len(union) + 1) # because skeletal is not included in the union

testset = union
metrics = {testname: { 'trace_match': {}, 'record_metrics': {}, 'replay_metrics': {}} for testname in testset }
start_port = find_consecutive_free_ports(8080, 65535, len(testset))
r3_path = os.getenv('WASMR3_PATH', '~/wasm-r3')

yusung_set:  30
jakob_set:  23
other:  10
union:  56


In [2]:
import subprocess
import json
import concurrent.futures

# Setup artifacts
# frontends = ['custom', 'wasabi', 'firefox', 'webkit']
frontends = ['wasabi']
frontend_to_option = {
    'wasabi': '',
    'firefox': '-f',
    'webkit': '-w',
    'custom': '-c',
}
# parrallel - 2min with
# serial - 37min with 35 pass
parallel = True
timeout = 300 if parallel else 120

def run_command(testname, frontend, i):
    frontend_option = frontend_to_option[frontend]
    port = start_port + i
    command = f". ~/.bashrc && timeout {timeout}s npm test -- {frontend_option} -t {testname} -p {port}"
    result = subprocess.run(command, shell=True, capture_output=True, text=True)
    isNormal = result.returncode == 0
    if not isNormal:
        print(result.args)
        print(result.stderr)
    return [testname, frontend, isNormal]

if parallel:
    with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
        futures = [executor.submit(run_command, testname, frontend, i) for i, testname in enumerate(testset) for frontend in frontends]
        results = [future.result() for future in concurrent.futures.as_completed(futures)]
else:
    results = [run_command(testname, frontend, i) for i, testname in enumerate(testset) for frontend in frontends]
for testname, frontend, isNormal in results:
    metrics[testname]['trace_match'][frontend] = isNormal

with open('metrics.json', 'w') as f:
    json.dump(metrics, f, indent=4)

NameError: name 'testset' is not defined

In [1]:
import json
from tabulate import tabulate
with open('metrics.json', 'r') as f:
    metrics = json.load(f)

print('RQ1: Applicability')
rq1_results = [['Test Name', 'Trace Match']] + sorted([[testname, 'o' if metrics[testname]['trace_match']['wasabi'] else ''] for testname in metrics]) + [['Total', sum([1 for testname in metrics if metrics[testname]['trace_match']['wasabi']])]]
print(tabulate(rq1_results, tablefmt="latex"))

RQ1: Applicability
\begin{tabular}{ll}
\hline
 Test Name       & Trace Match \\
 boa             & o           \\
 bullet          & o           \\
 commanderkeen   & o           \\
 factorial       & o           \\
 ffmpeg          & o           \\
 fib             & o           \\
 figma-startpage &             \\
 fractals        &             \\
 funky-kart      & o           \\
 game-of-life    & o           \\
 gotemplate      &             \\
 guiicons        & o           \\
 handy-tools     & o           \\
 heatmap         &             \\
 hnset-bench     & o           \\
 hydro           & o           \\
 image-convolute &             \\
 jqkungfu        & o           \\
 jsc             & o           \\
 kittygame       & o           \\
 lichess         &             \\
 livesplit       &             \\
 mandelbrot      & o           \\
 multiplyDouble  & o           \\
 multiplyInt     & o           \\
 noisereduction  &             \\
 ogv             & o           \\
 o

In [46]:
import subprocess
import json
import concurrent.futures

timeout = 120 # seconds
engine_kind = ['sm', 'sm-base', 'sm-opt', 'v8', 'v8-liftoff', 'v8-turbofan', 'jsc', 'jsc-int','jsc-bbq','jsc-omg', 'wizeng','wizeng-int','wizeng-jit','wizeng-dyn','wasmtime','wasmer','wasmer-base']
wizard_engine_kind = ['wizeng','wizeng-int','wizeng-jit','wizeng-dyn']
opt_kind = ['noopt', 'split', 'merge', 'custom', 'benchmark'] # custom and benchmark are technically not replay opt though
# metrics = {testname: { 'test_success': success, 'replay_metrics': {engine: {opt: {} for opt in opt_kind} for engine in engine_kind} if success else {}} for testname, success in results }
# parallel takes around 2 minutes, while serial takes around 13 minutes
# parallel for quick experiments and sequential for final results
parallel = False

def get_replay_wasm(testname, opt):
    regex = ''
    match opt:
        case 'noopt':
            regex = 'merge|split|custom|benchmark'
        case 'split':
            regex = 'noopt|merge|custom|benchmark'
        case 'merge':
            regex = 'noopt|split|custom|benchmark'
        case 'custom':
            regex = 'noopt|split|merge|benchmark'
        case 'benchmark':
            regex = 'noopt|split|merge|custom'
        case _:
            exit('invalid op')
    find_command = f"find {r3_path}/tests/online/{testname} -name replay.wasm | grep -vE '{regex}'"
    find_result = subprocess.run(find_command, shell=True, capture_output=True, text=True)
    replay_path = find_result.stdout.strip()
    return replay_path

def run_wish_you_were_fast(testname, engine_kind):
    global metrics
    for opt in opt_kind:
        replay_path = get_replay_wasm(testname, opt)
        command = f". ~/.bashrc && timeout {timeout}s wizeng.x86-64-linux --metrics --monitors=profile {replay_path}"
        result = subprocess.run(command, shell=True, capture_output=True, text=True)
        isNormal = 'o' if result.returncode == 0  else ''
        if not isNormal:
            print(result.args)
            print(result.stderr)
            metrics[testname]['replay_metrics'][engine_kind][opt] = 'N/A'
        else:
            monitor, profile = result.stdout.split("pregen:time_us")
            profile = 'pregen:time_us' + profile
            # Make replay_metrics after "pregen:time_us" a key of some object
            metrics[testname]['replay_metrics'][engine_kind][opt] |= {line.rsplit(":", 1)[0].strip(): line.rsplit(":", 1)[1].strip().replace("μs", "").strip() for line in profile.split("\n") if line}

def run_wizard(testname, engine_kind):
    global metrics
    for opt in opt_kind:
        replay_path = get_replay_wasm(testname, opt)
        command = f". ~/.bashrc && timeout {timeout}s wizeng.x86-64-linux --metrics --monitors=profile {replay_path}"
        result = subprocess.run(command, shell=True, capture_output=True, text=True)
        isNormal = result.returncode == 0
        if not isNormal:
            print(result.args)
            print(result.stderr)
            metrics[testname]['replay_metrics'][engine_kind][opt] = {}
        else:
            monitor, profile = result.stdout.split("pregen:time_us")
            profile = 'pregen:time_us' + profile
            # Make replay_metrics after "pregen:time_us" a key of some object
            metrics[testname]['replay_metrics'][engine_kind][opt] |= {line.rsplit(":", 1)[0].strip(): line.rsplit(":", 1)[1].strip().replace("μs", "").strip() for line in profile.split("\n") if line}

if parallel:
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # futures = [executor.submit(run_command, testname, i) for i, testname in enumerate(testset)]
        # results = [future.result() for future in concurrent.futures.as_completed(futures)]
        pass
else:
    for testname, isNormal in results:
        if isNormal:
            # for kind in engine_kind:
            #     run_wish_you_were_fast(testname, kind)
            # for kind in wizard_engine_kind:
            for kind in ['wizeng-int']:
                run_wizard(testname, kind)

with open('metrics.json', 'w') as f:
    json.dump(metrics, f, indent=4)


. ~/.bashrc && timeout 120s wizeng.x86-64-linux --metrics --monitors=profile 

. ~/.bashrc && timeout 120s wizeng.x86-64-linux --metrics --monitors=profile /home/don/wasm-r3/tests/online/funky-kart/noopt/e02ab0-5477-45aa-99d4-226b75c27859/replay.wasm

. ~/.bashrc && timeout 120s wizeng.x86-64-linux --metrics --monitors=profile 

. ~/.bashrc && timeout 120s wizeng.x86-64-linux --metrics --monitors=profile /home/don/wasm-r3/tests/online/tic-tac-toe/noopt/37c5d3-c76f-49a7-a760-a5aa0e5f2743/replay.wasm

. ~/.bashrc && timeout 120s wizeng.x86-64-linux --metrics --monitors=profile /home/don/wasm-r3/tests/online/tic-tac-toe/split/37c5d3-c76f-49a7-a760-a5aa0e5f2743/replay.wasm

. ~/.bashrc && timeout 120s wizeng.x86-64-linux --metrics --monitors=profile /home/don/wasm-r3/tests/online/tic-tac-toe/merge/37c5d3-c76f-49a7-a760-a5aa0e5f2743/replay.wasm

. ~/.bashrc && timeout 120s wizeng.x86-64-linux --metrics --monitors=profile /home/don/wasm-r3/tests/online/tic-tac-toe/custom/37c5d3-c76f-49a7-a76

In [13]:
import json
from tabulate import tabulate
with open('metrics.json', 'r') as f:
    metrics = json.load(f)

print('RQ1: Applicability')
rq1_results = [['Test Name', 'Trace Match']] + sorted([[testname, 'o' if metrics[testname]['trace_match']['wasabi'] else ''] for testname in metrics]) + [['Total', sum([1 for testname in metrics if metrics[testname]['trace_match']['wasabi']])]]
print(tabulate(rq1_results, tablefmt="latex"))

RQ1: Applicability
\begin{tabular}{ll}
\hline
 Test Name       & Trace Match \\
 boa             & o           \\
 bullet          & o           \\
 commanderkeen   & o           \\
 factorial       & o           \\
 ffmpeg          & o           \\
 fib             & o           \\
 figma-startpage &             \\
 fractals        &             \\
 funky-kart      & o           \\
 game-of-life    & o           \\
 gotemplate      &             \\
 guiicons        & o           \\
 handy-tools     & o           \\
 heatmap         &             \\
 hnset-bench     & o           \\
 hydro           & o           \\
 image-convolute &             \\
 jqkungfu        & o           \\
 jsc             & o           \\
 kittygame       & o           \\
 lichess         &             \\
 livesplit       &             \\
 mandelbrot      & o           \\
 multiplyDouble  & o           \\
 multiplyInt     & o           \\
 noisereduction  &             \\
 ogv             & o           \\
 o

In [55]:
import json
from tabulate import tabulate
with open('metrics.json', 'r') as f:
    metrics = json.load(f)
metrics_with_results = { testname: metrics[testname] for testname in metrics if metrics[testname]['test_success'] }

print('RQ2-1: Performance-Record')
print('TODO')

print('RQ2-2: Performance-Replay')
def get_metric(testname, metric):
    return metrics[testname]['replay_metrics']['wizeng-int']['custom'].get(metric)
rq22_results = [['Test name', 'replay proportion', 'pregen time', 'load time', 'validate time', 'spc time', 'start time', 'main time']] + [[testname, 'TODO', get_metric(testname, 'pregen:time_us'), get_metric(testname, 'load:time_us'), get_metric(testname, 'validate:time_us'), get_metric(testname, 'spc:time_us'), get_metric(testname, 'start:time_us'), get_metric(testname, 'main:time_us')] for testname in metrics_with_results ]
print(tabulate(rq22_results, tablefmt="latex"))

RQ2-1: Performance-Record
TODO
RQ2-2: Performance-Replay
\begin{tabular}{llllllll}
\hline
 Test name    & replay proportion & pregen time & load time & validate time & spc time & start time & main time \\
 bullet       & TODO              & 8           & 11666     & 10285         & 0        & 0          & 5612667   \\
 factorial    & TODO              & 5           & 769       & 667           & 0        & 0          & 69        \\
 fractals     & TODO              & 5           & 37        & 15            & 0        & 0          & 1272      \\
 funky-kart   & TODO              & 9           & 13241     & 9101          & 0        & 0          & 5150557   \\
 game-of-life & TODO              & 6           & 53        & 29            & 0        & 0          & 182       \\
 hnset-bench  & TODO              & 6           & 35        & 13            & 0        & 0          & 6         \\
 mandelbrot   & TODO              & 6           & 1492      & 1216          & 0        & 0          & 303

In [56]:
import json
from tabulate import tabulate
with open('metrics.json', 'r') as f:
    metrics = json.load(f)

print('RQ3: Trace Reduction')

print('TODO')

RQ3: Trace Reduction
TODO


In [57]:
import json
from tabulate import tabulate
with open('metrics.json', 'r') as f:
    metrics = json.load(f)
metrics_with_results = { testname: metrics[testname] for testname in metrics if metrics[testname]['test_success'] }

print('RQ4: Replay Optimization')

def get_metric(testname, opt, time):
    metric = metrics[testname]['replay_metrics']['wizeng-int'][opt]
    if len(metric) == 0:
        return 0
    else:
        return metric[time]

print('RQ4-1: Load time')
time = 'load:time_us'
rq4_results = [['Test name', 'noopt time', 'split time', 'merge time', 'fullopt time']] + [[testname, get_metric(testname, 'noopt', time), get_metric(testname, 'split', time), get_metric(testname, 'merge', time), get_metric(testname, 'custom', time)] for testname in metrics_with_results]
print(tabulate(rq4_results, tablefmt="latex"))

print('RQ4-2: Validate time')
time = 'validate:time_us'
rq4_results = [['Test name', 'noopt time', 'split time', 'merge time', 'fullopt time']] + [[testname, get_metric(testname, 'noopt', time), get_metric(testname, 'split', time), get_metric(testname, 'merge', time), get_metric(testname, 'custom', time)] for testname in metrics_with_results]
print(tabulate(rq4_results, tablefmt="latex"))

print('RQ4-3: Main time')
time = 'main:time_us'
rq4_results = [['Test name', 'noopt time', 'split time', 'merge time', 'fullopt time']] + [[testname, get_metric(testname, 'noopt', time), get_metric(testname, 'split', time), get_metric(testname, 'merge', time), get_metric(testname, 'custom', time)] for testname in metrics_with_results]
print(tabulate(rq4_results, tablefmt="latex"))

RQ4: Replay Optimization
RQ4-1: Load time
\begin{tabular}{lllll}
\hline
 Test name    & noopt time & split time & merge time & fullopt time \\
 bullet       & 12082      & 12073      & 11757      & 11666        \\
 factorial    & 772        & 767        & 769        & 769          \\
 fractals     & 38         & 36         & 38         & 37           \\
 funky-kart   & 0          & 131955     & 14681      & 13241        \\
 game-of-life & 57         & 55         & 52         & 53           \\
 hnset-bench  & 35         & 36         & 35         & 35           \\
 mandelbrot   & 13850      & 13728      & 1487       & 1492         \\
 ogv          & 30         & 28         & 29         & 27           \\
 pacalc       & 4763       & 4752       & 4811       & 4647         \\
 sandspiel    & 18002      & 18166      & 17777      & 18175        \\
 sqlgui       & 13468      & 13352      & 14585      & 13391        \\
 sqlpractice  & 39566      & 41296      & 38642      & 24466        \\
 tic-