In [1]:
import subprocess
import shlex
import re
import json
import pyinotify

import numpy as np
import multiprocessing as mp

from pathlib import Path
from scipy import stats
from tqdm import tqdm
from IPython.display import display, Markdown
from tempfile import TemporaryDirectory

In [2]:
# Matches the timing summary searched for in the stdout of the simulation commands below.
# The three capture groups hold the raw initialisation, execution and cleanup durations
# as text (e.g. to be converted with `parse_time`).
EXECUTION_PATTERN = re.compile(r"The simulation took:\n - initialisation: ([^\n]+)\n - execution: ([^\n]+)\n - cleanup: ([^\n]+)\n")

In [3]:
# A decimal number followed by a unit suffix (any run of non-digit characters).
TIME_PATTERN = re.compile(r"(\d+(?:\.\d+)?)([^\d]+)")
# Conversion factors from each supported unit suffix to seconds.
TIME_UNITS = {
    "ns": 0.000000001,
    "µs": 0.000001,
    "ms": 0.001,
    "s": 1.0,
}

def parse_time(time_str):
    """Convert a duration string such as "1.5s" or "250ms" into seconds.

    Surrounding whitespace, and whitespace between the number and its unit,
    is ignored.

    Parameters:
        time_str: the textual duration, a decimal number followed by a unit
            from `TIME_UNITS`.

    Returns:
        The duration in seconds as a float, or None if `time_str` does not
        start with a number followed by a unit.

    Raises:
        KeyError: if the unit suffix is not listed in `TIME_UNITS`.
    """
    match = TIME_PATTERN.match(time_str.strip())

    if match is None:
        return None

    # The unit group matches *any* non-digit characters, so it may still carry
    # whitespace (e.g. "1.5 s"); trim it before the lookup to avoid a KeyError.
    unit = match.group(2).strip()

    return float(match.group(1)) * TIME_UNITS[unit]

In [4]:
# Ask cargo where the workspace's build artefacts live. `check=True` makes a failing
# `cargo metadata` raise CalledProcessError here instead of surfacing later as a
# JSON decode error on empty output.
target_directory = json.loads(subprocess.run(
    ["cargo", "metadata", "--format-version", "1"], check=True, capture_output=True
).stdout)["target_directory"]

In [5]:
class EventLogSink(pyinotify.ProcessEvent):
    """Measures how many bytes are written below a directory, deleting files as they are closed.

    A threaded pyinotify notifier watches the directory. Every file that is
    closed after writing has its size added to `log_size` and is then removed.
    """

    def __init__(self, event_log):
        """Create `event_log` if necessary and start watching it.

        Parameters:
            event_log: path of the directory to watch.
        """
        Path(event_log).mkdir(exist_ok=True, parents=True)

        # Running total (in bytes) of all files seen by `process_IN_CLOSE_WRITE`
        self.log_size = 0

        self.wm = pyinotify.WatchManager()
        self.notifier = pyinotify.ThreadedNotifier(self.wm, self)

        self.notifier.start()

        watched_events = pyinotify.IN_CREATE | pyinotify.IN_CLOSE_WRITE
        self.wm.add_watch(event_log, watched_events, rec=True)

    def stop(self):
        """Stop the notifier and return the number of bytes counted so far."""
        self.notifier.stop()

        return self.log_size

    def process_IN_CREATE(self, event):
        """Start watching every newly created path for the same two event types."""
        watched_events = pyinotify.IN_CREATE | pyinotify.IN_CLOSE_WRITE

        self.wm.add_watch(event.pathname, watched_events)

    def process_IN_CLOSE_WRITE(self, event):
        """Add the size of the closed file to `log_size`, then delete the file."""
        closed_file = Path(event.pathname)

        # The file may already be gone by the time this event is handled
        if not closed_file.exists():
            return

        self.log_size += closed_file.stat().st_size

        closed_file.unlink()

In [6]:
def simulate_reporting_dynamic(
    algorithm, speciation=0.001, seed=42, sample=1.0, radius=564, sigma=10.0, report_speciation=False, report_dispersal=False, log=False
):
    """Run one `rustcoalescence simulate` invocation with reporters loaded from the plugin library.

    Parameters:
        algorithm: algorithm expression inserted verbatim into the config,
            e.g. "Classical()".
        speciation, seed, sample: values inserted into the config.
        radius, sigma: parameters of the `AlmostInfinite` scenario.
        report_speciation: if true, the `Biodiversity()` reporter is added.
        report_dispersal: if true, the `Biodiversity()` and `Counter()`
            reporters are added (takes precedence over `report_speciation`).
        log: if true, the config's `log` entry points to a temporary
            directory, otherwise it is `None`.

    Returns:
        A tuple `(initialisation, execution, cleanup, event_log_size)`: the
        three durations parsed from the simulation's stdout in seconds, and the
        number of bytes counted by the `EventLogSink` on the temporary directory.

    Raises:
        subprocess.CalledProcessError: if the simulation exits with a non-zero status.
        RuntimeError: if the simulation's stdout contains no timing summary.
    """
    if report_dispersal:
        reporters = ["Execution()", "Biodiversity()", "Counter()"]
    elif report_speciation:
        reporters = ["Execution()", "Biodiversity()"]
    else:
        reporters = ["Execution()"]

    with TemporaryDirectory() as log_path:
        # All whitespace is removed from the config, then the trailing commas
        # in front of closing brackets are dropped.
        config = "".join(f"""
        (
            speciation: {speciation},
            sample: {sample},
            seed: {seed},

            algorithm: {algorithm},

            scenario: AlmostInfinite(
                radius: {radius},
                sigma: {sigma},
            ),
            
            log: {f'"{log_path}"' if log else 'None'},

            reporters: [
                Plugin(
                    library: "{target_directory}/release/deps/libnecsim_plugins_common.so",
                    reporters: [{', '.join(reporters)}]
                )
            ],
        )
        """.split()).replace(",)", ")").replace(",]", "]")

        event_log = EventLogSink(log_path)

        try:
            # Run the simulation. The arguments are passed as a list so that
            # quotes or spaces in the config or the path cannot break the
            # command line apart.
            result = subprocess.run([
                f"{target_directory}/release/rustcoalescence", "simulate", config
            ], check=True, capture_output=True, text=True)
        finally:
            # Always stop the notifier thread, even if the simulation failed
            event_log_size = event_log.stop()

    match = EXECUTION_PATTERN.search(result.stdout)
    if match is None:
        # Fail with the captured output instead of an AttributeError on `None`
        raise RuntimeError(
            "The simulation output contains no timing summary.\n"
            f"stdout:\n{result.stdout}\nstderr:\n{result.stderr}"
        )

    initialisation = parse_time(match.group(1))
    execution = parse_time(match.group(2))
    cleanup = parse_time(match.group(3))

    return initialisation, execution, cleanup, event_log_size

In [7]:
def simulate_reporting_compiled(speciation=0.001, seed=42, sample=1.0, radius=564, sigma=10.0, report_speciation=False, report_dispersal=False):
    """Run one invocation of the `analysis-performance-reporting` binary.

    Parameters:
        speciation, seed, sample, radius, sigma: values passed through as the
            command line options of the same names.
        report_speciation: if true, the "progress-speciation" mode is selected.
        report_dispersal: if true, the "progress-speciation-dispersal" mode is
            selected (takes precedence over `report_speciation`).
            With neither flag set, "progress-only" is used.

    Returns:
        A tuple `(initialisation, execution, cleanup)` of the durations parsed
        from the binary's stdout, in seconds.

    Raises:
        subprocess.CalledProcessError: if the binary exits with a non-zero status.
        RuntimeError: if the binary's stdout contains no timing summary.
    """
    if report_dispersal:
        reporting = "progress-speciation-dispersal"
    elif report_speciation:
        reporting = "progress-speciation"
    else:
        reporting = "progress-only"

    # Run the simulation. The arguments are passed as a list so that a target
    # directory containing whitespace is not split into several arguments.
    result = subprocess.run([
        f"{target_directory}/release/analysis-performance-reporting",
        "--radius", f"{radius}",
        "--sample", f"{sample}",
        "--seed", f"{seed}",
        "--sigma", f"{sigma}",
        "--speciation", f"{speciation}",
        reporting,
    ], check=True, capture_output=True, text=True)

    match = EXECUTION_PATTERN.search(result.stdout)
    if match is None:
        # Fail with the captured output instead of an AttributeError on `None`
        raise RuntimeError(
            "The simulation output contains no timing summary.\n"
            f"stdout:\n{result.stdout}\nstderr:\n{result.stderr}"
        )

    initialisation = parse_time(match.group(1))
    execution = parse_time(match.group(2))
    cleanup = parse_time(match.group(3))

    return initialisation, execution, cleanup

In [8]:
def batch_simulation_many_seeds(simulate, seeds, args=tuple(), kwargs=dict(), silent=False, processes=mp.cpu_count()):
    """Call `simulate` once per seed in a process pool and collect the results.

    Parameters:
        simulate: the function to call; it receives `args`, `kwargs` and an
            additional `seed` keyword argument.
        seeds: the seeds to run, one task per seed.
        args: positional arguments passed to every call.
        kwargs: keyword arguments passed to every call (`seed` is overridden).
        silent: if true, the progress bar is disabled.
        processes: number of worker processes in the pool.

    Returns:
        A list with the return values of the successful calls, in the order in
        which they completed. Failed calls are printed and contribute no entry.
    """
    collected = []

    with tqdm(total=len(seeds), disable=silent) as progress:
        def on_success(outcome):
            collected.append(outcome)

            progress.update()

        def on_failure(error):
            print(error)

        with mp.Pool(processes) as pool:
            for seed in seeds:
                pool.apply_async(
                    simulate, args=args, kwds=dict(kwargs, seed=seed),
                    callback=on_success, error_callback=on_failure,
                )

            # Wait until every submitted task has finished
            pool.close()
            pool.join()

    return collected

In [9]:
def format_bytes(b):
    """Format a byte count using decimal (power of 1000) units.

    Values below 1000 are shown as whole bytes; larger values are divided by
    the matching power of 1000 and rounded to two decimal places, using kB,
    MB, GB or TB.
    """
    if b < 1e3:
        return f"{int(b)}B"

    # (exclusive upper bound, divisor, suffix) for each unit below TB
    for upper_bound, divisor, suffix in (
        (1e6, 1e3, "kB"),
        (1e9, 1e6, "MB"),
        (1e12, 1e9, "GB"),
    ):
        if b < upper_bound:
            return f"{np.round(b / divisor, 2)}{suffix}"

    return f"{np.round(b / 1e12, 2)}TB"

In [10]:
# Build rustcoalescence with the monolithic, independent and CUDA algorithm features.
# The arguments are given as a list so that a target directory containing whitespace
# is not split into several arguments.
subprocess.run([
    "cargo", "build",
    "--manifest-path", f"{target_directory}/../rustcoalescence/Cargo.toml",
    "--release",
    "--features", ",".join([
        "rustcoalescence-algorithms-monolithic",
        "rustcoalescence-algorithms-independent",
        "rustcoalescence-algorithms-cuda",
    ]),
], check=True, capture_output=True, text=True);

# Build the project in the current working directory in release mode
subprocess.run(
    ["cargo", "build", "--release"], check=True, capture_output=True, text=True
);

CompletedProcess(args=['cargo', 'build', '--release'], returncode=0, stdout='', stderr='   Compiling necsim-plugins-common v0.1.0 (/vol/bitbucket/ml5717/necsim-rust/necsim/plugins/common)\n   Compiling analysis-performance-reporting v0.1.0 (/vol/bitbucket/ml5717/necsim-rust/analysis/performance/reporting)\n    Finished release [optimized] target(s) in 20.34s\n')

In [11]:
def _display_command_output(heading, command):
    """Display `heading`, then the stdout of `command` as a quoted, fenced code block.

    Parameters:
        heading: Markdown text shown above the output.
        command: shell-style command line, split with `shlex.split`.

    Raises:
        subprocess.CalledProcessError: if the command exits with a non-zero status.
    """
    display(Markdown(heading))

    output = subprocess.run(
        shlex.split(command), check=True, capture_output=True, text=True
    ).stdout

    # Prefix *every* line, including the first one, with "> " so that the whole
    # output stays inside the block quote. The explicit space is the one the
    # quote marker consumes, so lines that start with spaces keep their alignment.
    quoted = "".join(f"> {line}\n" for line in output.rstrip("\n").split("\n"))

    display(Markdown(f"> ```\n{quoted}> ```"))


for heading, command in [
    ("# RAM Information:", "free"),
    ("# CPU Information:", "lscpu"),
    ("# GPU Information:", "nvidia-smi"),
]:
    _display_command_output(heading, command)

# RAM Information:

>```
              total        used        free      shared  buff/cache   available
>Mem:       16317628      756196    10501748      253932     5059684    14970936
>Swap:       4194300           0     4194300
>```

# CPU Information:

>```
Architecture:                    x86_64
>CPU op-mode(s):                  32-bit, 64-bit
>Byte Order:                      Little Endian
>Address sizes:                   46 bits physical, 48 bits virtual
>CPU(s):                          8
>On-line CPU(s) list:             0-7
>Thread(s) per core:              2
>Core(s) per socket:              4
>Socket(s):                       1
>NUMA node(s):                    1
>Vendor ID:                       GenuineIntel
>CPU family:                      6
>Model:                           62
>Model name:                      Intel(R) Xeon(R) CPU E5-1620 v2 @ 3.70GHz
>Stepping:                        4
>CPU MHz:                         3363.090
>CPU max MHz:                     3900.0000
>CPU min MHz:                     1200.0000
>BogoMIPS:                        7382.21
>Virtualisation:                  VT-x
>L1d cache:                       128 KiB
>L1i cache:                       128 KiB
>L2 cache:                        1 MiB
>L3 cache:                        10 MiB
>NUMA node0 CPU(s):               0-7
>Vulnerability Itlb multihit:     KVM: Mitigation: VMX disabled
>Vulnerability L1tf:              Mitigation; PTE Inversion; VMX conditional cache flushes, SMT vulnerable
>Vulnerability Mds:               Mitigation; Clear CPU buffers; SMT vulnerable
>Vulnerability Meltdown:          Mitigation; PTI
>Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp
>Vulnerability Spectre v1:        Mitigation; usercopy/swapgs barriers and __user pointer sanitization
>Vulnerability Spectre v2:        Mitigation; Full generic retpoline, IBPB conditional, IBRS_FW, STIBP conditional, RSB filling
>Vulnerability Srbds:             Not affected
>Vulnerability Tsx async abort:   Not affected
>Flags:                           fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm cpuid_fault epb pti ssbd ibrs ibpb stibp tpr_shadow vnmi flexpriority ept vpid fsgsbase smep erms xsaveopt dtherm ida arat pln pts md_clear flush_l1d
>```

# GPU Information:

>```
Wed May 26 16:10:28 2021       
>+-----------------------------------------------------------------------------+
>| NVIDIA-SMI 460.39       Driver Version: 460.39       CUDA Version: 11.2     |
>|-------------------------------+----------------------+----------------------+
>| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
>| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
>|                               |                      |               MIG M. |
>|===============================+======================+======================|
>|   0  GeForce GTX 1080    Off  | 00000000:05:00.0  On |                  N/A |
>| 34%   43C    P8     7W / 180W |     77MiB /  8119MiB |      0%      Default |
>|                               |                      |                  N/A |
>+-------------------------------+----------------------+----------------------+
>                                                                               
>+-----------------------------------------------------------------------------+
>| Processes:                                                                  |
>|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |
>|        ID   ID                                                   Usage      |
>|=============================================================================|
>|    0   N/A  N/A      1874      G   /usr/lib/xorg/Xorg                 74MiB |
>+-----------------------------------------------------------------------------+
>```

In [12]:
display(Markdown("# Reporting Performance:"))

# NOTE(review): the seeds come from NumPy's global RNG, which is not seeded in the
# cells shown here, so each run of the notebook benchmarks a different set of seeds.
seeds = np.random.randint(0, np.iinfo("uint64").max, dtype="uint64", size=160)

for report_speciation, report_dispersal, title in [
    (False, False, "progress only"),
    (True, False, "progress + speciation"),
    (True, True, "progress + speciation + dispersal"),
]:
    display(Markdown(f"## {title}:"))

    # Simulation parameters shared by all four benchmark variants below
    reporting_kwargs = dict(
        speciation=0.001, sample=1.0, radius=178, sigma=0.0,
        report_speciation=report_speciation, report_dispersal=report_dispersal,
    )

    # Reporters loaded from the plugin library, classical algorithm
    initialisations, executions, cleanups, event_log_sizes = zip(*batch_simulation_many_seeds(
        simulate_reporting_dynamic, seeds, args=("Classical()",), kwargs=reporting_kwargs, silent=False
    ))
    display(Markdown(f"* Dynamic Plugins (CPU): {np.round(np.mean(executions), 2)}s ± {np.round(np.std(executions), 2)}s"))

    # The compiled analysis binary
    initialisations, executions, cleanups = zip(*batch_simulation_many_seeds(
        simulate_reporting_compiled, seeds, args=(), kwargs=reporting_kwargs, silent=False
    ))
    display(Markdown(f"* Compiled Analysis (CPU): {np.round(np.mean(executions), 2)}s ± {np.round(np.std(executions), 2)}s"))

    # As the first variant, but with the event log enabled
    initialisations, executions, cleanups, event_log_sizes = zip(*batch_simulation_many_seeds(
        simulate_reporting_dynamic, seeds, args=("Classical()",), kwargs=dict(reporting_kwargs, log=True), silent=False
    ))
    display(Markdown(f"* Dynamic Plugins + Log (CPU): {np.round(np.mean(executions), 2)}s ± {np.round(np.std(executions), 2)}s [{format_bytes(np.mean(event_log_sizes))} ± {format_bytes(np.std(event_log_sizes))}]"))

    # CUDA algorithm, one simulation at a time
    # (presumably because all runs share a single GPU - TODO confirm)
    initialisations, executions, cleanups, event_log_sizes = zip(*batch_simulation_many_seeds(
        simulate_reporting_dynamic, seeds,
        args=("CUDA(ptx_jit:true, parallelism_mode:Monolithic(event_slice:10000000))",),
        kwargs=reporting_kwargs, silent=False, processes=1
    ))
    display(Markdown(f"* Dynamic Plugins (GPU): {np.round(np.mean(executions), 2)}s ± {np.round(np.std(executions), 2)}s"))

# Reporting Performance:

## progress only:

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 160/160 [17:15<00:00,  6.47s/it]


* Dynamic Plugins (CPU): 51.62s ± 0.52s

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 160/160 [17:10<00:00,  6.44s/it]


* Compiled Analysis (CPU): 51.45s ± 0.44s

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 160/160 [17:15<00:00,  6.47s/it]


* Dynamic Plugins + Log (CPU): 51.64s ± 0.51s [0B ± 0B]

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 160/160 [02:18<00:00,  1.15it/s]


* Dynamic Plugins (GPU): 0.64s ± 0.01s

## progress + speciation:

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 160/160 [17:22<00:00,  6.52s/it]


* Dynamic Plugins (CPU): 52.0s ± 0.49s

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 160/160 [17:13<00:00,  6.46s/it]


* Compiled Analysis (CPU): 51.55s ± 0.4s

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 160/160 [17:17<00:00,  6.48s/it]


* Dynamic Plugins + Log (CPU): 51.74s ± 0.36s [3.95MB ± 313.59kB]

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 160/160 [02:42<00:00,  1.02s/it]


* Dynamic Plugins (GPU): 0.64s ± 0.01s

## progress + speciation + dispersal:

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 160/160 [18:26<00:00,  6.92s/it]


* Dynamic Plugins (CPU): 55.18s ± 0.57s

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 160/160 [17:47<00:00,  6.67s/it]


* Compiled Analysis (CPU): 53.24s ± 0.58s

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 160/160 [25:05<00:00,  9.41s/it]


* Dynamic Plugins + Log (CPU): 75.05s ± 0.48s [5.57GB ± 17.33MB]

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 160/160 [1:09:57<00:00, 26.23s/it]


* Dynamic Plugins (GPU): 25.82s ± 0.24s