In [1]:
import subprocess
import shlex
import re
import json

import numpy as np
import multiprocessing as mp

from matplotlib import pyplot as plt
from scipy import stats
from tqdm import tqdm
from IPython.display import display, Markdown
from tempfile import TemporaryDirectory

In [2]:
import matplotlib as mpl

# Render every figure at 400 DPI.
mpl.rcParams['figure.dpi'] = 400

def show(*args, **kwargs):
    """Save the current figure as a numbered PDF, then display it.

    Installed below as a replacement for ``plt.show``. Each call writes
    ``<counter>.pdf`` into the current working directory (transparent
    background, tight bounding box), increments the counter, and then calls
    the wrapped show function with whatever arguments were supplied.

    Attributes set on the function object:
        fig_counter: number used for the next saved file.
        plt_show: the show function that this wrapper delegates to.
    """
    plt.savefig(f"{show.fig_counter}.pdf", dpi='figure', transparent=True, bbox_inches='tight')
    show.fig_counter += 1
    show.plt_show(*args, **kwargs)

# File numbering restarts from zero every time this cell is executed.
show.fig_counter = 0
# If this cell has already been run, plt.show is an earlier wrapper carrying a
# plt_show attribute; delegate to the function it wrapped instead of wrapping
# the wrapper, otherwise every figure would be saved once per re-run.
show.plt_show = getattr(plt.show, "plt_show", plt.show)

plt.show = show

In [3]:
# Matches the event summary report in the simulation's stdout.
# Group 1 is the raw event count, group 2 the deduplicated event count.
EVENT_PATTERN = re.compile(
    r"Event Summary:\n"
    r" - Total #individuals:\n"
    r"   \d+\n"
    r" - Total #events:\n"
    r"   - raw:\n"
    r"     (\d+)\n"
    r"   - deduplicated:\n"
    r"     (\d+)"
)

# Matches the timing report in the simulation's stdout.
# Groups 1-3 are the initialisation, execution and cleanup durations as text.
EXECUTION_PATTERN = re.compile(
    r"The simulation took:\n"
    r" - initialisation: ([^\n]+)\n"
    r" - execution: ([^\n]+)\n"
    r" - cleanup: ([^\n]+)\n"
)

In [4]:
# Splits a duration such as "1.5ms" into its decimal value (group 1) and the
# non-digit text that follows it (group 2).
TIME_PATTERN = re.compile(r"(\d+(?:\.\d+)?)([^\d]+)")
# Conversion factor from each supported unit suffix to seconds.
TIME_UNITS = {
    "ns": 0.000000001,
    # The micro prefix has two Unicode spellings; accept both.
    "\u00b5s": 0.000001,  # MICRO SIGN
    "\u03bcs": 0.000001,  # GREEK SMALL LETTER MU
    "ms": 0.001,
    "s": 1.0,
}

def parse_time(time_str):
    """Convert a duration string such as ``"1.5ms"`` into seconds.

    Surrounding whitespace, and whitespace between the number and the unit,
    is ignored.

    Args:
        time_str: text starting with a decimal number followed by one of the
            unit suffixes listed in ``TIME_UNITS``.

    Returns:
        The duration in seconds as a float, or ``None`` when the text does not
        start with a number followed by a recognised unit.
    """
    match = TIME_PATTERN.match(time_str.strip())

    if match is None:
        return None

    # The unit group captures every non-digit character, so it can carry
    # whitespace that must be removed before the table lookup.
    factor = TIME_UNITS.get(match.group(2).strip())

    if factor is None:
        return None

    return float(match.group(1)) * factor

In [5]:
# Read cargo's "target_directory" from `cargo metadata`; the rest of the
# notebook builds paths from it. check=True makes a failing cargo call raise
# CalledProcessError here instead of a JSON decoding error on empty output.
target_directory = json.loads(subprocess.run(
    ["cargo", "metadata", "--format-version", "1"], check=True, capture_output=True,
).stdout)["target_directory"]

In [6]:
def simulate_throughput_monolithic(algorithm, scenario, speciation=0.001, seed=42, sample=1.0):
    """Run one rustcoalescence simulation and extract its event counts and timings.

    Args:
        algorithm: algorithm configuration text inserted into the config.
        scenario: scenario configuration text inserted into the config.
        speciation: value for the config's ``speciation`` field.
        seed: value for the config's ``seed`` field.
        sample: value for the config's ``sample`` field.

    Returns:
        A tuple ``(raw_events, deduplicated_events, initialisation, execution,
        cleanup)``. The two event counts are ints; the three durations are the
        results of ``parse_time`` on the reported timings.

    Raises:
        subprocess.CalledProcessError: the simulation exited with a non-zero
            status.
        RuntimeError: the simulation's stdout lacks the event summary or the
            timing report; the message carries the captured stdout and stderr.
    """
    # Collapse the template into a single token: all whitespace is removed and
    # the commas directly before a closing bracket are dropped.
    # NOTE: whitespace inside quoted values (e.g. paths) is removed as well.
    config = "".join(f"""
    (
        speciation: {speciation},
        sample: {sample},
        seed: {seed},

        algorithm: {algorithm},

        log: None,

        scenario: {scenario},

        reporters: [
            Plugin(
                library: "{target_directory}/release/deps/libnecsim_plugins_common.so",
                reporters: [Counter(), Execution()]
            )
        ],
    )
    """.split()).replace(",)", ")").replace(",]", "]")

    # Run the simulation. The arguments are passed as a list so that quotes in
    # the config or spaces in the binary path cannot change the tokenisation.
    result = subprocess.run(
        [f"{target_directory}/release/rustcoalescence", "simulate", config],
        check=True, capture_output=True, text=True,
    )

    event_match = EVENT_PATTERN.search(result.stdout)
    execution_match = EXECUTION_PATTERN.search(result.stdout)

    if event_match is None or execution_match is None:
        missing = "event summary" if event_match is None else "execution timing"
        # Put the captured output into the exception so it is still visible
        # when this function runs in a worker process.
        raise RuntimeError(
            f"rustcoalescence output is missing the {missing} report\n"
            f"--- stdout ---\n{result.stdout}\n"
            f"--- stderr ---\n{result.stderr}"
        )

    raw_events = int(event_match.group(1))
    deduplicated_events = int(event_match.group(2))

    initialisation = parse_time(execution_match.group(1))
    execution = parse_time(execution_match.group(2))
    cleanup = parse_time(execution_match.group(3))

    return raw_events, deduplicated_events, initialisation, execution, cleanup

In [7]:
def batch_simulation_many_seeds(simulate, seeds, args=tuple(), kwargs=dict(), silent=False, processes=mp.cpu_count()):
    """Call ``simulate`` once per seed in a process pool and gather the results.

    Args:
        simulate: callable executed in the pool as
            ``simulate(*args, **kwargs, seed=seed)``.
        seeds: sized iterable of seeds; one task is submitted per element.
        args: positional arguments shared by every call.
        kwargs: keyword arguments shared by every call; a ``"seed"`` entry is
            replaced by the per-task seed.
        silent: when true, the progress bar is disabled.
        processes: number of worker processes in the pool.

    Returns:
        A list holding the return value of every successful call, appended as
        each task's completion callback fires. A failed call is only printed;
        it contributes no entry and does not advance the progress bar.
    """
    collected = []

    with tqdm(total=len(seeds), disable=silent) as bar:
        def on_success(outcome):
            collected.append(outcome)
            bar.update()

        def on_failure(error):
            print(error)

        with mp.Pool(processes) as workers:
            for current_seed in seeds:
                call_kwargs = dict(kwargs)
                call_kwargs["seed"] = current_seed

                workers.apply_async(
                    simulate, args, call_kwargs,
                    callback=on_success, error_callback=on_failure,
                )

            # Stop accepting tasks, then wait for all submitted ones to finish.
            workers.close()
            workers.join()

    return collected

In [8]:
# Build rustcoalescence in release mode with the monolithic, independent and
# CUDA algorithm features enabled; a failing build raises CalledProcessError.
subprocess.run(
    shlex.split(
        "cargo build"
        f" --manifest-path {target_directory}/../rustcoalescence/Cargo.toml"
        " --release"
        " --features rustcoalescence-algorithms-monolithic,rustcoalescence-algorithms-independent,rustcoalescence-algorithms-cuda"
    ),
    check=True,
    capture_output=True,
    text=True,
);

CompletedProcess(args=['cargo', 'build', '--manifest-path', '/vol/bitbucket/ml5717/necsim-rust/target/../rustcoalescence/Cargo.toml', '--release', '--features', 'rustcoalescence-algorithms-monolithic,rustcoalescence-algorithms-independent,rustcoalescence-algorithms-cuda'], returncode=0, stdout='', stderr='    Finished release [optimized] target(s) in 2.75s\n')

In [9]:
def _display_command_report(heading, command):
    """Display a Markdown heading followed by a command's stdout as a quoted code block."""
    display(Markdown(heading))

    report = subprocess.run(
        shlex.split(command), check=True, capture_output=True, text=True
    ).stdout

    # Every line after the first gets a '>' prefix; the fence is opened and
    # closed around the whole report.
    display(Markdown('>```\n' + report.replace('\n', '\n>') + '```'))


_display_command_report("# RAM Information:", "free")
_display_command_report("# CPU Information:", "lscpu")
_display_command_report("# GPU Information:", "nvidia-smi")

# RAM Information:

>```
              total        used        free      shared  buff/cache   available
>Mem:       16319132      582336    13863816        2812     1872980    15396916
>Swap:       4194300      661216     3533084
>```

# CPU Information:

>```
Architecture:                    x86_64
>CPU op-mode(s):                  32-bit, 64-bit
>Byte Order:                      Little Endian
>Address sizes:                   46 bits physical, 48 bits virtual
>CPU(s):                          8
>On-line CPU(s) list:             0-7
>Thread(s) per core:              2
>Core(s) per socket:              4
>Socket(s):                       1
>NUMA node(s):                    1
>Vendor ID:                       GenuineIntel
>CPU family:                      6
>Model:                           62
>Model name:                      Intel(R) Xeon(R) CPU E5-1620 v2 @ 3.70GHz
>Stepping:                        4
>CPU MHz:                         2379.003
>CPU max MHz:                     3900.0000
>CPU min MHz:                     1200.0000
>BogoMIPS:                        7382.02
>Virtualisation:                  VT-x
>L1d cache:                       128 KiB
>L1i cache:                       128 KiB
>L2 cache:                        1 MiB
>L3 cache:                        10 MiB
>NUMA node0 CPU(s):               0-7
>Vulnerability Itlb multihit:     KVM: Mitigation: VMX disabled
>Vulnerability L1tf:              Mitigation; PTE Inversion; VMX conditional cache flushes, SMT vulnerable
>Vulnerability Mds:               Mitigation; Clear CPU buffers; SMT vulnerable
>Vulnerability Meltdown:          Mitigation; PTI
>Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp
>Vulnerability Spectre v1:        Mitigation; usercopy/swapgs barriers and __user pointer sanitization
>Vulnerability Spectre v2:        Mitigation; Full generic retpoline, IBPB conditional, IBRS_FW, STIBP conditional, RSB filling
>Vulnerability Srbds:             Not affected
>Vulnerability Tsx async abort:   Not affected
>Flags:                           fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm cpuid_fault epb pti ssbd ibrs ibpb stibp tpr_shadow vnmi flexpriority ept vpid fsgsbase smep erms xsaveopt dtherm ida arat pln pts md_clear flush_l1d
>```

# GPU Information:

>```
Sat Jun 12 09:27:45 2021       
>+-----------------------------------------------------------------------------+
>| NVIDIA-SMI 460.80       Driver Version: 460.80       CUDA Version: 11.2     |
>|-------------------------------+----------------------+----------------------+
>| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
>| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
>|                               |                      |               MIG M. |
>|===============================+======================+======================|
>|   0  GeForce GTX 1080    Off  | 00000000:05:00.0 Off |                  N/A |
>| 27%   37C    P8     7W / 180W |     84MiB /  8119MiB |      0%      Default |
>|                               |                      |                  N/A |
>+-------------------------------+----------------------+----------------------+
>                                                                               
>+-----------------------------------------------------------------------------+
>| Processes:                                                                  |
>|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |
>|        ID   ID                                                   Usage      |
>|=============================================================================|
>|    0   N/A  N/A      5240      G   /usr/lib/xorg/Xorg                 79MiB |
>+-----------------------------------------------------------------------------+
>```

In [10]:
display(Markdown("# Event throughput:"))

# Number of independently seeded simulations per (algorithm, scenario) pair.
SIMULATIONS_PER_BENCHMARK = 160
# The per-simulation seeds are drawn from an explicitly seeded generator so
# that re-running this cell benchmarks the same simulations again.
seed_rng = np.random.default_rng(42)

for algorithm in [
    "Classical()", "Gillespie()", "SkippingGillespie()",
    f"""Independent(
        dedup_cache: Relative(factor: {1.0}),
        delta_t: {2.0},
        parallelism_mode: Monolithic(event_slice: {100*100*100})
    )""",
    f"""CUDA(
        ptx_jit: true,
        dedup_cache: Relative(factor: {1.0}),
        delta_t: {2.0},
        parallelism_mode: Monolithic(event_slice: {100*100*100})
    )"""
]:
    # The heading is the algorithm name, i.e. the text before the first "(".
    display(Markdown(f"## {algorithm[:algorithm.find('(')]}:"))
    
    # Each scenario is paired with the sample value passed to the simulation.
    for scenario, sample in [
        (f"NonSpatial(area: ({100}, {100}), deme: {100})", 1.0),
        (f"AlmostInfinite(radius: {564}, sigma: {10.0})", 1.0),
        (f"""SpatiallyExplicit(
            habitat: "{target_directory}/../maps/madingley/fg0size12/habitat.tif",
            dispersal: "{target_directory}/../maps/madingley/fg0size12/dispersal.tif"
        )""", 0.000025),
    ]:
        scenario_name = scenario[:scenario.find('(')]

        seeds = seed_rng.integers(
            0, np.iinfo("uint64").max, dtype=np.uint64, size=SIMULATIONS_PER_BENCHMARK
        )

        # processes=1: the simulations of one benchmark run one at a time.
        results = batch_simulation_many_seeds(
            simulate_throughput_monolithic, seeds, args=(algorithm, scenario), kwargs={
                "speciation":0.001, "sample":sample
            }, silent=False, processes=1,
        )

        # Failed simulations are only printed by the batch helper; without any
        # result the unpacking below would fail with an unrelated error.
        if not results:
            raise RuntimeError(
                f"all simulations failed for algorithm {algorithm!r} and scenario {scenario!r}"
            )

        raw_events, deduplicated_events, initialisations, executions, cleanups = zip(*results)
        
        raw_events = np.array(raw_events)
        deduplicated_events = np.array(deduplicated_events)
        executions = np.array(executions)
        
        # Throughput is events per unit of execution time, computed per
        # simulation and then summarised by mean and standard deviation.
        raw_throughput = np.round(np.mean(raw_events / executions), 1)
        raw_std = np.round(np.std(raw_events / executions), 1)
        
        throughput = np.round(np.mean(deduplicated_events / executions), 1)
        std = np.round(np.std(deduplicated_events / executions), 1)
        
        # The raw figures are only shown when they differ from the deduplicated ones.
        if raw_throughput != throughput:
            display(Markdown(f"### {scenario_name}: {throughput}/s ± {std}/s [raw: {raw_throughput}/s ± {raw_std}/s]"))
        else:
            display(Markdown(f"### {scenario_name}: {throughput}/s ± {std}/s"))

# Event throughput:

## Classical:

100%|██████████| 160/160 [05:56<00:00,  2.23s/it]


### NonSpatial: 3271452.1/s ± 136012.9/s

100%|██████████| 160/160 [09:31<00:00,  3.57s/it]


### AlmostInfinite: 2711005.1/s ± 25190.6/s

100%|██████████| 160/160 [1:21:42<00:00, 30.64s/it]


### SpatiallyExplicit: 3680356.4/s ± 119204.3/s

## Gillespie:

100%|██████████| 160/160 [17:19<00:00,  6.50s/it]


### NonSpatial: 1084234.4/s ± 8678.0/s

100%|██████████| 160/160 [29:24<00:00, 11.03s/it]


### AlmostInfinite: 883282.0/s ± 17448.2/s

100%|██████████| 160/160 [3:52:22<00:00, 87.14s/it]


### SpatiallyExplicit: 1223953.2/s ± 17432.3/s

## SkippingGillespie:

100%|██████████| 160/160 [17:50<00:00,  6.69s/it]


### NonSpatial: 1052739.2/s ± 9129.2/s

100%|██████████| 160/160 [29:56<00:00, 11.23s/it]


### AlmostInfinite: 865348.1/s ± 5903.2/s

100%|██████████| 160/160 [13:17<00:00,  4.99s/it]


### SpatiallyExplicit: 1153410.8/s ± 11669.9/s

## Independent:

100%|██████████| 160/160 [10:42<00:00,  4.02s/it]


### NonSpatial: 1487671.2/s ± 21189.6/s [raw: 2634491.4/s ± 36958.7/s]

100%|██████████| 160/160 [15:52<00:00,  5.95s/it]


### AlmostInfinite: 1423771.0/s ± 10858.6/s [raw: 2166790.7/s ± 15487.7/s]

100%|██████████| 160/160 [1:43:54<00:00, 38.97s/it]


### SpatiallyExplicit: 2838130.8/s ± 15913.8/s [raw: 2838718.9/s ± 15920.3/s]

## CUDA:

100%|██████████| 160/160 [37:31<00:00, 14.07s/it]


### NonSpatial: 427176.2/s ± 5212.6/s [raw: 894874.1/s ± 10570.9/s]

100%|██████████| 160/160 [38:27<00:00, 14.42s/it]


### AlmostInfinite: 592585.4/s ± 6837.1/s [raw: 1026352.6/s ± 11134.7/s]

100%|██████████| 160/160 [1:49:12<00:00, 40.95s/it]


### SpatiallyExplicit: 2707489.0/s ± 16896.1/s [raw: 2708499.9/s ± 16903.2/s]