In [1]:
import subprocess
import shlex
import re
import json

import numpy as np
import multiprocessing as mp

from matplotlib import pyplot as plt
from scipy import stats
from tqdm import tqdm
from IPython.display import display, Markdown
from tempfile import TemporaryDirectory

In [2]:
import matplotlib as mpl
mpl.rcParams['figure.dpi'] = 400

def show():
    plt.savefig(f"{show.fig_counter}.pdf", dpi='figure', transparent=True, bbox_inches='tight')
    show.fig_counter += 1
    show.plt_show()

show.fig_counter = 0
show.plt_show = plt.show

plt.show = show

In [3]:
EVENT_PATTERN = re.compile(r"Event Summary:\n - Total #individuals:\n   \d+\n - Total #events:\n   - raw:\n     (\d+)\n   - deduplicated:\n     (\d+)")
EXECUTION_PATTERN = re.compile(r"The simulation took:\n - initialisation: ([^\n]+)\n - execution: ([^\n]+)\n - cleanup: ([^\n]+)\n")

In [4]:
TIME_PATTERN = re.compile(r"(\d+(?:\.\d+)?)([^\d]+)")
TIME_UNITS = {
    "ns": 0.000000001,
    "µs": 0.000001,
    "ms": 0.001,
    "s": 1.0,
}

def parse_time(time_str):
    match = TIME_PATTERN.match(time_str)
    
    if match is None:
        return None
    
    return float(match.group(1)) * TIME_UNITS[match.group(2)]

In [5]:
target_directory = json.loads(subprocess.run("cargo metadata --format-version 1".split(), capture_output=True).stdout)["target_directory"]

In [6]:
def simulate_throughput_monolithic(algorithm, speciation=0.001, seed=42, sample=1.0, scenario="NonSpatial(area:(100,100),deme:100)"):
    config = "".join(f"""
    (
        speciation: {speciation},
        sample: {sample},
        seed: {seed},

        algorithm: {algorithm},

        log: None,

        scenario: {scenario},

        reporters: [
            Plugin(
                library: "{target_directory}/release/deps/libnecsim_plugins_common.so",
                reporters: [Counter(), Execution()]
            )
        ],
    )
    """.split()).replace(",)", ")").replace(",]", "]")

    # Run the simulation
    result = subprocess.run(shlex.split(
        "cargo run --release --features rustcoalescence-algorithms-monolithic,"
        + "rustcoalescence-algorithms-independent,rustcoalescence-algorithms-cuda,"
        + f"necsim-partitioning-mpi --quiet -- simulate '{config}'"
    ), check=True, capture_output=True, text=True)

    match = EVENT_PATTERN.search(result.stdout)
    if match is None:
        print(result.stdout)
        print(result.stderr)
    raw_events = int(match.group(1))
    deduplicated_events = int(match.group(2))
    
    match = EXECUTION_PATTERN.search(result.stdout)
    if match is None:
        print(result.stdout)
        print(result.stderr)
    initialisation = parse_time(match.group(1))
    execution = parse_time(match.group(2))
    cleanup = parse_time(match.group(3))
        
    return raw_events, deduplicated_events, initialisation, execution, cleanup

In [7]:
def batch_simulation_many_seeds(simulate, seeds, args=tuple(), kwargs=dict(), silent=False, processes=mp.cpu_count()):
    results = []

    with tqdm(total=len(seeds), disable=silent) as progress:
        def update_progress(result):
            results.append(result)

            progress.update()
        
        def update_error(err):
            print(err)

        with mp.Pool(processes) as pool:
            for seed in seeds:
                pool.apply_async(simulate, args, {**kwargs, "seed": seed}, update_progress, update_error)

            pool.close()
            pool.join()
    
    return results

In [8]:
display(Markdown("# Event throughput:"))

display(Markdown("## GPU Information:"))

display(Markdown('>```\n' + subprocess.run(
    shlex.split("nvidia-smi"), check=True, capture_output=True, text=True
).stdout.replace('\n', '\n>') + '```'))

display(Markdown("## Throughput Results:"))

for algorithm in [
    "Classical()", "Gillespie()", "SkippingGillespie()",
    f"""Independent(
        dedup_cache: Relative(factor: {1.0}),
        delta_t: {2.0},
        parallelism_mode: Monolithic(event_slice: {100*100*100})
    )""",
    f"""CUDA(
        ptx_jit: true,
        dedup_cache: Relative(factor: {1.0}),
        delta_t: {2.0},
        parallelism_mode: Monolithic(event_slice: {100*100*100})
    )"""
]:
    display(Markdown(f"### {algorithm[:algorithm.find('(')]}:"))
    
    for scenario, sample in [
        (f"NonSpatial(area: ({100}, {100}), deme: {100})", 1.0),
        (f"AlmostInfinite(radius: {564}, sigma: {10.0})", 1.0),
        (f"""SpatiallyExplicit(
            habitat: "{target_directory}/../maps/madingley/fg0size12/habitat.tif",
            dispersal: "{target_directory}/../maps/madingley/fg0size12/dispersal.tif"
        )""", 0.000025),
    ]:
        seeds = np.random.randint(0, np.iinfo("uint64").max, dtype="uint64", size=160)

        raw_events, deduplicated_events, initialisations, executions, cleanups = zip(*batch_simulation_many_seeds(
            simulate_throughput_monolithic, seeds, args=(algorithm,), kwargs={
                "scenario":"NonSpatial(area:(100,100),deme:100)", "speciation":0.001
            }, silent=False
        ))
        
        raw_throughput = np.mean(raw_events) / np.mean(executions)
        throughput = np.mean(deduplicated_events) / np.mean(executions)
        
        if raw_throughput != throughput:
            display(Markdown(f"#### {scenario[:scenario.find('(')]}: {np.round(throughput, 1)}/s [raw: {np.round(raw_throughput, 1)}/s]"))
        else:
            display(Markdown(f"#### {scenario[:scenario.find('(')]}: {np.round(throughput, 1)}/s"))

# Event throughput:

## GPU Information:

>```
Wed May 26 08:01:48 2021       
>+-----------------------------------------------------------------------------+
>| NVIDIA-SMI 455.45.01    Driver Version: 455.45.01    CUDA Version: 11.1     |
>|-------------------------------+----------------------+----------------------+
>| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
>| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
>|                               |                      |               MIG M. |
>|===============================+======================+======================|
>|   0  Quadro K620         On   | 00000000:02:00.0 Off |                  N/A |
>| 34%   38C    P8     1W /  30W |    270MiB /  2000MiB |      0%      Default |
>|                               |                      |                  N/A |
>+-------------------------------+----------------------+----------------------+
>                                                                               
>+-----------------------------------------------------------------------------+
>| Processes:                                                                  |
>|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |
>|        ID   ID                                                   Usage      |
>|=============================================================================|
>|    0   N/A  N/A      1628      G   /usr/lib/xorg/Xorg                 31MiB |
>|    0   N/A  N/A      1752      G   /usr/bin/gnome-shell               52MiB |
>|    0   N/A  N/A      2238      G   /usr/lib/xorg/Xorg                148MiB |
>|    0   N/A  N/A      2406      G   /usr/bin/gnome-shell               19MiB |
>|    0   N/A  N/A      3096      G   ...gAAAAAAAAA --shared-files       10MiB |
>+-----------------------------------------------------------------------------+
>```

## Throughput Results:

### Classical:

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 160/160 [06:43<00:00,  2.52s/it]


#### NonSpatial: 1254818.3/s

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 160/160 [00:34<00:00,  4.69it/s]


#### AlmostInfinite: 1273684.1/s

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 160/160 [00:33<00:00,  4.74it/s]


#### SpatiallyExplicit: 1292075.6/s

### Gillespie:

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 160/160 [01:19<00:00,  2.01it/s]


#### NonSpatial: 408182.3/s

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 160/160 [01:20<00:00,  1.99it/s]


#### AlmostInfinite: 407259.4/s

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 160/160 [01:20<00:00,  1.98it/s]


#### SpatiallyExplicit: 408636.6/s

### SkippingGillespie:

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 160/160 [01:22<00:00,  1.93it/s]


#### NonSpatial: 391517.2/s

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 160/160 [01:21<00:00,  1.96it/s]


#### AlmostInfinite: 394148.8/s

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 160/160 [01:22<00:00,  1.93it/s]


#### SpatiallyExplicit: 391136.8/s

### Independent:

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 160/160 [00:46<00:00,  3.46it/s]


#### NonSpatial: 678272.9/s [raw: 1200940.1/s]

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 160/160 [00:46<00:00,  3.42it/s]


#### AlmostInfinite: 688750.6/s [raw: 1219348.7/s]

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 160/160 [00:47<00:00,  3.38it/s]


#### SpatiallyExplicit: 686446.4/s [raw: 1215514.3/s]

### CUDA:

  0%|                                                                                                                         | 0/160 [00:00<?, ?it/s]

Command '['cargo', 'run', '--release', '--features', 'rustcoalescence-algorithms-monolithic,rustcoalescence-algorithms-independent,rustcoalescence-algorithms-cuda,necsim-partitioning-mpi', '--quiet', '--', 'simulate', '(speciation:0.001,sample:1.0,seed:7903491094859661373,algorithm:CUDA(ptx_jit:true,dedup_cache:Relative(factor:1.0),delta_t:2.0,parallelism_mode:Monolithic(event_slice:1000000)),log:None,scenario:NonSpatial(area:(100,100),deme:100),reporters:[Plugin(library:"/home/ml5717/necsim-rust/target/release/deps/libnecsim_plugins_common.so",reporters:[Counter(),Execution()])])']' returned non-zero exit status 1.
Command '['cargo', 'run', '--release', '--features', 'rustcoalescence-algorithms-monolithic,rustcoalescence-algorithms-independent,rustcoalescence-algorithms-cuda,necsim-partitioning-mpi', '--quiet', '--', 'simulate', '(speciation:0.001,sample:1.0,seed:12458323833530760766,algorithm:CUDA(ptx_jit:true,dedup_cache:Relative(factor:1.0),delta_t:2.0,parallelism_mode:Monolithic(e

  8%|█████████                                                                                                       | 13/160 [02:52<32:27, 13.25s/it]


#### NonSpatial: 35605.6/s [raw: 74624.1/s]

  0%|                                                                                                                         | 0/160 [00:00<?, ?it/s]

Command '['cargo', 'run', '--release', '--features', 'rustcoalescence-algorithms-monolithic,rustcoalescence-algorithms-independent,rustcoalescence-algorithms-cuda,necsim-partitioning-mpi', '--quiet', '--', 'simulate', '(speciation:0.001,sample:1.0,seed:15899162468797680271,algorithm:CUDA(ptx_jit:true,dedup_cache:Relative(factor:1.0),delta_t:2.0,parallelism_mode:Monolithic(event_slice:1000000)),log:None,scenario:NonSpatial(area:(100,100),deme:100),reporters:[Plugin(library:"/home/ml5717/necsim-rust/target/release/deps/libnecsim_plugins_common.so",reporters:[Counter(),Execution()])])']' returned non-zero exit status 1.
Command '['cargo', 'run', '--release', '--features', 'rustcoalescence-algorithms-monolithic,rustcoalescence-algorithms-independent,rustcoalescence-algorithms-cuda,necsim-partitioning-mpi', '--quiet', '--', 'simulate', '(speciation:0.001,sample:1.0,seed:9871746348916692486,algorithm:CUDA(ptx_jit:true,dedup_cache:Relative(factor:1.0),delta_t:2.0,parallelism_mode:Monolithic(e

  8%|█████████                                                                                                       | 13/160 [02:50<32:05, 13.10s/it]


#### AlmostInfinite: 36015.7/s [raw: 75466.1/s]

  0%|                                                                                                                         | 0/160 [00:00<?, ?it/s]

Command '['cargo', 'run', '--release', '--features', 'rustcoalescence-algorithms-monolithic,rustcoalescence-algorithms-independent,rustcoalescence-algorithms-cuda,necsim-partitioning-mpi', '--quiet', '--', 'simulate', '(speciation:0.001,sample:1.0,seed:8468413040996070603,algorithm:CUDA(ptx_jit:true,dedup_cache:Relative(factor:1.0),delta_t:2.0,parallelism_mode:Monolithic(event_slice:1000000)),log:None,scenario:NonSpatial(area:(100,100),deme:100),reporters:[Plugin(library:"/home/ml5717/necsim-rust/target/release/deps/libnecsim_plugins_common.so",reporters:[Counter(),Execution()])])']' returned non-zero exit status 1.
Command '['cargo', 'run', '--release', '--features', 'rustcoalescence-algorithms-monolithic,rustcoalescence-algorithms-independent,rustcoalescence-algorithms-cuda,necsim-partitioning-mpi', '--quiet', '--', 'simulate', '(speciation:0.001,sample:1.0,seed:15436054482459063623,algorithm:CUDA(ptx_jit:true,dedup_cache:Relative(factor:1.0),delta_t:2.0,parallelism_mode:Monolithic(e

  8%|█████████                                                                                                       | 13/160 [02:53<32:40, 13.34s/it]


#### SpatiallyExplicit: 35577.4/s [raw: 74572.3/s]