In [1]:
import subprocess
import shlex
import re
import json

import numpy as np

from IPython.display import display, Markdown

In [2]:
# Each pattern captures the integer inside an `<Attribute>: Ok(<n>)` line of
# the simulation's stdout, one pattern per reported attribute.
REGISTERS_PATTERN, MAX_THREADS_PATTERN, LOCAL_BYTES_PATTERN = map(
    re.compile,
    (
        r"NumRegisters: Ok\((\d+)\)",
        r"MaxThreadsPerBlock: Ok\((\d+)\)",
        r"LocalSizeBytes: Ok\((\d+)\)",
    ),
)

In [3]:
# Ask cargo for the workspace metadata and keep only its "target_directory"
# entry. `check=True` makes a failing `cargo metadata` raise
# `subprocess.CalledProcessError` right here instead of surfacing later as a
# confusing `json.JSONDecodeError` on empty stdout.
target_directory = json.loads(subprocess.run(
    "cargo metadata --format-version 1".split(), check=True, capture_output=True
).stdout)["target_directory"]

In [4]:
def _extract_jit_pair(pattern, result):
    """Return the first two integers that `pattern` captures in `result.stdout`.

    The second search starts where the first match ended, so the two values
    come from two distinct occurrences of the same attribute in the output.

    Args:
        pattern: compiled regex whose group 1 captures an integer.
        result: completed process exposing text `stdout` and `stderr`.

    Returns:
        A `(first, second)` tuple of ints.

    Raises:
        RuntimeError: if fewer than two occurrences are found. The message
            carries the captured stdout and stderr of the process.
    """
    first = pattern.search(result.stdout)
    second = None if first is None else pattern.search(result.stdout, first.end())

    if second is None:
        raise RuntimeError(
            f"expected two matches of {pattern.pattern!r} in the simulation output\n"
            f"--- stdout ---\n{result.stdout}\n--- stderr ---\n{result.stderr}"
        )

    return int(first.group(1)), int(second.group(1))


def simulate_registers_monolithic(scenario, sample=1.0, speciation=False, dispersal=False):
    """Run one CUDA simulation through cargo and parse the attributes it prints.

    Args:
        scenario: scenario expression, inserted verbatim into the config.
        sample: value written to the config's `sample` field.
        speciation: if exactly `True` (and `dispersal` is not), the
            `Biodiversity()` reporter is configured.
        dispersal: if exactly `True`, the `Counter()` reporter is configured;
            this takes precedence over `speciation`.

    Returns:
        Three `(no_jit, with_jit)` int tuples, in this order: registers,
        max threads per block, local size in bytes. For each attribute the
        first occurrence in stdout is treated as the value without PTX JIT
        and the second occurrence as the value with it.

    Raises:
        subprocess.CalledProcessError: if the cargo command exits non-zero.
        RuntimeError: if an attribute is not reported twice in stdout.
    """
    if dispersal is True:
        reporters = "Counter()"
    elif speciation is True:
        reporters = "Biodiversity()"
    else:
        reporters = ""

    # `.split()` + `"".join` strips ALL whitespace from the rendered config
    # (including any inside `scenario`); the two `replace` calls then drop
    # the trailing commas left before closing brackets.
    config = "".join(f"""
    (
        speciation: 1.0,
        sample: {sample},
        seed: 42,

        algorithm: CUDA(ptx_jit: true),

        scenario: {scenario},

        reporters: [
            Plugin(
                library: "{target_directory}/release/deps/libnecsim_plugins_common.so",
                reporters: [{reporters}]
            )
        ],
    )
    """.split()).replace(",)", ")").replace(",]", "]")

    # Run the simulation. The config is passed as its own argv entry rather
    # than being quoted into a command string, so a quote character inside
    # it cannot break the tokenisation.
    result = subprocess.run([
        "cargo", "run", "--release",
        "--features", "rustcoalescence-algorithms-cuda",
        "--quiet", "--", "simulate", config,
    ], check=True, capture_output=True, text=True)

    registers = _extract_jit_pair(REGISTERS_PATTERN, result)
    max_threads = _extract_jit_pair(MAX_THREADS_PATTERN, result)
    local_bytes = _extract_jit_pair(LOCAL_BYTES_PATTERN, result)

    return registers, max_threads, local_bytes

In [5]:
display(Markdown("# GPU Information:"))

# Capture the `nvidia-smi` report, prefix every line break with `>` and wrap
# the result in a fenced block so it is rendered as quoted Markdown.
smi_report = subprocess.run(
    shlex.split("nvidia-smi"), check=True, capture_output=True, text=True
).stdout
quoted_report = smi_report.replace('\n', '\n>')

display(Markdown('>```\n' + quoted_report + '```'))

# GPU Information:

>```
Thu May 27 06:28:08 2021       
>+-----------------------------------------------------------------------------+
>| NVIDIA-SMI 460.73.01    Driver Version: 460.73.01    CUDA Version: 11.2     |
>|-------------------------------+----------------------+----------------------+
>| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
>| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
>|                               |                      |               MIG M. |
>|===============================+======================+======================|
>|   0  GeForce GTX 1080    Off  | 00000000:05:00.0  On |                  N/A |
>| 27%   38C    P8     7W / 180W |     82MiB /  8119MiB |      0%      Default |
>|                               |                      |                  N/A |
>+-------------------------------+----------------------+----------------------+
>                                                                               
>+-----------------------------------------------------------------------------+
>| Processes:                                                                  |
>|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |
>|        ID   ID                                                   Usage      |
>|=============================================================================|
>|    0   N/A  N/A      4336      G   /usr/lib/xorg/Xorg                 79MiB |
>+-----------------------------------------------------------------------------+
>```

In [6]:
# Raw string literals keep the Markdown escapes (`\#`, `\+`) byte-identical at
# runtime without relying on invalid Python escape sequences, which emit a
# DeprecationWarning / SyntaxWarning on current interpreters.
display(Markdown(r"# CUDA Algorithm Register Usage + PTX JIT effect: *\#registers(\#threads)*"))

# One (scenario expression, sample value) pair per scenario to measure.
for scenario, sample in [
    (f"NonSpatial(area: ({100}, {100}), deme: {100})", 0.00001),
    (f"""SpatiallyImplicit(
        local_area: ({100}, {100}), local_deme: {100},
        meta_area: ({100}, {100}), meta_deme: {100},
        migration: {0.1}
    )""", 0.00001),
    (f"AlmostInfinite(radius: {564}, sigma: {10.0})", 0.00001),
    (f"""SpatiallyExplicit(
        habitat: "{target_directory}/../maps/madingley/fg0size12/habitat.tif",
        dispersal: "{target_directory}/../maps/madingley/fg0size12/dispersal.tif"
    )""", 0.00000001),
]:
    # The section heading is the scenario name, i.e. everything before the first "(".
    display(Markdown(f"## {scenario[:scenario.find('(')]}:"))
    
    for speciation, dispersal, name in [
        (False, False, "progress events only"),
        (True, False, r"progress \+ speciation events"),
        (True, True, r"progress \+ speciation \+ dispersal events")
    ]:
        registers, max_threads, local_bytes = simulate_registers_monolithic(
            scenario, sample=sample, speciation=speciation, dispersal=dispersal
        )
        
        # The reported local size must be zero both without and with PTX JIT
        # (presumably: no spilling to local memory -- TODO confirm).
        assert local_bytes[0] == 0, f"non-zero LocalSizeBytes without PTX JIT: {local_bytes[0]}"
        assert local_bytes[1] == 0, f"non-zero LocalSizeBytes with PTX JIT: {local_bytes[1]}"
        
        # Format: registers(max threads) without PTX JIT -> with PTX JIT
        display(Markdown(f"* {name}: {registers[0]}({max_threads[0]}) -> {registers[1]}({max_threads[1]})"))

# CUDA Algorithm Register Usage + PTX JIT effect: *\#registers(\#threads)*

## NonSpatial:

* progress events only: 70(896) -> 54(1024)

* progress \+ speciation events: 70(896) -> 54(1024)

* progress \+ speciation \+ dispersal events: 75(768) -> 54(1024)

## SpatiallyImplicit:

* progress events only: 77(768) -> 53(1024)

* progress \+ speciation events: 77(768) -> 53(1024)

* progress \+ speciation \+ dispersal events: 87(640) -> 56(1024)

## AlmostInfinite:

* progress events only: 68(896) -> 57(1024)

* progress \+ speciation events: 68(896) -> 57(1024)

* progress \+ speciation \+ dispersal events: 74(768) -> 56(1024)

## SpatiallyExplicit:

* progress events only: 88(640) -> 54(1024)

* progress \+ speciation events: 88(640) -> 54(1024)

* progress \+ speciation \+ dispersal events: 90(640) -> 60(1024)