In [1]:
import subprocess
import shlex
import json

import numpy as np

from pathlib import Path
from IPython.display import display, Markdown

In [2]:
# Ask cargo for the workspace metadata and keep only its "target_directory" entry;
# the rest of the notebook builds paths to the release binary and plugins from it.
# `check=True` makes a failing `cargo metadata` raise CalledProcessError here instead
# of surfacing later as a JSON decoding error on empty output.
target_directory = json.loads(subprocess.run(
    ["cargo", "metadata", "--format-version", "1"], check=True, capture_output=True,
).stdout)["target_directory"]

In [3]:
def shlex_split_and_print(args):
    """Echo the command line string `args` to stdout and return it split into tokens.

    The string is tokenised by `shlex.split`; the resulting list is returned.
    """
    print(args)

    tokens = shlex.split(args)
    return tokens

In [4]:
def simulate_cuda_reporting_dynamic(
    speciation=0.001, seed=42, sample=1.0, radius=564, sigma=10.0, report_speciation=False, report_dispersal=False, output="profile", launches=10,
    event_slice=10000000,
):
    """Profile one CUDA `rustcoalescence` simulation with Nsight Compute and Nsight Systems.

    A simulation configuration is assembled from the arguments and passed to the
    release `rustcoalescence` binary under `target_directory`, once under each
    profiler. Each command line is echoed to stdout before it runs.

    Parameters:
        speciation, seed, sample: interpolated into the top-level config fields of
            the same name.
        radius, sigma: interpolated into the `AlmostInfinite` scenario.
        report_speciation: if true, request the `Biodiversity()` reporter in
            addition to `Execution()`.
        report_dispersal: if true, request `Execution()`, `Biodiversity()` and
            `Counter()`; takes precedence over `report_speciation`.
        output: suffix of the report names, `compute-{output}` and `nsys-{output}`,
            written to the `ml5717` directory in the user's home directory.
        launches: value passed to the Nsight Compute `-c` option.
        event_slice: `event_slice` of the `Monolithic` parallelism mode.

    Returns:
        None. A profiler that exits with a non-zero status does not raise; its
        exit status and captured stderr are printed so the failure is visible.
    """
    if report_dispersal:
        reporters = ["Execution()", "Biodiversity()", "Counter()"]
    elif report_speciation:
        reporters = ["Execution()", "Biodiversity()"]
    else:
        reporters = ["Execution()"]

    # The template is written readably and then compacted: all whitespace is removed
    # and the trailing commas before closing brackets are dropped.
    # NOTE: the whitespace removal also applies to interpolated values, so a
    # `target_directory` containing whitespace would be mangled in the library path.
    config = "".join(f"""
    (
        speciation: {speciation},
        sample: {sample},
        seed: {seed},

        algorithm: CUDA(
            ptx_jit: true,
            parallelism_mode: Monolithic(event_slice: {event_slice}),
        ),

        scenario: AlmostInfinite(
            radius: {radius},
            sigma: {sigma},
        ),

        reporters: [
            Plugin(
                library: "{target_directory}/release/deps/libnecsim_plugins_common.so",
                reporters: [{', '.join(reporters)}]
            )
        ],
    )
    """.split()).replace(",)", ")").replace(",]", "]")

    # The commands run without a shell, so `~` would reach the profilers verbatim;
    # expand it here instead.
    profile_dir = Path("~/ml5717").expanduser()

    # Quote every interpolated piece so that `shlex.split` restores it as exactly one
    # argument, whatever characters it contains.
    simulation = (
        shlex.quote(f"{target_directory}/release/rustcoalescence")
        + f" simulate {shlex.quote(config)}"
    )
    compute_report = shlex.quote(f"{profile_dir}/compute-{output}")
    systems_report = shlex.quote(f"{profile_dir}/nsys-{output}")

    commands = (
        # Profile the first `launches` kernel executions using NVIDIA Nsight Compute
        "/vol/cuda/10.0.130/NsightCompute-1.0/target/linux-desktop-glibc_2_11_3-glx-x64/nv-nsight-cu-cli --mode=launch-and-attach "
        + f"-k simulate -c {launches} -o {compute_report} -f {simulation}",
        # Profile the full simulation using NVIDIA Nsight Systems
        f"/vol/cuda/11.1.0-cudnn8.0.4.30/nsight-systems-2020.3.4/bin/nsys profile -o {systems_report} "
        + f"-f true -t cuda {simulation}",
    )

    for command in commands:
        result = subprocess.run(shlex_split_and_print(command), capture_output=True, text=True)

        # Stay best-effort (the next profiler still runs), but do not hide failures.
        if result.returncode != 0:
            print(f"Profiler exited with status {result.returncode}:\n{result.stderr}")

In [5]:
# Build the release `rustcoalescence` binary with the monolithic, independent and CUDA
# algorithms enabled. The arguments are passed as an explicit list so that the
# interpolated manifest path always stays a single argument, even if it contains
# whitespace or quote characters. `check=True` aborts the notebook on a failed build.
subprocess.run(
    [
        "cargo", "build",
        "--manifest-path", f"{target_directory}/../rustcoalescence/Cargo.toml",
        "--release",
        "--features", ",".join([
            "rustcoalescence-algorithms-monolithic",
            "rustcoalescence-algorithms-independent",
            "rustcoalescence-algorithms-cuda",
        ]),
    ],
    check=True, capture_output=True, text=True,
);

CompletedProcess(args=['cargo', 'build', '--manifest-path', '/vol/bitbucket/ml5717/necsim-rust/target/../rustcoalescence/Cargo.toml', '--release', '--features', 'rustcoalescence-algorithms-monolithic,rustcoalescence-algorithms-independent,rustcoalescence-algorithms-cuda'], returncode=0, stdout='', stderr='    Finished release [optimized] target(s) in 2.65s\n')

In [6]:
def _display_command_output(title, command):
    """Show `title` as a Markdown heading, then the stdout of `command` as a quoted code block.

    `command` is split with `shlex.split` and run with `check=True`, so a missing or
    failing tool raises instead of rendering an empty report.
    """
    display(Markdown(f"# {title}"))

    stdout = subprocess.run(
        shlex.split(command), check=True, capture_output=True, text=True
    ).stdout

    # Every line needs the `>` prefix, including the first one after the opening
    # fence; otherwise the blockquote (and the fenced block inside it) ends early.
    quoted = "".join(f">{line}\n" for line in stdout.splitlines())
    display(Markdown(f">```\n{quoted}>```"))


_display_command_output("RAM Information:", "free")
_display_command_output("CPU Information:", "lscpu")
_display_command_output("GPU Information:", "nvidia-smi")

# RAM Information:

>```
              total        used        free      shared  buff/cache   available
>Mem:       16319132     1231832     7291452      274476     7795848    14475768
>Swap:       4194300           0     4194300
>```

# CPU Information:

>```
Architecture:                    x86_64
>CPU op-mode(s):                  32-bit, 64-bit
>Byte Order:                      Little Endian
>Address sizes:                   46 bits physical, 48 bits virtual
>CPU(s):                          8
>On-line CPU(s) list:             0-7
>Thread(s) per core:              2
>Core(s) per socket:              4
>Socket(s):                       1
>NUMA node(s):                    1
>Vendor ID:                       GenuineIntel
>CPU family:                      6
>Model:                           62
>Model name:                      Intel(R) Xeon(R) CPU E5-1620 v2 @ 3.70GHz
>Stepping:                        4
>CPU MHz:                         3123.900
>CPU max MHz:                     3900.0000
>CPU min MHz:                     1200.0000
>BogoMIPS:                        7382.37
>Virtualisation:                  VT-x
>L1d cache:                       128 KiB
>L1i cache:                       128 KiB
>L2 cache:                        1 MiB
>L3 cache:                        10 MiB
>NUMA node0 CPU(s):               0-7
>Vulnerability Itlb multihit:     KVM: Mitigation: VMX disabled
>Vulnerability L1tf:              Mitigation; PTE Inversion; VMX conditional cache flushes, SMT vulnerable
>Vulnerability Mds:               Mitigation; Clear CPU buffers; SMT vulnerable
>Vulnerability Meltdown:          Mitigation; PTI
>Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp
>Vulnerability Spectre v1:        Mitigation; usercopy/swapgs barriers and __user pointer sanitization
>Vulnerability Spectre v2:        Mitigation; Full generic retpoline, IBPB conditional, IBRS_FW, STIBP conditional, RSB filling
>Vulnerability Srbds:             Not affected
>Vulnerability Tsx async abort:   Not affected
>Flags:                           fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm cpuid_fault epb pti ssbd ibrs ibpb stibp tpr_shadow vnmi flexpriority ept vpid fsgsbase smep erms xsaveopt dtherm ida arat pln pts md_clear flush_l1d
>```

# GPU Information:

>```
Sat Jun  5 12:50:44 2021       
>+-----------------------------------------------------------------------------+
>| NVIDIA-SMI 460.73.01    Driver Version: 460.73.01    CUDA Version: 11.2     |
>|-------------------------------+----------------------+----------------------+
>| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
>| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
>|                               |                      |               MIG M. |
>|===============================+======================+======================|
>|   0  GeForce GTX 1080    Off  | 00000000:05:00.0 Off |                  N/A |
>| 27%   39C    P8     6W / 180W |     80MiB /  8119MiB |      0%      Default |
>|                               |                      |                  N/A |
>+-------------------------------+----------------------+----------------------+
>                                                                               
>+-----------------------------------------------------------------------------+
>| Processes:                                                                  |
>|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |
>|        ID   ID                                                   Usage      |
>|=============================================================================|
>|    0   N/A  N/A      4336      G   /usr/lib/xorg/Xorg                 75MiB |
>+-----------------------------------------------------------------------------+
>```

In [7]:
# Draw a single random seed and reuse it for every run below, so the three profiles
# differ only in which reporters are requested.
seed = np.random.randint(0, np.iinfo(np.uint64).max, dtype=np.uint64)

# Each entry is (report_speciation, report_dispersal, output name suffix).
for report_speciation, report_dispersal, output in (
    (False, False, "progress"),
    (True, False, "progress-speciation"),
    (True, True, "progress-speciation-dispersal"),
):
    simulate_cuda_reporting_dynamic(
        seed=seed,
        speciation=0.001,
        sample=1.0,
        radius=178,
        sigma=0.0,
        report_speciation=report_speciation,
        report_dispersal=report_dispersal,
        output=output,
    )

/vol/cuda/10.0.130/NsightCompute-1.0/target/linux-desktop-glibc_2_11_3-glx-x64/nv-nsight-cu-cli --mode=launch-and-attach -k simulate -c 10 -o ~/ml5717/compute-progress -f /vol/bitbucket/ml5717/necsim-rust/target/release/rustcoalescence simulate '(speciation:0.001,sample:1.0,seed:14729773322303406744,algorithm:CUDA(ptx_jit:true,parallelism_mode:Monolithic(event_slice:10000000)),scenario:AlmostInfinite(radius:178,sigma:0.0),reporters:[Plugin(library:"/vol/bitbucket/ml5717/necsim-rust/target/release/deps/libnecsim_plugins_common.so",reporters:[Execution()])])'
/vol/cuda/11.1.0-cudnn8.0.4.30/nsight-systems-2020.3.4/bin/nsys profile -o ~/ml5717/nsys-progress -f true -t cuda /vol/bitbucket/ml5717/necsim-rust/target/release/rustcoalescence simulate '(speciation:0.001,sample:1.0,seed:14729773322303406744,algorithm:CUDA(ptx_jit:true,parallelism_mode:Monolithic(event_slice:10000000)),scenario:AlmostInfinite(radius:178,sigma:0.0),reporters:[Plugin(library:"/vol/bitbucket/ml5717/necsim-rust/target