In [1]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import subprocess
from tqdm import tqdm
import time

## Tools for loading the program versions

In [2]:

def compile_version(version, output=False):
    """Build one program version with ``make VERSION=<version>``.

    The build runs through bash inside ``fortran_programs`` after loading the
    ``daint-gpu`` and ``perftools-lite`` modules and switching from
    ``PrgEnv-gnu`` to ``PrgEnv-cray``.

    Parameters
    ----------
    version : str
        Value handed to make as ``VERSION``.
    output : bool, optional
        When True, echo the combined stdout/stderr of the build line by line.

    Returns
    -------
    bool
        True when the build command exited with status 0, False otherwise.
    """
    command = (
        "module load daint-gpu && module switch PrgEnv-gnu PrgEnv-cray "
        "&& module load perftools-lite && cd fortran_programs "
        f"&& make VERSION={version} "
    )
    try:
        result = subprocess.check_output(
            command, shell=True, executable="/bin/bash", stderr=subprocess.STDOUT
        )
    except subprocess.CalledProcessError as cpe:
        # A failed build must be visible even when the log itself is hidden.
        result = cpe.output
        succeeded = False
        print(f"Version {version} failed (exit code {cpe.returncode})")
    else:
        succeeded = True
        print(f"Version {version} succeeded")

    # No `finally` here: if anything else interrupts the build (e.g. Ctrl-C),
    # `result` was never assigned and the exception should simply propagate.
    if output and result:
        for line in result.splitlines():
            print(line.decode(errors="replace"))
    return succeeded

def compile_all_versions(versions, output=False, clean_old=False):
    """Compile every entry of ``versions`` in order.

    Parameters
    ----------
    versions : iterable of str
        Version identifiers, each forwarded to ``compile_version``.
    output : bool, optional
        Forwarded to ``compile_version`` to control echoing of the build log.
    clean_old : bool, optional
        When true, run ``make clean`` in ``fortran_programs`` before building.
    """
    if not clean_old:
        print("Not removing previously compiled programs")
    else:
        print("Remove all old outputs")
        subprocess.run(
            "cd fortran_programs && make clean",
            shell=True,
            executable="/bin/bash",
            stderr=subprocess.STDOUT,
        )

    for name in versions:
        compile_version(name, output)

In [3]:
# Version identifiers are the part of the program name that follows
# "stencil2d-": execute_version() prepends that prefix itself when it builds
# the executable path, so entries here must not repeat it.
versions = [
    "GPT_mem_alloc_fail",
    "GPT_kblock",
    "orig",
    "GPT_loop_fusion2",
    "GPT_loop_fusion2_omp",
]

In [4]:
# Rebuild every entry of `versions` after a `make clean`; output=False keeps
# the per-version build logs out of the notebook.
compile_all_versions(versions, output=False, clean_old=True)

Remove all old outputs
rm -f -rf *~ *.o *.mod *.MOD *.i core.* *.out *.lst *.x *.x+orig *.x+[0-9]* *.dat *.report result_*.py report*.txt
Version GPT_kblock succeeded


KeyboardInterrupt: 

In [None]:
def execute_version(version, nx=64, ny=64, nz=64, num_iter=1024, attempt=0, max_attempts=5):
    """Run one compiled program through ``srun`` and return the ``data`` it reports.

    The program ``./stencil2d-<version>.x+orig`` is launched with 12 ranks from
    inside ``fortran_programs``. Its combined stdout/stderr is executed as
    Python source and the ``data`` object it defines is returned.

    Parameters
    ----------
    version : str
        Suffix of the executable name (without the ``stencil2d-`` prefix).
    nx, ny, nz, num_iter : int, optional
        Forwarded to the program as ``--nx``, ``--ny``, ``--nz``, ``--num_iter``.
    attempt : int, optional
        Number of launches already spent; kept for backward compatibility.
    max_attempts : int, optional
        Retry budget: once ``attempt`` reaches this value the function gives up.

    Returns
    -------
    object or None
        The ``data`` object defined by the program output, or None when the
        output kept containing "Job" until the retry budget was exhausted.

    Raises
    ------
    RuntimeError
        If the output cannot be executed as Python or does not define ``data``.
    """
    command = (
        f"cd fortran_programs && srun -n 12 ./stencil2d-{version}.x+orig "
        f"--nx {nx} --ny {ny} --nz {nz} --num_iter {num_iter}"
    )

    while True:
        try:
            raw = subprocess.check_output(
                command, shell=True, executable="/bin/bash", stderr=subprocess.STDOUT
            )
        except subprocess.CalledProcessError as cpe:
            print("there was an error")
            raw = cpe.output
        code_string = raw.decode(errors="replace")

        # Output mentioning "Job" is treated as a scheduler message, not a result.
        if "Job" not in code_string:
            break
        # `>=` (not `==`) so a caller-supplied attempt beyond the budget also stops.
        if attempt >= max_attempts:
            print("Servers are busy, ending now")
            return None
        print(f"Job failed with the code {code_string}")
        print(f"It was attempt {attempt}, trying again")
        time.sleep(1)  # short pause before asking the scheduler again
        attempt += 1

    # NOTE(security): the program output is executed as Python code. Only run
    # executables you built yourself.
    # Execute in a private copy of the module namespace: the output can still
    # use names such as `np`, but it cannot overwrite notebook globals, and a
    # stale `data` left over from an earlier run can never be returned.
    namespace = dict(globals())
    namespace.pop("data", None)
    try:
        exec(code_string, namespace)
    except Exception as exc:
        raise RuntimeError(
            f"Could not interpret the output of version {version}:\n{code_string}"
        ) from exc
    if "data" not in namespace:
        raise RuntimeError(
            f"Output of version {version} did not define 'data':\n{code_string}"
        )
    return namespace["data"]

def execute_versions(versions, nx=64, ny=64, nz=64, num_iter=1024):
    """Run each version with the same problem size.

    Returns a dict mapping every version identifier to whatever
    ``execute_version`` returned for it.
    """
    return {
        name: execute_version(name, nx=nx, ny=ny, nz=nz, num_iter=num_iter)
        for name in versions
    }
    

In [None]:
# Pass only the version suffix: execute_version() prepends "stencil2d-" itself,
# so "stencil2d-GPT_mem_alloc_fail" would expand to the non-matching path
# ./stencil2d-stencil2d-GPT_mem_alloc_fail.x+orig.
result = execute_version("GPT_mem_alloc_fail", num_iter=128)
print(result)

In [None]:
def read_field_from_file(filename, num_halo=None):
    """Read a binary field file and return its values as a NumPy array.

    The file is read as an int32 header ``(rank, nbits, num_halo, n_1 .. n_rank)``
    followed by ``n_1 * ... * n_rank`` floating-point values, stored as float32
    when ``nbits == 32`` and float64 otherwise.

    Parameters
    ----------
    filename : str or path-like
        Path of the file to read.
    num_halo : optional
        Accepted for backward compatibility and ignored; the halo width stored
        in the file header is not needed to decode the data.

    Returns
    -------
    numpy.ndarray
        The values reshaped to the header dimensions in reverse order, i.e.
        ``(nz, ny, nx)`` for rank 3 and ``(ny, nx)`` for rank 2.

    Raises
    ------
    ValueError
        If the file is too short for its own header or for the announced size.
    """
    int_bytes = np.dtype(np.int32).itemsize
    fixed = np.fromfile(filename, dtype=np.int32, count=3)
    if fixed.size != 3:
        raise ValueError(f"{filename}: header is incomplete")
    # Plain Python ints: the product of the dimensions must not wrap in int32.
    rank, nbits = int(fixed[0]), int(fixed[1])

    # Read exactly `rank` sizes. Reading a fixed six-entry header would pull the
    # first data bytes of a rank-2 file into the header.
    dims = [int(n) for n in np.fromfile(filename, dtype=np.int32, count=rank,
                                        offset=3 * int_bytes)]
    if len(dims) != rank:
        raise ValueError(f"{filename}: header is incomplete")

    count = 1
    for extent in dims:
        count *= extent

    # Skip the header by bytes, not by elements of the data dtype: the header
    # length is not always a whole number of float64 values.
    dtype = np.float32 if nbits == 32 else np.float64
    data = np.fromfile(filename, dtype=dtype, count=count,
                       offset=(3 + rank) * int_bytes)
    if data.size != count:
        raise ValueError(
            f"{filename}: expected {count} values, found {data.size}"
        )
    return np.reshape(data, dims[::-1])

def validate_results():
    """Show the middle k-level of the input and output fields side by side.

    Reads ``fortran_programs/in_field.dat`` and ``fortran_programs/out_field.dat``
    and draws each one as an image with its own colorbar, using a fixed colour
    range of [-0.1, 1.1].
    """
    fig, axs = plt.subplots(1, 2, figsize=(12, 4))

    panels = (
        ('fortran_programs/in_field.dat', 'Initial condition (k = {})'),
        ('fortran_programs/out_field.dat', 'Final result (k = {})'),
    )
    for ax, (path, title_template) in zip(axs, panels):
        field = read_field_from_file(path)
        mid_level = field.shape[0] // 2  # index along the first (k) axis
        image = ax.imshow(field[mid_level, :, :], origin='lower', vmin=-0.1, vmax=1.1)
        fig.colorbar(image, ax=ax)
        ax.set_title(title_template.format(mid_level))

    plt.show()

In [None]:
# Visual sanity check: plot the middle level of fortran_programs/in_field.dat
# and fortran_programs/out_field.dat side by side.
# NOTE(review): presumably these files come from the most recent program run — confirm.
validate_results()

In [None]:
def get_scan_data(version, nxs, nys, nz, num_iter):
    """Time one version over every (nx, ny) combination.

    Parameters
    ----------
    version : str
        Version identifier forwarded to ``execute_version``.
    nxs, nys : iterable of int
        Horizontal sizes to scan; every pair (nx, ny) is run once.
    nz, num_iter : int
        Forwarded unchanged to ``execute_version``.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``data_sizes`` (``nx * ny * nz * 3 * 4`` per run) and ``times`` (last
        entry of the first row of each result). Both arrays have one entry per
        run, in scan order. A run for which ``execute_version`` returned None
        is recorded as NaN so the arrays stay aligned.
    """
    data_sizes = []
    times = []
    print(f"Executing version {version}")
    for nx in tqdm(nxs):
        for ny in nys:
            result = execute_version(version, nx=nx, ny=ny, nz=nz, num_iter=num_iter)
            if result is None:
                # execute_version gives up with None when the scheduler stays
                # busy; keep the scan going instead of crashing on None[0].
                print(f"No timing for nx={nx}, ny={ny}; recording NaN")
                runtime = np.nan
            else:
                runtime = result[0][-1]
            # presumably three fields of 4-byte values, i.e. bytes — TODO confirm
            data_sizes.append(nx * ny * nz * 3 * 4)
            times.append(runtime)
    return np.array(data_sizes), np.array(times)

In [None]:
# Scan configuration: horizontal sizes are 48..112 in steps of 16; the vertical
# size and the iteration count are fixed.
nxs = 16 * np.arange(3, 8)
nys = 16 * np.arange(3, 8)
nz = 64
num_iter = 128

# One plot colour per version, paired by position.
versions = ["OpenMP_GPT", "GPT_kblock", "orig", "GPT_loop_fusion2", "GPT_loop_fusion2_omp"]
colors = ["blue", "red", "green", "grey", "orange"]

for version, color in zip(versions, colors):
    data_sizes, times = get_scan_data(version, nxs, nys, nz, num_iter)
    plt.loglog(
        data_sizes / 1024. / 1024.,          # bytes -> MB
        times / data_sizes * 3 * 4 * 1e6,    # time per gridpoint, scaled by 1e6
        ".",
        color=color,
        label=version,
    )

plt.xlabel('Working set size [MB]')
plt.ylabel('Runtime / gridpoint [µs]')
plt.grid(visible=True, which='both')
fig = plt.gcf()
fig.set_size_inches(10, 6)
plt.legend(loc="upper left")