```
This software is part of GPU Ocean. 

Copyright (C) 2019 SINTEF Digital
Copyright (C) 2019 Norwegian Meteorological Institute

This notebook implements a test to investigate computational performance
for all four numerical schemes in a simulation setting, as reported under 
Section 4.7 Numerical Order and Performance in Test Cases for Rotational 
Shallow-Water Schemes by Holm, Brodtkorb, Broström, Christensen and Sætra.

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
```

# Performance: Simulation Performance Experiment

This notebook runs a simulation for each of the four numerical schemes on a domain and time span relevant for real case scenarios.

### To look at the results shown in the paper
Execute the cells below to read the results obtained for the paper and reproduce the optimization results.

### To find your own optimal block sizes
Comment out the second-to-last line in the second cell (the one that overwrites the `test_filename` variable) to produce your own benchmark results. Optimal block sizes typically differ between GPUs, so your results might differ from those reported in the paper.


In [None]:
%matplotlib inline

import re
import numpy as np
import pandas as pd
import subprocess
import os
import os.path
import time

import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import axes3d, Axes3D

In [None]:
# Probe for the first benchmark file name that is not yet taken on disk.
file_test = 0
test_filename = "simulation_benchmark_run_{}.txt".format(file_test)
while os.path.isfile(test_filename):
    test_filename = "simulation_benchmark_run_{}.txt".format(file_test)
    file_test += 1

# Override: reuse the existing results file instead of generating new data.
# Comment out the assignment below to benchmark on your own hardware.
test_filename = "simulation_benchmark_run_17.txt"
print("Storing data in {}".format(test_filename))

In [None]:
# Specify python command:
# This is the executable that runSimulation() passes as the first element of
# the subprocess command line when launching run_simulation.py. Replace the
# bare command with the full path of an interpreter (as in the commented-out
# example below) if the benchmark must run in a specific environment.
python = "python"
#python = "/home/havahol/miniconda3/envs/gpuocean/bin/python"

In [None]:
def runSimulation(filename):
    """Run the benchmark script once per numerical scheme and log the output.

    For each of the four schemes (FBL, CTCS, KP, CDKLM) the script
    ``run_simulation.py`` is launched as a subprocess with that scheme's
    block size, and its combined stdout/stderr is written to ``filename``
    together with header lines and the elapsed wall-clock time.

    Parameters
    ----------
    filename : str
        Path of the log file to create. An existing file is overwritten.

    Notes
    -----
    The interpreter is taken from the module-level variable ``python``.
    A non-zero exit code of the subprocess is reported with a printed
    warning; the captured output is still written to the log.
    """
    sim = ["FBL", "CTCS", "KP", "CDKLM"]

    # Block size (width, height) used for each scheme, in the order of `sim`.
    optimal_block_configs = [(32, 8), (32, 4), (32, 12), (32, 12)]

    banner = "##########################################################################\n"

    # Write to the file the caller asked for (not to a notebook global).
    with open(filename, 'w') as test_file:
        for sim_name, (block_width, block_height) in zip(sim, optimal_block_configs):
            test_file.write(banner)
            test_file.write("Using simulator " + sim_name + ".\n")
            test_file.write(banner)

            tic = time.time()

            test_file.write("=========================================\n")
            test_file.write(sim_name + " [{:02d} x {:02d}]\n".format(block_width, block_height))
            test_file.write("-----------------------------------------\n")
            cmd = [python, "run_simulation.py",
                   "--block_width", str(block_width),
                   "--block_height", str(block_height),
                   "--simulator", sim_name]

            # communicate() closes stdin, drains stdout and waits for the
            # child, so no pipe or process is left dangling.
            with subprocess.Popen(cmd, shell=False,
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.STDOUT) as p:
                output, _ = p.communicate()

            if p.returncode != 0:
                print("WARNING: " + sim_name + " benchmark exited with return code "
                      + str(p.returncode) + ", see " + filename)

            # The bytes object is deliberately written via str(), i.e. as a
            # single-line b'...' literal.
            # NOTE(review): the '.*'-prefixed patterns in getData look like
            # they rely on this one-line format - confirm before decoding.
            test_file.write(str(output) + "\n")
            test_file.write("=========================================\n")
            test_file.write("\n")

            toc = time.time()

            infostr = sim_name + " completed in {:.02f} s\n".format(toc-tic)
            test_file.write(infostr)
            print(infostr)

            test_file.write("\n\n\n")

            # Push the results to disk after every scheme so that a crash in
            # a later run does not lose the measurements already taken.
            test_file.flush()
            os.fsync(test_file.fileno())


# Only launch the (expensive) benchmark when no results file is available yet.
if os.path.isfile(test_filename):
    print("Using existing run in " + test_filename)
else:
    runSimulation(test_filename)

In [None]:
def getData(filename):
    """Parse a benchmark log file into one row per completed simulation.

    The file is scanned line by line. A ``Using simulator <name>.`` line
    selects the current simulator, a ``<name> [W x H]`` line gives the block
    size, and the remaining quantities are picked up from wherever they occur
    on a line. A row is emitted each time a ``Maximum megacells`` value is
    found, after which all per-run values are cleared so that a missing
    measurement shows up as ``None`` instead of silently repeating the value
    of the previous run.

    Parameters
    ----------
    filename : str
        Path of the log file to read.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_runs, 8)`` with the columns
        ``[simulator, block_width, block_height, megacells, wall_time,
        num_iterations, max_u, mem_req]``. Numeric values are kept as the
        text found in the file; callers convert them as needed. An empty
        ``(0, 8)`` array is returned when the file contains no runs.
    """
    # Non-negative integer or decimal number; group 1 holds the whole number.
    number = r'(\d+(\.\d+)?)'

    simulator_pattern = re.compile(r'Using simulator (.+)\.')
    field_patterns = {
        'wall_time': re.compile(r'.*Wall time for 24 hour sim: ' + number),
        'num_iterations': re.compile(r'.*Num iteration: ' + number),
        'max_u': re.compile(r'.*u_max=' + number),
        'mem_req': re.compile(r'.*Required memory: ' + number + r' MB'),
    }
    megacells_pattern = re.compile(r'.*Maximum megacells: ' + number)

    # State variables
    simulator = None
    block_width = None
    block_height = None
    fields = dict.fromkeys(field_patterns)

    # Rows are collected in a list and converted once at the end; this avoids
    # re-allocating the array for every run.
    rows = []

    with open(filename) as origin_file:
        for line in origin_file:
            # Find simulator
            match = simulator_pattern.match(line)
            if match:
                simulator = match.group(1)

            # Nothing else is meaningful before the first simulator header.
            if simulator is None:
                continue

            # Find block size (the simulator name is escaped because it is
            # text from the file, not a pattern).
            match = re.match(re.escape(simulator) + r' \[(\d+) x (\d+)\]$', line)
            if match:
                block_width = int(match.group(1))
                block_height = int(match.group(2))

            # Find wall time, number of iterations, u_max and memory usage
            for key, pattern in field_patterns.items():
                match = pattern.match(line)
                if match:
                    fields[key] = match.group(1)

            # Find simulator megacells; this completes the current run.
            match = megacells_pattern.match(line)
            if match:
                rows.append([simulator, block_width, block_height,
                             match.group(1), fields['wall_time'],
                             fields['num_iterations'], fields['max_u'],
                             fields['mem_req']])

                block_width = None
                block_height = None
                fields = dict.fromkeys(field_patterns)

    if not rows:
        return np.empty((0, 8))
    return np.array(rows)

# Parse the selected benchmark log and show the raw result table, followed by
# a legend naming the columns of each row.
print(test_filename)
data = getData(test_filename)
print(data)
print("[simulator, block_width, block_height, megacells, wall_time, num_iterations, max_u, mem_req]")

In [None]:
# Sanity check: the gap between simulation time and iterations per second
# should correspond to the gap between the CFL conditions.
# Row 0 holds FBL and row 3 holds CDKLM; column 3 is megacells, column 4 is wall time.
fbl_row = data[0]
cdklm_row = data[3]

fbl_wall_time, fbl_mega_cells = float(fbl_row[4]), float(fbl_row[3])
cdklm_wall_time, cdklm_mega_cells = float(cdklm_row[4]), float(cdklm_row[3])

wall_time_ratio = cdklm_wall_time/fbl_wall_time
mega_cells_ratio = fbl_mega_cells/cdklm_mega_cells

print(wall_time_ratio/mega_cells_ratio)
print((1/np.sqrt(2))/0.25)

In [None]:
# Find normalized values
# Convert the needed columns once: 3 = megacells, 4 = wall time, 5 = iterations.
mega_cells_column = data[:,3].astype(np.float32)
wall_time_column = data[:,4].astype(np.float32)
iterations_column = data[:,5].astype(np.float32)

# Wall time relative to the fastest run
min_wall_time = np.min(wall_time_column)
normalized_wall_time = wall_time_column/min_wall_time

# Megacells relative to the highest value
max_mega_cells = np.max(mega_cells_column)
normalized_mega_cells = mega_cells_column/max_mega_cells

# Iterations per second relative to the highest rate
iterations_per_sec = iterations_column/wall_time_column
max_iterations_per_sec = np.max(iterations_per_sec)
normalized_iterations_per_sec = iterations_per_sec/max_iterations_per_sec

print('normalized wall time:', normalized_wall_time, sep='\n')

print("\nnormalized_iterations_per_sec:", normalized_iterations_per_sec, sep='\n')

print("\nnormalized_iterations_per_sec * normalized_wall_time:",
      normalized_iterations_per_sec * normalized_wall_time, sep='\n')

In [None]:
# Express wall time and iteration rate of every scheme relative to CTCS,
# which is stored in row 1 of the result table.
ctcs_wall_time = np.float32(data[1,4])
all_wall_times_for_ctcs = data[:,4].astype(np.float32)
normalized_wall_time_ctcs = all_wall_times_for_ctcs/ctcs_wall_time

iterations_per_sec_ctcs = iterations_per_sec[1]
normalized_iterations_per_sec_ctcs = iterations_per_sec/iterations_per_sec_ctcs

print("normalized wall time wrt CTCS", normalized_wall_time_ctcs, sep="\n")

print("\nnormalized iterations per sec wrt CTCS", normalized_iterations_per_sec_ctcs, sep="\n")

In [None]:
# Express wall time and iteration rate of every scheme relative to CDKLM,
# which is stored in row 3 of the result table.
cdklm_wall_time = np.float32(data[3,4])
normalized_wall_time_cdklm = data[:,4].astype(np.float32)/cdklm_wall_time

iterations_per_sec_cdklm = iterations_per_sec[3]
normalized_iterations_per_sec_cdklm = iterations_per_sec/iterations_per_sec_cdklm

print("normalized wall time wrt CDKLM")
print(normalized_wall_time_cdklm)

# The leading "\n" only inserts a blank line; the heading itself must still
# start with the "n" of "normalized".
print("\nnormalized iterations per sec wrt CDKLM")
print(normalized_iterations_per_sec_cdklm)