# Project: ij-blocking

In [7]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import sys
import re
import pickle

In [8]:
def read_field_from_file(filename, num_halo=None):
    """Read a 2D or 3D field from a binary file that starts with an int32 header.

    The header holds ``3 + rank`` int32 values: rank, bits per field value,
    halo width, nx, ny and (for rank 3 only) nz. The field values follow
    directly after the header.

    Parameters
    ----------
    filename : str or path-like
        File to read.
    num_halo : ignored
        Kept for backward compatibility. The halo width stored in the file
        header is read but not used either.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(nz, ny, nx)`` when the stored rank is 3, otherwise
        ``(ny, nx)``. The dtype is float32 when the header says 32 bits and
        float64 otherwise.

    Raises
    ------
    ValueError
        If the header is truncated or the file holds fewer values than the
        header announces.
    """
    # Read up to six header words; a rank-2 header only owns the first five,
    # so the sixth word (if present) is already field data in that case.
    header = np.fromfile(filename, dtype=np.int32, count=6)
    if header.size < 5:
        raise ValueError(f"{filename}: file too short to contain a field header")

    # Convert to Python ints so the element count below cannot overflow int32.
    rank, nbits = int(header[0]), int(header[1])
    nx, ny = int(header[3]), int(header[4])

    if rank == 3:
        if header.size < 6:
            raise ValueError(f"{filename}: truncated header for a rank-3 field")
        shape = (int(header[5]), ny, nx)
        num_values = shape[0] * ny * nx
    else:
        shape = (ny, nx)
        num_values = ny * nx

    dtype = np.float32 if nbits == 32 else np.float64

    # Skip the header by its size in bytes; an offset counted in field-sized
    # elements is not exact when the header is not a multiple of the value size.
    header_bytes = (3 + rank) * header.itemsize
    data = np.fromfile(filename, dtype=dtype, count=num_values, offset=header_bytes)
    return np.reshape(data, shape)

def validate_results(out_field, k_lev = 32, if_plot = True):
    """Compare every field in ``out_field`` against the first one.

    Parameters
    ----------
    out_field : dict
        Maps a label to a 3D array. The first entry (in insertion order) is
        the reference that all later entries are compared with.
    k_lev : int, optional
        Index along the first axis of the slice shown for each field when
        plotting. Only used if ``if_plot`` is true.
    if_plot : bool, optional
        If true, show one image per field with the label as title.

    Returns
    -------
    bool
        True if every field equals the reference exactly (summed absolute
        difference of zero), False otherwise. On a mismatch the per-field
        mean absolute error and the compared labels are printed.

    Raises
    ------
    ValueError
        If ``out_field`` is empty.
    """
    num_tests = len(out_field)
    if num_tests == 0:
        raise ValueError('out_field is empty: nothing to validate')

    if if_plot:
        # More than four panels go on two rows; round the column count up so
        # an odd number of fields still gets one axis each.
        if num_tests > 4:
            n_rows, n_cols = 2, (num_tests + 1) // 2
        else:
            n_rows, n_cols = 1, num_tests
        # squeeze=False always yields a 2D array of axes, even for one panel.
        fig, axs = plt.subplots(n_rows, n_cols, figsize=(12, 4 * n_rows), squeeze=False)
        axs = axs.flatten()
        for spare_ax in axs[num_tests:]:
            spare_ax.set_visible(False)

    # Summed absolute difference of each non-reference field to the reference.
    test_diff = np.zeros(num_tests - 1)

    for i, (key, data) in enumerate(out_field.items()):
        if i == 0:
            k_data = data
        if if_plot:
            im1 = axs[i].imshow(data[k_lev, :, :], origin='lower', vmin=-0.1, vmax=1.1)
            fig.colorbar(im1, ax=axs[i])
            axs[i].set_title(key)
        if i > 0:
            test_diff[i-1] = np.sum(np.abs(k_data - data))

    if if_plot:
        plt.show()

    if np.any(test_diff != 0):
        print('MAE', test_diff / np.prod(k_data.shape))
        print(list(out_field.keys())[1:])
        return False
    return True
    

    


In [9]:
%%bash
# Rebuild every stencil variant with the modules loaded below
# (switches PrgEnv-gnu to PrgEnv-cray and loads perftools-lite).
module load daint-gpu
module switch PrgEnv-gnu PrgEnv-cray
module load perftools-lite

# Must contain every version that the benchmark cells below run.
versions=('kblocking' 'ijblocking-math' 'ijblocking2' 'averaging-nn' 'averaging-nnn' 'lap-nn-ij' 'lap-nnn-ij' 'ijblocking-inline' 'simplecopy-block' 'simplecopy-ij' 'ijblocking-comp' 'ijblocking-small' )

echo "===== cleaning up ====="
make --directory=../Stencil_code/ clean || { echo "make clean failed" >&2; exit 1; }
echo "===== compiling ====="
for version in "${versions[@]}"
do
    # Stop at the first failed build: otherwise only the last make decides the
    # cell's exit status and an earlier failure goes unnoticed.
    make --directory=../Stencil_code/ VERSION="$version" || { echo "build failed for VERSION=$version" >&2; exit 1; }
done

===== cleaning up =====
make: Entering directory '/users/class169/Project/HPC4WC_project_ijblocking/Stencil_code'
rm -f -rf *~ *.o *.mod *.MOD *.i core.* *.out *.lst *.x *.x+orig *.x+[0-9]* *.dat *.report result_*.py
make: Leaving directory '/users/class169/Project/HPC4WC_project_ijblocking/Stencil_code'
===== compiling =====
make: Entering directory '/users/class169/Project/HPC4WC_project_ijblocking/Stencil_code'
ftn -O3 -hfp3 -eZ -ffree -N255 -ec -eC -eI -eF -rm -c m_utils.F90
ftn -O3 -hfp3 -eZ -ffree -N255 -ec -eC -eI -eF -rm -c stencil2d-kblocking.F90
ftn -O3 -hfp3 -eZ -ffree -N255 -ec -eC -eI -eF -rm m_utils.o stencil2d-kblocking.o -o stencil2d-kblocking.x


INFO: creating the PerfTools-instrumented executable 'stencil2d-kblocking.x' (lite-samples) ...OK


cp stencil2d-kblocking.x stencil2d.x
make: Leaving directory '/users/class169/Project/HPC4WC_project_ijblocking/Stencil_code'
make: Entering directory '/users/class169/Project/HPC4WC_project_ijblocking/Stencil_code'
ftn -O3 -hfp3 -eZ -ffree -N255 -ec -eC -eI -eF -rm -c stencil2d-ijblocking-math.F90
ftn -O3 -hfp3 -eZ -ffree -N255 -ec -eC -eI -eF -rm m_utils.o stencil2d-ijblocking-math.o -o stencil2d-ijblocking-math.x


INFO: creating the PerfTools-instrumented executable 'stencil2d-ijblocking-math.x' (lite-samples) ...OK


cp stencil2d-ijblocking-math.x stencil2d.x
make: Leaving directory '/users/class169/Project/HPC4WC_project_ijblocking/Stencil_code'
make: Entering directory '/users/class169/Project/HPC4WC_project_ijblocking/Stencil_code'
ftn -O3 -hfp3 -eZ -ffree -N255 -ec -eC -eI -eF -rm -c stencil2d-ijblocking2.F90
ftn -O3 -hfp3 -eZ -ffree -N255 -ec -eC -eI -eF -rm m_utils.o stencil2d-ijblocking2.o -o stencil2d-ijblocking2.x


INFO: creating the PerfTools-instrumented executable 'stencil2d-ijblocking2.x' (lite-samples) ...OK


cp stencil2d-ijblocking2.x stencil2d.x
make: Leaving directory '/users/class169/Project/HPC4WC_project_ijblocking/Stencil_code'
make: Entering directory '/users/class169/Project/HPC4WC_project_ijblocking/Stencil_code'
ftn -O3 -hfp3 -eZ -ffree -N255 -ec -eC -eI -eF -rm -c stencil2d-averaging-nn.F90
ftn -O3 -hfp3 -eZ -ffree -N255 -ec -eC -eI -eF -rm m_utils.o stencil2d-averaging-nn.o -o stencil2d-averaging-nn.x


INFO: creating the PerfTools-instrumented executable 'stencil2d-averaging-nn.x' (lite-samples) ...OK


cp stencil2d-averaging-nn.x stencil2d.x
make: Leaving directory '/users/class169/Project/HPC4WC_project_ijblocking/Stencil_code'
make: Entering directory '/users/class169/Project/HPC4WC_project_ijblocking/Stencil_code'
ftn -O3 -hfp3 -eZ -ffree -N255 -ec -eC -eI -eF -rm -c stencil2d-averaging-nnn.F90
ftn -O3 -hfp3 -eZ -ffree -N255 -ec -eC -eI -eF -rm m_utils.o stencil2d-averaging-nnn.o -o stencil2d-averaging-nnn.x


INFO: creating the PerfTools-instrumented executable 'stencil2d-averaging-nnn.x' (lite-samples) ...OK


cp stencil2d-averaging-nnn.x stencil2d.x
make: Leaving directory '/users/class169/Project/HPC4WC_project_ijblocking/Stencil_code'
make: Entering directory '/users/class169/Project/HPC4WC_project_ijblocking/Stencil_code'
ftn -O3 -hfp3 -eZ -ffree -N255 -ec -eC -eI -eF -rm -c stencil2d-lap-nn-ij.F90
ftn -O3 -hfp3 -eZ -ffree -N255 -ec -eC -eI -eF -rm m_utils.o stencil2d-lap-nn-ij.o -o stencil2d-lap-nn-ij.x


INFO: creating the PerfTools-instrumented executable 'stencil2d-lap-nn-ij.x' (lite-samples) ...OK


cp stencil2d-lap-nn-ij.x stencil2d.x
make: Leaving directory '/users/class169/Project/HPC4WC_project_ijblocking/Stencil_code'
make: Entering directory '/users/class169/Project/HPC4WC_project_ijblocking/Stencil_code'
ftn -O3 -hfp3 -eZ -ffree -N255 -ec -eC -eI -eF -rm -c stencil2d-lap-nnn-ij.F90
ftn -O3 -hfp3 -eZ -ffree -N255 -ec -eC -eI -eF -rm m_utils.o stencil2d-lap-nnn-ij.o -o stencil2d-lap-nnn-ij.x


INFO: creating the PerfTools-instrumented executable 'stencil2d-lap-nnn-ij.x' (lite-samples) ...OK


cp stencil2d-lap-nnn-ij.x stencil2d.x
make: Leaving directory '/users/class169/Project/HPC4WC_project_ijblocking/Stencil_code'
make: Entering directory '/users/class169/Project/HPC4WC_project_ijblocking/Stencil_code'
ftn -O3 -hfp3 -eZ -ffree -N255 -ec -eC -eI -eF -rm -c stencil2d-ijblocking-inline.F90
ftn -O3 -hfp3 -eZ -ffree -N255 -ec -eC -eI -eF -rm m_utils.o stencil2d-ijblocking-inline.o -o stencil2d-ijblocking-inline.x


INFO: creating the PerfTools-instrumented executable 'stencil2d-ijblocking-inline.x' (lite-samples) ...OK


cp stencil2d-ijblocking-inline.x stencil2d.x
make: Leaving directory '/users/class169/Project/HPC4WC_project_ijblocking/Stencil_code'
make: Entering directory '/users/class169/Project/HPC4WC_project_ijblocking/Stencil_code'
ftn -O3 -hfp3 -eZ -ffree -N255 -ec -eC -eI -eF -rm -c stencil2d-simplecopy-block.F90
ftn -O3 -hfp3 -eZ -ffree -N255 -ec -eC -eI -eF -rm m_utils.o stencil2d-simplecopy-block.o -o stencil2d-simplecopy-block.x


INFO: creating the PerfTools-instrumented executable 'stencil2d-simplecopy-block.x' (lite-samples) ...OK


cp stencil2d-simplecopy-block.x stencil2d.x
make: Leaving directory '/users/class169/Project/HPC4WC_project_ijblocking/Stencil_code'
make: Entering directory '/users/class169/Project/HPC4WC_project_ijblocking/Stencil_code'
ftn -O3 -hfp3 -eZ -ffree -N255 -ec -eC -eI -eF -rm -c stencil2d-simplecopy-ij.F90
ftn -O3 -hfp3 -eZ -ffree -N255 -ec -eC -eI -eF -rm m_utils.o stencil2d-simplecopy-ij.o -o stencil2d-simplecopy-ij.x


INFO: creating the PerfTools-instrumented executable 'stencil2d-simplecopy-ij.x' (lite-samples) ...OK


cp stencil2d-simplecopy-ij.x stencil2d.x
make: Leaving directory '/users/class169/Project/HPC4WC_project_ijblocking/Stencil_code'
make: Entering directory '/users/class169/Project/HPC4WC_project_ijblocking/Stencil_code'
ftn -O3 -hfp3 -eZ -ffree -N255 -ec -eC -eI -eF -rm -c stencil2d-ijblocking-comp.F90
ftn -O3 -hfp3 -eZ -ffree -N255 -ec -eC -eI -eF -rm m_utils.o stencil2d-ijblocking-comp.o -o stencil2d-ijblocking-comp.x


INFO: creating the PerfTools-instrumented executable 'stencil2d-ijblocking-comp.x' (lite-samples) ...OK


cp stencil2d-ijblocking-comp.x stencil2d.x
make: Leaving directory '/users/class169/Project/HPC4WC_project_ijblocking/Stencil_code'
make: Entering directory '/users/class169/Project/HPC4WC_project_ijblocking/Stencil_code'
ftn -O3 -hfp3 -eZ -ffree -N255 -ec -eC -eI -eF -rm -c stencil2d-ijblocking-small.F90
ftn -O3 -hfp3 -eZ -ffree -N255 -ec -eC -eI -eF -rm m_utils.o stencil2d-ijblocking-small.o -o stencil2d-ijblocking-small.x


INFO: creating the PerfTools-instrumented executable 'stencil2d-ijblocking-small.x' (lite-samples) ...OK


cp stencil2d-ijblocking-small.x stencil2d.x
make: Leaving directory '/users/class169/Project/HPC4WC_project_ijblocking/Stencil_code'


In [10]:
%%capture cap --no-stderr
# srun slurm comand to start a job
# -n number of processes requested

versions = ['kblocking', 'ijblocking-small', 'ijblocking-inline', 'ijblocking-math', 'ijblocking2',] ## THIS SHOULD BE A COPY OF VERSIONS IN CELL ABOVE


output = {}
out_field = {}

nx = np.array([64,128,256,512,1024,2048])
ny = np.array([64])
size_i = np.array([32])
size_j = np.array([32])
num_iter = 256
nz = 16

      
if versions[0] != 'kblocking':
    sys.exit('kblocking has to be first in versions. Leads to error in comparisons')

runtimes = {key: np.zeros((len(nx)*len(ny)*len(size_i)*len(size_j),5)) for key in versions}
ii_index = 0
for nx_ in nx:
    for ny_ in ny:
        for size_i_ in size_i:
            
            for size_j_ in size_j:
                if size_i_>= nx_:
                    continue
                if size_j_ >= ny_:
                    continue
                for version in versions:
                    command = f"srun -n 1 ./../Stencil_code/stencil2d-{version}.x+orig --nx {nx_} --ny {ny_} --nz {nz} --size_i {size_i_} --size_j {size_j_} --num_iter {num_iter}"
                    

                    if version == 'kblocking' :
                        
                        command = f"srun -n 1 ./../Stencil_code/stencil2d-{version}.x+orig --nx {nx_} --ny {ny_} --nz {nz} --num_iter {num_iter}"
                        
                        

                    print(command)
                    out =  !{command}
                    print(out)
                    output.update({version: out})
                    #out_field.update({version: read_field_from_file('out_field.dat')})

                #validate_results(out_field, if_plot=False)
                pattern = r"[-+]?\d*\.?\d+(?:[eE][-+]?\d+)?"
                for i, (key, value) in enumerate(output.items()):
                    
                    try:
                        runtimes[key][ii_index] = [nx_,ny_,size_i_,size_j_,float(re.findall(pattern, value[2])[-1])]
                    except:
                        runtimes[key][ii_index] = [nx_,ny_,size_i_,size_j_,np.nan]

                        
                        
                ii_index+=1
                print('\n ============================= \n')


with open('saved_runtimes_nx.pkl', 'wb') as f:
    pickle.dump(runtimes, f)

In [11]:
# Keep a copy of everything the benchmark cell printed (held in `cap`).
with open('best_optimization.txt', 'w') as log_file:
    log_file.write(cap.stdout)

In [None]:
%%capture cap --no-stderr

# srun slurm comand to start a job
# -n number of processes requested

versions = ['ijblocking-math'] ## THIS SHOULD BE A COPY OF VERSIONS IN CELL ABOVE


output = {}
out_field = {}

nx = np.array([1024*2])
ny = np.array([1024*2])

size_i = np.array([8,16,32,64,128,256,512,1024,1024*2])
size_j = np.array([8,16,32,64,128,256,512,1024,1024*2])

num_iter = 256
nz = 16


runtimes = {key: np.zeros((len(nx)*len(ny)*len(size_i)*len(size_j),5)) for key in versions}
ii_index = 0
for nx_ in nx:
    for ny_ in ny:
        for size_i_ in size_i:
            
            for size_j_ in size_j:
                for version in versions:
                    command = f"srun -n 1 ./../Stencil_code/stencil2d-{version}.x+orig --nx {nx_} --ny {ny_} --nz {nz} --size_i {size_i_} --size_j {size_j_} --num_iter {num_iter}"
                    

                    if version == 'kblocking' :
                        
                        command = f"srun -n 1 ./../Stencil_code/stencil2d-{version}.x+orig --nx {nx_} --ny {ny_} --nz {nz} --num_iter {num_iter}"
                        
                        

                    print(command)
                    out =  !{command}
                    print(out)
                    output.update({version: out})
                    #out_field.update({version: read_field_from_file('out_field.dat')})

                #validate_results(out_field, if_plot=False)
                pattern = r"[-+]?\d*\.?\d+(?:[eE][-+]?\d+)?"
                for i, (key, value) in enumerate(output.items()):
                    
                    try:
                        runtimes[key][ii_index] = [nx_,ny_,size_i_,size_j_,float(re.findall(pattern, value[2])[-1])]
                    except:
                        runtimes[key][ii_index] = [nx_,ny_,size_i_,size_j_,np.nan]

                        
                        
                ii_index+=1
                print('\n ============================= \n')


In [None]:
# Keep a copy of everything the block-shape benchmark cell printed (held in `cap`).
with open('best_shape.txt', 'w') as log_file:
    log_file.write(cap.stdout)