# Running normal CPU methods on multiple threads

In [2]:
CODE_ROOT = pwd() * "/"

"/home/rrs/Desktop/MPAS_Ocean/"

In [3]:
include(CODE_ROOT * "mode_init/mode_init.jl")

include(CODE_ROOT * "visualization.jl")

include(CODE_ROOT * "mode_forward/time_steppers.jl")

forward_euler_step_cuda! (generic function with 1 method)

In [4]:
using PyPlot, PyCall
animation  = pyimport("matplotlib.animation")
ipydisplay = pyimport("IPython.display")

PyObject <module 'IPython.display' from '/home/rrs/anaconda3/envs/MPAS_Ocean/lib/python3.8/site-packages/IPython/display.py'>

# Load Mesh From File

In [9]:
mpasOcean = MPAS_Ocean(false, CODE_ROOT * "MPAS_O_Shallow_Water/Mesh+Initial_Condition+Registry_Files/Periodic",
                "base_mesh.nc", "mesh.nc", periodicity="Periodic"
)
typeof(mpasOcean)

MPAS_Ocean

In [10]:
function calculate_normal_velocity_tendency_threaded!(mpasOcean::MPAS_Ocean)
    mpasOcean.normalVelocityTendency[:] .= 0

    Threads.@threads for iEdge in 1:mpasOcean.nEdges
        # gravity term: take gradient of sshCurrent across edge
        cell1Index, cell2Index = mpasOcean.cellsOnEdge[:,iEdge]
        
        if cell1Index !== 0 && cell2Index !== 0
            mpasOcean.normalVelocityTendency[iEdge] = mpasOcean.gravity * ( mpasOcean.sshCurrent[cell1Index] - mpasOcean.sshCurrent[cell2Index] ) / mpasOcean.dcEdge[iEdge]
        end
        
        # coriolis term
        for i in 1:mpasOcean.nEdgesOnEdge[iEdge]
            eoe = mpasOcean.edgesOnEdge[i,iEdge]
            
            mpasOcean.normalVelocityTendency[iEdge] += mpasOcean.weightsOnEdge[i,iEdge] * mpasOcean.normalVelocityCurrent[eoe] * mpasOcean.fEdge[eoe]
        end
    end
end

calculate_normal_velocity_tendency_threaded! (generic function with 1 method)

In [7]:
Threads.nthreads()

12

In [8]:
using BenchmarkTools

In [13]:
@benchmark calculate_normal_velocity_tendency_threaded!(mpasOcean)

BechmarkTools.Trial: 621 samples with 1 evaluations.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m2.737 ms[22m[39m … [35m188.844 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m 0.00% … 84.28%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m3.684 ms               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m 0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m8.043 ms[22m[39m ± [32m 23.211 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m49.91% ± 16.91%

  [34m█[39m[39m▂[32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [34m█[39m[39m█[32m▄[39m[39m▁[

In [14]:
@benchmark calculate_normal_velocity_tendency!(mpasOcean)

BechmarkTools.Trial: 324 samples with 1 evaluations.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m14.407 ms[22m[39m … [35m18.449 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 17.10%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m14.474 ms              [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m15.452 ms[22m[39m ± [32m 1.478 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m6.22% ±  8.27%

  [39m█[34m█[39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▅[39m▄[39m▁[39m▁[39m [39m 
  [39m█[34m█[39m[39m▇[39m▁[39m▁[39