# Running normal CPU methods on multiple threads
to investigate how it scales with using multiple threads on one CPU with the built-in Threads.@threads annotaions

In [1]:
CODE_ROOT = pwd() * "/"

"/Users/robertstrauss/Desktop/MPAS_Ocean_Julia/"

In [2]:
include(CODE_ROOT * "mode_init/MPAS_Ocean.jl")

include(CODE_ROOT * "visualization.jl")

include(CODE_ROOT * "mode_forward/time_steppers.jl")

forward_euler_step! (generic function with 1 method)

In [3]:
using PyPlot, PyCall
animation  = pyimport("matplotlib.animation")
ipydisplay = pyimport("IPython.display")

PyObject <module 'IPython.display' from '/Users/robertstrauss/.julia/conda/3/lib/python3.10/site-packages/IPython/display.py'>

# Load Mesh From File

In [8]:
mpasOcean = MPAS_Ocean(CODE_ROOT * "InertiaGravityWaveMesh",
                "base_mesh.nc", "mesh.nc", periodicity="Periodic"
)
mpasOcean.nCells

10000

In [9]:
function calculate_normal_velocity_tendency_threaded!(mpasOcean::MPAS_Ocean)
    mpasOcean.normalVelocityTendency[:] .= 0

    Threads.@threads for iEdge in 1:mpasOcean.nEdges
        # gravity term: take gradient of sshCurrent across edge
        cell1Index, cell2Index = mpasOcean.cellsOnEdge[:,iEdge]
        
        if cell1Index !== 0 && cell2Index !== 0
            mpasOcean.normalVelocityTendency[iEdge] = mpasOcean.gravity * ( mpasOcean.sshCurrent[cell1Index] - mpasOcean.sshCurrent[cell2Index] ) / mpasOcean.dcEdge[iEdge]
        end
        
        # coriolis term
        for i in 1:mpasOcean.nEdgesOnEdge[iEdge]
            eoe = mpasOcean.edgesOnEdge[i,iEdge]
            
            mpasOcean.normalVelocityTendency[iEdge] += mpasOcean.weightsOnEdge[i,iEdge] * mpasOcean.normalVelocityCurrent[eoe] * mpasOcean.fEdge[eoe]
        end
    end
end

calculate_normal_velocity_tendency_threaded! (generic function with 1 method)

In [10]:
Threads.nthreads()

1

In [11]:
using BenchmarkTools

In [12]:
@benchmark calculate_normal_velocity_tendency_threaded!(mpasOcean)

BenchmarkTools.Trial: 3167 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m1.461 ms[22m[39m … [35m  3.874 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 54.46%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m1.483 ms               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m1.576 ms[22m[39m ± [32m295.263 μs[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m3.79% ±  9.13%

  [39m█[34m▆[39m[39m▄[39m▃[39m▃[32m▃[39m[39m▂[39m▁[39m▁[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▁
  [39m█[34m█[39m[39m█[39m█[39m█[3

In [13]:
@benchmark calculate_normal_velocity_tendency!(mpasOcean)

BenchmarkTools.Trial: 3976 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m1.222 ms[22m[39m … [35m  6.320 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 0.00%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m1.238 ms               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m1.256 ms[22m[39m ± [32m110.840 μs[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m0.00% ± 0.00%

  [39m▆[39m█[39m█[39m▆[39m▅[34m▅[39m[39m▄[39m▆[39m▅[39m▄[32m▄[39m[39m▄[39m▃[39m▃[39m▃[39m▂[39m▂[39m [39m▁[39m▁[39m [39m [39m▁[39m▁[39m [39m▁[39m [39m▁[39m [39m▁[39m [39m▁[39m▁[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▁
  [39m█[39m█[39m█[39m█[39m█[34m█[39