## Classes and modules

In [None]:
#Lets have matplotlib "inline"
%matplotlib inline

import os
import sys

#Import packages we need
import numpy as np
from netCDF4 import Dataset
import datetime
from IPython.display import display

#For plotting
import matplotlib
from matplotlib import pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable

# Draw lines, text, axis labels and tick marks in white
# (presumably so figures stay readable on a dark notebook theme).
plt.rcParams.update({
    "lines.color": "w",
    "text.color": "w",
    "axes.labelcolor": "w",
    "xtick.color": "w",
    "ytick.color": "w",
})

GPU Ocean-modules:

In [None]:
from gpuocean.SWEsimulators import CDKLM16
from gpuocean.utils import IPythonMagic, NetCDFInitialization, OceanographicUtilities

In [None]:
%cuda_context_handler gpu_ctx

# Sampling Gaussian random fields (GRF): FFT approach

In [None]:
# NorKyst-800 file that provides the initial conditions.
# NOTE(review): this is a machine-specific absolute path; adjust it when
# running the notebook elsewhere.
source_url = "/sintef/data/NorKyst800/NorKyst-800m_ZDEPTHS_his.an.2019071600.nc"

# Build the simulator arguments for the "lovese" case from that file.
data_args = NetCDFInitialization.getInitialConditionsNorKystCases(
    source_url,
    "lovese",
    download_data=False,
)

We only need the simulator for its GPU context and stream.

In [None]:
# Simulator options that are not taken from the NetCDF initial conditions.
sim_args = dict(
    gpu_ctx=gpu_ctx,
    dt=0.0,
    write_netcdf=False,
    small_scale_perturbation=True,
    small_scale_perturbation_interpolation_factor=15,
)

# Combine these options with the initial conditions (metadata stripped)
# and create the simulator; below it is only used for its GPU stream.
sim = CDKLM16.CDKLM16(
    **sim_args,
    **NetCDFInitialization.removeMetadata(data_args),
)

Use the GPU to sample the random numbers (the first step towards a large speed-up).

In [None]:
from gpuocean.utils import Common

# Host-side buffer with 630 rows and 315 columns, single precision, row-major.
random_numbers_host = np.zeros(shape=(630, 315), dtype=np.float32, order='C')

# Matching device array. The two integers after the stream are the column
# and row counts (315, 630), taken here from the host buffer's shape; the
# two zeros are passed unchanged (presumably halo widths — confirm against
# Common.CUDAArray2D).
random_numbers = Common.CUDAArray2D(
    sim.gpu_stream,
    random_numbers_host.shape[1],
    random_numbers_host.shape[0],
    0,
    0,
    random_numbers_host,
)

In [None]:
from pycuda.curandom import XORWOWRandomNumberGenerator
# Device-side XORWOW pseudo-random number generator from PyCUDA's cuRAND wrapper.
# NOTE(review): no seed_getter is passed, so the draws are presumably not
# reproducible between kernel restarts — confirm, and seed explicitly if
# reproducible fields are needed.
rng = XORWOWRandomNumberGenerator()

In [None]:
# Fill the device buffer in place with normally distributed samples,
# queued on the simulator's stream.
rng.fill_normal(random_numbers.data, stream=sim.gpu_stream)

In [None]:
# Copy the sampled numbers from the device back to the host for inspection.
u = random_numbers.download(sim.gpu_stream)

In [None]:
# Histogram of all samples as a visual check of their distribution.
# Assigning to `_` keeps the returned bin arrays out of the cell output.
_ = plt.hist(u.flatten(), bins=50)

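Only one row of the distance matrix is needed: on the periodic grid the covariance depends only on the separation between two cells, so the distances from a single reference cell determine the whole matrix.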

In [None]:
# Periodic distance (in grid cells) from cell (0, 0) to every cell of the
# grid with 315 rows and 630 columns, flattened row-major so that
# dist[j*630 + i] belongs to row j, column i.
#
# The distance must be periodic: along an axis of length n the wrap-around
# separation of index k is min(k, n - k). Taking that minimum per axis is
# equivalent to the minimum over the four candidate distances to the
# (periodically repeated) origin. Without periodicity the FFT of the
# covariance would be complex-valued.
dx = np.minimum(np.arange(630), 630 - np.arange(630))  # column separation
dy = np.minimum(np.arange(315), 315 - np.arange(315))  # row separation

# Broadcast to a (315, 630) table of Euclidean distances and flatten it.
dist = np.sqrt(dx[np.newaxis, :]**2 + dy[:, np.newaxis]**2).ravel()

In [None]:
# Decay rate of the correlation with squared distance.
phi = 0.001

# Squared-exponential correlation exp(-phi * d^2) for every periodic distance.
corr = np.exp(-phi * np.square(dist))

In [None]:
# Fold the flat correlation vector back onto the grid: 315 rows, 630 columns.
cov_toepitz = corr.reshape(315, 630)


In [None]:
# Show the correlation with respect to cell (0, 0) as an image, with a
# colour bar for the scale.
plt.imshow(cov_toepitz)
plt.colorbar()

#### FFT on the CPU

In [None]:
# Output-only cell: displays the 2-D FFT of the (315, 630) correlation array.
# NOTE(review): this prints a large complex array; consider showing only a
# slice or the maximum imaginary part instead.
np.fft.fft2(cov_toepitz)

In [None]:
# Real part of the 2-D spectrum of the correlation array.
cmf = np.fft.fft2(cov_toepitz).real

# Inverse transform of the white noise; u is transposed so that its shape
# matches the correlation array.
uif = np.fft.ifft2(u.T)

# Weight the transformed noise by the square root of the spectrum (negative
# entries clamped to zero before the root), transform forward and keep the
# real part as the sampled field.
spectrum_sqrt = np.sqrt(np.maximum(cmf, 0))
xf = np.fft.fft2(spectrum_sqrt * uif).real

In [None]:
# Plot the CPU-sampled field on a large canvas, with row 0 at the bottom.
plt.figure(figsize=(30,15))
plt.imshow(xf, origin="lower")

#### FFT via PyCuda Kernels (fails)

In [None]:
import pycuda.driver as cuda
from pycuda.compiler import SourceModule
import pycuda.autoinit

In [None]:
import numpy

# Compile a small CUDA kernel that doubles each element of a 4x4 float array;
# each thread of a 4x4 block handles the element at threadIdx.x + 4*threadIdx.y.
mod = SourceModule("""
  __global__ void doublify(float *a)
  {
    int idx = threadIdx.x + threadIdx.y*4;
    a[idx] *= 2;
  }
  """)
func = mod.get_function("doublify")

# Generate single-precision test data on the host.
a = numpy.random.randn(4,4).astype(numpy.float32)

# Allocate device memory of the same size and transfer the data to it.
a_gpu = cuda.mem_alloc(a.nbytes)
cuda.memcpy_htod(a_gpu, a)

# Launch one 4x4 block so that every element gets its own thread.
func(a_gpu, block=(4,4,1))

# Copy the result back and print input and output for comparison.
a_doubled = numpy.empty_like(a)
cuda.memcpy_dtoh(a_doubled, a_gpu)
print(a, "\n\n", a_doubled)

In [None]:
# Load the CUDA source file "fft.cu" through the gpuocean context handler
# (presumably compiled on demand — confirm against the context's get_kernel).
kernel = gpu_ctx.get_kernel("fft.cu")

In [None]:
# Look up the "doublify" entry point in the loaded kernel module.
# This rebinds `func`, replacing the function from the inline demo above.
func = kernel.get_function("doublify")

In [None]:
# Declare the kernel's argument types for prepared calls; "f" means a single
# float32 passed by value.
# NOTE(review): if doublify in fft.cu takes a `float *` like the inline demo
# kernel, the format should be "P" (pointer) — confirm against fft.cu.
func.prepare("f")

In [None]:
# Launch the prepared kernel asynchronously on the simulator's stream.
# The positional arguments are (grid, block, stream, *kernel_args), so this
# requests a 4x4 grid of single-thread blocks and passes the host array `a`.
# NOTE(review): the inline demo launched one 4x4 block (block=(4,4,1)) with the
# device pointer `a_gpu`; the swapped grid/block and the host array are
# likely why this cell fails — confirm against fft.cu before changing it.
func.prepared_async_call((4,4),(1,1,1),sim.gpu_stream, a)

#### FFT via skcuda (1D vs 2D?)

In [None]:
from skcuda import fft
import numpy as np

import pycuda.driver as cuda
from pycuda.tools import make_default_context
import pycuda.gpuarray as gpuarray

In [None]:
from skcuda.fft import fft, ifft, Plan

In [None]:
from gpuocean.utils import IPythonMagic
%cuda_context_handler gpu_ctx

In [None]:
# Small 4x4 test array of normally distributed numbers (float64, unseeded)
# for trying out the skcuda FFT.
data = np.random.randn(4,4)

In [None]:
# Show the test input so the FFT round trip below can be compared against it.
print(data)

In [None]:
# Upload the test data to the device in single precision.
data_gpu = gpuarray.to_gpu(data.astype(np.float32))
# Uninitialised complex64 device buffer of the same shape for the FFT output.
# NOTE(review): data_gpu is real (float32) while out_gpu is complex64; a
# complex-to-complex plan needs complex input of the same size.
out_gpu = gpuarray.empty(data.shape, np.complex64)

In [None]:

# The plan below is complex-to-complex (C2C). The float32 `data_gpu` buffer
# holds only half the bytes such a transform reads (and, in the inverse
# direction, writes), which is what produced garbage here before.
# Run the round trip on a complex64 copy of the data instead.
data_c2c_gpu = gpuarray.to_gpu(data.astype(np.complex64))

# The plan's transform shape is the full 2-D shape of the test array.
plan = Plan(data.shape, np.complex64, np.complex64)
fft(data_c2c_gpu, out_gpu, plan)
ifft(out_gpu, data_c2c_gpu, plan)

# The transforms are unnormalised: forward followed by inverse multiplies the
# data by the number of elements, so divide it out again.
# (np.prod replaces np.product, which was removed in NumPy 2.0.)
data_roundtrip = data_c2c_gpu.get().real / np.prod(data.shape)

# The round trip must reproduce the input up to single-precision round-off.
np.testing.assert_allclose(data_roundtrip, data, rtol=1e-4, atol=1e-5)
data_roundtrip

In [None]:
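# TODO: confirm that a Plan built from a 2-tuple shape performs a true 2D transform (compare the result with np.fft.fft2).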

In [None]:
# Upload the correlation array as complex64: the transform below is
# complex-to-complex, so input and output buffers must both be complex and
# of the same size.
toep_gpu = gpuarray.to_gpu(cov_toepitz.astype(np.complex64))
cmf_gpu = gpuarray.empty(toep_gpu.shape, np.complex64)

# `plan` was created for the 4x4 test array. An FFT plan is tied to its
# transform shape, so build a dedicated one for the full grid and leave the
# 4x4 plan untouched.
toep_plan = Plan(toep_gpu.shape, np.complex64, np.complex64)
fft(toep_gpu, cmf_gpu, toep_plan)

In [None]:
# Output-only cell: the CPU reference spectrum, for comparison with the GPU
# result in the next cell.
cmf

In [None]:
# Output-only cell: real part of the GPU spectrum; it should agree with `cmf`.
cmf_gpu.real

In [None]:
# Complex64, C-contiguous copy of the white noise in the same layout the CPU
# reference uses (u.T, matching the correlation array). The complex-to-complex
# transform needs complex input; the float32 device buffer holds only half
# the bytes it would read.
u_c2c_gpu = gpuarray.to_gpu(np.ascontiguousarray(u.T, dtype=np.complex64))
uif_gpu = gpuarray.empty(u_c2c_gpu.shape, np.complex64)

# `plan` belongs to the 4x4 test array, so build a plan for the noise shape.
noise_plan = Plan(u_c2c_gpu.shape, np.complex64, np.complex64)

# scale=True divides by the number of elements, matching the normalisation
# of np.fft.ifft2 in the CPU reference.
ifft(u_c2c_gpu, uif_gpu, noise_plan, scale=True)

In [None]:
import pycuda.cumath as cumath

# Clamp negative spectrum entries to zero before the square root, as the CPU
# reference does with np.maximum(cmf, 0); the square root of a negative
# value would otherwise give NaN.
cmf_real_gpu = cmf_gpu.real
cmf_clamped_gpu = gpuarray.maximum(cmf_real_gpu, gpuarray.zeros_like(cmf_real_gpu))

# Weight the transformed noise by the square root of the spectrum and make
# sure the product is complex64 for the complex-to-complex transform.
weighted_gpu = (cumath.sqrt(cmf_clamped_gpu) * uif_gpu).astype(np.complex64)

# The forward transform writes complex values, so it needs a complex output
# buffer and a plan of matching shape (`plan` belongs to the 4x4 test array).
grf_c2c_gpu = gpuarray.empty(weighted_gpu.shape, np.complex64)
grf_plan = Plan(weighted_gpu.shape, np.complex64, np.complex64)
fft(weighted_gpu, grf_c2c_gpu, grf_plan)

# The sampled field is the real part of the result.
grf_gpu = grf_c2c_gpu.real

In [None]:
# Copy the GPU-sampled field to the host and show it as an image.
plt.imshow(grf_gpu.get())