## Classes and modules

In [None]:
#Lets have matplotlib "inline"
%matplotlib inline

import os
import sys

#Import packages we need
import numpy as np
from netCDF4 import Dataset
import datetime
from IPython.display import display

#For plotting
import matplotlib
from matplotlib import pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable

# Render lines, text, axis labels and tick marks in white.
plt.rcParams.update(dict.fromkeys(
    ("lines.color", "text.color", "axes.labelcolor", "xtick.color", "ytick.color"),
    "w",
))

GPU Ocean-modules:

In [None]:
from gpuocean.utils import IPythonMagic

Basically, we only need the context and stream

In [None]:
# Line magic made available by importing gpuocean's IPythonMagic above.
# NOTE(review): presumably creates the CUDA context and binds it to the name
# `gpu_ctx` (that name is used further down to load kernels) — confirm.
%cuda_context_handler gpu_ctx

In [None]:
import pycuda.driver as cuda
# Stream handle passed to the gpuocean array helpers and to the random number generator below.
gpu_stream = cuda.Stream()

# Sampling GRF: FFT Approach

Use the GPU to sample the random numbers (a first step towards a large speed-up).

In [None]:
# Small grid for quick experiments; the values noted for the larger grid are 630 x 315.
nx, ny = 10, 5

In [None]:
from gpuocean.utils import Common

# Zero-initialised float32 host buffer of shape (nx, ny), C-ordered.
random_numbers_host = np.zeros(shape=(nx, ny), order='C', dtype=np.float32)

# Device-side array initialised from the host buffer.
# NOTE(review): the two size arguments are passed as (ny, nx), i.e. swapped
# relative to the variable names; presumably this is what makes the (nx, ny)
# host buffer match the expected layout — confirm against Common.CUDAArray2D.
random_numbers = Common.CUDAArray2D(
    gpu_stream, ny, nx, 0, 0, random_numbers_host
)

In [None]:
from pycuda.curandom import XORWOWRandomNumberGenerator
# GPU random number generator (XORWOW) from pycuda.curandom.
# NOTE(review): no seed is supplied here, so the samples are presumably not
# reproducible across kernel restarts — confirm and seed explicitly if needed.
rng = XORWOWRandomNumberGenerator()

In [None]:
# Fill the device buffer with normally distributed samples, issued on gpu_stream.
rng.fill_normal(random_numbers.data, stream=gpu_stream)

In [None]:
# Fetch the samples from the device and transpose them.
# NOTE(review): the host buffer was created as (nx, ny), so `u` is presumably
# (ny, nx) — confirm. The transpose is not forced into a new contiguous array here.
u = random_numbers.download(gpu_stream).T

In [None]:
# Histogram of all samples as a quick visual check of the distribution;
# assigning to `_` keeps the returned tuple out of the cell output.
_ = plt.hist(u.flatten(), bins=50)

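Only one row of the distance matrix is needed: on a periodic grid the covariance depends only on the (wrapped) offset between two cells, so the distances from cell (0, 0) to every other cell determine the whole matrix.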

In [None]:
# Distance from cell (0, 0) to every cell (i, j) of the nx-by-ny grid, stored
# flat in row-major order (index j*nx + i).
#
# The grid is treated as periodic: along each axis the shorter of the direct
# and the wrapped-around offset is used. Periodicity is required here,
# otherwise the FFT of the resulting covariance has complex values.
wrap_dx = np.minimum(np.arange(nx), nx - np.arange(nx))
wrap_dy = np.minimum(np.arange(ny), ny - np.arange(ny))
dist = np.sqrt(wrap_dx[np.newaxis, :]**2 + wrap_dy[:, np.newaxis]**2).ravel()

In [None]:
# Squared-exponential correlation as a function of distance; phi sets the decay rate.
phi = 0.001
corr = np.exp(-phi * np.square(dist))

In [None]:
# Arrange the flat correlation row as a (ny, nx) array so that it can be fed to the 2-D FFT.
cov_toepitz = corr.reshape((ny, nx))

In [None]:
# Show the correlation with cell (0, 0) as an image, with a colour scale.
plt.imshow(cov_toepitz)
plt.colorbar()

#### FFT on the CPU

In [None]:
# Display the raw 2-D FFT of the correlation array for inspection (result is not stored).
np.fft.fft2(cov_toepitz)

In [None]:
# Real part of the spectrum of the correlation array.
cmf = np.fft.fft2(cov_toepitz).real
# Inverse transform of the white-noise field.
uif = np.fft.ifft2(u)
# Scale by the square root of the spectrum (negative entries clamped to zero),
# transform forward and keep the real part as the sampled field.
spectrum_sqrt = np.sqrt(np.maximum(cmf, 0))
xf = np.fft.fft2(spectrum_sqrt * uif).real

In [None]:
# Plot the sampled field with the origin in the lower-left corner and a fixed colour range of [-1, 1].
plt.figure(figsize=(10,5))
plt.imshow(xf, origin="lower", vmin=-1, vmax=1)
plt.colorbar(shrink=0.7)

Alternative code (same result)

In [None]:
# Same construction with the roles of the forward and inverse transforms swapped.
ctf = np.fft.fft2(cov_toepitz)
# Clamp the REAL part of the spectrum before taking the square root.
# Applying np.maximum directly to the complex array would compare complex
# numbers and lead to a complex square root; for the symmetric periodic
# kernel built above the imaginary part should only be rounding noise.
ctfsm = np.sqrt(np.maximum(0,ctf.real))
uf = np.fft.fft2(u)
grf = np.fft.ifft2(ctfsm*uf).real

In [None]:
# Plot the field from the alternative construction with a fixed colour range of [-1, 1].
# Note: unlike the previous figure, no origin="lower" is passed here.
plt.figure(figsize=(10,5))
plt.imshow(grf, vmin=-1, vmax=1)
plt.colorbar(shrink=0.7)

#### FFT via skcuda (1D vs 2D?)

In [None]:
import numpy as np

import pycuda.driver as cuda
from pycuda.tools import make_default_context
import pycuda.gpuarray as gpuarray

In [None]:
from skcuda.fft import fft, ifft, Plan

In [None]:
# https://forums.developer.nvidia.com/t/how-to-apply-scikit-cuda-2d-fft-pycuda/33055
# Round-trip a small 2-D array through a complex-to-complex FFT on the GPU.

data = np.random.randn(4,4)
print(data)

# The plan below is complex64 -> complex64, so BOTH buffers must hold
# complex64 values. A float32 input buffer is only half as large as what the
# transform reads (forward) and writes (inverse).
data_gpu = gpuarray.to_gpu(data.astype(np.complex64))
out_gpu = gpuarray.empty(data.shape, np.complex64)

plan = Plan(data.shape, np.complex64, np.complex64)
fft(data_gpu, out_gpu, plan)
ifft(out_gpu, data_gpu, plan)

print("Round trip (should reproduce the input up to float32 precision):")
# No scaling is requested from ifft, so divide by the number of elements.
# np.prod replaces np.product, which was removed in NumPy 2.0.
data_gpu.get().real/np.prod(data.shape)

In [None]:
# https://github.com/lebedov/scikit-cuda/issues/253
# Round-trip a real N x N array through a real-to-complex FFT on the GPU and
# compare against NumPy.
N = 4
array = np.random.randint(255, size=(N, N)).astype(np.float32)
print(array)
array_gpu = gpuarray.to_gpu(array)
ft = np.fft.fft2(array)
print(ft)

# A real-to-complex transform stores only the non-redundant half of the
# spectrum: all N rows are kept and only the LAST axis shrinks to N//2+1.
# The result is comparable to ft[:, :N//2+1] (i.e. np.fft.rfft2(array)).
ft_gpu = gpuarray.empty((N, N//2+1), dtype=np.complex64)
plan = Plan((N, N), np.float32, np.complex64)
fft(array_gpu, ft_gpu, plan)
print(ft_gpu.get())


plan_inv = Plan((N, N), np.complex64, np.float32)
ifft(ft_gpu, array_gpu, plan_inv)
print(array)
# No scaling is requested from ifft, so divide by the number of elements.
# np.prod replaces np.product, which was removed in NumPy 2.0.
print(array_gpu.get()/np.prod(array_gpu.shape))

In [None]:
# Open question: is the skcuda transform above really a 2-D FFT?

In [None]:
toep_gpu = gpuarray.to_gpu(cov_toepitz.astype(np.float32))
cmf_gpu = gpuarray.empty(np.array(toep_gpu.shape)//2+1, np.complex64)
plan = Plan(toep_gpu.shape, np.float32, np.complex64)
fft(toep_gpu, cmf_gpu, plan)

In [None]:
cmf, cmf_gpu.real

In [None]:
# https://www.idtools.com.au/gpu-accelerated-fft-compatible-with-numpy/
u_gpu = gpuarray.to_gpu(u.astype(np.float32))
uf_gpu = gpuarray.empty(np.array(u_gpu.shape)//2+1, np.complex64)
fft(u_gpu, uf_gpu, plan)

In [None]:
uf_gpu.get()

In [None]:
uf

In [None]:
# import pycuda.cumath as cumath
tmp_gpu = gpuarray.to_gpu((np.sqrt(np.maximum(cmf_gpu.get().real,0)) * uf_gpu.get()).astype(np.complex64))

grf_gpu = gpuarray.empty(cov_toepitz.shape, np.float32)
plan_inv = Plan(cov_toepitz.shape, np.complex64, np.float32)
ifft(tmp_gpu, grf_gpu, plan_inv)

In [None]:
# Show the GPU-generated field; the division by nx*ny normalises the unscaled inverse FFT.
plt.imshow(grf_gpu.get()/nx/ny)
plt.colorbar()

#### FFT via PyCuda Kernels (fails)

In [None]:
import pycuda.driver as cuda
from pycuda.compiler import SourceModule
import pycuda.autoinit

In [None]:
import numpy
# Minimal PyCUDA example: double every entry of a 4x4 float32 array on the GPU.
a = numpy.random.randn(4,4).astype(numpy.float32)

# Allocate device memory of matching size and copy the host array into it.
a_gpu = cuda.mem_alloc(a.nbytes)
cuda.memcpy_htod(a_gpu, a)

# Kernel source: each thread of a 4x4 block doubles one element in place.
mod = SourceModule("""
  __global__ void doublify(float *a)
  {
    int idx = threadIdx.x + threadIdx.y*4;
    a[idx] *= 2;
  }
  """)

# Look the kernel up by name and launch it with a single 4x4x1 block.
func = mod.get_function("doublify")
func(a_gpu, block=(4,4,1))

# Copy the result back into a fresh host array and print input and output.
a_doubled = numpy.empty_like(a)
cuda.memcpy_dtoh(a_doubled, a_gpu)
print(a, "\n\n", a_doubled)

In [None]:
kernel = gpu_ctx.get_kernel("fft.cu")

In [None]:
func = kernel.get_function("doublify")

In [None]:
func.prepare("f")

In [None]:
func.prepared_async_call((4,4),(1,1,1),sim.gpu_stream, a)