# Why GT4Py? (Part II)

This notebook compares NumPy, CuPy and GT4Py implementations of the Laplacian stencil:
```
lap[i, j, k] = - 4 * phi[  i,   j, k] 
               +     phi[i-1,   j, k] 
               +     phi[i+1,   j, k] 
               +     phi[  i, j-1, k] 
               +     phi[  i, j+1, k]
```

## NumPy

In [None]:
import numpy as np

# Array extents; lap_numpy applies the stencil along the first two axes
# and carries the third one along unchanged.
shape = (512, 512, 128)

def lap_numpy(phi, lap):
    """Write the five-point Laplacian of ``phi`` into the interior of ``lap``.

    The stencil acts along the first two axes; any trailing axes are carried
    along unchanged. Only ``lap[1:-1, 1:-1]`` is assigned, so the one-cell
    border of ``lap`` along those two axes keeps whatever it held before the
    call. The function works in place and returns nothing.
    """
    inner = slice(1, -1)
    minus = slice(None, -2)  # neighbour at index - 1
    plus = slice(2, None)    # neighbour at index + 1

    centre = phi[inner, inner]
    west = phi[minus, inner]
    east = phi[plus, inner]
    south = phi[inner, minus]
    north = phi[inner, plus]

    # Operand order is kept as-is so the floating-point result is unchanged.
    lap[inner, inner] = -4. * centre + west + east + south + north
    
phi = np.random.rand(*shape)
lap = np.empty_like(phi)

%timeit lap_numpy(phi, lap)

## CuPy - C-style

In [None]:
import cupy as cp

# Array extents; lap_cupy_c applies the stencil along the first two axes
# and carries the third one along unchanged.
shape = (512, 512, 128)

def lap_cupy_c(phi, lap):
    """Five-point Laplacian along the two leading axes, written into ``lap``.

    The notebook calls this with CuPy arrays, but the body relies only on
    slicing and arithmetic. The interior ``lap[1:-1, 1:-1]`` is overwritten;
    the one-cell border along the first two axes is left as it was. Nothing
    is returned.
    """
    mid, lo, hi = slice(1, -1), slice(None, -2), slice(2, None)

    # Accumulate term by term, in the same order as the stencil formula.
    acc = -4. * phi[mid, mid]
    acc = acc + phi[lo, mid]   # neighbour at i - 1
    acc = acc + phi[hi, mid]   # neighbour at i + 1
    acc = acc + phi[mid, lo]   # neighbour at j - 1
    acc = acc + phi[mid, hi]   # neighbour at j + 1

    lap[mid, mid] = acc
    
phi = cp.random.rand(*shape)
lap = cp.empty_like(phi)

%timeit lap_cupy_c(phi, lap)

## CuPy - F-style

In [None]:
import cupy as cp

# Same extents as the C-style cell with the axis order reversed:
# lap_cupy_f leaves the first axis untouched and applies the stencil
# along the last two.
shape = (128, 512, 512)

def lap_cupy_f(phi, lap):
    """Five-point Laplacian along axes 1 and 2, written into ``lap``.

    The first axis is taken whole and is not differentiated. Only
    ``lap[:, 1:-1, 1:-1]`` is assigned; the one-cell border along axes 1
    and 2 keeps its previous contents. The notebook calls this with CuPy
    arrays. Nothing is returned.
    """
    whole = slice(None)
    inner = slice(1, -1)
    before = slice(None, -2)  # neighbour at index - 1
    after = slice(2, None)    # neighbour at index + 1

    lap[whole, inner, inner] = (
        -4. * phi[whole, inner, inner]
        + phi[whole, inner, before]
        + phi[whole, inner, after]
        + phi[whole, before, inner]
        + phi[whole, after, inner]
    )
    
phi = cp.random.rand(*shape)
lap = cp.empty_like(phi)

%timeit lap_cupy_f(phi, lap)

## GT4Py

In [None]:
import gt4py as gt
from gt4py import gtscript
import numpy as np

# GT4Py backend name, passed to the stencil decorator and to the storage
# allocators below.
backend = "gtcuda"
# Array extents; the stencil offsets act on the first two axes.
shape = (512, 512, 128)

# Five-point Laplacian written as a GT4Py stencil for `backend`.
#   phi: input field, read at the current point and at its four neighbours
#        one step away along the first and second axes.
#   lap: output field, assigned from those five values.
@gtscript.stencil(backend=backend, verbose=True)
def lap_gt4py(phi: gtscript.Field[float], lap: gtscript.Field[float]):
    # NOTE(review): interval(...) presumably selects the whole vertical range
    # and PARALLEL presumably means no ordering between levels; confirm
    # against the GT4Py documentation.
    with computation(PARALLEL), interval(...):
        # Indices are offsets relative to the point being computed.
        lap = (
            - 4. * phi[ 0,  0, 0]
            +      phi[-1,  0, 0]
            +      phi[+1,  0, 0]
            +      phi[ 0, -1, 0]
            +      phi[ 0, +1, 0]
        )
        
# Random NumPy input, wrapped in a GT4Py storage for `backend`, plus an
# empty output storage of the same shape.
# NOTE(review): the positional (1, 1, 0) looks like the storages' default
# origin (one-cell offset along the first two axes) - confirm against the
# GT4Py storage API of the installed version.
phi_np = np.random.rand(*shape)
phi = gt.storage.from_array(phi_np, backend, (1, 1, 0))
lap = gt.storage.empty(backend, (1, 1, 0), shape, float)

# Compute domain: the full array minus a one-cell border along the first two
# axes, which the stencil's +/-1 offsets read from.
domain = (shape[0] - 2, shape[1] - 2, shape[2])

# Untimed first call. Per the original note this is what copies the data to
# GPU memory, so that transfer stays out of the measurement below.
lap_gt4py(phi=phi, lap=lap, origin=(1, 1, 0), domain=domain)

%timeit lap_gt4py(phi=phi, lap=lap, origin=(1, 1, 0), domain=domain)