## 1. [JAX](https://github.com/google/jax) 

In [None]:
import jax.numpy as jnp
from jax import jit
import numpy as np

In [None]:
from timeit import default_timer as timer # for timing the application

In [None]:
def slow_f(x):
    """Return ``x * x + x * 2.0``.

    The expression is a short chain of arithmetic operations; for array
    inputs these are element-wise ops, which see a large benefit from fusion.
    """
    squared = x * x
    doubled = x * 2.0
    return squared + doubled

In [None]:
# CPU baseline: time how long NumPy needs to create a 5000 x 5000 array of ones.
start = timer()
x = np.ones((5000, 5000))
end = timer()
# The timed operation is an array creation, not a sum, so label it as such.
print("Time needed to create the array with cpu: ", end - start)

In [None]:
# This measurement is deliberately wrong: JAX does not block until the result
# is ready, so the timer can stop before the array has actually been computed.
start = timer()
x_no_block = jnp.ones((5000, 5000))
end = timer()
# The timed operation is an array creation, not a sum; the label also makes
# clear that this number was taken without waiting for the result.
print("Time needed to create the array with gpu (not blocking): ", end - start)

In [None]:
# This is the correct solution: block_until_ready() waits until the array has
# really been computed, so the timer covers the whole operation.
start = timer()
x_block = jnp.ones((5000, 5000)).block_until_ready()
end = timer()
# The timed operation is an array creation, not a sum, so label it as such.
print("Time needed to create the array with gpu: ", end - start)

## 2. [CuPy](https://cupy.dev/)

In [None]:
import cupy as cp
import numpy as np

In [None]:
# Extent of each of the three array dimensions used by the cells below.
problem_size = [100] * 3

In [None]:
start = timer()
# The * operator unpacks the list into separate positional arguments, so with
# problem_size = [100, 100, 100] this is the call np.random.rand(100, 100, 100).
rand_cpu = np.random.rand(*problem_size)
end = timer()
# The timed operation is random-array generation, not a sum.
print("Time needed to generate the random array with cpu: ", end - start)

In [None]:
start = timer()
rand_gpu = cp.random.rand(*problem_size)
# CuPy launches GPU work asynchronously. Wait for the current stream to finish
# so the timer covers the actual computation and not only the kernel launch.
cp.cuda.get_current_stream().synchronize()
end = timer()
# The timed operation is random-array generation, not a sum.
print("Time needed to generate the random array with gpu: ", end - start)

Transferring data from the CPU to the GPU

In [None]:
# Create the data with NumPy first ...
dat_cpu = np.random.rand(*problem_size)
# ... then hand the NumPy array to CuPy to obtain the GPU-side array.
dat_gpu = cp.asarray(dat_cpu)

Transferring data from the GPU to the CPU

In [None]:
# Create the data with CuPy first ...
dat_gpu = cp.random.rand(*problem_size)
# ... then convert the CuPy array back into a NumPy array on the CPU side.
dat_cpu = cp.asnumpy(dat_gpu)

Transferring data works much like it does in PyTorch!

## 3. [Numba](https://numba.pydata.org/)

In [None]:
import numba
# Print the installed Numba version for reference.
print(numba.__version__)

In [None]:
def bubblesort(X):
    """Sort ``X`` in place in ascending order using bubble sort.

    Every pass walks over the part of ``X`` that may still be unordered and
    swaps each adjacent pair whose left element is greater than its right
    neighbour. Nothing is returned; ``X`` itself is modified.
    """
    n_items = len(X)
    # ``unsorted`` is the length of the leading part still to be processed;
    # it shrinks by one after every pass.
    for unsorted in range(n_items, 1, -1):
        for left in range(unsorted - 1):
            right = left + 1
            if X[left] > X[right]:
                X[left], X[right] = X[right], X[left]

In [None]:
import numpy as np

# Reference data: ascending float32 values starting at 0.0, step 0.01, below 10.0.
original = np.arange(0.0, 10.0, 0.01, dtype=np.float32)
# Shuffle an independent copy in place so the ascending reference stays intact.
shuffled = np.array(original, copy=True)
np.random.shuffle(shuffled)

In [None]:
# Sort a copy so that `shuffled` stays available as input for later runs.
# NOTE(review): the name `sorted` shadows Python's built-in sorted(); it is
# kept here because the following cells refer to this variable.
sorted = shuffled.copy()
start = timer()
bubblesort(sorted)  # pure-Python version; sorts the array in place
end = timer()
print("Time needed to run the bubblesort with cpu: ", end - start)

In [None]:
# Check the result: after sorting, the array must equal the ascending `original`.
print(np.array_equal(sorted, original))

Now let Numba compile the bubblesort algorithm into its internal (compiled) representation.

In [None]:
# Compile bubblesort for 1-D float32 arrays ("void(f4[:])": takes such an array
# and returns nothing). nopython=True requests fully compiled code explicitly
# instead of relying on the version-dependent default of numba.jit.
bubblesort_jit = numba.jit("void(f4[:])", nopython=True)(bubblesort)

In [None]:
sorted[:] = shuffled[:] # reset `sorted` back to the shuffled (unsorted) values

In [None]:
# Time the Numba-compiled version on the same shuffled input.
start = timer()
bubblesort_jit(sorted)  # sorts the array in place
end = timer()
print("Time needed to run the bubblesort with cpu: ", end - start)

In [None]:
# Check that the compiled version sorted correctly: the result must equal
# the ascending `original` array.
print(np.array_equal(sorted, original))

In [None]:
# The same compilation can also be requested with decorator syntax.
# nopython=True requests fully compiled code explicitly instead of relying on
# the version-dependent default of numba.jit.
@numba.jit("void(f4[:])", nopython=True)
def bubblesort_jit(X):
    """Sort the 1-D float32 array ``X`` in place in ascending order.

    Bubble sort: every pass walks over the part of ``X`` that may still be
    unordered and swaps each adjacent pair whose left element is greater than
    its right neighbour. Nothing is returned; ``X`` itself is modified.
    """
    N = len(X)
    for end in range(N, 1, -1):
        for i in range(end - 1):
            if X[i] > X[i + 1]:
                # Tuple assignment swaps the pair without a temporary variable.
                X[i], X[i + 1] = X[i + 1], X[i]

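For more examples, visit https://numba.pydata.org/numba-doc/0.13/tutorial_firststeps.html (note that this tutorial targets the old Numba 0.13 release; the current documentation is at https://numba.readthedocs.io/en/stable/user/5minguide.html).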