In [1]:
from tqdm.notebook import tqdm
from random import randint
import numpy as np
from numba import jit, prange

# ¿Cómo acelerar tu código en Python?

## Python es lento

* Interpretado

In [2]:
res = 0

# The invalid `int += str` is only detected when iteration 180 actually runs:
# nothing checks the loop body ahead of time. The error is caught and printed
# (instead of propagating) so this demonstration does not abort a
# "Restart Kernel -> Run All" of the notebook.
try:
    for i in tqdm(range(200)):
        if i == 180:
            res += "hola"
        else:
            res += 1
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

  0%|          | 0/200 [00:00<?, ?it/s]

TypeError: unsupported operand type(s) for +=: 'int' and 'str'

* Tipado dinámico

In [3]:
# `n` is first bound to an int, so integer arithmetic works on it.
n = 100
print(type(n), n + 1, sep="\n")

<class 'int'>
101


In [4]:
# `n` still holds the value assigned in the previous cell...
print(n)

# ...and the very same name can be rebound to an object of another type.
n = [*range(1000)]
print(type(n), n[:10], sep="\n")

100
<class 'list'>
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]


* Bounds checking

In [5]:
# Build a list of 20 integers, each produced by randint(1, 10).
l = [randint(1, 10) for _draw in range(20)]
print(l)

[10, 3, 1, 10, 4, 9, 10, 3, 10, 7, 8, 8, 3, 5, 8, 5, 6, 6, 6, 10]


In [6]:
# Index 19 is the last valid position of the 20-element list built above.
print(l[19])

10


In [7]:
# Index 20 is one past the end of the 20-element list, so Python's bounds
# check rejects the access. The error is caught and printed (instead of
# propagating) so this demonstration does not abort a "Run All".
try:
    print(l[20])
except IndexError as err:
    print(f"{type(err).__name__}: {err}")

IndexError: list index out of range

## Numba

* Permite compilar funciones usando [LLVM](https://llvm.org/)

![llvm](https://linuxhint.com/wp-content/uploads/2017/12/LLVM-Logo-Derivative-1.png)

* Se usa como un decorador

```python
@jit
def func(*args):
    # do something
```

* Soporta un [subconjunto](https://numba.pydata.org/numba-doc/dev/reference/pysupported.html) de Python

* No realiza bounds checking por defecto

* Si una función compilada lanza una excepción, puede producir memory leaks

## Acelerando el código

Caso de estudio: RMSE (raíz del error cuadrático medio)

In [8]:
# Two vectors of one million standard-normal samples each (v1 is drawn first).
v1, v2 = np.random.randn(1_000_000), np.random.randn(1_000_000)

### Python puro

In [15]:
def rmse(arr_1, arr_2):
    """Root-mean-square error between two sequences, using a plain Python loop.

    The elements are paired with ``zip`` and their squared differences are
    accumulated one at a time. The total is divided by ``len(arr_1)`` and the
    square root is taken with ``np.sqrt``.

    Args:
        arr_1: First sequence of numbers; its length is used as the divisor.
        arr_2: Second sequence of numbers, paired element-wise with ``arr_1``.

    Returns:
        The square root of the mean squared difference.
    """
    n_items = len(arr_1)

    total = 0
    for a, b in zip(arr_1, arr_2):
        diff = a - b
        total += diff**2

    mean_sq = total / n_items
    return np.sqrt(mean_sq)

In [16]:
# Reference value from the pure-Python implementation on the two random vectors.
print(rmse(v1, v2))

1.4146686542251374


In [None]:
%%timeit
rmse(v1, v2)

### Numpy

In [17]:
def rmse_np(arr_1, arr_2):
    """Root-mean-square error computed with vectorised NumPy operations.

    The element-wise difference is squared and summed with ``np.sum``; the
    sum is scaled by ``1 / len(arr_1)`` before taking the square root.

    Args:
        arr_1: First array; its length is used for the scaling factor.
        arr_2: Second array, subtracted element-wise from ``arr_1``.

    Returns:
        The square root of the mean squared difference.
    """
    size = len(arr_1)
    scale = 1 / size

    squared_diff = (arr_1 - arr_2) ** 2
    return np.sqrt(scale * np.sum(squared_diff))

In [18]:
# Same inputs as the pure-Python run, now through the NumPy implementation.
print(rmse_np(v1, v2))

1.41466865422516


In [None]:
%%timeit
rmse_np(v1, v2)

### Numba

In [19]:
@jit(nopython=True)
def rmse_numba(arr_1, arr_2):
    """Root-mean-square error with an explicit loop, compiled by Numba.

    The body mirrors the pure-Python ``rmse`` loop: elements are paired with
    ``zip``, squared differences are accumulated, and the total is divided by
    ``len(arr_1)`` before the square root is taken.

    Args:
        arr_1: First array; its length is used as the divisor.
        arr_2: Second array, paired element-wise with ``arr_1``.

    Returns:
        The square root of the mean squared difference.
    """
    n_items = len(arr_1)

    total = 0
    for a, b in zip(arr_1, arr_2):
        diff = a - b
        total += diff**2

    mean_sq = total / n_items
    return np.sqrt(mean_sq)

In [20]:
# Same inputs again, now through the Numba-compiled implementation.
print(rmse_numba(v1, v2))

1.4146686542251374


In [None]:
%%timeit
rmse_numba(v1, v2)

## Otro ejemplo

Calcular una matriz de similitud coseno entre todos los pares de muestras

In [23]:
# A 1000 x 5 matrix of standard-normal values, cast to float32.
samples = np.random.randn(1000, 5).astype(np.float32)

### Python + numpy

In [24]:
def sims(mat):
    """Pairwise cosine similarity between the rows of ``mat``.

    Every pair ``(i, j)`` is visited with two nested Python loops, and the
    outer loop is wrapped in ``tqdm`` to display progress. Each entry is the
    dot product of the two rows divided by the product of their norms.

    Args:
        mat: 2-D array whose rows are the vectors to compare.

    Returns:
        A ``(len(mat), len(mat))`` float32 array of similarities.
    """
    n_rows = len(mat)
    out = np.empty((n_rows, n_rows), dtype=np.float32)

    for i in tqdm(range(n_rows)):
        for j in range(n_rows):
            row_i, row_j = mat[i], mat[j]
            norm_product = np.linalg.norm(row_i) * np.linalg.norm(row_j)
            out[i, j] = np.dot(row_i, row_j) / norm_product

    return out

In [25]:
# We rely on the tqdm progress bar here because timeit would take too long.
print(sims(samples))

  0%|          | 0/1000 [00:00<?, ?it/s]

[[ 1.         -0.21703635 -0.49474928 ...  0.8253892   0.32849225
   0.6255203 ]
 [-0.21703635  1.         -0.2131697  ...  0.23344891 -0.03217232
   0.315679  ]
 [-0.49474928 -0.2131697   1.         ... -0.27979162  0.4768727
   0.02333749]
 ...
 [ 0.8253892   0.23344891 -0.27979162 ...  1.0000001   0.639371
   0.91750354]
 [ 0.32849225 -0.03217232  0.4768727  ...  0.639371    0.99999994
   0.7428645 ]
 [ 0.6255203   0.315679    0.02333749 ...  0.91750354  0.7428645
   1.        ]]


### Numba

In [26]:
@jit(nopython=True)
def sims_numba(mat):
    """Pairwise cosine similarity between the rows of ``mat``, compiled by Numba.

    Uses the same nested-loop algorithm as ``sims`` without the progress bar.
    Each entry is the dot product of two rows divided by the product of
    their norms.

    Args:
        mat: 2-D array whose rows are the vectors to compare.

    Returns:
        A ``(len(mat), len(mat))`` float32 array of similarities.
    """
    n_rows = len(mat)
    out = np.empty((n_rows, n_rows), dtype=np.float32)

    for i in range(n_rows):
        for j in range(n_rows):
            row_i = mat[i]
            row_j = mat[j]
            norm_product = np.linalg.norm(row_i) * np.linalg.norm(row_j)
            out[i, j] = np.dot(row_i, row_j) / norm_product

    return out

In [27]:
# Keep the result of the Numba version so it can be compared with the parallel one.
out_numba = sims_numba(samples)

In [None]:
%%timeit
sims_numba(samples)

### Numba parallel

In [29]:
@jit(nopython=True, parallel=True)
def sims_numba_parallel(mat):
    """Pairwise cosine similarity between the rows of ``mat``, in parallel.

    Each entry is ``dot(mat[i], mat[j]) / (norm_i * norm_j)``, where a row's
    norm is computed as ``sqrt(dot(row, row))``.

    Args:
        mat: 2-D floating-point array whose rows are the vectors to compare.

    Returns:
        A ``(len(mat), len(mat))`` float32 array of similarities.
    """
    size = len(mat)
    res = np.empty((size, size), dtype=np.float32)

    # Every row norm is needed `size` times, so compute each one once up
    # front instead of inside the O(size**2) loop. The buffer uses the input
    # dtype so the arithmetic matches computing the norms inline.
    norms = np.empty(size, dtype=mat.dtype)
    for k in range(size):
        row = mat[k]
        norms[k] = np.sqrt(np.dot(row, row))

    # Parallelise the OUTER loop: a single parallel region is opened and each
    # iteration owns one full row of `res`, so no two iterations write to the
    # same element. With prange on the inner loop, a parallel region would be
    # launched once per outer iteration.
    for i in prange(size):
        vi = mat[i]
        norm_i = norms[i]
        for j in range(size):
            res[i, j] = np.dot(vi, mat[j]) / (norm_i * norms[j])

    return res

In [30]:
# Keep the result of the parallel version so it can be compared with the serial one.
out_parallel = sims_numba_parallel(samples)

In [None]:
%%timeit
sims_numba_parallel(samples)

In [31]:
# Check that the serial and parallel Numba results agree within floating-point tolerance.
np.allclose(out_numba, out_parallel)

True

## Veamos un ejemplo más complejo