In [8]:
import numpy as np

In [38]:
pontos = np.array(np.random.random([100000,2]),dtype=np.float32)

In [3]:
pontos = np.zeros([9,2])
for i in range(3):
    for j in range(3):
        pontos[i*3 + j][0] = i
        pontos[i*3 + j][1] = j

## Biblioteca Scipy

In [15]:
from scipy.spatial import distance_matrix as dm

In [39]:
%%time
distance_matrix = dm(pontos,pontos)

MemoryError: 

In [8]:
np.sum(dm(pontos,pontos))

52166839.850289136

## For Simples


In [10]:
def distancia_simples(pontos):
    resp = np.zeros([len(pontos),len(pontos)])
    for i in range(len(pontos)):
        for j in range(i):
            resp[i][j] = np.sqrt((pontos[i][0]-pontos[j][0])**2 +(pontos[i][1]-pontos[j][1])**2)
    return resp


In [11]:
%%timeit
distancia_simples(pontos)

KeyboardInterrupt: 

## For Simples com JIT

In [18]:
import numba

In [67]:
@numba.njit
def distancia_simples(pontos):
    resp = np.zeros([len(pontos),len(pontos)])
    for i in range(len(pontos)):
        for j in range(i):
            resp[i][j] = ((pontos[i][0]-pontos[j][0])**2 +(pontos[i][1]-pontos[j][1])**2)
    return resp

In [68]:
%%time
distancia_simples(pontos)

TypingError: Failed in nopython mode pipeline (step: nopython frontend)
[1m[1m[1mInvalid use of Function(<built-in function zeros>) with argument(s) of type(s): (list(int64))
 * parameterized
[1mIn definition 0:[0m
[1m    All templates rejected with literals.[0m
[1mIn definition 1:[0m
[1m    All templates rejected without literals.[0m
[1mThis error is usually caused by passing an argument of a type that is unsupported by the named function.[0m[0m
[0m[1m[1] During: resolving callee type: Function(<built-in function zeros>)[0m
[0m[1m[2] During: typing of call at <ipython-input-67-1eb8d4ccf069> (3)
[0m
[1m
File "<ipython-input-67-1eb8d4ccf069>", line 3:[0m
[1mdef distancia_simples(pontos):
[1m    resp = np.zeros([len(pontos),len(pontos)])
[0m    [1m^[0m[0m

This is not usually a problem with Numba itself but instead often caused by
the use of unsupported features or an issue in resolving types.

To see Python/NumPy features supported by the latest release of Numba visit:
http://numba.pydata.org/numba-doc/dev/reference/pysupported.html
and
http://numba.pydata.org/numba-doc/dev/reference/numpysupported.html

For more information about typing errors and how to debug them visit:
http://numba.pydata.org/numba-doc/latest/user/troubleshoot.html#my-code-doesn-t-compile

If you think your code should work with Numba, please report the error message
and traceback, along with a minimal reproducer at:
https://github.com/numba/numba/issues/new


In [22]:
np.sum(distancia_simples(pontos))*2

52366024.21245955

## For Simples com JIT otimizado

In [23]:
import numba

In [24]:
@numba.jit
def faz_coluna(v,x1,y1):
    resp = np.zeros(len(v))
    for i in range(len(v)):
        resp[i] = np.sqrt((v[i][0]-x1)**2 + (v[i][1]-y1)**2)
    return resp

In [25]:
@numba.jit
def distancia_simples_otimizada(pontos):
    resp = np.zeros([len(pontos),len(pontos)])
    for i in range(len(pontos)):
        resp[i] = faz_coluna(pontos,pontos[i][0],pontos[i][1])
    return resp

In [28]:
%%time
distancia_simples_otimizada(pontos)

CPU times: user 277 ms, sys: 108 ms, total: 385 ms
Wall time: 383 ms


array([[0.        , 0.23746096, 0.5237788 , ..., 0.70130771, 0.48783195,
        0.36012203],
       [0.23746096, 0.        , 0.39700142, ..., 0.50462848, 0.63484019,
        0.39072108],
       [0.5237788 , 0.39700142, 0.        , ..., 0.2575548 , 0.59583026,
        0.29506558],
       ...,
       [0.70130771, 0.50462848, 0.2575548 , ..., 0.        , 0.85276467,
        0.5482291 ],
       [0.48783195, 0.63484019, 0.59583026, ..., 0.85276467, 0.        ,
        0.30905756],
       [0.36012203, 0.39072108, 0.29506558, ..., 0.5482291 , 0.30905756,
        0.        ]])

In [19]:
np.sum(distancia_simples_otimizada(pontos))

52166839.850289136

In [155]:
pontos

array([[0.94959219, 0.76848511],
       [0.81605375, 0.09390135],
       [0.05808005, 0.12264902],
       ...,
       [0.10014798, 0.48187384],
       [0.73134594, 0.80992833],
       [0.63721285, 0.27310399]])

In [30]:
import cupy as cp

## Usando Cupy

In [42]:
gpu_pontos = cp.asarray(pontos)

In [43]:
squared_diff = cp.ElementwiseKernel(
'float32 x, float32 y',
 'float32 z',
'z = (x - y) * (x - y)',
'squared_diff')

In [44]:
sum_of_squares = cp.ElementwiseKernel(
'float32 x, float32 y',
 'float32 z',
'z = sqrt(x+y)',
'sum_of_squares')

In [45]:
def cupy_distance(pontos):
    resp = np.zeros([len(pontos),len(pontos)])
    for i in range(len(pontos)):
        a = squared_diff(gpu_pontos[:,0], gpu_pontos[i][0])
        b = squared_diff(gpu_pontos[:,1], gpu_pontos[i][1])
        resp[i] = sum_of_squares(a,b).get()
    return resp

In [46]:
%%time
cupy_distance(gpu_pontos)

MemoryError: 

In [177]:
sum_of_squares(a,b)

array([0.        , 0.49634093, 0.20831722, ..., 0.8843903 , 0.86885315,
       0.18991454], dtype=float32)

In [132]:
y

array([0., 1., 2., 3., 4.], dtype=float32)

In [240]:
bla = numba.cuda.current_context()

In [243]:
bla.get_memory_info()

_MemoryInfo(free=254738432, total=4227858432)

In [246]:
cp.cuda.memory.UnownedMemory

0

In [52]:
import numpy as np
from numba import cuda

USE_64 = True

if USE_64:
    bits = 64
    np_type = np.float64
else:
    bits = 32
    np_type = np.float32

@cuda.jit("void(float{}[:, :], float{}[:, :])".format(bits, bits))
def distance_matrix(mat, out):
    m = mat.shape[0]
    n = mat.shape[1]
    i, j = cuda.grid(2)
    d = 0
    if i < m and j < m:
        for k in range(n):
            tmp = mat[i, k] - mat[j, k]
            d += tmp * tmp
        out[i, j] = d

def gpu_dist_matrix(mat):
    rows = mat.shape[0]

    block_dim = (32, 32)
    grid_dim = (int(rows/block_dim[0] + 1), int(rows/block_dim[1] + 1))

    stream = cuda.stream()
    mat2 = cuda.to_device(np.asarray(mat, dtype=np_type), stream=stream)
    out2 = cuda.device_array((rows, rows))
    distance_matrix[grid_dim, block_dim](mat2, out2)
    out = out2.copy_to_host(stream=stream)

    return out

In [53]:
%%timeit
gpu_dist_matrix(pontos)

CudaAPIError: [2] Call to cuMemAlloc results in CUDA_ERROR_OUT_OF_MEMORY

In [222]:
%%timeit
gpu_dist_matrix(pontos)

52245818.12718841

TypeError: Wrong number of arguments for sqrt