In [3]:
import numpy as np
from minas.map_minas_support import *
np.random.seed(300)
classes = list(map(mkClass, range(1000)))
clusters = sampleClusters(classes)
inputStream = loopExamplesIter(classes)
examples = list(zip(range(200), inputStream))

In [4]:
def minDist(clusters, item):
    dists = map(lambda cl: (sum((cl.center - item) ** 2) ** (1/2), cl), clusters)
    d, cl = min(dists, key=lambda x: x[0])
    return d, cl
counter = 0
results = []
init = time.time()
for i, example in examples:
    counter += 1
    result = minDist(clusters, example.item)
    results.append(result)
elapsed = time.time() - init
len(results)
print(f'minasOnline testSamples {elapsed} seconds, consumed {counter} items, {int(counter / elapsed)} i/s')

minasOnline testSamples 1.3196775913238525 seconds, consumed 200 items, 151 i/s


In [21]:
%%time
from numba import jit

@jit(nopython=True)
def minDistJIT(clusters, item):
    dists = []
    for cl in clusters:
        center = cl[1]
        d = np.sum((center - item) ** 2) ** (1/2)
        # return (d, cl[0])
        dists.append((d, cl[0]))
    # dists = map(lambda cl: (sum((cl[1] - item) ** 2) ** (1/2), cl[0]), clusters)
    min_d: float = -1
    min_clId: int = -1
    for d, clId in dists:
        if min_d == -1 or min_d > d:
            min_d = d
            min_clId = clId
    return min_d, min_clId
minDistJIT(clusters=[ (0, np.array([0,0])) ], item=np.array([0,0]))

CPU times: user 568 ms, sys: 3.25 ms, total: 572 ms
Wall time: 573 ms


In [22]:
localClusters = [(id(cl), cl.center) for cl in clusters]
counter = 0
results = []
init = time.time()
for i, example in examples:
    counter += 1
    result = minDistJIT(localClusters, example.item)
    results.append(result)
elapsed = time.time() - init
len(results)
print(f'minasOnline testSamples {elapsed} seconds, consumed {counter} items, {int(counter / elapsed)} i/s')

minasOnline testSamples 10.152459383010864 seconds, consumed 200 items, 19 i/s


In [26]:
%%time
from numba import jit
import dask

@dask.delayed
@jit(nopython=True)
def minDistJIT(clusters, item):
    dists = []
    for cl in clusters:
        center = cl[1]
        d = np.sum((center - item) ** 2) ** (1/2)
        # return (d, cl[0])
        dists.append((d, cl[0]))
    # dists = map(lambda cl: (sum((cl[1] - item) ** 2) ** (1/2), cl[0]), clusters)
    min_d: int = -1
    min_clId: int = -1
    for d, clId in dists:
        if min_d == -1 or min_d > d:
            min_d = d
            min_clId = clId
    return min_d, min_clId
r = minDistJIT(clusters=[ (0, np.array([0,0])) ], item=np.array([0,0]))
print(r.compute())

(0.0, 0)
CPU times: user 464 ms, sys: 0 ns, total: 464 ms
Wall time: 462 ms


In [27]:
localClusters = [(id(cl), cl.center) for cl in clusters]
counter = 0
results = []
init = time.time()
for i, example in examples:
    counter += 1
    result = minDistJIT(localClusters, example.item)
    results.append(result.compute())
elapsed = time.time() - init
len(results)
print(f'minasOnline testSamples {elapsed} seconds, consumed {counter} items, {int(counter / elapsed)} i/s')

minasOnline testSamples 17.49715542793274 seconds, consumed 200 items, 11 i/s
