Skip to content

Commit

Permalink
Run black on everything
Browse files (browse the repository at this point in the history)
  • Loading branch information
lmcinnes committed Jun 29, 2018
1 parent 3470770 commit b3a1b28
Show file tree
Hide file tree
Showing 10 changed files with 1,261 additions and 917 deletions.
3 changes: 2 additions & 1 deletion umap/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .umap_ import UMAP

import pkg_resources
__version__ = pkg_resources.get_distribution('umap-learn').version

__version__ = pkg_resources.get_distribution("umap-learn").version
117 changes: 59 additions & 58 deletions umap/distances.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,8 @@ def jaccard(x, y):
for i in range(x.shape[0]):
x_true = x[i] != 0
y_true = y[i] != 0
num_non_zero += (x_true or y_true)
num_equal += (x_true and y_true)
num_non_zero += x_true or y_true
num_equal += x_true and y_true

if num_non_zero == 0.0:
return 0.0
Expand All @@ -176,7 +176,7 @@ def matching(x, y):
for i in range(x.shape[0]):
x_true = x[i] != 0
y_true = y[i] != 0
num_not_equal += (x_true != y_true)
num_not_equal += x_true != y_true

return float(num_not_equal) / x.shape[0]

Expand All @@ -188,8 +188,8 @@ def dice(x, y):
for i in range(x.shape[0]):
x_true = x[i] != 0
y_true = y[i] != 0
num_true_true += (x_true and y_true)
num_not_equal += (x_true != y_true)
num_true_true += x_true and y_true
num_not_equal += x_true != y_true

if num_not_equal == 0.0:
return 0.0
Expand All @@ -204,14 +204,15 @@ def kulsinski(x, y):
for i in range(x.shape[0]):
x_true = x[i] != 0
y_true = y[i] != 0
num_true_true += (x_true and y_true)
num_not_equal += (x_true != y_true)
num_true_true += x_true and y_true
num_not_equal += x_true != y_true

if num_not_equal == 0:
return 0.0
else:
return float(num_not_equal - num_true_true + x.shape[0]) / \
(num_not_equal + x.shape[0])
return float(num_not_equal - num_true_true + x.shape[0]) / (
num_not_equal + x.shape[0]
)


@numba.njit()
Expand All @@ -220,7 +221,7 @@ def rogers_tanimoto(x, y):
for i in range(x.shape[0]):
x_true = x[i] != 0
y_true = y[i] != 0
num_not_equal += (x_true != y_true)
num_not_equal += x_true != y_true

return (2.0 * num_not_equal) / (x.shape[0] + num_not_equal)

Expand All @@ -231,10 +232,9 @@ def russellrao(x, y):
for i in range(x.shape[0]):
x_true = x[i] != 0
y_true = y[i] != 0
num_true_true += (x_true and y_true)
num_true_true += x_true and y_true

if (num_true_true == np.sum(x != 0) and
num_true_true == np.sum(y != 0)):
if num_true_true == np.sum(x != 0) and num_true_true == np.sum(y != 0):
return 0.0
else:
return float(x.shape[0] - num_true_true) / (x.shape[0])
Expand All @@ -246,7 +246,7 @@ def sokal_michener(x, y):
for i in range(x.shape[0]):
x_true = x[i] != 0
y_true = y[i] != 0
num_not_equal += (x_true != y_true)
num_not_equal += x_true != y_true

return (2.0 * num_not_equal) / (x.shape[0] + num_not_equal)

Expand All @@ -258,8 +258,8 @@ def sokal_sneath(x, y):
for i in range(x.shape[0]):
x_true = x[i] != 0
y_true = y[i] != 0
num_true_true += (x_true and y_true)
num_not_equal += (x_true != y_true)
num_true_true += x_true and y_true
num_not_equal += x_true != y_true

if num_not_equal == 0.0:
return 0.0
Expand All @@ -270,7 +270,7 @@ def sokal_sneath(x, y):
@numba.njit()
def haversine(x, y):
if x.shape[0] != 2:
raise ValueError('haversine is only defined for 2 dimensional data')
raise ValueError("haversine is only defined for 2 dimensional data")
sin_lat = np.sin(0.5 * (x[0] - y[0]))
sin_long = np.sin(0.5 * (x[1] - y[1]))
result = np.sqrt(sin_lat ** 2 + np.cos(x[0]) * np.cos(y[0]) * sin_long ** 2)
Expand All @@ -285,18 +285,18 @@ def yule(x, y):
for i in range(x.shape[0]):
x_true = x[i] != 0
y_true = y[i] != 0
num_true_true += (x_true and y_true)
num_true_false += (x_true and (not y_true))
num_false_true += ((not x_true) and y_true)
num_true_true += x_true and y_true
num_true_false += x_true and (not y_true)
num_false_true += (not x_true) and y_true

num_false_false = x.shape[0] - num_true_true - num_true_false - \
num_false_true
num_false_false = x.shape[0] - num_true_true - num_true_false - num_false_true

if (num_true_false == 0.0 or num_false_true == 0.0):
if num_true_false == 0.0 or num_false_true == 0.0:
return 0.0
else:
return (2.0 * num_true_false * num_false_true) / \
(num_true_true * num_false_false + num_true_false * num_false_true)
return (2.0 * num_true_false * num_false_true) / (
num_true_true * num_false_false + num_true_false * num_false_true
)


@numba.njit()
Expand All @@ -306,8 +306,8 @@ def cosine(x, y):
norm_y = 0.0
for i in range(x.shape[0]):
result += x[i] * y[i]
norm_x += x[i]**2
norm_y += y[i]**2
norm_x += x[i] ** 2
norm_y += y[i] ** 2

if norm_x == 0.0 and norm_y == 0.0:
return 0.0
Expand Down Expand Up @@ -344,41 +344,42 @@ def correlation(x, y):
elif dot_product == 0.0:
return 1.0
else:
return (1.0 - (dot_product / np.sqrt(norm_x * norm_y)))
return 1.0 - (dot_product / np.sqrt(norm_x * norm_y))


named_distances = {
# general minkowski distances
'euclidean': euclidean,
'l2': euclidean,
'manhattan': manhattan,
'taxicab': manhattan,
'l1': manhattan,
'chebyshev': chebyshev,
'linfinity': chebyshev,
'linfty': chebyshev,
'linf': chebyshev,
'minkowski': minkowski,
"euclidean": euclidean,
"l2": euclidean,
"manhattan": manhattan,
"taxicab": manhattan,
"l1": manhattan,
"chebyshev": chebyshev,
"linfinity": chebyshev,
"linfty": chebyshev,
"linf": chebyshev,
"minkowski": minkowski,
# Standardised/weighted distances
'seuclidean': standardised_euclidean,
'standardised_euclidean': standardised_euclidean,
'wminkowski': weighted_minkowski,
'weighted_minkowski': weighted_minkowski,
'mahalanobis': mahalanobis,
"seuclidean": standardised_euclidean,
"standardised_euclidean": standardised_euclidean,
"wminkowski": weighted_minkowski,
"weighted_minkowski": weighted_minkowski,
"mahalanobis": mahalanobis,
# Other distances
'canberra': canberra,
'cosine': cosine,
'correlation': correlation,
'haversine': haversine,
'braycurtis': bray_curtis,
"canberra": canberra,
"cosine": cosine,
"correlation": correlation,
"haversine": haversine,
"braycurtis": bray_curtis,
# Binary distances
'hamming': hamming,
'jaccard': jaccard,
'dice': dice,
'matching': matching,
'kulsinski': kulsinski,
'rogerstanimoto': rogers_tanimoto,
'russellrao': russellrao,
'sokalsneath': sokal_sneath,
'sokalmichener': sokal_michener,
'yule': yule,
"hamming": hamming,
"jaccard": jaccard,
"dice": dice,
"matching": matching,
"kulsinski": kulsinski,
"rogerstanimoto": rogers_tanimoto,
"russellrao": russellrao,
"sokalsneath": sokal_sneath,
"sokalmichener": sokal_michener,
"yule": yule,
}
101 changes: 59 additions & 42 deletions umap/nndescent.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,17 @@
import numpy as np
import numba

from umap.utils import (tau_rand,
make_heap,
heap_push,
unchecked_heap_push,
smallest_flagged,
rejection_sample,
build_candidates,
new_build_candidates,
deheap_sort)
from umap.utils import (
tau_rand,
make_heap,
heap_push,
unchecked_heap_push,
smallest_flagged,
rejection_sample,
build_candidates,
new_build_candidates,
deheap_sort,
)

from umap.rp_tree import search_flat_tree

Expand Down Expand Up @@ -42,9 +44,18 @@ def make_nn_descent(dist, dist_args):
"""

@numba.njit(parallel=True)
def nn_descent(data, n_neighbors, rng_state, max_candidates=50,
n_iters=10, delta=0.001, rho=0.5,
rp_tree_init=True, leaf_array=None, verbose=False):
def nn_descent(
data,
n_neighbors,
rng_state,
max_candidates=50,
n_iters=10,
delta=0.001,
rho=0.5,
rp_tree_init=True,
leaf_array=None,
verbose=False,
):
n_vertices = data.shape[0]

current_graph = make_heap(data.shape[0], n_neighbors)
Expand All @@ -63,23 +74,23 @@ def nn_descent(data, n_neighbors, rng_state, max_candidates=50,
for j in range(i + 1, leaf_array.shape[1]):
if leaf_array[n, j] < 0:
break
d = dist(data[leaf_array[n, i]], data[leaf_array[n, j]],
*dist_args)
heap_push(current_graph, leaf_array[n, i], d,
leaf_array[n, j],
1)
heap_push(current_graph, leaf_array[n, j], d,
leaf_array[n, i],
1)
d = dist(
data[leaf_array[n, i]], data[leaf_array[n, j]], *dist_args
)
heap_push(
current_graph, leaf_array[n, i], d, leaf_array[n, j], 1
)
heap_push(
current_graph, leaf_array[n, j], d, leaf_array[n, i], 1
)

for n in range(n_iters):
if verbose:
print("\t", n, " / ", n_iters)

candidate_neighbors = build_candidates(current_graph,
n_vertices,
n_neighbors, max_candidates,
rng_state)
candidate_neighbors = build_candidates(
current_graph, n_vertices, n_neighbors, max_candidates, rng_state
)

c = 0
for i in range(n_vertices):
Expand All @@ -89,8 +100,11 @@ def nn_descent(data, n_neighbors, rng_state, max_candidates=50,
continue
for k in range(max_candidates):
q = int(candidate_neighbors[0, i, k])
if q < 0 or not candidate_neighbors[2, i, j] and not \
candidate_neighbors[2, i, k]:
if (
q < 0
or not candidate_neighbors[2, i, j]
and not candidate_neighbors[2, i, k]
):
continue

d = dist(data[p], data[q], *dist_args)
Expand All @@ -109,8 +123,7 @@ def make_initialisations(dist, dist_args):
@numba.njit(parallel=True)
def init_from_random(n_neighbors, data, query_points, heap, rng_state):
for i in range(query_points.shape[0]):
indices = rejection_sample(n_neighbors, data.shape[0],
rng_state)
indices = rejection_sample(n_neighbors, data.shape[0], rng_state)
for j in range(indices.shape[0]):
if indices[j] < 0:
continue
Expand All @@ -121,10 +134,14 @@ def init_from_random(n_neighbors, data, query_points, heap, rng_state):
@numba.njit(parallel=True)
def init_from_tree(tree, data, query_points, heap, rng_state):
for i in range(query_points.shape[0]):
indices = search_flat_tree(query_points[i], tree.hyperplanes,
tree.offsets, tree.children,
tree.indices,
rng_state)
indices = search_flat_tree(
query_points[i],
tree.hyperplanes,
tree.offsets,
tree.children,
tree.indices,
rng_state,
)

for j in range(indices.shape[0]):
if indices[j] < 0:
Expand All @@ -137,8 +154,9 @@ def init_from_tree(tree, data, query_points, heap, rng_state):
return init_from_random, init_from_tree


def initialise_search(forest, data, query_points, n_neighbors,
init_from_random, init_from_tree, rng_state):
def initialise_search(
forest, data, query_points, n_neighbors, init_from_random, init_from_tree, rng_state
):
results = make_heap(query_points.shape[0], n_neighbors)
init_from_random(n_neighbors, data, query_points, results, rng_state)
if forest is not None:
Expand All @@ -150,11 +168,7 @@ def initialise_search(forest, data, query_points, n_neighbors,

def make_initialized_nnd_search(dist, dist_args):
@numba.njit(parallel=True)
def initialized_nnd_search(data,
indptr,
indices,
initialization,
query_points):
def initialized_nnd_search(data, indptr, indices, initialization, query_points):

for i in numba.prange(query_points.shape[0]):

Expand All @@ -167,10 +181,13 @@ def initialized_nnd_search(data,

if vertex == -1:
break
candidates = indices[indptr[vertex]:indptr[vertex + 1]]
candidates = indices[indptr[vertex] : indptr[vertex + 1]]
for j in range(candidates.shape[0]):
if candidates[j] == vertex or candidates[j] == -1 or \
candidates[j] in tried:
if (
candidates[j] == vertex
or candidates[j] == -1
or candidates[j] in tried
):
continue
d = dist(data[candidates[j]], query_points[i], *dist_args)
unchecked_heap_push(initialization, i, d, candidates[j], 1)
Expand Down

0 comments on commit b3a1b28

Please sign in to comment.