Run black on everything

lmcinnes · Jun 29, 2018 · commit b3a1b28 · 1 parent 3470770
Show file tree

Hide file tree

Showing 10 changed files with 1,261 additions and 917 deletions.
diff --git a/umap/__init__.py b/umap/__init__.py
@@ -1,4 +1,5 @@
 from .umap_ import UMAP
 
 import pkg_resources
-__version__ = pkg_resources.get_distribution('umap-learn').version
+
+__version__ = pkg_resources.get_distribution("umap-learn").version
diff --git a/umap/distances.py b/umap/distances.py
@@ -161,8 +161,8 @@ def jaccard(x, y):
     for i in range(x.shape[0]):
         x_true = x[i] != 0
         y_true = y[i] != 0
-        num_non_zero += (x_true or y_true)
-        num_equal += (x_true and y_true)
+        num_non_zero += x_true or y_true
+        num_equal += x_true and y_true
 
     if num_non_zero == 0.0:
         return 0.0
@@ -176,7 +176,7 @@ def matching(x, y):
     for i in range(x.shape[0]):
         x_true = x[i] != 0
         y_true = y[i] != 0
-        num_not_equal += (x_true != y_true)
+        num_not_equal += x_true != y_true
 
     return float(num_not_equal) / x.shape[0]
 
@@ -188,8 +188,8 @@ def dice(x, y):
     for i in range(x.shape[0]):
         x_true = x[i] != 0
         y_true = y[i] != 0
-        num_true_true += (x_true and y_true)
-        num_not_equal += (x_true != y_true)
+        num_true_true += x_true and y_true
+        num_not_equal += x_true != y_true
 
     if num_not_equal == 0.0:
         return 0.0
@@ -204,14 +204,15 @@ def kulsinski(x, y):
     for i in range(x.shape[0]):
         x_true = x[i] != 0
         y_true = y[i] != 0
-        num_true_true += (x_true and y_true)
-        num_not_equal += (x_true != y_true)
+        num_true_true += x_true and y_true
+        num_not_equal += x_true != y_true
 
     if num_not_equal == 0:
         return 0.0
     else:
-        return float(num_not_equal - num_true_true + x.shape[0]) / \
-                (num_not_equal + x.shape[0])
+        return float(num_not_equal - num_true_true + x.shape[0]) / (
+            num_not_equal + x.shape[0]
+        )
 
 
 @numba.njit()
@@ -220,7 +221,7 @@ def rogers_tanimoto(x, y):
     for i in range(x.shape[0]):
         x_true = x[i] != 0
         y_true = y[i] != 0
-        num_not_equal += (x_true != y_true)
+        num_not_equal += x_true != y_true
 
     return (2.0 * num_not_equal) / (x.shape[0] + num_not_equal)
 
@@ -231,10 +232,9 @@ def russellrao(x, y):
     for i in range(x.shape[0]):
         x_true = x[i] != 0
         y_true = y[i] != 0
-        num_true_true += (x_true and y_true)
+        num_true_true += x_true and y_true
 
-    if (num_true_true == np.sum(x != 0) and
-        num_true_true == np.sum(y != 0)):
+    if num_true_true == np.sum(x != 0) and num_true_true == np.sum(y != 0):
         return 0.0
     else:
         return float(x.shape[0] - num_true_true) / (x.shape[0])
@@ -246,7 +246,7 @@ def sokal_michener(x, y):
     for i in range(x.shape[0]):
         x_true = x[i] != 0
         y_true = y[i] != 0
-        num_not_equal += (x_true != y_true)
+        num_not_equal += x_true != y_true
 
     return (2.0 * num_not_equal) / (x.shape[0] + num_not_equal)
 
@@ -258,8 +258,8 @@ def sokal_sneath(x, y):
     for i in range(x.shape[0]):
         x_true = x[i] != 0
         y_true = y[i] != 0
-        num_true_true += (x_true and y_true)
-        num_not_equal += (x_true != y_true)
+        num_true_true += x_true and y_true
+        num_not_equal += x_true != y_true
 
     if num_not_equal == 0.0:
         return 0.0
@@ -270,7 +270,7 @@ def sokal_sneath(x, y):
 @numba.njit()
 def haversine(x, y):
     if x.shape[0] != 2:
-        raise ValueError('haversine is only defined for 2 dimensional data')
+        raise ValueError("haversine is only defined for 2 dimensional data")
     sin_lat = np.sin(0.5 * (x[0] - y[0]))
     sin_long = np.sin(0.5 * (x[1] - y[1]))
     result = np.sqrt(sin_lat ** 2 + np.cos(x[0]) * np.cos(y[0]) * sin_long ** 2)
@@ -285,18 +285,18 @@ def yule(x, y):
     for i in range(x.shape[0]):
         x_true = x[i] != 0
         y_true = y[i] != 0
-        num_true_true += (x_true and y_true)
-        num_true_false += (x_true and (not y_true))
-        num_false_true += ((not x_true) and y_true)
+        num_true_true += x_true and y_true
+        num_true_false += x_true and (not y_true)
+        num_false_true += (not x_true) and y_true
 
-    num_false_false = x.shape[0] - num_true_true - num_true_false - \
-                      num_false_true
+    num_false_false = x.shape[0] - num_true_true - num_true_false - num_false_true
 
-    if (num_true_false == 0.0 or num_false_true == 0.0):
+    if num_true_false == 0.0 or num_false_true == 0.0:
         return 0.0
     else:
-        return (2.0 * num_true_false * num_false_true) / \
-               (num_true_true * num_false_false + num_true_false * num_false_true)
+        return (2.0 * num_true_false * num_false_true) / (
+            num_true_true * num_false_false + num_true_false * num_false_true
+        )
 
 
 @numba.njit()
@@ -306,8 +306,8 @@ def cosine(x, y):
     norm_y = 0.0
     for i in range(x.shape[0]):
         result += x[i] * y[i]
-        norm_x += x[i]**2
-        norm_y += y[i]**2
+        norm_x += x[i] ** 2
+        norm_y += y[i] ** 2
 
     if norm_x == 0.0 and norm_y == 0.0:
         return 0.0
@@ -344,41 +344,42 @@ def correlation(x, y):
     elif dot_product == 0.0:
         return 1.0
     else:
-        return (1.0 - (dot_product / np.sqrt(norm_x * norm_y)))
+        return 1.0 - (dot_product / np.sqrt(norm_x * norm_y))
+
 
 named_distances = {
     # general minkowski distances
-    'euclidean': euclidean,
-    'l2': euclidean,
-    'manhattan': manhattan,
-    'taxicab': manhattan,
-    'l1': manhattan,
-    'chebyshev': chebyshev,
-    'linfinity': chebyshev,
-    'linfty': chebyshev,
-    'linf': chebyshev,
-    'minkowski': minkowski,
+    "euclidean": euclidean,
+    "l2": euclidean,
+    "manhattan": manhattan,
+    "taxicab": manhattan,
+    "l1": manhattan,
+    "chebyshev": chebyshev,
+    "linfinity": chebyshev,
+    "linfty": chebyshev,
+    "linf": chebyshev,
+    "minkowski": minkowski,
     # Standardised/weighted distances
-    'seuclidean': standardised_euclidean,
-    'standardised_euclidean': standardised_euclidean,
-    'wminkowski': weighted_minkowski,
-    'weighted_minkowski': weighted_minkowski,
-    'mahalanobis': mahalanobis,
+    "seuclidean": standardised_euclidean,
+    "standardised_euclidean": standardised_euclidean,
+    "wminkowski": weighted_minkowski,
+    "weighted_minkowski": weighted_minkowski,
+    "mahalanobis": mahalanobis,
     # Other distances
-    'canberra': canberra,
-    'cosine': cosine,
-    'correlation': correlation,
-    'haversine': haversine,
-    'braycurtis': bray_curtis,
+    "canberra": canberra,
+    "cosine": cosine,
+    "correlation": correlation,
+    "haversine": haversine,
+    "braycurtis": bray_curtis,
     # Binary distances
-    'hamming': hamming,
-    'jaccard': jaccard,
-    'dice': dice,
-    'matching': matching,
-    'kulsinski': kulsinski,
-    'rogerstanimoto': rogers_tanimoto,
-    'russellrao': russellrao,
-    'sokalsneath': sokal_sneath,
-    'sokalmichener': sokal_michener,
-    'yule': yule,
+    "hamming": hamming,
+    "jaccard": jaccard,
+    "dice": dice,
+    "matching": matching,
+    "kulsinski": kulsinski,
+    "rogerstanimoto": rogers_tanimoto,
+    "russellrao": russellrao,
+    "sokalsneath": sokal_sneath,
+    "sokalmichener": sokal_michener,
+    "yule": yule,
 }
diff --git a/umap/nndescent.py b/umap/nndescent.py
@@ -6,15 +6,17 @@
 import numpy as np
 import numba
 
-from umap.utils import (tau_rand,
-                        make_heap,
-                        heap_push,
-                        unchecked_heap_push,
-                        smallest_flagged,
-                        rejection_sample,
-                        build_candidates,
-                        new_build_candidates,
-                        deheap_sort)
+from umap.utils import (
+    tau_rand,
+    make_heap,
+    heap_push,
+    unchecked_heap_push,
+    smallest_flagged,
+    rejection_sample,
+    build_candidates,
+    new_build_candidates,
+    deheap_sort,
+)
 
 from umap.rp_tree import search_flat_tree
 
@@ -42,9 +44,18 @@ def make_nn_descent(dist, dist_args):
     """
 
     @numba.njit(parallel=True)
-    def nn_descent(data, n_neighbors, rng_state, max_candidates=50,
-                   n_iters=10, delta=0.001, rho=0.5,
-                   rp_tree_init=True, leaf_array=None, verbose=False):
+    def nn_descent(
+        data,
+        n_neighbors,
+        rng_state,
+        max_candidates=50,
+        n_iters=10,
+        delta=0.001,
+        rho=0.5,
+        rp_tree_init=True,
+        leaf_array=None,
+        verbose=False,
+    ):
         n_vertices = data.shape[0]
 
         current_graph = make_heap(data.shape[0], n_neighbors)
@@ -63,23 +74,23 @@ def nn_descent(data, n_neighbors, rng_state, max_candidates=50,
                     for j in range(i + 1, leaf_array.shape[1]):
                         if leaf_array[n, j] < 0:
                             break
-                        d = dist(data[leaf_array[n, i]], data[leaf_array[n, j]],
-                                 *dist_args)
-                        heap_push(current_graph, leaf_array[n, i], d,
-                                  leaf_array[n, j],
-                                  1)
-                        heap_push(current_graph, leaf_array[n, j], d,
-                                  leaf_array[n, i],
-                                  1)
+                        d = dist(
+                            data[leaf_array[n, i]], data[leaf_array[n, j]], *dist_args
+                        )
+                        heap_push(
+                            current_graph, leaf_array[n, i], d, leaf_array[n, j], 1
+                        )
+                        heap_push(
+                            current_graph, leaf_array[n, j], d, leaf_array[n, i], 1
+                        )
 
         for n in range(n_iters):
             if verbose:
                 print("\t", n, " / ", n_iters)
 
-            candidate_neighbors = build_candidates(current_graph,
-                                                   n_vertices,
-                                                   n_neighbors, max_candidates,
-                                                   rng_state)
+            candidate_neighbors = build_candidates(
+                current_graph, n_vertices, n_neighbors, max_candidates, rng_state
+            )
 
             c = 0
             for i in range(n_vertices):
@@ -89,8 +100,11 @@ def nn_descent(data, n_neighbors, rng_state, max_candidates=50,
                         continue
                     for k in range(max_candidates):
                         q = int(candidate_neighbors[0, i, k])
-                        if q < 0 or not candidate_neighbors[2, i, j] and not \
-                                candidate_neighbors[2, i, k]:
+                        if (
+                            q < 0
+                            or not candidate_neighbors[2, i, j]
+                            and not candidate_neighbors[2, i, k]
+                        ):
                             continue
 
                         d = dist(data[p], data[q], *dist_args)
@@ -109,8 +123,7 @@ def make_initialisations(dist, dist_args):
     @numba.njit(parallel=True)
     def init_from_random(n_neighbors, data, query_points, heap, rng_state):
         for i in range(query_points.shape[0]):
-            indices = rejection_sample(n_neighbors, data.shape[0],
-                                       rng_state)
+            indices = rejection_sample(n_neighbors, data.shape[0], rng_state)
             for j in range(indices.shape[0]):
                 if indices[j] < 0:
                     continue
@@ -121,10 +134,14 @@ def init_from_random(n_neighbors, data, query_points, heap, rng_state):
     @numba.njit(parallel=True)
     def init_from_tree(tree, data, query_points, heap, rng_state):
         for i in range(query_points.shape[0]):
-            indices = search_flat_tree(query_points[i], tree.hyperplanes,
-                                       tree.offsets, tree.children,
-                                       tree.indices,
-                                       rng_state)
+            indices = search_flat_tree(
+                query_points[i],
+                tree.hyperplanes,
+                tree.offsets,
+                tree.children,
+                tree.indices,
+                rng_state,
+            )
 
             for j in range(indices.shape[0]):
                 if indices[j] < 0:
@@ -137,8 +154,9 @@ def init_from_tree(tree, data, query_points, heap, rng_state):
     return init_from_random, init_from_tree
 
 
-def initialise_search(forest, data, query_points, n_neighbors,
-                      init_from_random, init_from_tree, rng_state):
+def initialise_search(
+    forest, data, query_points, n_neighbors, init_from_random, init_from_tree, rng_state
+):
     results = make_heap(query_points.shape[0], n_neighbors)
     init_from_random(n_neighbors, data, query_points, results, rng_state)
     if forest is not None:
@@ -150,11 +168,7 @@ def initialise_search(forest, data, query_points, n_neighbors,
 
 def make_initialized_nnd_search(dist, dist_args):
     @numba.njit(parallel=True)
-    def initialized_nnd_search(data,
-                               indptr,
-                               indices,
-                               initialization,
-                               query_points):
+    def initialized_nnd_search(data, indptr, indices, initialization, query_points):
 
         for i in numba.prange(query_points.shape[0]):
 
@@ -167,10 +181,13 @@ def initialized_nnd_search(data,
 
                 if vertex == -1:
                     break
-                candidates = indices[indptr[vertex]:indptr[vertex + 1]]
+                candidates = indices[indptr[vertex] : indptr[vertex + 1]]
                 for j in range(candidates.shape[0]):
-                    if candidates[j] == vertex or candidates[j] == -1 or \
-                                    candidates[j] in tried:
+                    if (
+                        candidates[j] == vertex
+                        or candidates[j] == -1
+                        or candidates[j] in tried
+                    ):
                         continue
                     d = dist(data[candidates[j]], query_points[i], *dist_args)
                     unchecked_heap_push(initialization, i, d, candidates[j], 1)