diff --git a/pynndescent/distances.py b/pynndescent/distances.py
index 9b334b3..3d40480 100644
--- a/pynndescent/distances.py
+++ b/pynndescent/distances.py
@@ -512,9 +512,9 @@ def tsss(x, y):
     norm_y = np.sqrt(norm_y)
     magnitude_difference = np.abs(norm_x - norm_y)
     d_cos /= norm_x * norm_y
-    theta = np.arccos(d_cos) + np.radians(10) # Add 10 degrees as an "epsilon" to
+    theta = np.arccos(d_cos) + np.radians(10)  # Add 10 degrees as an "epsilon" to
     # avoid problems
-    sector = ((np.sqrt(d_euc_squared) + magnitude_difference)**2) * theta
+    sector = ((np.sqrt(d_euc_squared) + magnitude_difference) ** 2) * theta
     triangle = norm_x * norm_y * np.sin(theta) / 2.0
 
     return triangle * sector
@@ -817,8 +817,10 @@ def kantorovich(x, y, cost=_dummy_cost, max_iter=100000):
     "l2": {"dist": squared_euclidean, "correction": np.sqrt},
     "cosine": {"dist": alternative_cosine, "correction": correct_alternative_cosine},
     "dot": {"dist": alternative_dot, "correction": correct_alternative_cosine},
-    "true_angular": {"dist": alternative_cosine, "correction":
-        true_angular_from_alt_cosine},
+    "true_angular": {
+        "dist": alternative_cosine,
+        "correction": true_angular_from_alt_cosine,
+    },
     "hellinger": {
         "dist": alternative_hellinger,
         "correction": correct_alternative_hellinger,
diff --git a/pynndescent/graph_utils.py b/pynndescent/graph_utils.py
index 6ffd7a6..337a925 100644
--- a/pynndescent/graph_utils.py
+++ b/pynndescent/graph_utils.py
@@ -54,7 +54,11 @@ def create_component_search(index):
         },
     )
     def custom_search_closure(
-        query_points, candidate_indices, k, epsilon, visited,
+        query_points,
+        candidate_indices,
+        k,
+        epsilon,
+        visited,
     ):
         result = make_heap(query_points.shape[0], k)
         distance_scale = 1.0 + epsilon
@@ -176,7 +180,8 @@ def adjacency_matrix_representation(neighbor_indices, neighbor_distances):
     neighbor_distances[neighbor_distances == 0.0] = FLOAT32_EPS
 
     result.row = np.repeat(
-        np.arange(neighbor_indices.shape[0], dtype=np.int32), neighbor_indices.shape[1],
+        np.arange(neighbor_indices.shape[0], dtype=np.int32),
+        neighbor_indices.shape[1],
     )
     result.col = neighbor_indices.ravel()
     result.data = neighbor_distances.ravel()
@@ -191,6 +196,7 @@ def adjacency_matrix_representation(neighbor_indices, neighbor_distances):
 
     return result
 
+
 def connect_graph(graph, index, search_size=10, n_jobs=None):
 
     search_closure = create_component_search(index)
@@ -225,7 +231,8 @@ def new_edge(c1, c2):
         return i, j, d
 
     new_edges = joblib.Parallel(n_jobs=n_jobs, prefer="threads")(
-        joblib.delayed(new_edge)(c1, c2) for c1, c2 in combinations(range(n_components), 2)
+        joblib.delayed(new_edge)(c1, c2)
+        for c1, c2 in combinations(range(n_components), 2)
     )
 
     for i, j, d in new_edges:
diff --git a/pynndescent/sparse.py b/pynndescent/sparse.py
index 5ff78f4..8d91bb5 100644
--- a/pynndescent/sparse.py
+++ b/pynndescent/sparse.py
@@ -823,7 +823,7 @@ def diversify_csr(
     "canberra": sparse_canberra,
     "kantorovich": sparse_kantorovich,
     "wasserstein": sparse_kantorovich,
-    'braycurtis': sparse_bray_curtis,
+    "braycurtis": sparse_bray_curtis,
     # Binary distances
     "hamming": sparse_hamming,
     "jaccard": sparse_jaccard,
diff --git a/pynndescent/tests/test_pynndescent_.py b/pynndescent/tests/test_pynndescent_.py
index d3f70bf..83d7e7f 100644
--- a/pynndescent/tests/test_pynndescent_.py
+++ b/pynndescent/tests/test_pynndescent_.py
@@ -438,6 +438,7 @@ def test_compressed_pickle_unpickle():
     np.testing.assert_equal(neighbors1, neighbors2)
     np.testing.assert_equal(distances1, distances2)
 
+
 def test_transformer_pickle_unpickle():
     seed = np.random.RandomState(42)
 
@@ -456,6 +457,7 @@ def test_transformer_pickle_unpickle():
     np.testing.assert_equal(result1.indices, result2.indices)
     np.testing.assert_equal(result1.data, result2.data)
 
+
 def test_joblib_dump():
     seed = np.random.RandomState(42)
 
diff --git a/setup.py b/setup.py
index 9b15d5d..193b4e0 100644
--- a/setup.py
+++ b/setup.py
@@ -8,7 +8,7 @@ def readme():
 
 configuration = {
     "name": "pynndescent",
-    "version": "0.5.1",
+    "version": "0.5.2",
     "description": "Nearest Neighbor Descent",
     "long_description": readme(),
     "classifiers": [
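
For readers outside the patch context: the metric-name tables touched above ("true_angular" in the alternative-distance table of distances.py, "braycurtis" in the sparse table of sparse.py) are consumed through pynndescent's NNDescent entry point via its metric argument. The sketch below shows that path only; the random data, n_neighbors value, and query size are illustrative assumptions, not taken from this patch.

import numpy as np
from pynndescent import NNDescent

# Build an index on toy dense data; the metric string is resolved against the
# named-distance tables that the patch above reformats.
rng = np.random.RandomState(42)
data = rng.random_sample((1000, 20)).astype(np.float32)
index = NNDescent(data, metric="cosine", n_neighbors=15)

# Query the 10 approximate nearest neighbours of the first five points.
neighbor_indices, neighbor_distances = index.query(data[:5], k=10)

Names such as "true_angular" (dense input) or "braycurtis" (sparse CSR input) are selected the same way, assuming they are registered for the given input type in this version.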