Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

1 test fails: ZeroDivisionError: division by zero #239

Open
yurivict opened this issue Apr 28, 2024 · 0 comments
Open

1 test fails: ZeroDivisionError: division by zero #239

yurivict opened this issue Apr 28, 2024 · 0 comments

Comments

@yurivict
Copy link

========================================================================================= FAILURES =========================================================================================
_________________________________________________________________________ test_bitpacked_nn_descent_query_accuracy _________________________________________________________________________

nn_data = array([[0.52111531, 0.77647716, 0.93834037, 0.66185582, 0.19981062],
       [0.43632302, 0.16532886, 0.67949223, 0.587... 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ]])

    def test_bitpacked_nn_descent_query_accuracy(nn_data):
        bitpacked_data = (nn_data * 256).astype(np.uint8)
        unpacked_data = np.zeros(
            (bitpacked_data.shape[0], bitpacked_data.shape[1] * 8), dtype=np.float32
        )
        for i in range(unpacked_data.shape[0]):
            for j in range(unpacked_data.shape[1]):
                unpacked_data[i, j] = (bitpacked_data[i, j // 8] & (1 << (j % 8))) > 0
    
>       nnd = NNDescent(
            bitpacked_data[200:], "bit_jaccard", n_neighbors=50, random_state=None
        )

pynndescent/tests/test_pynndescent_.py:207: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <pynndescent.pynndescent_.NNDescent object at 0x24f3964bdc10>
data = array([[ 91, 127, 169,   1,  27],
       [ 51, 148,  22, 235,   2],
       [170, 219,  25, 239, 176],
       ...,
       [ 44, 244,  81,  85, 237],
       [  0,   0,   0,   0,   0],
       [  0,   0,   0,   0,   0]], dtype=uint8)
metric = 'bit_jaccard', metric_kwds = {}, n_neighbors = 50, n_trees = 10, leaf_size = None, pruning_degree_multiplier = 1.5, diversify_prob = 1.0, n_search_trees = 1, tree_init = True
init_graph = None, init_dist = None, random_state = None, low_memory = True, max_candidates = None, max_rptree_depth = 200, n_iters = 10, delta = 0.001, n_jobs = None, compressed = False
parallel_batch_queries = False, verbose = False

    def __init__(
        self,
        data,
        metric="euclidean",
        metric_kwds=None,
        n_neighbors=30,
        n_trees=None,
        leaf_size=None,
        pruning_degree_multiplier=1.5,
        diversify_prob=1.0,
        n_search_trees=1,
        tree_init=True,
        init_graph=None,
        init_dist=None,
        random_state=None,
        low_memory=True,
        max_candidates=None,
        max_rptree_depth=200,
        n_iters=None,
        delta=0.001,
        n_jobs=None,
        compressed=False,
        parallel_batch_queries=False,
        verbose=False,
    ):
    
        if n_trees is None:
            n_trees = 5 + int(round((data.shape[0]) ** 0.25))
            n_trees = min(32, n_trees)  # Only so many trees are useful
        if n_iters is None:
            n_iters = max(5, int(round(np.log2(data.shape[0]))))
    
        self.n_trees = n_trees
        self.n_trees_after_update = max(1, int(np.round(self.n_trees / 3)))
        self.n_neighbors = n_neighbors
        self.metric = metric
        self.metric_kwds = metric_kwds
        self.leaf_size = leaf_size
        self.prune_degree_multiplier = pruning_degree_multiplier
        self.diversify_prob = diversify_prob
        self.n_search_trees = n_search_trees
        self.max_rptree_depth = max_rptree_depth
        self.max_candidates = max_candidates
        self.low_memory = low_memory
        self.n_iters = n_iters
        self.delta = delta
        self.dim = data.shape[1]
        self.n_jobs = n_jobs
        self.compressed = compressed
        self.parallel_batch_queries = parallel_batch_queries
        self.verbose = verbose
    
        if getattr(data, "dtype", None) == np.float32 and (
            issparse(data) or is_c_contiguous(data)
        ):
            copy_on_normalize = True
        else:
            copy_on_normalize = False
    
        if metric in ("bit_hamming", "bit_jaccard"):
            data = check_array(data, dtype=np.uint8, order="C")
            self._input_dtype = np.uint8
        else:
            data = check_array(data, dtype=np.float32, accept_sparse="csr", order="C")
            self._input_dtype = np.float32
    
        self._raw_data = data
    
        if not tree_init or n_trees == 0 or init_graph is not None:
            self.tree_init = False
        else:
            self.tree_init = True
    
        metric_kwds = metric_kwds or {}
        self._dist_args = tuple(metric_kwds.values())
    
        self.random_state = random_state
    
        current_random_state = check_random_state(self.random_state)
    
        self._distance_correction = None
    
        if callable(metric):
            _distance_func = metric
        elif metric in pynnd_dist.named_distances:
            if metric in pynnd_dist.fast_distance_alternatives:
                _distance_func = pynnd_dist.fast_distance_alternatives[metric]["dist"]
                self._distance_correction = pynnd_dist.fast_distance_alternatives[
                    metric
                ]["correction"]
            else:
                _distance_func = pynnd_dist.named_distances[metric]
        else:
            raise ValueError("Metric is neither callable, " + "nor a recognised string")
    
        # Create a partial function for distances with arguments
        if len(self._dist_args) > 0:
            dist_args = self._dist_args
    
            @numba.njit()
            def _partial_dist_func(x, y):
                return _distance_func(x, y, *dist_args)
    
            self._distance_func = _partial_dist_func
        else:
            self._distance_func = _distance_func
    
        if metric in (
            "cosine",
            "dot",
            "correlation",
            "dice",
            "jaccard",
            "hellinger",
            "hamming",
            "bit_hamming",
            "bit_jaccard",
        ):
            self._angular_trees = True
            if metric in ("bit_hamming", "bit_jaccard"):
                self._bit_trees = True
            else:
                self._bit_trees = False
        else:
            self._angular_trees = False
            self._bit_trees = False
    
        if metric == "dot":
            data = normalize(data, norm="l2", copy=copy_on_normalize)
            self._raw_data = data
    
        self.rng_state = current_random_state.randint(INT32_MIN, INT32_MAX, 3).astype(
            np.int64
        )
        self.search_rng_state = current_random_state.randint(
            INT32_MIN, INT32_MAX, 3
        ).astype(np.int64)
        # Warm up the rng state
        for i in range(10):
            _ = tau_rand_int(self.search_rng_state)
    
        if self.tree_init:
            if verbose:
                print(ts(), "Building RP forest with", str(n_trees), "trees")
            self._rp_forest = make_forest(
                data,
                n_neighbors,
                n_trees,
                leaf_size,
                self.rng_state,
                current_random_state,
                self.n_jobs,
                self._angular_trees,
                self._bit_trees,
                max_depth=self.max_rptree_depth,
            )
            leaf_array = rptree_leaf_array(self._rp_forest)
        else:
            self._rp_forest = None
            leaf_array = np.array([[-1]])
    
        if self.max_candidates is None:
            effective_max_candidates = min(60, self.n_neighbors)
        else:
            effective_max_candidates = self.max_candidates
    
        # Set threading constraints
        self._original_num_threads = numba.get_num_threads()
        if self.n_jobs != -1 and self.n_jobs is not None:
            numba.set_num_threads(self.n_jobs)
    
        if isspmatrix_csr(self._raw_data):
    
            self._is_sparse = True
    
            if not self._raw_data.has_sorted_indices:
                self._raw_data.sort_indices()
    
            if metric in sparse.sparse_named_distances:
                if metric in sparse.sparse_fast_distance_alternatives:
                    _distance_func = sparse.sparse_fast_distance_alternatives[metric][
                        "dist"
                    ]
                    self._distance_correction = (
                        sparse.sparse_fast_distance_alternatives[metric]["correction"]
                    )
                else:
                    _distance_func = sparse.sparse_named_distances[metric]
            elif callable(metric):
                _distance_func = metric
            else:
                raise ValueError(
                    "Metric {} not supported for sparse data".format(metric)
                )
    
            if metric in sparse.sparse_need_n_features:
                metric_kwds["n_features"] = self._raw_data.shape[1]
            self._dist_args = tuple(metric_kwds.values())
    
            # Create a partial function for distances with arguments
            if len(self._dist_args) > 0:
    
                dist_args = self._dist_args
    
                @numba.njit()
                def _partial_dist_func(ind1, data1, ind2, data2):
                    return _distance_func(ind1, data1, ind2, data2, *dist_args)
    
                self._distance_func = _partial_dist_func
            else:
                self._distance_func = _distance_func
    
            if init_graph is None:
                _init_graph = EMPTY_GRAPH
            else:
                if init_graph.shape[0] != self._raw_data.shape[0]:
                    raise ValueError("Init graph size does not match dataset size!")
                _init_graph = make_heap(init_graph.shape[0], self.n_neighbors)
                _init_graph = sparse_initalize_heap_from_graph_indices(
                    _init_graph,
                    init_graph,
                    self._raw_data.indptr,
                    self._raw_data.indices,
                    self._raw_data.data,
                    self._distance_func,
                )
    
            if verbose:
                print(ts(), "metric NN descent for", str(n_iters), "iterations")
    
            self._neighbor_graph = sparse_nnd.nn_descent(
                self._raw_data.indices,
                self._raw_data.indptr,
                self._raw_data.data,
                self.n_neighbors,
                self.rng_state,
                max_candidates=effective_max_candidates,
                dist=self._distance_func,
                n_iters=self.n_iters,
                delta=self.delta,
                rp_tree_init=True,
                leaf_array=leaf_array,
                init_graph=_init_graph,
                low_memory=self.low_memory,
                verbose=verbose,
            )
    
        else:
    
            self._is_sparse = False
    
            if init_graph is None:
                _init_graph = EMPTY_GRAPH
            else:
                if init_graph.shape[0] != self._raw_data.shape[0]:
                    raise ValueError("Init graph size does not match dataset size!")
                _init_graph = make_heap(init_graph.shape[0], self.n_neighbors)
                if init_dist is None:
                    _init_graph = initalize_heap_from_graph_indices(
                        _init_graph, init_graph, data, self._distance_func
                    )
                elif init_graph.shape != init_dist.shape:
                    raise ValueError(
                        "The shapes of init graph and init distances do not match!"
                    )
                else:
                    _init_graph = initalize_heap_from_graph_indices_and_distances(
                        _init_graph, init_graph, init_dist
                    )
    
            if verbose:
                print(ts(), "NN descent for", str(n_iters), "iterations")
    
>           self._neighbor_graph = nn_descent(
                self._raw_data,
                self.n_neighbors,
                self.rng_state,
                effective_max_candidates,
                self._distance_func,
                self.n_iters,
                self.delta,
                low_memory=self.low_memory,
                rp_tree_init=True,
                init_graph=_init_graph,
                leaf_array=leaf_array,
                verbose=verbose,
            )
E           ZeroDivisionError: division by zero

pynndescent/pynndescent_.py:946: ZeroDivisionError
===================================================================================== warnings summary =====================================================================================
pynndescent/tests/test_distances.py::test_bit_jaccard
pynndescent/tests/test_pynndescent_.py::test_bitpacked_nn_descent_neighbor_accuracy
  /usr/local/lib/python3.9/site-packages/sklearn/metrics/pairwise.py:2317: DataConversionWarning: Data was converted to boolean for metric jaccard
    warnings.warn(msg, DataConversionWarning)

pynndescent/tests/test_pynndescent_.py::test_bitpacked_nn_descent_neighbor_accuracy
  /usr/ports/math/py-pynndescent/work-py39/pynndescent-0.5.12/pynndescent/pynndescent_.py:962: UserWarning: Failed to correctly find n_neighbors for some samples. Results may be less than ideal. Try re-running with different parameters.
    warn(

-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
================================================================================= short test summary info ==================================================================================
SKIPPED [1] pynndescent/tests/test_distances.py:245: incorrect function in scipy<1.8
============================================================= 1 failed, 145 passed, 1 skipped, 3 warnings in 575.60s (0:09:35) =============================================================
*** Error code 1

pynndescent version: 0.5.12
Python version: 3.9
OS: FreeBSD 14.0

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant