Skip to content

ENH: Sparse matrix handling with ICAR prior #7406

@jfhawkin

Description

@jfhawkin

Before

@classmethod
    def dist(cls, W, sigma=1, zero_sum_stdev=0.001, **kwargs):
        # Note: These checks are forcing W to be non-symbolic
        if not W.ndim == 2:
            raise ValueError("W must be matrix with ndim=2")

        if not W.shape[0] == W.shape[1]:
            raise ValueError("W must be a square matrix")

        if not np.allclose(W.T, W):
            raise ValueError("W must be a symmetric matrix")

        if np.any((W != 0) & (W != 1)):
            raise ValueError("W must be composed of only 1s and 0s")

        W = pt.as_tensor_variable(W, dtype=int)
        sigma = pt.as_tensor_variable(sigma)
        zero_sum_stdev = pt.as_tensor_variable(zero_sum_stdev)
        return super().dist([W, sigma, zero_sum_stdev], **kwargs)



    def support_point(rv, size, W, sigma, zero_sum_stdev):
        N = pt.shape(W)[-2]
        return pt.zeros(N)

    def logp(value, W, sigma, zero_sum_stdev):
        # convert adjacency matrix to edgelist representation
        # An edgelist is a pair of lists.
        # If node i and node j are connected then one list
        # will contain i and the other will contain j at the same
        # index value.
        # We only use the lower triangle here because adjacency
        # is a undirected connection.
        N = pt.shape(W)[-2]
        node1, node2 = pt.eq(pt.tril(W), 1).nonzero()

        pairwise_difference = (-1 / (2 * sigma**2)) * pt.sum(pt.square(value[node1] - value[node2]))
        zero_sum = (
            -0.5 * pt.pow(pt.sum(value) / (zero_sum_stdev * N), 2)
            - pt.log(pt.sqrt(2.0 * np.pi))
            - pt.log(zero_sum_stdev * N)
        )

        return check_parameters(pairwise_difference + zero_sum, sigma > 0, msg="sigma > 0")

After

@classmethod
    def dist(cls, W, sigma=1, zero_sum_stdev=0.001, **kwargs):
        # Note: These checks are forcing W to be non-symbolic
        if not W.ndim == 2:
            raise ValueError("W must be matrix with ndim=2")

        if not W.shape[0] == W.shape[1]:
            raise ValueError("W must be a square matrix")

        if not np.allclose(W.data.T, W.data):
            raise ValueError("W must be a symmetric matrix")

        if np.any((W.data != 0) & (W.data != 1)):
            raise ValueError("W must be composed of only 1s and 0s")

        sigma = pt.as_tensor_variable(sigma)
        zero_sum_stdev = pt.as_tensor_variable(zero_sum_stdev)
        return super().dist([W, sigma, zero_sum_stdev], **kwargs)



    def support_point(rv, size, W, sigma, zero_sum_stdev):
        N = pt.shape(W)[-2]
        return pt.zeros(N)

    def logp(value, W, sigma, zero_sum_stdev):
        # convert adjacency matrix to edgelist representation
        # An edgelist is a pair of lists.
        # If node i and node j are connected then one list
        # will contain i and the other will contain j at the same
        # index value.
        # We only use the lower triangle here because adjacency
        # is a undirected connection.
        N = pt.shape(W)[-2]
        node1, node2 = W.nonzero()
        node1 = pt.as_tensor_variable(node1, dtype=int)
        node2 = pt.as_tensor_variable(node2, dtype=int)
        W = pytensor.sparse.as_sparse_or_tensor_variable(W)

        pairwise_difference = (-1 / (2 * sigma**2)) * pt.sum(pt.square(value[node1] - value[node2]))
        zero_sum = (
            -0.5 * pt.pow(pt.sum(value) / (zero_sum_stdev * N), 2)
            - pt.log(pt.sqrt(2.0 * np.pi))
            - pt.log(zero_sum_stdev * N)
        )

        return check_parameters(pairwise_difference + zero_sum, sigma > 0, msg="sigma > 0")

Context for the issue:

The CAR prior allows the user to input a sparse matrix, but the ICAR prior does not have this functionality. For large spatial adjacency matrices, sparsity is critical to ensure efficient memory allocation.

The above solution makes minor revisions to run checks and generate the node tuple from a sparse matrix. It may be necessary to place it in an if statement, similar to the approach used in the CAR class. This version runs for me on a test dataset.

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions