In [1]:
import string

import aesara
import aesara.sparse
import aesara.tensor as aet
import numpy as np
import pandas as pd
import scipy.sparse as sp

from tabmat import CategoricalMatrix as TabMat

import example

So far, this only works for the particular case where the design matrix is of the type

$$
\begin{pmatrix}
1 & 0 & \cdots & 0 \\
1 & 0 & \cdots & 0 \\
0 & 1 & \cdots & 0 \\
\vdots & \vdots & \ddots & \vdots\\
0 & 0 & \cdots & 1
\end{pmatrix}
$$

i.e. one, and only one, 1 per row.

The idea is to extend it to the general case, where a row can contain zero, one, or more than one 1.

1. Zero: When the observation has the reference level for every categorical predictor in the linear term. This does not mean the row is zero across the whole design matrix; the Intercept term is simply not being considered here.
2. One: When there's only one categorical predictor and the observation does not have the reference level, or when there is more than one categorical predictor but the observation has the reference level in all but one of them.
3. More than one: When there's more than one categorical predictor and the observation does not have the reference level in at least two of them.

In [2]:
# Pool of category labels: every ASCII letter (lowercase first, then uppercase),
# followed by each of those letters doubled ("a" -> "aa", ..., "Z" -> "ZZ").
strings = [*string.ascii_lowercase, *string.ascii_uppercase]
strings.extend([label + label for label in strings])
len(strings)

104

In [3]:
class CategoricalMatrix:
    """Design matrix of one categorical variable, stored as integer codes.

    Instead of materializing one dummy column per category, only the
    per-observation category code produced by ``pd.Categorical`` is kept.
    """

    def __init__(self, x):
        """Encode the labels in ``x`` as ``int32`` category codes.

        Parameters
        ----------
        x : array-like
            Category label of each observation (anything accepted by
            ``pd.Categorical``).
        """
        codes = pd.Categorical(x).codes
        self.indices = codes.astype(np.int32)
        self.length = len(self.indices)

    def dot(self, other):
        """Compute the product with ``other`` through ``example.mat_vec_1d``.

        Parameters
        ----------
        other : numpy.ndarray
            For now assumed to be a column vector (1-D array).

        Returns
        -------
        numpy.ndarray
            Array of length ``self.length`` with the dtype of ``other``.
        """
        # Zero-initialized array handed to the compiled routine and then returned.
        # NOTE(review): presumably the routine stores other[indices[i]] in
        # position i -- confirm against the `example` extension.
        result = np.zeros((self.length,), dtype=other.dtype)
        example.mat_vec_1d(self.indices, other, result)
        return result

In [4]:
# Draw 1,000 random labels; `x` stays a plain NumPy array of strings.
x = np.random.choice(strings, size=1000)
# Pin the category set to `strings`, so every representation below has exactly
# len(strings) columns/codes even if some label happens not to be drawn.
# Without this, `y` (one entry per label) can disagree in length with the
# matrices whenever the sample misses a label.
x_cat = pd.Categorical(x, categories=strings)

matrix_dense = np.asarray(pd.get_dummies(x_cat))
matrix_sparse = sp.csr_matrix(matrix_dense)
# Second name used by the benchmark cells; it refers to the same CSR object,
# which avoids converting the dense matrix twice.
sp_matrix = matrix_sparse

categorical = CategoricalMatrix(x_cat)
tbmat = TabMat(x_cat)
# One coefficient per label in `strings`.
y = np.arange(len(strings), dtype=np.float64)

Defining Aesara functions...

In [5]:
# Symbolic inputs. Each variable gets its own debug name: the dense matrix
# used by the sparse product is "Y" rather than a second "y".
aet_x = aet.dmatrix("x")
aet_y = aet.dvector("y")
aet_Y = aet.dmatrix("Y")

# Symbolic CSR input. Only the dtype is taken from `sp_matrix`, so the graph
# is not built from the data/indices/indptr arrays of that particular matrix;
# the compiled function accepts any SciPy CSR matrix of this dtype.
x_sparse = aesara.sparse.csr_matrix("x_sparse", dtype=sp_matrix.dtype.name)

# Dense matrix times vector.
aet_dot = aesara.function([aet_x, aet_y], aet.dot(aet_x, aet_y))
# Sparse matrix times dense matrix; callers pass the vector as a single column.
aet_sparse_dot = aesara.function([x_sparse, aet_Y], aesara.sparse.structured_dot(x_sparse, aet_Y))

In [6]:
# Time the matrix-vector product for each representation of the design matrix.
# Custom CategoricalMatrix (integer codes + example.mat_vec_1d).
%timeit categorical.dot(y)
# Dense NumPy dummy matrix.
%timeit matrix_dense.dot(y)
# SciPy CSR matrix.
%timeit sp_matrix.dot(y)
# tabmat's CategoricalMatrix.
%timeit tbmat.matvec(y)
# Compiled Aesara function on the dense matrix.
%timeit aet_dot(matrix_dense, y)
# Compiled Aesara sparse function; its second input is a dmatrix, so `y` is
# passed as a single column.
%timeit aet_sparse_dot(sp_matrix, y[:, None])

2.07 µs ± 165 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
130 µs ± 19.8 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
6.54 µs ± 256 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
20.8 µs ± 5.06 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
112 µs ± 4.52 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
36.6 µs ± 582 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [7]:
x = np.random.choice(strings, size=100000)
matrix_dense = np.asarray(pd.get_dummies(x))
matrix_sparse = sp.csr_matrix(matrix_dense)

categorical = CategoricalMatrix(x)
sp_matrix = sp.csr_matrix(matrix_dense)
tbmat = TabMat(x)

%timeit categorical.dot(y)
%timeit matrix_dense.dot(y)
%timeit sp_matrix.dot(y)
%timeit tbmat.matvec(y)
%timeit aet_dot(matrix_dense, y)
%timeit aet_sparse_dot(sp_matrix, y[:, None])

72.9 µs ± 1.24 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
25 ms ± 1.56 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
206 µs ± 6.22 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
103 µs ± 22.1 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
24 ms ± 657 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
314 µs ± 8.88 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [8]:
x = np.random.choice(strings, size=10000000)
matrix_dense = np.asarray(pd.get_dummies(x))
matrix_sparse = sp.csr_matrix(matrix_dense)

categorical = CategoricalMatrix(x)
sp_matrix = sp.csr_matrix(matrix_dense)
tbmat = TabMat(x)

%timeit categorical.dot(y)
%timeit matrix_dense.dot(y)
%timeit sp_matrix.dot(y)
%timeit tbmat.matvec(y)
%timeit aet_dot(matrix_dense, y)
%timeit aet_sparse_dot(sp_matrix, y[:, None])

29 ms ± 226 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
2.04 s ± 17.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
61.8 ms ± 521 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
37.1 ms ± 2.89 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
2.98 s ± 44.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
63.2 ms ± 669 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
