In [1]:
import string

import aesara
import aesara.sparse
import aesara.tensor as aet
import numpy as np
import pandas as pd
import scipy.sparse as sp

from tabmat import CategoricalMatrix as TabMat

import example

So far, this only works for the particular case where the design matrix is of the type

$$
\begin{pmatrix}
1 & 0 & \cdots & 0 \\
1 & 0 & \cdots & 0 \\
0 & 1 & \cdots & 0 \\
\vdots & \vdots & \ddots & \vdots\\
0 & 0 & \cdots & 1
\end{pmatrix}
$$

i.e. one, and only one, 1 per row.

The idea is to extend it to the general case where each row can contain zero, one, or more than one 1s.

1. Zero: When the observation has the reference level for every categorical predictor in the linear term. This does not mean the row is 0 across the whole design matrix, only across the columns of these categorical predictors. The Intercept term is not considered here.
2. One: When there is only one categorical predictor and the observation does not have the reference level, or when there is more than one categorical predictor but the observation has the reference level in all but one of them.
3. More than one: When there is more than one categorical predictor and the observation has a non-reference level in at least two of them.



**Update**

The new implementation works for the most general case :-)

In [2]:
import formulae

# Seeded generator so the toy data below is reproducible.
rng = np.random.default_rng(1234)

# Three categorical predictors with three levels each, ten observations.
x = rng.choice(["A", "B", "C"], size=10)
y = rng.choice(["X", "Y", "Z"], size=10)
z = rng.choice(["M", "N", "O"], size=10)
df = pd.DataFrame({"x": x, "y": y, "z": z})

# Design matrix of the common (non group-specific) part of "0 + x + y + z".
X = formulae.design_matrices("0 + x + y + z", df).common.design_matrix

# One (row, column) pair per nonzero entry of X, stored as a C-contiguous
# int32 array of shape (n_nonzero, 2).
X_indices = np.ascontiguousarray(
    np.column_stack(np.where(X != 0)), dtype=np.int32
)
# Random vector with one entry per column of X.
vector = rng.normal(size=X.shape[1])
# float64 buffer, one entry per row of X, passed to the kernel as its third argument.
result = np.zeros(X.shape[0])

example.mat_vec_1d_dummy(X_indices, vector, result)

array([-1.20427091, -1.63900486, -0.42584969, -0.74500956,  1.25061673,
       -3.16130481,  2.77291668,  1.25061673,  2.77291668, -0.48267259])

In [3]:
# The kernel's output must agree with the dense product of X and vector.
np.allclose(X @ vector, result)

True

In [4]:
# The "categorical variables"
strings = list(string.ascii_lowercase) + list(string.ascii_uppercase)
strings += [s * 2 for s in strings]
len(strings)

104

In [5]:
class CategoricalMatrix:
    """One-hot matrix represented by a single integer category code per row.

    Instead of storing the dense 0/1 matrix, only the code of the category
    observed in each row is kept.
    """

    def __init__(self, x):
        """Encode ``x`` as int32 category codes.

        Parameters
        ----------
        x : array-like
            Categorical observations; anything ``pd.Categorical`` accepts.
        """
        codes = pd.Categorical(x).codes
        # ``astype`` returns a fresh int32 array of the codes.
        self.indices = codes.astype(np.int32)
        self.length = len(self.indices)

    def dot(self, other):
        """Multiply this matrix by ``other`` via ``example.mat_vec_1d``.

        Parameters
        ----------
        other : np.ndarray
            For now assumed to be a column vector (1-D array).

        Returns
        -------
        np.ndarray
            Array with one entry per row, using the dtype of ``other``.
        """
        product = np.zeros(self.length, dtype=other.dtype)
        # The buffer is passed as the third argument; presumably the kernel
        # writes the product into it (verify against the `example` module).
        example.mat_vec_1d(self.indices, other, product)
        return product

In [6]:
class CategoricalMatrix2:
    """Dummy-coded matrix represented by the coordinates of its nonzero entries.

    Unlike a one-code-per-row encoding, a row may contain zero, one, or
    several nonzero entries.
    """

    def __init__(self, X):
        """Record the number of rows of ``X`` and where its nonzeros are.

        Parameters
        ----------
        X : np.ndarray
            Dense design matrix; only the positions of nonzero entries are kept.
        """
        self.length = X.shape[0]
        # One (row, column) pair per nonzero entry, as C-contiguous int32.
        self.X_indices = np.ascontiguousarray(
            np.column_stack(np.where(X != 0)), dtype=np.int32
        )

    def dot(self, other):
        """Multiply this matrix by ``other`` via ``example.mat_vec_1d_dummy``.

        Parameters
        ----------
        other : np.ndarray
            Vector to multiply by; its dtype is used for the result.

        Returns
        -------
        np.ndarray
            Array with one entry per row of the original matrix.
        """
        product = np.zeros(self.length, dtype=other.dtype)
        # The buffer is passed as the third argument; presumably the kernel
        # writes the product into it (verify against the `example` module).
        example.mat_vec_1d_dummy(self.X_indices, other, product)
        return product

In [7]:
# Inputs: 1000 labels drawn with replacement from `strings`, and a vector
# holding one float64 value per label in `strings`.
# NOTE(review): the draw is unseeded, and the products below only line up
# if every label appears in `x` at least once (otherwise the dummy matrix has
# fewer columns than `y` has entries).
x = np.random.choice(strings, 1000)
y = np.arange(len(strings), dtype=float)

# Dense one-hot encoding and two separate SciPy CSR copies of it.
matrix_dense = np.asarray(pd.get_dummies(x))
matrix_sparse = sp.csr_matrix(matrix_dense)
sp_matrix = sp.csr_matrix(matrix_dense)

# Index-based representations: the two classes defined in this notebook
# and tabmat's CategoricalMatrix.
categorical = CategoricalMatrix(x)
categorical_2 = CategoricalMatrix2(matrix_dense)
tbmat = TabMat(x)

In [8]:
# Both index-based implementations must return exactly the same product.
np.all(categorical_2.dot(y) == categorical.dot(y))

True

Defining Aesara functions...

In [9]:
# Symbolic inputs for the dense product.
aet_x = aet.dmatrix("x")
aet_y = aet.dvector("y")
# Matrix-valued right-hand side for the sparse product.
# NOTE(review): this variable carries the same label "y" as `aet_y`.
aet_Y = aet.dmatrix("y")

# Sparse CSR variable assembled from the components of `sp_matrix`.
x_sparse = aesara.sparse.CSR(
    sp_matrix.data,
    sp_matrix.indices,
    sp_matrix.indptr,
    sp_matrix.shape,
)

# Compiled dense matrix-vector product.
aet_dot = aesara.function(
    inputs=[aet_x, aet_y],
    outputs=aet.dot(aet_x, aet_y),
)
# Compiled sparse-times-dense product.
aet_sparse_dot = aesara.function(
    inputs=[x_sparse, aet_Y],
    outputs=aesara.sparse.structured_dot(x_sparse, aet_Y),
)

In [10]:
# Time the same matrix-vector product with every representation built above.
# The output lines appear in the same order as the statements.
# Index-based classes defined in this notebook.
%timeit categorical.dot(y)
%timeit categorical_2.dot(y)
# Dense NumPy array.
%timeit matrix_dense.dot(y)
# SciPy CSR matrix.
%timeit sp_matrix.dot(y)
# tabmat CategoricalMatrix.
%timeit tbmat.matvec(y)
# Compiled Aesara dense dot.
%timeit aet_dot(matrix_dense, y)
# Compiled Aesara structured_dot; y is passed as a single-column matrix.
%timeit aet_sparse_dot(sp_matrix, y[:, None])

2 µs ± 47.8 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
2.33 µs ± 42.5 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
70 µs ± 2.96 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
6.16 µs ± 54.4 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
8.26 µs ± 676 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
100 µs ± 1.27 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
34.8 µs ± 67.6 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [11]:
# Repeat the benchmark with 100,000 observations; `y` is reused from the
# earlier cell, and every representation is rebuilt from the new `x`.
# NOTE(review): the draw is unseeded, and every label in `strings` must appear
# in `x` for the dummy columns to match the length of `y`.
x = np.random.choice(strings, size=100000)
matrix_dense = np.asarray(pd.get_dummies(x))
matrix_sparse = sp.csr_matrix(matrix_dense)

# Index-based and sparse representations of the same data.
categorical = CategoricalMatrix(x)
categorical_2 = CategoricalMatrix2(matrix_dense)
sp_matrix = sp.csr_matrix(matrix_dense)
tbmat = TabMat(x)

# Same seven timings, in the same order, as in the 1000-row benchmark.
%timeit categorical.dot(y)
%timeit categorical_2.dot(y)
%timeit matrix_dense.dot(y)
%timeit sp_matrix.dot(y)
%timeit tbmat.matvec(y)
%timeit aet_dot(matrix_dense, y)
%timeit aet_sparse_dot(sp_matrix, y[:, None])

68.2 µs ± 663 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
101 µs ± 175 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
20.4 ms ± 595 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
186 µs ± 1.09 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
58.1 µs ± 4.35 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
22.5 ms ± 376 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
255 µs ± 14 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [12]:
# Repeat the benchmark with 10,000,000 observations; `y` is reused from the
# earlier cell, and every representation is rebuilt from the new `x`.
# NOTE(review): the dense one-hot array has 10,000,000 rows and one column per
# distinct label, so confirm it fits in memory before running this cell.
x = np.random.choice(strings, size=10000000)
matrix_dense = np.asarray(pd.get_dummies(x))
matrix_sparse = sp.csr_matrix(matrix_dense)

# Index-based and sparse representations of the same data.
categorical = CategoricalMatrix(x)
categorical_2 = CategoricalMatrix2(matrix_dense)
sp_matrix = sp.csr_matrix(matrix_dense)
tbmat = TabMat(x)

# Same seven timings, in the same order, as in the smaller benchmarks.
%timeit categorical.dot(y)
%timeit categorical_2.dot(y)
%timeit matrix_dense.dot(y)
%timeit sp_matrix.dot(y)
%timeit tbmat.matvec(y)
%timeit aet_dot(matrix_dense, y)
%timeit aet_sparse_dot(sp_matrix, y[:, None])