Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 33 additions & 30 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ name = "_core_impl"
crate-type = ["cdylib"]

[dependencies]
pyo3 = { version = "0.24.1", features = ["extension-module"] }
ndarray = { version = "0.15", features = ["rayon"] }
rayon = "1.9"
numpy = "0.24"
thiserror = "2.0.12"
pyo3 = { version = "0.26.0", features = ["extension-module"] }
ndarray = { version = "0.16.1", features = ["rayon"] }
rayon = "1.11.0"
numpy = "0.26.0"
thiserror = "2.0.16"

[profile.release]
opt-level = 3 # Maximize performance
Expand Down
7 changes: 7 additions & 0 deletions pyfixest/core/_core_impl.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,10 @@ def _count_fixef_fully_nested_all_rs(
cluster_data: NDArray[np.uint64],
fe_data: NDArray[np.uint64],
) -> tuple[np.ndarray, int]: ...
# Type stub for the compiled `_demean_accelerated_rs` routine of `_core_impl`.
# The Python wrapper `demean_accelerated` forwards its arguments positionally
# in this order and casts `flist` to uint64 before the call.
# Returns a 2-tuple (array, bool); per the wrapper's docstring these are the
# demeaned array and a flag indicating whether the algorithm converged.
def _demean_accelerated_rs(
    x: NDArray[np.float64],
    flist: NDArray[np.uint64],
    weights: NDArray[np.float64],
    tol: float = 1e-08,
    maxiter: int = 100_000,
) -> tuple[np.ndarray, bool]: ...
73 changes: 73 additions & 0 deletions pyfixest/core/demean_accelerated.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import numpy as np
from numpy.typing import NDArray

from ._core_impl import _demean_accelerated_rs


def demean_accelerated(
    x: NDArray[np.float64],
    flist: NDArray[np.uint64],
    weights: NDArray[np.float64],
    tol: float = 1e-08,
    maxiter: int = 100_000,
) -> tuple[NDArray, bool]:
    """
    Demean an array.

    Workhorse for demeaning an input array `x` based on the specified fixed
    effects and weights via the alternating projections algorithm. This is a
    thin wrapper that forwards to the compiled `_demean_accelerated_rs`
    routine after making sure `flist` has dtype `uint64`.

    Parameters
    ----------
    x : numpy.ndarray
        Input array of shape (n_samples, n_features). Needs to be of type float.
    flist : numpy.ndarray
        Array of shape (n_samples, n_factors) specifying the fixed effects.
        Needs to already be converted to integers.
    weights : numpy.ndarray
        Array of shape (n_samples,) specifying the weights.
    tol : float, optional
        Tolerance criterion for convergence. Defaults to 1e-08.
    maxiter : int, optional
        Maximum number of iterations. Defaults to 100_000.

    Returns
    -------
    tuple[numpy.ndarray, bool]
        A tuple containing the demeaned array of shape (n_samples, n_features)
        and a boolean indicating whether the algorithm converged successfully.

    Examples
    --------
    ```{python}
    import numpy as np
    import pyfixest as pf
    from pyfixest.utils.dgps import get_blw
    from pyfixest.core.demean_accelerated import demean_accelerated
    from formulaic import model_matrix

    fml = "y ~ treat | state + year"

    data = get_blw()
    data.head()

    Y, rhs = model_matrix(fml, data)
    X = rhs[0].drop(columns="Intercept")
    fe = rhs[1].drop(columns="Intercept")

    # to numpy
    Y = Y.to_numpy()
    X = X.to_numpy()
    YX = np.concatenate([Y, X], axis=1)
    fe = fe.to_numpy().astype(int) # demean requires fixed effects as ints!

    YX_demeaned, success = demean_accelerated(
        YX, fe, weights=np.ones(YX.shape[0])
    )
    Y_demeaned = YX_demeaned[:, 0]
    X_demeaned = YX_demeaned[:, 1:]

    print(np.linalg.lstsq(X_demeaned, Y_demeaned, rcond=None)[0])
    print(pf.feols(fml, data).coef())
    ```
    """
    # copy=False skips the extra allocation when `flist` is already uint64;
    # any other integer dtype is still converted exactly as before.
    flist_u64 = flist.astype(np.uint64, copy=False)
    return _demean_accelerated_rs(x, flist_u64, weights, tol, maxiter)
3 changes: 1 addition & 2 deletions src/demean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -211,8 +211,7 @@ pub fn _demean_rs(
let flist_arr = flist.as_array();
let weights_arr = weights.as_array();

let (out, success) =
py.allow_threads(|| demean_impl(&x_arr, &flist_arr, &weights_arr, tol, maxiter));
let (out, success) = py.detach(|| demean_impl(&x_arr, &flist_arr, &weights_arr, tol, maxiter));

let pyarray = PyArray2::from_owned_array(py, out);
Ok((pyarray.into(), success))
Expand Down
Loading
Loading