Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
matthewwardrop committed Apr 5, 2022
1 parent f868245 commit 2ade183
Show file tree
Hide file tree
Showing 11 changed files with 1,314 additions and 401 deletions.
8 changes: 5 additions & 3 deletions formulaic/materializers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -522,7 +522,7 @@ def _encode_evaled_factor(
drop_rows: set,
reduced_rank: bool = False,
) -> Dict[str, Any]:
if not isinstance(factor.values, dict) or not factor.metadata.encoded:
if not factor.metadata.encoded:
if factor.expr in self.encoded_cache:
encoded = self.encoded_cache[factor.expr]
elif (factor.expr, reduced_rank) in self.encoded_cache:
Expand Down Expand Up @@ -552,7 +552,9 @@ def wrapped(values, metadata, state, *args, **kwargs):
if nested_state:
state[k] = nested_state
if isinstance(values, FactorValues):
return FactorValues(encoded, metadata=values.__formulaic_metadata__)
return FactorValues(
encoded, metadata=values.__formulaic_metadata__
)
return encoded
return f(values, metadata, state, *args, **kwargs)

Expand Down Expand Up @@ -618,7 +620,7 @@ def wrapped(values, metadata, state, *args, **kwargs):

self.encoded_cache[cache_key] = encoded
else:
encoded = factor.values
encoded = as_columns(factor.values)

encoded = FactorValues(
encoded,
Expand Down
12 changes: 9 additions & 3 deletions formulaic/materializers/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
import pandas
import scipy.sparse as spsparse
from interface_meta import override
from formulaic.materializers.types.factor_values import FactorValues

from formulaic.utils.cast import as_columns

from .base import FormulaMaterializer
from .types import NAAction
Expand Down Expand Up @@ -74,17 +77,21 @@ def _encode_categorical(
# Even though we could reduce rank here, we do not, so that the same
# encoding can be cached for both reduced and unreduced rank. The
# rank will be reduced in the _encode_evaled_factor method.
from formulaic.transforms import contrasts
from formulaic.transforms import encode_contrasts

if drop_rows:
values = values.drop(index=values.index[drop_rows])
return contrasts(
encoded = encode_contrasts(
values,
reduced_rank=False,
_metadata=metadata,
_state=encoder_state,
_spec=spec,
)
return FactorValues(
as_columns(encoded),
metadata=encoded.__formulaic_metadata__,
)

@override
def _get_columns_for_term(self, factors, spec, scale=1):
Expand Down Expand Up @@ -132,7 +139,6 @@ def _get_columns_for_term(self, factors, spec, scale=1):

@override
def _combine_columns(self, cols, spec, drop_rows):

# If we are outputting a pandas DataFrame, explicitly override index
# in case transforms/etc have lost track of it.
if spec.output == "pandas":
Expand Down
4 changes: 3 additions & 1 deletion formulaic/materializers/types/factor_values.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,9 @@ def __init__(
drop_field: Optional[str] = MISSING,
format: str = MISSING,
encoded: bool = MISSING,
encoder: Optional[Callable[[Any, bool, List[int], Dict[str, Any]], Any]] = MISSING,
encoder: Optional[
Callable[[Any, bool, List[int], Dict[str, Any]], Any]
] = MISSING,
):
metadata_constructor = FactorValuesMetadata
metadata_kwargs = dict(
Expand Down
16 changes: 14 additions & 2 deletions formulaic/transforms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,21 @@

from .basis_spline import basis_spline
from .identity import identity
from .contrasts import contrasts
from .contrasts import C, encode_contrasts, ContrastsRegistry
from .poly import poly
from .scale import center, scale

__all__ = [
"basis_spline",
"identity",
"C",
"encode_contrasts",
"ContrastsRegistry",
"poly",
"center",
"scale",
"TRANSFORMS",
]

TRANSFORMS = {
# Common transforms
Expand All @@ -21,6 +32,7 @@
"center": center,
"poly": poly,
"scale": scale,
"C": contrasts,
"C": C,
"contr": ContrastsRegistry,
"I": identity,
}
Loading

0 comments on commit 2ade183

Please sign in to comment.