-
Notifications
You must be signed in to change notification settings - Fork 27
Closed
Closed
Copy link
Description
Environment Details
Please indicate the following details about the environment in which you found the bug:
- RDT version: 1.18.1
- Python version: 3.13
- Operating System: macOS
Error Description
When fitting a categorical transformer (e.g. `UniformEncoder`) on nullable integer (`Int64`) data that contains missing values, RDT raises `TypeError: values must be a 1D list-like`.
Steps to reproduce
import pandas as pd
from rdt.transformers import UniformEncoder
df = pd.DataFrame({'example': [1, 2, 3, None]}, dtype='Int64')
encoder = UniformEncoder()
encoder.fit(data=df, column='example')
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
File ~/miniconda3/envs/sdvclean/lib/python3.13/site-packages/pandas/core/internals/blocks.py:2401, in ExtensionBlock.fillna(self, value, limit, inplace, downcast, using_cow, already_warned)
2400 try:
-> 2401 new_values = self.values.fillna(
2402 value=value, method=None, limit=limit, copy=copy
2403 )
2404 except TypeError:
2405 # 3rd party EA that has not implemented copy keyword yet
File ~/miniconda3/envs/sdvclean/lib/python3.13/site-packages/pandas/core/arrays/masked.py:267, in BaseMaskedArray.fillna(self, value, method, limit, copy)
266 new_values = self[:]
--> 267 new_values[mask] = value
268 else:
File ~/miniconda3/envs/sdvclean/lib/python3.13/site-packages/pandas/core/arrays/masked.py:320, in BaseMaskedArray.__setitem__(self, key, value)
318 return
--> 320 value, mask = self._coerce_to_array(value, dtype=self.dtype)
322 self._data[key] = value
File ~/miniconda3/envs/sdvclean/lib/python3.13/site-packages/pandas/core/arrays/numeric.py:272, in NumericArray._coerce_to_array(cls, value, dtype, copy)
271 default_dtype = dtype_cls._default_np_dtype
--> 272 values, mask, _, _ = _coerce_to_data_and_mask(
273 value, dtype, copy, dtype_cls, default_dtype
274 )
275 return values, mask
File ~/miniconda3/envs/sdvclean/lib/python3.13/site-packages/pandas/core/arrays/numeric.py:184, in _coerce_to_data_and_mask(values, dtype, copy, dtype_cls, default_dtype)
183 if values.ndim != 1:
--> 184 raise TypeError("values must be a 1D list-like")
186 if mask is None:
TypeError: values must be a 1D list-like
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
Cell In[18], line 7
5 df = pd.DataFrame({'example': [1, 2, 3, None]}, dtype='Int64')
6 encoder = UniformEncoder()
----> 7 encoder.fit(data=df, column='example')
File ~/miniconda3/envs/sdvclean/lib/python3.13/site-packages/rdt/transformers/base.py:57, in random_state.<locals>.wrapper(self, *args, **kwargs)
55 method_name = function.__name__
56 with set_random_states(self.random_states, method_name, self.set_random_state):
---> 57 return function(self, *args, **kwargs)
File ~/miniconda3/envs/sdvclean/lib/python3.13/site-packages/rdt/transformers/base.py:390, in BaseTransformer.fit(self, data, column)
388 self._set_seed(data)
389 columns_data = self._get_columns_data(data, self.columns)
--> 390 self._fit(columns_data)
391 self._build_output_columns(data)
File ~/miniconda3/envs/sdvclean/lib/python3.13/site-packages/rdt/transformers/categorical.py:129, in UniformEncoder._fit(self, data)
119 """Fit the transformer to the data.
120
121 Compute the frequencies of each category and use them
(...) 126 Data to fit the transformer to.
127 """
128 self.dtype = data.dtypes
--> 129 data = fill_nan_with_none(data)
130 labels = pd.unique(data)
131 labels = self._order_categories(labels)
File ~/miniconda3/envs/sdvclean/lib/python3.13/site-packages/rdt/transformers/utils.py:202, in fill_nan_with_none(data)
199 if isinstance(data, pd.DataFrame):
200 return data.apply(_fill_nan_with_none_series)
--> 202 return _fill_nan_with_none_series(data)
File ~/miniconda3/envs/sdvclean/lib/python3.13/site-packages/rdt/transformers/utils.py:186, in _fill_nan_with_none_series(data)
183 data = data.fillna(sentinel).replace({sentinel: None})
184 return pd.Series(pd.Categorical(data, categories=dtype.categories), index=data.index)
--> 186 return data.fillna(sentinel).replace({sentinel: None})
File ~/miniconda3/envs/sdvclean/lib/python3.13/site-packages/pandas/core/generic.py:7368, in NDFrame.fillna(self, value, method, axis, inplace, limit, downcast)
7361 else:
7362 raise TypeError(
7363 '"value" parameter must be a scalar, dict '
7364 "or Series, but you passed a "
7365 f'"{type(value).__name__}"'
7366 )
-> 7368 new_data = self._mgr.fillna(
7369 value=value, limit=limit, inplace=inplace, downcast=downcast
7370 )
7372 elif isinstance(value, (dict, ABCSeries)):
7373 if axis == 1:
File ~/miniconda3/envs/sdvclean/lib/python3.13/site-packages/pandas/core/internals/base.py:186, in DataManager.fillna(self, value, limit, inplace, downcast)
182 if limit is not None:
183 # Do this validation even if we go through one of the no-op paths
184 limit = libalgos.validate_limit(None, limit=limit)
--> 186 return self.apply_with_block(
187 "fillna",
188 value=value,
189 limit=limit,
190 inplace=inplace,
191 downcast=downcast,
192 using_cow=using_copy_on_write(),
193 already_warned=_AlreadyWarned(),
194 )
File ~/miniconda3/envs/sdvclean/lib/python3.13/site-packages/pandas/core/internals/managers.py:363, in BaseBlockManager.apply(self, f, align_keys, **kwargs)
361 applied = b.apply(f, **kwargs)
362 else:
--> 363 applied = getattr(b, f)(**kwargs)
364 result_blocks = extend_blocks(applied, result_blocks)
366 out = type(self).from_blocks(result_blocks, self.axes)
File ~/miniconda3/envs/sdvclean/lib/python3.13/site-packages/pandas/core/internals/blocks.py:2407, in ExtensionBlock.fillna(self, value, limit, inplace, downcast, using_cow, already_warned)
2404 except TypeError:
2405 # 3rd party EA that has not implemented copy keyword yet
2406 refs = None
-> 2407 new_values = self.values.fillna(value=value, method=None, limit=limit)
2408 # issue the warning *after* retrying, in case the TypeError
2409 # was caused by an invalid fill_value
2410 warnings.warn(
2411 # GH#53278
2412 "ExtensionArray.fillna added a 'copy' keyword in pandas "
(...) 2418 stacklevel=find_stack_level(),
2419 )
File ~/miniconda3/envs/sdvclean/lib/python3.13/site-packages/pandas/core/arrays/masked.py:267, in BaseMaskedArray.fillna(self, value, method, limit, copy)
265 else:
266 new_values = self[:]
--> 267 new_values[mask] = value
268 else:
269 if copy:
File ~/miniconda3/envs/sdvclean/lib/python3.13/site-packages/pandas/core/arrays/masked.py:320, in BaseMaskedArray.__setitem__(self, key, value)
317 self._mask[key] = False
318 return
--> 320 value, mask = self._coerce_to_array(value, dtype=self.dtype)
322 self._data[key] = value
323 self._mask[key] = mask
File ~/miniconda3/envs/sdvclean/lib/python3.13/site-packages/pandas/core/arrays/numeric.py:272, in NumericArray._coerce_to_array(cls, value, dtype, copy)
270 dtype_cls = cls._dtype_cls
271 default_dtype = dtype_cls._default_np_dtype
--> 272 values, mask, _, _ = _coerce_to_data_and_mask(
273 value, dtype, copy, dtype_cls, default_dtype
274 )
275 return values, mask
File ~/miniconda3/envs/sdvclean/lib/python3.13/site-packages/pandas/core/arrays/numeric.py:184, in _coerce_to_data_and_mask(values, dtype, copy, dtype_cls, default_dtype)
181 raise TypeError(f"{values.dtype} cannot be converted to {name}")
183 if values.ndim != 1:
--> 184 raise TypeError("values must be a 1D list-like")
186 if mask is None:
187 if values.dtype.kind in "iu":
188 # fastpath
TypeError: values must be a 1D list-like
Metadata
Assignees
Labels
bug: Something isn't working