Explicitly cast to object dtype to support pandas >= 2.0.3

sorenmacbeth · sorenmacbeth · commit 61a68d4465dd · 2024-12-03T00:01:29.000-08:00
without relying on the `future.no_silent_downcasting` option
diff --git a/src/pytorch_tabular/categorical_encoders.py b/src/pytorch_tabular/categorical_encoders.py
@@ -4,7 +4,7 @@
 # Modified https://github.com/tcassou/mlencoders/blob/master/mlencoders/base_encoder.py to suit NN encoding
 """Category Encoders."""
 
-from pandas import DataFrame, Series, option_context, unique
+from pandas import DataFrame, Series, unique
 
 try:
     import cPickle as pickle
@@ -65,12 +65,10 @@ def transform(self, X):
         category_cols = X_encoded.select_dtypes(include="category").columns
         X_encoded[category_cols] = X_encoded[category_cols].astype("object")
         for col, mapping in self._mapping.items():
-            with option_context("future.no_silent_downcasting", True):
-                X_encoded[col] = X_encoded[col].fillna(NAN_CATEGORY).infer_objects(copy=False).map(mapping["value"])
+            X_encoded[col] = X_encoded[col].fillna(NAN_CATEGORY).astype("object").map(mapping["value"])
 
             if self.handle_unseen == "impute":
-                with option_context("future.no_silent_downcasting", True):
-                    X_encoded[col] = X_encoded[col].fillna(self._imputed).infer_objects(copy=False)
+                X_encoded[col] = X_encoded[col].fillna(self._imputed).astype("object")
             elif self.handle_unseen == "error":
                 if np.unique(X_encoded[col]).shape[0] > mapping.shape[0]:
                     raise ValueError(f"Unseen categories found in `{col}` column.")
@@ -159,12 +157,11 @@ def fit(self, X, y=None):
                 not X[self.cols].isnull().any().any()
             ), "`handle_missing` = `error` and missing values found in columns to encode."
         for col in self.cols:
-            with option_context("future.no_silent_downcasting", True):
-                map = (
-                    Series(unique(X[col].fillna(NAN_CATEGORY).infer_objects(copy=False)), name=col)
-                    .reset_index()
-                    .rename(columns={"index": "value"})
-                )
+            map = (
+                Series(unique(X[col].fillna(NAN_CATEGORY).astype("object")), name=col)
+                .reset_index()
+                .rename(columns={"index": "value"})
+            )
             map["value"] += 1
             self._mapping[col] = map.set_index(col)
 
diff --git a/src/pytorch_tabular/tabular_datamodule.py b/src/pytorch_tabular/tabular_datamodule.py
@@ -303,15 +303,12 @@ def _update_config(self, config) -> InferredConfig:
         else:
             raise ValueError(f"{config.task} is an unsupported task.")
         if self.train is not None:
-            with pd.option_context("future.no_silent_downcasting", True):
-                category_cols = self.train[config.categorical_cols].select_dtypes(include="category").columns
-                self.train[category_cols] = self.train[category_cols].astype("object")
-                categorical_cardinality = [
-                    int(x) + 1
-                    for x in list(
-                        self.train[config.categorical_cols].fillna("NA").infer_objects(copy=False).nunique().values
-                    )
-                ]
+            category_cols = self.train[config.categorical_cols].select_dtypes(include="category").columns
+            self.train[category_cols] = self.train[category_cols].astype("object")
+            categorical_cardinality = [
+                int(x) + 1
+                for x in list(self.train[config.categorical_cols].fillna("NA").astype("object").nunique().values)
+            ]
         else:
             category_cols = self.train_dataset.data[config.categorical_cols].select_dtypes(include="category").columns
             self.train_dataset.data[category_cols] = self.train_dataset.data[category_cols].astype("object")
diff --git a/src/pytorch_tabular/tabular_model.py b/src/pytorch_tabular/tabular_model.py
@@ -26,9 +26,7 @@
 from pandas import DataFrame
 from pytorch_lightning import seed_everything
 from pytorch_lightning.callbacks import RichProgressBar
-from pytorch_lightning.callbacks.gradient_accumulation_scheduler import (
-    GradientAccumulationScheduler,
-)
+from pytorch_lightning.callbacks.gradient_accumulation_scheduler import GradientAccumulationScheduler
 from pytorch_lightning.tuner.tuning import Tuner
 from pytorch_lightning.utilities.model_summary import summarize
 from pytorch_lightning.utilities.rank_zero import rank_zero_only
@@ -48,11 +46,7 @@
 )
 from pytorch_tabular.config.config import InferredConfig
 from pytorch_tabular.models.base_model import BaseModel, _CaptumModel, _GenericModel
-from pytorch_tabular.models.common.layers.embeddings import (
-    Embedding1dLayer,
-    Embedding2dLayer,
-    PreEncoded1dLayer,
-)
+from pytorch_tabular.models.common.layers.embeddings import Embedding1dLayer, Embedding2dLayer, PreEncoded1dLayer
 from pytorch_tabular.tabular_datamodule import TabularDatamodule
 from pytorch_tabular.utils import (
     OOMException,