nubank · matheusfacure · Aug 10, 2022 · Aug 10, 2022 · Aug 10, 2022 · Aug 10, 2022
@@ -354,11 +354,8 @@ def apply_replacements(df: pd.DataFrame,
         Default value to replace when original value is not present in the `vec` dict for the feature
 
     """
-    def column_categorizer(col: str) -> np.ndarray:
-        replaced = df[col].map(vec[col])
-        unseen = df[col].notnull() & replaced.isnull()
-        replaced[unseen] = replace_unseen
-        return replaced
+    def column_categorizer(col: str) -> pd.Series:
+        return df[col].map(lambda x: vec[col].get(x, replace_unseen), na_action='ignore')
 
     categ_columns = {col: column_categorizer(col) for col in columns}
     return df.assign(**categ_columns)

diff --git a/tests/training/test_transformation.py b/tests/training/test_transformation.py
@@ -426,7 +426,7 @@ def test_count_categorizer():
     expected_output_test = pd.DataFrame(
         {
             "feat1_num": [2, 20, 200, 2000],
-            "feat2_cat": [3.0, 1.0, 1.0, 1.0],  # replace unseen vars with constant (1)
+            "feat2_cat": [3, 1, 1, 1],  # replace unseen vars with constant (1)
             "feat3_cat": [nan, nan, 3, 3],
         }
     )
@@ -537,10 +537,10 @@ def test_label_categorizer():
         {
             "feat1_num": [2, 20, 200, 2000],
             "feat2_cat": [
-                0.0,
-                1.0,
-                1.0,
-                -99.0,
+                0,
+                1,
+                1,
+                -99,
             ],  # replace unseen vars with constant (-99)
             "feat3_cat": [nan, nan, 0, 0],
         }