From 5007426f782daecd37f9a8716bee47bcef7d9898 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 9 Mar 2020 14:12:45 -0500
Subject: [PATCH 1/5] REGR: Expand ValueError catching in series aggregate

Closes https://github.com/pandas-dev/pandas/issues/31802

This "fixes" #31802 by expanding the set of cases where we swallow an
exception raised from libreduction. Currently, we're creating an invalid
Series in SeriesBinGrouper whose block `.mgr_locs` doesn't match the
length of its values. See
https://github.com/pandas-dev/pandas/issues/31802#issuecomment-595954511
for more.

For now, we simply catch more cases and fall back to the pure-Python
path. I've gone with a minimal change that addresses only issues hitting
this exact exception. We might want to catch more broadly, but it's not
clear that we should.
---
 doc/source/whatsnew/v1.0.2.rst           |  1 +
 pandas/core/groupby/ops.py               |  9 +++++++-
 pandas/tests/groupby/test_bin_groupby.py | 27 ++++++++++++++++++++++++
 3 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst
index eec471f989037..e35f519a2fc55 100644
--- a/doc/source/whatsnew/v1.0.2.rst
+++ b/doc/source/whatsnew/v1.0.2.rst
@@ -20,6 +20,7 @@ Fixed regressions
 - Fixed regression in :meth:`pandas.core.groupby.RollingGroupby.apply` where the ``raw`` parameter was ignored (:issue:`31754`)
 - Fixed regression in :meth:`rolling(..).corr() <pandas.core.window.Rolling.corr>` when using a time offset (:issue:`31789`)
 - Fixed regression in :meth:`DataFrameGroupBy.nunique` which was modifying the original values if ``NaN`` values were present (:issue:`31950`)
+- Fixed regression in ``DataFrame.groupby(...).agg`` with a user-defined function raising a ``ValueError`` from an internal operation (:issue:`31802`)
 - Fixed regression where :func:`read_pickle` raised a ``UnicodeDecodeError`` when reading a py27 pickle with :class:`MultiIndex` column (:issue:`31988`).
 - Fixed regression in :class:`DataFrame` arithmetic operations with mis-matched columns (:issue:`31623`)
 - Fixed regression in :meth:`GroupBy.agg` calling a user-provided function an extra time on an empty input (:issue:`31760`)
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 7259268ac3f2b..2a2eb135e5472 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -639,9 +639,16 @@ def agg_series(self, obj: Series, func):
         try:
             return self._aggregate_series_fast(obj, func)
         except ValueError as err:
-            if "Function does not reduce" in str(err):
+            msg = str(err)
+            if "Function does not reduce" in msg:
                 # raised in libreduction
                 pass
+            elif "Wrong number of items" in msg:
+                # https://github.com/pandas-dev/pandas/issues/31802
+                # libreduction.SeriesGrouper can create invalid Series /
+                # Blocks, which might raise arbitrary exceptions when
+                # operated upon.
+                pass
             else:
                 raise
         return self._aggregate_series_pure_python(obj, func)
diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py
index ff74d374e5e3f..570ea0dc7d07b 100644
--- a/pandas/tests/groupby/test_bin_groupby.py
+++ b/pandas/tests/groupby/test_bin_groupby.py
@@ -5,6 +5,7 @@
 
 from pandas.core.dtypes.common import ensure_int64
 
+import pandas as pd
 from pandas import Index, Series, isna
 import pandas._testing as tm
 
@@ -51,6 +52,32 @@ def test_series_bin_grouper():
     tm.assert_almost_equal(counts, exp_counts)
 
 
+def assert_block_lengths(x):
+    assert len(x) == len(x._data.blocks[0].mgr_locs)
+    return 0
+
+
+def cumsum_max(x):
+    x.cumsum().max()  # triggers the ValueError when creating a block
+    return 0
+
+
+@pytest.mark.parametrize(
+    "func",
+    [
+        cumsum_max,
+        pytest.param(assert_block_lengths, marks=pytest.mark.xfail(reason="debatable")),
+    ],
+)
+def test_operation_on_invalid_block_passes(func):
+    # https://github.com/pandas-dev/pandas/issues/31802
+    # SeriesBinGrouper creates an invalid block, which may
+    # raise arbitrary exceptions.
+    df = pd.DataFrame({"A": ["a", "a", "a"], "B": ["a", "b", "b"], "C": [1, 1, 1]})
+    result = df.groupby(["A", "B"]).agg(func)
+    assert isinstance(result, pd.DataFrame)
+
+
 @pytest.mark.parametrize(
     "binner,closed,expected",
     [

From ad746ba68752dbf0d398dbd7572efe702edd2657 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 9 Mar 2020 16:27:51 -0500
Subject: [PATCH 2/5] Update block mgr_locs in libreduction _BaseGrouper

---
 pandas/_libs/reduction.pyx               |  2 ++
 pandas/tests/groupby/test_bin_groupby.py | 20 +++++++++-----------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx
index b27072aa66708..29a5a73ef08d0 100644
--- a/pandas/_libs/reduction.pyx
+++ b/pandas/_libs/reduction.pyx
@@ -177,6 +177,8 @@ cdef class _BaseGrouper:
             object.__setattr__(cached_ityp, '_index_data', islider.buf)
             cached_ityp._engine.clear_mapping()
             object.__setattr__(cached_typ._data._block, 'values', vslider.buf)
+            object.__setattr__(cached_typ._data._block, 'mgr_locs',
+                               slice(len(vslider.buf)))
             object.__setattr__(cached_typ, '_index', cached_ityp)
             object.__setattr__(cached_typ, 'name', self.name)
 
diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py
index 570ea0dc7d07b..5d0d3be183a52 100644
--- a/pandas/tests/groupby/test_bin_groupby.py
+++ b/pandas/tests/groupby/test_bin_groupby.py
@@ -62,20 +62,18 @@ def cumsum_max(x):
     return 0
 
 
-@pytest.mark.parametrize(
-    "func",
-    [
-        cumsum_max,
-        pytest.param(assert_block_lengths, marks=pytest.mark.xfail(reason="debatable")),
-    ],
-)
-def test_operation_on_invalid_block_passes(func):
+@pytest.mark.parametrize("func", [cumsum_max, assert_block_lengths])
+def test_mgr_locs_updated(func):
     # https://github.com/pandas-dev/pandas/issues/31802
-    # SeriesBinGrouper creates an invalid block, which may
-    # raise arbitrary exceptions.
+    # Some operations may require creating new blocks, which requires
+    # valid mgr_locs
     df = pd.DataFrame({"A": ["a", "a", "a"], "B": ["a", "b", "b"], "C": [1, 1, 1]})
     result = df.groupby(["A", "B"]).agg(func)
-    assert isinstance(result, pd.DataFrame)
+    expected = pd.DataFrame(
+        {"C": [0, 0]},
+        index=pd.MultiIndex.from_product([["a"], ["a", "b"]], names=["A", "B"]),
+    )
+    tm.assert_frame_equal(result, expected)
 
 
 @pytest.mark.parametrize(

From 922b30de38716b9fec88c67b9f06813c0889899b Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 9 Mar 2020 20:33:24 -0500
Subject: [PATCH 3/5] Revert expanded ValueError catching in agg_series

---
 pandas/core/groupby/ops.py               | 9 +--------
 pandas/tests/groupby/test_bin_groupby.py | 2 +-
 2 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 2a2eb135e5472..7259268ac3f2b 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -639,16 +639,9 @@ def agg_series(self, obj: Series, func):
         try:
             return self._aggregate_series_fast(obj, func)
         except ValueError as err:
-            msg = str(err)
-            if "Function does not reduce" in msg:
+            if "Function does not reduce" in str(err):
                 # raised in libreduction
                 pass
-            elif "Wrong number of items" in msg:
-                # https://github.com/pandas-dev/pandas/issues/31802
-                # libreduction.SeriesGrouper can create invalid Series /
-                # Blocks, which might raise arbitrary exceptions when
-                # operated upon.
-                pass
             else:
                 raise
         return self._aggregate_series_pure_python(obj, func)
diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py
index 5d0d3be183a52..152086c241a52 100644
--- a/pandas/tests/groupby/test_bin_groupby.py
+++ b/pandas/tests/groupby/test_bin_groupby.py
@@ -58,7 +58,7 @@ def assert_block_lengths(x):
 
 
 def cumsum_max(x):
-    x.cumsum().max()  # triggers the ValueError when creating a block
+    x.cumsum().max()
     return 0
 
 

From f63acd3d0bb41ba2b349163156de2684ed8da4d1 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Mon, 9 Mar 2020 13:17:15 -0700
Subject: [PATCH 4/5] TST: separate out pd.crosstab tests from test_pivot
 (#32536)

---
 pandas/tests/reshape/test_crosstab.py | 700 +++++++++++++++++++++++++
 pandas/tests/reshape/test_pivot.py    | 707 +-------------------------
 2 files changed, 701 insertions(+), 706 deletions(-)
 create mode 100644 pandas/tests/reshape/test_crosstab.py

diff --git a/pandas/tests/reshape/test_crosstab.py b/pandas/tests/reshape/test_crosstab.py
new file mode 100644
index 0000000000000..8795af2e11122
--- /dev/null
+++ b/pandas/tests/reshape/test_crosstab.py
@@ -0,0 +1,700 @@
+import numpy as np
+import pytest
+
+from pandas import CategoricalIndex, DataFrame, Index, MultiIndex, Series, crosstab
+import pandas._testing as tm
+
+
+class TestCrosstab:
+    def setup_method(self, method):
+        df = DataFrame(
+            {
+                "A": [
+                    "foo",
+                    "foo",
+                    "foo",
+                    "foo",
+                    "bar",
+                    "bar",
+                    "bar",
+                    "bar",
+                    "foo",
+                    "foo",
+                    "foo",
+                ],
+                "B": [
+                    "one",
+                    "one",
+                    "one",
+                    "two",
+                    "one",
+                    "one",
+                    "one",
+                    "two",
+                    "two",
+                    "two",
+                    "one",
+                ],
+                "C": [
+                    "dull",
+                    "dull",
+                    "shiny",
+                    "dull",
+                    "dull",
+                    "shiny",
+                    "shiny",
+                    "dull",
+                    "shiny",
+                    "shiny",
+                    "shiny",
+                ],
+                "D": np.random.randn(11),
+                "E": np.random.randn(11),
+                "F": np.random.randn(11),
+            }
+        )
+
+        self.df = df.append(df, ignore_index=True)
+
+    def test_crosstab_single(self):
+        df = self.df
+        result = crosstab(df["A"], df["C"])
+        expected = df.groupby(["A", "C"]).size().unstack()
+        tm.assert_frame_equal(result, expected.fillna(0).astype(np.int64))
+
+    def test_crosstab_multiple(self):
+        df = self.df
+
+        result = crosstab(df["A"], [df["B"], df["C"]])
+        expected = df.groupby(["A", "B", "C"]).size()
+        expected = expected.unstack("B").unstack("C").fillna(0).astype(np.int64)
+        tm.assert_frame_equal(result, expected)
+
+        result = crosstab([df["B"], df["C"]], df["A"])
+        expected = df.groupby(["B", "C", "A"]).size()
+        expected = expected.unstack("A").fillna(0).astype(np.int64)
+        tm.assert_frame_equal(result, expected)
+
+    def test_crosstab_ndarray(self):
+        a = np.random.randint(0, 5, size=100)
+        b = np.random.randint(0, 3, size=100)
+        c = np.random.randint(0, 10, size=100)
+
+        df = DataFrame({"a": a, "b": b, "c": c})
+
+        result = crosstab(a, [b, c], rownames=["a"], colnames=("b", "c"))
+        expected = crosstab(df["a"], [df["b"], df["c"]])
+        tm.assert_frame_equal(result, expected)
+
+        result = crosstab([b, c], a, colnames=["a"], rownames=("b", "c"))
+        expected = crosstab([df["b"], df["c"]], df["a"])
+        tm.assert_frame_equal(result, expected)
+
+        # assign arbitrary names
+        result = crosstab(self.df["A"].values, self.df["C"].values)
+        assert result.index.name == "row_0"
+        assert result.columns.name == "col_0"
+
+    def test_crosstab_non_aligned(self):
+        # GH 17005
+        a = Series([0, 1, 1], index=["a", "b", "c"])
+        b = Series([3, 4, 3, 4, 3], index=["a", "b", "c", "d", "f"])
+        c = np.array([3, 4, 3])
+
+        expected = DataFrame(
+            [[1, 0], [1, 1]],
+            index=Index([0, 1], name="row_0"),
+            columns=Index([3, 4], name="col_0"),
+        )
+
+        result = crosstab(a, b)
+        tm.assert_frame_equal(result, expected)
+
+        result = crosstab(a, c)
+        tm.assert_frame_equal(result, expected)
+
+    def test_crosstab_margins(self):
+        a = np.random.randint(0, 7, size=100)
+        b = np.random.randint(0, 3, size=100)
+        c = np.random.randint(0, 5, size=100)
+
+        df = DataFrame({"a": a, "b": b, "c": c})
+
+        result = crosstab(a, [b, c], rownames=["a"], colnames=("b", "c"), margins=True)
+
+        assert result.index.names == ("a",)
+        assert result.columns.names == ["b", "c"]
+
+        all_cols = result["All", ""]
+        exp_cols = df.groupby(["a"]).size().astype("i8")
+        # to keep index.name
+        exp_margin = Series([len(df)], index=Index(["All"], name="a"))
+        exp_cols = exp_cols.append(exp_margin)
+        exp_cols.name = ("All", "")
+
+        tm.assert_series_equal(all_cols, exp_cols)
+
+        all_rows = result.loc["All"]
+        exp_rows = df.groupby(["b", "c"]).size().astype("i8")
+        exp_rows = exp_rows.append(Series([len(df)], index=[("All", "")]))
+        exp_rows.name = "All"
+
+        exp_rows = exp_rows.reindex(all_rows.index)
+        exp_rows = exp_rows.fillna(0).astype(np.int64)
+        tm.assert_series_equal(all_rows, exp_rows)
+
+    def test_crosstab_margins_set_margin_name(self):
+        # GH 15972
+        a = np.random.randint(0, 7, size=100)
+        b = np.random.randint(0, 3, size=100)
+        c = np.random.randint(0, 5, size=100)
+
+        df = DataFrame({"a": a, "b": b, "c": c})
+
+        result = crosstab(
+            a,
+            [b, c],
+            rownames=["a"],
+            colnames=("b", "c"),
+            margins=True,
+            margins_name="TOTAL",
+        )
+
+        assert result.index.names == ("a",)
+        assert result.columns.names == ["b", "c"]
+
+        all_cols = result["TOTAL", ""]
+        exp_cols = df.groupby(["a"]).size().astype("i8")
+        # to keep index.name
+        exp_margin = Series([len(df)], index=Index(["TOTAL"], name="a"))
+        exp_cols = exp_cols.append(exp_margin)
+        exp_cols.name = ("TOTAL", "")
+
+        tm.assert_series_equal(all_cols, exp_cols)
+
+        all_rows = result.loc["TOTAL"]
+        exp_rows = df.groupby(["b", "c"]).size().astype("i8")
+        exp_rows = exp_rows.append(Series([len(df)], index=[("TOTAL", "")]))
+        exp_rows.name = "TOTAL"
+
+        exp_rows = exp_rows.reindex(all_rows.index)
+        exp_rows = exp_rows.fillna(0).astype(np.int64)
+        tm.assert_series_equal(all_rows, exp_rows)
+
+        msg = "margins_name argument must be a string"
+        for margins_name in [666, None, ["a", "b"]]:
+            with pytest.raises(ValueError, match=msg):
+                crosstab(
+                    a,
+                    [b, c],
+                    rownames=["a"],
+                    colnames=("b", "c"),
+                    margins=True,
+                    margins_name=margins_name,
+                )
+
+    def test_crosstab_pass_values(self):
+        a = np.random.randint(0, 7, size=100)
+        b = np.random.randint(0, 3, size=100)
+        c = np.random.randint(0, 5, size=100)
+        values = np.random.randn(100)
+
+        table = crosstab(
+            [a, b], c, values, aggfunc=np.sum, rownames=["foo", "bar"], colnames=["baz"]
+        )
+
+        df = DataFrame({"foo": a, "bar": b, "baz": c, "values": values})
+
+        expected = df.pivot_table(
+            "values", index=["foo", "bar"], columns="baz", aggfunc=np.sum
+        )
+        tm.assert_frame_equal(table, expected)
+
+    def test_crosstab_dropna(self):
+        # GH 3820
+        a = np.array(["foo", "foo", "foo", "bar", "bar", "foo", "foo"], dtype=object)
+        b = np.array(["one", "one", "two", "one", "two", "two", "two"], dtype=object)
+        c = np.array(
+            ["dull", "dull", "dull", "dull", "dull", "shiny", "shiny"], dtype=object
+        )
+        res = crosstab(a, [b, c], rownames=["a"], colnames=["b", "c"], dropna=False)
+        m = MultiIndex.from_tuples(
+            [("one", "dull"), ("one", "shiny"), ("two", "dull"), ("two", "shiny")],
+            names=["b", "c"],
+        )
+        tm.assert_index_equal(res.columns, m)
+
+    def test_crosstab_no_overlap(self):
+        # GH 10291
+
+        s1 = Series([1, 2, 3], index=[1, 2, 3])
+        s2 = Series([4, 5, 6], index=[4, 5, 6])
+
+        actual = crosstab(s1, s2)
+        expected = DataFrame()
+
+        tm.assert_frame_equal(actual, expected)
+
+    def test_margin_dropna(self):
+        # GH 12577
+        # pivot_table counts null into margin ('All')
+        # when margins=True and dropna=True
+
+        df = DataFrame({"a": [1, 2, 2, 2, 2, np.nan], "b": [3, 3, 4, 4, 4, 4]})
+        actual = crosstab(df.a, df.b, margins=True, dropna=True)
+        expected = DataFrame([[1, 0, 1], [1, 3, 4], [2, 3, 5]])
+        expected.index = Index([1.0, 2.0, "All"], name="a")
+        expected.columns = Index([3, 4, "All"], name="b")
+        tm.assert_frame_equal(actual, expected)
+
+        df = DataFrame(
+            {"a": [1, np.nan, np.nan, np.nan, 2, np.nan], "b": [3, np.nan, 4, 4, 4, 4]}
+        )
+        actual = crosstab(df.a, df.b, margins=True, dropna=True)
+        expected = DataFrame([[1, 0, 1], [0, 1, 1], [1, 1, 2]])
+        expected.index = Index([1.0, 2.0, "All"], name="a")
+        expected.columns = Index([3.0, 4.0, "All"], name="b")
+        tm.assert_frame_equal(actual, expected)
+
+        df = DataFrame(
+            {"a": [1, np.nan, np.nan, np.nan, np.nan, 2], "b": [3, 3, 4, 4, 4, 4]}
+        )
+        actual = crosstab(df.a, df.b, margins=True, dropna=True)
+        expected = DataFrame([[1, 0, 1], [0, 1, 1], [1, 1, 2]])
+        expected.index = Index([1.0, 2.0, "All"], name="a")
+        expected.columns = Index([3, 4, "All"], name="b")
+        tm.assert_frame_equal(actual, expected)
+
+        # GH 12642
+        # _add_margins raises KeyError: Level None not found
+        # when margins=True and dropna=False
+        df = DataFrame({"a": [1, 2, 2, 2, 2, np.nan], "b": [3, 3, 4, 4, 4, 4]})
+        actual = crosstab(df.a, df.b, margins=True, dropna=False)
+        expected = DataFrame([[1, 0, 1], [1, 3, 4], [2, 4, 6]])
+        expected.index = Index([1.0, 2.0, "All"], name="a")
+        expected.columns = Index([3, 4, "All"], name="b")
+        tm.assert_frame_equal(actual, expected)
+
+        df = DataFrame(
+            {"a": [1, np.nan, np.nan, np.nan, 2, np.nan], "b": [3, np.nan, 4, 4, 4, 4]}
+        )
+        actual = crosstab(df.a, df.b, margins=True, dropna=False)
+        expected = DataFrame([[1, 0, 1], [0, 1, 1], [1, 4, 6]])
+        expected.index = Index([1.0, 2.0, "All"], name="a")
+        expected.columns = Index([3.0, 4.0, "All"], name="b")
+        tm.assert_frame_equal(actual, expected)
+
+        a = np.array(["foo", "foo", "foo", "bar", "bar", "foo", "foo"], dtype=object)
+        b = np.array(["one", "one", "two", "one", "two", np.nan, "two"], dtype=object)
+        c = np.array(
+            ["dull", "dull", "dull", "dull", "dull", "shiny", "shiny"], dtype=object
+        )
+
+        actual = crosstab(
+            a, [b, c], rownames=["a"], colnames=["b", "c"], margins=True, dropna=False
+        )
+        m = MultiIndex.from_arrays(
+            [
+                ["one", "one", "two", "two", "All"],
+                ["dull", "shiny", "dull", "shiny", ""],
+            ],
+            names=["b", "c"],
+        )
+        expected = DataFrame(
+            [[1, 0, 1, 0, 2], [2, 0, 1, 1, 5], [3, 0, 2, 1, 7]], columns=m
+        )
+        expected.index = Index(["bar", "foo", "All"], name="a")
+        tm.assert_frame_equal(actual, expected)
+
+        actual = crosstab(
+            [a, b], c, rownames=["a", "b"], colnames=["c"], margins=True, dropna=False
+        )
+        m = MultiIndex.from_arrays(
+            [["bar", "bar", "foo", "foo", "All"], ["one", "two", "one", "two", ""]],
+            names=["a", "b"],
+        )
+        expected = DataFrame(
+            [[1, 0, 1], [1, 0, 1], [2, 0, 2], [1, 1, 2], [5, 2, 7]], index=m
+        )
+        expected.columns = Index(["dull", "shiny", "All"], name="c")
+        tm.assert_frame_equal(actual, expected)
+
+        actual = crosstab(
+            [a, b], c, rownames=["a", "b"], colnames=["c"], margins=True, dropna=True
+        )
+        m = MultiIndex.from_arrays(
+            [["bar", "bar", "foo", "foo", "All"], ["one", "two", "one", "two", ""]],
+            names=["a", "b"],
+        )
+        expected = DataFrame(
+            [[1, 0, 1], [1, 0, 1], [2, 0, 2], [1, 1, 2], [5, 1, 6]], index=m
+        )
+        expected.columns = Index(["dull", "shiny", "All"], name="c")
+        tm.assert_frame_equal(actual, expected)
+
+    def test_crosstab_normalize(self):
+        # Issue 12578
+        df = DataFrame(
+            {"a": [1, 2, 2, 2, 2], "b": [3, 3, 4, 4, 4], "c": [1, 1, np.nan, 1, 1]}
+        )
+
+        rindex = Index([1, 2], name="a")
+        cindex = Index([3, 4], name="b")
+        full_normal = DataFrame([[0.2, 0], [0.2, 0.6]], index=rindex, columns=cindex)
+        row_normal = DataFrame([[1.0, 0], [0.25, 0.75]], index=rindex, columns=cindex)
+        col_normal = DataFrame([[0.5, 0], [0.5, 1.0]], index=rindex, columns=cindex)
+
+        # Check all normalize args
+        tm.assert_frame_equal(crosstab(df.a, df.b, normalize="all"), full_normal)
+        tm.assert_frame_equal(crosstab(df.a, df.b, normalize=True), full_normal)
+        tm.assert_frame_equal(crosstab(df.a, df.b, normalize="index"), row_normal)
+        tm.assert_frame_equal(crosstab(df.a, df.b, normalize="columns"), col_normal)
+        tm.assert_frame_equal(
+            crosstab(df.a, df.b, normalize=1),
+            crosstab(df.a, df.b, normalize="columns"),
+        )
+        tm.assert_frame_equal(
+            crosstab(df.a, df.b, normalize=0), crosstab(df.a, df.b, normalize="index"),
+        )
+
+        row_normal_margins = DataFrame(
+            [[1.0, 0], [0.25, 0.75], [0.4, 0.6]],
+            index=Index([1, 2, "All"], name="a", dtype="object"),
+            columns=Index([3, 4], name="b", dtype="object"),
+        )
+        col_normal_margins = DataFrame(
+            [[0.5, 0, 0.2], [0.5, 1.0, 0.8]],
+            index=Index([1, 2], name="a", dtype="object"),
+            columns=Index([3, 4, "All"], name="b", dtype="object"),
+        )
+
+        all_normal_margins = DataFrame(
+            [[0.2, 0, 0.2], [0.2, 0.6, 0.8], [0.4, 0.6, 1]],
+            index=Index([1, 2, "All"], name="a", dtype="object"),
+            columns=Index([3, 4, "All"], name="b", dtype="object"),
+        )
+        tm.assert_frame_equal(
+            crosstab(df.a, df.b, normalize="index", margins=True), row_normal_margins
+        )
+        tm.assert_frame_equal(
+            crosstab(df.a, df.b, normalize="columns", margins=True), col_normal_margins,
+        )
+        tm.assert_frame_equal(
+            crosstab(df.a, df.b, normalize=True, margins=True), all_normal_margins
+        )
+
+        # Test arrays
+        crosstab(
+            [np.array([1, 1, 2, 2]), np.array([1, 2, 1, 2])], np.array([1, 2, 1, 2])
+        )
+
+        # Test with aggfunc
+        norm_counts = DataFrame(
+            [[0.25, 0, 0.25], [0.25, 0.5, 0.75], [0.5, 0.5, 1]],
+            index=Index([1, 2, "All"], name="a", dtype="object"),
+            columns=Index([3, 4, "All"], name="b"),
+        )
+        test_case = crosstab(
+            df.a, df.b, df.c, aggfunc="count", normalize="all", margins=True
+        )
+        tm.assert_frame_equal(test_case, norm_counts)
+
+        df = DataFrame(
+            {"a": [1, 2, 2, 2, 2], "b": [3, 3, 4, 4, 4], "c": [0, 4, np.nan, 3, 3]}
+        )
+
+        norm_sum = DataFrame(
+            [[0, 0, 0.0], [0.4, 0.6, 1], [0.4, 0.6, 1]],
+            index=Index([1, 2, "All"], name="a", dtype="object"),
+            columns=Index([3, 4, "All"], name="b", dtype="object"),
+        )
+        test_case = crosstab(
+            df.a, df.b, df.c, aggfunc=np.sum, normalize="all", margins=True
+        )
+        tm.assert_frame_equal(test_case, norm_sum)
+
+    def test_crosstab_with_empties(self):
+        # Check handling of empties
+        df = DataFrame(
+            {
+                "a": [1, 2, 2, 2, 2],
+                "b": [3, 3, 4, 4, 4],
+                "c": [np.nan, np.nan, np.nan, np.nan, np.nan],
+            }
+        )
+
+        empty = DataFrame(
+            [[0.0, 0.0], [0.0, 0.0]],
+            index=Index([1, 2], name="a", dtype="int64"),
+            columns=Index([3, 4], name="b"),
+        )
+
+        for i in [True, "index", "columns"]:
+            calculated = crosstab(df.a, df.b, values=df.c, aggfunc="count", normalize=i)
+            tm.assert_frame_equal(empty, calculated)
+
+        nans = DataFrame(
+            [[0.0, np.nan], [0.0, 0.0]],
+            index=Index([1, 2], name="a", dtype="int64"),
+            columns=Index([3, 4], name="b"),
+        )
+
+        calculated = crosstab(df.a, df.b, values=df.c, aggfunc="count", normalize=False)
+        tm.assert_frame_equal(nans, calculated)
+
+    def test_crosstab_errors(self):
+        # Issue 12578
+
+        df = DataFrame(
+            {"a": [1, 2, 2, 2, 2], "b": [3, 3, 4, 4, 4], "c": [1, 1, np.nan, 1, 1]}
+        )
+
+        error = "values cannot be used without an aggfunc."
+        with pytest.raises(ValueError, match=error):
+            crosstab(df.a, df.b, values=df.c)
+
+        error = "aggfunc cannot be used without values"
+        with pytest.raises(ValueError, match=error):
+            crosstab(df.a, df.b, aggfunc=np.mean)
+
+        error = "Not a valid normalize argument"
+        with pytest.raises(ValueError, match=error):
+            crosstab(df.a, df.b, normalize="42")
+
+        with pytest.raises(ValueError, match=error):
+            crosstab(df.a, df.b, normalize=42)
+
+        error = "Not a valid margins argument"
+        with pytest.raises(ValueError, match=error):
+            crosstab(df.a, df.b, normalize="all", margins=42)
+
+    def test_crosstab_with_categorial_columns(self):
+        # GH 8860
+        df = DataFrame(
+            {
+                "MAKE": ["Honda", "Acura", "Tesla", "Honda", "Honda", "Acura"],
+                "MODEL": ["Sedan", "Sedan", "Electric", "Pickup", "Sedan", "Sedan"],
+            }
+        )
+        categories = ["Sedan", "Electric", "Pickup"]
+        df["MODEL"] = df["MODEL"].astype("category").cat.set_categories(categories)
+        result = crosstab(df["MAKE"], df["MODEL"])
+
+        expected_index = Index(["Acura", "Honda", "Tesla"], name="MAKE")
+        expected_columns = CategoricalIndex(
+            categories, categories=categories, ordered=False, name="MODEL"
+        )
+        expected_data = [[2, 0, 0], [2, 0, 1], [0, 1, 0]]
+        expected = DataFrame(
+            expected_data, index=expected_index, columns=expected_columns
+        )
+        tm.assert_frame_equal(result, expected)
+
+    def test_crosstab_with_numpy_size(self):
+        # GH 4003
+        df = DataFrame(
+            {
+                "A": ["one", "one", "two", "three"] * 6,
+                "B": ["A", "B", "C"] * 8,
+                "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 4,
+                "D": np.random.randn(24),
+                "E": np.random.randn(24),
+            }
+        )
+        result = crosstab(
+            index=[df["A"], df["B"]],
+            columns=[df["C"]],
+            margins=True,
+            aggfunc=np.size,
+            values=df["D"],
+        )
+        expected_index = MultiIndex(
+            levels=[["All", "one", "three", "two"], ["", "A", "B", "C"]],
+            codes=[[1, 1, 1, 2, 2, 2, 3, 3, 3, 0], [1, 2, 3, 1, 2, 3, 1, 2, 3, 0]],
+            names=["A", "B"],
+        )
+        expected_column = Index(["bar", "foo", "All"], dtype="object", name="C")
+        expected_data = np.array(
+            [
+                [2.0, 2.0, 4.0],
+                [2.0, 2.0, 4.0],
+                [2.0, 2.0, 4.0],
+                [2.0, np.nan, 2.0],
+                [np.nan, 2.0, 2.0],
+                [2.0, np.nan, 2.0],
+                [np.nan, 2.0, 2.0],
+                [2.0, np.nan, 2.0],
+                [np.nan, 2.0, 2.0],
+                [12.0, 12.0, 24.0],
+            ]
+        )
+        expected = DataFrame(
+            expected_data, index=expected_index, columns=expected_column
+        )
+        tm.assert_frame_equal(result, expected)
+
+    def test_crosstab_dup_index_names(self):
+        # GH 13279
+        s = Series(range(3), name="foo")
+
+        result = crosstab(s, s)
+        expected_index = Index(range(3), name="foo")
+        expected = DataFrame(
+            np.eye(3, dtype=np.int64), index=expected_index, columns=expected_index
+        )
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize("names", [["a", ("b", "c")], [("a", "b"), "c"]])
+    def test_crosstab_tuple_name(self, names):
+        s1 = Series(range(3), name=names[0])
+        s2 = Series(range(1, 4), name=names[1])
+
+        mi = MultiIndex.from_arrays([range(3), range(1, 4)], names=names)
+        expected = Series(1, index=mi).unstack(1, fill_value=0)
+
+        result = crosstab(s1, s2)
+        tm.assert_frame_equal(result, expected)
+
+    def test_crosstab_both_tuple_names(self):
+        # GH 18321
+        s1 = Series(range(3), name=("a", "b"))
+        s2 = Series(range(3), name=("c", "d"))
+
+        expected = DataFrame(
+            np.eye(3, dtype="int64"),
+            index=Index(range(3), name=("a", "b")),
+            columns=Index(range(3), name=("c", "d")),
+        )
+        result = crosstab(s1, s2)
+        tm.assert_frame_equal(result, expected)
+
+    def test_crosstab_unsorted_order(self):
+        df = DataFrame({"b": [3, 1, 2], "a": [5, 4, 6]}, index=["C", "A", "B"])
+        result = crosstab(df.index, [df.b, df.a])
+        e_idx = Index(["A", "B", "C"], name="row_0")
+        e_columns = MultiIndex.from_tuples([(1, 4), (2, 6), (3, 5)], names=["b", "a"])
+        expected = DataFrame(
+            [[1, 0, 0], [0, 1, 0], [0, 0, 1]], index=e_idx, columns=e_columns
+        )
+        tm.assert_frame_equal(result, expected)
+
+    def test_crosstab_normalize_multiple_columns(self):
+        # GH 15150
+        df = DataFrame(
+            {
+                "A": ["one", "one", "two", "three"] * 6,
+                "B": ["A", "B", "C"] * 8,
+                "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 4,
+                "D": [0] * 24,
+                "E": [0] * 24,
+            }
+        )
+        result = crosstab(
+            [df.A, df.B],
+            df.C,
+            values=df.D,
+            aggfunc=np.sum,
+            normalize=True,
+            margins=True,
+        )
+        expected = DataFrame(
+            np.array([0] * 29 + [1], dtype=float).reshape(10, 3),
+            columns=Index(["bar", "foo", "All"], dtype="object", name="C"),
+            index=MultiIndex.from_tuples(
+                [
+                    ("one", "A"),
+                    ("one", "B"),
+                    ("one", "C"),
+                    ("three", "A"),
+                    ("three", "B"),
+                    ("three", "C"),
+                    ("two", "A"),
+                    ("two", "B"),
+                    ("two", "C"),
+                    ("All", ""),
+                ],
+                names=["A", "B"],
+            ),
+        )
+        tm.assert_frame_equal(result, expected)
+
+    def test_margin_normalize(self):
+        # GH 27500
+        df = DataFrame(
+            {
+                "A": ["foo", "foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar"],
+                "B": ["one", "one", "one", "two", "two", "one", "one", "two", "two"],
+                "C": [
+                    "small",
+                    "large",
+                    "large",
+                    "small",
+                    "small",
+                    "large",
+                    "small",
+                    "small",
+                    "large",
+                ],
+                "D": [1, 2, 2, 3, 3, 4, 5, 6, 7],
+                "E": [2, 4, 5, 5, 6, 6, 8, 9, 9],
+            }
+        )
+        # normalize on index
+        result = crosstab(
+            [df.A, df.B], df.C, margins=True, margins_name="Sub-Total", normalize=0
+        )
+        expected = DataFrame(
+            [[0.5, 0.5], [0.5, 0.5], [0.666667, 0.333333], [0, 1], [0.444444, 0.555556]]
+        )
+        expected.index = MultiIndex(
+            levels=[["Sub-Total", "bar", "foo"], ["", "one", "two"]],
+            codes=[[1, 1, 2, 2, 0], [1, 2, 1, 2, 0]],
+            names=["A", "B"],
+        )
+        expected.columns = Index(["large", "small"], dtype="object", name="C")
+        tm.assert_frame_equal(result, expected)
+
+        # normalize on columns
+        result = crosstab(
+            [df.A, df.B], df.C, margins=True, margins_name="Sub-Total", normalize=1
+        )
+        expected = DataFrame(
+            [
+                [0.25, 0.2, 0.222222],
+                [0.25, 0.2, 0.222222],
+                [0.5, 0.2, 0.333333],
+                [0, 0.4, 0.222222],
+            ]
+        )
+        expected.columns = Index(
+            ["large", "small", "Sub-Total"], dtype="object", name="C"
+        )
+        expected.index = MultiIndex(
+            levels=[["bar", "foo"], ["one", "two"]],
+            codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
+            names=["A", "B"],
+        )
+        tm.assert_frame_equal(result, expected)
+
+        # normalize on both index and column
+        result = crosstab(
+            [df.A, df.B], df.C, margins=True, margins_name="Sub-Total", normalize=True
+        )
+        expected = DataFrame(
+            [
+                [0.111111, 0.111111, 0.222222],
+                [0.111111, 0.111111, 0.222222],
+                [0.222222, 0.111111, 0.333333],
+                [0.000000, 0.222222, 0.222222],
+                [0.444444, 0.555555, 1],
+            ]
+        )
+        expected.columns = Index(
+            ["large", "small", "Sub-Total"], dtype="object", name="C"
+        )
+        expected.index = MultiIndex(
+            levels=[["Sub-Total", "bar", "foo"], ["", "one", "two"]],
+            codes=[[1, 1, 2, 2, 0], [1, 2, 1, 2, 0]],
+            names=["A", "B"],
+        )
+        tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
index e09a2a7907177..75c3c565e9d58 100644
--- a/pandas/tests/reshape/test_pivot.py
+++ b/pandas/tests/reshape/test_pivot.py
@@ -17,7 +17,7 @@
 )
 import pandas._testing as tm
 from pandas.api.types import CategoricalDtype as CDT
-from pandas.core.reshape.pivot import crosstab, pivot_table
+from pandas.core.reshape.pivot import pivot_table
 
 
 @pytest.fixture(params=[True, False])
@@ -2064,708 +2064,3 @@ def agg(l):
         )
         with pytest.raises(KeyError, match="notpresent"):
             foo.pivot_table("notpresent", "X", "Y", aggfunc=agg)
-
-
-class TestCrosstab:
-    def setup_method(self, method):
-        df = DataFrame(
-            {
-                "A": [
-                    "foo",
-                    "foo",
-                    "foo",
-                    "foo",
-                    "bar",
-                    "bar",
-                    "bar",
-                    "bar",
-                    "foo",
-                    "foo",
-                    "foo",
-                ],
-                "B": [
-                    "one",
-                    "one",
-                    "one",
-                    "two",
-                    "one",
-                    "one",
-                    "one",
-                    "two",
-                    "two",
-                    "two",
-                    "one",
-                ],
-                "C": [
-                    "dull",
-                    "dull",
-                    "shiny",
-                    "dull",
-                    "dull",
-                    "shiny",
-                    "shiny",
-                    "dull",
-                    "shiny",
-                    "shiny",
-                    "shiny",
-                ],
-                "D": np.random.randn(11),
-                "E": np.random.randn(11),
-                "F": np.random.randn(11),
-            }
-        )
-
-        self.df = df.append(df, ignore_index=True)
-
-    def test_crosstab_single(self):
-        df = self.df
-        result = crosstab(df["A"], df["C"])
-        expected = df.groupby(["A", "C"]).size().unstack()
-        tm.assert_frame_equal(result, expected.fillna(0).astype(np.int64))
-
-    def test_crosstab_multiple(self):
-        df = self.df
-
-        result = crosstab(df["A"], [df["B"], df["C"]])
-        expected = df.groupby(["A", "B", "C"]).size()
-        expected = expected.unstack("B").unstack("C").fillna(0).astype(np.int64)
-        tm.assert_frame_equal(result, expected)
-
-        result = crosstab([df["B"], df["C"]], df["A"])
-        expected = df.groupby(["B", "C", "A"]).size()
-        expected = expected.unstack("A").fillna(0).astype(np.int64)
-        tm.assert_frame_equal(result, expected)
-
-    def test_crosstab_ndarray(self):
-        a = np.random.randint(0, 5, size=100)
-        b = np.random.randint(0, 3, size=100)
-        c = np.random.randint(0, 10, size=100)
-
-        df = DataFrame({"a": a, "b": b, "c": c})
-
-        result = crosstab(a, [b, c], rownames=["a"], colnames=("b", "c"))
-        expected = crosstab(df["a"], [df["b"], df["c"]])
-        tm.assert_frame_equal(result, expected)
-
-        result = crosstab([b, c], a, colnames=["a"], rownames=("b", "c"))
-        expected = crosstab([df["b"], df["c"]], df["a"])
-        tm.assert_frame_equal(result, expected)
-
-        # assign arbitrary names
-        result = crosstab(self.df["A"].values, self.df["C"].values)
-        assert result.index.name == "row_0"
-        assert result.columns.name == "col_0"
-
-    def test_crosstab_non_aligned(self):
-        # GH 17005
-        a = pd.Series([0, 1, 1], index=["a", "b", "c"])
-        b = pd.Series([3, 4, 3, 4, 3], index=["a", "b", "c", "d", "f"])
-        c = np.array([3, 4, 3])
-
-        expected = pd.DataFrame(
-            [[1, 0], [1, 1]],
-            index=Index([0, 1], name="row_0"),
-            columns=Index([3, 4], name="col_0"),
-        )
-
-        result = crosstab(a, b)
-        tm.assert_frame_equal(result, expected)
-
-        result = crosstab(a, c)
-        tm.assert_frame_equal(result, expected)
-
-    def test_crosstab_margins(self):
-        a = np.random.randint(0, 7, size=100)
-        b = np.random.randint(0, 3, size=100)
-        c = np.random.randint(0, 5, size=100)
-
-        df = DataFrame({"a": a, "b": b, "c": c})
-
-        result = crosstab(a, [b, c], rownames=["a"], colnames=("b", "c"), margins=True)
-
-        assert result.index.names == ("a",)
-        assert result.columns.names == ["b", "c"]
-
-        all_cols = result["All", ""]
-        exp_cols = df.groupby(["a"]).size().astype("i8")
-        # to keep index.name
-        exp_margin = Series([len(df)], index=Index(["All"], name="a"))
-        exp_cols = exp_cols.append(exp_margin)
-        exp_cols.name = ("All", "")
-
-        tm.assert_series_equal(all_cols, exp_cols)
-
-        all_rows = result.loc["All"]
-        exp_rows = df.groupby(["b", "c"]).size().astype("i8")
-        exp_rows = exp_rows.append(Series([len(df)], index=[("All", "")]))
-        exp_rows.name = "All"
-
-        exp_rows = exp_rows.reindex(all_rows.index)
-        exp_rows = exp_rows.fillna(0).astype(np.int64)
-        tm.assert_series_equal(all_rows, exp_rows)
-
-    def test_crosstab_margins_set_margin_name(self):
-        # GH 15972
-        a = np.random.randint(0, 7, size=100)
-        b = np.random.randint(0, 3, size=100)
-        c = np.random.randint(0, 5, size=100)
-
-        df = DataFrame({"a": a, "b": b, "c": c})
-
-        result = crosstab(
-            a,
-            [b, c],
-            rownames=["a"],
-            colnames=("b", "c"),
-            margins=True,
-            margins_name="TOTAL",
-        )
-
-        assert result.index.names == ("a",)
-        assert result.columns.names == ["b", "c"]
-
-        all_cols = result["TOTAL", ""]
-        exp_cols = df.groupby(["a"]).size().astype("i8")
-        # to keep index.name
-        exp_margin = Series([len(df)], index=Index(["TOTAL"], name="a"))
-        exp_cols = exp_cols.append(exp_margin)
-        exp_cols.name = ("TOTAL", "")
-
-        tm.assert_series_equal(all_cols, exp_cols)
-
-        all_rows = result.loc["TOTAL"]
-        exp_rows = df.groupby(["b", "c"]).size().astype("i8")
-        exp_rows = exp_rows.append(Series([len(df)], index=[("TOTAL", "")]))
-        exp_rows.name = "TOTAL"
-
-        exp_rows = exp_rows.reindex(all_rows.index)
-        exp_rows = exp_rows.fillna(0).astype(np.int64)
-        tm.assert_series_equal(all_rows, exp_rows)
-
-        msg = "margins_name argument must be a string"
-        for margins_name in [666, None, ["a", "b"]]:
-            with pytest.raises(ValueError, match=msg):
-                crosstab(
-                    a,
-                    [b, c],
-                    rownames=["a"],
-                    colnames=("b", "c"),
-                    margins=True,
-                    margins_name=margins_name,
-                )
-
-    def test_crosstab_pass_values(self):
-        a = np.random.randint(0, 7, size=100)
-        b = np.random.randint(0, 3, size=100)
-        c = np.random.randint(0, 5, size=100)
-        values = np.random.randn(100)
-
-        table = crosstab(
-            [a, b], c, values, aggfunc=np.sum, rownames=["foo", "bar"], colnames=["baz"]
-        )
-
-        df = DataFrame({"foo": a, "bar": b, "baz": c, "values": values})
-
-        expected = df.pivot_table(
-            "values", index=["foo", "bar"], columns="baz", aggfunc=np.sum
-        )
-        tm.assert_frame_equal(table, expected)
-
-    def test_crosstab_dropna(self):
-        # GH 3820
-        a = np.array(["foo", "foo", "foo", "bar", "bar", "foo", "foo"], dtype=object)
-        b = np.array(["one", "one", "two", "one", "two", "two", "two"], dtype=object)
-        c = np.array(
-            ["dull", "dull", "dull", "dull", "dull", "shiny", "shiny"], dtype=object
-        )
-        res = pd.crosstab(a, [b, c], rownames=["a"], colnames=["b", "c"], dropna=False)
-        m = MultiIndex.from_tuples(
-            [("one", "dull"), ("one", "shiny"), ("two", "dull"), ("two", "shiny")],
-            names=["b", "c"],
-        )
-        tm.assert_index_equal(res.columns, m)
-
-    def test_crosstab_no_overlap(self):
-        # GS 10291
-
-        s1 = pd.Series([1, 2, 3], index=[1, 2, 3])
-        s2 = pd.Series([4, 5, 6], index=[4, 5, 6])
-
-        actual = crosstab(s1, s2)
-        expected = pd.DataFrame()
-
-        tm.assert_frame_equal(actual, expected)
-
-    def test_margin_dropna(self):
-        # GH 12577
-        # pivot_table counts null into margin ('All')
-        # when margins=true and dropna=true
-
-        df = pd.DataFrame({"a": [1, 2, 2, 2, 2, np.nan], "b": [3, 3, 4, 4, 4, 4]})
-        actual = pd.crosstab(df.a, df.b, margins=True, dropna=True)
-        expected = pd.DataFrame([[1, 0, 1], [1, 3, 4], [2, 3, 5]])
-        expected.index = Index([1.0, 2.0, "All"], name="a")
-        expected.columns = Index([3, 4, "All"], name="b")
-        tm.assert_frame_equal(actual, expected)
-
-        df = DataFrame(
-            {"a": [1, np.nan, np.nan, np.nan, 2, np.nan], "b": [3, np.nan, 4, 4, 4, 4]}
-        )
-        actual = pd.crosstab(df.a, df.b, margins=True, dropna=True)
-        expected = pd.DataFrame([[1, 0, 1], [0, 1, 1], [1, 1, 2]])
-        expected.index = Index([1.0, 2.0, "All"], name="a")
-        expected.columns = Index([3.0, 4.0, "All"], name="b")
-        tm.assert_frame_equal(actual, expected)
-
-        df = DataFrame(
-            {"a": [1, np.nan, np.nan, np.nan, np.nan, 2], "b": [3, 3, 4, 4, 4, 4]}
-        )
-        actual = pd.crosstab(df.a, df.b, margins=True, dropna=True)
-        expected = pd.DataFrame([[1, 0, 1], [0, 1, 1], [1, 1, 2]])
-        expected.index = Index([1.0, 2.0, "All"], name="a")
-        expected.columns = Index([3, 4, "All"], name="b")
-        tm.assert_frame_equal(actual, expected)
-
-        # GH 12642
-        # _add_margins raises KeyError: Level None not found
-        # when margins=True and dropna=False
-        df = pd.DataFrame({"a": [1, 2, 2, 2, 2, np.nan], "b": [3, 3, 4, 4, 4, 4]})
-        actual = pd.crosstab(df.a, df.b, margins=True, dropna=False)
-        expected = pd.DataFrame([[1, 0, 1], [1, 3, 4], [2, 4, 6]])
-        expected.index = Index([1.0, 2.0, "All"], name="a")
-        expected.columns = Index([3, 4, "All"], name="b")
-        tm.assert_frame_equal(actual, expected)
-
-        df = DataFrame(
-            {"a": [1, np.nan, np.nan, np.nan, 2, np.nan], "b": [3, np.nan, 4, 4, 4, 4]}
-        )
-        actual = pd.crosstab(df.a, df.b, margins=True, dropna=False)
-        expected = pd.DataFrame([[1, 0, 1], [0, 1, 1], [1, 4, 6]])
-        expected.index = Index([1.0, 2.0, "All"], name="a")
-        expected.columns = Index([3.0, 4.0, "All"], name="b")
-        tm.assert_frame_equal(actual, expected)
-
-        a = np.array(["foo", "foo", "foo", "bar", "bar", "foo", "foo"], dtype=object)
-        b = np.array(["one", "one", "two", "one", "two", np.nan, "two"], dtype=object)
-        c = np.array(
-            ["dull", "dull", "dull", "dull", "dull", "shiny", "shiny"], dtype=object
-        )
-
-        actual = pd.crosstab(
-            a, [b, c], rownames=["a"], colnames=["b", "c"], margins=True, dropna=False
-        )
-        m = MultiIndex.from_arrays(
-            [
-                ["one", "one", "two", "two", "All"],
-                ["dull", "shiny", "dull", "shiny", ""],
-            ],
-            names=["b", "c"],
-        )
-        expected = DataFrame(
-            [[1, 0, 1, 0, 2], [2, 0, 1, 1, 5], [3, 0, 2, 1, 7]], columns=m
-        )
-        expected.index = Index(["bar", "foo", "All"], name="a")
-        tm.assert_frame_equal(actual, expected)
-
-        actual = pd.crosstab(
-            [a, b], c, rownames=["a", "b"], colnames=["c"], margins=True, dropna=False
-        )
-        m = MultiIndex.from_arrays(
-            [["bar", "bar", "foo", "foo", "All"], ["one", "two", "one", "two", ""]],
-            names=["a", "b"],
-        )
-        expected = DataFrame(
-            [[1, 0, 1], [1, 0, 1], [2, 0, 2], [1, 1, 2], [5, 2, 7]], index=m
-        )
-        expected.columns = Index(["dull", "shiny", "All"], name="c")
-        tm.assert_frame_equal(actual, expected)
-
-        actual = pd.crosstab(
-            [a, b], c, rownames=["a", "b"], colnames=["c"], margins=True, dropna=True
-        )
-        m = MultiIndex.from_arrays(
-            [["bar", "bar", "foo", "foo", "All"], ["one", "two", "one", "two", ""]],
-            names=["a", "b"],
-        )
-        expected = DataFrame(
-            [[1, 0, 1], [1, 0, 1], [2, 0, 2], [1, 1, 2], [5, 1, 6]], index=m
-        )
-        expected.columns = Index(["dull", "shiny", "All"], name="c")
-        tm.assert_frame_equal(actual, expected)
-
-    def test_crosstab_normalize(self):
-        # Issue 12578
-        df = pd.DataFrame(
-            {"a": [1, 2, 2, 2, 2], "b": [3, 3, 4, 4, 4], "c": [1, 1, np.nan, 1, 1]}
-        )
-
-        rindex = pd.Index([1, 2], name="a")
-        cindex = pd.Index([3, 4], name="b")
-        full_normal = pd.DataFrame([[0.2, 0], [0.2, 0.6]], index=rindex, columns=cindex)
-        row_normal = pd.DataFrame(
-            [[1.0, 0], [0.25, 0.75]], index=rindex, columns=cindex
-        )
-        col_normal = pd.DataFrame([[0.5, 0], [0.5, 1.0]], index=rindex, columns=cindex)
-
-        # Check all normalize args
-        tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize="all"), full_normal)
-        tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize=True), full_normal)
-        tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize="index"), row_normal)
-        tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize="columns"), col_normal)
-        tm.assert_frame_equal(
-            pd.crosstab(df.a, df.b, normalize=1),
-            pd.crosstab(df.a, df.b, normalize="columns"),
-        )
-        tm.assert_frame_equal(
-            pd.crosstab(df.a, df.b, normalize=0),
-            pd.crosstab(df.a, df.b, normalize="index"),
-        )
-
-        row_normal_margins = pd.DataFrame(
-            [[1.0, 0], [0.25, 0.75], [0.4, 0.6]],
-            index=pd.Index([1, 2, "All"], name="a", dtype="object"),
-            columns=pd.Index([3, 4], name="b", dtype="object"),
-        )
-        col_normal_margins = pd.DataFrame(
-            [[0.5, 0, 0.2], [0.5, 1.0, 0.8]],
-            index=pd.Index([1, 2], name="a", dtype="object"),
-            columns=pd.Index([3, 4, "All"], name="b", dtype="object"),
-        )
-
-        all_normal_margins = pd.DataFrame(
-            [[0.2, 0, 0.2], [0.2, 0.6, 0.8], [0.4, 0.6, 1]],
-            index=pd.Index([1, 2, "All"], name="a", dtype="object"),
-            columns=pd.Index([3, 4, "All"], name="b", dtype="object"),
-        )
-        tm.assert_frame_equal(
-            pd.crosstab(df.a, df.b, normalize="index", margins=True), row_normal_margins
-        )
-        tm.assert_frame_equal(
-            pd.crosstab(df.a, df.b, normalize="columns", margins=True),
-            col_normal_margins,
-        )
-        tm.assert_frame_equal(
-            pd.crosstab(df.a, df.b, normalize=True, margins=True), all_normal_margins
-        )
-
-        # Test arrays
-        pd.crosstab(
-            [np.array([1, 1, 2, 2]), np.array([1, 2, 1, 2])], np.array([1, 2, 1, 2])
-        )
-
-        # Test with aggfunc
-        norm_counts = pd.DataFrame(
-            [[0.25, 0, 0.25], [0.25, 0.5, 0.75], [0.5, 0.5, 1]],
-            index=pd.Index([1, 2, "All"], name="a", dtype="object"),
-            columns=pd.Index([3, 4, "All"], name="b"),
-        )
-        test_case = pd.crosstab(
-            df.a, df.b, df.c, aggfunc="count", normalize="all", margins=True
-        )
-        tm.assert_frame_equal(test_case, norm_counts)
-
-        df = pd.DataFrame(
-            {"a": [1, 2, 2, 2, 2], "b": [3, 3, 4, 4, 4], "c": [0, 4, np.nan, 3, 3]}
-        )
-
-        norm_sum = pd.DataFrame(
-            [[0, 0, 0.0], [0.4, 0.6, 1], [0.4, 0.6, 1]],
-            index=pd.Index([1, 2, "All"], name="a", dtype="object"),
-            columns=pd.Index([3, 4, "All"], name="b", dtype="object"),
-        )
-        test_case = pd.crosstab(
-            df.a, df.b, df.c, aggfunc=np.sum, normalize="all", margins=True
-        )
-        tm.assert_frame_equal(test_case, norm_sum)
-
-    def test_crosstab_with_empties(self):
-        # Check handling of empties
-        df = pd.DataFrame(
-            {
-                "a": [1, 2, 2, 2, 2],
-                "b": [3, 3, 4, 4, 4],
-                "c": [np.nan, np.nan, np.nan, np.nan, np.nan],
-            }
-        )
-
-        empty = pd.DataFrame(
-            [[0.0, 0.0], [0.0, 0.0]],
-            index=pd.Index([1, 2], name="a", dtype="int64"),
-            columns=pd.Index([3, 4], name="b"),
-        )
-
-        for i in [True, "index", "columns"]:
-            calculated = pd.crosstab(
-                df.a, df.b, values=df.c, aggfunc="count", normalize=i
-            )
-            tm.assert_frame_equal(empty, calculated)
-
-        nans = pd.DataFrame(
-            [[0.0, np.nan], [0.0, 0.0]],
-            index=pd.Index([1, 2], name="a", dtype="int64"),
-            columns=pd.Index([3, 4], name="b"),
-        )
-
-        calculated = pd.crosstab(
-            df.a, df.b, values=df.c, aggfunc="count", normalize=False
-        )
-        tm.assert_frame_equal(nans, calculated)
-
-    def test_crosstab_errors(self):
-        # Issue 12578
-
-        df = pd.DataFrame(
-            {"a": [1, 2, 2, 2, 2], "b": [3, 3, 4, 4, 4], "c": [1, 1, np.nan, 1, 1]}
-        )
-
-        error = "values cannot be used without an aggfunc."
-        with pytest.raises(ValueError, match=error):
-            pd.crosstab(df.a, df.b, values=df.c)
-
-        error = "aggfunc cannot be used without values"
-        with pytest.raises(ValueError, match=error):
-            pd.crosstab(df.a, df.b, aggfunc=np.mean)
-
-        error = "Not a valid normalize argument"
-        with pytest.raises(ValueError, match=error):
-            pd.crosstab(df.a, df.b, normalize="42")
-
-        with pytest.raises(ValueError, match=error):
-            pd.crosstab(df.a, df.b, normalize=42)
-
-        error = "Not a valid margins argument"
-        with pytest.raises(ValueError, match=error):
-            pd.crosstab(df.a, df.b, normalize="all", margins=42)
-
-    def test_crosstab_with_categorial_columns(self):
-        # GH 8860
-        df = pd.DataFrame(
-            {
-                "MAKE": ["Honda", "Acura", "Tesla", "Honda", "Honda", "Acura"],
-                "MODEL": ["Sedan", "Sedan", "Electric", "Pickup", "Sedan", "Sedan"],
-            }
-        )
-        categories = ["Sedan", "Electric", "Pickup"]
-        df["MODEL"] = df["MODEL"].astype("category").cat.set_categories(categories)
-        result = pd.crosstab(df["MAKE"], df["MODEL"])
-
-        expected_index = pd.Index(["Acura", "Honda", "Tesla"], name="MAKE")
-        expected_columns = pd.CategoricalIndex(
-            categories, categories=categories, ordered=False, name="MODEL"
-        )
-        expected_data = [[2, 0, 0], [2, 0, 1], [0, 1, 0]]
-        expected = pd.DataFrame(
-            expected_data, index=expected_index, columns=expected_columns
-        )
-        tm.assert_frame_equal(result, expected)
-
-    def test_crosstab_with_numpy_size(self):
-        # GH 4003
-        df = pd.DataFrame(
-            {
-                "A": ["one", "one", "two", "three"] * 6,
-                "B": ["A", "B", "C"] * 8,
-                "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 4,
-                "D": np.random.randn(24),
-                "E": np.random.randn(24),
-            }
-        )
-        result = pd.crosstab(
-            index=[df["A"], df["B"]],
-            columns=[df["C"]],
-            margins=True,
-            aggfunc=np.size,
-            values=df["D"],
-        )
-        expected_index = pd.MultiIndex(
-            levels=[["All", "one", "three", "two"], ["", "A", "B", "C"]],
-            codes=[[1, 1, 1, 2, 2, 2, 3, 3, 3, 0], [1, 2, 3, 1, 2, 3, 1, 2, 3, 0]],
-            names=["A", "B"],
-        )
-        expected_column = pd.Index(["bar", "foo", "All"], dtype="object", name="C")
-        expected_data = np.array(
-            [
-                [2.0, 2.0, 4.0],
-                [2.0, 2.0, 4.0],
-                [2.0, 2.0, 4.0],
-                [2.0, np.nan, 2.0],
-                [np.nan, 2.0, 2.0],
-                [2.0, np.nan, 2.0],
-                [np.nan, 2.0, 2.0],
-                [2.0, np.nan, 2.0],
-                [np.nan, 2.0, 2.0],
-                [12.0, 12.0, 24.0],
-            ]
-        )
-        expected = pd.DataFrame(
-            expected_data, index=expected_index, columns=expected_column
-        )
-        tm.assert_frame_equal(result, expected)
-
-    def test_crosstab_dup_index_names(self):
-        # GH 13279
-        s = pd.Series(range(3), name="foo")
-
-        result = pd.crosstab(s, s)
-        expected_index = pd.Index(range(3), name="foo")
-        expected = pd.DataFrame(
-            np.eye(3, dtype=np.int64), index=expected_index, columns=expected_index
-        )
-        tm.assert_frame_equal(result, expected)
-
-    @pytest.mark.parametrize("names", [["a", ("b", "c")], [("a", "b"), "c"]])
-    def test_crosstab_tuple_name(self, names):
-        s1 = pd.Series(range(3), name=names[0])
-        s2 = pd.Series(range(1, 4), name=names[1])
-
-        mi = pd.MultiIndex.from_arrays([range(3), range(1, 4)], names=names)
-        expected = pd.Series(1, index=mi).unstack(1, fill_value=0)
-
-        result = pd.crosstab(s1, s2)
-        tm.assert_frame_equal(result, expected)
-
-    def test_crosstab_both_tuple_names(self):
-        # GH 18321
-        s1 = pd.Series(range(3), name=("a", "b"))
-        s2 = pd.Series(range(3), name=("c", "d"))
-
-        expected = pd.DataFrame(
-            np.eye(3, dtype="int64"),
-            index=pd.Index(range(3), name=("a", "b")),
-            columns=pd.Index(range(3), name=("c", "d")),
-        )
-        result = crosstab(s1, s2)
-        tm.assert_frame_equal(result, expected)
-
-    def test_crosstab_unsorted_order(self):
-        df = pd.DataFrame({"b": [3, 1, 2], "a": [5, 4, 6]}, index=["C", "A", "B"])
-        result = pd.crosstab(df.index, [df.b, df.a])
-        e_idx = pd.Index(["A", "B", "C"], name="row_0")
-        e_columns = pd.MultiIndex.from_tuples(
-            [(1, 4), (2, 6), (3, 5)], names=["b", "a"]
-        )
-        expected = pd.DataFrame(
-            [[1, 0, 0], [0, 1, 0], [0, 0, 1]], index=e_idx, columns=e_columns
-        )
-        tm.assert_frame_equal(result, expected)
-
-    def test_crosstab_normalize_multiple_columns(self):
-        # GH 15150
-        df = pd.DataFrame(
-            {
-                "A": ["one", "one", "two", "three"] * 6,
-                "B": ["A", "B", "C"] * 8,
-                "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 4,
-                "D": [0] * 24,
-                "E": [0] * 24,
-            }
-        )
-        result = pd.crosstab(
-            [df.A, df.B],
-            df.C,
-            values=df.D,
-            aggfunc=np.sum,
-            normalize=True,
-            margins=True,
-        )
-        expected = pd.DataFrame(
-            np.array([0] * 29 + [1], dtype=float).reshape(10, 3),
-            columns=Index(["bar", "foo", "All"], dtype="object", name="C"),
-            index=MultiIndex.from_tuples(
-                [
-                    ("one", "A"),
-                    ("one", "B"),
-                    ("one", "C"),
-                    ("three", "A"),
-                    ("three", "B"),
-                    ("three", "C"),
-                    ("two", "A"),
-                    ("two", "B"),
-                    ("two", "C"),
-                    ("All", ""),
-                ],
-                names=["A", "B"],
-            ),
-        )
-        tm.assert_frame_equal(result, expected)
-
-    def test_margin_normalize(self):
-        # GH 27500
-        df = pd.DataFrame(
-            {
-                "A": ["foo", "foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar"],
-                "B": ["one", "one", "one", "two", "two", "one", "one", "two", "two"],
-                "C": [
-                    "small",
-                    "large",
-                    "large",
-                    "small",
-                    "small",
-                    "large",
-                    "small",
-                    "small",
-                    "large",
-                ],
-                "D": [1, 2, 2, 3, 3, 4, 5, 6, 7],
-                "E": [2, 4, 5, 5, 6, 6, 8, 9, 9],
-            }
-        )
-        # normalize on index
-        result = pd.crosstab(
-            [df.A, df.B], df.C, margins=True, margins_name="Sub-Total", normalize=0
-        )
-        expected = pd.DataFrame(
-            [[0.5, 0.5], [0.5, 0.5], [0.666667, 0.333333], [0, 1], [0.444444, 0.555556]]
-        )
-        expected.index = MultiIndex(
-            levels=[["Sub-Total", "bar", "foo"], ["", "one", "two"]],
-            codes=[[1, 1, 2, 2, 0], [1, 2, 1, 2, 0]],
-            names=["A", "B"],
-        )
-        expected.columns = Index(["large", "small"], dtype="object", name="C")
-        tm.assert_frame_equal(result, expected)
-
-        # normalize on columns
-        result = pd.crosstab(
-            [df.A, df.B], df.C, margins=True, margins_name="Sub-Total", normalize=1
-        )
-        expected = pd.DataFrame(
-            [
-                [0.25, 0.2, 0.222222],
-                [0.25, 0.2, 0.222222],
-                [0.5, 0.2, 0.333333],
-                [0, 0.4, 0.222222],
-            ]
-        )
-        expected.columns = Index(
-            ["large", "small", "Sub-Total"], dtype="object", name="C"
-        )
-        expected.index = MultiIndex(
-            levels=[["bar", "foo"], ["one", "two"]],
-            codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
-            names=["A", "B"],
-        )
-        tm.assert_frame_equal(result, expected)
-
-        # normalize on both index and column
-        result = pd.crosstab(
-            [df.A, df.B], df.C, margins=True, margins_name="Sub-Total", normalize=True
-        )
-        expected = pd.DataFrame(
-            [
-                [0.111111, 0.111111, 0.222222],
-                [0.111111, 0.111111, 0.222222],
-                [0.222222, 0.111111, 0.333333],
-                [0.000000, 0.222222, 0.222222],
-                [0.444444, 0.555555, 1],
-            ]
-        )
-        expected.columns = Index(
-            ["large", "small", "Sub-Total"], dtype="object", name="C"
-        )
-        expected.index = MultiIndex(
-            levels=[["Sub-Total", "bar", "foo"], ["", "one", "two"]],
-            codes=[[1, 1, 2, 2, 0], [1, 2, 1, 2, 0]],
-            names=["A", "B"],
-        )
-        tm.assert_frame_equal(result, expected)

From 7e49bd512874adabe6798fbaad7780519ef190c2 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Mon, 9 Mar 2020 14:27:56 -0700
Subject: [PATCH 5/5] CLN: remove Categorical.put (#32554)

---
 pandas/core/arrays/categorical.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 92859479ec73f..ba4c2e168e0c4 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -1409,12 +1409,6 @@ def notna(self):
 
     notnull = notna
 
-    def put(self, *args, **kwargs):
-        """
-        Replace specific elements in the Categorical with given values.
-        """
-        raise NotImplementedError(("'put' is not yet implemented for Categorical"))
-
     def dropna(self):
         """
         Return the Categorical without null values.