pandas-dev · jreback · Apr 10, 2020 · Mar 25, 2020 · Mar 25, 2020 · Mar 25, 2020
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
@@ -410,6 +410,8 @@ Numeric
 - Bug in :meth:`to_numeric` with ``downcast="unsigned"`` fails for empty data (:issue:`32493`)
 - Bug in :meth:`DataFrame.mean` with ``numeric_only=False`` and either ``datetime64`` dtype or ``PeriodDtype`` column incorrectly raising ``TypeError`` (:issue:`32426`)
 - Bug in :meth:`DataFrame.count` with ``level="foo"`` and index level ``"foo"`` containing NaNs causes segmentation fault (:issue:`21824`)
+- Bug in :meth:`DataFrame.diff` with ``axis=1`` returning incorrect results with mixed dtypes (:issue:`32995`)
+-
 
 Conversion
 ^^^^^^^^^^

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -6768,6 +6768,11 @@ def diff(self, periods: int = 1, axis: Axis = 0) -> "DataFrame":
         5  NaN  NaN   NaN
         """
         bm_axis = self._get_block_manager_axis(axis)
+        self._consolidate_inplace()
+
+        if bm_axis == 0 and periods != 0:
+            return self.T.diff(periods, axis=0).T
+
         new_data = self._mgr.diff(n=periods, axis=bm_axis)
         return self._constructor(new_data)
 

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -1775,7 +1775,14 @@ def interpolate(
         )
 
     def diff(self, n: int, axis: int = 1) -> List["Block"]:
+        if axis == 0 and n != 0:
+            # n==0 case will be a no-op so let is fall through
+            # Since we only have one column, the result will be all-NA.
+            #  Create this result by shifting along axis=0 past the length of
+            #  our values.
+            return super().diff(len(self.values), axis=0)
         if axis == 1:
+            # TODO(EA2D): unnecessary with 2D EAs
             # we are by definition 1D.
             axis = 0
         return super().diff(n, axis)

diff --git a/pandas/tests/frame/methods/test_diff.py b/pandas/tests/frame/methods/test_diff.py
@@ -64,18 +64,15 @@ def test_diff_datetime_axis1(self, tz):
                 1: date_range("2010", freq="D", periods=2, tz=tz),
             }
         )
-        if tz is None:
-            result = df.diff(axis=1)
-            expected = DataFrame(
-                {
-                    0: pd.TimedeltaIndex(["NaT", "NaT"]),
-                    1: pd.TimedeltaIndex(["0 days", "0 days"]),
-                }
-            )
-            tm.assert_frame_equal(result, expected)
-        else:
-            with pytest.raises(NotImplementedError):
-                result = df.diff(axis=1)
+
+        result = df.diff(axis=1)
+        expected = DataFrame(
+            {
+                0: pd.TimedeltaIndex(["NaT", "NaT"]),
+                1: pd.TimedeltaIndex(["0 days", "0 days"]),
+            }
+        )
+        tm.assert_frame_equal(result, expected)
 
     def test_diff_timedelta(self):
         # GH#4533
@@ -118,3 +115,46 @@ def test_diff_axis(self):
         tm.assert_frame_equal(
             df.diff(axis=0), DataFrame([[np.nan, np.nan], [2.0, 2.0]])
         )
+
+    @pytest.mark.xfail(
+        reason="GH#32995 needs to operate column-wise or do inference",
+        raises=AssertionError,
+    )
+    def test_diff_period(self):
+        # GH#32995 Don't pass an incorrect axis
+        #  TODO(EA2D): this bug wouldn't have happened with 2D EA
+        pi = pd.date_range("2016-01-01", periods=3).to_period("D")
+        df = pd.DataFrame({"A": pi})
+
+        result = df.diff(1, axis=1)
+
+        # TODO: should we make Block.diff do type inference?  or maybe algos.diff?
+        expected = (df - pd.NaT).astype(object)
+        tm.assert_frame_equal(result, expected)
+
+    def test_diff_axis1_mixed_dtypes(self):
+        # GH#32995 operate column-wise when we have mixed dtypes and axis=1
+        df = pd.DataFrame({"A": range(3), "B": 2 * np.arange(3, dtype=np.float64)})
+
+        expected = pd.DataFrame({"A": [np.nan, np.nan, np.nan], "B": df["B"] / 2})
+
+        result = df.diff(axis=1)
+        tm.assert_frame_equal(result, expected)
+
+    def test_diff_axis1_mixed_dtypes_large_periods(self):
+        # GH#32995 operate column-wise when we have mixed dtypes and axis=1
+        df = pd.DataFrame({"A": range(3), "B": 2 * np.arange(3, dtype=np.float64)})
+
+        expected = df * np.nan
+
+        result = df.diff(axis=1, periods=3)
+        tm.assert_frame_equal(result, expected)
+
+    def test_diff_axis1_mixed_dtypes_negative_periods(self):
+        # GH#32995 operate column-wise when we have mixed dtypes and axis=1
+        df = pd.DataFrame({"A": range(3), "B": 2 * np.arange(3, dtype=np.float64)})
+
+        expected = pd.DataFrame({"A": -1.0 * df["A"], "B": df["B"] * np.nan})
+
+        result = df.diff(axis=1, periods=-1)
+        tm.assert_frame_equal(result, expected)