From 4467730a77ea1a23182d851169991510c04baaf5 Mon Sep 17 00:00:00 2001 From: JustinZhengBC Date: Sat, 19 Jan 2019 14:06:34 -0800 Subject: [PATCH 1/5] BUG-16807-1 SparseFrame fills with default_fill_value if data is None --- doc/source/whatsnew/v0.24.0.rst | 1 + pandas/core/sparse/frame.py | 4 ++-- pandas/tests/sparse/frame/test_frame.py | 8 ++++++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index f46229feee250..16ba5e4080a6e 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1835,6 +1835,7 @@ Sparse - Bug in :meth:`SparseArray.nonzero` and :meth:`SparseDataFrame.dropna` returning shifted/incorrect results (:issue:`21172`) - Bug in :meth:`DataFrame.apply` where dtypes would lose sparseness (:issue:`23744`) - Bug in :func:`concat` when concatenating a list of :class:`Series` with all-sparse values changing the ``fill_value`` and converting to a dense Series (:issue:`24371`) +- Bug in :class:`SparseFrame` constructor where passing ``None`` as the data would cause ``default_fill_value`` to be ignored (:issue:`16807`) Style ^^^^^ diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 586193fe11850..e80debf6cc8ae 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -124,8 +124,8 @@ def __init__(self, data=None, index=None, columns=None, default_kind=None, columns = Index([]) else: for c in columns: - data[c] = SparseArray(np.nan, index=index, - kind=self._default_kind, + data[c] = SparseArray(self._default_fill_value, + index=index, kind=self._default_kind, fill_value=self._default_fill_value) mgr = to_manager(data, columns, index) if dtype is not None: diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py index bfb5103c97adc..05b256b08f9b8 100644 --- a/pandas/tests/sparse/frame/test_frame.py +++ b/pandas/tests/sparse/frame/test_frame.py @@ -1246,6 +1246,14 @@ def test_notna(self): 'B': [True, False, True, True, False]}) tm.assert_frame_equal(res.to_dense(), exp) + def test_default_fill_value_with_no_data(self): + # GH 16807 + expected = pd.SparseDataFrame([[1.0, 1.0], [1.0, 1.0]], + columns=list('ab'), index=range(2)) + result = pd.SparseDataFrame(columns=list('ab'), index=range(2), + default_fill_value=1.0) + tm.assert_frame_equal(expected, result) + class TestSparseDataFrameArithmetic(object): From d93806fc01647c3b85912153829ea964662ca2b7 Mon Sep 17 00:00:00 2001 From: JustinZhengBC Date: Mon, 21 Jan 2019 20:43:12 -0800 Subject: [PATCH 2/5] BUG-16807 add tests --- pandas/tests/sparse/frame/test_frame.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py index 05b256b08f9b8..4e23c919d8beb 100644 --- a/pandas/tests/sparse/frame/test_frame.py +++ b/pandas/tests/sparse/frame/test_frame.py @@ -269,6 +269,19 @@ def test_type_coercion_at_construction(self): default_fill_value=0) tm.assert_sp_frame_equal(result, expected) + def test_default_dtype(self): + result = pd.SparseDataFrame(columns=list('ab'), index=range(2)) + expected = pd.SparseDataFrame([[np.nan, np.nan], [np.nan, np.nan]], + columns=list('ab'), index=range(2)) + tm.assert_sp_frame_equal(result, expected) + + def test_nan_data_with_int_dtype_raises_error(self): + sdf = pd.SparseDataFrame([[np.nan, np.nan], [np.nan, np.nan]], + columns=list('ab'), index=range(2)) + msg = "Cannot convert non-finite values (NA or inf) to integer" + with pytest.raises(ValueError, match=msg): + pd.SparseDataFrame(sdf, dtype=np.int64) + def test_dtypes(self): df = DataFrame(np.random.randn(10000, 4)) df.loc[:9998] = np.nan From 8fdf316e7b9d690bc96dad780ef35718212ae818 Mon Sep 17 00:00:00 2001 From: JustinZhengBC Date: Mon, 21 Jan 2019 21:18:46 -0800 Subject: [PATCH 3/5] fix typo --- pandas/tests/sparse/frame/test_frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py index 4e23c919d8beb..d917c94e813cd 100644 --- a/pandas/tests/sparse/frame/test_frame.py +++ b/pandas/tests/sparse/frame/test_frame.py @@ -278,7 +278,7 @@ def test_default_dtype(self): def test_nan_data_with_int_dtype_raises_error(self): sdf = pd.SparseDataFrame([[np.nan, np.nan], [np.nan, np.nan]], columns=list('ab'), index=range(2)) - msg = "Cannot convert non-finite values (NA or inf) to integer" + msg = "Cannot convert non-finite values" with pytest.raises(ValueError, match=msg): pd.SparseDataFrame(sdf, dtype=np.int64) From cc019788f4201af9d2e847730ce6a51749423299 Mon Sep 17 00:00:00 2001 From: JustinZhengBC Date: Fri, 1 Mar 2019 08:46:43 -0800 Subject: [PATCH 4/5] BUG-16807 move whatsnew note --- doc/source/whatsnew/v0.24.0.rst | 1 - doc/source/whatsnew/v0.24.2.rst | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 0ede88c4ba6dc..a49ea2cf493a6 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1907,7 +1907,6 @@ Sparse - Bug in :meth:`SparseArray.nonzero` and :meth:`SparseDataFrame.dropna` returning shifted/incorrect results (:issue:`21172`) - Bug in :meth:`DataFrame.apply` where dtypes would lose sparseness (:issue:`23744`) - Bug in :func:`concat` when concatenating a list of :class:`Series` with all-sparse values changing the ``fill_value`` and converting to a dense Series (:issue:`24371`) -- Bug in :class:`SparseFrame` constructor where passing ``None`` as the data would cause ``default_fill_value`` to be ignored (:issue:`16807`) Style ^^^^^ diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst index 8f4beb3f484a4..4dd47dc712a00 100644 --- a/doc/source/whatsnew/v0.24.2.rst +++ b/doc/source/whatsnew/v0.24.2.rst @@ -97,6 +97,7 @@ Bug Fixes - Bug in :meth:`Series.is_unique` where single occurrences of ``NaN`` were not considered unique (:issue:`25180`) - Bug in :func:`merge` when merging an empty ``DataFrame`` with an ``Int64`` column or a non-empty ``DataFrame`` with an ``Int64`` column that is all ``NaN`` (:issue:`25183`) +- Bug in :class:`SparseFrame` constructor where passing ``None`` as the data would cause ``default_fill_value`` to be ignored (:issue:`16807`) - .. _whatsnew_0.242.contributors: From fe3eebd474fae62d2e0737fac821a1802231e102 Mon Sep 17 00:00:00 2001 From: JustinZhengBC Date: Sat, 2 Mar 2019 19:05:44 -0800 Subject: [PATCH 5/5] move whatsnew note --- doc/source/whatsnew/v0.24.2.rst | 1 - doc/source/whatsnew/v0.25.0.rst | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst index 4dd47dc712a00..8f4beb3f484a4 100644 --- a/doc/source/whatsnew/v0.24.2.rst +++ b/doc/source/whatsnew/v0.24.2.rst @@ -97,7 +97,6 @@ Bug Fixes - Bug in :meth:`Series.is_unique` where single occurrences of ``NaN`` were not considered unique (:issue:`25180`) - Bug in :func:`merge` when merging an empty ``DataFrame`` with an ``Int64`` column or a non-empty ``DataFrame`` with an ``Int64`` column that is all ``NaN`` (:issue:`25183`) -- Bug in :class:`SparseFrame` constructor where passing ``None`` as the data would cause ``default_fill_value`` to be ignored (:issue:`16807`) - .. _whatsnew_0.242.contributors: diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 170e7f14da397..b1df419d15c1b 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -226,7 +226,7 @@ Sparse ^^^^^^ - Significant speedup in `SparseArray` initialization that benefits most operations, fixing performance regression introduced in v0.20.0 (:issue:`24985`) -- +- Bug in :class:`SparseFrame` constructor where passing ``None`` as the data would cause ``default_fill_value`` to be ignored (:issue:`16807`) -