diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 170e7f14da397..b1df419d15c1b 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -226,7 +226,7 @@ Sparse ^^^^^^ - Significant speedup in `SparseArray` initialization that benefits most operations, fixing performance regression introduced in v0.20.0 (:issue:`24985`) -- +- Bug in :class:`SparseDataFrame` constructor where passing ``None`` as the data would cause ``default_fill_value`` to be ignored (:issue:`16807`) - diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index e0af11d13774c..2d54b82a3c844 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -124,8 +124,8 @@ def __init__(self, data=None, index=None, columns=None, default_kind=None, columns = Index([]) else: for c in columns: - data[c] = SparseArray(np.nan, index=index, - kind=self._default_kind, + data[c] = SparseArray(self._default_fill_value, + index=index, kind=self._default_kind, fill_value=self._default_fill_value) mgr = to_manager(data, columns, index) if dtype is not None: diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py index bfb5103c97adc..d917c94e813cd 100644 --- a/pandas/tests/sparse/frame/test_frame.py +++ b/pandas/tests/sparse/frame/test_frame.py @@ -269,6 +269,19 @@ def test_type_coercion_at_construction(self): default_fill_value=0) tm.assert_sp_frame_equal(result, expected) + def test_default_dtype(self): + result = pd.SparseDataFrame(columns=list('ab'), index=range(2)) + expected = pd.SparseDataFrame([[np.nan, np.nan], [np.nan, np.nan]], + columns=list('ab'), index=range(2)) + tm.assert_sp_frame_equal(result, expected) + + def test_nan_data_with_int_dtype_raises_error(self): + sdf = pd.SparseDataFrame([[np.nan, np.nan], [np.nan, np.nan]], + columns=list('ab'), index=range(2)) + msg = "Cannot convert non-finite values" + with pytest.raises(ValueError, match=msg): + pd.SparseDataFrame(sdf, 
dtype=np.int64) + def test_dtypes(self): df = DataFrame(np.random.randn(10000, 4)) df.loc[:9998] = np.nan @@ -1246,6 +1259,14 @@ def test_notna(self): 'B': [True, False, True, True, False]}) tm.assert_frame_equal(res.to_dense(), exp) + def test_default_fill_value_with_no_data(self): + # GH 16807 + expected = pd.SparseDataFrame([[1.0, 1.0], [1.0, 1.0]], + columns=list('ab'), index=range(2)) + result = pd.SparseDataFrame(columns=list('ab'), index=range(2), + default_fill_value=1.0) + tm.assert_frame_equal(expected, result) + class TestSparseDataFrameArithmetic(object):