From 6afeea39401b9b9fee36b428434bea6710d5175b Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 8 Apr 2017 11:32:52 -0400 Subject: [PATCH 1/2] BUG: Index.to_series() is not copying the index --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/indexes/base.py | 4 +++- pandas/tests/frame/test_query_eval.py | 2 +- pandas/tests/indexes/common.py | 9 +++++++++ pandas/tseries/index.py | 4 +++- 5 files changed, 17 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index e8170b4bf2113..fd1cd3d0022c9 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1219,6 +1219,7 @@ Conversion - Bug in ``DataFrame.fillna()`` with tz-aware datetimes (:issue:`15855`) - Bug in ``is_string_dtype``, ``is_timedelta64_ns_dtype``, and ``is_string_like_dtype`` in which an error was raised when ``None`` was passed in (:issue:`15941`) - Bug in the return type of ``pd.unique`` on a ``Categorical``, which was returning an ndarray and not a ``Categorical`` (:issue:`15903`) +- Bug in ``Index.to_series()`` where the index was not copied (and so mutating later would change the original) (:issue:`15949`) Indexing ^^^^^^^^ diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 91e2422873dd4..bf7975bcdb964 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -944,7 +944,9 @@ def to_series(self, **kwargs): """ from pandas import Series - return Series(self._to_embed(), index=self, name=self.name) + return Series(self._to_embed(), + index=self._shallow_copy(), + name=self.name) def _to_embed(self, keep_tz=False): """ diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 647af92b42273..f90b37b66d200 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -484,7 +484,7 @@ def test_date_index_query_with_NaT_duplicates(self): df = DataFrame(d) df.loc[np.random.rand(n) > 0.5, 'dates1'] = pd.NaT 
df.set_index('dates1', inplace=True, drop=True) - res = df.query('index < 20130101 < dates3', engine=engine, + res = df.query('dates1 < 20130101 < dates3', engine=engine, parser=parser) expec = df[(df.index.to_series() < '20130101') & ('20130101' < df.dates3)] diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index ba76945834aff..08f8f8d48e705 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -38,6 +38,15 @@ def test_pickle_compat_construction(self): # need an object to create with self.assertRaises(TypeError, self._holder) + def test_to_series(self): + # assert that we are creating a copy of the index + + idx = self.create_index() + s = idx.to_series() + assert s.values is not idx.values + assert s.index is not idx + assert s.name == idx.name + def test_shift(self): # GH8083 test the base class for shift diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 8fa842a836051..2c14d4f8ea79e 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -895,7 +895,9 @@ def to_series(self, keep_tz=False): Series """ from pandas import Series - return Series(self._to_embed(keep_tz), index=self, name=self.name) + return Series(self._to_embed(keep_tz), + index=self._shallow_copy(), + name=self.name) def _to_embed(self, keep_tz=False): """ From 2f6a9c22a14cd52b2ebba13dec409b3008388b02 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 8 Apr 2017 10:36:51 -0400 Subject: [PATCH 2/2] TST: clean up series/frame api tests inheritance a bit --- .../frame/{test_misc_api.py => test_api.py} | 0 .../series/{test_misc_api.py => test_api.py} | 0 pandas/tests/series/test_quantile.py | 42 ++++++++----------- pandas/tests/sparse/test_frame.py | 2 +- pandas/tests/sparse/test_series.py | 2 +- 5 files changed, 19 insertions(+), 27 deletions(-) rename pandas/tests/frame/{test_misc_api.py => test_api.py} (100%) rename pandas/tests/series/{test_misc_api.py => test_api.py} (100%) diff --git 
a/pandas/tests/frame/test_misc_api.py b/pandas/tests/frame/test_api.py similarity index 100% rename from pandas/tests/frame/test_misc_api.py rename to pandas/tests/frame/test_api.py diff --git a/pandas/tests/series/test_misc_api.py b/pandas/tests/series/test_api.py similarity index 100% rename from pandas/tests/series/test_misc_api.py rename to pandas/tests/series/test_api.py diff --git a/pandas/tests/series/test_quantile.py b/pandas/tests/series/test_quantile.py index b8d1b92081858..5aca34fb86576 100644 --- a/pandas/tests/series/test_quantile.py +++ b/pandas/tests/series/test_quantile.py @@ -16,17 +16,16 @@ class TestSeriesQuantile(TestData, tm.TestCase): def test_quantile(self): - from numpy import percentile q = self.ts.quantile(0.1) - self.assertEqual(q, percentile(self.ts.valid(), 10)) + self.assertEqual(q, np.percentile(self.ts.valid(), 10)) q = self.ts.quantile(0.9) - self.assertEqual(q, percentile(self.ts.valid(), 90)) + self.assertEqual(q, np.percentile(self.ts.valid(), 90)) # object dtype q = Series(self.ts, dtype=object).quantile(0.9) - self.assertEqual(q, percentile(self.ts.valid(), 90)) + self.assertEqual(q, np.percentile(self.ts.valid(), 90)) # datetime64[ns] dtype dts = self.ts.index.to_series() @@ -48,12 +47,11 @@ def test_quantile(self): self.ts.quantile(invalid) def test_quantile_multi(self): - from numpy import percentile qs = [.1, .9] result = self.ts.quantile(qs) - expected = pd.Series([percentile(self.ts.valid(), 10), - percentile(self.ts.valid(), 90)], + expected = pd.Series([np.percentile(self.ts.valid(), 10), + np.percentile(self.ts.valid(), 90)], index=qs, name=self.ts.name) tm.assert_series_equal(result, expected) @@ -70,50 +68,44 @@ def test_quantile_multi(self): [], dtype=float)) tm.assert_series_equal(result, expected) + @pytest.mark.skipif(_np_version_under1p9, + reason="Numpy version is under 1.9") def test_quantile_interpolation(self): # GH #10174 - if _np_version_under1p9: - pytest.skip("Numpy version is under 1.9") - - from numpy 
import percentile # interpolation = linear (default case) q = self.ts.quantile(0.1, interpolation='linear') - self.assertEqual(q, percentile(self.ts.valid(), 10)) + self.assertEqual(q, np.percentile(self.ts.valid(), 10)) q1 = self.ts.quantile(0.1) - self.assertEqual(q1, percentile(self.ts.valid(), 10)) + self.assertEqual(q1, np.percentile(self.ts.valid(), 10)) # test with and without interpolation keyword self.assertEqual(q, q1) + @pytest.mark.skipif(_np_version_under1p9, + reason="Numpy version is under 1.9") def test_quantile_interpolation_dtype(self): # GH #10174 - if _np_version_under1p9: - pytest.skip("Numpy version is under 1.9") - - from numpy import percentile # interpolation = linear (default case) q = pd.Series([1, 3, 4]).quantile(0.5, interpolation='lower') - self.assertEqual(q, percentile(np.array([1, 3, 4]), 50)) + self.assertEqual(q, np.percentile(np.array([1, 3, 4]), 50)) self.assertTrue(is_integer(q)) q = pd.Series([1, 3, 4]).quantile(0.5, interpolation='higher') - self.assertEqual(q, percentile(np.array([1, 3, 4]), 50)) + self.assertEqual(q, np.percentile(np.array([1, 3, 4]), 50)) self.assertTrue(is_integer(q)) + @pytest.mark.skipif(not _np_version_under1p9, + reason="Numpy version is greater 1.9") def test_quantile_interpolation_np_lt_1p9(self): # GH #10174 - if not _np_version_under1p9: - pytest.skip("Numpy version is greater than 1.9") - - from numpy import percentile # interpolation = linear (default case) q = self.ts.quantile(0.1, interpolation='linear') - self.assertEqual(q, percentile(self.ts.valid(), 10)) + self.assertEqual(q, np.percentile(self.ts.valid(), 10)) q1 = self.ts.quantile(0.1) - self.assertEqual(q1, percentile(self.ts.valid(), 10)) + self.assertEqual(q1, np.percentile(self.ts.valid(), 10)) # interpolation other than linear expErrMsg = "Interpolation methods other than " diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py index ae1a1e35f1859..e6482d70e0ae3 100644 --- 
a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/sparse/test_frame.py @@ -22,7 +22,7 @@ from pandas.sparse.libsparse import BlockIndex, IntIndex from pandas.sparse.api import SparseSeries, SparseDataFrame, SparseArray -from pandas.tests.frame.test_misc_api import SharedWithSparse +from pandas.tests.frame.test_api import SharedWithSparse from pandas.tests.sparse.common import spmatrix # noqa: F401 diff --git a/pandas/tests/sparse/test_series.py b/pandas/tests/sparse/test_series.py index 8aa85a5b7f396..83f0237841dbd 100644 --- a/pandas/tests/sparse/test_series.py +++ b/pandas/tests/sparse/test_series.py @@ -18,7 +18,7 @@ from pandas.sparse.libsparse import BlockIndex, IntIndex from pandas.sparse.api import SparseSeries -from pandas.tests.series.test_misc_api import SharedWithSparse +from pandas.tests.series.test_api import SharedWithSparse def _test_data1():