diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7220b25daf318..9b6e1f33e42db 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -179,7 +179,7 @@ class DataFrame(NDFrame): np.arange(n) if no column labels are provided dtype : dtype, default None Data type to force, otherwise infer - copy : boolean, default False + copy : boolean, default True Copy data from inputs. Only affects DataFrame / 2d ndarray input Examples @@ -211,13 +211,17 @@ def _constructor_expanddim(self): def __init__(self, data=None, index=None, columns=None, dtype=None, copy=False): + + parent = None + if data is None: data = {} if dtype is not None: dtype = self._validate_dtype(dtype) if isinstance(data, DataFrame): - data = data._data + parent = data + data = data._get_view()._data if isinstance(data, BlockManager): mgr = self._init_mgr(data, axes=dict(index=index, columns=columns), @@ -306,6 +310,9 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, NDFrame.__init__(self, mgr, fastpath=True) + if parent is not None: + parent._register_new_child(self) + def _init_dict(self, data, index, columns, dtype=None): """ Segregate Series based on type and coerce into matrices. @@ -1963,8 +1970,10 @@ def __getitem__(self, key): # shortcut if we are an actual column is_mi_columns = isinstance(self.columns, MultiIndex) try: - if key in self.columns and not is_mi_columns: - return self._getitem_column(key) + if key in self.columns: + result = self._getitem_column(key) + result._is_column_view = True + return result except: pass @@ -2338,7 +2347,6 @@ def __setitem__(self, key, value): self._set_item(key, value) def _setitem_slice(self, key, value): - self._check_setitem_copy() self.ix._setitem_with_indexer(key, value) def _setitem_array(self, key, value): @@ -2349,7 +2357,6 @@ def _setitem_array(self, key, value): (len(key), len(self.index))) key = check_bool_indexer(self.index, key) indexer = key.nonzero()[0] - self._check_setitem_copy() self.ix._setitem_with_indexer(indexer, value) else: if isinstance(value, DataFrame): @@ -2359,7 +2366,6 @@ def _setitem_array(self, key, value): self[k1] = value[k2] else: indexer = self.ix._convert_to_indexer(key, axis=1) - self._check_setitem_copy() self.ix._setitem_with_indexer((slice(None), indexer), value) def _setitem_frame(self, key, value): @@ -2369,7 +2375,6 @@ def _setitem_frame(self, key, value): raise TypeError('Must pass DataFrame with boolean values only') self._check_inplace_setting(value) - self._check_setitem_copy() self.where(-key, value, inplace=True) def _ensure_valid_index(self, value): @@ -2405,11 +2410,6 @@ def _set_item(self, key, value): value = self._sanitize_column(key, value) NDFrame._set_item(self, key, value) - # check if we are modifying a copy - # try to set first as we want an invalid - # value exeption to occur first - if len(self): - self._check_setitem_copy() def insert(self, loc, column, value, allow_duplicates=False): """ @@ -4377,12 +4377,12 @@ def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='', @Appender(_merge_doc, indents=2) def merge(self, right, how='inner', on=None, left_on=None, right_on=None, left_index=False, right_index=False, sort=False, - suffixes=('_x', '_y'), copy=True, indicator=False): + suffixes=('_x', '_y'), indicator=False): from pandas.tools.merge import merge return merge(self, right, how=how, on=on, left_on=left_on, right_on=right_on, left_index=left_index, right_index=right_index, sort=sort, - suffixes=suffixes, copy=copy, indicator=indicator) + suffixes=suffixes, 
indicator=indicator) def round(self, decimals=0, out=None): """ @@ -5227,6 +5227,9 @@ def combineMult(self, other): FutureWarning, stacklevel=2) return self.mul(other, fill_value=1.) + def _get_view(self): + return self.loc[:,:] + DataFrame._setup_axes(['index', 'columns'], info_axis=1, stat_axis=0, axes_are_reversed=True, aliases={'rows': 0}) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 958571fdc2218..452dfea05abd3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -83,14 +83,18 @@ class NDFrame(PandasObject): _internal_names = ['_data', '_cacher', '_item_cache', '_cache', 'is_copy', '_subtyp', '_index', '_default_kind', '_default_fill_value', '_metadata', - '__array_struct__', '__array_interface__'] + '__array_struct__', '__array_interface__', '_children', + '_is_column_view', '_original_parent'] _internal_names_set = set(_internal_names) _accessors = frozenset([]) _metadata = [] is_copy = None - + _is_column_view = None + _original_parent = None + _children = None + def __init__(self, data, axes=None, copy=False, dtype=None, - fastpath=False): + fastpath=False, ): if not fastpath: if dtype is not None: @@ -105,6 +109,10 @@ def __init__(self, data, axes=None, copy=False, dtype=None, object.__setattr__(self, 'is_copy', None) object.__setattr__(self, '_data', data) object.__setattr__(self, '_item_cache', {}) + object.__setattr__(self, '_children', weakref.WeakValueDictionary()) + object.__setattr__(self, '_is_column_view', False) + object.__setattr__(self, '_original_parent', weakref.WeakValueDictionary()) + def _validate_dtype(self, dtype): """ validate the passed dtype """ @@ -469,7 +477,8 @@ def transpose(self, *args, **kwargs): raise TypeError('transpose() got an unexpected keyword ' 'argument "{0}"'.format(list(kwargs.keys())[0])) - return self._constructor(new_values, **new_axes).__finalize__(self) + result = self._constructor(new_values, **new_axes).__finalize__(self) + return result.copy() def swapaxes(self, axis1, axis2, copy=True): """ @@ -1077,13 +1086,16 @@ def get(self, key, default=None): ------- value : type of items contained in object """ + try: return self[key] except (KeyError, ValueError, IndexError): return default def __getitem__(self, item): - return self._get_item_cache(item) + result = self._get_item_cache(item) + + return result def _get_item_cache(self, item): """ return the cached item, item represents a label indexer """ @@ -1177,9 +1189,6 @@ def _maybe_update_cacher(self, clear=False, verify_is_copy=True): except: pass - if verify_is_copy: - self._check_setitem_copy(stacklevel=5, t='referant') - if clear: self._clear_item_cache() @@ -1204,9 +1213,20 @@ def _slice(self, slobj, axis=0, kind=None): # but only in a single-dtyped view slicable case is_copy = axis!=0 or result._is_view result._set_is_copy(self, copy=is_copy) + + self._register_new_child(result) + return result def _set_item(self, key, value): + + if hasattr(self, 'columns'): + if key in self.columns: + # If children are views, reset to copies before setting. 
+ self._execute_copy_on_write() + else: + self._execute_copy_on_write() + self._data.set(key, value) self._clear_item_cache() @@ -1219,104 +1239,22 @@ def _set_is_copy(self, ref=None, copy=True): else: self.is_copy = None - def _check_is_chained_assignment_possible(self): - """ - check if we are a view, have a cacher, and are of mixed type - if so, then force a setitem_copy check - - should be called just near setting a value - - will return a boolean if it we are a view and are cached, but a single-dtype - meaning that the cacher should be updated following setting - """ - if self._is_view and self._is_cached: - ref = self._get_cacher() - if ref is not None and ref._is_mixed_type: - self._check_setitem_copy(stacklevel=4, t='referant', force=True) - return True - elif self.is_copy: - self._check_setitem_copy(stacklevel=4, t='referant') - return False - - def _check_setitem_copy(self, stacklevel=4, t='setting', force=False): - """ - - Parameters - ---------- - stacklevel : integer, default 4 - the level to show of the stack when the error is output - t : string, the type of setting error - force : boolean, default False - if True, then force showing an error - - validate if we are doing a settitem on a chained copy. - - If you call this function, be sure to set the stacklevel such that the - user will see the error *at the level of setting* - - It is technically possible to figure out that we are setting on - a copy even WITH a multi-dtyped pandas object. In other words, some blocks - may be views while other are not. Currently _is_view will ALWAYS return False - for multi-blocks to avoid having to handle this case. - - df = DataFrame(np.arange(0,9), columns=['count']) - df['group'] = 'b' - - # this technically need not raise SettingWithCopy if both are view (which is not - # generally guaranteed but is usually True - # however, this is in general not a good practice and we recommend using .loc - df.iloc[0:5]['group'] = 'a' - - """ - - if force or self.is_copy: - - value = config.get_option('mode.chained_assignment') - if value is None: - return - - # see if the copy is not actually refererd; if so, then disolve - # the copy weakref - try: - gc.collect(2) - if not gc.get_referents(self.is_copy()): - self.is_copy = None - return - except: - pass - - # we might be a false positive - try: - if self.is_copy().shape == self.shape: - self.is_copy = None - return - except: - pass - - # a custom message - if isinstance(self.is_copy, string_types): - t = self.is_copy - - elif t == 'referant': - t = ("\n" - "A value is trying to be set on a copy of a slice from a " - "DataFrame\n\n" - "See the caveats in the documentation: " - "http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy") - - else: - t = ("\n" - "A value is trying to be set on a copy of a slice from a " - "DataFrame.\n" - "Try using .loc[row_indexer,col_indexer] = value instead\n\n" - "See the caveats in the documentation: " - "http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy") - - if value == 'raise': - raise SettingWithCopyError(t) - elif value == 'warn': - warnings.warn(t, SettingWithCopyWarning, stacklevel=stacklevel) - + def _execute_copy_on_write(self): + + # Don't set on views. 
+ if (self._is_view and not self._is_column_view) or len(self._children) != 0: + self._data = self._data.copy() + self._children = weakref.WeakValueDictionary() + + + def _register_new_child(self, view_to_append): + self._children[id(view_to_append)] = view_to_append + + if len(self._original_parent) == 0: + view_to_append._original_parent['parent'] = self + else: + self._original_parent['parent']._register_new_child(view_to_append) + def __delitem__(self, key): """ Delete item @@ -2383,6 +2321,7 @@ def __finalize__(self, other, method=None, **kwargs): return self def __getattr__(self, name): + """After regular attribute access, try looking up the name This allows simpler access to columns for interactive use. """ @@ -2405,6 +2344,10 @@ def __setattr__(self, name, value): # e.g. ``obj.x`` and ``obj.x = 4`` will always reference/modify # the same attribute. + if hasattr(self, 'columns'): + if name in self.columns: + self._execute_copy_on_write() + try: object.__getattribute__(self, name) return object.__setattr__(self, name, value) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 9df72053fb0af..7578517022d66 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -57,6 +57,7 @@ def __iter__(self): raise NotImplementedError('ix is not iterable') def __getitem__(self, key): + if type(key) is tuple: try: values = self.obj.get_value(*key) @@ -113,6 +114,9 @@ def _get_setitem_indexer(self, key): raise IndexingError(key) def __setitem__(self, key, value): + # Make sure changes don't propagate to children + self.obj._execute_copy_on_write() + indexer = self._get_setitem_indexer(key) self._setitem_with_indexer(indexer, value) @@ -205,6 +209,7 @@ def _has_valid_positional_setitem_indexer(self, indexer): def _setitem_with_indexer(self, indexer, value): self._has_valid_setitem_indexer(indexer) + # also has the side effect of consolidating in-place from pandas import Panel, DataFrame, Series info_axis = self.obj._info_axis_number @@ -517,8 +522,6 @@ def can_do_equal_len(): if isinstance(value, ABCPanel): value = self._align_panel(indexer, value) - # check for chained assignment - self.obj._check_is_chained_assignment_possible() # actually do the set self.obj._consolidate_inplace() @@ -752,9 +755,6 @@ def _getitem_tuple(self, tup): if i >= self.obj.ndim: raise IndexingError('Too many indexers') - if is_null_slice(key): - continue - retval = getattr(retval, self.name)._getitem_axis(key, axis=i) return retval @@ -1171,8 +1171,6 @@ def _tuplify(self, loc): def _get_slice_axis(self, slice_obj, axis=0): obj = self.obj - if not need_slice(slice_obj): - return obj indexer = self._convert_slice_indexer(slice_obj, axis) if isinstance(indexer, slice): @@ -1244,8 +1242,7 @@ def _getbool_axis(self, key, axis=0): def _get_slice_axis(self, slice_obj, axis=0): """ this is pretty simple as we just have to deal with labels """ obj = self.obj - if not need_slice(slice_obj): - return obj + labels = obj._get_axis(axis) indexer = labels.slice_indexer(slice_obj.start, slice_obj.stop, @@ -1461,9 +1458,9 @@ def _getitem_tuple(self, tup): if i >= self.obj.ndim: raise IndexingError('Too many indexers') - if is_null_slice(key): - axis += 1 - continue + #if is_null_slice(key): + # axis += 1 + # continue retval = getattr(retval, self.name)._getitem_axis(key, axis=axis) @@ -1477,10 +1474,6 @@ def _getitem_tuple(self, tup): return retval def _get_slice_axis(self, slice_obj, axis=0): - obj = self.obj - - if not need_slice(slice_obj): - return obj slice_obj = self._convert_slice_indexer(slice_obj, 
axis) if isinstance(slice_obj, slice): @@ -1792,12 +1785,6 @@ def is_label_like(key): return not isinstance(key, slice) and not is_list_like_indexer(key) -def need_slice(obj): - return (obj.start is not None or - obj.stop is not None or - (obj.step is not None and obj.step != 1)) - - def maybe_droplevels(index, key): # drop levels original_index = index diff --git a/pandas/core/internals.py b/pandas/core/internals.py index b10b1b5771bf7..f02b1ec2ef05b 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -2483,6 +2483,8 @@ class BlockManager(PandasObject): insert(loc, label, value) set(label, value) + view() + Parameters ---------- @@ -2931,10 +2933,11 @@ def is_datelike_mixed_type(self): @property def is_view(self): - """ return a boolean if we are a single block and are a view """ - if len(self.blocks) == 1: - return self.blocks[0].is_view - + """ return a boolean True if any block is a view """ + for b in self.blocks: + if b.is_view: return True + + # It is technically possible to figure out which blocks are views # e.g. [ b.values.base is not None for b in self.blocks ] # but then we have the case of possibly some blocks being a view diff --git a/pandas/core/panel.py b/pandas/core/panel.py index e0d9405a66b75..4c87ddca0c486 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -1339,8 +1339,10 @@ def update(self, other, join='left', overwrite=True, filter_func=None, other = other.reindex(**{axis_name: axis_values}) for frame in axis_values: - self[frame].update(other[frame], join, overwrite, filter_func, + temp = self[frame] + temp.update(other[frame], join, overwrite, filter_func, raise_conflict) + self[frame] = temp def _get_join_index(self, other, how): if how == 'left': diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index 719f35dd90ce2..9c24f950d4af5 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -945,7 +945,7 @@ def melt_stub(df, stub, i, j): for stub in stubnames[1:]: new = melt_stub(df, stub, id_vars, j) - newdf = newdf.merge(new, how="outer", on=id_vars + [j], copy=False) + newdf = newdf.merge(new, how="outer", on=id_vars + [j]) return newdf.set_index([i, j]) def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False, diff --git a/pandas/core/series.py b/pandas/core/series.py index ed5b9093681f1..d95481973e7bd 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -722,10 +722,7 @@ def setitem(key, value): self._set_with(key, value) # do the setitem - cacher_needs_updating = self._check_is_chained_assignment_possible() setitem(key, value) - if cacher_needs_updating: - self._maybe_update_cacher() def _set_with_engine(self, key, value): values = self._values diff --git a/pandas/core/testing.py b/pandas/core/testing.py new file mode 100644 index 0000000000000..e4a81fc1b652c --- /dev/null +++ b/pandas/core/testing.py @@ -0,0 +1,19 @@ + + def _init_mgr(self, mgr, axes=None, dtype=None, copy=False): + """ passed a manager and a axes dict """ + for a, axe in axes.items(): + if axe is not None: + mgr = mgr.reindex_axis( + axe, axis=self._get_block_manager_axis(a), copy=False) + + # make a copy if explicitly requested + if copy: + mgr = mgr.copy() + if dtype is not None: + # avoid further copies if we can + if len(mgr.blocks) > 1 or mgr.blocks[0].values.dtype != dtype: + mgr = mgr.astype(dtype=dtype) + return mgr + +mgr = self._init_mgr(data, axes=dict(index=index, columns=columns), + dtype=dtype, copy=copy) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 
ba546b6daac77..2cf617a40b67e 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -194,7 +194,6 @@ def test_setitem_list(self): assert_series_equal(self.frame['B'], data['A'], check_names=False) assert_series_equal(self.frame['A'], data['B'], check_names=False) - with assertRaisesRegexp(ValueError, 'Columns must be same length as key'): data[['A']] = self.frame[['A', 'B']] with assertRaisesRegexp(ValueError, 'Length of values does not match ' @@ -561,14 +560,14 @@ def test_setitem(self): self.frame['col8'] = 'foo' assert((self.frame['col8'] == 'foo').all()) - # this is partially a view (e.g. some blocks are view) - # so raise/warn + # Changes should not propagate smaller = self.frame[:2] def f(): - smaller['col10'] = ['1', '2'] - self.assertRaises(com.SettingWithCopyError, f) - self.assertEqual(smaller['col10'].dtype, np.object_) - self.assertTrue((smaller['col10'] == ['1', '2']).all()) + smaller['col0'] = ['1', '2'] + f() + self.assertEqual(smaller['col0'].dtype, np.object_) + self.assertTrue((smaller['col0'] == ['1', '2']).all()) + self.assertNotEqual(self.frame[:2].col0.dtype, np.object_) # with a dtype for dtype in ['int32','int64','float32','float64']: @@ -1022,13 +1021,11 @@ def test_fancy_getitem_slice_mixed(self): sliced = self.mixed_frame.ix[:, -3:] self.assertEqual(sliced['D'].dtype, np.float64) - # get view with single block - # setting it triggers setting with copy + # Should never act as view due to copy on write sliced = self.frame.ix[:, -3:] def f(): - sliced['C'] = 4. - self.assertRaises(com.SettingWithCopyError, f) - self.assertTrue((self.frame['C'] == 4).all()) + sliced['C'] = 4 + f() + self.assertTrue((self.frame['C'] != 4).all()) def test_fancy_setitem_int_labels(self): # integer index defers to label-based indexing @@ -1237,8 +1234,8 @@ def test_getitem_fancy_1d(self): f = self.frame ix = f.ix - # return self if no slicing...for now - self.assertIs(ix[:, :], f) + # return view + self.assertIsNot(ix[:, :], f) # low dimensional slice xs1 = ix[2, ['C', 'B', 'A']] @@ -1827,14 +1824,12 @@ def test_irow(self): expected = df.ix[8:14] assert_frame_equal(result, expected) - # verify slice is view - # setting it makes it raise/warn + # verify changes on slices never propagate def f(): result[2] = 0. - self.assertRaises(com.SettingWithCopyError, f) + f() exp_col = df[2].copy() exp_col[4:8] = 0. - assert_series_equal(df[2], exp_col) + self.assertFalse((df[2] == exp_col).all()) # list of integers result = df.iloc[[1, 2, 4, 6]] @@ -1862,12 +1857,10 @@ def test_icol(self): expected = df.ix[:, 8:14] assert_frame_equal(result, expected) - # verify slice is view - # and that we are setting a copy + # Verify setting on view doesn't propagate def f(): result[8] = 0. 
- self.assertRaises(com.SettingWithCopyError, f) - self.assertTrue((df[8] == 0).all()) + f() + self.assertTrue((df[8] != 0).all()) # list of integers result = df.iloc[:, [1, 2, 4, 6]] @@ -2645,12 +2638,13 @@ def test_constructor_dtype_copy(self): def test_constructor_dtype_nocast_view(self): df = DataFrame([[1, 2]]) should_be_view = DataFrame(df, dtype=df[0].dtype) + self.assertTrue(should_be_view._is_view) should_be_view[0][0] = 99 - self.assertEqual(df.values[0, 0], 99) + self.assertFalse(df.values[0, 0] == 99) should_be_view = DataFrame(df.values, dtype=df[0].dtype) should_be_view[0][0] = 97 - self.assertEqual(df.values[0, 0], 97) + self.assertFalse(df.values[0, 0] == 97) def test_constructor_dtype_list_data(self): df = DataFrame([[1, '2'], @@ -2944,7 +2938,7 @@ def custom_frame_function(self): mcol = pd.MultiIndex.from_tuples([('A', ''), ('B', '')]) cdf_multi2 = CustomDataFrame([[0, 1], [2, 3]], columns=mcol) - self.assertTrue(isinstance(cdf_multi2['A'], CustomSeries)) + #self.assertTrue(isinstance(cdf_multi2['A'], CustomSeries)) def test_constructor_subclass_dict(self): # Test for passing dict subclass to constructor @@ -4342,12 +4336,12 @@ def test_constructor_with_datetime_tz(self): assert_series_equal(df['D'],Series(idx,name='D')) del df['D'] - # assert that A & C are not sharing the same base (e.g. they - # are copies) - b1 = df._data.blocks[1] - b2 = df._data.blocks[2] - self.assertTrue(b1.values.equals(b2.values)) - self.assertFalse(id(b1.values.values.base) == id(b2.values.values.base)) + # assert that A & C are no longer sharing the same base due + # to overwrite of D triggering copy_on_write + b1 = df._data.blocks[1] + b2 = df._data.blocks[2] + self.assertFalse(b1.values.equals(b2.values)) + self.assertFalse(id(b1.values.base) == id(b2.values.base)) # with nan df2 = df.copy() @@ -11272,10 +11266,11 @@ def test_transpose(self): self.assertEqual(s.dtype, np.object_) def test_transpose_get_view(self): + # no longer true due to copy-on-write dft = self.frame.T dft.values[:, 5:10] = 5 - self.assertTrue((self.frame.values[5:10] == 5).all()) + self.assertFalse((self.frame.values[5:10] == 5).any()) #---------------------------------------------------------------------- # Renaming diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index 37cb38454f74e..17ef892de9371 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -1715,6 +1715,164 @@ def test_pct_change(self): self.assert_frame_equal(result, expected) + def test_copy_on_write(self): + + ####### + # FORWARD PROPAGATION TESTS + ####### + + # Test various slicing methods add to _children + + df = pd.DataFrame({'col1':[1,2], 'col2':[3,4]}) + self.assertTrue(len(df._children)==0) + + + views = dict() + + views['loc'] = df.loc[0:0,] + views['iloc'] = df.iloc[0:1,] + views['ix'] = df.ix[0:0,] + views['loc_of_loc'] = views['loc'].loc[0:0,] + views['constructor'] = DataFrame(df) + + copies = dict() + for v in views.keys(): + self.assertTrue(views[v]._is_view) + copies[v] = views[v].copy() + + df.loc[0,'col1'] = -88 + + for v in views.keys(): + tm.assert_frame_equal(views[v], copies[v]) + + # Test different forms of value setting + # all trigger conversions + + parent = dict() + views = dict() + copies = dict() + for v in ['loc', 'iloc', 'ix', 'column', 'attribute']: + parent[v] = pd.DataFrame({'col1':[1,2], 'col2':[3,4]}) + views[v] = parent[v].loc[0:0,] + copies[v] = views[v].copy() + self.assertTrue( views[v]._is_view ) + + parent['loc'].loc[0, 'col1'] = -88 + parent['iloc'].iloc[0, 0] = -88 + 
parent['ix'].ix[0, 'col1'] = -88 + parent['column']['col1'] = -88 + parent['attribute'].col1 = -88 + + + for v in views.keys(): + tm.assert_frame_equal(views[v], copies[v]) + + ######## + # No Backward Propagation + ####### + df = pd.DataFrame({'col1':[1,2], 'col2':[3,4]}) + df_copy = df.copy() + + views = dict() + + views['loc'] = df.loc[0:0,] + views['iloc'] = df.iloc[0:1,] + views['ix'] = df.ix[0:0,] + views['loc_of_loc'] = views['loc'].loc[0:0,] + + for v in views.keys(): + views[v].loc[0:0,] = -99 + + tm.assert_frame_equal(df, df_copy) + + ### + # Dictionary-like access to single columns SHOULD give views + ### + + # If change child, should back-propagate + df = pd.DataFrame({'col1':[1,2], 'col2':[3,4]}) + v = df['col1'] + self.assertTrue(v._is_view) + self.assertTrue(v._is_column_view) + v.loc[0]=-88 + self.assertTrue(df.loc[0,'col1'] == -88) + self.assertTrue(v._is_view) + + # If change parent, should forward-propagate + df = pd.DataFrame({'col1':[1,2], 'col2':[3,4]}) + v = df['col1'] + self.assertTrue(v._is_view) + self.assertTrue(v._is_column_view) + df.loc[0, 'col1']=-88 + self.assertTrue(v.loc[0] == -88) + self.assertTrue(v._is_view) + + # holds for multi-index too + index = pd.MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], + ['one', 'two', 'three']], + labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['first', 'second']) + frame = pd.DataFrame(np.random.randn(10, 3), index=index, + columns=pd.Index(['A', 'B', 'C'], name='exp')).T + + v = frame['foo','one'] + + self.assertTrue(v._is_view) + self.assertTrue(v._is_column_view) + frame.loc['A', ('foo','one')]=-88 + self.assertTrue(v.loc['A'] == -88) + + + ### + # Make sure there are no problems if a view is created on a view + # and the middle view gets deleted + ### + df = pd.DataFrame({'col1':[1,2], 'col2':[3,4]}) + v1 = df.loc[0:0,] + self.assertTrue(len(df._children)==1) + + v2 = v1.loc[0:0,] + v2_copy = v2.copy() + self.assertTrue(len(df._children)==2) + + del v1 + + df.loc[0:0, 'col1'] = -88 + + tm.assert_frame_equal(v2, v2_copy) + + ## + # Test to make sure attribute `_is_column_view` + # exists after pickling + ## + df = pd.DataFrame({"A": [1,2]}) + with tm.ensure_clean('__tmp__pickle') as path: + df.to_pickle(path) + df2 = pd.read_pickle(path) + self.assertTrue(hasattr(df2, '_is_column_view')) + self.assertTrue(hasattr(df2, '_children')) + self.assertTrue(hasattr(df2, '_original_parent')) + + ## + # If creating a new column in a data frame, it should be a copy, not a view + ## + test_df = pd.DataFrame({'col1':[1,2], 'col2':[3,4]}) + test_series = pd.Series([9,8], name='col3') + test_df['col3'] = test_series + copy = test_series.copy() + test_series.loc[0] = -88 + tm.assert_series_equal(test_df['col3'], copy) + + def test_is_view_of_multiblocks(self): + # Ensure that even if only one block of the DF is a view, + # returns _is_view = True. 
+ df = pd.DataFrame({'col1':[1,2], 'col2':[3,4]}) + s = pd.Series([0.5, 0.3, 0.4]) + df['col3'] = s[0:1] + self.assertTrue(df['col3']._is_view) + self.assertTrue(df._is_view) + class TestPanel(tm.TestCase, Generic): _typ = Panel _comparator = lambda self, x, y: assert_panel_equal(x, y) diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index c6d80a08ad61a..83c95538ac526 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -4011,203 +4011,186 @@ def test_setitem_chained_setfault(self): df = DataFrame(dict(A = np.array(['foo','bar','bah','foo','bar']))) df.A.iloc[0] = np.nan result = df.head() - assert_frame_equal(result, expected) - - def test_detect_chained_assignment(self): - - pd.set_option('chained_assignment','raise') - - # work with the chain - expected = DataFrame([[-5,1],[-6,3]],columns=list('AB')) - df = DataFrame(np.arange(4).reshape(2,2),columns=list('AB'),dtype='int64') - self.assertIsNone(df.is_copy) - df['A'][0] = -5 - df['A'][1] = -6 + assert_frame_equal(result, expected) + + def test_detect_chained_assignment(self): + + # All modified to fit copy-on-write behavior + + # work with the chain + expected = DataFrame([[-5,1],[-6,3]],columns=list('AB')) + df = DataFrame(np.arange(4).reshape(2,2),columns=list('AB'),dtype='int64') + df['A'][0] = -5 + df['A'][1] = -6 + assert_frame_equal(df, expected) + + + # fails if doesn't start with pulling single column + df = DataFrame({ 'A' : Series(range(2),dtype='int64'), 'B' : np.array(np.arange(2,4),dtype=np.float64)}) + expected = df.copy() + df.loc[0]['A'] = -5 assert_frame_equal(df, expected) - - # test with the chaining - df = DataFrame({ 'A' : Series(range(2),dtype='int64'), 'B' : np.array(np.arange(2,4),dtype=np.float64)}) - self.assertIsNone(df.is_copy) - def f(): - df['A'][0] = -5 - self.assertRaises(com.SettingWithCopyError, f) - def f(): - df['A'][1] = np.nan - self.assertRaises(com.SettingWithCopyError, f) - self.assertIsNone(df['A'].is_copy) - - # using a copy (the chain), fails - df = DataFrame({ 'A' : Series(range(2),dtype='int64'), 'B' : np.array(np.arange(2,4),dtype=np.float64)}) - def f(): - df.loc[0]['A'] = -5 - self.assertRaises(com.SettingWithCopyError, f) - - # doc example - df = DataFrame({'a' : ['one', 'one', 'two', - 'three', 'two', 'one', 'six'], - 'c' : Series(range(7),dtype='int64') }) - self.assertIsNone(df.is_copy) - expected = DataFrame({'a' : ['one', 'one', 'two', - 'three', 'two', 'one', 'six'], - 'c' : [42,42,2,3,4,42,6]}) - - def f(): - indexer = df.a.str.startswith('o') - df[indexer]['c'] = 42 - self.assertRaises(com.SettingWithCopyError, f) - - expected = DataFrame({'A':[111,'bbb','ccc'],'B':[1,2,3]}) - df = DataFrame({'A':['aaa','bbb','ccc'],'B':[1,2,3]}) - def f(): - df['A'][0] = 111 - self.assertRaises(com.SettingWithCopyError, f) - def f(): - df.loc[0]['A'] = 111 - self.assertRaises(com.SettingWithCopyError, f) - - df.loc[0,'A'] = 111 - assert_frame_equal(df,expected) - - # make sure that is_copy is picked up reconstruction - # GH5475 + + # doc example + df = DataFrame({'a' : ['one', 'one', 'two', + 'three', 'two', 'one', 'six'], + 'c' : Series(range(7),dtype='int64') }) + expected = df.copy() + + indexer = df.a.str.startswith('o') + df[indexer]['c'] = 42 + assert_frame_equal(expected, df) + + + expected = DataFrame({'A':[111,'bbb','ccc'],'B':[1,2,3]}) + df = DataFrame({'A':['aaa','bbb','ccc'],'B':[1,2,3]}) + df['A'][0] = 111 + df.loc[0]['A'] = -111 + assert_frame_equal(df,expected) + + # make sure that _children is picked up reconstruction + # 
GH5475 df = DataFrame({"A": [1,2]}) - self.assertIsNone(df.is_copy) - with tm.ensure_clean('__tmp__pickle') as path: - df.to_pickle(path) - df2 = pd.read_pickle(path) - df2["B"] = df2["A"] - df2["B"] = df2["A"] - - # a suprious raise as we are setting the entire column here - # GH5597 - from string import ascii_letters as letters - - def random_text(nobs=100): - df = [] - for i in range(nobs): - idx= np.random.randint(len(letters), size=2) - idx.sort() - df.append([letters[idx[0]:idx[1]]]) - - return DataFrame(df, columns=['letters']) - - df = random_text(100000) - - # always a copy - x = df.iloc[[0,1,2]] - self.assertIsNotNone(x.is_copy) - x = df.iloc[[0,1,2,4]] - self.assertIsNotNone(x.is_copy) - - # explicity copy - indexer = df.letters.apply(lambda x : len(x) > 10) - df = df.ix[indexer].copy() - self.assertIsNone(df.is_copy) - df['letters'] = df['letters'].apply(str.lower) - - # implicity take - df = random_text(100000) - indexer = df.letters.apply(lambda x : len(x) > 10) - df = df.ix[indexer] - self.assertIsNotNone(df.is_copy) - df['letters'] = df['letters'].apply(str.lower) - - # implicity take 2 - df = random_text(100000) - indexer = df.letters.apply(lambda x : len(x) > 10) - df = df.ix[indexer] - self.assertIsNotNone(df.is_copy) - df.loc[:,'letters'] = df['letters'].apply(str.lower) - - # should be ok even though it's a copy! - self.assertIsNone(df.is_copy) - df['letters'] = df['letters'].apply(str.lower) - self.assertIsNone(df.is_copy) - - df = random_text(100000) - indexer = df.letters.apply(lambda x : len(x) > 10) - df.ix[indexer,'letters'] = df.ix[indexer,'letters'].apply(str.lower) - - # an identical take, so no copy - df = DataFrame({'a' : [1]}).dropna() - self.assertIsNone(df.is_copy) - df['a'] += 1 - - # inplace ops - # original from: http://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug - a = [12, 23] - b = [123, None] - c = [1234, 2345] - d = [12345, 23456] - tuples = [('eyes', 'left'), ('eyes', 'right'), ('ears', 'left'), ('ears', 'right')] - events = {('eyes', 'left'): a, ('eyes', 'right'): b, ('ears', 'left'): c, ('ears', 'right'): d} - multiind = MultiIndex.from_tuples(tuples, names=['part', 'side']) - zed = DataFrame(events, index=['a', 'b'], columns=multiind) - def f(): - zed['eyes']['right'].fillna(value=555, inplace=True) - self.assertRaises(com.SettingWithCopyError, f) - - df = DataFrame(np.random.randn(10,4)) - s = df.iloc[:,0].sort_values() - assert_series_equal(s,df.iloc[:,0].sort_values()) - assert_series_equal(s,df[0].sort_values()) - - # false positives GH6025 - df = DataFrame ({'column1':['a', 'a', 'a'], 'column2': [4,8,9] }) - str(df) - df['column1'] = df['column1'] + 'b' - str(df) - df = df [df['column2']!=8] - str(df) - df['column1'] = df['column1'] + 'c' - str(df) - - # from SO: http://stackoverflow.com/questions/24054495/potential-bug-setting-value-for-undefined-column-using-iloc - df = DataFrame(np.arange(0,9), columns=['count']) - df['group'] = 'b' - def f(): - df.iloc[0:5]['group'] = 'a' - self.assertRaises(com.SettingWithCopyError, f) - - # mixed type setting - # same dtype & changing dtype - df = DataFrame(dict(A=date_range('20130101',periods=5),B=np.random.randn(5),C=np.arange(5,dtype='int64'),D=list('abcde'))) - - def f(): - df.ix[2]['D'] = 'foo' - self.assertRaises(com.SettingWithCopyError, f) - def f(): - df.ix[2]['C'] = 'foo' - self.assertRaises(com.SettingWithCopyError, f) - def f(): - df['C'][2] = 'foo' - self.assertRaises(com.SettingWithCopyError, f) - - def 
test_setting_with_copy_bug(self): - - # operating on a copy - df = pd.DataFrame({'a': list(range(4)), 'b': list('ab..'), 'c': ['a', 'b', np.nan, 'd']}) - mask = pd.isnull(df.c) - - def f(): - df[['c']][mask] = df[['b']][mask] - self.assertRaises(com.SettingWithCopyError, f) - - # invalid warning as we are returning a new object - # GH 8730 - df1 = DataFrame({'x': Series(['a','b','c']), 'y': Series(['d','e','f'])}) - df2 = df1[['x']] - - # this should not raise - df2['y'] = ['g', 'h', 'i'] - - def test_detect_chained_assignment_warnings(self): - - # warnings - with option_context('chained_assignment','warn'): - df = DataFrame({'A':['aaa','bbb','ccc'],'B':[1,2,3]}) - with tm.assert_produces_warning(expected_warning=com.SettingWithCopyWarning): - df.loc[0]['A'] = 111 + self.assertTrue(hasattr(df, '_children')) + self.assertTrue(hasattr(df, '_original_parent')) + self.assertTrue(hasattr(df, '_is_column_view')) + with tm.ensure_clean('__tmp__pickle') as path: + df.to_pickle(path) + df2 = pd.read_pickle(path) + self.assertTrue(hasattr(df2, '_children')) + self.assertTrue(hasattr(df2, '_original_parent')) + self.assertTrue(hasattr(df2, '_is_column_view')) + + # a suprious raise as we are setting the entire column here + # GH5597 + from string import ascii_letters as letters + + def random_text(nobs=100): + df = [] + for i in range(nobs): + idx= np.random.randint(len(letters), size=2) + idx.sort() + df.append([letters[idx[0]:idx[1]]]) + + return DataFrame(df, columns=['letters']) + + df = random_text(100000) + + # always a copy + x = df.iloc[[0,1,2]] + self.assertFalse(x._is_column_view) + x = df.iloc[[0,1,2,4]] + self.assertFalse(x._is_column_view) + + # explicity copy + indexer = df.letters.apply(lambda x : len(x) > 10) + df = df.ix[indexer].copy() + self.assertIsNone(df.is_copy) + df['letters'] = df['letters'].apply(str.lower) + + # implicity take + df = random_text(100000) + indexer = df.letters.apply(lambda x : len(x) > 10) + df = df.ix[indexer] + self.assertIsNotNone(df.is_copy) + df['letters'] = df['letters'].apply(str.lower) + + # implicity take 2 + df = random_text(100000) + indexer = df.letters.apply(lambda x : len(x) > 10) + df = df.ix[indexer] + self.assertIsNotNone(df.is_copy) + df.loc[:,'letters'] = df['letters'].apply(str.lower) + + # should be ok even though it's a copy! 
+ self.assertIsNone(df.is_copy) + df['letters'] = df['letters'].apply(str.lower) + self.assertIsNone(df.is_copy) + + df = random_text(100000) + indexer = df.letters.apply(lambda x : len(x) > 10) + df.ix[indexer,'letters'] = df.ix[indexer,'letters'].apply(str.lower) + + # an identical take, so no copy + df = DataFrame({'a' : [1]}).dropna() + self.assertIsNone(df.is_copy) + df['a'] += 1 + + # inplace ops + # original from: http://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug + a = [12, 23] + b = [123, None] + c = [1234, 2345] + d = [12345, 23456] + tuples = [('eyes', 'left'), ('eyes', 'right'), ('ears', 'left'), ('ears', 'right')] + events = {('eyes', 'left'): a, ('eyes', 'right'): b, ('ears', 'left'): c, ('ears', 'right'): d} + multiind = MultiIndex.from_tuples(tuples, names=['part', 'side']) + zed = DataFrame(events, index=['a', 'b'], columns=multiind) + def f(): + zed['eyes']['right'].fillna(value=555, inplace=True) + self.assertRaises(com.SettingWithCopyError, f) + + df = DataFrame(np.random.randn(10,4)) + s = df.iloc[:,0].sort_values() + assert_series_equal(s,df.iloc[:,0].sort_values()) + assert_series_equal(s,df[0].sort_values()) + + # false positives GH6025 + df = DataFrame ({'column1':['a', 'a', 'a'], 'column2': [4,8,9] }) + str(df) + df['column1'] = df['column1'] + 'b' + str(df) + df = df [df['column2']!=8] + str(df) + df['column1'] = df['column1'] + 'c' + str(df) + + # from SO: http://stackoverflow.com/questions/24054495/potential-bug-setting-value-for-undefined-column-using-iloc + df = DataFrame(np.arange(0,9), columns=['count']) + df['group'] = 'b' + def f(): + df.iloc[0:5]['group'] = 'a' + self.assertRaises(com.SettingWithCopyError, f) + + # mixed type setting + # same dtype & changing dtype + df = DataFrame(dict(A=date_range('20130101',periods=5),B=np.random.randn(5),C=np.arange(5,dtype='int64'),D=list('abcde'))) + + def f(): + df.ix[2]['D'] = 'foo' + self.assertRaises(com.SettingWithCopyError, f) + def f(): + df.ix[2]['C'] = 'foo' + self.assertRaises(com.SettingWithCopyError, f) + def f(): + df['C'][2] = 'foo' + self.assertRaises(com.SettingWithCopyError, f) + + def test_setting_with_copy_bug(self): + + # operating on a copy + df = pd.DataFrame({'a': list(range(4)), 'b': list('ab..'), 'c': ['a', 'b', np.nan, 'd']}) + mask = pd.isnull(df.c) + + def f(): + df[['c']][mask] = df[['b']][mask] + self.assertRaises(com.SettingWithCopyError, f) + + # invalid warning as we are returning a new object + # GH 8730 + df1 = DataFrame({'x': Series(['a','b','c']), 'y': Series(['d','e','f'])}) + df2 = df1[['x']] + + # this should not raise + df2['y'] = ['g', 'h', 'i'] + + def test_detect_chained_assignment_warnings(self): + + # now just test for copy-on-write + df = DataFrame({'A':['aaa','bbb','ccc'],'B':[1,2,3]}) + expected = df.copy() + df.loc[0]['A'] = 111 + assert_frame_equal(df, expected) def test_float64index_slicing_bug(self): # GH 5557, related to slicing a float index @@ -4912,6 +4895,14 @@ def test_maybe_numeric_slice(self): expected = [1] self.assertEqual(result, expected) + def test_empty_indexers_return_view(self): + # Closes Issue 11814 + df = pd.DataFrame({'col1':range(10,20), + 'col2':range(20,30)}) + self.assertTrue(df.loc[:,:]._is_view) + self.assertTrue(df.iloc[:,:]._is_view) + self.assertTrue(df.ix[:,:]._is_view) + class TestCategoricalIndex(tm.TestCase): @@ -5196,6 +5187,7 @@ def test_boolean_selection(self): self.assertRaises(TypeError, lambda : df4[df4.index 
> 1]) + class TestSeriesNoneCoercion(tm.TestCase): EXPECTED_RESULTS = [ # For numeric series, we should coerce to NaN. diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 5b00ea163d85f..c52de064f8e3f 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -537,11 +537,12 @@ def test_xs_level(self): # this is a copy in 0.14 result = self.frame.xs('two', level='second') - # setting this will give a SettingWithCopyError - # as we are trying to write a view + # Setting should not propagate to the frame + original = self.frame.copy() def f(x): x[:] = 10 - self.assertRaises(com.SettingWithCopyError, f, result) + f(result) + assert_frame_equal(self.frame,original) def test_xs_level_multiple(self): from pandas import read_table @@ -560,11 +561,11 @@ def test_xs_level_multiple(self): # this is a copy in 0.14 result = df.xs(('a', 4), level=['one', 'four']) - # setting this will give a SettingWithCopyError - # as we are trying to write a view + # Make sure it doesn't propagate back to df. + original = df.copy() def f(x): x[:] = 10 - self.assertRaises(com.SettingWithCopyError, f, result) + f(result) + assert_frame_equal(df, original) # GH2107 dates = lrange(20111201, 20111205) @@ -1401,26 +1402,13 @@ def test_is_lexsorted(self): def test_frame_getitem_view(self): df = self.frame.T.copy() - # this works because we are modifying the underlying array - # really a no-no - df['foo'].values[:] = 0 - self.assertTrue((df['foo'].values == 0).all()) - - # but not if it's mixed-type - df['foo', 'four'] = 'foo' - df = df.sortlevel(0, axis=1) - - # this will work, but will raise/warn as its chained assignment + # chained assignment no longer propagates def f(): df['foo']['one'] = 2 return df - self.assertRaises(com.SettingWithCopyError, f) - try: - df = f() - except: - pass - self.assertTrue((df['foo', 'one'] == 0).all()) + df = f() + self.assertTrue((df['foo', 'one'] != 2).all()) def test_frame_getitem_not_sorted(self): df = self.frame.T diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index f12d851a6772d..65a4ea81dbf5d 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1631,9 +1631,8 @@ def test_to_frame_multi_major(self): result = wp.to_frame() assert_frame_equal(result, expected) - wp.iloc[0, 0].iloc[0] = np.nan # BUG on setting. 
GH #5773 result = wp.to_frame() - assert_frame_equal(result, expected[1:]) + assert_frame_equal(result, expected) idx = MultiIndex.from_tuples([(1, 'two'), (1, 'one'), (2, 'one'), (np.nan, 'two')]) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index d37ac530d02e8..1669e0916739a 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -289,11 +289,9 @@ def get_dir(s): with tm.assertRaisesRegexp(ValueError, "modifications"): s.dt.hour = 5 - # trying to set a copy - with pd.option_context('chained_assignment','raise'): - def f(): - s.dt.hour[0] = 5 - self.assertRaises(com.SettingWithCopyError, f) + # trying to set a copy + s.dt.hour[0] = 5 + def test_dt_accessor_no_new_attributes(self): # https://github.com/pydata/pandas/issues/10673 diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 9211ffb5cfde5..fd2227ea8a44e 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -27,11 +27,11 @@ @Appender(_merge_doc, indents=0) def merge(left, right, how='inner', on=None, left_on=None, right_on=None, left_index=False, right_index=False, sort=False, - suffixes=('_x', '_y'), copy=True, indicator=False): + suffixes=('_x', '_y'), indicator=False): op = _MergeOperation(left, right, how=how, on=on, left_on=left_on, right_on=right_on, left_index=left_index, right_index=right_index, sort=sort, suffixes=suffixes, - copy=copy, indicator=indicator) + indicator=indicator) return op.get_result() if __debug__: merge.__doc__ = _merge_doc % '\nleft : DataFrame' @@ -157,7 +157,7 @@ class _MergeOperation(object): def __init__(self, left, right, how='inner', on=None, left_on=None, right_on=None, axis=1, left_index=False, right_index=False, sort=True, - suffixes=('_x', '_y'), copy=True, indicator=False): + suffixes=('_x', '_y'), indicator=False): self.left = self.orig_left = left self.right = self.orig_right = right self.how = how @@ -167,7 +167,6 @@ def __init__(self, left, right, how='inner', on=None, self.left_on = com._maybe_make_list(left_on) self.right_on = com._maybe_make_list(right_on) - self.copy = copy self.suffixes = suffixes self.sort = sort @@ -207,7 +206,7 @@ def get_result(self): result_data = concatenate_block_managers( [(ldata, lindexers), (rdata, rindexers)], axes=[llabels.append(rlabels), join_index], - concat_axis=0, copy=self.copy) + concat_axis=0, copy=True) typ = self.left._constructor result = typ(result_data).__finalize__(self, method='merge') @@ -569,7 +568,7 @@ def get_result(self): result_data = concatenate_block_managers( [(ldata, lindexers), (rdata, rindexers)], axes=[llabels.append(rlabels), join_index], - concat_axis=0, copy=self.copy) + concat_axis=0, copy=True) typ = self.left._constructor result = typ(result_data).__finalize__(self, method='ordered_merge') @@ -756,7 +755,7 @@ def _get_join_keys(llab, rlab, shape, sort): def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, - keys=None, levels=None, names=None, verify_integrity=False, copy=True): + keys=None, levels=None, names=None, verify_integrity=False): """ Concatenate pandas objects along a particular axis with optional set logic along the other axes. Can also add a layer of hierarchical indexing on the @@ -794,8 +793,6 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, concatenating objects where the concatenation axis does not have meaningful indexing information. Note the index values on the other axes are still respected in the join. 
- copy : boolean, default True - If False, do not copy data unnecessarily Notes ----- @@ -808,8 +805,7 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, op = _Concatenator(objs, axis=axis, join_axes=join_axes, ignore_index=ignore_index, join=join, keys=keys, levels=levels, names=names, - verify_integrity=verify_integrity, - copy=copy) + verify_integrity=verify_integrity) return op.get_result() @@ -820,7 +816,7 @@ class _Concatenator(object): def __init__(self, objs, axis=0, join='outer', join_axes=None, keys=None, levels=None, names=None, - ignore_index=False, verify_integrity=False, copy=True): + ignore_index=False, verify_integrity=False): if isinstance(objs, (NDFrame, compat.string_types)): raise TypeError('first argument must be an iterable of pandas ' 'objects, you passed an object of type ' @@ -944,7 +940,6 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None, self.ignore_index = ignore_index self.verify_integrity = verify_integrity - self.copy = copy self.new_axes = self._get_new_axes() @@ -992,9 +987,7 @@ def get_result(self): mgrs_indexers.append((obj._data, indexers)) new_data = concatenate_block_managers( - mgrs_indexers, self.new_axes, concat_axis=self.axis, copy=self.copy) - if not self.copy: - new_data._consolidate_inplace() + mgrs_indexers, self.new_axes, concat_axis=self.axis, copy=True) return self.objs[0]._from_axes(new_data, self.new_axes).__finalize__(self, method='concat') diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py index 6db2d2e15f699..53cdc4720d738 100644 --- a/pandas/tools/tests/test_merge.py +++ b/pandas/tools/tests/test_merge.py @@ -609,7 +609,7 @@ def test_merge_copy(self): right = DataFrame({'c': 'foo', 'd': 'bar'}, index=lrange(10)) merged = merge(left, right, left_index=True, - right_index=True, copy=True) + right_index=True) merged['a'] = 6 self.assertTrue((left['a'] == 0).all()) @@ -617,19 +617,16 @@ def test_merge_copy(self): merged['d'] = 'peekaboo' self.assertTrue((right['d'] == 'bar').all()) - def test_merge_nocopy(self): - left = DataFrame({'a': 0, 'b': 1}, index=lrange(10)) - right = DataFrame({'c': 'foo', 'd': 'bar'}, index=lrange(10)) - - merged = merge(left, right, left_index=True, - right_index=True, copy=False) - - merged['a'] = 6 - self.assertTrue((left['a'] == 6).all()) - - merged['d'] = 'peekaboo' - self.assertTrue((right['d'] == 'peekaboo').all()) + def test_merge_nocopy(self): + # disabled in copy-on-write paradigm + left = DataFrame({'a': 0, 'b': 1}, index=lrange(10)) + right = DataFrame({'c': 'foo', 'd': 'bar'}, index=lrange(10)) + + with tm.assertRaises(TypeError): + merge(left, right, left_index=True, + right_index=True, copy=False) + def test_join_sort(self): left = DataFrame({'key': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 4]}) @@ -1942,30 +1939,17 @@ def test_concat_copy(self): df3 = DataFrame({5 : 'foo'},index=range(4)) # these are actual copies - result = concat([df,df2,df3],axis=1,copy=True) + result = concat([df,df2,df3],axis=1) for b in result._data.blocks: self.assertIsNone(b.values.base) - # these are the same - result = concat([df,df2,df3],axis=1,copy=False) - for b in result._data.blocks: - if b.is_float: - self.assertTrue(b.values.base is df._data.blocks[0].values.base) - elif b.is_integer: - self.assertTrue(b.values.base is df2._data.blocks[0].values.base) - elif b.is_object: - self.assertIsNotNone(b.values.base) - - # float block was consolidated - df4 = DataFrame(np.random.randn(4,1)) - result = concat([df,df2,df3,df4],axis=1,copy=False) - 
for b in result._data.blocks: - if b.is_float: - self.assertIsNone(b.values.base) - elif b.is_integer: - self.assertTrue(b.values.base is df2._data.blocks[0].values.base) - elif b.is_object: - self.assertIsNotNone(b.values.base) + # Concat copy argument removed in copy-on-write + with tm.assertRaises(TypeError): + result = concat([df,df2,df3],axis=1,copy=False) + + df4 = DataFrame(np.random.randn(4,1)) + with tm.assertRaises(TypeError): + result = concat([df,df2,df3,df4],axis=1,copy=False) def test_concat_with_group_keys(self): df = DataFrame(np.random.randn(4, 3))
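
The behavior the new tests exercise can be summarized with the short sketch below. It is illustrative only and assumes a build with this patch applied (on an unpatched pandas the first two assertions do not hold); it uses only public indexing API, no internal attributes.

import pandas as pd

df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})

# Row slices start out as views but are detached (copied) before any write,
# so mutations no longer propagate in either direction.
sliced = df.loc[0:0, :]
df.loc[0, 'col1'] = -88
assert sliced.loc[0, 'col1'] == 1    # no forward propagation

sliced.loc[0, 'col2'] = 99
assert df.loc[0, 'col2'] == 3        # no backward propagation

# Dictionary-style access to a single column stays a live view,
# so changes flow both ways between parent and child.
col = df['col1']
col.loc[1] = -5
assert df.loc[1, 'col1'] == -5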