Skip to content
33 changes: 18 additions & 15 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ class DataFrame(NDFrame):
np.arange(n) if no column labels are provided
dtype : dtype, default None
Data type to force, otherwise infer
copy : boolean, default False
copy : boolean, default True
Copy data from inputs. Only affects DataFrame / 2d ndarray input

Examples
Expand Down Expand Up @@ -211,13 +211,17 @@ def _constructor_expanddim(self):

def __init__(self, data=None, index=None, columns=None, dtype=None,
copy=False):

parent = None

if data is None:
data = {}
if dtype is not None:
dtype = self._validate_dtype(dtype)

if isinstance(data, DataFrame):
data = data._data
parent = data
data = data._get_view()._data

if isinstance(data, BlockManager):
mgr = self._init_mgr(data, axes=dict(index=index, columns=columns),
Expand Down Expand Up @@ -306,6 +310,9 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,

NDFrame.__init__(self, mgr, fastpath=True)

if parent is not None:
parent._register_new_child(self)

def _init_dict(self, data, index, columns, dtype=None):
"""
Segregate Series based on type and coerce into matrices.
Expand Down Expand Up @@ -1963,8 +1970,10 @@ def __getitem__(self, key):
# shortcut if we are an actual column
is_mi_columns = isinstance(self.columns, MultiIndex)
try:
if key in self.columns and not is_mi_columns:
return self._getitem_column(key)
if key in self.columns:
result = self._getitem_column(key)
result._is_column_view = True
return result
except:
pass

Expand Down Expand Up @@ -2338,7 +2347,6 @@ def __setitem__(self, key, value):
self._set_item(key, value)

def _setitem_slice(self, key, value):
self._check_setitem_copy()
self.ix._setitem_with_indexer(key, value)

def _setitem_array(self, key, value):
Expand All @@ -2349,7 +2357,6 @@ def _setitem_array(self, key, value):
(len(key), len(self.index)))
key = check_bool_indexer(self.index, key)
indexer = key.nonzero()[0]
self._check_setitem_copy()
self.ix._setitem_with_indexer(indexer, value)
else:
if isinstance(value, DataFrame):
Expand All @@ -2359,7 +2366,6 @@ def _setitem_array(self, key, value):
self[k1] = value[k2]
else:
indexer = self.ix._convert_to_indexer(key, axis=1)
self._check_setitem_copy()
self.ix._setitem_with_indexer((slice(None), indexer), value)

def _setitem_frame(self, key, value):
Expand All @@ -2369,7 +2375,6 @@ def _setitem_frame(self, key, value):
raise TypeError('Must pass DataFrame with boolean values only')

self._check_inplace_setting(value)
self._check_setitem_copy()
self.where(-key, value, inplace=True)

def _ensure_valid_index(self, value):
Expand Down Expand Up @@ -2405,11 +2410,6 @@ def _set_item(self, key, value):
value = self._sanitize_column(key, value)
NDFrame._set_item(self, key, value)

# check if we are modifying a copy
# try to set first as we want an invalid
# value exeption to occur first
if len(self):
self._check_setitem_copy()

def insert(self, loc, column, value, allow_duplicates=False):
"""
Expand Down Expand Up @@ -4377,12 +4377,12 @@ def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='',
@Appender(_merge_doc, indents=2)
def merge(self, right, how='inner', on=None, left_on=None, right_on=None,
left_index=False, right_index=False, sort=False,
suffixes=('_x', '_y'), copy=True, indicator=False):
suffixes=('_x', '_y'), indicator=False):
from pandas.tools.merge import merge
return merge(self, right, how=how, on=on,
left_on=left_on, right_on=right_on,
left_index=left_index, right_index=right_index, sort=sort,
suffixes=suffixes, copy=copy, indicator=indicator)
suffixes=suffixes, indicator=indicator)

def round(self, decimals=0, out=None):
"""
Expand Down Expand Up @@ -5227,6 +5227,9 @@ def combineMult(self, other):
FutureWarning, stacklevel=2)
return self.mul(other, fill_value=1.)

def _get_view(self):
return self.loc[:,:]


DataFrame._setup_axes(['index', 'columns'], info_axis=1, stat_axis=0,
axes_are_reversed=True, aliases={'rows': 0})
Expand Down
155 changes: 49 additions & 106 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,14 +83,18 @@ class NDFrame(PandasObject):
_internal_names = ['_data', '_cacher', '_item_cache', '_cache',
'is_copy', '_subtyp', '_index',
'_default_kind', '_default_fill_value', '_metadata',
'__array_struct__', '__array_interface__']
'__array_struct__', '__array_interface__', '_children',
'_is_column_view', '_original_parent']
_internal_names_set = set(_internal_names)
_accessors = frozenset([])
_metadata = []
is_copy = None

_is_column_view = None
_original_parent = None
_children = None

def __init__(self, data, axes=None, copy=False, dtype=None,
fastpath=False):
fastpath=False, ):

if not fastpath:
if dtype is not None:
Expand All @@ -105,6 +109,10 @@ def __init__(self, data, axes=None, copy=False, dtype=None,
object.__setattr__(self, 'is_copy', None)
object.__setattr__(self, '_data', data)
object.__setattr__(self, '_item_cache', {})
object.__setattr__(self, '_children', weakref.WeakValueDictionary())
object.__setattr__(self, '_is_column_view', False)
object.__setattr__(self, '_original_parent', weakref.WeakValueDictionary())


def _validate_dtype(self, dtype):
""" validate the passed dtype """
Expand Down Expand Up @@ -469,7 +477,8 @@ def transpose(self, *args, **kwargs):
raise TypeError('transpose() got an unexpected keyword '
'argument "{0}"'.format(list(kwargs.keys())[0]))

return self._constructor(new_values, **new_axes).__finalize__(self)
result = self._constructor(new_values, **new_axes).__finalize__(self)
return result.copy()

def swapaxes(self, axis1, axis2, copy=True):
"""
Expand Down Expand Up @@ -1077,13 +1086,16 @@ def get(self, key, default=None):
-------
value : type of items contained in object
"""

try:
return self[key]
except (KeyError, ValueError, IndexError):
return default

def __getitem__(self, item):
return self._get_item_cache(item)
result = self._get_item_cache(item)

return result

def _get_item_cache(self, item):
""" return the cached item, item represents a label indexer """
Expand Down Expand Up @@ -1177,9 +1189,6 @@ def _maybe_update_cacher(self, clear=False, verify_is_copy=True):
except:
pass

if verify_is_copy:
self._check_setitem_copy(stacklevel=5, t='referant')

if clear:
self._clear_item_cache()

Expand All @@ -1204,9 +1213,20 @@ def _slice(self, slobj, axis=0, kind=None):
# but only in a single-dtyped view slicable case
is_copy = axis!=0 or result._is_view
result._set_is_copy(self, copy=is_copy)

self._register_new_child(result)

return result

def _set_item(self, key, value):

if hasattr(self, 'columns'):
if key in self.columns:
# If children are views, reset to copies before setting.
self._execute_copy_on_write()
else:
self._execute_copy_on_write()

self._data.set(key, value)
self._clear_item_cache()

Expand All @@ -1219,104 +1239,22 @@ def _set_is_copy(self, ref=None, copy=True):
else:
self.is_copy = None

def _check_is_chained_assignment_possible(self):
"""
check if we are a view, have a cacher, and are of mixed type
if so, then force a setitem_copy check

should be called just near setting a value

will return a boolean if it we are a view and are cached, but a single-dtype
meaning that the cacher should be updated following setting
"""
if self._is_view and self._is_cached:
ref = self._get_cacher()
if ref is not None and ref._is_mixed_type:
self._check_setitem_copy(stacklevel=4, t='referant', force=True)
return True
elif self.is_copy:
self._check_setitem_copy(stacklevel=4, t='referant')
return False

def _check_setitem_copy(self, stacklevel=4, t='setting', force=False):
"""

Parameters
----------
stacklevel : integer, default 4
the level to show of the stack when the error is output
t : string, the type of setting error
force : boolean, default False
if True, then force showing an error

validate if we are doing a settitem on a chained copy.

If you call this function, be sure to set the stacklevel such that the
user will see the error *at the level of setting*

It is technically possible to figure out that we are setting on
a copy even WITH a multi-dtyped pandas object. In other words, some blocks
may be views while other are not. Currently _is_view will ALWAYS return False
for multi-blocks to avoid having to handle this case.

df = DataFrame(np.arange(0,9), columns=['count'])
df['group'] = 'b'

# this technically need not raise SettingWithCopy if both are view (which is not
# generally guaranteed but is usually True
# however, this is in general not a good practice and we recommend using .loc
df.iloc[0:5]['group'] = 'a'

"""

if force or self.is_copy:

value = config.get_option('mode.chained_assignment')
if value is None:
return

# see if the copy is not actually refererd; if so, then disolve
# the copy weakref
try:
gc.collect(2)
if not gc.get_referents(self.is_copy()):
self.is_copy = None
return
except:
pass

# we might be a false positive
try:
if self.is_copy().shape == self.shape:
self.is_copy = None
return
except:
pass

# a custom message
if isinstance(self.is_copy, string_types):
t = self.is_copy

elif t == 'referant':
t = ("\n"
"A value is trying to be set on a copy of a slice from a "
"DataFrame\n\n"
"See the caveats in the documentation: "
"http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy")

else:
t = ("\n"
"A value is trying to be set on a copy of a slice from a "
"DataFrame.\n"
"Try using .loc[row_indexer,col_indexer] = value instead\n\n"
"See the caveats in the documentation: "
"http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy")

if value == 'raise':
raise SettingWithCopyError(t)
elif value == 'warn':
warnings.warn(t, SettingWithCopyWarning, stacklevel=stacklevel)

def _execute_copy_on_write(self):

# Don't set on views.
if (self._is_view and not self._is_column_view) or len(self._children) is not 0:
self._data = self._data.copy()
self._children = weakref.WeakValueDictionary()


def _register_new_child(self, view_to_append):
self._children[id(view_to_append)] = view_to_append

if len(self._original_parent) is 0:
view_to_append._original_parent['parent'] = self
else:
self._original_parent['parent']._register_new_child(view_to_append)

def __delitem__(self, key):
"""
Delete item
Expand Down Expand Up @@ -2383,6 +2321,7 @@ def __finalize__(self, other, method=None, **kwargs):
return self

def __getattr__(self, name):

"""After regular attribute access, try looking up the name
This allows simpler access to columns for interactive use.
"""
Expand All @@ -2405,6 +2344,10 @@ def __setattr__(self, name, value):
# e.g. ``obj.x`` and ``obj.x = 4`` will always reference/modify
# the same attribute.

if hasattr(self, 'columns'):
if name in self.columns:
self._execute_copy_on_write()

try:
object.__getattribute__(self, name)
return object.__setattr__(self, name, value)
Expand Down
Loading