Skip to content

Commit

Permalink
Merge pull request #4830 from jtratner/copy-index-and-columns
Browse files Browse the repository at this point in the history
BUG: Fix copy s.t. it always copies index/columns.
  • Loading branch information
jtratner committed Sep 24, 2013
2 parents 54349d1 + 42d1d74 commit d7d9a6c
Show file tree
Hide file tree
Showing 13 changed files with 116 additions and 89 deletions.
2 changes: 2 additions & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,8 @@ Bug Fixes
- Tests for fillna on empty Series (:issue:`4346`), thanks @immerrr
- Fixed a bug where ``ValueError`` wasn't correctly raised when column names
weren't strings (:issue:`4956`)
- Fixed ``copy()`` so that it also shallow-copies the axes/indices, so that the
copy keeps its own metadata separate from the original. (:issue:`4202`, :issue:`4830`)

pandas 0.12.0
-------------
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1988,7 +1988,7 @@ def transform(self, func, *args, **kwargs):

# broadcasting
if isinstance(res, Series):
if res.index is obj.index:
if res.index.is_(obj.index):
group.T.values[:] = res
else:
group.values[:] = res
Expand Down
76 changes: 42 additions & 34 deletions pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import pandas.core.common as com
from pandas.core.common import _values_from_object
from pandas.core.config import get_option
import warnings


__all__ = ['Index']
Expand All @@ -27,6 +26,7 @@ def _indexOp(opname):
Wrapper function for index comparison operations, to avoid
code duplication.
"""

def wrapper(self, other):
func = getattr(self.view(np.ndarray), opname)
result = func(other)
Expand Down Expand Up @@ -54,6 +54,7 @@ def _shouldbe_timestamp(obj):


class Index(FrozenNDArray):

"""
Immutable ndarray implementing an ordered, sliceable set. The basic object
storing axis labels for all pandas objects
Expand Down Expand Up @@ -160,7 +161,7 @@ def __new__(cls, data, dtype=None, copy=False, name=None, fastpath=False,

elif np.isscalar(data):
raise TypeError('Index(...) must be called with a collection '
'of some kind, %s was passed' % repr(data))
'of some kind, %s was passed' % repr(data))
else:
# other iterable of some kind
subarr = com._asarray_tuplesafe(data, dtype=object)
Expand All @@ -171,7 +172,7 @@ def __new__(cls, data, dtype=None, copy=False, name=None, fastpath=False,
return Int64Index(subarr.astype('i8'), copy=copy, name=name)
elif inferred != 'string':
if (inferred.startswith('datetime') or
tslib.is_timestamp_array(subarr)):
tslib.is_timestamp_array(subarr)):
from pandas.tseries.index import DatetimeIndex
return DatetimeIndex(data, copy=copy, name=name, **kwargs)
elif inferred == 'period':
Expand Down Expand Up @@ -234,7 +235,7 @@ def to_series(self):
useful with map for returning an indexer based on an index
"""
import pandas as pd
return pd.Series(self.values,index=self,name=self.name)
return pd.Series(self.values, index=self, name=self.name)

def astype(self, dtype):
return Index(self.values.astype(dtype), name=self.name,
Expand Down Expand Up @@ -279,7 +280,7 @@ def _get_names(self):
def _set_names(self, values):
if len(values) != 1:
raise ValueError('Length of new names must be 1, got %d'
% len(values))
% len(values))
self.name = values[0]

names = property(fset=_set_names, fget=_get_names)
Expand Down Expand Up @@ -335,11 +336,11 @@ def _has_complex_internals(self):
def summary(self, name=None):
if len(self) > 0:
head = self[0]
if hasattr(head,'format') and\
if hasattr(head, 'format') and\
not isinstance(head, compat.string_types):
head = head.format()
tail = self[-1]
if hasattr(tail,'format') and\
if hasattr(tail, 'format') and\
not isinstance(tail, compat.string_types):
tail = tail.format()
index_summary = ', %s to %s' % (com.pprint_thing(head),
Expand Down Expand Up @@ -571,7 +572,7 @@ def to_native_types(self, slicer=None, **kwargs):
def _format_native_types(self, na_rep='', **kwargs):
""" actually format my specific types """
mask = isnull(self)
values = np.array(self,dtype=object,copy=True)
values = np.array(self, dtype=object, copy=True)
values[mask] = na_rep
return values.tolist()

Expand All @@ -595,7 +596,7 @@ def identical(self, other):
Similar to equals, but check that other comparable attributes are also equal
"""
return self.equals(other) and all(
( getattr(self,c,None) == getattr(other,c,None) for c in self._comparables ))
(getattr(self, c, None) == getattr(other, c, None) for c in self._comparables))

def asof(self, label):
"""
Expand Down Expand Up @@ -886,7 +887,8 @@ def set_value(self, arr, key, value):
Fast lookup of value from 1-dimensional ndarray. Only use this if you
know what you're doing
"""
self._engine.set_value(_values_from_object(arr), _values_from_object(key), value)
self._engine.set_value(
_values_from_object(arr), _values_from_object(key), value)

def get_level_values(self, level):
"""
Expand Down Expand Up @@ -1357,7 +1359,7 @@ def slice_locs(self, start=None, end=None):

# get_loc will return a boolean array for non_uniques
# if we are not monotonic
if isinstance(start_slice,np.ndarray):
if isinstance(start_slice, np.ndarray):
raise KeyError("cannot peform a slice operation "
"on a non-unique non-monotonic index")

Expand All @@ -1379,7 +1381,7 @@ def slice_locs(self, start=None, end=None):
if not is_unique:

# get_loc will return a boolean array for non_uniques
if isinstance(end_slice,np.ndarray):
if isinstance(end_slice, np.ndarray):
raise KeyError("cannot perform a slice operation "
"on a non-unique non-monotonic index")

Expand Down Expand Up @@ -1447,6 +1449,7 @@ def drop(self, labels):


class Int64Index(Index):

"""
Immutable ndarray implementing an ordered, sliceable set. The basic object
storing axis labels for all pandas objects. Int64Index is a special case of `Index`
Expand Down Expand Up @@ -1579,6 +1582,7 @@ def _wrap_joined_index(self, joined, other):


class MultiIndex(Index):

"""
Implements multi-level, a.k.a. hierarchical, index object for pandas
objects
Expand Down Expand Up @@ -1625,7 +1629,6 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None,
if names is not None:
subarr._set_names(names)


if sortorder is not None:
subarr.sortorder = int(sortorder)
else:
Expand All @@ -1636,7 +1639,6 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None,
def _get_levels(self):
return self._levels


def _set_levels(self, levels, copy=False):
# This is NOT part of the levels property because it should be
# externally not allowed to set levels. User beware if you change
Expand Down Expand Up @@ -1686,7 +1688,7 @@ def _get_labels(self):
def _set_labels(self, labels, copy=False):
if len(labels) != self.nlevels:
raise ValueError("Length of levels and labels must be the same.")
self._labels = FrozenList(_ensure_frozen(labs,copy=copy)._shallow_copy()
self._labels = FrozenList(_ensure_frozen(labs, copy=copy)._shallow_copy()
for labs in labels)

def set_labels(self, labels, inplace=False):
Expand Down Expand Up @@ -1811,13 +1813,13 @@ def _set_names(self, values):
values = list(values)
if len(values) != self.nlevels:
raise ValueError('Length of names (%d) must be same as level '
'(%d)' % (len(values),self.nlevels))
'(%d)' % (len(values), self.nlevels))
# set the name
for name, level in zip(values, self.levels):
level.rename(name, inplace=True)


names = property(fset=_set_names, fget=_get_names, doc="Names of levels in MultiIndex")
names = property(
fset=_set_names, fget=_get_names, doc="Names of levels in MultiIndex")

def _format_native_types(self, **kwargs):
return self.tolist()
Expand Down Expand Up @@ -1845,7 +1847,7 @@ def _get_level_number(self, level):
count = self.names.count(level)
if count > 1:
raise ValueError('The name %s occurs multiple times, use a '
'level number' % level)
'level number' % level)
level = self.names.index(level)
except ValueError:
if not isinstance(level, int):
Expand Down Expand Up @@ -1980,9 +1982,9 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False,
formatted = lev.take(lab).format(formatter=formatter)

# we have some NA
mask = lab==-1
mask = lab == -1
if mask.any():
formatted = np.array(formatted,dtype=object)
formatted = np.array(formatted, dtype=object)
formatted[mask] = na_rep
formatted = formatted.tolist()

Expand All @@ -2000,7 +2002,6 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False,
level.append(com.pprint_thing(name, escape_chars=('\t', '\r', '\n'))
if name is not None else '')


level.extend(np.array(lev, dtype=object))
result_levels.append(level)

Expand All @@ -2010,8 +2011,9 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False,
if sparsify:
sentinal = ''
# GH3547
# use value of sparsify as sentinal, unless it's an obvious "Truthey" value
if sparsify not in [True,1]:
# use value of sparsify as sentinal, unless it's an obvious
# "Truthey" value
if sparsify not in [True, 1]:
sentinal = sparsify
# little bit of a kludge job for #1217
result_levels = _sparsify(result_levels,
Expand Down Expand Up @@ -2138,7 +2140,8 @@ def __contains__(self, key):
def __reduce__(self):
"""Necessary for making this object picklable"""
object_state = list(np.ndarray.__reduce__(self))
subclass_state = (list(self.levels), list(self.labels), self.sortorder, list(self.names))
subclass_state = (list(self.levels), list(
self.labels), self.sortorder, list(self.names))
object_state[2] = (object_state[2], subclass_state)
return tuple(object_state)

Expand Down Expand Up @@ -2490,7 +2493,8 @@ def reindex(self, target, method=None, level=None, limit=None,
"with a method or limit")
return self[target], target

raise Exception("cannot handle a non-takeable non-unique multi-index!")
raise Exception(
"cannot handle a non-takeable non-unique multi-index!")

if not isinstance(target, MultiIndex):
if indexer is None:
Expand Down Expand Up @@ -2685,12 +2689,13 @@ def partial_selection(key):

# here we have a completely specified key, but are using some partial string matching here
# GH4758
can_index_exactly = any([ l.is_all_dates and not isinstance(k,compat.string_types) for k, l in zip(key, self.levels) ])
if any([ l.is_all_dates for k, l in zip(key, self.levels) ]) and not can_index_exactly:
can_index_exactly = any(
[l.is_all_dates and not isinstance(k, compat.string_types) for k, l in zip(key, self.levels)])
if any([l.is_all_dates for k, l in zip(key, self.levels)]) and not can_index_exactly:
indexer = slice(*self.slice_locs(key, key))

# we have a multiple selection here
if not indexer.stop-indexer.start == 1:
if not indexer.stop - indexer.start == 1:
return partial_selection(key)

key = tuple(self[indexer].tolist()[0])
Expand Down Expand Up @@ -2913,7 +2918,8 @@ def _assert_can_do_setop(self, other):

def astype(self, dtype):
if np.dtype(dtype) != np.object_:
raise TypeError("Setting %s dtype to anything other than object is not supported" % self.__class__)
raise TypeError(
"Setting %s dtype to anything other than object is not supported" % self.__class__)
return self._shallow_copy()

def insert(self, loc, item):
Expand All @@ -2935,7 +2941,8 @@ def insert(self, loc, item):
if not isinstance(item, tuple):
item = (item,) + ('',) * (self.nlevels - 1)
elif len(item) != self.nlevels:
raise ValueError('Item must have length equal to number of levels.')
raise ValueError(
'Item must have length equal to number of levels.')

new_levels = []
new_labels = []
Expand Down Expand Up @@ -2990,7 +2997,7 @@ def _wrap_joined_index(self, joined, other):

# For utility purposes

def _sparsify(label_list, start=0,sentinal=''):
def _sparsify(label_list, start=0, sentinal=''):
pivoted = lzip(*label_list)
k = len(label_list)

Expand Down Expand Up @@ -3031,7 +3038,7 @@ def _ensure_index(index_like, copy=False):
if isinstance(index_like, list):
if type(index_like) != list:
index_like = list(index_like)
# #2200 ?
# 2200 ?
converted, all_arrays = lib.clean_index_list(index_like)

if len(converted) > 0 and all_arrays:
Expand Down Expand Up @@ -3169,7 +3176,8 @@ def _get_consensus_names(indexes):

# find the non-none names, need to tupleify to make
# the set hashable, then reverse on return
consensus_names = set([ tuple(i.names) for i in indexes if all(n is not None for n in i.names) ])
consensus_names = set([tuple(i.names)
for i in indexes if all(n is not None for n in i.names)])
if len(consensus_names) == 1:
return list(list(consensus_names)[0])
return [None] * indexes[0].nlevels
Expand Down
8 changes: 6 additions & 2 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -2334,8 +2334,12 @@ def copy(self, deep=True):
-------
copy : BlockManager
"""
new_axes = list(self.axes)
return self.apply('copy', axes=new_axes, deep=deep, do_integrity_check=False)
if deep:
new_axes = [ax.view() for ax in self.axes]
else:
new_axes = list(self.axes)
return self.apply('copy', axes=new_axes, deep=deep,
ref_items=new_axes[0], do_integrity_check=False)

def as_matrix(self, items=None):
if len(self.blocks) == 0:
Expand Down
20 changes: 13 additions & 7 deletions pandas/sparse/panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,19 +235,25 @@ def __setstate__(self, state):
self._minor_axis = _ensure_index(com._unpickle_array(minor))
self._frames = frames

def copy(self):
def copy(self, deep=True):
"""
Make a (shallow) copy of the sparse panel
Make a copy of the sparse panel
Returns
-------
copy : SparsePanel
"""
return SparsePanel(self._frames.copy(), items=self.items,
major_axis=self.major_axis,
minor_axis=self.minor_axis,
default_fill_value=self.default_fill_value,
default_kind=self.default_kind)

d = self._construct_axes_dict()
if deep:
new_data = dict((k, v.copy(deep=True)) for k, v in compat.iteritems(self._frames))
d = dict((k, v.copy(deep=True)) for k, v in compat.iteritems(d))
else:
new_data = self._frames.copy()
d['default_fill_value']=self.default_fill_value
d['default_kind']=self.default_kind

return SparsePanel(new_data, **d)

def to_frame(self, filter_observations=True):
"""
Expand Down
Loading

0 comments on commit d7d9a6c

Please sign in to comment.