Merge pull request #4830 from jtratner/copy-index-and-columns

BUG: Fix copy s.t. it always copies index/columns.
pandas-dev · Sep 24, 2013 · d7d9a6c · d7d9a6c
2 parents 54349d1 + 42d1d74
commit d7d9a6c
Show file tree

Hide file tree

Showing 13 changed files with 116 additions and 89 deletions.
diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -455,6 +455,8 @@ Bug Fixes
   - Tests for fillna on empty Series (:issue:`4346`), thanks @immerrr
   - Fixed a bug where ``ValueError`` wasn't correctly raised when column names
     weren't strings (:issue:`4956`)
+  - Fixed ``copy()`` to shallow copy axes/indices as well and thereby keep
+    separate metadata. (:issue:`4202`, :issue:`4830`)
 
 pandas 0.12.0
 -------------

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
@@ -1988,7 +1988,7 @@ def transform(self, func, *args, **kwargs):
 
             # broadcasting
             if isinstance(res, Series):
-                if res.index is obj.index:
+                if res.index.is_(obj.index):
                     group.T.values[:] = res
                 else:
                     group.values[:] = res

diff --git a/pandas/core/index.py b/pandas/core/index.py
@@ -16,7 +16,6 @@
 import pandas.core.common as com
 from pandas.core.common import _values_from_object
 from pandas.core.config import get_option
-import warnings
 
 
 __all__ = ['Index']
@@ -27,6 +26,7 @@ def _indexOp(opname):
     Wrapper function for index comparison operations, to avoid
     code duplication.
     """
+
     def wrapper(self, other):
         func = getattr(self.view(np.ndarray), opname)
         result = func(other)
@@ -54,6 +54,7 @@ def _shouldbe_timestamp(obj):
 
 
 class Index(FrozenNDArray):
+
     """
     Immutable ndarray implementing an ordered, sliceable set. The basic object
     storing axis labels for all pandas objects
@@ -160,7 +161,7 @@ def __new__(cls, data, dtype=None, copy=False, name=None, fastpath=False,
 
         elif np.isscalar(data):
             raise TypeError('Index(...) must be called with a collection '
-                             'of some kind, %s was passed' % repr(data))
+                            'of some kind, %s was passed' % repr(data))
         else:
             # other iterable of some kind
             subarr = com._asarray_tuplesafe(data, dtype=object)
@@ -171,7 +172,7 @@ def __new__(cls, data, dtype=None, copy=False, name=None, fastpath=False,
                 return Int64Index(subarr.astype('i8'), copy=copy, name=name)
             elif inferred != 'string':
                 if (inferred.startswith('datetime') or
-                    tslib.is_timestamp_array(subarr)):
+                        tslib.is_timestamp_array(subarr)):
                     from pandas.tseries.index import DatetimeIndex
                     return DatetimeIndex(data, copy=copy, name=name, **kwargs)
                 elif inferred == 'period':
@@ -234,7 +235,7 @@ def to_series(self):
         useful with map for returning an indexer based on an index
         """
         import pandas as pd
-        return pd.Series(self.values,index=self,name=self.name)
+        return pd.Series(self.values, index=self, name=self.name)
 
     def astype(self, dtype):
         return Index(self.values.astype(dtype), name=self.name,
@@ -279,7 +280,7 @@ def _get_names(self):
     def _set_names(self, values):
         if len(values) != 1:
             raise ValueError('Length of new names must be 1, got %d'
-                                 % len(values))
+                             % len(values))
         self.name = values[0]
 
     names = property(fset=_set_names, fget=_get_names)
@@ -335,11 +336,11 @@ def _has_complex_internals(self):
     def summary(self, name=None):
         if len(self) > 0:
             head = self[0]
-            if hasattr(head,'format') and\
+            if hasattr(head, 'format') and\
                not isinstance(head, compat.string_types):
                 head = head.format()
             tail = self[-1]
-            if hasattr(tail,'format') and\
+            if hasattr(tail, 'format') and\
                not isinstance(tail, compat.string_types):
                 tail = tail.format()
             index_summary = ', %s to %s' % (com.pprint_thing(head),
@@ -571,7 +572,7 @@ def to_native_types(self, slicer=None, **kwargs):
     def _format_native_types(self, na_rep='', **kwargs):
         """ actually format my specific types """
         mask = isnull(self)
-        values = np.array(self,dtype=object,copy=True)
+        values = np.array(self, dtype=object, copy=True)
         values[mask] = na_rep
         return values.tolist()
 
@@ -595,7 +596,7 @@ def identical(self, other):
         Similar to equals, but check that other comparable attributes are also equal
         """
         return self.equals(other) and all(
-            ( getattr(self,c,None) == getattr(other,c,None) for c in self._comparables ))
+            (getattr(self, c, None) == getattr(other, c, None) for c in self._comparables))
 
     def asof(self, label):
         """
@@ -886,7 +887,8 @@ def set_value(self, arr, key, value):
         Fast lookup of value from 1-dimensional ndarray. Only use this if you
         know what you're doing
         """
-        self._engine.set_value(_values_from_object(arr), _values_from_object(key), value)
+        self._engine.set_value(
+            _values_from_object(arr), _values_from_object(key), value)
 
     def get_level_values(self, level):
         """
@@ -1357,7 +1359,7 @@ def slice_locs(self, start=None, end=None):
 
                     # get_loc will return a boolean array for non_uniques
                     # if we are not monotonic
-                    if isinstance(start_slice,np.ndarray):
+                    if isinstance(start_slice, np.ndarray):
                         raise KeyError("cannot peform a slice operation "
                                        "on a non-unique non-monotonic index")
 
@@ -1379,7 +1381,7 @@ def slice_locs(self, start=None, end=None):
                 if not is_unique:
 
                     # get_loc will return a boolean array for non_uniques
-                    if isinstance(end_slice,np.ndarray):
+                    if isinstance(end_slice, np.ndarray):
                         raise KeyError("cannot perform a slice operation "
                                        "on a non-unique non-monotonic index")
 
@@ -1447,6 +1449,7 @@ def drop(self, labels):
 
 
 class Int64Index(Index):
+
     """
     Immutable ndarray implementing an ordered, sliceable set. The basic object
     storing axis labels for all pandas objects. Int64Index is a special case of `Index`
@@ -1579,6 +1582,7 @@ def _wrap_joined_index(self, joined, other):
 
 
 class MultiIndex(Index):
+
     """
     Implements multi-level, a.k.a. hierarchical, index object for pandas
     objects
@@ -1625,7 +1629,6 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None,
         if names is not None:
             subarr._set_names(names)
 
-
         if sortorder is not None:
             subarr.sortorder = int(sortorder)
         else:
@@ -1636,7 +1639,6 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None,
     def _get_levels(self):
         return self._levels
 
-
     def _set_levels(self, levels, copy=False):
         # This is NOT part of the levels property because it should be
         # externally not allowed to set levels. User beware if you change
@@ -1686,7 +1688,7 @@ def _get_labels(self):
     def _set_labels(self, labels, copy=False):
         if len(labels) != self.nlevels:
             raise ValueError("Length of levels and labels must be the same.")
-        self._labels = FrozenList(_ensure_frozen(labs,copy=copy)._shallow_copy()
+        self._labels = FrozenList(_ensure_frozen(labs, copy=copy)._shallow_copy()
                                   for labs in labels)
 
     def set_labels(self, labels, inplace=False):
@@ -1811,13 +1813,13 @@ def _set_names(self, values):
         values = list(values)
         if len(values) != self.nlevels:
             raise ValueError('Length of names (%d) must be same as level '
-                              '(%d)' % (len(values),self.nlevels))
+                             '(%d)' % (len(values), self.nlevels))
         # set the name
         for name, level in zip(values, self.levels):
             level.rename(name, inplace=True)
 
-
-    names = property(fset=_set_names, fget=_get_names, doc="Names of levels in MultiIndex")
+    names = property(
+        fset=_set_names, fget=_get_names, doc="Names of levels in MultiIndex")
 
     def _format_native_types(self, **kwargs):
         return self.tolist()
@@ -1845,7 +1847,7 @@ def _get_level_number(self, level):
             count = self.names.count(level)
             if count > 1:
                 raise ValueError('The name %s occurs multiple times, use a '
-                                'level number' % level)
+                                 'level number' % level)
             level = self.names.index(level)
         except ValueError:
             if not isinstance(level, int):
@@ -1980,9 +1982,9 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False,
                 formatted = lev.take(lab).format(formatter=formatter)
 
                 # we have some NA
-                mask = lab==-1
+                mask = lab == -1
                 if mask.any():
-                    formatted = np.array(formatted,dtype=object)
+                    formatted = np.array(formatted, dtype=object)
                     formatted[mask] = na_rep
                     formatted = formatted.tolist()
 
@@ -2000,7 +2002,6 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False,
                 level.append(com.pprint_thing(name, escape_chars=('\t', '\r', '\n'))
                              if name is not None else '')
 
-
             level.extend(np.array(lev, dtype=object))
             result_levels.append(level)
 
@@ -2010,8 +2011,9 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False,
         if sparsify:
             sentinal = ''
             # GH3547
-            # use value of sparsify as sentinal,  unless it's an obvious "Truthey" value
-            if sparsify not in [True,1]:
+            # use value of sparsify as sentinal, unless it's an obvious
+            # "truthy" value
+            if sparsify not in [True, 1]:
                 sentinal = sparsify
             # little bit of a kludge job for #1217
             result_levels = _sparsify(result_levels,
@@ -2138,7 +2140,8 @@ def __contains__(self, key):
     def __reduce__(self):
         """Necessary for making this object picklable"""
         object_state = list(np.ndarray.__reduce__(self))
-        subclass_state = (list(self.levels), list(self.labels), self.sortorder, list(self.names))
+        subclass_state = (list(self.levels), list(
+            self.labels), self.sortorder, list(self.names))
         object_state[2] = (object_state[2], subclass_state)
         return tuple(object_state)
 
@@ -2490,7 +2493,8 @@ def reindex(self, target, method=None, level=None, limit=None,
                                              "with a method or limit")
                         return self[target], target
 
-                    raise Exception("cannot handle a non-takeable non-unique multi-index!")
+                    raise Exception(
+                        "cannot handle a non-takeable non-unique multi-index!")
 
         if not isinstance(target, MultiIndex):
             if indexer is None:
@@ -2685,12 +2689,13 @@ def partial_selection(key):
 
                         # here we have a completely specified key, but are using some partial string matching here
                         # GH4758
-                        can_index_exactly = any([ l.is_all_dates and not isinstance(k,compat.string_types) for k, l in zip(key, self.levels) ])
-                        if any([ l.is_all_dates for k, l in zip(key, self.levels) ]) and not can_index_exactly:
+                        can_index_exactly = any(
+                            [l.is_all_dates and not isinstance(k, compat.string_types) for k, l in zip(key, self.levels)])
+                        if any([l.is_all_dates for k, l in zip(key, self.levels)]) and not can_index_exactly:
                             indexer = slice(*self.slice_locs(key, key))
 
                             # we have a multiple selection here
-                            if not indexer.stop-indexer.start == 1:
+                            if not indexer.stop - indexer.start == 1:
                                 return partial_selection(key)
 
                             key = tuple(self[indexer].tolist()[0])
@@ -2913,7 +2918,8 @@ def _assert_can_do_setop(self, other):
 
     def astype(self, dtype):
         if np.dtype(dtype) != np.object_:
-            raise TypeError("Setting %s dtype to anything other than object is not supported" % self.__class__)
+            raise TypeError(
+                "Setting %s dtype to anything other than object is not supported" % self.__class__)
         return self._shallow_copy()
 
     def insert(self, loc, item):
@@ -2935,7 +2941,8 @@ def insert(self, loc, item):
         if not isinstance(item, tuple):
             item = (item,) + ('',) * (self.nlevels - 1)
         elif len(item) != self.nlevels:
-            raise ValueError('Item must have length equal to number of levels.')
+            raise ValueError(
+                'Item must have length equal to number of levels.')
 
         new_levels = []
         new_labels = []
@@ -2990,7 +2997,7 @@ def _wrap_joined_index(self, joined, other):
 
 # For utility purposes
 
-def _sparsify(label_list, start=0,sentinal=''):
+def _sparsify(label_list, start=0, sentinal=''):
     pivoted = lzip(*label_list)
     k = len(label_list)
 
@@ -3031,7 +3038,7 @@ def _ensure_index(index_like, copy=False):
     if isinstance(index_like, list):
         if type(index_like) != list:
             index_like = list(index_like)
-        # #2200 ?
+        # GH2200 ?
         converted, all_arrays = lib.clean_index_list(index_like)
 
         if len(converted) > 0 and all_arrays:
@@ -3169,7 +3176,8 @@ def _get_consensus_names(indexes):
 
     # find the non-none names, need to tupleify to make
     # the set hashable, then reverse on return
-    consensus_names = set([ tuple(i.names) for i in indexes if all(n is not None for n in i.names) ])
+    consensus_names = set([tuple(i.names)
+                          for i in indexes if all(n is not None for n in i.names)])
     if len(consensus_names) == 1:
         return list(list(consensus_names)[0])
     return [None] * indexes[0].nlevels

diff --git a/pandas/core/internals.py b/pandas/core/internals.py
@@ -2334,8 +2334,12 @@ def copy(self, deep=True):
         -------
         copy : BlockManager
         """
-        new_axes = list(self.axes)
-        return self.apply('copy', axes=new_axes, deep=deep, do_integrity_check=False)
+        if deep:
+            new_axes = [ax.view() for ax in self.axes]
+        else:
+            new_axes = list(self.axes)
+        return self.apply('copy', axes=new_axes, deep=deep,
+                        ref_items=new_axes[0], do_integrity_check=False)
 
     def as_matrix(self, items=None):
         if len(self.blocks) == 0:

diff --git a/pandas/sparse/panel.py b/pandas/sparse/panel.py
@@ -235,19 +235,25 @@ def __setstate__(self, state):
         self._minor_axis = _ensure_index(com._unpickle_array(minor))
         self._frames = frames
 
-    def copy(self):
+    def copy(self, deep=True):
         """
-        Make a (shallow) copy of the sparse panel
+        Make a copy of the sparse panel
 
         Returns
         -------
         copy : SparsePanel
         """
-        return SparsePanel(self._frames.copy(), items=self.items,
-                           major_axis=self.major_axis,
-                           minor_axis=self.minor_axis,
-                           default_fill_value=self.default_fill_value,
-                           default_kind=self.default_kind)
+
+        d = self._construct_axes_dict()
+        if deep:
+            new_data = dict((k, v.copy(deep=True)) for k, v in compat.iteritems(self._frames))
+            d = dict((k, v.copy(deep=True)) for k, v in compat.iteritems(d))
+        else:
+            new_data = self._frames.copy()
+        d['default_fill_value']=self.default_fill_value
+        d['default_kind']=self.default_kind
+
+        return SparsePanel(new_data, **d)
 
     def to_frame(self, filter_observations=True):
         """