moving around unit tests, working on fancy get/setitem for objects. fixed buglet in parseCSV
pandas-dev · Jun 3, 2011 · 2dc78dc · 2dc78dc
1 parent a656df2
commit 2dc78dc
Show file tree

Hide file tree

Showing 9 changed files with 392 additions and 274 deletions.
diff --git a/RELEASE.rst b/RELEASE.rst
@@ -1,3 +1,49 @@
+
+************************
+pandas 0.4 Release Notes
+************************
+
+==========
+What is it
+==========
+
+**pandas** is a library of labeled data structures, statistical models, and
+general code for working with time series and cross-sectional data. It was
+designed with the practical needs of statistical modeling and large,
+inhomogeneous data sets in mind.
+
+===============
+Where to get it
+===============
+
+Source code: http://github.com/wesm/pandas
+Binary installers on PyPI: http://pypi.python.org/pypi/pandas
+Documentation: http://pandas.sourceforge.net
+
+=============
+Release notes
+=============
+
+**Release date:** NOT YET RELEASED
+
+**New features / modules**
+
+* `DataFrame.describe`
+* `DataFrame.quantile`, `Series.quantile`
+* Fancy indexing
+
+**Improvements**
+
+
+**API Changes**
+
+**Bug fixes**
+
+************************
+pandas 0.3 Release Notes
+************************
+
 =============
 Release Notes
 =============

diff --git a/pandas/core/common.py b/pandas/core/common.py
@@ -88,6 +88,8 @@ def null_out_axis(arr, mask, axis):
 
         arr[tuple(indexer)] = np.NaN
 
+#-------------------------------------------------------------------------------
+# Lots of little utilities
 
 def ensure_float(arr):
     if issubclass(arr.dtype.type, np.integer):
@@ -102,3 +104,23 @@ def _mut_exclusive(arg1, arg2):
         return arg1
     else:
         return arg2
+
+
+def _is_list_like(obj):
+    """
+    Return True if obj is a list or a NumPy ndarray (subclasses included),
+    False for anything else (tuples, strings, scalars, ...).
+    """
+    list_like_types = (list, np.ndarray)
+    return isinstance(obj, list_like_types)
+
+def _is_label_slice(labels, obj):
+    """
+    Decide whether the slice `obj` has to be interpreted against `labels`
+    instead of as plain integer positions.
+
+    A bound is treated as positional only when it is not contained in
+    `labels` and is an int or None. The slice is a label slice as soon as
+    one of its two bounds (start, stop) is not positional.
+    """
+    def _is_positional(bound):
+        # membership wins: an int that is itself a label is a label
+        if bound in labels:
+            return False
+        return bound is None or isinstance(bound, int)
+
+    return not (_is_positional(obj.start) and _is_positional(obj.stop))
+
+def _need_slice(obj):
+    """
+    Return True when the slice carries an explicit start or stop.
+
+    The step is not looked at: slice(None, None, k) yields False.
+    """
+    unbounded = obj.start is None and obj.stop is None
+    return not unbounded
+
+def _check_step(obj):
+    """
+    Validate that the slice `obj` does not ask for a stride.
+
+    Returns None when the step is omitted or explicitly 1.
+
+    Raises
+    ------
+    Exception
+        If the slice has any step other than 1.
+    """
+    # An explicit step of 1 means the same as no step, so accept it; the
+    # error message has always promised that only other steps are rejected.
+    if obj.step is not None and obj.step != 1:
+        raise Exception('steps other than 1 are not supported')
+
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -10,7 +10,9 @@
 from numpy import NaN
 import numpy as np
 
-from pandas.core.common import (_pickle_array, _unpickle_array, isnull, notnull)
+from pandas.core.common import (_pickle_array, _unpickle_array, isnull, notnull,
+                                _check_step, _is_list_like, _need_slice,
+                                _is_label_slice)
 from pandas.core.daterange import DateRange
 from pandas.core.generic import PandasGeneric
 from pandas.core.index import Index, NULL_INDEX
@@ -2361,7 +2363,7 @@ def ix(self):
 
         return self._ix
 
-    def _fancy_index(self, key, axis=0):
+    def _fancy_getitem(self, key, axis=0):
         labels = self._get_axis(axis)
         axis_name = self._get_axis_name(axis)
 
@@ -2376,21 +2378,21 @@ def _fancy_index(self, key, axis=0):
         else:
             return self.reindex(**{axis_name : key})
 
-    def _fancy_index_tuple(self, rowkey, colkey):
-        result = self._fancy_index_axis(colkey, axis=1)
+    def _fancy_getitem_tuple(self, rowkey, colkey):
+        result = self._fancy_getitem_axis(colkey, axis=1)
 
         if isinstance(result, Series):
             result = result[rowkey]
         else:
-            result = result._fancy_index_axis(rowkey, axis=0)
+            result = result._fancy_getitem_axis(rowkey, axis=0)
 
         return result
 
-    def _fancy_index_axis(self, key, axis=0):
+    def _fancy_getitem_axis(self, key, axis=0):
         if isinstance(key, slice):
             return self._slice_axis(key, axis=axis)
         elif _is_list_like(key):
-            return self._fancy_index(key, axis=axis)
+            return self._fancy_getitem(key, axis=axis)
         elif axis == 0:
             idx = key
             if isinstance(key, int):
@@ -2445,37 +2447,19 @@ def __init__(self, frame):
     def __getitem__(self, key):
         frame = self.frame
         if isinstance(key, slice):
-            return frame._fancy_index_axis(key, axis=0)
+            return frame._fancy_getitem_axis(key, axis=0)
         elif isinstance(key, tuple):
             if len(key) != 2:
                 raise Exception('only length 2 tuple supported')
-            return frame._fancy_index_tuple(*key)
+            return frame._fancy_getitem_tuple(*key)
         elif _is_list_like(key):
-            return frame._fancy_index(key, axis=0)
+            return frame._fancy_getitem(key, axis=0)
         else:
-            return frame._fancy_index_axis(key, axis=0)
+            return frame._fancy_getitem_axis(key, axis=0)
 
     def __setitem__(self, key, value):
         raise NotImplementedError
 
-def _is_list_like(obj):
-    return isinstance(obj, (list, np.ndarray))
-
-def _is_label_slice(labels, obj):
-    def crit(x):
-        if x in labels:
-            return False
-        else:
-            return isinstance(x, int) or x is None
-    return not crit(obj.start) or not crit(obj.stop)
-
-def _need_slice(obj):
-    return obj.start is not None or obj.stop is not None
-
-def _check_step(obj):
-    if obj.step is not None:
-        raise Exception('steps other than 1 are not supported')
-
 def try_sort(iterable):
     listed = list(iterable)
     try:

diff --git a/pandas/core/functions.py b/pandas/core/functions.py
@@ -81,6 +81,31 @@ def dumb_way(series, buckets):
     sampled2 = sampled2.reindex(buckets)
     return sampled2
 
+def ts_upsample(dates, buckets, values, aggfunc, inclusive=True):
+    '''
+    Aggregate `values` into one number per bucket, walking `dates` once.
+
+    Bucket i collects the run of consecutive values whose date compares
+    below buckets[i + 1]; the last bucket has no following bound and takes
+    every value that is left.
+
+    Parameters
+    ----------
+    dates : sequence
+        One entry per value. Assumed sorted ascending, since a single
+        forward pass is made -- TODO confirm with callers.
+    buckets : sequence
+        Bucket bounds; also used as the index of the result.
+    values : sliceable sequence
+        Data to aggregate, positionally aligned with `dates`.
+    aggfunc : callable
+        Called with values[start:stop] for each bucket (the slice may be
+        empty); its result is stored in a float array.
+    inclusive : bool, default True
+        True compares with `date < next_bound`, False with
+        `date <= next_bound`.
+        NOTE(review): the naming looks inverted relative to the
+        comparisons; kept as-is, confirm the intended meaning.
+
+    Returns
+    -------
+    Series of float, indexed by `buckets`
+    '''
+    nbuckets = len(buckets)
+    nvalues = len(dates)
+    output = np.empty(nbuckets, dtype=float)
+
+    if inclusive:
+        _check = lambda x, y: x < y
+    else:
+        _check = lambda x, y: x <= y
+
+    j = 0
+    for i in range(nbuckets):
+        jstart = j
+
+        if i + 1 < nbuckets:
+            next_bound = buckets[i + 1]
+            # test j against nvalues first so dates[j] is never read past
+            # the end of the data
+            while j < nvalues and _check(dates[j], next_bound):
+                j += 1
+        else:
+            # no bound after the last bucket: it absorbs the remainder
+            # (looking up buckets[i + 1] here used to raise IndexError)
+            j = nvalues
+
+        output[i] = aggfunc(values[jstart:j])
+
+    return Series(output, index=buckets)
+
 if __name__ == '__main__':
     N = 1000000
     K = 1000

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -14,6 +14,9 @@
 import numpy as np
 
 from pandas.core.common import isnull, notnull
+from pandas.core.common import (_check_step, _is_list_like, _need_slice,
+                                _is_label_slice)
+
 from pandas.core.daterange import DateRange
 from pandas.core.generic import PandasGeneric
 from pandas.core.index import Index, NULL_INDEX
@@ -1331,41 +1334,82 @@ def select(self, crit):
         """
         return self._select_generic(crit, axis=0)
 
+    # Cache slot for the indexer; None until `ix` is first accessed.
+    _ix = None
+    @property
+    def ix(self):
+        """
+        Indexer object for this Series, created on first access and then
+        reused for subsequent accesses.
+        """
+        indexer = self._ix
+        if indexer is None:
+            indexer = self._ix = _SeriesIndexer(self)
+        return indexer
+
+    def _fancy_getitem(self, key):
+        """
+        Look up `key` for the `ix` indexer.
+
+        * boolean array: reindex to the index entries selected by the mask;
+          a boolean Series must carry an index equal to this one
+        * slice: resolved through the index when it is a label slice,
+          otherwise handed to ordinary item access
+        * anything else: passed to reindex
+
+        Raises
+        ------
+        Exception
+            If a boolean Series key has an index that differs from
+            self.index.
+        """
+        # asarray can be unsafe, NumPy strings are weird
+        if _isboolarr(key):
+            if isinstance(key, Series) and not key.index.equals(self.index):
+                raise Exception('Cannot use boolean index with misaligned '
+                                'or unequal labels')
+            return self.reindex(self.index[key])
+
+        if not isinstance(key, slice):
+            return self.reindex(key)
+
+        if not _is_label_slice(self.index, key):
+            return self[key]
+
+        # translate the label bounds into integer locations
+        start, stop = self.index.slice_locs(key.start, key.stop)
+        return self[start:stop]
+
+    def _fancy_setitem(self, key, value):
+        """
+        Assign `value` at `key` for the `ix` indexer.
+
+        Boolean arrays and slices are delegated to ordinary item
+        assignment; a boolean Series must carry an index equal to this
+        one. Any other key is treated as a collection of labels which are
+        located through the index and written with `put`.
+
+        Raises
+        ------
+        Exception
+            If a boolean Series key has an index that differs from
+            self.index, or if some of the requested labels are not found
+            in the index.
+        """
+        if _isboolarr(key) or isinstance(key, slice):
+            if isinstance(key, Series):
+                if not key.index.equals(self.index):
+                    raise Exception('Cannot use boolean index with misaligned '
+                                    'or unequal labels')
+            self[key] = value
+        else:
+            inds, mask = self.index.get_indexer(key)
+            if not mask.all():
+                # ~mask is the logical complement (unary minus on a boolean
+                # array is rejected by current NumPy); asarray lets a plain
+                # list key be mask-indexed too, so the real problem is
+                # reported instead of an unrelated TypeError.
+                missing = np.asarray(key)[~mask]
+                raise Exception('Indices %s not found' % (missing,))
+            self.put(inds, value)
+
 class TimeSeries(Series):
     pass
 
 
-def ts_upsample(dates, buckets, values, aggfunc, inclusive=True):
-    '''
-    put something here
-    '''
-    nbuckets = len(buckets)
-    nvalues = len(dates)
-    output = np.empty(nbuckets, dtype=float)
-
-    if inclusive:
-        _check = lambda x, y: x < y
-    else:
-        _check = lambda x, y: x <= y
+class _SeriesIndexer(object):
+    """
+    Class to support fancy indexing, potentially using labels
 
-    j = 0
-    for i, bound in enumerate(buckets):
-        next_bound = buckets[i + 1]
-        jstart = j
+    Notes
+    -----
+    Indexing based on labels is INCLUSIVE
+    Slicing uses PYTHON SEMANTICS (endpoint is excluded)
 
-        while _check(dates[j], next_bound) and j < nvalues:
-            j += 1
+    If Index contains int labels, these will be used rather than the locations,
+    so be very careful (ambiguous).
 
-        output[i] = aggfunc(values[jstart:j])
+    Examples
+    --------
+    >>> ts.ix[5:10] # equivalent to ts[5:10]
+    >>> ts.ix[[date1, date2, date3]]
+    >>> ts.ix[date1:date2] = 0
+    """
+    def __init__(self, series):
+        self.series = series
 
-    return Series(output, index=buckets)
+    def __getitem__(self, key):
+        return self.series._fancy_getitem(key)
 
+    def __setitem__(self, key, value):
+        return self.series._fancy_setitem(key, value)
 
 #-------------------------------------------------------------------------------
 # Supplementary functions
 
 _ndgi = ndarray.__getitem__
 
+def _isboolarr(x):
+    """
+    Return True if `x`, converted with np.asarray, has boolean dtype.
+    """
+    as_array = np.asarray(x)
+    return as_array.dtype == np.bool_
+
 def remove_na(arr):
     """
     Return array containing only true/non-NaN values, possibly empty.