Skip to content

Commit

Permalink
moving around unit tests, working on fancy get/setitem for objects. f…
Browse files Browse the repository at this point in the history
…ixed buglet in parseCSV
  • Loading branch information
wesm committed Jun 3, 2011
1 parent a656df2 commit 2dc78dc
Show file tree
Hide file tree
Showing 9 changed files with 392 additions and 274 deletions.
46 changes: 46 additions & 0 deletions RELEASE.rst
@@ -1,3 +1,49 @@

************************
pandas 0.4 Release Notes
************************

==========
What is it
==========

**pandas** is a library of labeled data structures, statistical models, and
general code for working with time series and cross-sectional data. It was
designed with the practical needs of statistical modeling and large,
inhomogeneous data sets in mind.

===============
Where to get it
===============

| Source code: http://github.com/wesm/pandas
| Binary installers on PyPI: http://pypi.python.org/pypi/pandas
| Documentation: http://pandas.sourceforge.net

=============
Release notes
=============

**Release date:** NOT YET RELEASED

**New features / modules**

* `DataFrame.describe`
* `DataFrame.quantile`, `Series.quantile`
* Fancy (label-aware) indexing through the `ix` attribute of `Series` and
  `DataFrame`

**Improvements**


**API Changes**

**Bug fixes**

************************
pandas 0.3 Release Notes
************************

=============
Release Notes
=============
Expand Down
22 changes: 22 additions & 0 deletions pandas/core/common.py
Expand Up @@ -88,6 +88,8 @@ def null_out_axis(arr, mask, axis):

arr[tuple(indexer)] = np.NaN

#-------------------------------------------------------------------------------
# Lots of little utilities

def ensure_float(arr):
if issubclass(arr.dtype.type, np.integer):
Expand All @@ -102,3 +104,23 @@ def _mut_exclusive(arg1, arg2):
return arg1
else:
return arg2


def _is_list_like(obj):
return isinstance(obj, (list, np.ndarray))

def _is_label_slice(labels, obj):
def crit(x):
if x in labels:
return False
else:
return isinstance(x, int) or x is None
return not crit(obj.start) or not crit(obj.stop)

def _need_slice(obj):
return obj.start is not None or obj.stop is not None

def _check_step(obj):
if obj.step is not None:
raise Exception('steps other than 1 are not supported')

42 changes: 13 additions & 29 deletions pandas/core/frame.py
Expand Up @@ -10,7 +10,9 @@
from numpy import NaN
import numpy as np

from pandas.core.common import (_pickle_array, _unpickle_array, isnull, notnull)
from pandas.core.common import (_pickle_array, _unpickle_array, isnull, notnull,
_check_step, _is_list_like, _need_slice,
_is_label_slice)
from pandas.core.daterange import DateRange
from pandas.core.generic import PandasGeneric
from pandas.core.index import Index, NULL_INDEX
Expand Down Expand Up @@ -2361,7 +2363,7 @@ def ix(self):

return self._ix

def _fancy_index(self, key, axis=0):
def _fancy_getitem(self, key, axis=0):
labels = self._get_axis(axis)
axis_name = self._get_axis_name(axis)

Expand All @@ -2376,21 +2378,21 @@ def _fancy_index(self, key, axis=0):
else:
return self.reindex(**{axis_name : key})

def _fancy_index_tuple(self, rowkey, colkey):
result = self._fancy_index_axis(colkey, axis=1)
def _fancy_getitem_tuple(self, rowkey, colkey):
result = self._fancy_getitem_axis(colkey, axis=1)

if isinstance(result, Series):
result = result[rowkey]
else:
result = result._fancy_index_axis(rowkey, axis=0)
result = result._fancy_getitem_axis(rowkey, axis=0)

return result

def _fancy_index_axis(self, key, axis=0):
def _fancy_getitem_axis(self, key, axis=0):
if isinstance(key, slice):
return self._slice_axis(key, axis=axis)
elif _is_list_like(key):
return self._fancy_index(key, axis=axis)
return self._fancy_getitem(key, axis=axis)
elif axis == 0:
idx = key
if isinstance(key, int):
Expand Down Expand Up @@ -2445,37 +2447,19 @@ def __init__(self, frame):
def __getitem__(self, key):
frame = self.frame
if isinstance(key, slice):
return frame._fancy_index_axis(key, axis=0)
return frame._fancy_getitem_axis(key, axis=0)
elif isinstance(key, tuple):
if len(key) != 2:
raise Exception('only length 2 tuple supported')
return frame._fancy_index_tuple(*key)
return frame._fancy_getitem_tuple(*key)
elif _is_list_like(key):
return frame._fancy_index(key, axis=0)
return frame._fancy_getitem(key, axis=0)
else:
return frame._fancy_index_axis(key, axis=0)
return frame._fancy_getitem_axis(key, axis=0)

def __setitem__(self, key, value):
raise NotImplementedError

def _is_list_like(obj):
return isinstance(obj, (list, np.ndarray))

def _is_label_slice(labels, obj):
def crit(x):
if x in labels:
return False
else:
return isinstance(x, int) or x is None
return not crit(obj.start) or not crit(obj.stop)

def _need_slice(obj):
return obj.start is not None or obj.stop is not None

def _check_step(obj):
if obj.step is not None:
raise Exception('steps other than 1 are not supported')

def try_sort(iterable):
listed = list(iterable)
try:
Expand Down
25 changes: 25 additions & 0 deletions pandas/core/functions.py
Expand Up @@ -81,6 +81,31 @@ def dumb_way(series, buckets):
sampled2 = sampled2.reindex(buckets)
return sampled2

def ts_upsample(dates, buckets, values, aggfunc, inclusive=True):
    """
    Aggregate `values` into the buckets delimited by `buckets`.

    `dates` is walked exactly once, front to back, so `dates` and `buckets`
    are expected to be sorted in ascending order (NOTE(review): not enforced
    here -- confirm against callers). Bucket i receives the consecutive run
    of values whose dates fall before the bound of bucket i + 1; the last
    bucket has no following bound and receives every remaining value. Values
    dated before ``buckets[0]`` end up in the first bucket.

    Parameters
    ----------
    dates : sequence
        Orderable key (e.g. date) of each entry in `values`
    buckets : sequence
        Bucket bounds; also used as the index of the result
    values : sequence supporting slicing
        Values to aggregate, aligned with `dates`
    aggfunc : function
        Called once per bucket with the slice of `values` belonging to it
        (the slice may be empty); must return a float-compatible scalar
    inclusive : boolean, default True
        If True, a value dated exactly on the next bucket's bound goes to the
        next bucket (``date < bound`` keeps it in the current one). If False
        it stays in the current bucket (``date <= bound``).

    Returns
    -------
    Series of aggregated values indexed by `buckets`
    """
    nbuckets = len(buckets)
    nvalues = len(dates)
    output = np.empty(nbuckets, dtype=float)

    if inclusive:
        _check = lambda x, y: x < y
    else:
        _check = lambda x, y: x <= y

    j = 0
    for i in range(nbuckets):
        jstart = j

        if i + 1 < nbuckets:
            next_bound = buckets[i + 1]
            # Bounds test comes first so dates[j] is never read past the end
            while j < nvalues and _check(dates[j], next_bound):
                j += 1
        else:
            # No bound follows the last bucket: it takes whatever is left
            j = nvalues

        output[i] = aggfunc(values[jstart:j])

    return Series(output, index=buckets)

if __name__ == '__main__':
N = 1000000
K = 1000
Expand Down
84 changes: 64 additions & 20 deletions pandas/core/series.py
Expand Up @@ -14,6 +14,9 @@
import numpy as np

from pandas.core.common import isnull, notnull
from pandas.core.common import (_check_step, _is_list_like, _need_slice,
_is_label_slice)

from pandas.core.daterange import DateRange
from pandas.core.generic import PandasGeneric
from pandas.core.index import Index, NULL_INDEX
Expand Down Expand Up @@ -1331,41 +1334,82 @@ def select(self, crit):
"""
return self._select_generic(crit, axis=0)

# Indexer cache; stays None until `ix` is first accessed
_ix = None

@property
def ix(self):
    """
    Fancy-indexing accessor for this object.

    The `_SeriesIndexer` is built on first access and stored in `_ix`, so
    later accesses return the same indexer object.
    """
    indexer = self._ix
    if indexer is None:
        indexer = self._ix = _SeriesIndexer(self)
    return indexer

def _fancy_getitem(self, key):
    """
    Retrieve values for `key`; backs ``Series.ix[key]``.

    Dispatch, in order:

    * boolean array: reindex to the index entries selected by the mask
    * slice: if either bound is a label (see `_is_label_slice`), the bounds
      are translated to positions with ``index.slice_locs`` before slicing;
      otherwise the slice is applied as is
    * anything else: passed to ``reindex``

    Raises
    ------
    Exception
        If `key` is a boolean Series whose index does not equal this one's
    """
    # asarray can be unsafe, NumPy strings are weird
    if _isboolarr(key):
        if isinstance(key, Series) and not key.index.equals(self.index):
            raise Exception('Cannot use boolean index with misaligned '
                            'or unequal labels')
        selected = self.index[key]
        return self.reindex(selected)

    if not isinstance(key, slice):
        return self.reindex(key)

    if not _is_label_slice(self.index, key):
        return self[key]

    # NOTE(review): key.step is not carried over on this path
    begin, end = self.index.slice_locs(key.start, key.stop)
    return self[begin:end]

def _fancy_setitem(self, key, value):
    """
    Assign `value` at `key`; backs ``Series.ix[key] = value``.

    Boolean arrays and slices are forwarded to regular item assignment
    (``self[key] = value``). Any other key is treated as a collection of
    labels, all of which must already be present in the index; the values
    are then written by position with ``put``.

    Raises
    ------
    Exception
        If `key` is a boolean Series whose index does not equal this one's,
        or if any requested label is missing from the index
    """
    if _isboolarr(key) or isinstance(key, slice):
        if isinstance(key, Series) and not key.index.equals(self.index):
            raise Exception('Cannot use boolean index with misaligned '
                            'or unequal labels')
        # NOTE(review): slices are forwarded untouched, whereas
        # _fancy_getitem translates label slices via index.slice_locs --
        # confirm __setitem__ copes with label bounds.
        self[key] = value
    else:
        inds, mask = self.index.get_indexer(key)
        if not mask.all():
            # Coerce first so a plain list key can be mask-indexed, and
            # invert with ~ because unary minus is not defined for boolean
            # arrays (assumes mask is a boolean ndarray).
            missing = np.asarray(key)[~mask]
            raise Exception('Indices %s not found' % missing)
        self.put(inds, value)

class TimeSeries(Series):
    # Series subclass; no additional behavior is defined in this class body.
    pass


def ts_upsample(dates, buckets, values, aggfunc, inclusive=True):
'''
put something here
'''
nbuckets = len(buckets)
nvalues = len(dates)
output = np.empty(nbuckets, dtype=float)

if inclusive:
_check = lambda x, y: x < y
else:
_check = lambda x, y: x <= y
class _SeriesIndexer(object):
"""
Class to support fancy indexing, potentially using labels
j = 0
for i, bound in enumerate(buckets):
next_bound = buckets[i + 1]
jstart = j
Notes
-----
Indexing based on labels is INCLUSIVE
Slicing uses PYTHON SEMANTICS (endpoint is excluded)
while _check(dates[j], next_bound) and j < nvalues:
j += 1
If Index contains int labels, these will be used rather than the locations,
so be very careful (ambiguous).
output[i] = aggfunc(values[jstart:j])
Examples
--------
>>> ts.ix[5:10] # equivalent to ts[5:10]
>>> ts.ix[[date1, date2, date3]]
>>> ts.ix[date1:date2] = 0
"""
def __init__(self, series):
    # Keep a reference to the wrapped Series for later item access
    self.series = series

return Series(output, index=buckets)
def __getitem__(self, key):
return self.series._fancy_getitem(key)

def __setitem__(self, key, value):
return self.series._fancy_setitem(key, value)

#-------------------------------------------------------------------------------
# Supplementary functions

_ndgi = ndarray.__getitem__

_isboolarr = lambda x: np.asarray(x).dtype == np.bool_

def remove_na(arr):
"""
Return array containing only true/non-NaN values, possibly empty.
Expand Down

0 comments on commit 2dc78dc

Please sign in to comment.