Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

Already on GitHub? Sign in to your account

API/CLN: add in common operations to Series/Index, refactored as a OpsMixin #6380

Merged
merged 1 commit into from Feb 18, 2014
Jump to file or symbol
Failed to load files and symbols.
+279 −42
Split
View
@@ -424,10 +424,25 @@ Time series-related
Series.shift
Series.first_valid_index
Series.last_valid_index
- Series.weekday
Series.resample
Series.tz_convert
Series.tz_localize
+ Series.year
+ Series.month
+ Series.day
+ Series.hour
+ Series.minute
+ Series.second
+ Series.microsecond
+ Series.nanosecond
+ Series.date
+ Series.time
+ Series.dayofyear
+ Series.weekofyear
+ Series.week
+ Series.dayofweek
+ Series.weekday
+ Series.quarter
String handling
~~~~~~~~~~~~~~~~~~~
@@ -1129,7 +1144,9 @@ Time/Date Components
DatetimeIndex.dayofweek
DatetimeIndex.weekday
DatetimeIndex.quarter
-
+ DatetimeIndex.tz
+ DatetimeIndex.freq
+ DatetimeIndex.freqstr
Selecting
~~~~~~~~~
View
@@ -69,6 +69,14 @@ API Changes
- ``dtypes`` and ``ftypes`` now return a series with ``dtype=object`` on empty containers (:issue:`5740`)
- The ``interpolate`` ``downcast`` keyword default has been changed from ``infer`` to
``None``. This is to preserve the original dtype unless explicitly requested otherwise (:issue:`6290`).
+- allow a Series to utilize index methods for its index type, e.g. ``Series.year`` is now defined
+ for a Series with a ``DatetimeIndex`` or a ``PeriodIndex``; trying this on a non-supported Index type will
+ now raise a ``TypeError``. (:issue:`4551`, :issue:`4056`, :issue:`5519`)
+
+ The following are affected:
+ - ``date,time,year,month,day,hour,minute,second,weekofyear``
+ - ``week,dayofweek,dayofyear,quarter,microsecond,nanosecond,qyear``
+ - ``min(),max()``
Experimental Features
~~~~~~~~~~~~~~~~~~~~~
View
@@ -31,6 +31,21 @@ API changes
- The ``DataFrame.interpolate()`` ``downcast`` keyword default has been changed from ``infer`` to
``None``. This is to preserve the original dtype unless explicitly requested otherwise (:issue:`6290`).
+- allow a Series to utilize index methods for its index type, e.g. ``Series.year`` is now defined
+ for a Series with a ``DatetimeIndex`` or a ``PeriodIndex``; trying this on a non-supported Index type will
+ now raise a ``TypeError``. (:issue:`4551`, :issue:`4056`, :issue:`5519`)
+
+ The following are affected:
+ - ``date,time,year,month,day,hour,minute,second,weekofyear``
+ - ``week,dayofweek,dayofyear,quarter,microsecond,nanosecond,qyear``
+ - ``min(),max()``
+
+ .. ipython:: python
+
+ s = Series(np.random.randn(5),index=tm.makeDateIndex(5))
+ s
+ s.year
+ s.index.year
MultiIndexing Using Slicers
~~~~~~~~~~~~~~~~~~~~~~~~~~~
View
@@ -5,7 +5,6 @@
import numpy as np
from pandas.core import common as com
-
class StringMixin(object):
"""implements string methods so long as object defines a `__unicode__`
@@ -200,3 +199,90 @@ def __unicode__(self):
prepr = com.pprint_thing(self, escape_chars=('\t', '\r', '\n'),
quote_strings=True)
return "%s(%s, dtype='%s')" % (type(self).__name__, prepr, self.dtype)
+
+
+# facilitate the properties on the wrapped ops
+def _field_accessor(name, docstring=None):
+ op_accessor = '_{0}'.format(name)
+ def f(self):
@cpcloud

cpcloud Feb 18, 2014

Member

Did you mean to use @wraps here?

@jreback

jreback Feb 18, 2014

Contributor

I think I originally did, but sort of did it 'manually' by assigning name/doc string....bad?

@cpcloud

cpcloud Feb 18, 2014

Member

Looking at the source for wraps it basically does that ... only diff is that it updates the __module__ attribute and copies the __dict__ attribute. Not "bad" per se.

@cpcloud

cpcloud Feb 18, 2014

Member

This is fine ... I was just asking.

+ return self._ops_compat(name,op_accessor)
+
+ f.__name__ = name
+ f.__doc__ = docstring
+ return property(f)
+
+class IndexOpsMixin(object):
+ """ common ops mixin to support a unified interface / docs for Series / Index """
+
+ def _is_allowed_index_op(self, name):
+ if not self._allow_index_ops:
+ raise TypeError("cannot perform an {name} operations on this type {typ}".format(
+ name=name,typ=type(self._get_access_object())))
+
+ def _is_allowed_datetime_index_op(self, name):
+ if not self._allow_datetime_index_ops:
+ raise TypeError("cannot perform an {name} operations on this type {typ}".format(
+ name=name,typ=type(self._get_access_object())))
+
+ def _is_allowed_period_index_op(self, name):
+ if not self._allow_period_index_ops:
+ raise TypeError("cannot perform an {name} operations on this type {typ}".format(
+ name=name,typ=type(self._get_access_object())))
+
+ def _ops_compat(self, name, op_accessor):
+ from pandas.tseries.index import DatetimeIndex
+ from pandas.tseries.period import PeriodIndex
+ obj = self._get_access_object()
+ if isinstance(obj, DatetimeIndex):
+ self._is_allowed_datetime_index_op(name)
+ elif isinstance(obj, PeriodIndex):
+ self._is_allowed_period_index_op(name)
+ try:
+ return self._wrap_access_object(getattr(obj,op_accessor))
+ except AttributeError:
+ raise TypeError("cannot perform an {name} operations on this type {typ}".format(
+ name=name,typ=type(obj)))
+
+ def _get_access_object(self):
+ if isinstance(self, com.ABCSeries):
+ return self.index
+ return self
+
+ def _wrap_access_object(self, obj):
+ # we may need to coerce the input as we don't want non int64 if
+ # we have an integer result
+ if hasattr(obj,'dtype') and com.is_integer_dtype(obj):
+ obj = obj.astype(np.int64)
+
+ if isinstance(self, com.ABCSeries):
+ return self._constructor(obj,index=self.index).__finalize__(self)
+
+ return obj
+
+ def max(self):
+ """ The maximum value of the object """
+ self._is_allowed_index_op('max')
+ return self.values.max()
+
+ def min(self):
+ """ The minimum value of the object """
+ self._is_allowed_index_op('min')
+ return self.values.min()
+
+ date = _field_accessor('date','Returns numpy array of datetime.date. The date part of the Timestamps')
+ time = _field_accessor('time','Returns numpy array of datetime.time. The time part of the Timestamps')
+ year = _field_accessor('year', "The year of the datetime")
+ month = _field_accessor('month', "The month as January=1, December=12")
+ day = _field_accessor('day', "The days of the datetime")
+ hour = _field_accessor('hour', "The hours of the datetime")
+ minute = _field_accessor('minute', "The minutes of the datetime")
+ second = _field_accessor('second', "The seconds of the datetime")
+ microsecond = _field_accessor('microsecond', "The microseconds of the datetime")
+ nanosecond = _field_accessor('nanosecond', "The nanoseconds of the datetime")
+ weekofyear = _field_accessor('weekofyear', "The week ordinal of the year")
+ week = weekofyear
+ dayofweek = _field_accessor('dayofweek', "The day of the week with Monday=0, Sunday=6")
+ weekday = dayofweek
+ dayofyear = _field_accessor('dayofyear', "The ordinal day of the year")
+ quarter = _field_accessor('quarter', "The quarter of the date")
+ qyear = _field_accessor('qyear')
View
@@ -10,7 +10,7 @@
import pandas.algos as _algos
import pandas.index as _index
from pandas.lib import Timestamp, is_datetime_array
-from pandas.core.base import FrozenList, FrozenNDArray
+from pandas.core.base import FrozenList, FrozenNDArray, IndexOpsMixin
from pandas.util.decorators import cache_readonly, deprecate
from pandas.core.common import isnull
@@ -57,7 +57,7 @@ def _shouldbe_timestamp(obj):
_Identity = object
-class Index(FrozenNDArray):
+class Index(IndexOpsMixin, FrozenNDArray):
"""
Immutable ndarray implementing an ordered, sliceable set. The basic object
@@ -92,6 +92,9 @@ class Index(FrozenNDArray):
name = None
asi8 = None
_comparables = ['name']
+ _allow_index_ops = True
+ _allow_datetime_index_ops = False
+ _allow_period_index_ops = False
_engine_type = _index.ObjectEngine
View
@@ -30,7 +30,7 @@
from pandas.core.indexing import (
_check_bool_indexer, _check_slice_bounds,
_is_index_slice, _maybe_convert_indices)
-from pandas.core import generic
+from pandas.core import generic, base
from pandas.core.internals import SingleBlockManager
from pandas.core.categorical import Categorical
from pandas.tseries.index import DatetimeIndex
@@ -91,7 +91,7 @@ def f(self, *args, **kwargs):
# Series class
-class Series(generic.NDFrame):
+class Series(base.IndexOpsMixin, generic.NDFrame):
"""
One-dimensional ndarray with axis labels (including time series).
@@ -122,6 +122,15 @@ class Series(generic.NDFrame):
Copy input data
"""
_metadata = ['name']
+ _allow_index_ops = True
+
+ @property
+ def _allow_datetime_index_ops(self):
+ return self.index.is_all_dates and isinstance(self.index, DatetimeIndex)
+
+ @property
+ def _allow_period_index_ops(self):
+ return self.index.is_all_dates and isinstance(self.index, PeriodIndex)
def __init__(self, data=None, index=None, dtype=None, name=None,
copy=False, fastpath=False):
@@ -2297,11 +2306,6 @@ def asof(self, where):
new_values = com.take_1d(values, locs)
return self._constructor(new_values, index=where).__finalize__(self)
- @property
- def weekday(self):
- return self._constructor([d.weekday() for d in self.index],
- index=self.index).__finalize__(self)
-
@cache_readonly
def str(self):
from pandas.core.strings import StringMethods
View
@@ -1,9 +1,14 @@
import re
import numpy as np
import pandas.compat as compat
+import pandas as pd
from pandas.compat import u
from pandas.core.base import FrozenList, FrozenNDArray
from pandas.util.testing import assertRaisesRegexp, assert_isinstance
+from pandas import Series, Index, DatetimeIndex, PeriodIndex
+from pandas import _np_version_under1p7
+import nose
+
import pandas.util.testing as tm
class CheckStringMixin(object):
@@ -120,6 +125,101 @@ def test_values(self):
self.assert_numpy_array_equal(self.container, original)
self.assertEqual(vals[0], n)
+class Ops(tm.TestCase):
+ def setUp(self):
+ self.int_index = tm.makeIntIndex(10)
+ self.float_index = tm.makeFloatIndex(10)
+ self.dt_index = tm.makeDateIndex(10)
+ self.period_index = tm.makePeriodIndex(10)
+ self.string_index = tm.makeStringIndex(10)
+
+ arr = np.random.randn(10)
+ self.int_series = Series(arr, index=self.int_index)
+ self.float_series = Series(arr, index=self.int_index)
+ self.dt_series = Series(arr, index=self.dt_index)
+ self.period_series = Series(arr, index=self.period_index)
+ self.string_series = Series(arr, index=self.string_index)
+
+ self.objs = [ getattr(self,"{0}_{1}".format(t,f)) for t in ['int','float','dt','period','string'] for f in ['index','series'] ]
+
+ def check_ops_properties(self, props, filter=None, ignore_failures=False):
+ for op in props:
+ for o in self.is_valid_objs:
+
+ # if a filter, skip if it doesn't match
+ if filter is not None:
+ filt = o.index if isinstance(o, Series) else o
+ if not filter(filt):
+ continue
+
+ try:
+ if isinstance(o, Series):
+ expected = Series(getattr(o.index,op),index=o.index)
+ else:
+ expected = getattr(o,op)
+ except (AttributeError):
+ if ignore_failures:
+ continue
+
+ result = getattr(o,op)
+
+ # these could be series, arrays or scalars
+ if isinstance(result,Series) and isinstance(expected,Series):
+ tm.assert_series_equal(result,expected)
+ elif isinstance(result,Index) and isinstance(expected,Index):
+ tm.assert_index_equal(result,expected)
+ elif isinstance(result,np.ndarray) and isinstance(expected,np.ndarray):
+ self.assert_numpy_array_equal(result,expected)
+ else:
+ self.assertEqual(result, expected)
+
+ # freq raises AttributeError on an Int64Index because it's not defined
+ # we mostly care about Series here anyhow
+ if not ignore_failures:
+ for o in self.not_valid_objs:
+ self.assertRaises(TypeError, lambda : getattr(o,op))
+
+class TestIndexOps(Ops):
+
+ def setUp(self):
+ super(TestIndexOps, self).setUp()
+ self.is_valid_objs = [ o for o in self.objs if o._allow_index_ops ]
+ self.not_valid_objs = [ o for o in self.objs if not o._allow_index_ops ]
+
+ def test_ops(self):
+ if _np_version_under1p7:
+ raise nose.SkipTest("test only valid in numpy >= 1.7")
+ for op in ['max','min']:
+ for o in self.objs:
+ result = getattr(o,op)()
+ expected = getattr(o.values,op)()
+ self.assertEqual(result, expected)
+
+class TestDatetimeIndexOps(Ops):
+ _allowed = '_allow_datetime_index_ops'
+
+ def setUp(self):
+ super(TestDatetimeIndexOps, self).setUp()
+ mask = lambda x: x._allow_datetime_index_ops or x._allow_period_index_ops
+ self.is_valid_objs = [ o for o in self.objs if mask(o) ]
+ self.not_valid_objs = [ o for o in self.objs if not mask(o) ]
+
+ def test_ops_properties(self):
+ self.check_ops_properties(['year','month','day','hour','minute','second','weekofyear','week','dayofweek','dayofyear','quarter'])
+ self.check_ops_properties(['date','time','microsecond','nanosecond'], lambda x: isinstance(x,DatetimeIndex))
+
+class TestPeriodIndexOps(Ops):
+ _allowed = '_allow_period_index_ops'
+
+ def setUp(self):
+ super(TestPeriodIndexOps, self).setUp()
+ mask = lambda x: x._allow_datetime_index_ops or x._allow_period_index_ops
+ self.is_valid_objs = [ o for o in self.objs if mask(o) ]
+ self.not_valid_objs = [ o for o in self.objs if not mask(o) ]
+
+ def test_ops_properties(self):
+ self.check_ops_properties(['year','month','day','hour','minute','second','weekofyear','week','dayofweek','dayofyear','quarter'])
+ self.check_ops_properties(['qyear'], lambda x: isinstance(x,PeriodIndex))
if __name__ == '__main__':
import nose
Oops, something went wrong.