Skip to content

Commit

Permalink
Merge commit 'v0.8.0b2-68-g7240b87' into debian-0.8
Browse files Browse the repository at this point in the history
* commit 'v0.8.0b2-68-g7240b87': (68 commits)
  TST: additional coverage and cruft removal for ts plotting pandas-dev#1245
  BUG: test coverage, groupby bug fixes
  BUG: fix NumPy 1.7 argmin workaround, test coverage
  BUG: out of bounds on buffer access if time doesn't exist in TimeSeries.at_time
  BUG: revert mpl hackaround
  TST: resample test coverage etc. pandas-dev#1245
  BUG: test coverage and misc bug fixes, cruft deletion in period.py pandas-dev#1245
  TST: finish test coverage of pandas.tseries.index pandas-dev#1245
  BUG: fix closed='left' resample bug. test coverage pandas-dev#1245
  TST: test coverage pandas-dev#1245
  BUG: raise exception in DataFrame.fillna when axis=1 and pass dict/Series. close pandas-dev#1485
  BUG: fillna called with Series should be analogous to with dict close pandas-dev#1486
  BUG: fix MS/BMS range generation / onOffset bugs causing pandas-dev#1483
  ENH: at_time/between_time work with tz-localized time series. refactoring and cleanup close pandas-dev#1481
  BUG: label slicing with duplicate values, close pandas-dev#1480
  TST: remove rogue print statement
  BUG: fixed broken imports
  BUG: do not convert bday freq in ts plots pandas-dev#1482
  BUG: mask NaNs in non-ts plots
  TST: test case for tseries plots with data gaps
  ...
  • Loading branch information
yarikoptic committed Jun 21, 2012
2 parents a1d7688 + 7240b87 commit 8562adc
Show file tree
Hide file tree
Showing 60 changed files with 2,425 additions and 1,605 deletions.
4 changes: 4 additions & 0 deletions RELEASE.rst
Expand Up @@ -139,6 +139,9 @@ pandas 0.8.0
- Series.append and DataFrame.append no longer check for duplicate indexes
by default, add verify_integrity parameter (#1394)
- Refactor Factor class, old constructor moved to Factor.from_array
- Modified internals of MultiIndex to use less memory (no longer represented
as array of tuples), speeding up construction time and many methods
which construct intermediate hierarchical indexes (#1467)

**Bug fixes**

Expand Down Expand Up @@ -186,6 +189,7 @@ pandas 0.8.0
- Reset index mapping when grouping Series in Cython (#1423)
- Fix outer/inner DataFrame.join with non-unique indexes (#1421)
- Fix MultiIndex groupby bugs with empty lower levels (#1401)
- Calling fillna with a Series will have same behavior as with dict (#1486)

pandas 0.7.3
============
Expand Down
2 changes: 1 addition & 1 deletion doc/make.py
Expand Up @@ -79,7 +79,7 @@ def latex():

os.chdir('../..')
else:
print 'latex build has not been tested on windows'
print('latex build has not been tested on windows')

def check_build():
build_dirs = [
Expand Down
2 changes: 1 addition & 1 deletion doc/source/visualization.rst
Expand Up @@ -44,7 +44,7 @@ The ``plot`` method on Series and DataFrame is just a simple wrapper around
@savefig series_plot_basic.png width=4.5in
ts.plot()
If the index consists of dates, it calls ``gca().autofmt_xdate()`` to try to
If the index consists of dates, it calls ``gcf().autofmt_xdate()`` to try to
format the x-axis nicely as per above. The method takes a number of arguments
for controlling the look of the plot:

Expand Down
18 changes: 13 additions & 5 deletions pandas/core/algorithms.py
Expand Up @@ -78,21 +78,21 @@ def _count_generic(values, table_type, type_caster):
from pandas.core.series import Series

values = type_caster(values)
table = table_type(len(values))
table = table_type(min(len(values), 1000000))
uniques, labels, counts = table.factorize(values)

return Series(counts, index=uniques)

def _match_generic(values, index, table_type, type_caster):
values = type_caster(values)
index = type_caster(index)
table = table_type(len(index))
table = table_type(min(len(index), 1000000))
table.map_locations(index)
return table.lookup(values)

def _unique_generic(values, table_type, type_caster):
values = type_caster(values)
table = table_type(len(values))
table = table_type(min(len(values), 1000000))
uniques = table.unique(values)
return type_caster(uniques)

Expand Down Expand Up @@ -223,17 +223,25 @@ def quantile(x, q, interpolation_method='fraction'):
score : float
Score at percentile.
Examples
Examples
--------
>>> from scipy import stats
>>> a = np.arange(100)
>>> stats.scoreatpercentile(a, 50)
49.5
"""
values = np.sort(np.asarray(x))
x = np.asarray(x)
mask = com.isnull(x)

x = x[-mask]

values = np.sort(x)

def _get_score(at):
if len(values) == 0:
return np.nan

idx = at * (len(values) - 1)
if (idx % 1 == 0):
score = values[idx]
Expand Down
7 changes: 3 additions & 4 deletions pandas/core/api.py
Expand Up @@ -6,7 +6,7 @@
from pandas.core.algorithms import factorize, match, unique, value_counts

from pandas.core.common import isnull, notnull, save, load
from pandas.core.factor import Factor
from pandas.core.categorical import Categorical, Factor
from pandas.core.format import (set_printoptions, reset_printoptions,
set_eng_float_format)
from pandas.core.index import Index, Int64Index, MultiIndex
Expand All @@ -15,17 +15,16 @@
from pandas.core.frame import DataFrame
from pandas.core.panel import Panel
from pandas.core.groupby import groupby
from pandas.core.reshape import pivot_simple as pivot
from pandas.core.reshape import pivot_simple as pivot, get_dummies

WidePanel = Panel

from pandas.core.daterange import DateRange # deprecated

from pandas.tseries.offsets import DateOffset
from pandas.tseries.tools import to_datetime
from pandas.tseries.index import (DatetimeIndex, Timestamp,
date_range, bdate_range)
from pandas.tseries.period import Period, PeriodIndex

# legacy
from pandas.core.daterange import DateRange # deprecated
import pandas.core.datetools as datetools
65 changes: 41 additions & 24 deletions pandas/core/factor.py → pandas/core/categorical.py
Expand Up @@ -6,9 +6,9 @@
import pandas.core.common as com


def _factor_compare_op(op):
def _cat_compare_op(op):
def f(self, other):
if isinstance(other, (Factor, np.ndarray)):
if isinstance(other, (Categorical, np.ndarray)):
values = np.asarray(self)
f = getattr(values, op)
return f(np.asarray(other))
Expand All @@ -23,7 +23,7 @@ def f(self, other):

return f

class Factor(object):
class Categorical(object):
"""
Represents a categorical variable in classic R / S-plus fashion
Expand All @@ -41,12 +41,6 @@ class Factor(object):
* levels : ndarray
"""
def __init__(self, labels, levels, name=None):
from pandas.core.index import _ensure_index

levels = _ensure_index(levels)
if not levels.is_unique:
raise ValueError('Factor levels must be unique')

self.labels = labels
self.levels = levels
self.name = name
Expand All @@ -58,28 +52,49 @@ def from_array(cls, data):
except TypeError:
labels, levels, _ = factorize(data, sort=False)

return Factor(labels, levels)
return Categorical(labels, levels,
name=getattr(data, 'name', None))

_levels = None
def _set_levels(self, levels):
from pandas.core.index import _ensure_index

levels = _ensure_index(levels)
if not levels.is_unique:
raise ValueError('Categorical levels must be unique')
self._levels = levels

def _get_levels(self):
return self._levels

levels = None
levels = property(fget=_get_levels, fset=_set_levels)

__eq__ = _factor_compare_op('__eq__')
__ne__ = _factor_compare_op('__ne__')
__lt__ = _factor_compare_op('__lt__')
__gt__ = _factor_compare_op('__gt__')
__le__ = _factor_compare_op('__le__')
__ge__ = _factor_compare_op('__ge__')
__eq__ = _cat_compare_op('__eq__')
__ne__ = _cat_compare_op('__ne__')
__lt__ = _cat_compare_op('__lt__')
__gt__ = _cat_compare_op('__gt__')
__le__ = _cat_compare_op('__le__')
__ge__ = _cat_compare_op('__ge__')

def __array__(self, dtype=None):
return com.take_1d(self.levels, self.labels)
return com.take_1d(self.levels.values, self.labels)

def __len__(self):
return len(self.labels)

def __repr__(self):
temp = 'Factor:%s\n%s\nLevels (%d): %s'
temp = 'Categorical: %s\n%s\n%s'
values = np.asarray(self)
levheader = 'Levels (%d): ' % len(self.levels)
levstring = np.array_repr(self.levels,
max_line_width=60)

indent = ' ' * (levstring.find('[') + len(levheader) + 1)
lines = levstring.split('\n')
levstring = '\n'.join([lines[0]] + [indent + x.lstrip() for x in lines[1:]])

return temp % ('' if self.name is None else self.name,
repr(values), len(self.levels), self.levels)
repr(values), levheader + levstring)

def __getitem__(self, key):
if isinstance(key, (int, np.integer)):
Expand All @@ -89,22 +104,24 @@ def __getitem__(self, key):
else:
return self.levels[i]
else:
return Factor(self.labels[key], self.levels)
return Categorical(self.labels[key], self.levels)

def equals(self, other):
"""
Returns True if factors are equal
Returns True if categorical arrays are equal
Parameters
----------
other : Factor
other : Categorical
Returns
-------
are_equal : boolean
"""
if not isinstance(other, Factor):
if not isinstance(other, Categorical):
return False

return (self.levels.equals(other.levels) and
np.array_equal(self.labels, other.labels))

Factor = Categorical
41 changes: 25 additions & 16 deletions pandas/core/common.py
Expand Up @@ -56,29 +56,36 @@ def isnull(obj):
return lib.checknull(obj)

from pandas.core.generic import PandasObject
from pandas import Series
if isinstance(obj, np.ndarray):
if obj.dtype.kind in ('O', 'S'):
# Working around NumPy ticket 1542
shape = obj.shape
result = np.empty(shape, dtype=bool)
vec = lib.isnullobj(obj.ravel())
result[:] = vec.reshape(shape)

if isinstance(obj, Series):
result = Series(result, index=obj.index, copy=False)
elif obj.dtype == np.dtype('M8[ns]'):
# this is the NaT pattern
result = np.array(obj).view('i8') == lib.iNaT
else:
result = -np.isfinite(obj)
return result
return _isnull_ndarraylike(obj)
elif isinstance(obj, PandasObject):
# TODO: optimize for DataFrame, etc.
return obj.apply(isnull)
elif hasattr(obj, '__array__'):
return _isnull_ndarraylike(obj)
else:
return obj is None

def _isnull_ndarraylike(obj):
from pandas import Series
values = np.asarray(obj)

if values.dtype.kind in ('O', 'S'):
# Working around NumPy ticket 1542
shape = values.shape
result = np.empty(shape, dtype=bool)
vec = lib.isnullobj(values.ravel())
result[:] = vec.reshape(shape)

if isinstance(obj, Series):
result = Series(result, index=obj.index, copy=False)
elif values.dtype == np.dtype('M8[ns]'):
# this is the NaT pattern
result = values.view('i8') == lib.iNaT
else:
result = -np.isfinite(obj)
return result

def notnull(obj):
'''
Replacement for numpy.isfinite / -numpy.isnan which is suitable
Expand Down Expand Up @@ -482,6 +489,8 @@ def _possibly_cast_item(obj, item, dtype):

def _is_bool_indexer(key):
if isinstance(key, np.ndarray) and key.dtype == np.object_:
key = np.asarray(key)

if not lib.is_bool_array(key):
if isnull(key).any():
raise ValueError('cannot index with vector containing '
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/format.py
Expand Up @@ -672,8 +672,8 @@ def _has_names(index):
# Global formatting options

def set_printoptions(precision=None, column_space=None, max_rows=None,
max_columns=None, colheader_justify='right',
max_colwidth=50, notebook_repr_html=None,
max_columns=None, colheader_justify=None,
max_colwidth=None, notebook_repr_html=None,
date_dayfirst=None, date_yearfirst=None):
"""
Alter default behavior of DataFrame.toString
Expand Down

0 comments on commit 8562adc

Please sign in to comment.