Skip to content

Commit

Permalink
Add pandas accessors for DataFrame, Series, and Index objects.
Browse files Browse the repository at this point in the history
  • Loading branch information
shawnbrown committed Jun 10, 2019
1 parent 4aafe89 commit 456b594
Show file tree
Hide file tree
Showing 4 changed files with 241 additions and 0 deletions.
3 changes: 3 additions & 0 deletions datatest/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@

from ._predicate import Predicate

# Pandas extensions.
from ._pandas_integration import register_accessors

# Unittest-style API
from .case import DataTestCase
from .runner import mandatory
Expand Down
214 changes: 214 additions & 0 deletions datatest/_pandas_integration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
"""Adds "validate" accessors to pandas DataFrame, Series, and Index."""
import inspect
from ._compatibility import contextlib
from .validation import ValidationError
from .validation import validate


class ValidationAccessor(object):
    """Accessor that exposes datatest validation as methods of a
    wrapped object.

    Calling the accessor itself runs ``validate`` against the wrapped
    object; each named method forwards to the ``validate`` attribute of
    the same name. In every case the wrapped object is passed as the
    data argument.
    """

    def __init__(self, pandas_obj):
        # Every method below hands this object to ``validate`` as the
        # data to be checked.
        self._data = pandas_obj

    # NOTE: The try/except handling is intentionally repeated in every
    # method. Routing the calls through a shared helper would add one
    # more traceback entry without adding any useful information, so
    # some duplication is accepted to keep failure output short.
    #
    # In each handler, ``__tracebackhide__`` is the frame-local marker
    # that pytest looks for when deciding which frames to omit from its
    # failure report. The caught error's cause and traceback are cleared
    # before it is raised again from the accessor method.

    def __call__(self, requirement, msg=None):
        """Check that data satisfies the requirement."""
        try:
            return validate(self._data, requirement, msg=msg)
        except ValidationError as exc:
            __tracebackhide__ = True
            exc.__cause__ = None
            exc.__traceback__ = None
            raise exc

    def predicate(self, requirement, msg=None):
        """Check that data satisfies the predicate requirement."""
        try:
            return validate.predicate(self._data, requirement, msg=msg)
        except ValidationError as exc:
            __tracebackhide__ = True
            exc.__cause__ = None
            exc.__traceback__ = None
            raise exc

    def regex(self, requirement, flags=0, msg=None):
        """Check that data matches the regex requirement."""
        try:
            return validate.regex(
                self._data, requirement, flags=flags, msg=msg)
        except ValidationError as exc:
            __tracebackhide__ = True
            exc.__cause__ = None
            exc.__traceback__ = None
            raise exc

    def approx(self, requirement, places=None, msg=None, delta=None):
        """Check that data approximately matches the requirement."""
        try:
            return validate.approx(
                self._data,
                requirement,
                places=places,
                msg=msg,
                delta=delta,
            )
        except ValidationError as exc:
            __tracebackhide__ = True
            exc.__cause__ = None
            exc.__traceback__ = None
            raise exc

    def fuzzy(self, requirement, cutoff=0.6, msg=None):
        """Check that strings match the requirement with a similarity
        greater than or equal to *cutoff* (default 0.6).
        """
        try:
            return validate.fuzzy(
                self._data, requirement, cutoff=cutoff, msg=msg)
        except ValidationError as exc:
            __tracebackhide__ = True
            exc.__cause__ = None
            exc.__traceback__ = None
            raise exc

    def interval(self, min=None, max=None, msg=None):
        """Check that values fall within the given interval."""
        try:
            return validate.interval(self._data, min=min, max=max, msg=msg)
        except ValidationError as exc:
            __tracebackhide__ = True
            exc.__cause__ = None
            exc.__traceback__ = None
            raise exc

    def set(self, requirement, msg=None):
        """Check that the set of elements in data matches the set of
        elements in requirement.
        """
        try:
            return validate.set(self._data, requirement, msg=msg)
        except ValidationError as exc:
            __tracebackhide__ = True
            exc.__cause__ = None
            exc.__traceback__ = None
            raise exc

    def subset(self, requirement, msg=None):
        """Check that requirement is a subset of the values in data."""
        try:
            return validate.subset(self._data, requirement, msg=msg)
        except ValidationError as exc:
            __tracebackhide__ = True
            exc.__cause__ = None
            exc.__traceback__ = None
            raise exc

    def superset(self, requirement, msg=None):
        """Check that requirement is a superset of the values in data."""
        try:
            return validate.superset(self._data, requirement, msg=msg)
        except ValidationError as exc:
            __tracebackhide__ = True
            exc.__cause__ = None
            exc.__traceback__ = None
            raise exc

    def unique(self, msg=None):
        """Check that the elements in data are unique."""
        try:
            return validate.unique(self._data, msg=msg)
        except ValidationError as exc:
            __tracebackhide__ = True
            exc.__cause__ = None
            exc.__traceback__ = None
            raise exc

    def order(self, requirement, msg=None):
        """Check that elements in data follow the relative order of
        the elements in requirement.
        """
        try:
            return validate.order(self._data, requirement, msg=msg)
        except ValidationError as exc:
            __tracebackhide__ = True
            exc.__cause__ = None
            exc.__traceback__ = None
            raise exc


# From the documented pandas API, it's not entirely clear whether
# a single class may be registered as an accessor on multiple
# pandas objects. For reliability, we define a separate subclass
# for each pandas object being extended. While it would be more
# concise to call pandas.core.accessor._register_accessor()
# directly, that is not a user-facing interface--DO NOT USE IT.

class ValidateDataFrame(ValidationAccessor):
    """Check that values in DataFrame satisfy the *requirement*."""
    # Intentionally empty: all behavior is inherited. The subclass only
    # exists so that DataFrame gets an accessor class of its own.


class ValidateSeries(ValidationAccessor):
    """Check that values in Series satisfy the *requirement*."""
    # Intentionally empty: all behavior is inherited. The subclass only
    # exists so that Series gets an accessor class of its own.


class ValidateIndex(ValidationAccessor):
    """Check that values in Index satisfy the *requirement*."""
    # Intentionally empty: all behavior is inherited. The subclass only
    # exists so that Index gets an accessor class of its own.


# Backward-compatible alias: the class was first defined under this
# misspelled name and other code in this module still refers to it.
ValadateIndex = ValidateIndex


##############################################
# Import pandas and register custom accessors.
##############################################

def register_accessors():
    """Register "validate" accessors for :class:`pandas.DataFrame`,
    :class:`pandas.Series`, and :class:`pandas.Index` objects:

    .. code-block:: python
        :emphasize-lines: 4

        import pandas as pd
        import datatest as dt

        dt.register_accessors()
        ...

    After registering the accessors, ``validate`` can be used as a
    method of :class:`DataFrame <pandas.DataFrame>`, :class:`Series
    <pandas.Series>`, and :class:`Index <pandas.Index>` objects:

    .. code-block:: python
        :emphasize-lines: 3

        ...
        df = pd.read_csv('example.csv')
        df['A'].validate(int)

    Types whose ``validate`` attribute is already one of datatest's
    accessor classes are skipped, so calling this function more than
    once is safe. If pandas cannot be imported, or if it does not
    provide the accessor-registration functions, a warning is issued
    and nothing is registered.
    """
    import warnings

    try:
        import pandas
    except ImportError:
        message = 'unable to register accessors; unable to import pandas'
        warnings.warn(message, stacklevel=2)
        return

    # Only the API lookups are guarded: an AttributeError here means the
    # extension API is missing, whereas an AttributeError raised later
    # would be an unrelated problem that must not be misreported.
    try:
        extensions = pandas.api.extensions
        registrations = [
            (pandas.DataFrame,
             extensions.register_dataframe_accessor,
             ValidateDataFrame),
            (pandas.Series,
             extensions.register_series_accessor,
             ValidateSeries),
            (pandas.Index,
             extensions.register_index_accessor,
             ValadateIndex),  # (sic) spelled as defined in this module
        ]
    except AttributeError:
        message = 'unable to register accessors; extension API unavailable'
        warnings.warn(message, stacklevel=2)
        return

    for pandas_type, register, accessor_class in registrations:
        existing = getattr(pandas_type, 'validate', None)
        # issubclass() raises TypeError for non-class arguments, so make
        # sure an unrelated "validate" attribute (anything that is not a
        # class) is treated as "not registered yet" instead of crashing.
        if inspect.isclass(existing) and issubclass(existing, accessor_class):
            continue  # Already registered; nothing to do.
        # The decorator's return value is not needed: the module-level
        # names keep referring to the accessor classes defined above.
        register('validate')(accessor_class)
1 change: 1 addition & 0 deletions docs/reference/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ Reference
:maxdepth: 2

Datatest Core <datatest-core>
pandas-integration
Data Handling <data-handling>
unittest-support

Expand Down
23 changes: 23 additions & 0 deletions docs/reference/pandas-integration.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@

.. module:: datatest

.. meta::
    :description: datatest API for pandas integration
    :keywords: datatest, pandas, validate, validation


##################
Pandas Integration
##################

Datatest can validate :class:`pandas.DataFrame`, :class:`pandas.Series`,
and :class:`pandas.Index` objects the same way it does with built-in
types like :py:class:`dict` and :py:class:`list`.


*********
Accessors
*********

.. autofunction:: register_accessors

0 comments on commit 456b594

Please sign in to comment.