Skip to content

Commit

Permalink
Initial groundwork for the rewrite
Browse files Browse the repository at this point in the history
  • Loading branch information
multimeric committed Nov 17, 2019
1 parent 9413ec1 commit 2f7e7f3
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 397 deletions.
63 changes: 45 additions & 18 deletions pandas_schema/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,51 @@
from . import validation
from .validation_warning import ValidationWarning

class Column:
    def __init__(self, name: str, validations: typing.Iterable['validation._BaseValidation'] = (), allow_empty=False):
        """
        Creates a new Column object

        :param name: The column header that defines this column. This must be identical to the header used in the
            CSV/Data Frame you are validating.
        :param validations: An iterable of objects implementing _BaseValidation that will generate ValidationErrors.
            Defaults to an empty (immutable) tuple, so no mutable object is shared between calls.
        :param allow_empty: True if an empty column is considered valid. False if we leave that logic up to the
            Validation
        """
        self.name = name
        # Always copy into a fresh list so each Column owns its validations, whatever iterable was passed in
        self.validations = list(validations)
        self.allow_empty = allow_empty
def _column(
        validations: typing.Iterable[validation.IndexSeriesValidation],
        index: typing.Optional[typing.Union[int, str]] = None,
        position: bool = False
):
    """
    A utility method for setting the index data on a set of Validations

    Each validation is modified in place: its ``index`` and ``position`` attributes are overwritten. Nothing is
    returned.

    :param validations: An iterable of validations to modify
    :param index: The index of the series that these validations will now consider. Defaults to None.
    :param position: If true, these validations use positional indexing.
        See :py:class:`pandas_schema.validation.IndexSeriesValidation`
    """
    for valid in validations:
        valid.index = index
        valid.position = position

def validate(self, series: pd.Series) -> typing.List[ValidationWarning]:
    """
    Runs every validation held by this Column against a series and gathers whatever they report

    :param series: A pandas Series to validate
    :return: A list of everything yielded by each validation's get_errors call, in validation order
    """
    collected = []
    for check in self.validations:
        # Each validation receives both the series and this Column
        collected.extend(check.get_errors(series, self))
    return collected
def label_column(
        validations: typing.Iterable[validation.IndexSeriesValidation],
        index: typing.Union[int, str],
):
    """
    Points each of the given validations at a column identified by its label

    Delegates to ``_column`` with positional indexing switched off.

    :param validations: The validations to modify
    :param index: The label of the series that these validations will now consider
    :return: Whatever ``_column`` returns
    """
    return _column(validations, index, position=False)


def positional_column(
        validations: typing.Iterable[validation.IndexSeriesValidation],
        index: int,
):
    """
    Points each of the given validations at a column identified by its position

    Delegates to ``_column`` with positional indexing switched on.

    :param validations: The validations to modify
    :param index: The position of the series that these validations will now consider
    :return: Whatever ``_column`` returns
    """
    return _column(validations, index, position=True)
6 changes: 6 additions & 0 deletions pandas_schema/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@ class PanSchInvalidSchemaError(PanSchError):
"""


class PanSchNoIndexError(PanSchInvalidSchemaError):
    """
    Raised for a validation that was supplied without an index being specified for it
    """


class PanSchArgumentError(PanSchError):
"""
An argument passed to a function has an invalid type or value
Expand Down
2 changes: 1 addition & 1 deletion pandas_schema/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class Schema:
A schema that defines the columns required in the target DataFrame
"""

def __init__(self, columns: typing.Iterable[Column], ordered: bool = False):
def __init__(self, columns: typing.Iterable[Column], ordered: bool = False):
"""
:param columns: A list of column objects
:param ordered: True if the Schema should associate its Columns with DataFrame columns by position only, ignoring
Expand Down
Loading

2 comments on commit 2f7e7f3

@contang0
Copy link

@contang0 contang0 commented on 2f7e7f3 Jan 21, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this branch meant to address #25?

@multimeric
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, and more broadly any schema that needs more advanced validation than per-column checks.

Please sign in to comment.