Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
deprecate allow_duplicates, pandas_dtype, transformer, PandasDtype enum (#811)

* deprecate allow_duplicates, pandas_dtype, transformer, PandasDtype enum

* autoupdate pre-commit

* downgrade pylint

* fix pylint
  • Loading branch information
cosmicBboy committed Apr 1, 2022
1 parent a7268e0 commit 6800848
Show file tree
Hide file tree
Showing 18 changed files with 32 additions and 399 deletions.
10 changes: 5 additions & 5 deletions .pre-commit-config.yaml
Expand Up @@ -2,7 +2,7 @@ exclude: (^asv_bench|setup.py|requirements-dev.txt)

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.0.1
rev: v4.1.0
hooks:
- id: check-ast
description: Simply check whether files parse as valid python
Expand All @@ -24,18 +24,18 @@ repos:
description: Replaces or checks mixed line ending

- repo: https://github.com/pre-commit/mirrors-isort
rev: v5.9.3
rev: v5.10.1
hooks:
- id: isort
args: ["--line-length=79", "--skip=docs/source/conf.py"]

- repo: https://github.com/ikamensh/flynt
rev: '0.69'
rev: '0.76'
hooks:
- id: flynt

- repo: https://github.com/psf/black
rev: 22.1.0
rev: 22.3.0
hooks:
- id: black
args: ["--line-length=79"]
Expand All @@ -48,7 +48,7 @@ repos:
exclude: (^docs/|^scripts)

- repo: https://github.com/pre-commit/mirrors-mypy
rev: v0.921
rev: v0.942
hooks:
- id: mypy
entry: mypy pandera tests
Expand Down
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/series_schema.py
Expand Up @@ -20,7 +20,7 @@ def setup(self):
Check(lambda x: len(x) > 3, element_wise=True)
],
nullable=False,
allow_duplicates=True,
unique=False,
name="my_series")
self.series = pd.Series(["foobar", "foobar", "foobar"],
name="my_series")
Expand Down
15 changes: 0 additions & 15 deletions docs/source/reference/dtypes.rst
Expand Up @@ -119,18 +119,3 @@ Engines
pandera.engines.engine.Engine
pandera.engines.numpy_engine.Engine
pandera.engines.pandas_engine.Engine


PandasDtype Enum
----------------

.. warning::

This class is deprecated and will be removed from the pandera API in ``0.9.0``.

.. autosummary::
:toctree: generated
:template: class.rst
:nosignatures:

pandera.engines.pandas_engine.PandasDtype
3 changes: 0 additions & 3 deletions docs/source/schema_models.rst
Expand Up @@ -228,9 +228,6 @@ however, a couple of gotchas.
Dtype aliases
^^^^^^^^^^^^^

:mod:`pandera.typing` aliases will be deprecated in a future version,
please use :class:`~pandera.dtypes.DataType` subclasses instead.

.. code-block::
import pandera as pa
Expand Down
2 changes: 1 addition & 1 deletion docs/source/series_schemas.rst
Expand Up @@ -27,7 +27,7 @@ The :class:`~pandera.schemas.SeriesSchema` class allows for the validation of pa
pa.Check(lambda x: len(x) > 3, element_wise=True)
],
nullable=False,
allow_duplicates=True,
unique=False,
name="my_series")

validated_series = schema.validate(
Expand Down
5 changes: 1 addition & 4 deletions pandera/__init__.py
Expand Up @@ -42,9 +42,8 @@
UINT16,
UINT32,
UINT64,
pandas_version,
)
from pandera.engines.pandas_engine import _PandasDtype as PandasDtype
from pandera.engines.pandas_engine import pandas_version

from . import errors, pandas_accessor, typing
from .checks import Check
Expand Down Expand Up @@ -128,8 +127,6 @@
"UINT32",
"UINT64",
# pandera.engines.pandas_engine
"PandasDtype",
# pandera.engines.pandas_engine
"pandas_version",
# checks
"Check",
Expand Down
38 changes: 0 additions & 38 deletions pandera/deprecations.py

This file was deleted.

105 changes: 0 additions & 105 deletions pandera/engines/pandas_engine.py
Expand Up @@ -11,7 +11,6 @@
import datetime
import inspect
import warnings
from enum import Enum
from typing import Any, Dict, Iterable, List, Optional, Type, Union

import numpy as np
Expand Down Expand Up @@ -803,107 +802,3 @@ def _coerce_row(row):
),
)
return coerced_df.drop(["failure_cases"], axis="columns")


class PandasDtype(Enum):
# pylint: disable=line-too-long,invalid-name
"""Enumerate all valid pandas data types.
This class simply enumerates the valid numpy dtypes for pandas arrays.
For convenience ``PandasDtype`` enums can all be accessed in the top-level
``pandera`` name space via the same enum name.
.. warning::
This class is deprecated and will be removed in pandera v0.9.0. Use
python types, pandas type string aliases, numpy dtypes, or pandas
dtypes instead. See :ref:`dtypes` for details.
:examples:
>>> import pandas as pd
>>> import pandera as pa
>>>
>>>
>>> pa.SeriesSchema(pa.PandasDtype.Int).validate(pd.Series([1, 2, 3]))
0 1
1 2
2 3
dtype: int64
>>> pa.SeriesSchema(pa.PandasDtype.Float).validate(pd.Series([1.1, 2.3, 3.4]))
0 1.1
1 2.3
2 3.4
dtype: float64
>>> pa.SeriesSchema(pa.PandasDtype.String).validate(pd.Series(["a", "b", "c"]))
0 a
1 b
2 c
dtype: object
"""

# numpy data types
Bool = "bool" #: ``"bool"`` numpy dtype
DateTime = "datetime64" #: ``"datetime64[ns]"`` numpy dtype
Timedelta = "timedelta64" #: ``"timedelta64[ns]"`` numpy dtype
Float = "float" #: ``"float"`` numpy dtype
Float16 = "float16" #: ``"float16"`` numpy dtype
Float32 = "float32" #: ``"float32"`` numpy dtype
Float64 = "float64" #: ``"float64"`` numpy dtype
Int = "int" #: ``"int"`` numpy dtype
Int8 = "int8" #: ``"int8"`` numpy dtype
Int16 = "int16" #: ``"int16"`` numpy dtype
Int32 = "int32" #: ``"int32"`` numpy dtype
Int64 = "int64" #: ``"int64"`` numpy dtype
UInt8 = "uint8" #: ``"uint8"`` numpy dtype
UInt16 = "uint16" #: ``"uint16"`` numpy dtype
UInt32 = "uint32" #: ``"uint32"`` numpy dtype
UInt64 = "uint64" #: ``"uint64"`` numpy dtype
Object = "object" #: ``"object"`` numpy dtype
Complex = "complex" #: ``"complex"`` numpy dtype
Complex64 = "complex64" #: ``"complex64"`` numpy dtype
Complex128 = "complex128" #: ``"complex128"`` numpy dtype
Complex256 = "complex256" #: ``"complex256"`` numpy dtype

# pandas data types
Category = "category" #: pandas ``"category"`` datatype
INT8 = "Int8" #: ``"Int8"`` pandas dtype: pandas 0.24.0+
INT16 = "Int16" #: ``"Int16"`` pandas dtype: pandas 0.24.0+
INT32 = "Int32" #: ``"Int32"`` pandas dtype: pandas 0.24.0+
INT64 = "Int64" #: ``"Int64"`` pandas dtype: pandas 0.24.0+
FLOAT32 = "Float32" #: ``"Float32"`` pandas dtype: pandas 1.2.0+
FLOAT64 = "Float64" #: ``"Float64"`` pandas dtype: pandas 1.2.0+
UINT8 = "UInt8" #: ``"UInt8"`` pandas dtype: pandas 0.24.0+
UINT16 = "UInt16" #: ``"UInt16"`` pandas dtype: pandas 0.24.0+
UINT32 = "UInt32" #: ``"UInt32"`` pandas dtype: pandas 0.24.0+
UINT64 = "UInt64" #: ``"UInt64"`` pandas dtype: pandas 0.24.0+
String = "str" #: ``"str"`` numpy dtype

#: ``"string"`` pandas dtypes: pandas 1.0.0+. For <1.0.0, this enum will
#: fall back on the str-as-object-array representation.
STRING = "string"


# NOTE: This is a hack to raise a deprecation warning to show for users who
# are still using the PandasDtype enum.
# pylint:disable=invalid-name
class __PandasDtype__:
    """Deprecation shim that stands in for the ``PandasDtype`` enum.

    Attribute access is forwarded to the wrapped enum. Every successful
    member lookup emits a ``DeprecationWarning`` and returns
    ``Engine.dtype(<member>.value)`` instead of the raw enum member.
    Iterating over the shim yields the enum member names.
    """

    def __init__(self):
        self.pandas_dtypes = PandasDtype

    def __getattr__(self, name):
        """Return the engine dtype for enum member ``name``, with a warning.

        :param name: name of a ``PandasDtype`` member.
        :returns: the result of ``Engine.dtype`` applied to the member value.
        :raises AttributeError: if ``name`` is not an attribute of the enum.
        """
        if name == "pandas_dtypes":
            # __getattr__ only runs when normal lookup fails, so reaching
            # this point means __init__ was bypassed (e.g. copy/pickle
            # probing the bare instance). Fail fast instead of recursing
            # through ``self.pandas_dtypes`` until RecursionError.
            raise AttributeError(name)
        # Resolve first so that failed probes (hasattr, dunder lookups)
        # raise AttributeError without a misleading deprecation warning.
        member = getattr(self.pandas_dtypes, name)
        warnings.warn(
            "The PandasDtype class is deprecated and will be removed in "
            "pandera v0.9.0. Use python types, pandas type string aliases, "
            "numpy dtypes, or pandas dtypes instead.",
            DeprecationWarning,
            # attribute the warning to the caller's line, not this module
            stacklevel=2,
        )
        return Engine.dtype(member.value)

    def __iter__(self):
        """Yield the name of every member of the wrapped enum."""
        for k in self.pandas_dtypes:
            yield k.name


# Module-level instance of the deprecation shim; ``pandera/__init__.py``
# imports it under the public name ``PandasDtype``.
_PandasDtype = __PandasDtype__()
3 changes: 0 additions & 3 deletions pandera/io.py
Expand Up @@ -197,9 +197,6 @@ def _deserialize_component_stats(serialized_component_stats):
"name",
"nullable",
"unique",
# deserialize allow_duplicates property for backwards
# compatibility. Remove this for 0.8.0 release
"allow_duplicates",
"coerce",
"required",
"regex",
Expand Down
2 changes: 1 addition & 1 deletion pandera/model.py
Expand Up @@ -225,7 +225,7 @@ def to_schema(cls) -> DataFrameSchema:
columns,
index=index,
checks=cls.__dataframe_checks__, # type: ignore
**kwargs,
**kwargs, # type: ignore
)
if cls not in MODEL_CACHE:
MODEL_CACHE[cls] = cls.__schema__ # type: ignore
Expand Down
29 changes: 7 additions & 22 deletions pandera/model_components.py
Expand Up @@ -49,7 +49,6 @@ class FieldInfo:
"checks",
"nullable",
"unique",
"allow_duplicates",
"coerce",
"regex",
"check_name",
Expand All @@ -65,7 +64,6 @@ def __init__(
checks: Optional[_CheckList] = None,
nullable: bool = False,
unique: bool = False,
allow_duplicates: Optional[bool] = None,
coerce: bool = False,
regex: bool = False,
alias: Any = None,
Expand All @@ -77,7 +75,6 @@ def __init__(
self.checks = _to_checklist(checks)
self.nullable = nullable
self.unique = unique
self.allow_duplicates = allow_duplicates
self.coerce = coerce
self.regex = regex
self.alias = alias
Expand Down Expand Up @@ -124,30 +121,29 @@ def __set__(self, instance: Any, value: Any) -> None: # pragma: no cover

def _to_schema_component(
self,
pandas_dtype: PandasDtypeInputTypes,
dtype: PandasDtypeInputTypes,
component: Type[SchemaComponent],
checks: _CheckList = None,
**kwargs: Any,
) -> SchemaComponent:
if self.dtype_kwargs:
pandas_dtype = pandas_dtype(**self.dtype_kwargs) # type: ignore
dtype = dtype(**self.dtype_kwargs) # type: ignore
checks = self.checks + _to_checklist(checks)
return component(pandas_dtype, checks=checks, **kwargs) # type: ignore
return component(dtype, checks=checks, **kwargs) # type: ignore

def to_column(
self,
pandas_dtype: PandasDtypeInputTypes,
dtype: PandasDtypeInputTypes,
checks: _CheckList = None,
required: bool = True,
name: str = None,
) -> Column:
"""Create a schema_components.Column from a field."""
return self._to_schema_component(
pandas_dtype,
dtype,
Column,
nullable=self.nullable,
unique=self.unique,
allow_duplicates=self.allow_duplicates,
coerce=self.coerce,
regex=self.regex,
required=required,
Expand All @@ -159,17 +155,16 @@ def to_column(

def to_index(
self,
pandas_dtype: PandasDtypeInputTypes,
dtype: PandasDtypeInputTypes,
checks: _CheckList = None,
name: str = None,
) -> Index:
"""Create a schema_components.Index from a field."""
return self._to_schema_component(
pandas_dtype,
dtype,
Index,
nullable=self.nullable,
unique=self.unique,
allow_duplicates=self.allow_duplicates,
coerce=self.coerce,
name=name,
checks=checks,
Expand All @@ -196,7 +191,6 @@ def Field(
str_startswith: Optional[str] = None,
nullable: bool = False,
unique: bool = False,
allow_duplicates: Optional[bool] = None,
coerce: bool = False,
regex: bool = False,
ignore_na: bool = True,
Expand All @@ -221,14 +215,6 @@ def Field(
:param nullable: Whether or not the column/index can contain null values.
:param unique: Whether column values should be unique.
:param allow_duplicates: Whether or not column can contain duplicate
values.
.. warning::
This option will be deprecated in 0.8.0. Use the ``unique``
argument instead.
:param coerce: coerces the data type if ``True``.
:param regex: whether or not the field name or alias is a regex pattern.
:param ignore_na: whether or not to ignore null values in the checks.
Expand Down Expand Up @@ -278,7 +264,6 @@ def Field(
checks=checks or None,
nullable=nullable,
unique=unique,
allow_duplicates=allow_duplicates,
coerce=coerce,
regex=regex,
check_name=check_name,
Expand Down

0 comments on commit 6800848

Please sign in to comment.