In [4]:
import pandas as pd
import pandera as pa

# Sample frame to validate: one integer, one float and one string column.
# NOTE: the third entry of column3 ("vlue_3") is left as-is; the validation
# cells further down report it as a failure case.
df = pd.DataFrame(
    dict(
        column1=[1, 4, 0, 10, 9],
        column2=[-1.3, -1.4, -2.9, -10.1, -20.4],
        column3=["value_1", "value_2", "vlue_3", "value_2", "value_1"],
    )
)

In [5]:
# Object-based schema for `df`: each key is a column name and each pa.Column
# pairs the expected dtype with the checks applied to that column.
schema = pa.DataFrameSchema({
    # integers less than or equal to 10
    "column1": pa.Column(int, checks=pa.Check.le(10)),
    # floats strictly less than -1.2
    "column2": pa.Column(float, checks=pa.Check.lt(-1.2)),
    "column3": pa.Column(str, checks=[
        pa.Check.str_startswith("value_"),
        # Custom checks are functions that take a Series as input and output
        # a boolean or a boolean Series.
        # Count the parts of every row instead of using
        # `split("_", expand=True).shape[1]`: the expanded frame is as wide as
        # the row with the MOST parts, so shorter rows would slip through, and
        # a single scalar result cannot say which rows failed.
        pa.Check(lambda s: s.str.split("_").str.len() == 2),
    ]),
})

In [6]:
# Run the schema against the sample frame. With the data defined above this
# raises SchemaError (shown in the output below), so the print is only reached
# once the data is valid.
validated_df = schema.validate(df)
print(validated_df)

SchemaError: <Schema Column(name=column3, type=DataType(str))> failed element-wise validator 0:
<Check str_startswith: str_startswith('value_')>
failure cases:
   index failure_case
0      2       vlue_3

In [7]:
from pandera.typing import Series

# Class-based equivalent of the object-based `schema` above: each annotated
# attribute declares a column's dtype and pa.Field carries its built-in checks.
class Schema(pa.DataFrameModel):

    # integers less than or equal to 10
    column1: int = pa.Field(le=10)
    # floats strictly less than -1.2
    column2: float = pa.Field(lt=-1.2)
    # strings that start with "value_"
    column3: str = pa.Field(str_startswith="value_")

    @pa.check("column3")
    def column_3_check(cls, series: Series[str]) -> Series[bool]:
        """Check that each column3 value has two elements after being split with '_'.

        Returns one boolean per row, matching the ``Series[bool]`` annotation.
        The parts are counted per row rather than via
        ``split("_", expand=True).shape[1]``, which only reflects the row with
        the most parts and collapses the result to a single bool.
        """
        return series.str.split("_").str.len() == 2

# Validate the sample frame against the class-based model. The recorded output
# below is a SchemaError: the value "vlue_3" (index 2 of column3) fails the
# str_startswith('value_') check.
Schema.validate(df)

SchemaError: <Schema Column(name=column3, type=DataType(str))> failed element-wise validator 0:
<Check str_startswith: str_startswith('value_')>
failure cases:
   index failure_case
0      2       vlue_3