In [None]:
# default_exp core

# pydantic-pandas

> API details.

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
#exporti

from pandas.core.frame import DataFrame
from pydantic import (
    validator,
    root_validator
)
from pydantic import BaseModel as PydanticBaseModel
from pydantic.main import ModelMetaclass
from pydantic_pandas.default_standard_lib import *
from pydantic_pandas.utils import delegates

In [None]:
#export 

class TypedArray(pd.Series):
    
    @classmethod
    def __get_validators__(cls):
        yield cls.validate_type
        
    @classmethod
    def __modify_schema__(cls,field_schema:Dict)->Dict:
        field_schema.update(
            type="<Pandas Series>"
        )
        
    @classmethod
    def validate_type(cls,val):
        print('validating array')
        print(val)
        return cls(val)
    
class ArrayMeta(type):
    def __getitem__(self,t):
        return type('Array', (TypedArray,), {'inner_type':t})
    
class Array(pd.Series,metaclass=ArrayMeta):
    pass

def conarray(*args,**kwargs) -> Type[TypedArray]:
    
    return type('ConstrainedArray',(TypedArray,), kwargs)

In [None]:
#export 

class PandasDataFrame(DataFrame):
    """
    Pandas DataFrame Validation
    """

    @classmethod
    def __get_validators__(cls):
        # one or more validators may be yielded which will be called in the
        # order to validate the input, each validator will receive as an input
        # the value returned from the previous validator
        yield cls.validate

    @classmethod
    def __modify_schema__(cls, field_schema):
        # __modify_schema__ should mutate the dict it receives in place,
        # the returned value will be ignored
        field_schema.update(
            type='Pandas DataFrame'
        )

    @classmethod
    def validate(cls, v):
        if not isinstance(v, pd.DataFrame):
            raise TypeError(f'Dataframe required. Got {type(v)} instead')
        if v.empty:
            raise ValueError("Dataframe can't be empty")
        return v
    def __init__(self,*args,**kwargs):
        print('making the pandas data frame')
        super(PandasDataFrame,self).__init__(*args,**kwargs)

In [None]:
#exporti 

DataFrameModelMetaclass = ForwardRef('DataFrameModelMetaclass')

def extract_ddf_from_model_fields(model:ModelMetaclass) -> 'PandasDataFrame':
    """Returns default df for DataFrameModelMetaclass ._repr_html_() method"""
    d={}
    for k,v in model.__fields__.items():
        d[k]=[v.required,v.type_]
    return pd.DataFrame(d,index=['required','type'])

In [None]:
#export 

class DataFrameModelMetaclass(ModelMetaclass):
    def __new__(cls,name,bases,dct):
        print("in DF Meta __new__")
        print(name)
        print(bases)

        d = dct.get('__annotations__')
        if d:
            d = {k:conarray(v) for k,v in d.items()}
            dct['__annotations__']=d
        print(dct)
        model = ModelMetaclass.__new__(cls,name,bases,dct)
        model._default_df_ = extract_ddf_from_model_fields(model)
        
        return model
#     def __getitem__(self,t):
#         return type('Array', (TypedArray,), {'inner_type':t})
    
#     def _repr_json_(cls):
        
#         both_schemas = dict(
#             pandas_schema = json.loads(cls._default_df_.to_json()),
#             pydantic_schema = cls.schema()
#         )
#         return both_schemas
    
    def _repr_html_(cls):
        return cls._default_df_.to_html()

class BaseFrame(PydanticBaseModel,PandasDataFrame,metaclass=DataFrameModelMetaclass):
    """Doc from BaseFame"""
    def __init__(self,*args,**kwargs):
        print("base frame init")
        super(BaseFrame,self).__init__(**kwargs)
        super(PandasDataFrame,self).__init__(self.dict())
    
    @root_validator()
    def _base_frame_root_validator(cls,values):
        print('In Base Frame Root validator')
        return values
    

in DF Meta __new__
BaseFrame
(<class 'pydantic.main.BaseModel'>, <class '__main__.PandasDataFrame'>)
{'__module__': '__main__', '__qualname__': 'BaseFrame', '__doc__': 'Doc from BaseFame', '__init__': <function BaseFrame.__init__ at 0x7fbb2d226c20>, '_base_frame_root_validator': <classmethod object at 0x7fbb2d230650>, '__classcell__': <cell at 0x7fbb2d2304d0: empty>}


In [None]:
class MovieModel(BaseFrame):
    title: str
    year: int

in DF Meta __new__
MovieModel
(<class '__main__.BaseFrame'>,)
{'__module__': '__main__', '__qualname__': 'MovieModel', '__annotations__': {'title': <class '__main__.ConstrainedArray'>, 'year': <class '__main__.ConstrainedArray'>}}


In [None]:
MovieModel

__main__.MovieModel

In [None]:
show_doc(MovieModel)

<h2 id="MovieModel" class="doc_header"><code>class</code> <code>MovieModel</code><a href="" class="source_link" style="float:right">[source]</a></h2>

> <code>MovieModel</code>(**\*`args`**, **`title`**:`ConstrainedArray`, **`year`**:`ConstrainedArray`) :: [`BaseFrame`](/pydantic_pandas/core.html#BaseFrame)

Doc from BaseFame

In [None]:
#MovieModel(title='cocktail',year=1988)

In [None]:
movies = MovieModel(
        title=['Cocktail','Cheers','That Thing You Do!'],
        year=[1988,1982,1996]
    )


base frame init
validating array
['Cocktail', 'Cheers', 'That Thing You Do!']
validating array
[1988, 1982, 1996]
In Base Frame Root validator


In [None]:
movies

Unnamed: 0,title,year
0,Cocktail,1988
1,Cheers,1982
2,That Thing You Do!,1996


In [None]:
try:
    MovieModel.from_records(movies.to_dict('records'))
except Exception as e:
    print("I wish this worked")
    print(e)

base frame init
In Base Frame Root validator
I wish this worked
2 validation errors for MovieModel
title
  field required (type=value_error.missing)
year
  field required (type=value_error.missing)


## How about Sub-classing ? 

In [None]:
class DetailedMovieModel(MovieModel):
    review: str
    stars: int

in DF Meta __new__
DetailedMovieModel
(<class '__main__.MovieModel'>,)
{'__module__': '__main__', '__qualname__': 'DetailedMovieModel', '__annotations__': {'review': <class '__main__.ConstrainedArray'>, 'stars': <class '__main__.ConstrainedArray'>}}


In [None]:
DetailedMovieModel(
    review=['Perfect','Amazing','The Best'],
    stars=[10,10,10],
    **movies.dict(
        include=set(movies.__fields__.keys()),
    )
)

base frame init
validating array
0              Cocktail
1                Cheers
2    That Thing You Do!
dtype: object
validating array
0    1988
1    1982
2    1996
dtype: int64
validating array
['Perfect', 'Amazing', 'The Best']
validating array
[10, 10, 10]
In Base Frame Root validator


Unnamed: 0,title,year,review,stars
0,Cocktail,1988,Perfect,10
1,Cheers,1982,Amazing,10
2,That Thing You Do!,1996,The Best,10


In [None]:
!nbdev_build_lib

Converted 00_core.ipynb.
Converted 98_utils.ipynb.
Converted 99_default_standard_lib.ipynb.
Converted core.ipynb.
Converted index.ipynb.
