# Migrating to Pydantic

It makes sense to migrate to a well supported package for handling metadata, like `Pydantic`.  

This will also remove the costly step of reading the JSON files each time an object is initiated and weird things that happen when you set `_attr_dict`.  

Steps:

1. Convert existing JSON to JSON Schema files and move to `mt_metadata.mt_metadata.standards` folder for reference later.
2. Convert the JSON Schema files to `pydantic.BaseModel` objects using `datamodel-code-generator`

## 1. Convert existing JSON

This is relatively straight forward just a simple mapping and inclusion of some other things.  The mapping is being done in `mt_metadata.utils.converters.to_json_schema`.

## 2. Convert JSON Schema to pydantic.BaseModel

This is tougher.  For now use `datamodel-code-generator`.

```
datamodel-codegen.exe --input .\person_schema.json --input-file-type jsonschema --field-constraints --formatters black --use-annotated --output person_model.py
```

In [72]:
from typing import Literal, Optional, Annotated, List
from pydantic import BaseModel, Field, ConfigDict, create_model
from pydantic.fields import PrivateAttr, FieldInfo
import json

In [2]:
def custom_docstring():
    def decorator(cls):
        cls.__doc__ = cls.model_json_schema()["properties"]
        return cls
    return decorator

# @custom_docstring()
# class Order(BaseModel):
#     order_id: int
#     amount: float

# print(Order.__doc__)

In [None]:
class DocPreservingModelMetaclass(ModelMetaclass):
     def __new__(mcs, name, bases, namespace, **kwargs):
         doc = namespace.pop('__doc__', None)
         cls = super().__new__(mcs, name, bases, namespace, **kwargs)
         cls.__doc__ = doc
         return cls

In [85]:
def get_all_fields(model: BaseModel):  # -> Dict[str, Any]:
    fields = {}
    for field_name, field_value in model.model_fields.items():
        if hasattr(field_value.annotation, "model_fields"):
            print(field_value)
            fields[field_name] = field_value.annotation().all_attributes()
        else:
            fields[field_name] = field_value

    return fields


class Dipole(BaseModel):
    model_config = ConfigDict(validate_assignment=True, use_attribute_docstrings=True)

    length: float = Field(
        default=0,
        description="dipole length",
        examples=10,
        required=True,
        json_schema_extra={"units": "m", "required": True},
    )
    azimuth: float = Field(
        default=0,
        description="measurement azimuth clockwise positive from north",
        examples=36,
        json_schema_extra={"units": "degrees"},
    )

    def all_attributes(self):
        return get_all_fields(self)


class EX(BaseModel):
    """


    EX
    """


    model_config = ConfigDict(
        validate_assignment=True, use_attribute_docstrings=True, extra="allow"
    )

    _default_keys: List[str] = PrivateAttr(
        default=["title", "annotation", "default", "examples", "description"],
    )
    _json_extras: List[str] = ["units"]


    type: Optional[str] = Field(

        default_factory="",
        description="name of person",

        examples="ann",
        required=True,
    )


    name: str = Field(

        default="EX",

        description="The name of the EX.",

        title="Name",
    )


    description: str = Field(
        default="a",

        description="A brief description of the EX.",

        title="Description",
    )

    dipole: Dipole = Dipole()

    # __doc__ = json.dumps(EX().model_dump_json(), indent=4)

    @property
    def other(self):
        return 10

    # def __pydantic_init_subclass__(cls) -> None:
    #     cls.__doc__ = cls.model_json_schema()

    def __str__(self):
        return json.dumps(self.model_dump_json(), indent=4)

    def __eq__(self, other):
        if isinstance(other, EX):
            return self.model_dump() == other.model_dump()
        return False

    def all_attributes(self):
        return get_all_fields(self)

    # @classmethod
    # def get_required_fields(cls: BaseModel, recursive: bool = True):# -> Iterator[str]:
    #     for field_name, field_info in cls.model_fields.items():
    #         # if not field_info.is_required():
    #         #     continue

    #         if not recursive or not hasattr(field_type_hint := field_info.annotation, "model_fields"):
    #             yield field_name
    #         else:
    #             yield from cls.get_required_fields(field_type_hint, recursive=True)

    # def get_all_fields(cls: BaseModel): # -> Dict[str, Any]:
    #     fields = {}
    #     for field_name, field_value in cls.model_fields.items():
    #         if hasattr(field_value.annotation, "model_fields"):
    #             print(field_value)
    #             attr_obj = getattr(cls, field_name)
    #             fields[field_name] = attr_obj.get_all_fields()
    #         else:
    #             fields[field_name] = field_value
    #     return fields

In [86]:
a = EX(name="a", type="a")
b = EX(name="a", description="new", type="b")


In [92]:
d = a.model_fields
d["new"] = FieldInfo(annotation=str, default="k", description="new_field")

fields = {k: (v.annotation, v) for k, v in d.items()}

new = create_model("new_model", __base__=EX, **fields)

In [96]:
d["new"]

FieldInfo(annotation=str, required=False, default='k', description='new_field')

In [99]:
a.name


'a'

In [81]:
fields

{'type': (typing.Optional[str],
  FieldInfo(annotation=Union[str, NoneType], required=False, default_factory=str, description='name of person', examples='ann', json_schema_extra={'required': True})),
 'name': (str,
  FieldInfo(annotation=str, required=False, default='EX', title='Name', description='The name of the EX.')),
 'description': (str,
  FieldInfo(annotation=str, required=False, default='a', title='Description', description='A brief description of the EX.')),
 'dipole': (__main__.Dipole,
  FieldInfo(annotation=Dipole, required=False, default=Dipole(length=0, azimuth=0))),
 'new': (str,
  FieldInfo(annotation=str, required=False, default='k', description='new_field'))}

In [68]:
c = a.model_copy(update={"new": Annotated[str,Field(default="k", description="new_field")]})

In [70]:
c.new

typing.Annotated[str, FieldInfo(annotation=NoneType, required=False, default='k', description='new_field')]

In [52]:
from mt_metadata.base import helpers

In [54]:
helpers.flatten_dict(a.all_attributes())

annotation=Dipole required=False default=Dipole(length=0, azimuth=0)


{'type': FieldInfo(annotation=Union[str, NoneType], required=False, default_factory=str, description='name of person', examples='ann', json_schema_extra={'required': True}),
 'name': FieldInfo(annotation=str, required=False, default='EX', title='Name', description='The name of the EX.'),
 'description': FieldInfo(annotation=str, required=False, default='a', title='Description', description='A brief description of the EX.'),
 'dipole.length': FieldInfo(annotation=float, required=False, default=0, description='dipole length', examples=10, json_schema_extra={'units': 'm', 'required': True}),
 'dipole.azimuth': FieldInfo(annotation=float, required=False, default=0, description='measurement azimuth clockwise positive from north', examples=36, json_schema_extra={'units': 'degrees'})}

In [60]:
type(a.dipole.model_fields["length"].json_schema_extra)


dict

In [39]:

a.model_json_schema()


{'$defs': {'Dipole': {'properties': {'length': {'default': 0,
     'description': 'dipole length',
     'examples': 10,
     'required': True,
     'title': 'Length',
     'type': 'number',
     'units': 'm'},
    'azimuth': {'default': 0,
     'description': 'measurement azimuth clockwise positive from north',
     'examples': 36,
     'title': 'Azimuth',
     'type': 'number',
     'units': 'degrees'}},
   'title': 'Dipole',
   'type': 'object'}},
 'description': 'EX',
 'properties': {'default_keys': {'default': ['title',
    'annotation',
    'default',
    'examples',
    'description'],
   'description': 'default keys to look for',
   'items': {'type': 'string'},
   'title': 'Default Keys',
   'type': 'array'},
  'type': {'anyOf': [{'type': 'string'}, {'type': 'null'}],
   'description': 'name of person',
   'examples': 'ann',
   'required': True,
   'title': 'Type'},
  'name': {'default': 'EX',
   'description': 'The name of the EX.',
   'title': 'Name',
   'type': 'string'},
  '

In [85]:
sorted(c.model_fields.keys())

['description', 'dipole', 'name', 'type']

In [88]:
f = c.model_fields["type"]


In [92]:
c.model_fields

{'type': FieldInfo(annotation=Union[str, NoneType], required=False, default_factory=str, description='name of person', examples='ann', json_schema_extra={'required': True}),
 'name': FieldInfo(annotation=str, required=False, default='EX', title='Name', description='The name of the EX.'),
 'description': FieldInfo(annotation=str, required=False, default='a', title='Description', description='A brief description of the EX.'),
 'dipole': FieldInfo(annotation=Dipole, required=False, default=Dipole(length=0, azimuth=0))}

In [26]:
with open(r"c:\Users\peaco\OneDrive\Documents\GitHub\mt_metadata\examples\notebooks\person_schema.json", "w") as fid:
    json.dump(a.model_json_schema(), fid, indent=4)

In [25]:
with open(r"c:\Users\peaco\OneDrive\Documents\GitHub\mt_metadata\mt_metadata\timeseries\standards\person.json", "r") as fid:
    d = json.load(fid)

In [None]:

def convert_to_json_schema(old, object_name):
    new = {"title": object_name}
    new["type"] = "object"
    new["properties"] = {}
    new["required"] = []
    new["description"] = object_name
    for key, value in old.items():
        new["properties"][key] = {}
        new["properties"][key]["type"] = value["type"]
        new["properties"][key]["description"] = value["description"]
        new["properties"][key]["title"] = key
        new["properties"][key]["examples"] = value["example"]
        new["properties"][key]["default"] = value["default"]
        if value["required"]:
            new["required"].append(key)
        # need to sort out string formats
    return new



In [35]:
nd = convert_to_json_schema(d, "person")

In [37]:
with open(r"c:\Users\peaco\OneDrive\Documents\GitHub\mt_metadata\examples\notebooks\person_schema.json", "w") as fid:
    json.dump(nd, fid, indent=4)

In [1]:
from mt_metadata.timeseries.person_test import Person

In [3]:
p = Person(name="steve", time="1980-01-01T00:00:00")

In [5]:
p.time = "2020-01-01 00:00:00"

In [9]:
p.model_fields["name"].description

'Persons name, should be full first and last name.'