You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
hello! After upgrading my unit tests started failing. and that's because of input data mutation. i attached as small as i can reproducible example. i can understand that the problem is that DotDict doesn't copy input dict, BUT the question is why it works on pydantic v2.4.2 and doesn't work on v2.5.2. i just want to understand what causes this issue and if isn't it a bug of pydantic or unplanned change
to reproduce:
pip install pydantic==2.4.2
run the script below. it will print "OK"
pip install pydantic==2.5.2
run the script below again. it will throw AssertionError
Example Code
importreimporttypingastfromcopyimportdeepcopyimporttyping_extensionsastefrompydanticimportBaseModel, ConfigDict, Field, GetJsonSchemaHandlerfrompydantic.json_schemaimportJsonSchemaValuefrompydantic_coreimportcore_schemadef_convert_camel_case_to_snake_case(string: str) ->str:
s=re.sub('(.)([A-Z][a-z]+)', r'\1_\2', string)
returnre.sub('([a-z0-9])([A-Z])', r'\1_\2', s).lower()
def_convert_snake_case_to_camel_case(string: str) ->str:
s=''.join([w.capitalize() forwinstring.split('_')])
returns[0].lower() +s[1:]
def_is_snake_case(string: str) ->bool:
returnstring==_convert_camel_case_to_snake_case(string)
def_convert_to_opposite_case(string: str) ->str:
if_is_snake_case(string):
return_convert_snake_case_to_camel_case(string)
return_convert_camel_case_to_snake_case(string)
classDotDict:
"""Dot notation for dictionaries. Note: If the record is out of the official lexicon, it`s impossible to deserialize it to a proper data model. Such models will fall back to dictionaries. All unknown "Union" types will also be caught as dicts. This class exists to provide an ability to use such fallbacks as “real” data models. Example: >>> test_data = {'a': 1, 'b': {'c': 2}, 'd': [{'e': 3}, 4, 5]} >>> model = DotDict(test_data) >>> assert isinstance(model, DotDict) >>> assert model.nonExistingField is None >>> assert model.a == 1 >>> assert model['a'] == 1 >>> assert model['b']['c'] == 2 >>> assert model.b.c == 2 >>> assert model.b['c'] == 2 >>> assert model['b'].c == 2 >>> assert model.d[0].e == 3 >>> assert model['d'][0]['e'] == 3 >>> assert model['d'][0].e == 3 >>> assert model['d'][1] == 4 >>> assert model['d'][2] == 5 >>> model['d'][0]['e'] = 6 >>> assert model['d'][0]['e'] == 6 >>> assert DotDict(test_data) == DotDict(test_data) >>> assert model.to_dict() == test_data """def__init__(self, data: dict) ->None:
self._data=datafork, vinself._data.items():
self.__setitem__(k, v)
defto_dict(self) ->dict:
"""Unwrap DotDict to Python built-in dict."""returndeepcopy(self._data)
def__getitem__(self, item: str) ->t.Optional[t.Any]:
value=self._data.get(item)
ifvalueisnotNone:
returnvaluereturnself._data.get(_convert_to_opposite_case(item))
__getattr__=__getitem__def__setitem__(self, key: str, value: t.Any) ->None:
ifkey=='_data':
super().__setattr__(key, value)
return# we store the field in case that was firstly meet to not create duplicatesifkeynotinself._dataand_is_snake_case(key):
key=_convert_snake_case_to_camel_case(key)
self._data.__setitem__(key, DotDict.__convert(value))
__setattr__=__setitem__def__eq__(self, other: t.Any) ->bool:
ifisinstance(other, DotDict):
returnself._data==other._dataifisinstance(other, dict):
returnself._data==otherraiseNotImplementedErrordef__str__(self) ->str:
returnstr(self._data)
def__repr__(self) ->str:
returnrepr(self._data)
def__reduce_ex__(self, protocol: int):
returngetattr(self._data, '__reduce_ex__', None)(protocol)
def__reduce__(self):
returngetattr(self._data, '__reduce__', None)()
@staticmethoddef__convert(obj: t.Any) ->t.Any:
ifisinstance(obj, dict):
returnDotDict(obj)
ifisinstance(obj, list):
return [DotDict.__convert(v) forvinobj]
ifisinstance(obj, set):
return {DotDict.__convert(v) forvinobj}
ifisinstance(obj, tuple):
returntuple(DotDict.__convert(v) forvinobj)
returnobjclass_DotDictPydanticAnnotation:
@classmethoddef__get_pydantic_core_schema__(
cls,
_source_type: t.Any,
_handler: t.Callable[[t.Any], core_schema.CoreSchema],
) ->core_schema.CoreSchema:
""" We return a pydantic_core.CoreSchema that behaves in the following ways: * dicts will be parsed as `DotDict` instances with the int as the _data attribute * `DotDict` instances will be parsed as `DotDict` instances without any changes * Nothing else will pass validation * Serialization will always return just a dict """defvalidate_from_dict(value: dict) ->DotDict:
returnDotDict(value)
from_dict_schema=core_schema.chain_schema(
[
core_schema.dict_schema(),
core_schema.no_info_plain_validator_function(validate_from_dict),
]
)
returncore_schema.json_or_python_schema(
json_schema=from_dict_schema,
python_schema=core_schema.union_schema(
[
# check if it's an instance first before doing any further workcore_schema.is_instance_schema(DotDict),
from_dict_schema,
]
),
serialization=core_schema.plain_serializer_function_ser_schema(lambdainstance: instance.to_dict()),
)
@classmethoddef__get_pydantic_json_schema__(
cls, _core_schema: core_schema.CoreSchema, handler: GetJsonSchemaHandler
) ->JsonSchemaValue:
# Use the same schema that would be used for `dict`returnhandler(core_schema.dict_schema())
DotDictType=te.Annotated[DotDict, _DotDictPydanticAnnotation]
classModelBase(BaseModel):
model_config=ConfigDict(extra='forbid', populate_by_name=True, strict=True)
classBlobRefLink(BaseModel):
link: str=Field(alias='$link')
classBlobRef(BaseModel):
model_config=ConfigDict(extra='forbid', populate_by_name=True, strict=True)
mime_type: str=Field(alias='mimeType')
size: intref: BlobRefLinkpy_type: te.Literal['blob'] =Field(default='blob', alias='$type')
classModel1(ModelBase):
blob: BlobRefpy_type: te.Literal['model1'] =Field(default='model1', alias='$type', frozen=True)
classModel2(ModelBase):
blob: BlobRefpy_type: te.Literal['model2'] =Field(default='model2', alias='$type', frozen=True)
classModel3(ModelBase):
blob: BlobRefpy_type: te.Literal['model3'] =Field(default='model3', alias='$type', frozen=True)
UnknownRecordTypePydantic=te.Annotated[
t.Union[
'Model1',
'Model2',
'Model3',
],
Field(discriminator='py_type'),
]
UnknownType: te.TypeAlias=t.Union[UnknownRecordTypePydantic, DotDictType]
classRecord(ModelBase):
value: 'UnknownType'if__name__=='__main__':
Record.model_rebuild()
test_data= {
'value': {
'$type': 'model1',
'blob': {
'$type': 'blob',
'ref': {
'$link': 'blabla'
},
'mimeType': 'image/png',
'size': 40930
},
}
}
instance=Record(**test_data)
# call again with the same input dict to reproduce the issueinstance2=Record(**test_data)
assertisinstance(instance.value.blob, BlobRef)
assertisinstance(instance.value.blob.ref, BlobRefLink)
# assert fails on pydantic >= 2.5.0# works fine on pydantic < 2.5.0assertisinstance(instance2.value.blob, BlobRef)
assertisinstance(instance2.value.blob.ref, BlobRefLink)
print('OK')
Thanks for sharing. The mutation is happening in your DotDict validator. If I add some prints, and adjust your __repr__ of DotDict to show it's a DotDict:
Then I see that calling DotDict(value) is replacing nested dictionaries inside value with DotDict instances.
The reason why this has only broken with the bump to 2.5 is that on 2.5 the new union behaviour is doing a little bit more work than before to check that DotDict is not a better match than your model instances. On 2.4 the DotDict validation was never run, on 2.5 it is now being run which is why you see the mutation bug arising.
@davidhewitt so the more strict validations in the new version of pydantic helped me to find the bug in my code. awesome! thank you. ig we can close it
Initial Checks
Description
hello! After upgrading my unit tests started failing. and that's because of input data mutation. i attached as small as i can reproducible example. i can understand that the problem is that DotDict doesn't copy input dict, BUT the question is why it works on pydantic v2.4.2 and doesn't work on v2.5.2. i just want to understand what causes this issue and if isn't it a bug of pydantic or unplanned change
to reproduce:
pip install pydantic==2.4.2
pip install pydantic==2.5.2
AssertionError
Example Code
Python, Pydantic & OS Version
The text was updated successfully, but these errors were encountered: