In [15]:
import pydantic
from datetime import datetime
from pydantic import BaseModel, PositiveInt, Field,  AfterValidator,  BeforeValidator, ValidationError
import regex as re
from typing import Annotated
import json

In [65]:
def safe_json(obj):
    """Fallback encoder for values the ``json`` module cannot serialize.

    Intended to be passed as ``default=`` to ``json.dump``/``json.dumps``.

    Args:
        obj: The value that needs a JSON-compatible stand-in.

    Returns:
        * ``bytes``/``bytearray``: the UTF-8 decoded text, with undecodable
          bytes replaced rather than raising.
        * ``Exception`` instances: the exception message (``str(obj)``).
        * Any other object exposing ``__dict__``: that attribute dictionary.
        * Everything else: ``str(obj)``.
    """
    # Binary payloads become text; errors="replace" keeps decoding from raising.
    if isinstance(obj, (bytes, bytearray)):
        return obj.decode("utf-8", errors="replace")

    # Exceptions are checked before the generic __dict__ branch so that the
    # message is kept instead of the exception's attribute dictionary.
    if isinstance(obj, Exception):
        return str(obj)

    # Generic objects are represented by their attribute dictionary.
    if hasattr(obj, "__dict__"):
        return vars(obj)

    # Last resort for anything else.
    return str(obj)

**Documentation:** https://docs.pydantic.dev/latest/

In [23]:
class User(BaseModel):
    """Example model used to demonstrate Pydantic parsing and validation.

    None of the fields declares a default value.
    """

    id: int
    name: str
    # Either a datetime or None.
    signup_ts: datetime | None
    # Maps a taste name to a positive integer score.
    tastes: dict[str, PositiveInt]

In [39]:
# Raw input that does not match the declared field types exactly:
#   * signup_ts is a string that gets parsed into a datetime,
#   * the bytes key b"cheese" is coerced to the str key "cheese",
#   * the string "1" is coerced to the integer 1.
external_data = {
    "id": 123,
    "name": "John Doe",
    "signup_ts": "2019-06-01 12:22",
    "tastes": {
        "wine": 9,
        b"cheese": 7,
        "cabbage": "1",
    },
}

user = User(**external_data)

# One print call, one line per item.
print(
    f"User: {user}",
    f"Type: {type(user)}",
    f"User ID: {user.id}",
    f"User Name: {user.name}",
    f"User Signup: {user.signup_ts}",
    f"User Tastes: {user.tastes}",
    sep="\n",
)


User: id=123 name='John Doe' signup_ts=datetime.datetime(2019, 6, 1, 12, 22) tastes={'wine': 9, 'cheese': 7, 'cabbage': 1}
Type: <class '__main__.User'>
User ID: 123
User Name: John Doe
User Signup: 2019-06-01 12:22:00
User Tastes: {'wine': 9, 'cheese': 7, 'cabbage': 1}


In [7]:
user.model_dump()   # returns the model's fields as a dictionary; signup_ts stays a datetime object

{'id': 123,
 'name': 'John Doe',
 'signup_ts': datetime.datetime(2019, 6, 1, 12, 22),
 'tastes': {'wine': 9, 'cheese': 7, 'cabbage': 1}}

**Missing field error**

In [45]:
# "name" is deliberately left out, so validation fails with a "missing" error.
external_data = {
    "id": 123,
    "signup_ts": "2019-06-01 12:22",
    "tastes": {"wine": 9, "cheese": 7, "cabbage": "1"},
}

try:
    user = User(**external_data)
except ValidationError as exc:
    # errors() gives a list of dicts, one per validation failure.
    error_dict = exc.errors()
    print(error_dict)

    # Persist the details; safe_json stands in for values json cannot encode.
    with open("pydantic_errors_1.json", "w") as out_file:
        json.dump(error_dict, out_file, indent=4, default=safe_json)
    

[{'type': 'missing', 'loc': ('name',), 'msg': 'Field required', 'input': {'id': 123, 'signup_ts': '2019-06-01 12:22', 'tastes': {'wine': 9, 'cheese': 7, 'cabbage': '1'}}, 'url': 'https://errors.pydantic.dev/2.5/v/missing'}]


In [41]:
# "id" is supplied as the string "123"; Pydantic parses it into the integer 123.
external_data = {
    "id": "123",
    "name": "John Doe",
    "signup_ts": "2019-06-01 12:22",
    "tastes": {
        "wine": 9,
        b"cheese": 7,
        "cabbage": "1",
    },
}

user = User(**external_data)
print(f"User: {user}")

User: id=123 name='John Doe' signup_ts=datetime.datetime(2019, 6, 1, 12, 22) tastes={'wine': 9, 'cheese': 7, 'cabbage': 1}


**Invalid data type error**

In [47]:
# "123a" cannot be parsed as an integer, so validation of "id" fails.
external_data = {
    "id": "123a",
    "name": "John Doe",
    "signup_ts": "2019-06-01 12:22",
    "tastes": {"wine": 9, "cheese": 7, "cabbage": "1"},
}

try:
    user = User(**external_data)
except ValidationError as exc:
    # errors() gives a list of dicts, one per validation failure.
    error_dict = exc.errors()
    print(error_dict)

    # Persist the details; safe_json stands in for values json cannot encode.
    with open("pydantic_errors_2.json", "w") as out_file:
        json.dump(error_dict, out_file, indent=4, default=safe_json)

[{'type': 'int_parsing', 'loc': ('id',), 'msg': 'Input should be a valid integer, unable to parse string as an integer', 'input': '123a', 'url': 'https://errors.pydantic.dev/2.5/v/int_parsing'}]


**Field:** https://docs.pydantic.dev/latest/api/fields/#pydantic.fields.Field

In [49]:
class BookRating(BaseModel):
    """A user's rating of a book, constrained entirely through ``Field``.

    Attributes:
        user_name: Name of the user, 3 to 100 characters.
        book_name: Name of the book, 2 to 100 characters.
        rating: Value strictly greater than 1.0 and strictly less than 5.0.
    """

    user_name: str = Field(
        description="Name of the user",
        min_length=3,
        max_length=100,
    )
    book_name: str = Field(
        description="Name of the book",
        min_length=2,
        max_length=100,
    )
    # NOTE(review): gt/lt are exclusive bounds, so exactly 1.0 and 5.0 are
    # rejected; confirm that inclusive ge/le was not the intent.
    rating: float = Field(
        description="Rating",
        gt=1.0,
        lt=5.0,
    )

In [51]:
# All three values satisfy BookRating's Field constraints, so construction succeeds.
external_data = {
    "user_name": "Bob",
    "book_name": "Alice in Wonderland",
    "rating": 3.5
}

br = BookRating(**external_data)
br  # bare last expression: the notebook displays the model's repr

BookRating(user_name='Bob', book_name='Alice in Wonderland', rating=3.5)

In [55]:
# Two constraints are violated at once: user_name has 1 character (minimum 3)
# and rating 10.0 is not less than 5.0. Both failures are reported together.
external_data = {
    "user_name": "A",
    "book_name": "Alice in Wonderland",
    "rating": 10.0,
}

try:
    br = BookRating(**external_data)
except ValidationError as exc:
    # errors() gives a list of dicts, one per validation failure.
    error_dict = exc.errors()
    print(error_dict)

    # Persist the details; safe_json stands in for values json cannot encode.
    with open("pydantic_errors_3.json", "w") as out_file:
        json.dump(error_dict, out_file, indent=4, default=safe_json)

[{'type': 'string_too_short', 'loc': ('user_name',), 'msg': 'String should have at least 3 characters', 'input': 'A', 'ctx': {'min_length': 3}, 'url': 'https://errors.pydantic.dev/2.5/v/string_too_short'}, {'type': 'less_than', 'loc': ('rating',), 'msg': 'Input should be less than 5', 'input': 10.0, 'ctx': {'lt': 5.0}, 'url': 'https://errors.pydantic.dev/2.5/v/less_than'}]


**Custom Validator:** https://docs.pydantic.dev/latest/concepts/validators/#__tabbed_1_1

In [67]:
def validate_username(v):
    """Validate a user name and return it unchanged.

    The character check runs first, then the length check.

    Args:
        v: The candidate user name (a string).

    Returns:
        ``v`` itself when both checks pass.

    Raises:
        ValueError: If ``v`` does not consist solely of ASCII letters and
            whitespace, or if its length is outside the range 3..100.
    """
    only_letters_and_spaces = re.match(r"^[A-Za-z\s]+$", v)
    if only_letters_and_spaces is None:
        raise ValueError("user_name must contain only letters and spaces")

    if not 3 <= len(v) <= 100:
        raise ValueError("user_name should have between 3 to 100 characters")

    return v

class BookRating2(BaseModel):
    """Book rating whose ``user_name`` is checked by a custom validator.

    ``user_name`` is validated by ``validate_username`` (attached through
    ``AfterValidator``); ``book_name`` and ``rating`` use ``Field``
    constraints only.
    """

    user_name: Annotated[
        str,
        AfterValidator(validate_username),
        Field(description="Name of the user, letters and spaces only, 3-100 chars"),
    ]
    book_name: str = Field(
        description="Name of the book",
        min_length=2,
        max_length=100,
    )
    # gt/lt are exclusive bounds: the value must lie strictly between 1.0 and 5.0.
    rating: float = Field(
        description="Rating",
        gt=1.0,
        lt=5.0,
    )

In [69]:
# "Bob2" contains a digit, so validate_username raises ValueError and Pydantic
# reports it as a "value_error".
external_data = {
    "user_name": "Bob2",
    "book_name": "Alice in Wonderland",
    "rating": 3.5,
}

try:
    br2 = BookRating2(**external_data)
except ValidationError as exc:
    # errors() gives a list of dicts, one per validation failure.
    error_dict = exc.errors()
    print(error_dict)

    # The error context holds the ValueError instance itself, which json
    # cannot encode; safe_json turns it into its message.
    with open("pydantic_errors_4.json", "w") as out_file:
        json.dump(error_dict, out_file, indent=4, default=safe_json)

[{'type': 'value_error', 'loc': ('user_name',), 'msg': 'Value error, user_name must contain only letters and spaces', 'input': 'Bob2', 'ctx': {'error': ValueError('user_name must contain only letters and spaces')}, 'url': 'https://errors.pydantic.dev/2.5/v/value_error'}]


**Difference between BeforeValidator and AfterValidator**

In [115]:
def strip_whitespace(v: str) -> str:
    """Return ``v`` with leading and trailing whitespace removed."""
    trimmed = v.strip()
    return trimmed

class User1(BaseModel):
    """Model whose ``name`` is passed to ``strip_whitespace`` via ``BeforeValidator``."""

    name: Annotated[str, BeforeValidator(strip_whitespace)]

class User2(BaseModel):
    """Model whose ``name`` is passed to ``strip_whitespace`` via ``AfterValidator``."""

    name: Annotated[str, AfterValidator(strip_whitespace)]

# The input is already a str in both cases, so stripping before or after
# validation gives the same result: each print shows "Alice".
user1 = User1(name="   Alice   ")
print(user1.name)

user2 = User2(name="   Alice   ")
print(user2.name)

Alice
Alice


In [123]:
# Used with BeforeValidator: turns whatever arrives into a stripped string.
def clean_before(v):
    """Log the incoming value, then return it as a whitespace-stripped string.

    Args:
        v: The value to clean; it does not have to be a string.

    Returns:
        ``str(v)`` with leading and trailing whitespace removed.
    """
    value_type, value_repr = type(v), repr(v)
    print("BeforeValidator:", value_type, value_repr)
    as_text = str(v)
    return as_text.strip()

def clean_after(v):
    """Log the received value, then return it with surrounding whitespace removed.

    Args:
        v: The value to clean; it must provide a ``strip()`` method.

    Returns:
        The result of ``v.strip()``.
    """
    value_type, value_repr = type(v), repr(v)
    print("AfterValidator:", value_type, value_repr)
    stripped = v.strip()
    return stripped

class UserBefore(BaseModel):
    """Model whose ``name`` (declared ``str``) is passed to ``clean_before`` via ``BeforeValidator``."""

    name: Annotated[str, BeforeValidator(clean_before)]

class UserAfter(BaseModel):
    """Model whose ``name`` (declared ``str``) is passed to ``clean_after`` via ``AfterValidator``."""

    name: Annotated[str, AfterValidator(clean_after)]

# Pass a non-string input
input_value = 123

# UserBefore accepts it: clean_before receives the raw int and returns a str.
user_b = UserBefore(name=input_value)
print(f"{user_b.name} | type: {type(user_b.name)}")

# UserAfter rejects it: the int fails str validation, so clean_after is never
# called. The expected error is caught and printed so the demonstration does
# not abort a "Restart & Run All" of the notebook.
try:
    user_a = UserAfter(name=input_value)
except ValidationError as exc:
    print(f"ValidationError: {exc}")

BeforeValidator: <class 'int'> 123
123 | type: <class 'str'>


ValidationError: 1 validation error for UserAfter
name
  Input should be a valid string [type=string_type, input_value=123, input_type=int]
    For further information visit https://errors.pydantic.dev/2.5/v/string_type

In [127]:
# Pass-through validators: they only log the value they receive, to show what
# each validator sees when the field is declared as int.
def clean_before(v):
    """Log the type and repr of ``v`` and return ``v`` unchanged.

    Args:
        v: The value to log.

    Returns:
        ``v`` itself, unmodified.
    """
    value_type, value_repr = type(v), repr(v)
    print("BeforeValidator:", value_type, value_repr)
    return v

def clean_after(v):
    """Log the type and repr of ``v`` and return ``v`` unchanged.

    Args:
        v: The value to log.

    Returns:
        ``v`` itself, unmodified.
    """
    value_type, value_repr = type(v), repr(v)
    print("AfterValidator:", value_type, value_repr)
    return v

class UserBefore(BaseModel):
    """Model whose ``name`` (declared ``int``) is passed to ``clean_before`` via ``BeforeValidator``."""

    name: Annotated[int, BeforeValidator(clean_before)]

class UserAfter(BaseModel):
    """Model whose ``name`` (declared ``int``) is passed to ``clean_after`` via ``AfterValidator``."""

    name: Annotated[int, AfterValidator(clean_after)]

# Pass a string input
input_value = "123"

# clean_before logs the raw str '123'; the stored field value is the int 123.
user_b = UserBefore(name=input_value)
print(f"{user_b.name} | type: {type(user_b.name)}")
# clean_after logs the int 123, i.e. the value after it has been parsed.
user_a = UserAfter(name=input_value)
print(f"{user_a.name} | type: {type(user_a.name)}")

BeforeValidator: <class 'str'> '123'
123 | type: <class 'int'>
AfterValidator: <class 'int'> 123
123 | type: <class 'int'>
