My first objective is to create a Pydantic model to validate user input data in a fictional customer support system.
I'll use venv to create a virtual environment, `.venv`, into which I'll install the ipykernel and Pydantic packages (this is a best practice and prevents conflicts with packages installed in the global environment). You'll also need to run `pip install "pydantic[email]"` to install the email validator that `EmailStr` relies on.

In [16]:
# Import the pydantic and json libraries
from pydantic import BaseModel, ValidationError, EmailStr, Field
from typing import Optional
from datetime import date
import json

## Define a Pydantic model for user input and populate it with some sample data

In [17]:
# Create a Pydantic model to validate user input data consisting of a customer_number (int), email (EmailStr), and issue_description (str).
class UserInput(BaseModel):
    """Customer-support request submitted by a user.

    Pydantic validates every field when an instance is created and raises
    ``ValidationError`` when a value does not match its declared type.
    """

    # Customer identifier; must be an integer.
    customer_number: int
    # Contact address, checked by Pydantic's email validator.
    email: EmailStr
    # Free-text description of the problem; must be a string.
    issue_description: str
    
# Sample data to populate the model.
# Note: the email value has leading and trailing spaces.
sample_data = {
    "customer_number": 12345,
    "email": "  user@example.com  ",
    "issue_description": "I am unable to access my account."
}

Validate the sample data

In [18]:
# Validate sample_data by unpacking it into the model's keyword arguments

user_input = UserInput(**sample_data)

# Printing the model shows the validated field values
print(user_input)

customer_number=12345 email='user@example.com' issue_description='I am unable to access my account.'


Create errors on purpose: a string instead of a number, an invalid email address, and a number instead of a string

In [19]:
# Populate the model with invalid data to see validation errors:
# a non-numeric customer_number, an email without an @-sign, and a
# non-string issue_description
invalid_data = {
    "customer_number": "not_a_number",
    "email": "invalid_email",
    "issue_description": 6
}

# Run this cell to see the validation errors returned as JSON
try:
    # Use the UserInput model defined above to validate the data
    user_input = UserInput(**invalid_data)
except ValidationError as e:
    # e.json() serializes the collected errors as a JSON string
    print(e.json())

[{"type":"int_parsing","loc":["customer_number"],"msg":"Input should be a valid integer, unable to parse string as an integer","input":"not_a_number","url":"https://errors.pydantic.dev/2.11/v/int_parsing"},{"type":"value_error","loc":["email"],"msg":"value is not a valid email address: An email address must have an @-sign.","input":"invalid_email","ctx":{"reason":"An email address must have an @-sign."},"url":"https://errors.pydantic.dev/2.11/v/value_error"},{"type":"string_type","loc":["issue_description"],"msg":"Input should be a valid string","input":6,"url":"https://errors.pydantic.dev/2.11/v/string_type"}]


In [None]:
# Create a function that takes a dictionary as input, validates it against the UserInput model, and returns either the validated data or the validation errors.
def validate_user_input(data: dict):
    """Validate ``data`` against ``UserInput`` and report the outcome.

    Prints a status line followed by indented JSON: the validated model on
    success, or the validation errors on failure.

    Args:
        data: Field names mapped to raw values; unpacked into ``UserInput``
            as keyword arguments.

    Returns:
        The validated ``UserInput`` instance on success, otherwise the
        validation errors as a compact JSON string.
    """
    try:
        model = UserInput(**data)
    except ValidationError as exc:
        print("❌ Validation errors found:")
        # Indented JSON for reading; the returned string stays compact.
        print(exc.json(indent=2))
        return exc.json()
    else:
        print("✅ Valid user input created:")
        # JSON representation of the validated data, indented for readability
        print(model.model_dump_json(indent=2))
        return model

In [21]:
# Try to validate the invalid data; the function prints the errors and
# also returns them as a JSON string
validate_user_input(invalid_data)

❌ Validation errors found:
[
  {
    "type": "int_parsing",
    "loc": [
      "customer_number"
    ],
    "msg": "Input should be a valid integer, unable to parse string as an integer",
    "input": "not_a_number",
    "url": "https://errors.pydantic.dev/2.11/v/int_parsing"
  },
  {
    "type": "value_error",
    "loc": [
      "email"
    ],
    "msg": "value is not a valid email address: An email address must have an @-sign.",
    "input": "invalid_email",
    "ctx": {
      "reason": "An email address must have an @-sign."
    },
    "url": "https://errors.pydantic.dev/2.11/v/value_error"
  },
  {
    "type": "string_type",
    "loc": [
      "issue_description"
    ],
    "msg": "Input should be a valid string",
    "input": 6,
    "url": "https://errors.pydantic.dev/2.11/v/string_type"
  }
]


'[{"type":"int_parsing","loc":["customer_number"],"msg":"Input should be a valid integer, unable to parse string as an integer","input":"not_a_number","url":"https://errors.pydantic.dev/2.11/v/int_parsing"},{"type":"value_error","loc":["email"],"msg":"value is not a valid email address: An email address must have an @-sign.","input":"invalid_email","ctx":{"reason":"An email address must have an @-sign."},"url":"https://errors.pydantic.dev/2.11/v/value_error"},{"type":"string_type","loc":["issue_description"],"msg":"Input should be a valid string","input":6,"url":"https://errors.pydantic.dev/2.11/v/string_type"}]'

In [22]:
# Create invalid data with a missing field (there is no "email" key)
incomplete_data = {
    "customer_number": 67890,
    "issue_description": "My order hasn't arrived yet."
}

In [23]:
# Try validating the incomplete data, which lacks the email field
validate_user_input(incomplete_data)

❌ Validation errors found:
[
  {
    "type": "missing",
    "loc": [
      "email"
    ],
    "msg": "Field required",
    "input": {
      "customer_number": 67890,
      "issue_description": "My order hasn't arrived yet."
    },
    "url": "https://errors.pydantic.dev/2.11/v/missing"
  }
]


'[{"type":"missing","loc":["email"],"msg":"Field required","input":{"customer_number":67890,"issue_description":"My order hasn\'t arrived yet."},"url":"https://errors.pydantic.dev/2.11/v/missing"}]'

In [24]:
# Create sample data with an additional field ("unexpected_field") that
# the model does not declare
extra_data = {
    "customer_number": 54321,
    "email": "extra@example.com",
    "issue_description": "I have an extra field.",
    "unexpected_field": "This field is not defined in the model."
}

In [25]:
# Validate the extra data (the validated data will not include the extra field)
validate_user_input(extra_data)

✅ Valid user input created:
{
  "customer_number": 54321,
  "email": "extra@example.com",
  "issue_description": "I have an extra field."
}


UserInput(customer_number=54321, email='extra@example.com', issue_description='I have an extra field.')

In [28]:
# Enhanced input model 
class UserInput(BaseModel):
    """Customer-support request with optional order details.

    Extends the basic request (customer number, email, issue description)
    with an optional order number and an optional transaction date.
    """

    customer_number: int
    email: EmailStr
    issue_description: str
    # Optional; defaults to None. When supplied, the inclusive bounds
    # 10000..99999 limit it to a 5-digit number with no leading zero.
    order_id: Optional[int] = Field(
        default=None,
        ge=10000,
        le=99999,
        description="5-digit order number (cannot start with 0)",
    )
    # Optional; defaults to None.
    transaction_date: Optional[date] = None

In [30]:
# Test the enhanced model with valid data: order_id lies within
# 10000-99999 and transaction_date is given as an ISO-format date string
valid_enhanced_data = {
    "customer_number": 11223,
    "email": "  valid@example.com  ",
    "issue_description": "My order hasn't arrived yet.",
    "order_id": 12345,
    "transaction_date": "2023-10-01"
}

validate_user_input(valid_enhanced_data)

✅ Valid user input created:
{
  "customer_number": 11223,
  "email": "valid@example.com",
  "issue_description": "My order hasn't arrived yet.",
  "order_id": 12345,
  "transaction_date": "2023-10-01"
}


UserInput(customer_number=11223, email='valid@example.com', issue_description="My order hasn't arrived yet.", order_id=12345, transaction_date=datetime.date(2023, 10, 1))

In [31]:
# Same request without the optional order_id and transaction_date fields
valid_enhanced_data_short = {
    "customer_number": 11223,
    "email": "  valid@example.com  ",
    "issue_description": "My order hasn't arrived yet."
}

validate_user_input(valid_enhanced_data_short)

✅ Valid user input created:
{
  "customer_number": 11223,
  "email": "valid@example.com",
  "issue_description": "My order hasn't arrived yet.",
  "order_id": null,
  "transaction_date": null
}


UserInput(customer_number=11223, email='valid@example.com', issue_description="My order hasn't arrived yet.", order_id=None, transaction_date=None)

In [32]:
# Inspect the validated data structure: on success validate_user_input
# returns the UserInput instance, which is printed here
validated_data = validate_user_input(valid_enhanced_data)
print(validated_data)

✅ Valid user input created:
{
  "customer_number": 11223,
  "email": "valid@example.com",
  "issue_description": "My order hasn't arrived yet.",
  "order_id": 12345,
  "transaction_date": "2023-10-01"
}
customer_number=11223 email='valid@example.com' issue_description="My order hasn't arrived yet." order_id=12345 transaction_date=datetime.date(2023, 10, 1)


In [34]:
# What if the valid data arrives as a JSON string?
valid_json_data = '''{
    "customer_number": 33445,
    "email": "  valid@example.com  ",
    "issue_description": "My order hasn't arrived yet.",
    "order_id": 12345,
    "transaction_date": "2023-10-01"
}'''
# Parse the JSON string into a Python dictionary that can be passed to the validation function
data_dict = json.loads(valid_json_data)
print("Parsed JSON:", data_dict)
# Validate the parsed data
validate_user_input(data_dict)

Parsed JSON: {'customer_number': 33445, 'email': '  valid@example.com  ', 'issue_description': "My order hasn't arrived yet.", 'order_id': 12345, 'transaction_date': '2023-10-01'}
✅ Valid user input created:
{
  "customer_number": 33445,
  "email": "valid@example.com",
  "issue_description": "My order hasn't arrived yet.",
  "order_id": 12345,
  "transaction_date": "2023-10-01"
}


UserInput(customer_number=33445, email='valid@example.com', issue_description="My order hasn't arrived yet.", order_id=12345, transaction_date=datetime.date(2023, 10, 1))

In [35]:
# Try JSON whose values are invalid for the model (the JSON itself is
# well-formed, so json.loads succeeds)
invalid_json_data = '''{
    "customer_number": "not_a_number",
    "email": "invalid_email",
    "issue_description": 6
}'''
# Parse the JSON string into a Python dictionary
data_dict = json.loads(invalid_json_data)
print("Parsed JSON:", data_dict)
# Validate the parsed data; the errors are reported by the model, not by the JSON parser
validate_user_input(data_dict)

Parsed JSON: {'customer_number': 'not_a_number', 'email': 'invalid_email', 'issue_description': 6}
❌ Validation errors found:
[
  {
    "type": "int_parsing",
    "loc": [
      "customer_number"
    ],
    "msg": "Input should be a valid integer, unable to parse string as an integer",
    "input": "not_a_number",
    "url": "https://errors.pydantic.dev/2.11/v/int_parsing"
  },
  {
    "type": "value_error",
    "loc": [
      "email"
    ],
    "msg": "value is not a valid email address: An email address must have an @-sign.",
    "input": "invalid_email",
    "ctx": {
      "reason": "An email address must have an @-sign."
    },
    "url": "https://errors.pydantic.dev/2.11/v/value_error"
  },
  {
    "type": "string_type",
    "loc": [
      "issue_description"
    ],
    "msg": "Input should be a valid string",
    "input": 6,
    "url": "https://errors.pydantic.dev/2.11/v/string_type"
  }
]


'[{"type":"int_parsing","loc":["customer_number"],"msg":"Input should be a valid integer, unable to parse string as an integer","input":"not_a_number","url":"https://errors.pydantic.dev/2.11/v/int_parsing"},{"type":"value_error","loc":["email"],"msg":"value is not a valid email address: An email address must have an @-sign.","input":"invalid_email","ctx":{"reason":"An email address must have an @-sign."},"url":"https://errors.pydantic.dev/2.11/v/value_error"},{"type":"string_type","loc":["issue_description"],"msg":"Input should be a valid string","input":6,"url":"https://errors.pydantic.dev/2.11/v/string_type"}]'

In [None]:
# Use the model_validate_json method to validate JSON data directly
def validate_user_input_json(json_data: str):
    """Validate a JSON string against ``UserInput`` and report the outcome.

    Prints a status line followed by indented JSON: the validated model on
    success, or the validation errors on failure.

    Args:
        json_data: JSON text handed directly to
            ``UserInput.model_validate_json`` (a Pydantic v2 method), so no
            separate ``json.loads`` step is needed.

    Returns:
        The validated ``UserInput`` instance on success, otherwise the
        validation errors as a compact JSON string.
    """
    try:
        model = UserInput.model_validate_json(json_data)
    except ValidationError as exc:
        print("❌ Validation errors found:")
        # Indented JSON for reading; the returned string stays compact.
        print(exc.json(indent=2))
        return exc.json()
    else:
        print("✅ Valid user input created:")
        print(model.model_dump_json(indent=2))
        return model