In [1]:
# pip install pydantic

In [3]:
# pip install pydantic[email]

In [74]:
from datetime import date
from enum import Enum
from typing import Any, Optional
from uuid import UUID, uuid4

from pydantic import BaseModel, EmailStr, Field, ValidationError

In [76]:
class Department(Enum):
    """Closed set of department codes; each member's value equals its name."""

    HR = "HR"
    SALES = "SALES"
    IT = "IT"
    ENGINEERING = "ENGINEERING"

In [78]:
class Employee(BaseModel):
    """Employee record whose fields are validated by pydantic on construction.

    Every field except ``employee_id`` is required. ``department`` must match
    one of the ``Department`` members.
    """

    # default_factory calls uuid4() once per instance. A bare `uuid4()` default
    # is evaluated a single time when the class body runs, so every Employee
    # created without an explicit id would share the same UUID.
    employee_id: UUID = Field(default_factory=uuid4)
    name: str
    email: EmailStr
    date_of_birth: date
    salary: float
    department: Department
    elected_benefits: bool

In [80]:
# Valid payload. The REPL continuation prompts ("...") that were pasted into
# this cell are removed so it is plain Python rather than relying on IPython's
# prompt stripping.
e1 = Employee(
    name="bhupen",
    email="gridflowai@gmail.com",
    date_of_birth="1998-04-02",
    salary=123_000.00,
    department="IT",
    elected_benefits=True,
)

In [82]:
e1

Employee(employee_id=UUID('3886e0fb-ab6c-4501-8de9-c5152336d9f5'), name='bhupen', email='gridflowai@gmail.com', date_of_birth=datetime.date(1998, 4, 2), salary=123000.0, department=<Department.IT: 'IT'>, elected_benefits=True)

In [84]:
# Every field below is deliberately invalid. The ValidationError is caught and
# printed so the cell shows the per-field report without aborting a
# "Restart & Run All".
try:
    Employee(
        employee_id="123",
        name=False,
        email="cdetumaexamplecom",
        date_of_birth="1939804-02",
        salary="high paying",
        department="PRODUCT",
        elected_benefits=300,
    )
except ValidationError as exc:
    print(exc)

ValidationError: 7 validation errors for Employee
employee_id
  Input should be a valid UUID, invalid length: expected length 32 for simple format, found 3 [type=uuid_parsing, input_value='123', input_type=str]
    For further information visit https://errors.pydantic.dev/2.8/v/uuid_parsing
name
  Input should be a valid string [type=string_type, input_value=False, input_type=bool]
    For further information visit https://errors.pydantic.dev/2.8/v/string_type
email
  value is not a valid email address: An email address must have an @-sign. [type=value_error, input_value='cdetumaexamplecom', input_type=str]
date_of_birth
  Input should be a valid date or datetime, invalid date separator, expected `-` [type=date_from_datetime_parsing, input_value='1939804-02', input_type=str]
    For further information visit https://errors.pydantic.dev/2.8/v/date_from_datetime_parsing
salary
  Input should be a valid number, unable to parse string as a number [type=float_parsing, input_value='high paying', input_type=str]
    For further information visit https://errors.pydantic.dev/2.8/v/float_parsing
department
  Input should be 'HR', 'SALES', 'IT' or 'ENGINEERING' [type=enum, input_value='PRODUCT', input_type=str]
    For further information visit https://errors.pydantic.dev/2.8/v/enum
elected_benefits
  Input should be a valid boolean, unable to interpret input [type=bool_parsing, input_value=300, input_type=int]
    For further information visit https://errors.pydantic.dev/2.8/v/bool_parsing

#### Example 1: Using Field Types

In [87]:
from pydantic import BaseModel

class User(BaseModel):
    """Minimal user record; validation relies only on the annotated field types."""
    id: int
    name: str
    age: int

# Well-typed input: the values already match the annotated types, so the
# except branch is not expected to run.
try:
    user = User(
        id=1,
        name='John Doe',
        age=25,
    )
except ValidationError as exc:
    print(exc)

In [89]:
# Non-numeric strings cannot be parsed as integers, so both `id` and `age`
# fail and a single combined report is printed.
try:
    user = User(
        id='one',
        name='John Doe',
        age='twenty-five',
    )
except ValidationError as exc:
    print(exc)

2 validation errors for User
id
  Input should be a valid integer, unable to parse string as an integer [type=int_parsing, input_value='one', input_type=str]
    For further information visit https://errors.pydantic.dev/2.8/v/int_parsing
age
  Input should be a valid integer, unable to parse string as an integer [type=int_parsing, input_value='twenty-five', input_type=str]
    For further information visit https://errors.pydantic.dev/2.8/v/int_parsing


#### Example 2: Using Field Constraints
- Pydantic's `Field` function lets you attach constraints (numeric bounds, string lengths, defaults) to individual fields. Here are some examples:

In [92]:
from pydantic import BaseModel, Field

In [94]:
class User(BaseModel):
    """User record whose fields carry value constraints in addition to types."""

    # id must be greater than 0
    id: int = Field(gt=0)
    # name must be between 2 and 50 characters
    name: str = Field(min_length=2, max_length=50)
    # age must be between 18 and 100 (both bounds inclusive)
    age: int = Field(ge=18, le=100)

# All three values satisfy the declared Field constraints.
user = User(
    id=1,
    name='John',
    age=30,
)

#### Example 1: Using Model Validators

In [97]:
from pydantic import BaseModel, Field, ValidationError, model_validator

In [99]:
class ListingModel(BaseModel):
    """Listing with a height given in one of two units.

    ``height_cms`` must be supplied whenever ``height_type`` is ``"cms"``;
    the rule is enforced by ``check_height_type``.
    """

    gender: str
    height_type: str
    # Optional[...] makes the annotation agree with the None default, so an
    # explicit ``None`` is accepted as "not provided" instead of failing
    # float validation.
    height_cms: Optional[float] = Field(default=None)
    # NOTE(review): no validator requires height_hands for any height_type;
    # confirm whether a symmetric rule is intended.
    height_hands: Optional[float] = Field(default=None)

    @model_validator(mode='wrap')
    @classmethod
    def check_height_type(cls, values: Any, handler):
        """Reject input that selects 'cms' without providing ``height_cms``.

        Args:
            values: Raw input passed to the model. A dict for keyword or dict
                construction, but possibly another object such as an existing
                instance.
            handler: Callable that runs the model's regular field validation.

        Returns:
            The validated model produced by ``handler``.

        Raises:
            ValueError: If ``height_type`` is ``"cms"`` and ``height_cms`` is
                missing or ``None``.
        """
        message = "height_cms must be provided when height_type is 'cms'"

        if isinstance(values, dict):
            # Checking the raw dict first keeps the cross-field error ahead
            # of any per-field errors.
            if values.get('height_type') == "cms" and values.get('height_cms') is None:
                raise ValueError(message)
            return handler(values)

        # Non-dict input has no .get(); validate first, then apply the same
        # rule to the resulting attributes.
        validated = handler(values)
        if validated.height_type == "cms" and validated.height_cms is None:
            raise ValueError(message)
        return validated

In [101]:
# Example usage: height_type is 'cms' but height_cms is omitted, so the
# model validator rejects the input and the error is printed.
try:
    model = ListingModel(
        gender='female',
        height_type='cms',
    )
except ValidationError as exc:
    print(exc)

1 validation error for ListingModel
  Value error, height_cms must be provided when height_type is 'cms' [type=value_error, input_value={'gender': 'female', 'height_type': 'cms'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.8/v/value_error


#### Example 2: Cross-field Validation
- Ensure that if one field is provided, another field must also be provided.

In [104]:
class EventModel(BaseModel):
    """Event whose date fields become mandatory for one event type.

    When ``event_type`` is ``"timed_event"``, both ``start_date`` and
    ``end_date`` must be present and non-empty; ``check_dates`` enforces this.
    """

    event_type: str
    # Optional[...] makes the annotations agree with the None defaults.
    start_date: Optional[str] = None
    end_date: Optional[str] = None

    @model_validator(mode='wrap')
    @classmethod
    def check_dates(cls, values: Any, handler):
        """Require both dates when the event is a timed event.

        Args:
            values: Raw input passed to the model. A dict for keyword or dict
                construction, but possibly another object such as an existing
                instance.
            handler: Callable that runs the model's regular field validation.

        Returns:
            The validated model produced by ``handler``.

        Raises:
            ValueError: If ``event_type`` is ``"timed_event"`` and either date
                is missing or falsy.
        """
        message = 'start_date and end_date must be provided for timed events'

        if isinstance(values, dict):
            # Checking the raw dict first keeps the cross-field error ahead
            # of any per-field errors.
            if values.get('event_type') == 'timed_event':
                if not (values.get('start_date') and values.get('end_date')):
                    raise ValueError(message)
            return handler(values)

        # Non-dict input has no .get(); validate first, then apply the same
        # rule to the resulting attributes.
        validated = handler(values)
        if validated.event_type == 'timed_event':
            if not (validated.start_date and validated.end_date):
                raise ValueError(message)
        return validated

In [106]:
# Example usage: a timed event with only start_date supplied, so the
# cross-field check fails and the error is printed.
try:
    event = EventModel(
        event_type='timed_event',
        start_date='2024-09-17',
    )
except ValidationError as exc:
    print(exc)

1 validation error for EventModel
  Value error, start_date and end_date must be provided for timed events [type=value_error, input_value={'event_type': 'timed_eve...art_date': '2024-09-17'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.8/v/value_error


## Pydantic in LLMs

In [109]:
from pydantic import BaseModel, Field

class InputData(BaseModel):
    """Validated request parameters: a prompt plus two bounded settings."""

    # prompt must contain between 1 and 500 characters
    prompt: str = Field(min_length=1, max_length=500)
    # defaults to 50; allowed range is 1..500 inclusive
    max_tokens: int = Field(default=50, ge=1, le=500)
    # defaults to 0.7; allowed range is 0..1 inclusive
    temperature: float = Field(default=0.7, ge=0, le=1)

# Example usage: only `prompt` and `max_tokens` are supplied, so
# `temperature` falls back to its declared default.
try:
    input_data = InputData(
        prompt="Translate the following text to French:",
        max_tokens=100,
    )
except ValidationError as exc:
    print(exc)

# Bare last expression so the notebook renders the model's repr.
input_data

InputData(prompt='Translate the following text to French:', max_tokens=100, temperature=0.7)

#### Response Validation and Formatting
- LLMs can produce diverse outputs, and sometimes you need to ensure the structure of these outputs.
- Pydantic models can help in validating and formatting the response data, especially when the output needs to follow a specific schema.

In [55]:
class LLMResponse(BaseModel):
    """Schema for a response payload: a text string plus an integer token count."""
    text: str
    tokens_used: int

# Example usage: a hard-coded dict with the same keys LLMResponse declares.
raw_response = {
    "text": "Ceci est un exemple de réponse.",
    "tokens_used": 42,
}

# Unpack the dict into keyword arguments so each key is validated as a field.
try:
    response = LLMResponse(**raw_response)
except ValidationError as exc:
    print(exc)

# Bare last expression so the notebook renders the model's repr.
response

LLMResponse(text='Ceci est un exemple de réponse.', tokens_used=42)

#### Configure and validate LLM parameters for OpenAI's GPT models using Pydantic

In [111]:
import openai

In [113]:
class LLMConfig(BaseModel):
    """Validated sampling parameters forwarded to the chat completion call."""

    # Default is a chat model: the previous default ("text-davinci-003") is a
    # legacy completions model and cannot be used with chat.completions.
    model: str         = Field(default="gpt-4o-mini")
    max_tokens: int    = Field(default=100, ge=1, le=4096)
    temperature: float = Field(default=0.7, ge=0, le=1)
    top_p: float       = Field(default=1.0, ge=0, le=1)
    n: int             = Field(default=1, ge=1)  # Number of completions to generate
    # Optional[...] makes the annotation agree with the None default
    # (None means no stop sequences).
    stop: Optional[list[str]] = None             # List of stop sequences

In [115]:
# Example configuration: every field is passed explicitly and lies within
# the bounds declared on LLMConfig.
config = LLMConfig(
    model="gpt-4o-mini",
    max_tokens=150,
    temperature=0.6,
    top_p=0.9,
    n=1,
    stop=["\n"],
)

In [117]:
# No api_key argument is passed: the client defaults to
# os.environ.get("OPENAI_API_KEY"), which keeps the secret out of the notebook.
client = openai.OpenAI()

In [119]:
def generate_text(message_prompt: str, config: LLMConfig):
    """Send one prompt to the chat completions endpoint and return the reply text.

    Uses the module-level ``client``. The instruction message is sent with the
    ``system`` role and ``message_prompt`` with the ``user`` role.

    Args:
        message_prompt: The user's request, sent verbatim as the user message.
        config: Validated parameters (model, max_tokens, temperature, top_p,
            n, stop) forwarded to the API call.

    Returns:
        The message content of the first choice only, even when ``config.n``
        requests several completions.
    """
    response = client.chat.completions.create(
        messages=[
            {
                # Instructions that frame the assistant belong in the system
                # role; previously this was sent as a second user message.
                "role": "system",
                "content": "You are an expert in AI models like machine or language translations.",
            },
            {
                "role": "user",
                "content": message_prompt,
            },
        ],
        # Make the API call using the validated configuration
        model=config.model,
        max_tokens=config.max_tokens,
        temperature=config.temperature,
        top_p=config.top_p,
        n=config.n,
        stop=config.stop,
    )
    return response.choices[0].message.content


In [72]:
# Example usage: send a translation request using the `config` built above
# and print the text returned by generate_text.
prompt = "Translate the following text to French: 'Hello, how are you?'"
result = generate_text(prompt, config)
print(result)

The translation of "Hello, how are you?" in French is: "Bonjour, comment ça va ?"
