# Dynamically Structured LLM Output 
an experiment on how it may be possible to let the LLM create dynamically structured output under fixed constraints for the pydantic BaseModel to still have reliable outputs while letting the llm decide which properties are necessary.

It has to be mentioned, that this solution is not entirely explicit and therefore not clean.
It leverages a combination of:
- the pydantic `create_model()` factory function, 
- the pydantic `model_validator` and 
- the instructor library.

In [4]:
import instructor
from pydantic import (
    BaseModel, 
    Field, 
    ValidationInfo, 
    create_model,
    model_validator,
)

from dotenv import load_dotenv

from openai import OpenAI
from typing import TypeVar, List, Iterable, Optional, Dict, Literal, Type


load_dotenv()

client = instructor.patch(OpenAI())

#### The Pydantic `create_model`function dynamically creates and returns a new Pydantic model -> class factory :

In [11]:
def create_model_by_schema(schema) -> type[BaseModel]:

    type_definition = {
        "string": ("str",""),
        "integer": ("int",0),
        "boolean": ("bool", True),
    }
    attributes = {attribute.name: (type_definition[attribute.type]) for attribute in schema}
        
    print(f"{attributes=}")
    return create_model(
        'CustomUser',
        **attributes, 
        __base__=BaseModel,
    )

In [5]:
context = [
    "Brandon is 33 years old. He works as a solution architect.",
    "Dominic is 45 years old. He is retired.",
    "There onces was a prince, named Benny. He ruled for 10 years, which just ended. He started at 22.",
    "Simon says, why are you 22 years old marvin?",
]

#### The Models and a Model Validator:

In [12]:
class Property(BaseModel):
    """property name and corresponding type"""
    name: str = Field(...,description="must be snake case")
    type: Literal["string", "integer", "boolean"]


class CustomUserModel(BaseModel):
    """model for user properties"""
    custom_properties: List[Property] = Field(...)
    user_model: Optional[Type[BaseModel]] = None
    users: Optional[List[BaseModel] ] = None

    @model_validator(mode="after")
    def parse_users(self,  info:ValidationInfo):
        print(f"{info=}")
        print(f"{self=}")

        if not self.custom_properties:
            raise ValueError("user schema empty")
        
        CustomUser: BaseModel = create_model_by_schema(self.custom_properties)
        self.user_model = CustomUser

        resp = client.chat.completions.create(
            response_model=Iterable[CustomUser],
            messages=[
                
                {
                    "role": "user",
                    "content": f"parse the users: {info}."
                },
            ],
            model="gpt-3.5-turbo",
            temperature=0,
        )  # type: ignore
       
        print(f"inner response: {resp}")
       
        self.users=resp
        return self

    

#### The LLM Query

In [13]:

resp = client.chat.completions.create(
    #model="gpt-4",
    model="gpt-3.5-turbo",
    response_model=CustomUserModel,
    max_retries=2,
    messages=[
        {
            "role": "system",
            "content": "You are a world class data structure extractor.",
        },
        {
            "role": "user",
            "content": f"""provide all necessary properties for a user data structure which captures all given information about the users decribed in the context below. 
            context: {' '.join(context)}""",
        },
        
    ],
    validation_context={"context": " ".join(context)},
    
)



info=ValidationInfo(config={'title': 'CustomUserModel'}, context={'context': 'Brandon is 33 years old. He works as a solution architect. Dominic is 45 years old. He is retired. There onces was a prince, named Benny. He ruled for 10 years, which just ended. He started at 22. Simon says, why are you 22 years old marvin?'}, data=None, field_name=None)
self=CustomUserModel(custom_properties=[Property(name='name', type='string'), Property(name='age', type='integer'), Property(name='occupation', type='string'), Property(name='employment_status', type='string'), Property(name='reign_duration', type='integer')], user_model=None, users=[BaseModel(), BaseModel(), BaseModel(), BaseModel()])
attributes={'name': ('str', ''), 'age': ('int', 0), 'occupation': ('str', ''), 'employment_status': ('str', ''), 'reign_duration': ('int', 0)}
inner response: [CustomUser(name='Brandon', age=33, occupation='solution architect', employment_status='employed', reign_duration=0), CustomUser(name='Dominic', age=45,

In [14]:
resp.users

[CustomUser(name='Brandon', age=33, occupation='solution architect', employment_status='employed', reign_duration=0),
 CustomUser(name='Dominic', age=45, occupation='retired', employment_status='unemployed', reign_duration=0),
 CustomUser(name='Benny', age=22, occupation='prince', employment_status='employed', reign_duration=10)]

In [15]:
resp.user_model

__main__.CustomUser

In [16]:
type(resp.user_model)

pydantic._internal._model_construction.ModelMetaclass

In [17]:
import pprint
pprint.pprint(resp.user_model.model_json_schema())

{'properties': {'age': {'default': 0, 'title': 'Age', 'type': 'integer'},
                'employment_status': {'default': '',
                                      'title': 'Employment Status',
                                      'type': 'string'},
                'name': {'default': '', 'title': 'Name', 'type': 'string'},
                'occupation': {'default': '',
                               'title': 'Occupation',
                               'type': 'string'},
                'reign_duration': {'default': 0,
                                   'title': 'Reign Duration',
                                   'type': 'integer'}},
 'title': 'CustomUser',
 'type': 'object'}


In [18]:
resp.custom_properties

[Property(name='name', type='string'),
 Property(name='age', type='integer'),
 Property(name='occupation', type='string'),
 Property(name='employment_status', type='string'),
 Property(name='reign_duration', type='integer')]

In [19]:
for user in resp.users:
    print(user.model_dump_json(indent=2))

{
  "name": "Brandon",
  "age": 33,
  "occupation": "solution architect",
  "employment_status": "employed",
  "reign_duration": 0
}
{
  "name": "Dominic",
  "age": 45,
  "occupation": "retired",
  "employment_status": "unemployed",
  "reign_duration": 0
}
{
  "name": "Benny",
  "age": 22,
  "occupation": "prince",
  "employment_status": "employed",
  "reign_duration": 10
}
