# Dynamically Structured LLM Output 
An experiment on how to let the LLM create dynamically structured output under fixed boundary conditions. Put explicitly: the LLM creates a schema dynamically, based on its inner knowledge and the pydantic `BaseModel`.

The (not entirely explicit) solution used here leverages a combination of:
- the pydantic `create_model()` factory function, 
- the pydantic `model_validator` and 
- the instructor library.

In [1]:
import instructor
from pydantic import (
    BaseModel, 
    Field, 
    ValidationInfo, 
    create_model,
    model_validator,
)

from dotenv import load_dotenv

from openai import OpenAI
from typing import TypeVar, List, Iterable, Optional, Dict, Literal, Type


# NOTE(review): presumably loads the OpenAI credentials from a local .env file
# before the client is constructed - confirm against the project setup.
load_dotenv()

# Wrap the OpenAI client with instructor; the calls below rely on the patched
# `chat.completions.create` accepting `response_model`, `max_retries` and
# `validation_context`.
client = instructor.patch(OpenAI())

#### The Pydantic `create_model` function dynamically creates and returns a new Pydantic model (a class factory):

In [2]:
def get_model_by_fields(fields: dict, model_name: str = "CustomUser") -> type:
    """Build a new pydantic model class from a mapping of field definitions.

    Args:
        fields: Mapping of field name to a ``(type, default)`` tuple. The
            entries are forwarded unchanged to ``create_model`` as keyword
            arguments, so each key becomes one field of the new model.
        model_name: Name given to the generated class. Defaults to
            ``"CustomUser"``, the name that used to be hard-coded, so
            existing one-argument calls behave exactly as before.

    Returns:
        A new ``BaseModel`` subclass with one field per entry in ``fields``.
    """
    return create_model(model_name, __base__=BaseModel, **fields)

In [3]:
# Free-text snippets describing several people. The query cell joins them with
# spaces into one string, used both in the prompt and as the validation
# context.
context = [
    "Brandon is 33 years old. He works as a solution architect.",
    "Dominic is 45 years old. He is retired.",
    "There onces was a prince, named Benny. He ruled for 10 years, which just ended. He started at 22.",
    "Simon says, why are you 22 years old marvin?",
]

#### The Models and a Model Validator:

In [4]:

# Placeholder element type for `CustomUserModel.users`. The concrete user model
# only exists once the validator has built it, so the annotation is bound to
# `BaseModel` here.
CustomUser = TypeVar("CustomUser", bound=BaseModel)


class Property(BaseModel):
    """property name and corresponding type"""
    # NOTE(review): the docstring above and the Field description below are
    # presumably exported into the schema shown to the LLM, so they are prompt
    # text rather than plain documentation - confirm before rewording them.

    # Field name for the generated user model. The snake_case requirement is
    # only stated in the description; nothing in this class enforces it.
    name: str = Field(...,description="must be snake case")
    # One of three supported type labels; `CustomUserModel.parse_users` maps
    # each label to a (type, default) pair.
    type: Literal["string", "integer", "boolean"]


# Two-stage extraction model. The LLM first fills `custom_properties` (the
# schema). The "after" validator then builds a pydantic model from that schema
# and issues a second LLM call that extracts the users themselves.
# These notes are `#` comments on purpose: the class docstring is left untouched
# because pydantic exposes it as the schema description.
class CustomUserModel(BaseModel):
    """model for user properties"""
    # Schema proposed by the LLM: one entry per user attribute.
    custom_properties: List[Property] = Field(...)
    # Dynamically created model class; populated by `parse_users`.
    user_model: Optional[Type[BaseModel]] = None
    # Users extracted with `user_model`; populated by `parse_users`.
    users: Optional[List[CustomUser]] = None

    @model_validator(mode="after")
    def parse_users(self, info: ValidationInfo):
        """Build the user model from `custom_properties` and extract the users.

        Args:
            info: Validation info. The source text is read from
                ``info.context["context"]``.

        Returns:
            The validated instance, with ``user_model`` always set and
            ``users`` set whenever a context text was available.

        Raises:
            ValueError: If ``custom_properties`` is empty.
        """
        print(f"{info=}")
        print(f"{self=}")

        # (type, default) per supported label. The type is given by name; the
        # recorded JSON schema output shows these resolve to the builtin types.
        prop_definition = {
            "string": ("str",""),
            "integer": ("int",0),
            "boolean": ("bool", True),
        }
        properties = {
            prop.name: prop_definition[prop.type] for prop in self.custom_properties
        }

        print(f"{properties=}")
        if not properties:
            raise ValueError("user schema empty")

        # Deliberately not named `CustomUser`, so the module-level TypeVar of
        # that name is not shadowed inside this method.
        user_model = get_model_by_fields(properties)
        self.user_model = user_model

        # Send only the source text to the LLM. Interpolating `info` itself
        # would put the whole ValidationInfo repr (config, field name, ...)
        # into the prompt.
        context_text = (
            info.context.get("context") if isinstance(info.context, dict) else None
        )
        if not context_text:
            # No text to extract from (e.g. the model was validated without a
            # validation context): keep the schema and skip the LLM call.
            return self

        resp = client.chat.completions.create(
            response_model=Iterable[user_model],
            messages=[
                {
                    "role": "user",
                    "content": f"parse the users: {context_text}."
                },
            ],
            model="gpt-3.5-turbo",
            temperature=0,
        )  # type: ignore

        print(f"inner response: {resp}")

        self.users = resp
        return self

    

#### The LLM Query

In [5]:

# Outer query: ask the LLM for the schema (`custom_properties`). Parsing the
# answer into `CustomUserModel` runs that model's validator, which performs the
# inner LLM call that fills `resp.users`.
resp = client.chat.completions.create(
    #model="gpt-4",
    model="gpt-3.5-turbo",
    response_model=CustomUserModel,
    # NOTE(review): presumably the number of re-asks instructor attempts when
    # the response fails validation - confirm against the instructor docs.
    max_retries=2,
    messages=[
        {
            "role": "system",
            "content": "You are a world class data structure extractor.",
        },
        {
            "role": "user",
            "content": f"""provide all necessary properties for a user data structure which captures all given information about the users decribed in the context below. 
            context: {' '.join(context)}""",
        },
        
    ],
    # Reaches the validator as `info.context` (see the recorded `info=` output).
    validation_context={"context": " ".join(context)},
    
)



info=ValidationInfo(config={'title': 'CustomUserModel'}, context={'context': 'Brandon is 33 years old. He works as a solution architect. Dominic is 45 years old. He is retired. There onces was a prince, named Benny. He ruled for 10 years, which just ended. He started at 22. Simon says, why are you 22 years old marvin?'}, data=None, field_name=None)
self=CustomUserModel(custom_properties=[Property(name='name', type='string'), Property(name='age', type='integer'), Property(name='occupation', type='string'), Property(name='is_retired', type='boolean'), Property(name='reign_years', type='integer'), Property(name='start_age', type='integer')], user_model=None, users=None)
properties={'name': ('str', ''), 'age': ('int', 0), 'occupation': ('str', ''), 'is_retired': ('bool', True), 'reign_years': ('int', 0), 'start_age': ('int', 0)}
inner response: [CustomUser(name='Brandon', age=33, occupation='solution architect', is_retired=False, reign_years=0, start_age=0), CustomUser(name='Dominic', age=

In [6]:
# Users extracted by the validator's inner LLM call.
resp.users

[CustomUser(name='Brandon', age=33, occupation='solution architect', is_retired=False, reign_years=0, start_age=0),
 CustomUser(name='Dominic', age=45, occupation='', is_retired=True, reign_years=0, start_age=0),
 CustomUser(name='Benny', age=0, occupation='prince', is_retired=False, reign_years=10, start_age=22),
 CustomUser(name='Simon', age=0, occupation='', is_retired=False, reign_years=0, start_age=0),
 CustomUser(name='marvin', age=22, occupation='', is_retired=False, reign_years=0, start_age=0)]

In [7]:
# The model class generated from the LLM-proposed properties.
resp.user_model

__main__.CustomUser

In [8]:
# The generated class is created through pydantic's model metaclass, as the
# output shows.
type(resp.user_model)

pydantic._internal._model_construction.ModelMetaclass

In [9]:
# Show the JSON schema of the generated model; each property carries the
# default that the validator assigned for its type label.
import pprint
pprint.pprint(resp.user_model.model_json_schema())

{'properties': {'age': {'default': 0, 'title': 'Age', 'type': 'integer'},
                'is_retired': {'default': True,
                               'title': 'Is Retired',
                               'type': 'boolean'},
                'name': {'default': '', 'title': 'Name', 'type': 'string'},
                'occupation': {'default': '',
                               'title': 'Occupation',
                               'type': 'string'},
                'reign_years': {'default': 0,
                                'title': 'Reign Years',
                                'type': 'integer'},
                'start_age': {'default': 0,
                              'title': 'Start Age',
                              'type': 'integer'}},
 'title': 'CustomUser',
 'type': 'object'}


In [10]:
# The schema the LLM proposed: property names with their type labels.
resp.custom_properties

[Property(name='name', type='string'),
 Property(name='age', type='integer'),
 Property(name='occupation', type='string'),
 Property(name='is_retired', type='boolean'),
 Property(name='reign_years', type='integer'),
 Property(name='start_age', type='integer')]

In [11]:
# Dump every extracted user as pretty-printed JSON, one document per user.
for extracted_user in resp.users:
    user_json = extracted_user.model_dump_json(indent=2)
    print(user_json)

{
  "name": "Brandon",
  "age": 33,
  "occupation": "solution architect",
  "is_retired": false,
  "reign_years": 0,
  "start_age": 0
}
{
  "name": "Dominic",
  "age": 45,
  "occupation": "",
  "is_retired": true,
  "reign_years": 0,
  "start_age": 0
}
{
  "name": "Benny",
  "age": 0,
  "occupation": "prince",
  "is_retired": false,
  "reign_years": 10,
  "start_age": 22
}
{
  "name": "Simon",
  "age": 0,
  "occupation": "",
  "is_retired": false,
  "reign_years": 0,
  "start_age": 0
}
{
  "name": "marvin",
  "age": 22,
  "occupation": "",
  "is_retired": false,
  "reign_years": 0,
  "start_age": 0
}
