### Setup your environnement variables

In [1]:
import os

os.environ["OPENAI_API_KEY"] = "YOUR API KEY"
os.environ["UIFORM_API_KEY"] = "YOUR API KEY" # go to https://uiform.com to create your API Key

In [1]:
from dotenv import load_dotenv
assert load_dotenv("../.env.local") # Load environment variables from a .env file

# Get started

In [2]:
from uiform import UiForm
uiclient = UiForm()
uiclient.models.list()

RemoteProtocolError: Server disconnected without sending a response.

# Example 1 - Analyze informations from a document

#### Option A - You use our preprocess endpoint and openAI client

In [7]:
import json
from uiform import UiForm, Schema
from openai import OpenAI

with open("freight/schema.json", "r") as f:
    json_schema = json.load(f)


uiclient = UiForm()
doc_msg = uiclient.documents.create_messages(
    document = "freight/booking_confirmation.jpg",
)

schema_obj = Schema(
    json_schema = json_schema
)


# Now you can use your favorite model to analyze your document
client = OpenAI()
completion = client.beta.chat.completions.parse(
    model="gpt-4o-mini", 
    messages=schema_obj.openai_messages + doc_msg.openai_messages,
    response_format=schema_obj.inference_pydantic_model
)

# Validate the response against the original schema if you want to remove the reasoning fields
assert completion.choices[0].message.content is not None
extraction = schema_obj.pydantic_model.model_validate_json(
    completion.choices[0].message.content
)

print(extraction)

BadRequestError: Error code: 400 - {'error': {'message': "Invalid schema for response_format 'RoadBookingConfirmationData': In context=('properties', 'packing'), 'additionalProperties' is required to be supplied and to be false.", 'type': 'invalid_request_error', 'param': 'response_format', 'code': None}}

In [8]:
completion = client.chat.completions.create(
    model="gpt-4o",
    messages=schema_obj.openai_messages + doc_msg.openai_messages,
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": schema_obj.schema_version,
            "schema": schema_obj.inference_json_schema,
            "strict": True
        }
    }
)

In [15]:
from uiform._utils.json_schema import convert_json_schema_to_basemodel
convert_json_schema_to_basemodel(schema_obj.inference_json_schema).model_json_schema()

{'$defs': {'AddressDataSimple': {'properties': {'city': {'anyOf': [{'type': 'string'},
      {'type': 'null'}],
     'description': 'City, district, suburb, town, or village.\nUser Provided a Default Value: null',
     'title': 'City'},
    'postal_code': {'anyOf': [{'type': 'string'}, {'type': 'null'}],
     'description': 'ZIP or postal code. If french postal code, it should be a pure number, without letters. It can be incomplete if the postal code is not fully visible, but should contain at least two digits.\nUser Provided a Default Value: null',
     'title': 'Postal Code'},
    'country': {'anyOf': [{'type': 'string'}, {'type': 'null'}],
     'description': 'Two-letter country code (ISO 3166-1 alpha-2).\nUser Provided a Default Value: null',
     'title': 'Country'},
    'line1': {'anyOf': [{'type': 'string'}, {'type': 'null'}],
     'description': 'Address line 1 (e.g., street, PO Box, or company name).\nUser Provided a Default Value: null',
     'title': 'Line1'},
    'line2': {

In [17]:
schema_obj.inference_pydantic_model.model_json_schema()

{'$defs': {'AddressDataSimple': {'properties': {'city': {'anyOf': [{'type': 'string'},
      {'type': 'null'}],
     'description': 'City, district, suburb, town, or village.\nUser Provided a Default Value: null',
     'title': 'City'},
    'postal_code': {'anyOf': [{'type': 'string'}, {'type': 'null'}],
     'description': 'ZIP or postal code. If french postal code, it should be a pure number, without letters. It can be incomplete if the postal code is not fully visible, but should contain at least two digits.\nUser Provided a Default Value: null',
     'title': 'Postal Code'},
    'country': {'anyOf': [{'type': 'string'}, {'type': 'null'}],
     'description': 'Two-letter country code (ISO 3166-1 alpha-2).\nUser Provided a Default Value: null',
     'title': 'Country'},
    'line1': {'anyOf': [{'type': 'string'}, {'type': 'null'}],
     'description': 'Address line 1 (e.g., street, PO Box, or company name).\nUser Provided a Default Value: null',
     'title': 'Line1'},
    'line2': {

#### You can also add some preprocessing operations to find informations that the LLM struggles to parse

In [20]:
text_operations = {
    'regex_instructions' : [
        {
            "name": "vat_number",
            "pattern": r"[Ff][Rr]\s*(\d\s*){11}",
            "description": "VAT number in the format XX999999999"
        }
    ]
}

doc_msg = uiclient.documents.create_messages(
    document = "freight/booking_confirmation.jpg",
    text_operations=text_operations
)

#### Option B - Using UiForm `extract` endpoint

In [21]:
import json
from uiform.client import UiForm

uiclient = UiForm()
completion = uiclient.documents.extractions.parse(
    json_schema = "freight/schema.json",
    document = "freight/booking_confirmation.jpg",
    model="gpt-4o-mini",
    temperature=0
)

print(completion.choices[0].message.parsed)

booking_id='SHIP-001' payment=TransportPriceData(total_price=1500.0, currency='EUR') client=ClientData(company_name='ACME Corporation', VAT_number=None, city='London', postal_code='WC2N 5DU', country='GB', code=None, email='client@acme.com') shipments=[ShipmentData(shipment_id='SHIP-001', sender=SenderData(company_name='ACME Corporation', address=AddressDataSimple(city='London', postal_code='WC2N 5DU', country='GB', line1='123 Elm Street', line2='Suite 500'), phone_number='+442079460958', email_address='client@acme.com', pickup_datetime=PickupDatetimeData(date='2023-02-05', start_time='08:00:00', end_time='12:00:00'), observations=None), recipient=RecipientData(company_name='Beta Industries', address=AddressDataSimple(city='Munich', postal_code='80331', country='DE', line1='789 Pine Street', line2=None), phone_number='+4989123456', email_address=None, delivery_datetime=DeliveryDatetimeData(date='2023-03-05', start_time='10:00:00', end_time='16:00:00'), observations=None), goods=GoodsDa

## Appendix A - Use text as modality

In [22]:
from uiform.client import UiForm

uiclient = UiForm()

completion = uiclient.documents.extractions.parse(
    json_schema = "freight/schema.json",
    document="freight/booking_confirmation.jpg",
    model="gpt-4o-mini-2024-07-18",
    temperature=0,
    modality='text'
)

print(completion.choices[0].message.parsed)

booking_id='SHIP-001' payment=TransportPriceData(total_price=1500.0, currency='EUR') client=ClientData(company_name='ACME Corporation', VAT_number=None, city='London', postal_code='WC2N S5DU', country='GB', code=None, email='client@acme.com') shipments=[ShipmentData(shipment_id='SHIP-001', sender=SenderData(company_name='ACME Corporation', address=AddressDataSimple(city='Manchester', postal_code='M1 4WP', country='GB', line1='456 Oak Avenue', line2='Floor 3 Suite 500'), phone_number='+442079460958', email_address='client@acme.com', pickup_datetime=PickupDatetimeData(date='2023-02-05', start_time='08:00:00', end_time='12:00:00'), observations=None), recipient=RecipientData(company_name='Beta Industries', address=AddressDataSimple(city='Munich', postal_code='80331', country='DE', line1='789 Pine Street', line2=None), phone_number='+4989123456', email_address=None, delivery_datetime=DeliveryDatetimeData(date='2023-03-05', start_time='10:00:00', end_time='16:00:00'), observations=None), go

## Appendix B - Add text operations

In [None]:
from uiform.client import UiForm

uiclient = UiForm()

text_operations = {
    'regex_instructions' : [
        {
            "name": "vat_number",
            "pattern": r"[Ff][Rr]\s*(\d\s*){11}",
            "description": "VAT number in the format XX999999999"
        }
    ]
}

completion = uiclient.documents.extractions.parse(
    json_schema = "freight/schema.json",
    document = "freight/booking_confirmation.jpg",
    model="gpt-4o-mini-2024-07-18",
    temperature=0,
    text_operations=text_operations
)

print(completion.choices[0].message.parsed)