### Set up your environment variables

In [1]:
import os

# Register placeholder credentials for both services in the process environment.
# Replace each value with your own key before running the rest of the notebook.
os.environ.update(
    {
        "OPENAI_API_KEY": "YOUR API KEY",
        "UIFORM_API_KEY": "YOUR API KEY",  # go to https://uiform.com to create your API Key
    }
)

# Get started

In [2]:
from dotenv import load_dotenv

# Load environment variables from a .env file. With override=True, values found
# in the .env file replace variables that are already set in this process.
# The call is deliberately kept outside the `assert` expression: assertions are
# stripped when Python runs with -O, which would silently skip the loading.
dotenv_loaded = load_dotenv(override=True)
assert dotenv_loaded, "load_dotenv() did not load any variable - check that a .env file is present"

In [None]:
from uiform import UiForm

# Create a UiForm client, then ask it for its model list.
uiclient = UiForm()
available_models = uiclient.models.list()

# Bare last expression so the notebook renders the result as the cell output.
available_models

# Example 1 - Analyze information from a document

#### Option A - Using our preprocess endpoint and the OpenAI client

In [None]:
import json
from uiform import UiForm, Schema
from openai import OpenAI

# Read the JSON schema that describes the fields to extract.
# The encoding is explicit so the read does not depend on the platform default.
with open("freight/schema.json", "r", encoding="utf-8") as f:
    json_schema = json.load(f)


# Ask UiForm to turn the document into messages usable by a chat model.
uiclient = UiForm()
doc_msg = uiclient.documents.create_messages(
    document = "freight/booking_confirmation.jpg",
)

# Wrap the schema; the wrapper provides the messages, the response format and
# the pydantic model used below.
schema_obj = Schema(
    json_schema = json_schema
)


# Now you can use your favorite model to analyze your document
client = OpenAI()
completion = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=schema_obj.openai_messages + doc_msg.openai_messages,
    response_format=schema_obj.response_format_json
)

# Validate the response against the original schema if you want to remove the reasoning fields
raw_content = completion.choices[0].message.content
assert raw_content is not None, "the model returned no message content"
extraction = schema_obj.pydantic_model.model_validate_json(raw_content)

print(extraction)

In [None]:
# Display the validated extraction through its model_dump() output
# (presumably a plain dict, as for pydantic models - verify against the uiform docs).
extraction.model_dump()

#### You can also add some preprocessing operations to find information that the LLM struggles to parse

In [12]:
# Regex instruction for VAT numbers: the pattern matches "FR" (either case)
# followed by 11 digits, with optional whitespace between the characters.
vat_number_instruction = {
    "name": "vat_number",
    "pattern": r"[Ff][Rr]\s*(\d\s*){11}",
    "description": "VAT number in the format XX999999999",
}
text_operations = {'regex_instructions': [vat_number_instruction]}

# Build the document messages again, this time passing the text operations along.
# `uiclient` is the client created in an earlier cell.
doc_msg = uiclient.documents.create_messages(
    text_operations=text_operations,
    document="freight/booking_confirmation.jpg",
)

#### Option B - Using UiForm `extractions.parse` endpoint

In [None]:
import json
from uiform.client import UiForm

# A single call: the schema path, the document and the model settings all go to UiForm.
uiclient = UiForm()
completion = uiclient.documents.extractions.parse(
    document="freight/booking_confirmation.jpg",
    json_schema="freight/schema.json",
    temperature=0,
    model="gpt-4o-mini",
)

# Print the parsed payload attached to the first choice.
parsed_extraction = completion.choices[0].message.parsed
print(parsed_extraction)

#### Option C - Using UiForm `extractions.stream` endpoint

In [None]:
import json
from uiform.client import UiForm
from IPython.display import clear_output, display

uiclient = UiForm()

extraction_stream = uiclient.documents.extractions.stream(
    document="freight/booking_confirmation.jpg",
    json_schema="freight/schema.json",
    temperature=0,
    model="gpt-4o-mini",
)

# Redraw the cell output for every streamed completion.
# `completion` stays bound after the loop, so a later cell can inspect the last value.
for completion in extraction_stream:
    clear_output(wait=True)
    partial_result = completion.choices[0].message.parsed
    if partial_result is None:
        # Nothing parsed in this completion: display nothing.
        continue
    display(partial_result.model_dump())

In [None]:
# Inspect the parsed payload of `completion`; when the cells are run in order,
# this is the last value yielded by the streaming loop in the previous cell.
completion.choices[0].message.parsed

## Appendix A - Use text as modality

In [None]:
from uiform.client import UiForm

uiclient = UiForm()

# Same extraction call as before, with the `modality` argument set to 'text'.
completion = uiclient.documents.extractions.parse(
    document="freight/booking_confirmation.jpg",
    json_schema="freight/schema.json",
    modality='text',
    temperature=0,
    model="gpt-4o-mini-2024-07-18",
)

# Print the parsed payload attached to the first choice.
text_modality_result = completion.choices[0].message.parsed
print(text_modality_result)

## Appendix B - Add text operations

In [None]:
from uiform.client import UiForm

uiclient = UiForm()

# Regex instruction for VAT numbers: the pattern matches "FR" (either case)
# followed by 11 digits, with optional whitespace between the characters.
vat_number_instruction = {
    "name": "vat_number",
    "pattern": r"[Ff][Rr]\s*(\d\s*){11}",
    "description": "VAT number in the format XX999999999",
}
text_operations = {'regex_instructions': [vat_number_instruction]}

# Run the extraction with the text operations passed along.
completion = uiclient.documents.extractions.parse(
    document="freight/booking_confirmation.jpg",
    json_schema="freight/schema.json",
    text_operations=text_operations,
    temperature=0,
    model="gpt-4o-mini-2024-07-18",
)

# Print the parsed payload attached to the first choice.
parsed_with_operations = completion.choices[0].message.parsed
print(parsed_with_operations)