In [1]:
# %pip install retab

In [None]:
# Generate a Schema

from dotenv import load_dotenv
from retab import Retab
import json

# Load environment variables from a local .env file. We recommend creating one
# that contains your RETAB_API_KEY=sk_retab_***
load_dotenv()

# No API key is passed explicitly here.
client = Retab()

# Options for the schema-generation request, gathered in one place.
schema_options = {
    "documents": ["../assets/docs/invoice.jpeg"],
    "model": "gpt-4.1",       # or any model your plan supports
    "temperature": 0.0,       # keep the generation deterministic
    "modality": "native",     # "native" = let the API decide best modality
}
json_schema = client.schemas.generate(**schema_options)

# Pretty-print the generated schema with a two-space indent.
print(json.dumps(json_schema, indent=2))

{
  "object": "schema",
  "created_at": "2025-08-01T09:15:47.515156Z",
  "json_schema": {
    "title": "Supplier Invoice Document Schema",
    "description": "A schema for storing structured data extracted from supplier invoices, including billing, shipping, contact, line items, and payment summary details.",
    "type": "object",
    "X-SchemaType": "generic",
    "properties": {
      "supplier_name": {
        "type": "string",
        "description": "The name of the supplier (free-text)."
      },
      "supplier_address": {
        "type": "string",
        "description": "The address of the supplier (free-text)."
      },
      "invoice_number": {
        "type": "string",
        "description": "The unique invoice number (free-text, may include numbers and letters)."
      },
      "invoice_date": {
        "type": "string",
        "description": "The date the invoice was issued, in ISO-8601 format."
      },
      "bill_to": {
        "$ref": "#/$defs/contact_info",
        "d

In [None]:
# Extract Data

# Extract structured data from the invoice. This cell relies on `client` and
# `json_schema` created by the schema-generation cell, so run that cell first.
extraction_options = {
    "json_schema": json_schema,  # we use the json schema that has just been generated
    "document": "../assets/docs/invoice.jpeg",
    "model": "gpt-4.1",
    "modality": "text",
    "temperature": 0,
}
extraction = client.documents.extract(**extraction_options)

# Dump the full response object as indented JSON.
print(extraction.model_dump_json(indent=2))

{
  "id": "chatcmpl-Bzg9crFsttMJG9nZR3QZvPk9uFJ6s",
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "logprobs": null,
      "message": {
        "content": "{\"supplier_name\": \"AMS AMNOSH SUPPLIERS\", \"supplier_address\": \"9291 Proin Road\\nLake Charles, ME-11292\", \"invoice_number\": \"1437\", \"invoice_date\": \"2021-11-24\", \"bill_to\": {\"name\": \"Johnson Carrie\", \"company\": \"Abcxyz Traders\", \"address\": \"45 Lightning Road,\\nArizona, AZ 88776\"}, \"ship_to\": {\"name\": \"Johnny Patel\", \"company\": \"Abcxyz Traders\", \"address\": \"45 Lightning Road,\\nArizona, AZ 88776\"}, \"contact_details\": {\"email\": \"proprietor@abcxyz.com\", \"phone\": \"321-321-1234\", \"terms\": \"\", \"po_number\": \"\", \"status\": \"\"}, \"line_items\": [{\"quantity\": 3, \"description\": \"Drag Series Transmission Build - A WD DSM\", \"price_each\": 1129.03, \"amount\": 3387.09}, {\"quantity\": 2, \"description\": \"Drive Shaft Automatic Right\", \"price_e

# **ANNEXES**

In [4]:
# Retab supports a wide range of models.
# This cell reuses the `client` created in the schema-generation cell, so that
# cell must have been run first.

# Ask the API for its model list and print the raw result.
models_list = client.models.list()
print(models_list)

[Model(id='auto-large', created=0, object='model', owned_by='Retab'), Model(id='auto-small', created=0, object='model', owned_by='Retab'), Model(id='auto-micro', created=0, object='model', owned_by='Retab'), Model(id='grok-3', created=0, object='model', owned_by='xAI'), Model(id='grok-3-mini', created=0, object='model', owned_by='xAI'), Model(id='o3', created=0, object='model', owned_by='OpenAI'), Model(id='o4-mini', created=0, object='model', owned_by='OpenAI'), Model(id='gpt-4.1', created=0, object='model', owned_by='OpenAI'), Model(id='gpt-4.1-mini', created=0, object='model', owned_by='OpenAI'), Model(id='gpt-4.1-nano', created=0, object='model', owned_by='OpenAI'), Model(id='gemini-2.5-flash', created=0, object='model', owned_by='Google'), Model(id='gemini-2.5-flash-lite', created=0, object='model', owned_by='Google'), Model(id='gemini-2.5-pro', created=0, object='model', owned_by='Google')]
