In [1]:
# %pip install retab

`X-ReasoningPrompt` generates reasoning fields alongside data fields, helping LLMs provide step-by-step justification for extracted values in complex scenarios.

**For more information on Reasoning, check our [documentation](https://docs.retab.com/core-concepts/Reasoning).**

## Example 1

In [None]:
# We re-use the example from 01.build-your-schema.ipynb
# Draft an Initial Schema
from pydantic import BaseModel

# Baseline invoice schema: every field is a plain, required string with no
# description and no reasoning annotation. Its JSON schema
# (`Invoice.model_json_schema()`) is what the extraction call below receives.
# NOTE(review): documented with `#` comments rather than a class docstring on
# purpose -- pydantic is understood to copy a model docstring into the generated
# JSON schema `description`, which would alter what is sent to the model.
class Invoice(BaseModel):
    # Invoice header fields.
    date: str
    invoice_number: str
    total: str
    status: str
    # Customer contact details.
    customer: str
    customer_address: str
    customer_email: str
    customer_phone: str
    customer_website: str

# Execute with Consensus

from dotenv import load_dotenv
from retab import Retab
import json

# You need to create a .env file containing your RETAB_API_KEY=sk_retab_***
load_dotenv()
client = Retab()

# Run the extraction on the sample invoice image with the baseline schema,
# requesting n_consensus=5.
invoice_schema = Invoice.model_json_schema()
response = client.documents.extract(
    documents=["../assets/docs/invoice.jpeg"],
    model="gemini-2.5-flash",  # or any model your plan supports
    json_schema=invoice_schema,
    modality="native",
    n_consensus=5,
)

# Pretty-print the likelihoods reported on the response.
likelihoods_json = json.dumps(response.likelihoods, indent=2)
print(likelihoods_json)

In [None]:
# Update the Schema adding a reasoning field
from pydantic import Field

# You can define the custom annotations in the `pydantic.Field` class using the `json_schema_extra` field.

# Same fields as `Invoice`, except that `status` now carries a description and
# an `X-ReasoningPrompt` annotation, both injected into the JSON schema through
# `pydantic.Field` (`description=` and `json_schema_extra=`).
# NOTE(review): documented with `#` comments rather than a class docstring on
# purpose -- pydantic is understood to copy a model docstring into the generated
# JSON schema `description`, which would alter what is sent to the model.
class Invoice_with_Reasoning(BaseModel):
    date: str
    invoice_number: str
    total: str

    status: str = Field(
        ...,  # required field, no default
        # Fixed typo: the description previously read "Blanck".
        description="Invoice Status, either Blank, Paid or Unpaid.",
        # Reasoning Prompt
        json_schema_extra={
            "X-ReasoningPrompt": "If the Status is not specified, make it explicit that it is blank. Otherwise, use the provided status making sure it is either Paid or Unpaid.",
        },
    )

    customer: str
    customer_address: str
    customer_email: str
    customer_phone: str
    customer_website: str

# Evaluate the precision of the new Schema
# Same document, model and consensus settings as the baseline run; only the
# schema changes, so the two likelihood reports can be compared directly.
reasoning_invoice_schema = Invoice_with_Reasoning.model_json_schema()
response = client.documents.extract(
    documents=["../assets/docs/invoice.jpeg"],
    model="gemini-2.5-flash",  # or any model your plan supports
    json_schema=reasoning_invoice_schema,
    modality="native",
    n_consensus=5,
)

# Pretty-print the likelihoods reported on the response.
likelihoods_json = json.dumps(response.likelihoods, indent=2)
print(likelihoods_json)

Adding the reasoning prompt (`X-ReasoningPrompt`) improves the likelihood of the `status` field from 0.6 to 1.0, without having to modify the schema or add further constraints to it!

## Example 2

In [None]:
from datetime import date

# Baseline weather-report schema with no descriptions and no reasoning
# annotation; the only unit hint is the `temperature_Fahrenheit` field name.
# NOTE(review): documented with `#` comments rather than a class docstring on
# purpose -- pydantic is understood to copy a model docstring into the generated
# JSON schema `description`, which would alter what is sent to the model.
class TemperatureReport(BaseModel):
    # The field name is the same as the imported `datetime.date` type.
    # NOTE(review): presumably fine because the field has no default value --
    # confirm before ever adding one.
    date: date
    location: str
    temperature_Fahrenheit: float
    humidity: float
    conditions: str

# Run the extraction on the temperature report with the baseline schema,
# requesting n_consensus=5.
temperature_schema = TemperatureReport.model_json_schema()
response = client.documents.extract(
    documents=["../assets/code/temperature-report.md"],
    model="gemini-2.5-flash",
    json_schema=temperature_schema,
    modality="native",
    n_consensus=5,
)

# Likelihoods first, then the message content of the first choice.
likelihoods_json = json.dumps(response.likelihoods, indent=2)
print(likelihoods_json)
first_choice = response.choices[0]
print(first_choice.message.content)

Without reasoning, the LLM incorrectly returned 22.5°F instead of converting the value from Celsius, and it did so with a likelihood of 1.0.

In [None]:
# Update the Schema adding a reasoning field

# Reasoning prompt attached to the temperature field. It is defined at module
# level (not inside the model body) so it is not part of the model definition.
TEMPERATURE_REASONING_PROMPT = "If the temperature is given in Celsius, make the explicit computation to convert it to Fahrenheit. If the temperature is given in Fahrenheit, leave it as is."


# Weather-report schema whose temperature field carries a description and an
# `X-ReasoningPrompt` annotation, both passed through `pydantic.Field`.
class TemperatureReport_with_Reasoning(BaseModel):
    date: date
    location: str
    temperature: float = Field(
        ...,  # required field, no default
        description="temperature in Fahrenheit",
        json_schema_extra={"X-ReasoningPrompt": TEMPERATURE_REASONING_PROMPT},
    )
    humidity: float
    conditions: str

# Same document, model and consensus settings as the baseline temperature run;
# only the schema changes.
reasoning_temperature_schema = TemperatureReport_with_Reasoning.model_json_schema()
response = client.documents.extract(
    documents=["../assets/code/temperature-report.md"],
    model="gemini-2.5-flash",
    json_schema=reasoning_temperature_schema,
    modality="native",
    n_consensus=5,
)

# Likelihoods first, then the message content of the first choice.
likelihoods_json = json.dumps(response.likelihoods, indent=2)
print(likelihoods_json)
first_choice = response.choices[0]
print(first_choice.message.content)

With reasoning, you can see that the “reasoning___” fields help the LLM show its work while the final output follows your schema structure.