In [1]:
import requests

In [4]:
pdf_url = "https://s23.q4cdn.com/407969754/files/doc_earnings/2023/q4/transcript/Uber-Q4-23-Prepared-Remarks.pdf"

# Download the document that will be sent for extraction and keep its raw bytes.
# A timeout stops the cell from hanging forever on a stalled connection.
pdf_response = requests.get(pdf_url, timeout=30)
# raise_for_status() reports the failing status code and URL; unlike `assert`,
# it is not stripped when Python runs with -O.
pdf_response.raise_for_status()
pdf_bytes = pdf_response.content
if not pdf_bytes:
    # An empty body would otherwise be uploaded as a zero-length file later on.
    raise ValueError(f"Downloaded an empty document from {pdf_url}")

In [5]:
from uuid import uuid4

# One random UUID, rendered in its canonical text form, identifies this
# notebook session. The same value is sent as the "x-key" header on the
# requests made to `url` in later cells.
user_id = f"{uuid4()}"
headers = {}
headers["x-key"] = user_id

In [6]:
from pydantic import BaseModel, Field

# Schema for one extracted financial figure.
#
# The model is used in two places in this notebook: its JSON schema
# (`FinancialData.schema()`) is sent to the service when the extractor is
# created, and instances are serialised to build few-shot example outputs.
# Every field is required (`...` as the default) and carries a description.
#
# NOTE(review): documented with `#` comments rather than a class docstring on
# purpose. pydantic is understood to copy a model's docstring into the generated
# schema's "description", which would change the payload sent to the service.
# Confirm against the pydantic version in use before converting to a docstring.
class FinancialData(BaseModel):
    # Label of the figure; the examples in this notebook use lowercase labels
    # such as "revenue" and "ebit".
    name: str = Field(..., description="Name of the financial figure, such as revenue.")
    # Numeric magnitude, to be read together with `scale` (e.g. 9.9 with "B").
    value: float = Field(..., description="Nominal earnings in local currency.")
    # Unit/multiplier of `value`.
    scale: str = Field(..., description="Scale of figure, such as MM, B, or percent.")
    # Start date of the reporting period, kept as a string (e.g. "2022-01-01");
    # the type itself does not enforce ISO formatting.
    period_start: str = Field(..., description="The start of the time period in ISO format.")
    # Length of the reporting period in months (12 for a full year in the examples).
    period_duration: int = Field(..., description="Duration of period, in months")
    # Source sentence(s) supporting the figure.
    evidence: str = Field(..., description="Verbatim sentence of text where figure was found.")

In [15]:
# Base URL of the extraction service (a locally running instance).
url = "http://localhost:8000"

# Definition of the extractor to create: what it is for, the JSON schema of the
# records it should produce, and the instruction that accompanies the schema.
data = {
    "user_id": user_id,
    "description": "Financial revenues and other figures.",
    "schema": FinancialData.schema(),
    "instruction": (
        "Extract standard financial figures, specifically earnings and "
        "revenue figures. Only extract historical facts, not estimates or guidance."
    )
}

response = requests.post(f"{url}/extractors", json=data, headers=headers, timeout=30)
# Fail here if the service rejected the request. Otherwise the error body would
# be parsed as if it were an extractor and only surface later as a missing
# "uuid" key.
response.raise_for_status()
extractor = response.json()

In [18]:
# Upload the PDF bytes as a multipart file and run the extractor on them.
# The timeout is generous because extraction may take a while, but it still
# bounds the wait so a stalled request cannot block the notebook forever.
result = requests.post(
    f"{url}/extract",
    data={"extractor_id": extractor["uuid"], "model_name": "gpt-3.5-turbo"},
    files={"file": pdf_bytes},
    headers=headers,
    timeout=300,
)
# Surface HTTP errors explicitly instead of displaying an error payload as if
# it were an extraction result.
result.raise_for_status()

result.json()

{'data': [{'name': 'revenue',
   'scale': 'B',
   'value': 9.9,
   'evidence': 'Both Gross Bookings and Adjusted EBITDA surpassed the high end of our Q4 outlook. Gross Bookings growth accelerated to 21% YoY on a constant-currency basis (23% excluding Freight), as we generated Gross Bookings of $37.6 billion. Foreign exchange was a tailwind of roughly $500 million YoY (or roughly 160 bps). We grew our revenue by 13% YoY on a constant-currency basis to $9.9 billion.',
   'period_start': '2023-10-01',
   'period_duration': 3},
  {'name': 'ebitda',
   'scale': 'B',
   'value': 1.3,
   'evidence': 'We maintained our focus on operational eﬃciency and disciplined expense management, which contributed to all-time high Adjusted EBITDA of $1.3 billion (note: foreign exchange was a $30 million YoY tailwind). This result represents a record Adjusted EBITDA margin of 3.4% of Gross Bookings, an increase of 130 bps YoY.',
   'period_start': '2023-10-01',
   'period_duration': 3},
  {'name': 'adjusted

In [17]:
# Few-shot examples: each entry pairs an input text with the list of
# FinancialData records (serialised to plain dicts) expected from that text.
examples = [
    {
        "text": "In 2022, Revenue was $1 million and EBIT was $2M.",
        "output": [
            FinancialData(
                name="revenue",
                value=1,
                scale="MM",
                period_start="2022-01-01",
                period_duration=12,
                evidence="In 2022, Revenue was $1 million and EBIT was $2M.",
            ).dict(),
            FinancialData(
                name="ebit",
                value=2,
                scale="MM",
                period_start="2022-01-01",
                period_duration=12,
                evidence="In 2022, Revenue was $1 million and EBIT was $2M.",
            ).dict()
        ],
    },
]

# Post every example to the service for the extractor created earlier and keep
# the responses for inspection.
responses = []
for example in examples:
    create_request = {
        "extractor_id": extractor["uuid"],
        "content": example["text"],
        "output": example['output'],
    }
    response = requests.post(
        f"{url}/examples", json=create_request, headers=headers, timeout=30
    )
    # Stop at the first rejected example rather than silently collecting a
    # failed response that nobody checks.
    response.raise_for_status()
    responses.append(response)