In [1]:
from enum import Enum

import httpx
from dotenv import load_dotenv
from markdownify import markdownify as md
from openai import OpenAI
from pydantic import BaseModel
from pydantic import Field
from rich import print

In [2]:
# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this provides the API key that OpenAI() picks up
# in a later cell — confirm against the project's .env template.
load_dotenv()

True

In [3]:
# Binance Spot API documentation page that the notebook downloads and parses.
url = "https://developers.binance.com/docs/binance-spot-api-docs"
# Number of leading characters of the converted markdown sent to the model
# as the user message.
max_length = 5000
# Settings forwarded unchanged to the OpenAI chat-completion parse call.
model = "gpt-4o-mini"
temperature = 0
max_tokens = 1000

In [4]:
# Download the documentation page.
# httpx does not follow redirects unless asked to, and raise_for_status()
# raises on 3xx responses, so opt in explicitly; otherwise a redirect on the
# docs URL (trailing slash, locale prefix, ...) would abort the notebook here.
resp = httpx.get(url, follow_redirects=True)
resp.raise_for_status()

# Preview the first 1000 characters of the raw HTML as a sanity check.
print(resp.text[:1000])

In [5]:
# Convert the downloaded HTML body to markdown, asking markdownify to strip
# the <a> and <img> tags.
markdown_content = md(
    resp.content,
    strip=["a", "img"],
)
# Show the first 1000 characters of the converted text.
print(markdown_content[0:1000])

## Structured Outputs

- [OpenAI docs](https://platform.openai.com/docs/guides/structured-outputs)
- [OpenAI playground](https://platform.openai.com/playground/chat)


In [6]:
# Closed set of labels a single changelog item can be tagged with.
# Mixing in `str` makes every member a plain string as well as an Enum member.
# Used as the type of `Change.category`.
class Category(str, Enum):
    BREAKING_CHANGES = "breaking changes"
    NEW_FEATURES = "new features"
    DEPRECATIONS = "deprecations"
    BUG_FIXES = "bug fixes"
    PERFORMANCE_IMPROVEMENTS = "performance improvements"
    SECURITY_UPDATES = "security updates"


# Calendar date split into three integer components.
# No range constraints are declared on the fields.
class Date(BaseModel):
    year: int
    month: int
    day: int


# One changelog item: its text plus the Category it was assigned.
class Change(BaseModel):
    content: str  # text of the change
    category: Category  # one of the fixed Category labels


# All changes recorded under a single date.
class Entry(BaseModel):
    date: Date
    # Required field (`...` means no default); the description string is
    # attached to the field's metadata.
    changes: list[Change] = Field(..., description="The changes made")


# Top-level model: the full list of dated entries. This is the class passed
# as `response_format` to the OpenAI parse call.
# NOTE(review): documented with a comment rather than a docstring; pydantic is
# understood to copy class docstrings into the schema `description`, which
# would change what is sent to the model — confirm before converting.
class Changelog(BaseModel):
    entries: list[Entry]


# Build the JSON schema generated from the Changelog model, then display it.
changelog_schema = Changelog.model_json_schema()
print(changelog_schema)

In [7]:
# Client with default configuration (no explicit arguments are passed).
client = OpenAI()


# Instructions for the model: extract dated, categorised changes from the
# changelog text and nothing that is not present in it.
system_prompt = """
Extract information from the provided API changelog and structure it according to the specified schema.
Only use information directly present in the context and do not fabricate or create placeholders.

# Steps
1. **Identify the Date**: Extract the date for each set of changes in the format of year, month, and day.
2. **Categorize Changes**: For each change mentioned, determine its category from the given options:
   - Breaking Changes
   - New Features
   - Deprecations
   - Bug Fixes
   - Performance Improvements
   - Security Updates
3. **Structure the Data**: For each date, compile all the changes and their categories.

# Notes
- Ensure that all extracted information directly corresponds to what is provided in the changelog.
- Avoid assumptions or additions outside the given text.
- Maintain correctness and completeness according to the specified schema.
""".strip()
messages = [
    {"role": "system", "content": system_prompt},
    # Only the first `max_length` characters of the page are sent.
    {"role": "user", "content": markdown_content[:max_length]},
]

# Ask for a completion that is parsed into the Changelog model.
# NOTE(review): `max_tokens` caps the output; a long changelog may not fit and
# the SDK is expected to raise rather than return partial JSON — confirm.
response = client.beta.chat.completions.parse(
    messages=messages,
    model=model,
    temperature=temperature,
    max_tokens=max_tokens,
    response_format=Changelog,
)


message = response.choices[0].message
parsed = message.parsed
# `parsed` is None when the model refuses to answer. Fail here with the
# refusal text instead of letting a later cell die on `None.entries`.
if parsed is None:
    raise RuntimeError(
        f"No structured changelog was returned (refusal: {message.refusal!r})"
    )
print(parsed)

In [8]:
# Walk the parsed changelog: one header line per dated entry, followed by
# each change recorded under that date.
for entry in parsed.entries:
    print(f"Date: {entry.date}")

    for change in entry.changes:
        print(change)