In [None]:
# %%
!pip install pymupdf4llm

In [None]:
# %%
import logging

import openai
import pymupdf4llm
from dotenv import load_dotenv
from openai import OpenAI
from rich import print

logging.basicConfig(level=logging.WARNING)
load_dotenv()

# %%
api_key = os.getenv("openai_api_key")
client = OpenAI(api_key=api_key)
COMPLETIONS_MODEL = "gpt-4o"

# %%
# Define models for Structured Outputs
from pydantic import BaseModel


class Item(BaseModel):
    product: str
    price: float
    quantity: int


class Receipt(BaseModel):
    total_paid_amount: float
    supplier: str
    items: list[Item]
    order_reference_number: str


# PDF to markdown text
md_text = pymupdf4llm.to_markdown("../data/raw/transaction/receipt.pdf")

# Send request using Structured Outputs/Response Format
completion = client.beta.chat.completions.parse(
    model=COMPLETIONS_MODEL,
    messages=[
        {"role": "system", "content": "Extract the information from the receipt"},
        {"role": "user", "content": md_text},
    ],
    response_format=Receipt,
)

output = completion.choices[0].message.parsed
receipt = Receipt.model_validate(output)
print(receipt)


In [3]:
import excalibur as ex


def create_deepseek_diagram():
    # Create a new Excalidraw diagram
    diagram = ex.Diagram()

    # Add columns for DeepSeek-V3, DeepSeek-R1-Zero, and DeepSeek-R1
    v3_box = diagram.add_rectangle(
        text="DeepSeek-V3", x=100, y=50, width=200, height=60, color="blue"
    )
    r1_zero_box = diagram.add_rectangle(
        text="DeepSeek-R1-Zero", x=350, y=50, width=200, height=60, color="red"
    )
    r1_box = diagram.add_rectangle(
        text="DeepSeek-R1", x=600, y=50, width=200, height=60, color="green"
    )

    # Add process steps for DeepSeek-V3
    pretrain = diagram.add_rectangle(
        text="Pre-training (14.8T tokens)", x=100, y=150, width=200, height=50
    )
    sft = diagram.add_rectangle(
        text="Supervised Fine-Tuning (1.5M samples)", x=100, y=220, width=200, height=50
    )
    rlhf = diagram.add_rectangle(
        text="Reinforcement Learning (RLHF)", x=100, y=290, width=200, height=50
    )

    # Connect steps to DeepSeek-V3
    diagram.add_arrow(pretrain, v3_box)
    diagram.add_arrow(sft, v3_box)
    diagram.add_arrow(rlhf, v3_box)

    # Add process steps for DeepSeek-R1-Zero
    rl_only = diagram.add_rectangle(
        text="Reinforcement Learning (RL Only)", x=350, y=150, width=200, height=50
    )
    grpo = diagram.add_rectangle(
        text="Group Relative Policy Optimization", x=350, y=220, width=200, height=50
    )
    issue = diagram.add_rectangle(
        text="Readability Issues", x=350, y=290, width=200, height=50, color="orange"
    )

    # Connect DeepSeek-V3 to DeepSeek-R1-Zero
    diagram.add_arrow(v3_box, rl_only)
    diagram.add_arrow(rl_only, grpo)
    diagram.add_arrow(grpo, r1_zero_box)
    diagram.add_arrow(r1_zero_box, issue)

    # Add process steps for DeepSeek-R1
    cot_sft = diagram.add_rectangle(
        text="Supervised Fine-Tuning (CoT Examples)", x=600, y=150, width=200, height=50
    )
    rlhf_r1 = diagram.add_rectangle(
        text="Reinforcement Learning (RLHF for Reasoning)",
        x=600,
        y=220,
        width=200,
        height=50,
    )
    final_rl = diagram.add_rectangle(
        text="Final RL Refinement", x=600, y=290, width=200, height=50
    )

    # Connect DeepSeek-V3 to DeepSeek-R1
    diagram.add_arrow(v3_box, cot_sft)
    diagram.add_arrow(cot_sft, rlhf_r1)
    diagram.add_arrow(rlhf_r1, final_rl)
    diagram.add_arrow(final_rl, r1_box)

    return diagram


# Generate and save the diagram
diagram = create_deepseek_diagram()
diagram.save("deepseek_v3_r1_workflow.excalidraw")


AttributeError: module 'excalibur' has no attribute 'Diagram'