In [None]:
import asyncio
import base64
import nest_asyncio
from openai import OpenAI
from playwright.async_api import async_playwright
from IPython.display import display, Markdown

import yaml
# Parse the local settings file; the relative path is resolved against the
# kernel's current working directory.
with open("config.yaml") as file:
    config = yaml.safe_load(file)

# Synchronous OpenAI client, authenticated with the key stored under
# OPENAI_KEY_TEST in the settings file (KeyError if that entry is missing).
client = OpenAI(api_key=config["OPENAI_KEY_TEST"])

# Patch asyncio via nest_asyncio — presumably so coroutines can be driven from
# inside the notebook's already-running event loop (confirm against usage).
nest_asyncio.apply()


async def get_screenshot(page):
    """Capture the current contents of ``page``.

    Args:
        page: Object exposing an awaitable ``screenshot()`` method
            (presumably a Playwright page — confirm against the caller).

    Returns:
        Whatever ``page.screenshot()`` resolves to, passed through unchanged.
    """
    image = await page.screenshot()
    return image

async def handle_model_action(page, action):
    display(Markdown(f"**Action requested:** `{action['type']}`"))
    match action['type']:
        case 'click':
            await page.mouse.click(action['x'], action['y'], button=action.get('button', 'left'))
        case 'scroll':
            await page.mouse.move(action['x'], action['y'])
            await page.evaluate(f"window.scrollBy({action['scroll_x']}, {action['scroll_y']})")
        case 'keypress':
            for key in action['keys']:
                await page.keyboard.press(key)
        case 'type':
            await page.keyboard.type(action['text'])
        case 'wait':
            await asyncio.sleep(2)
        case _:
            print("⚠️ Unknown action:", action['type'])


In [None]:
async def setup_browser():
    """Start Playwright, open a visible Chromium window and load compendium.ch.

    The page is created with a 1280x768 viewport so that screenshots and click
    coordinates line up with the display size declared to the model in
    ``send_initial_prompt`` (Playwright's default viewport is 1280x720).

    Returns:
        Tuple ``(p, browser, page)``: the started Playwright driver, the
        launched browser and the page after navigation. The caller owns all
        three and is responsible for closing the browser and stopping ``p``.

    Raises:
        Whatever Playwright raises during launch or navigation. In that case
        the browser (if already launched) is closed and Playwright is stopped
        before the exception propagates, so nothing is left running.
    """
    p = await async_playwright().start()
    browser = None
    try:
        browser = await p.chromium.launch(headless=False)
        page = await browser.new_page(viewport={"width": 1280, "height": 768})
        await page.goto("https://compendium.ch")
        # Short fixed pause to let the landing page finish rendering.
        await asyncio.sleep(2)
    except BaseException:
        # Also covers cancellation / interrupt: release what was started,
        # then let the original exception continue.
        try:
            if browser is not None:
                await browser.close()
        finally:
            await p.stop()
        raise
    return p, browser, page


Text(value='Dafalgan', description='Medikament:')

Text(value='Standarddosierung für Erwachsene', description='Frage:')

Button(button_style='success', description='Start', style=ButtonStyle())

In [None]:
async def send_initial_prompt(page, med_name, user_question):
    """Send the opening task to the computer-use model and return its response.

    Args:
        page: Accepted for signature compatibility; not used by this function.
        med_name: Name of the medication to search for; interpolated into the
            German prompt text.
        user_question: What to look up for that medication; interpolated into
            the German prompt text.

    Returns:
        The object returned by ``client.responses.create`` for the first turn.

    The request declares a 1280x768 browser display to the model, so the page
    being driven should use the same size.
    """
    display(Markdown(f"🧠 **Sending prompt to CUA:** _{med_name} – {user_question}_"))

    # `client` is the synchronous OpenAI client, so `responses.create` blocks
    # until the HTTP round trip finishes. Running it in a worker thread keeps
    # the event loop (and anything else scheduled on it) responsive meanwhile.
    return await asyncio.to_thread(
        client.responses.create,
        model="computer-use-preview",
        tools=[{
            "type": "computer_use_preview",
            "display_width": 1280,
            "display_height": 768,
            "environment": "browser"
        }],
        input=[
            {"role": "user", "content": f"Suche auf compendium.ch nach '{med_name}' und finde '{user_question}'"}
        ],
        reasoning={"generate_summary": "concise"},
        truncation="auto"
    )
