In [31]:
# 💊 Compendium.ch Agent via LangGraph + TavilySearchResults + Scraping

import requests
from bs4 import BeautifulSoup

import os
import re
from dotenv import load_dotenv
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_core.messages import HumanMessage
from langgraph.checkpoint.memory import MemorySaver
from langgraph.prebuilt import create_react_agent
from langchain_openai import ChatOpenAI
from langchain.tools import Tool
import nest_asyncio

# Setup for async in notebook
test_thread_id = "compendium-fast-001"
nest_asyncio.apply()
load_dotenv()

# === API keys ===
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# === LangChain components ===
tavily_tool = TavilySearchResults(k=5)
tools = [tavily_tool]

model = ChatOpenAI(
    api_key=OPENAI_API_KEY,
    model="gpt-3.5-turbo",  # Faster and better
    temperature=0.0,
)

memory = MemorySaver()
agent_executor = create_react_agent(model, tools, checkpointer=memory)
config = {"configurable": {"thread_id": test_thread_id}}

def extract_compendium_links(results: list) -> dict:
    product_url = fachinfo_url = patientinfo_url = None

    for r in results:
        url = r.get("url", "")
        title = r.get("title", "").lower()
        if not url.startswith("https://compendium.ch"):
            continue

        if re.search(r"/product/\d+-", url):
            # Prefer URLs with exact medication name in title or URL
            if "dolo" in title or "tabl" in title or "500 mg" in title:
                product_url = url if not product_url else product_url
        if "/mpro" in url:
            fachinfo_url = url
        if "/mpub" in url:
            patientinfo_url = url

    return {
        "Produktseite": product_url,
        "Fachinformation": fachinfo_url,
        "Patienteninformation": patientinfo_url,
    }


# === Scraping logic ===
def scrape_compendium_product_page(url: str) -> str:
    try:
        headers = {"User-Agent": "Mozilla/5.0"}
        res = requests.get(url, headers=headers, timeout=10)
        res.raise_for_status()
        soup = BeautifulSoup(res.text, "html.parser")

        # Extract main content block
        content_div = soup.find("div", class_="productDetail")
        if not content_div:
            return "⚠️ Kein relevanter Inhalt gefunden."

        # Clean and format
        text_blocks = [p.get_text(separator=" ", strip=True) for p in content_div.find_all(["h1", "h2", "p", "li"])]
        filtered = "\n".join([line for line in text_blocks if line and not line.startswith("Drucken")])
        return filtered[:5000] + "..."  # clip for length
    except Exception as e:
        return f"❌ Fehler beim Abrufen von Compendium-Seite: {e}"

def get_compendium_info_with_scraping(prompt: str) -> str:
    links = get_compendium_links_langchain(prompt, as_dict=True)

    if not links or not links.get("Produktseite"):
        return f"⚠️ Kein Produktlink gefunden für {prompt}."

    summary = scrape_compendium_product_page(links["Produktseite"])
    return f"### 📦 Informationen zu **{prompt}**\n\n{summary}\n\n🔗 [Produktlink]({links['Produktseite']})"


# === Lookup function ===
def get_compendium_links_langchain(prompt: str, *, as_dict=False) -> str:
    query = f"site:compendium.ch {prompt}"
    try:
        results = tavily_tool.run(query)
        links = extract_compendium_links(results)

        if as_dict:
            return links

        if not links["Produktseite"]:
            return f"⚠️ Kein Produkt auf compendium.ch gefunden für **{prompt}**."

        output = f"### 💊 Ergebnis für **{prompt}**:\n"
        output += f"- 📦 [Produktseite]({links['Produktseite']})\n"
        if links["Fachinformation"]:
            output += f"- 📄 [Fachinformation]({links['Fachinformation']})\n"
        if links["Patienteninformation"]:
            output += f"- 👥 [Patienteninformation]({links['Patienteninformation']})\n"
        if not (links["Fachinformation"] or links["Patienteninformation"]):
            output += "⚠️ Keine Fach- oder Patienteninformation separat gefunden.\n"

        return output
    except Exception as e:
        return f"❌ Fehler bei Tavily-Suche: {e}"

# === Tool wrapper for agent usage ===
CompendiumScrapingTool = Tool(
    name="Compendium.ch Scraper",
    func=lambda x: get_compendium_info_with_scraping(x),
    description="Scraped pharmazeutische Informationen von Compendium.ch Produktseiten"
)

# === Agent test ===
msgs = [
    HumanMessage(content="Ich bin Apotheker aus Zürich. Hi."),
    HumanMessage(content="Finde Informationen zu Dafalgan Dolo Tabl 500 mg auf compendium.ch"),
]

for m in msgs:
    for step in agent_executor.stream({"messages": [m]}, config, stream_mode="values"):
        print("\u2794", step["messages"][-1].content)

# === Direct structured + scraped lookup ===
print("\n\n✨ Compendium-Abfrage mit Scraping:")
print(get_compendium_info_with_scraping("DAFALGAN Dolo Tabl 500 mg"))


➔ Ich bin Apotheker aus Zürich. Hi.
➔ Hallo! Wie kann ich Ihnen heute helfen?
➔ Finde Informationen zu Dafalgan Dolo Tabl 500 mg auf compendium.ch
➔ 
➔ Ich habe Informationen zu Dafalgan Dolo Tabl 500 mg auf compendium.ch gefunden. Hier sind die Details:

- **Produkt:** [DAFALGAN Tabl 500 mg N02BE01 Paracetamol](https://compendium.ch/de/product/17776-dafalgan-tabl-500-mg)
- **Hersteller:** UPSA Switzerland AG
- **Zusammensetzung:** 
  - **Wirkstoff:** Paracetamol
- **Charakteristika:** Analgetikum, Paracetamol
- **ATC-Code:** N02BE01 Paracetamol

Für weitere Informationen können Sie die [Detailseite auf compendium.ch](https://compendium.ch/de/product/17776-dafalgan-tabl-500-mg) besuchen.


✨ Compendium-Abfrage mit Scraping:
### 📦 Informationen zu **DAFALGAN Dolo Tabl 500 mg**

⚠️ Kein relevanter Inhalt gefunden.

🔗 [Produktlink](https://compendium.ch/de/product/17776-dafalgan-tabl-500-mg)


In [16]:
def get_compendium_links_langchain(prompt: str) -> str:
    query = f"site:compendium.ch {prompt}"
    try:
        results = tavily_tool.run(query)
        urls = [r['url'] for r in results if 'compendium.ch/product/' in r.get("url", "")]

        # Initialize placeholders
        product_url = fach_url = patient_url = None

        for url in urls:
            if re.match(r"https://compendium\.ch/product/\d+-[^/]+$", url):
                product_url = product_url or url
            elif "/mpro" in url:
                fach_url = fach_url or url
            elif "/mpub" in url:
                patient_url = patient_url or url

        if not product_url:
            return f"⚠️ Kein Produkt auf compendium.ch gefunden für **{prompt}**."

        output = f"### 💊 Ergebnis für **{prompt}**:\n"
        output += f"- 📦 [Produktseite]({product_url})\n"
        if fach_url:
            output += f"- 📄 [Fachinformation]({fach_url})\n"
        if patient_url:
            output += f"- 👥 [Patienteninformation]({patient_url})\n"
        if not (fach_url or patient_url):
            output += "⚠️ Keine Fach- oder Patienteninformation separat gefunden.\n"

        return output
    except Exception as e:
        return f"❌ Fehler bei Tavily-LangChain-Suche: {e}"


In [19]:

from langchain.agents import initialize_agent
from langchain.agents.agent_types import AgentType
from langchain.chat_models import ChatOpenAI

llm = ChatOpenAI(temperature=0)
agent = initialize_agent(
    tools=[tavily_tool],
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
)
def get_compendium_links_langchain_agent(prompt: str) -> str:
    try:
        response = agent.run(prompt)
        if not response:
            return f"⚠️ Kein Ergebnis für **{prompt}** gefunden."
        return response
    except Exception as e:
        return f"❌ Fehler bei LangChain-Agent: {e}"
    

# Example usage
if __name__ == "__main__":
    prompt = "Ibuprofen"
    print(get_compendium_links_langchain(prompt))
    print(get_compendium_links_langchain_agent(prompt))

### 💊 Ergebnis für **Ibuprofen**:
- 📦 [Produktseite](https://compendium.ch/product/1176907-ibuprofen-sandoz-filmtabl-600-mg)
⚠️ Keine Fach- oder Patienteninformation separat gefunden.



[1m> Entering new AgentExecutor chain...[0m


  response = agent.run(prompt)


[32;1m[1;3mI should use the search engine to find information about Ibuprofen
Action: tavily_search_results_json
Action Input: Ibuprofen[0m
Thought:[32;1m[1;3mI have gathered information about Ibuprofen from various sources
Final Answer: Ibuprofen is a nonsteroidal anti-inflammatory drug (NSAID) used to manage various conditions, including inflammatory diseases, rheumatoid disorders, mild to moderate pain, fever, dysmenorrhea, and osteoarthritis. It is available over-the-counter and in prescription strength.[0m

[1m> Finished chain.[0m
Ibuprofen is a nonsteroidal anti-inflammatory drug (NSAID) used to manage various conditions, including inflammatory diseases, rheumatoid disorders, mild to moderate pain, fever, dysmenorrhea, and osteoarthritis. It is available over-the-counter and in prescription strength.
