In [2]:
from bs4 import BeautifulSoup
import requests


# Browser-like request headers sent along with every page fetch.
# The User-Agent value is split across lines purely for readability;
# adjacent string literals are joined into one string.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    ),
}

def fetch_website_contents(url, timeout=10):
    """
    Return the title and visible body text of the web page at ``url``.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait on the server before ``requests`` gives up.
            Without one, a stalled server would block the notebook cell
            indefinitely.

    Returns:
        The page title, a blank line, then the body text (script, style,
        img and input elements removed), truncated to 2,000 characters as a
        sensible limit. ``"No title found"`` stands in for a missing or
        empty title.

    Raises:
        requests.exceptions.RequestException: propagated from ``requests.get``
            (connection failures, timeouts, ...).
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(response.content, "html.parser")

    # ``Tag.string`` is None when <title> is empty or has several child nodes;
    # fall back to the concatenated text so the string concatenation below
    # can never hit ``None + str``.
    title_tag = soup.title
    title = (title_tag.string or title_tag.get_text()) if title_tag else None
    if not title:
        title = "No title found"

    text = ""
    body = soup.body
    if body:
        # Calling a tag is shorthand for find_all(); drop non-content elements.
        for irrelevant in body(["script", "style", "img", "input"]):
            irrelevant.decompose()
        text = body.get_text(separator="\n", strip=True)

    return (title + "\n\n" + text)[:2_000]

In [5]:
import os
from dotenv import load_dotenv
from IPython.display import Markdown, display
import anthropic

# from scraper import fetch_website_contents

# Load variables from a .env file into the process environment so that
# os.getenv("ANTHROPIC_API_KEY") below can find the key. Per python-dotenv's
# documentation, override=True lets values from .env replace variables that
# are already set in the environment.
load_dotenv(override=True)


def get_anthropic_client() -> anthropic.Anthropic:
    """Build an Anthropic client from the ``ANTHROPIC_API_KEY`` environment variable.

    Returns:
        An ``anthropic.Anthropic`` instance configured with the key.

    Raises:
        ValueError: if the variable is unset or empty.
    """
    key = os.environ.get("ANTHROPIC_API_KEY")
    if key:
        return anthropic.Anthropic(api_key=key)

    # Reaching this point means no usable key was configured.
    raise ValueError("ANTHROPIC_API_KEY not found in environment variables")


# System instructions for the model, written in Kiswahili. English gloss:
#   "You are a news assistant for Kenyan readers. Your job is to:
#    - analyse the text of a Kenyan news website;
#    - filter out menu, footer and advertisement text;
#    - give a short summary of the important news only;
#    - answer in clean, easy-to-understand Kiswahili;
#    - list the main points as bullet points;
#    - prioritise national, political, economic or security news;
#    - not mention website navigation text;
#    - not use English at all;
#    - answer in Markdown format."
# The string itself is runtime data sent to the model; do not translate it.
SYSTEM_PROMPT = """
Wewe ni msaidizi wa habari kwa wasomaji wa Kenya.

Kazi yako ni:
- Kuchambua maandishi ya tovuti ya habari ya Kenya.
- Kuchuja maandishi ya menu, footer, na matangazo.
- Kutoa muhtasari mfupi wa habari muhimu pekee.
- Kutoa majibu kwa Kiswahili safi, rahisi kueleweka.
- Kutaja pointi kuu za habari kwa nukta (bullet points).
- Kama kuna habari za kitaifa, siasa, uchumi, au usalama, zipe kipaumbele.
- Usitaje maandishi ya urambazaji wa tovuti.
- Usitumie Kiingereza kabisa.
- Jibu kwa muundo wa Markdown.
"""

# Preamble placed immediately before the scraped page text, in Kiswahili.
# English gloss: "This is the text from the news website. Give a summary of
# today's main news in simple Kiswahili:". The trailing blank line separates
# the instruction from the page text that build_prompt appends after it.
USER_PROMPT_PREFIX = """
Haya ndiyo maandishi kutoka tovuti ya habari.
Toa muhtasari wa habari kuu za leo kwa Kiswahili rahisi:

"""


def build_prompt(website_text: str) -> str:
    """Assemble the single prompt string sent to the model.

    The result is ``SYSTEM_PROMPT``, a blank-line separator,
    ``USER_PROMPT_PREFIX`` and finally ``website_text``, in that order.
    """
    layout = "{}\n\n{}{}"
    return layout.format(SYSTEM_PROMPT, USER_PROMPT_PREFIX, website_text)


def summarize_kenyan_news(
    url: str,
    *,
    model: str = "claude-3-haiku-20240307",
    max_tokens: int = 500,
) -> str:
    """Fetch the news page at ``url`` and return a Kiswahili summary of it.

    Args:
        url: Address of the news page to scrape and summarize.
        model: Anthropic model identifier used for the request.
        max_tokens: Upper bound on the length of the generated summary.

    Returns:
        The text of the model's reply (expected to be Markdown, as requested
        by ``SYSTEM_PROMPT``). An empty string is returned when the reply
        contains no text blocks.

    Raises:
        ValueError: from ``get_anthropic_client`` when no API key is set.
    """
    # Build the client first so a missing API key fails before any network I/O.
    client = get_anthropic_client()
    website_text = fetch_website_contents(url)

    response = client.messages.create(
        model=model,
        max_tokens=max_tokens,
        # The Messages API takes system instructions through the dedicated
        # ``system`` parameter rather than inside the user turn.
        system=SYSTEM_PROMPT.strip(),
        messages=[
            {"role": "user", "content": USER_PROMPT_PREFIX.lstrip() + website_text}
        ],
    )

    # ``content`` is a list of blocks; keep only text blocks and tolerate an
    # empty list instead of indexing position 0 blindly.
    return "".join(
        block.text
        for block in response.content
        if getattr(block, "type", None) == "text"
    )


def display_kenyan_news_summary(url: str = "https://www.citizen.digital") -> None:
    """Summarize a news page in Kiswahili and render the result as Markdown.

    Args:
        url: Page to summarize. Defaults to the Citizen Digital home page, so
            calling the function without arguments behaves as before.

    Returns:
        None. The summary is shown through IPython's rich display.
    """
    summary = summarize_kenyan_news(url)
    display(Markdown(summary))


# Run the whole pipeline (fetch page -> summarize -> render Markdown).
# Requires ANTHROPIC_API_KEY to be available in the environment; otherwise
# get_anthropic_client raises ValueError.
display_kenyan_news_summary()

ValueError: ANTHROPIC_API_KEY not found in environment variables