In [None]:
import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI

In [None]:
# Load environment settings via python-dotenv, then report whether the OpenAI
# key is present. Only its presence is reported; the key value is never printed.
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
print("API_KEY set" if api_key else "API_KEY not set")

In [None]:
# Shared API client; `summarize` sends its chat-completion request through it.
# No key is passed explicitly -- presumably the client reads OPENAI_API_KEY
# from the environment; confirm against the OpenAI SDK documentation.
openai = OpenAI()

In [None]:
class Website:
    """A web page fetched over HTTP and reduced to its title and text.

    Attributes:
        url: The address the page was requested from.
        title: Text of the page's <title> element, or "No title found" when
            the element is missing or has no single text value.
        text: Newline-separated text of the document after script, style,
            img and input elements have been removed from its body.
    """

    url: str
    title: str
    text: str

    def __init__(self, url, timeout=10):
        """Download `url` and extract its title and text.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds to wait for the server, passed straight to
                `requests.get`. Without a timeout a stalled server would
                block the notebook indefinitely.

        Raises:
            requests.RequestException: If the request fails or times out.
        """
        self.url = url
        response = requests.get(url, timeout=timeout)
        soup = BeautifulSoup(response.content, "html.parser")

        # `soup.title.string` is None for an empty <title> or one containing
        # nested tags, so fall back then as well as when the tag is absent.
        title_text = soup.title.string if soup.title else None
        self.title = title_text if title_text else "No title found"

        # A document without a <body> (fragments, some error responses) makes
        # `soup.body` None, and calling it would raise TypeError.
        if soup.body is not None:
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
        self.text = soup.get_text(separator="\n", strip=True)

In [None]:
# Manual check of the scraper: fetch one page and show what was extracted.
ed = Website("https://www.google.com")
print(ed.title, ed.text, sep="\n")

In [None]:
def user_prompt_for(website):
    """Build the user-message text asking for a summary of `website`.

    Args:
        website: Object exposing `title` and `text` attributes.

    Returns:
        A string made of a line naming the page title, the summary
        instructions, and finally the page text.
    """
    heading = f"You are looking at a website titled {website.title}"
    instructions = (
        "\nThe contents of this website is as follows; "
        "please provide a short summary of this website in markdown. "
        "If it includes news or announcements, then summarize these too.\n\n"
    )
    return heading + instructions + website.text


# System message for the chat request: states the assistant's task and asks
# for a markdown reply.
system_prompt = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown."
)

In [None]:
# Show the exact user prompt that would be built for the page held in `ed`.
print(user_prompt_for(ed))

In [None]:
def message_for(website):
    """Assemble the chat messages used to request a summary of `website`.

    Args:
        website: Page object forwarded to `user_prompt_for`.

    Returns:
        A two-element list of role/content dicts: the system message
        followed by the user message.
    """
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt_for(website)}
    return [system_message, user_message]


def summarize(url, model="gpt-4o-mini"):
    """Fetch the page at `url` and return a model-written summary of it.

    Args:
        url: Address of the page to summarize.
        model: Name of the chat model to query; defaults to "gpt-4o-mini".

    Returns:
        The message content of the first choice in the chat-completion
        response.
    """
    website = Website(url)
    response = openai.chat.completions.create(
        model=model,
        messages=message_for(website),
    )
    return response.choices[0].message.content

In [None]:
# This call is not the last expression in its cell, so a bare expression would
# discard the result of the API request; print it so the summary is visible.
print(summarize("https://www.google.com"))
def display_summary(url):
    """Summarize the page at `url` and render the result as Markdown.

    Args:
        url: Address of the page to summarize.
    """
    summary = summarize(url)
    # The model is asked to answer in markdown, so render its reply unchanged;
    # prepending a heading marker would turn the reply's first line into a
    # heading (or produce "### # Title" when it already starts with one).
    display(Markdown(summary))
# Render a summary of the Anthropic home page in the notebook output.
display_summary("https://www.anthropic.com")