In [11]:
import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI
from requests_html import AsyncHTMLSession


In [None]:
!pip install lxml_html_clean

In [None]:
!pip install requests_html

In [None]:
# Read OPENAI_API_KEY from a local .env file instead of assigning it in the
# notebook: a literal here either leaks a secret or (when left empty)
# overwrites a valid key that is already present in the environment.
load_dotenv()

In [7]:
# Look the key up in the process environment; the result is None when unset.
api_key = os.environ.get('OPENAI_API_KEY')

In [8]:
# Shared OpenAI client used by summarize(). No arguments are passed, so the
# client relies on its own configuration lookup (per the OpenAI SDK docs this
# is the OPENAI_API_KEY environment variable).
openai = OpenAI()

In [12]:
class Website:
    """A web page whose title and body text are scraped after rendering.

    ``title`` and ``text`` start out as empty strings and are filled in by
    :meth:`fetch`.
    """

    def __init__(self, url):
        """Store ``url``; nothing is downloaded until :meth:`fetch` is awaited."""
        self.url = url
        self.title = ""
        self.text = ""

    async def fetch(self):
        """Download and render the page, then populate ``title`` and ``text``.

        ``title`` falls back to ``"No title found"`` and ``text`` to ``""``
        when the corresponding element is missing. Any exception raised while
        requesting or rendering the page propagates to the caller, but the
        session is closed first.
        """
        session = AsyncHTMLSession()
        try:
            response = await session.get(self.url)
            await response.html.arender()  # Async version!
            title_elem = response.html.find('title', first=True)
            self.title = title_elem.text if title_elem else "No title found"
            body_elem = response.html.find('body', first=True)
            self.text = body_elem.text if body_elem else ""
        finally:
            # Release the session on success and on failure alike; without
            # this every fetch() leaves a session (and, per the requests_html
            # docs, the browser used for rendering) open.
            await session.close()



In [None]:
ed = Website("https://edwarddonner.com/")

import asyncio
await ed.fetch()

print(ed.title)
print(ed.text)

In [14]:
# System message sent with every request: sets the assistant's role and asks
# for a markdown reply.
system_prompt = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown."
)

In [15]:
def user_prompt_for(website):
    """Build the user message asking for a summary of ``website``.

    The result is a heading naming ``website.title``, a fixed instruction
    paragraph, and then ``website.text`` verbatim.
    """
    heading = f"You are looking at a website titled {website.title}"
    instructions = (
        "\nThe contents of this website is as follows; "
        "please provide a short summary of this website in markdown. "
        "If it includes news or announcements, then summarize these too.\n\n"
    )
    return "".join((heading, instructions, website.text))

In [None]:
# Preview the exact user prompt that would be sent for the page held in `ed`.
print(user_prompt_for(ed))

In [17]:
def messages_for(website):
    """Return the two-message chat payload (system, then user) for ``website``."""
    system_message = dict(role="system", content=system_prompt)
    user_message = dict(role="user", content=user_prompt_for(website))
    return [system_message, user_message]

In [20]:
async def summarize(url, model="gpt-4o-mini"):
    """Fetch ``url`` and return a model-written summary of the page.

    Args:
        url: Address of the page to summarise.
        model: Name of the chat-completions model to use. Defaults to
            "gpt-4o-mini", the value that used to be hard-coded, so existing
            callers behave exactly as before.

    Returns:
        The message content of the first choice in the completion response.
    """
    website = Website(url)
    await website.fetch()
    # The completion call is not awaited: it runs synchronously and blocks
    # this coroutine until the API responds.
    response = openai.chat.completions.create(
        model=model,
        messages=messages_for(website),
    )
    return response.choices[0].message.content

In [None]:
summarize("https://edwarddonner.com")

In [24]:
async def display_summary(url):
    """Summarise the page at ``url`` and render the result as Markdown output."""
    display(Markdown(await summarize(url)))

In [None]:
# Fetch the page, summarise it, and render the summary as Markdown in the cell output.
await display_summary("https://edwarddonner.com")