In [1]:
from IPython.display import Markdown, display
from dotenv import load_dotenv 
from bs4 import BeautifulSoup
import requests
import os
from openai import OpenAI

In [2]:
# Load settings from a local .env file (python-dotenv), presumably so that
# OPENAI_API_KEY is available to the os.getenv lookup in the next cell.
# The call's return value is left as the cell's last expression, so it is displayed.
load_dotenv()

True

In [3]:
# Look up the OpenAI credential in the process environment.
# The result is None when the variable is not set.
api_key = os.environ.get('OPENAI_API_KEY')

In [4]:
# Pass the key read in the previous cell explicitly, so the credential this
# notebook depends on is visible at the call site rather than being an
# implicit environment lookup inside the client. When api_key is None the
# client falls back to the OPENAI_API_KEY environment variable on its own,
# so behaviour is unchanged.
openai = OpenAI(api_key=api_key)

In [5]:
class Website:
    """
    Fetch a web page and keep its title and visible body text.

    Tags that carry no readable prose (script, style, img, input) are removed
    from the parsed document before the text is extracted.

    Attributes:
        url: The address that was requested.
        title: Text of the page's <title>, or 'No title found' when the page
            has no usable title.
        text: Body text with fragments separated by newlines, or '' when the
            page has no <body>.
    """

    def __init__(self, url, timeout=30):
        """
        Download and parse ``url``.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds to wait on the server before giving up; passed
                straight through to ``requests.get``.

        Raises:
            requests.exceptions.RequestException: If the request fails or
                times out.
        """
        self.url = url

        # Without a timeout, requests can block forever on an unresponsive host.
        response = requests.get(self.url, timeout=timeout)

        soup = BeautifulSoup(response.content, 'html.parser')

        for tag in soup(['script', 'style', 'img', 'input']):
            tag.decompose()

        # soup.title is None when the page has no <title>, and .string is None
        # when the title is empty or contains nested markup. Fall back to a
        # placeholder instead of raising or leaking None into the prompt.
        title_tag = soup.title
        title_text = title_tag.string if title_tag is not None else None
        # str() detaches the title from the parse tree it would otherwise
        # keep alive.
        self.title = str(title_text) if title_text else 'No title found'

        # Fragments and non-HTML responses may have no <body> at all.
        body = soup.body
        if body is not None:
            self.text = body.get_text(separator='\n', strip=True)
        else:
            self.text = ''

In [6]:
# System message sent with every request: asks for a short markdown summary
# that skips navigation text. Adjacent literals are joined by the parser, so
# the trailing space on each piece is what separates the sentences.
system_prompt = (
    'You are an assistant that analyzes the contents of a website '
    'and provides a short summary, ignoring text that might be navigation related. '
    'Respond in markdown.'
)

In [7]:
def user_prompt_for(website):
    """
    Build the user message asking for a summary of ``website``.

    Args:
        website: Object exposing ``title`` and ``text`` attributes.

    Returns:
        A string made of the instruction paragraph (which names the page
        title), a blank line, and then the page text.
    """
    instructions = (
        f"You are looking at a website titled {website.title}. "
        "The contents of this website is as follows; "
        "please provide a short summary of this website in markdown. "
        "If it includes news or announcements, then summarize these too."
        "\n\n"
    )
    return instructions + website.text

In [8]:
def messages_for(website):
    """
    Assemble the chat message list for summarizing ``website``.

    Args:
        website: Object accepted by ``user_prompt_for``.

    Returns:
        A two-item list: the system message carrying ``system_prompt``,
        followed by the user message built by ``user_prompt_for``.
    """
    system_message = {'role': 'system', 'content': system_prompt}
    user_message = {'role': 'user', 'content': user_prompt_for(website)}
    return [system_message, user_message]

In [9]:
def summarize(url, model='gpt-4o-mini'):
    """
    Fetch ``url`` and return the model's summary of the page.

    Args:
        url: Address of the page to summarize.
        model: Name of the chat model to query. Defaults to 'gpt-4o-mini',
            the model this notebook originally hard-coded, so existing
            ``summarize(url)`` calls behave as before.

    Returns:
        The message content of the first choice in the chat completion
        response.
    """
    website = Website(url)

    response = openai.chat.completions.create(
        model=model,
        messages=messages_for(website),
    )

    return response.choices[0].message.content

In [10]:
# Page to summarize; point this at another address to summarize a different site.
target_url = 'https://www.cnn.com/'
summary = summarize(target_url)

In [11]:
# Wrap the summary text in a Markdown object so the notebook renders it
# as formatted markdown.
rendered_summary = Markdown(summary)
display(rendered_summary)

# CNN Website Summary 

CNN's website provides breaking news, a wide array of articles, and video content on various topics including **U.S. news**, **world events**, **politics**, **business**, **health**, **entertainment**, **sports**, and **science**. 

## Recent Highlights
- **Ukraine-Russia War**: Ukraine has launched attacks into Russia, following U.S. authorization to use longer-range missiles.
- **Trump Administration Developments**: Trump is expected to nominate financial industry executive Howard Lutnik for the Secretary of Commerce position.
- **Health News**: A Michigan man made headlines after receiving a rare face transplant.
- **Crime Reports**: Multiple stabbings occurred in Manhattan, resulting in three fatalities, raising concerns about public safety.

## Additional Topics 
The site also covers **climate issues**, **Black Friday deals**, and trending topics like the **Israel-Hamas War** and ongoing **elections** in 2024.

Overall, CNN remains a prominent source for comprehensive coverage of both national and international news.