In [1]:
# Imports
from dotenv import load_dotenv
import os
from openai import OpenAI
from bs4 import BeautifulSoup
import requests
from IPython.display import Markdown, display

In [9]:
# Read settings from the project's .env file (override=True asks python-dotenv
# to let the file's values replace variables that are already set), then fail
# fast if the OpenAI key is still missing.
load_dotenv(dotenv_path="../config/.env", override=True)

openai_api_key = os.environ.get("OPENAI_API_KEY")
if not openai_api_key:
    raise ValueError("OPENAI_API_KEY is not set")

In [3]:
# Create the OpenAI API client that the later cells use for chat completions.
# No key is passed explicitly here - presumably the client picks up
# OPENAI_API_KEY from the environment (confirm against the openai SDK docs).
openai_client = OpenAI()

In [4]:
# Prompts for the first test call: the system prompt sets the assistant's role
# and the user prompt is the message sent. Each value keeps its leading and
# trailing newline.
system_prompt = "\nYou are a helpful assistant\n"

user_prompt = "\nHi there! This is my first message to an LLM!!\n"

In [9]:
# Send the system and user prompts to the chat model and show its reply.
# MODEL_NAME is also read by summarize_website further down.
MODEL_NAME = "gpt-4.1-nano"

messages = [
    dict(role="system", content=system_prompt),
    dict(role="user", content=user_prompt),
]
response = openai_client.chat.completions.create(messages=messages, model=MODEL_NAME)

# Last expression of the cell: the text of the first returned choice
response.choices[0].message.content

"Hello! Welcome to interacting with an AI language model. I'm here to help with any questions or tasks you have. Feel free to ask anything!"

In [5]:
def fetch_website_contents(url, max_chars=2_000, timeout=10):
    """
    Return the title and visible text of the website at the given url.

    Args:
        url: Address of the page to download.
        max_chars: Maximum length of the returned string; defaults to 2,000
            characters as a sensible limit.
        timeout: Seconds to wait for the server before giving up, so a
            stalled site cannot hang the notebook.

    Returns:
        The title, a blank line, then the body text (one line per text
        fragment), cut to at most max_chars characters.

    Raises:
        requests.exceptions.RequestException: if the request fails or
            times out.
    """
    # Standard browser-like headers to fetch a website
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(response.content, "html.parser")

    # .string is None for an empty <title> or one containing nested tags;
    # fall back instead of crashing on the string concatenation below.
    title = (soup.title.string if soup.title else None) or "No title found"

    if soup.body:
        # Remove elements that carry no readable text before extracting it
        for irrelevant in soup.body(["script", "style", "img", "input"]):
            irrelevant.decompose()
        text = soup.body.get_text(separator="\n", strip=True)
    else:
        text = ""
    return (title + "\n\n" + text)[:max_chars]

In [None]:
# Smoke test: fetch the CNN homepage and print the truncated title + text
# that fetch_website_contents returns.
content = fetch_website_contents("https://cnn.com")
print(content)

In [17]:
# Prompts for the summarization task. NOTE: this rebinds system_prompt,
# replacing the generic assistant prompt assigned in the earlier prompt cell.
system_prompt = """
You are a helpful assistant that can summarize a website given its content.
"""

# Instruction placed ahead of the page text in the user message
user_prompt_prefix = """
Summarize the content of a website given below. Respond in markdown. Do not wrap the markdown in a code block - respond just with the markdown.


"""

In [18]:
def summarize_website(url):
    """
    Fetch the page at url and return the model's summary of it.

    The page text comes from fetch_website_contents and is appended to
    user_prompt_prefix (separated by a newline) to form the user message.
    The request is sent through openai_client using MODEL_NAME and
    system_prompt, and the content of the first returned choice is returned.
    """
    page_text = fetch_website_contents(url)
    system_message = {"role": "system", "content": system_prompt}
    user_content = user_prompt_prefix + "\n" + page_text
    user_message = {"role": "user", "content": user_content}

    completion = openai_client.chat.completions.create(
        messages=[system_message, user_message],
        model=MODEL_NAME,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [19]:
# Summarize the CNN homepage and render the model's markdown reply in the notebook
display(Markdown(summarize_website("https://cnn.com")))

# CNN Website Content Summary

## Overview
CNN provides news coverage on various topics including breaking news, politics, world events, health, entertainment, business, science, climate, travel, sports, and technology. It offers video content, newsletters, and personalized topic followings.

## Features
- **Ad Feedback:** Users can submit feedback on ads related to relevance, loading issues, and other technical problems.
- **Navigation:** Multiple categories such as US, World, Politics, Business, Health, Entertainment, Style, Travel, Sports, Science, Climate, Weather, and War (Ukraine-Russia and Israel-Hamas).
- **Media Content:** Videos and live broadcasts available on various topics.
- **Personal Accounts:** Sign in, account settings, newsletters, and topic management.
- **International Editions:** US, International, Arabic, and Español versions.
- **Special Reports:** Sections on crime, justice, and political developments.
- **Business & Market Data:** Tech, media, markets, pre-hours, after-hours, investing, and financial tools.
- **Health & Lifestyle:** Fitness, sleep, relationships, fashion, beauty, food, travel, pets, and more.
- **Entertainment & Culture:** Movies, television, celebrities, arts, design, fashion, architecture, luxury, and food & drink.
- **Sports:** Includes pro football and other major sports updates.

## Additional Features
- **Videos & Multimedia:** Regular video updates and special features.
- **Polls & Surveys:** Engagement tools like CNN Polls and election tracking.
- **Market Tools:** Calculators, market analysis, and investing insights.
- **Subscription & Account Management:** Sign in options for personalized experience.

This summary covers the main sections and functionalities of the CNN website based on its content and layout.