### Load Packages

In [1]:
import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI


### Connect to OpenAI API

In [3]:
# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")

# Validate the key up front so a misconfiguration fails here rather than on the
# first API call. The whitespace check runs before the prefix check so that a
# key with stray leading whitespace is reported accurately instead of tripping
# the "must start with 'sk-'" message.
if not api_key:
    raise ValueError("OPENAI_API_KEY environment variable is not set.")
elif api_key.strip() != api_key:
    raise ValueError("OPENAI_API_KEY must not contain leading or trailing whitespace.")
elif not api_key.startswith("sk-"):
    raise ValueError("OPENAI_API_KEY must start with 'sk-'.")
else:
    print("OPENAI_API_KEY is set and valid.")

# Hand the validated key to the client explicitly so the value checked above is
# exactly the value used for requests.
openai = OpenAI(api_key=api_key)

OPENAI_API_KEY is set and valid.


### Create a Website class

In [4]:
class Website:
    """A fetched web page reduced to its title and visible text.

    Attributes:
        url: The address that was requested.
        title: Text of the page's <title> element with surrounding whitespace
            removed, or "No title found" when the element is missing or empty.
        content: Text extracted from the page after script, style, img and
            input elements have been removed.
    """

    url: str
    title: str
    content: str

    def __init__(self, url: str, timeout: float = 10.0):
        """Download `url` and parse it with BeautifulSoup's 'html.parser'.

        Args:
            url: Page to fetch.
            timeout: Seconds handed to `requests.get` as its timeout, so an
                unresponsive server cannot block the notebook indefinitely.

        Raises:
            requests.RequestException: If the request fails, times out, or the
                server answers with a 4xx/5xx status.
        """
        self.url = url
        response = requests.get(url, timeout=timeout)
        # Fail loudly on HTTP errors instead of summarizing an error page.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # The <title> text often carries newlines/indentation from the markup,
        # and may be absent or empty; normalize all three cases.
        title_tag = soup.title
        title_text = title_tag.get_text(strip=True) if title_tag is not None else ""
        self.title = title_text or "No title found"

        # Drop elements that contribute no readable text before extracting it.
        for irrelevant in soup(['script', 'style', 'img', 'input']):
            irrelevant.decompose()
        self.content = soup.get_text(separator=' \n', strip=True)
    


In [22]:
# Fetch the demo bookstore page, then show its title and a 500-character
# window of the extracted text as a quick sanity check of the scraper.
page = Website("https://books.toscrape.com/")
preview_window = slice(500, 1000)
print(f"Title: {page.title}")
print(f"Content: {page.content[preview_window]}")

Title: 
    All products | Books to Scrape - Sandbox

Content: iness 
Biography 
Thriller 
Contemporary 
Spirituality 
Academic 
Self Help 
Historical 
Christian 
Suspense 
Short Stories 
Novels 
Health 
Politics 
Cultural 
Erotica 
Crime 
All products 
1000 
results - showing 
1 
to 
20 
. 
This is a demo website for web scraping purposes. Prices and ratings here were randomly assigned and have no real meaning. 
A Light in the ... 
£51.77 
In stock 
Add to basket 
Tipping the Velvet 
£53.74 
In stock 
Add to basket 
Soumission 
£50.10 
In stock 



### Create the Prompts

In [25]:
# System message sent with every request: sets the assistant's role and asks
# for a short Markdown summary that skips navigation text.
system_prompt = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown."
)

# A function that writes a User Prompt that asks for summaries of websites:

def user_prompt_for(website):
    """Build the user message asking for a Markdown summary of `website`.

    Args:
        website: Object exposing `title` and `content` attributes; `content`
            is concatenated as-is, so it must be a string.

    Returns:
        A single string: a line naming the page title, the summarization
        instructions, a blank line, and then the page content.
    """
    heading = f"You are looking at a website titled {website.title}"
    instructions = (
        "\nThe contents of this website is as follows; "
        "please provide a short summary of this website in markdown. "
        "If it includes news or announcements, then summarize these too.\n\n"
    )
    return heading + instructions + website.content

def message_for(website):
    """Assemble the chat message list for summarizing `website`.

    Returns:
        A two-element list of role/content dicts: the system message holding
        `system_prompt`, followed by the user message produced by
        `user_prompt_for(website)`.
    """
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt_for(website)}
    return [system_message, user_message]

# Show the message list built for `page` as the cell output, to inspect the
# prompts before they are sent to the API.
message_for(page)

[{'role': 'system',
  'content': 'You are an assistant that analyzes the contents of a website and provides a short summary, ignoring text that might be navigation related. Respond in markdown.'},
 {'role': 'user',

### Call the OpenAI API

In [26]:
def summarize_website(url, model="gpt-4o-mini"):
    """Fetch a web page and return the model's summary of it.

    Args:
        url: Address of the page to summarize.
        model: Name of the chat model to call; defaults to "gpt-4o-mini".

    Returns:
        The summary text with surrounding whitespace stripped, or an empty
        string if the response message carries no text content.
    """
    website = Website(url)
    messages = message_for(website)

    response = openai.chat.completions.create(
        model=model,
        messages=messages,
    )

    # message.content can be None when the reply has no text; guard before
    # stripping so the caller gets a string rather than an AttributeError.
    content = response.choices[0].message.content
    return content.strip() if content else ""

def display_summary(url):
    """Summarize the page at `url` and render the result as Markdown output.

    The rendered text is a level-3 heading naming the URL, a blank line, and
    then the summary returned by `summarize_website`.
    """
    body = summarize_website(url)
    rendered = Markdown(f"### Summary of {url}\n\n{body}")
    display(rendered)



In [27]:
# Summarize the books.toscrape.com landing page and render the summary below.
display_summary("https://books.toscrape.com/")

### Summary of https://books.toscrape.com/

# Summary of "All products | Books to Scrape - Sandbox"

The "Books to Scrape - Sandbox" website is a demo platform designed for web scraping purposes, showcasing a variety of fictitious books across multiple genres. The website features categories such as Fiction, Nonfiction, Mystery, Romance, Science Fiction, and many others, totaling over a thousand products.

## Key Features:
- **Product Listings**: Each book is listed with a title, price, and an "Add to basket" option. 
- **Randomized Data**: The prices and ratings displayed on the site are randomly assigned and do not correlate with real products.
- **Genre Coverage**: The site includes a wide array of genres, from Classics and Fantasy to Psychology and Erotica.

**Note**: This is a demo site, and the products listed are not real.

In [28]:
# Run the same summary on the CNN home page; the page is fetched at run time,
# so the rendered summary reflects whatever the site served at that moment.
display_summary("https://www.cnn.com/")

### Summary of https://www.cnn.com/

# CNN News Summary

CNN's website serves as a hub for breaking news, updates, and feature articles across various topics including US and world news, politics, business, health, entertainment, and more. 

### Key Headlines:
1. **Los Angeles Protests**: Active duty service members are being mobilized to support National Guard troops in response to protests in LA, which have drawn attention to tensions involving Elon Musk and President Trump.
2. **Sean 'Diddy' Combs Trial**: Testimonies from an accuser reveal troubling details about their relationship and incidents of alleged abuse, providing insight into the ongoing court case.
3. **Recent Deaths**: Sly Stone, a pioneering figure in funk music, has passed away at 82.
4. **Middle East Tensions**: An Israeli soldier taken hostage has prompted significant media attention, and notable activists including Greta Thunberg have been detained by Israeli forces.

### Additional Updates:
- Coverage on the **Israel-Hamas War** includes details about military strategies and humanitarian issues.
- Reports of ongoing challenges related to **climate change** and significant **science findings** related to space and biological activity on distant planets.

CNN continues to provide live updates, breaking news alerts, and in-depth analyses across these stories, aiming to keep the public informed about pressing global events.