In [43]:
import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
import openai


In [60]:
class WebscraperSummarizer:
    """Fetch a web page, reduce it to its text and summarize it with OpenAI.

    Every public method returns ``self`` so the steps can be chained::

        WebscraperSummarizer(url).messages().summarizer().view()

    Attributes:
        url: Address of the page that was fetched.
        title: Text of the page's ``<title>`` tag, or ``'no title found'``.
        content: Text of the page, fragments joined by newlines.
        system_prompt: System message sent to the chat model.
        user_prompt: User message holding the title, the page text and the
            summarization instructions.
        context: Chat messages built by :meth:`messages`; empty until then.
        summarized_content: Markdown returned by the model; empty until
            :meth:`summarizer` has run.
    """

    # Chat model used for the completion request.
    MODEL = 'gpt-4o-mini'
    # Seconds to wait for the web server; requests has no timeout by default.
    REQUEST_TIMEOUT = 30
    # Tags removed from the page before its text is extracted.
    IGNORED_TAGS = ['script', 'style', 'img', 'input']

    url: str
    title: str
    content: str
    system_prompt: str
    user_prompt: str
    context: list
    summarized_content: str

    def __init__(self, url):
        """Download ``url`` and prepare the prompts for the summary request.

        Args:
            url: Address of the page to summarize.

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status.
            requests.RequestException: On connection failure or timeout.
        """
        self.url = url
        # Filled in later by messages() / summarizer(); initialised here so
        # the attributes always exist.
        self.context = []
        self.summarized_content = ''

        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        # Fail early instead of summarizing an error page.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # ``.string`` is None when <title> is empty or contains nested markup.
        title_text = soup.title.string if soup.title else None
        self.title = title_text if title_text else 'no title found'

        # Documents without a <body> are processed as a whole.
        root = soup.body if soup.body is not None else soup
        for tag in root.find_all(self.IGNORED_TAGS):
            tag.decompose()
        self.content = root.get_text(separator='\n', strip=True)

        load_dotenv()
        openai.api_key = os.getenv('OPENAI_API_KEY')

        self.system_prompt = """You are a reliable assistant that analyze website content. 
                                You provide a good summary and navigation links and images. 
                                You translate technical words to a very understable way.
                                You always respond in markdown."""

        # Explicit separators keep the title, the page text and the
        # instructions from running into each other.
        self.user_prompt = f"You are reading  this website {self.title}\n"
        self.user_prompt += "The contents of this website is as follows;\n" + self.content + "\n\n"
        self.user_prompt += "Provide a short summary of this website. Summarize news or announcement.\n\n"
        self.user_prompt += "The final summary should be in structured as markdown.\n\n"

    def messages(self):
        """Build the chat message list from the system and user prompts.

        Returns:
            This instance, to allow chaining.
        """
        self.context = [
            {'role': 'system', 'content': self.system_prompt},
            {'role': 'user', 'content': self.user_prompt},
        ]
        return self

    def summarizer(self):
        """Send the messages to the chat model and store the reply.

        Builds the messages first when :meth:`messages` has not been called.

        Returns:
            This instance, to allow chaining.
        """
        if not self.context:
            self.messages()

        response = openai.chat.completions.create(
            model=self.MODEL,
            messages=self.context,
        )

        self.summarized_content = response.choices[0].message.content
        return self

    def view(self):
        """Render the summary as Markdown in the notebook.

        Requests the summary first when :meth:`summarizer` has not been called.

        Returns:
            This instance, to allow chaining.
        """
        if not self.summarized_content:
            self.summarizer()

        display(Markdown(self.summarized_content))
        return self
    


In [61]:
# Summarize the CNN front page. The chain is bound to a name because every
# step returns the summarizer itself; left as a bare expression, the notebook
# would print its object repr underneath the rendered summary.
url = 'https://edition.cnn.com/'
cnn_summary = (
    WebscraperSummarizer(url)
    .messages()
    .summarizer()
    .view()
)


# Summary of CNN Website Content

CNN provides a comprehensive platform for the latest news and updates across various topics, including:

## Key Sections:
- **News Categories:**
  - US
  - World
  - Politics
  - Business
  - Health
  - Entertainment
  - Style
  - Travel
  - Sports
  - Science
  - Climate
  - Weather
  - Ongoing Conflicts (e.g., Ukraine-Russia War, Israel-Hamas War)

- **Video & Audio Content:**
  - Live TV coverage
  - Various podcasts and shows available for listening

- **Special Features:**
  - Investigative pieces and profiles
  - Environmental awareness projects
  - Celebrating CNN Heroes

## Recent Headlines:
- **International Events:**
  - Tensions in Israel and threats against Iran.
  - Major explosions reported in Beirut.

- **US News:**
  - High-profile personalities addressing controversial topics such as abortion rights.
  - Allegations of misconduct against prominent figures.

- **Business Updates:**
  - Reports on the impact of Hurricane Helene on various sectors.
  - Analysis of market conditions and employment statistics.

- **Cultural Insights:**
  - Celebrity updates and activities linked to major events.
  - Features on design and fashion, with a focus on contemporary styles.

## User Interaction:
The site encourages user feedback regarding advertisements and technical experience, ensuring an engaging and responsive user interface.

## Accessibility:
- Options to view content in multiple languages (Arabic, Spanish, etc.)
- Access to personalized content based on user preferences.

## Navigation Links:
- [Home](https://www.cnn.com)
- [US News](https://www.cnn.com/us)
- [World News](https://www.cnn.com/world)
- [Politics](https://www.cnn.com/politics)
- [Business](https://www.cnn.com/business)
- [Health](https://www.cnn.com/health)
- [Entertainment](https://www.cnn.com/entertainment)
- [Sports](https://www.cnn.com/sports)
- [Science](https://www.cnn.com/science)

## Images (Icons for Navigation):
- 📰 News
- 🌍 World
- 🏛️ Politics
- 💼 Business
- 🏥 Health
- 🎬 Entertainment
- ⚽ Sports
- 🔬 Science

This structured approach allows readers to easily navigate CNN’s extensive offerings while staying updated on current events and important topics.

<__main__.WebscraperSummarizer at 0x1537356f0d0>