In [1]:
# Importing libraries
import os
from dotenv import load_dotenv
import requests
import gradio as gr
from openai import OpenAI
from bs4 import BeautifulSoup
from IPython.display import display, Markdown

In [3]:
# Load environment variables from the .env file
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")

# Surface a missing key right here rather than as a failure further down the
# notebook. Only the presence of the key is reported; its value is never printed.
if not api_key:
    print("OPENAI_API_KEY was not found - add it to your .env file before running the cells below.")

In [5]:
# Shared OpenAI client used by summarize() for chat completions.
# No arguments are passed here, so credentials are presumably read from the
# environment populated by load_dotenv() - TODO confirm against the OpenAI client docs.
openai = OpenAI()

In [6]:
# Website class to handle the extraction of title and text content from a webpage
class Website():
    """Fetch a web page and expose its title and visible text.

    Attributes:
        url: The address that was requested.
        title: Text of the page's <title> tag, or 'no title exist' when the
            page has no usable title.
        text: Newline-separated text of the page with script, input, img and
            style elements removed.
    """
    url: str
    title: str
    text: str

    def __init__(self, url, timeout=10):
        """Download ``url`` and extract its title and text.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds to wait for the server before giving up, so an
                unresponsive host cannot hang the notebook indefinitely.

        Raises:
            requests.exceptions.RequestException: If the request fails or
                times out (propagated from ``requests.get``).
        """
        self.url = url
        response = requests.get(url, timeout=timeout)
        soup = BeautifulSoup(response.content, 'html.parser')

        # ``soup.title.string`` is None when <title> is empty or contains
        # nested tags, so check the string itself and not just the tag.
        title = soup.title.string if soup.title else None
        self.title = title if title else 'no title exist'

        # html.parser does not synthesize a <body>; for fragments or non-HTML
        # responses fall back to the whole document instead of crashing on None.
        root = soup.body if soup.body is not None else soup
        for irrelevant in root(['script', 'input', 'img', 'style']):
            irrelevant.decompose()
        self.text = root.get_text(separator="\n", strip=True)

In [7]:
# System prompt: defines the LLM's task and tone (summarizing website content).
# The trailing backslash inside the triple-quoted string joins the two
# sentences onto a single line of the resulting prompt.
system_prompt = """
You are an assistant that analyzes website content and provides summaries. \
Respond in Markdown format and offer styles such as key points, detailed summaries or Q&A format.
"""

In [17]:
# Function to generate user-specific prompt based on style
def user_prompt(website, style):
    """Build the user message asking for a summary of ``website`` in ``style``.

    Args:
        website: Object exposing ``title`` and ``text`` attributes.
        style: One of "Key Points", "Detailed" or "Q&A".

    Returns:
        The prompt string: a style-specific instruction ending in a colon,
        a blank line, then the page text.

    Raises:
        ValueError: If ``style`` is not one of the supported styles.
    """
    # Only the instruction differs per style; the page text is appended once
    # below. f-strings (not str.format) keep braces in the title/text literal.
    instructions = {
        "Key Points": f"Summarize the website titled '{website.title}' in key points",
        "Detailed": f"Provide a comprehensive summary of the website titled '{website.title}'",
        "Q&A": f"Generate a question-answer format summary for the website titled '{website.title}'",
    }
    if style not in instructions:
        # Previously an unknown style fell through every branch and crashed
        # with UnboundLocalError; fail with an actionable message instead.
        supported = ", ".join(repr(name) for name in instructions)
        raise ValueError(f"Unsupported summary style {style!r}; expected one of {supported}.")
    return f"{instructions[style]}:\n\n{website.text}"

In [18]:
# Define the function to structure the message for GPT
def message_for(website, style):
    """Return the chat messages (system first, then user) for one summary request.

    The system message carries the module-level ``system_prompt``; the user
    message carries the prompt built by ``user_prompt(website, style)``.
    """
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt(website, style)}
    return [system_message, user_message]

In [19]:
# Define the summarize function to generate summary for given URL
def summarize(url, style, model="gpt-4o-mini"):
    """Fetch ``url`` and return the model's summary of it.

    Args:
        url: Address of the page to summarize.
        style: Summary style forwarded to ``message_for``.
        model: Chat model name passed to the completions API. Defaults to
            "gpt-4o-mini", the value that was previously hard-coded.

    Returns:
        The content of the first choice's message in the completion response.
    """
    website = Website(url)
    response = openai.chat.completions.create(
        model=model,
        messages=message_for(website, style),
    )
    return response.choices[0].message.content

In [20]:
# Function to display this nicely in the Jupyter output
def display_summary(url, style):
    """Summarize ``url`` in the given ``style`` and render the result as Markdown."""
    rendered = Markdown(summarize(url, style))
    display(rendered)

In [21]:
# Example run: detailed summary of the New York Times front page.
display_summary(url="https://www.nytimes.com/", style="Detailed")

# Summary of The New York Times Website

## Overview
The New York Times (NYT) is an established American newspaper that provides comprehensive coverage of national and international news, as well as various other categories including politics, business, arts, lifestyle, opinion, and more. The website is known for its in-depth reporting, analyses, and inspirational storytelling.

## Key Sections of the Website
- **U.S. News**: Covers local news, politics, education, health, and sports.
- **World News**: Reports on global issues, including international relations, crises, and culture in various regions.
- **Business**: Focuses on economics, finance, and technological advancements with sub-sections like DealBook and Your Money.
- **Arts**: Features reviews and articles related to books, music, movies, and pop culture.
- **Lifestyle**: Offers content on health, travel, food, and personal finance, including cooking tips and real estate advice.
- **Opinion**: Includes editorials, guest essays, and columns from notable journalists and thought leaders.
- **Podcasts**: Offers a range of audio content covering news analysis, cultural discussions, and storytelling.
- **Games**: Provides interactive content such as crosswords, word puzzles, and logic games.
- **Cooking**: Features recipes, cooking advice, and meal planning ideas from culinary experts.

## Notable Features
- **Newsletters**: The NYT offers various newsletters to cater to specific interests, including daily news briefings and thematic content.
- **Podcasts**: Prominent programs like *The Daily* provide concise analyses of current events, while others focus on specific topics like technology and literature.
- **Wirecutter**: A product review service that focuses on consumer electronics and other household products.
- **The Athletic**: A sports coverage service providing personalized content about specific leagues and teams.

## Current Highlights
### Top Stories
1. **Trade War Impact**: The analysis shows that Trump voters could be significantly affected by new tariffs imposed by other nations.
2. **Labor Abuse Investigation**: An in-depth piece investigates the conditions faced by East African maids in Saudi Arabia, highlighting systemic exploitation.
3. **Geopolitical Tensions**: Coverage of tensions between Israel and Lebanon and the implications for military recruitment within Israel.
4. **Cultural Features**: Articles discussing art, literature, and personal narratives that resonate with audiences.

### Recent Headlines
- Marco Rubio's statement on deportations following legal actions.
- A spotlight on the reactions to Black Lives Matter mural removal in Washington D.C.
- Severe weather updates and their impacts across regions.

## Conclusion
The New York Times serves as a dependable resource for anyone seeking reliable and varied news coverage. Its diverse sections ensure that there is content available for nearly every reader's interest, and its commitment to quality journalism is reflected throughout its extensive offerings.

In [22]:
# Gradio Interface
def main_interface(url, style):
    """Gradio callback: summarize ``url`` in ``style`` or ask for a URL.

    Args:
        url: Raw value of the URL textbox; may be empty, None or padded
            with whitespace.
        style: Summary style selected in the UI.

    Returns:
        The summary text, or "Please provide a URL." when no usable URL
        was entered.
    """
    # Treat None and whitespace-only input the same as an empty box, and strip
    # stray spaces from pasted URLs so they are not sent to the fetcher as-is.
    cleaned_url = (url or "").strip()
    if not cleaned_url:
        return "Please provide a URL."
    return summarize(cleaned_url, style)

# Gradio app: a URL textbox and a style selector feed main_interface, and its
# return value is rendered in a Markdown component.
url_input = gr.Textbox(label="Enter URL for Summarization")
style_input = gr.Radio(
    ["Key Points", "Detailed", "Q&A"],
    label="Choose Summary Style",
    value="Key Points",
)
summary_output = gr.Markdown(label="Summary")

interface = gr.Interface(
    fn=main_interface,
    inputs=[url_input, style_input],
    outputs=[summary_output],
    title="Website Summarizer",
    description="Paste a URL and choose a summary style.",
)

# Launch the Gradio app; share=True additionally requests a public share link.
interface.launch(share=True)

* Running on local URL:  http://127.0.0.1:7861

Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.


