In [13]:
# Import necessary libraries
import os
from dotenv import load_dotenv
import requests
from bs4 import BeautifulSoup
from openai import OpenAI
from IPython.display import display, Markdown
import fitz
import gradio as gr

In [14]:
# Pull settings from the .env file into the process environment, then read
# the OpenAI key from it (None when the variable is not set).
load_dotenv()
api_key = os.environ.get("OPENAI_API_KEY")

In [15]:
# Initialize OpenAI client
# OpenAI() is called with no arguments, so the `api_key` value read earlier is
# not passed in explicitly; presumably the client picks OPENAI_API_KEY up from
# the environment on its own — confirm against the openai package docs.
# NOTE(review): the instance is bound to the name `openai`, the same name as
# the package `OpenAI` is imported from.
openai = OpenAI()

In [16]:
# Extract text from a PDF file
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_path: Location of the PDF, passed unchanged to ``fitz.open``.

    Returns:
        str: The text of all pages joined in page order. If opening or
        reading the file fails, the string
        ``"Error extracting text from PDF: <exception>"`` is returned instead
        of raising, so callers always receive a string.
    """
    try:
        # The context manager closes the document even when a page fails to
        # parse; without it the handle stays open after every call.
        with fitz.open(pdf_path) as doc:
            # Join once rather than growing a string page by page.
            return "".join(page.get_text() for page in doc)
    except Exception as e:
        # Deliberate best-effort: report the failure as text, do not raise.
        return f"Error extracting text from PDF: {e}"

In [26]:
# Website class to handle the extraction of title and text content from a webpage
class Website:
    """Title and plain text of one source: a web page or a PDF file.

    After construction ``title`` and ``text`` are always set. A failed URL
    fetch or a failed PDF extraction is reported with ``title == "Error"``
    and the failure message in ``text``.
    """

    # Prefix of the message extract_text_from_pdf returns when it fails.
    _PDF_ERROR_PREFIX = "Error extracting text from PDF:"

    def __init__(self, url, pdf_path=None):
        """Load content from ``url`` if given, otherwise from ``pdf_path``.

        Args:
            url: Address of the page to fetch; takes precedence over
                ``pdf_path`` when both are truthy.
            pdf_path: Path of a PDF to read when no URL is supplied.

        When neither argument is truthy, placeholder title and text are set.
        """
        self.url = url
        self.pdf_path = pdf_path
        if url:
            self._extract_from_url(url)
        elif pdf_path:
            self._extract_from_pdf(pdf_path)
        else:
            self.title = "No input provided"
            self.text = "No content available."

    def _extract_from_url(self, url):
        """Fetch ``url`` and store the page title and visible body text.

        Request failures (connection errors, timeouts, HTTP error statuses)
        are stored as ``title = "Error"`` plus a message in ``text``.
        """
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, "html.parser")
            # .string is None for an empty <title> or one with nested tags,
            # so fall back to the placeholder in that case as well.
            page_title = soup.title.string if soup.title else None
            self.title = page_title or "No title available"
            body = soup.body
            if body is None:
                # No <body> element: nothing to clean or extract.
                self.text = "No text available"
                return
            # Remove elements that carry no readable content.
            for tag in body.find_all(["script", "style", "img", "input"]):
                tag.decompose()
            self.text = body.get_text(separator="\n", strip=True)
        except requests.exceptions.RequestException as e:
            self.title = "Error"
            self.text = f"Failed to fetch content: {e}"

    def _extract_from_pdf(self, pdf_path):
        """Store the PDF's file name as title and its extracted text.

        extract_text_from_pdf reports failure through its return value, so a
        result starting with the error prefix is flagged with
        ``title = "Error"``, matching how URL failures are reported.
        """
        extracted = extract_text_from_pdf(pdf_path)
        if extracted.startswith(self._PDF_ERROR_PREFIX):
            self.title = "Error"
        else:
            self.title = os.path.basename(pdf_path)
        self.text = extracted

In [18]:
# System message for the research assistant: states its role and the five
# tasks it should perform. The text opens with a newline and every line ends
# with one.
system_prompt = (
    "\n"
    "You are an AI-powered research assistant. Your job is to:\n"
    "1. Extract key insights, arguments, and claims from articles.\n"
    "2. Provide fact-based summaries and indicate source credibility.\n"
    "3. Answer user queries based on article content.\n"
    "4. Identify important trends across multiple sources.\n"
    "5. Format responses in Markdown for clarity.\n"
)

In [19]:
# Function to generate user prompt
def user_prompt(website, query=None):
    """Build the user message for a source, with or without a question.

    Args:
        website: Object exposing ``title`` and ``text`` attributes.
        query: Optional question; any falsy value (None, "") selects the
            summary prompt.

    Returns:
        str: A question-answering prompt when ``query`` is truthy, otherwise
        a summarise-and-assess prompt; both embed the source title and text.
    """
    title, body = website.title, website.text
    if not query:
        return f"Analyze and summarize the website '{title}'. Extract key points, claims and credibility indicators:\n\n{body}"
    return f"Answer this question based on '{title}':\n\n{query}\n\nContent:\n{body}"

In [20]:
# Define the function to structure the message for GPT
def message_for(website, query=None):
    """Assemble the two-message chat payload for one source.

    Args:
        website: Source object forwarded to ``user_prompt``.
        query: Optional question forwarded to ``user_prompt``.

    Returns:
        list[dict]: The system message (``system_prompt``) followed by the
        user message, each a dict with ``role`` and ``content`` keys.
    """
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt(website, query)}
    return [system_message, user_message]

In [33]:
# AI Research Function
def research(url=None, query=None, pdf_path=None, model="gpt-4o-mini"):
    """Analyse a web page or PDF with the chat model and return its reply.

    Args:
        url: Address of the page to analyse; used in preference to
            ``pdf_path`` when both are given.
        query: Optional question about the content; without one the model is
            asked for a summary.
        pdf_path: Path of a PDF to analyse when no URL is given.
        model: Name of the chat model to call.

    Returns:
        str: The model's reply, or a plain message when no input was given
        or the source could not be loaded.
    """
    # Without a source there is nothing to analyse; answer locally instead of
    # spending an API call on placeholder text.
    if not url and not pdf_path:
        return "No input provided: supply a URL or a PDF path."
    website = Website(url, pdf_path)
    if website.title == "Error":
        return website.text  # Return error message if fetching failed
    response = openai.chat.completions.create(
        model=model,
        messages=message_for(website, query),
    )
    return response.choices[0].message.content

In [24]:
# Display research results in Jupyter Notebook
def display_research(url, query:str = None):
    """Run ``research`` on a URL and render the reply as Markdown output.

    Args:
        url: Address of the page to analyse.
        query: Optional question about the page.
    """
    markdown_text = research(url, query)
    rendered = Markdown(markdown_text)
    display(rendered)

In [27]:
# Example run: no question is passed, so the summary prompt is used.
# Executing this cell fetches the page over HTTP and calls the OpenAI API.
display_research("https://www.nytimes.com/")

# Summary of The New York Times

## Overview
The New York Times (NYT) is a prominent American newspaper known for its extensive coverage of breaking news, politics, business, culture, and international affairs. It features a variety of sections including U.S. News, World News, Opinion, Arts, and Lifestyle.

## Key Points and Claims

### News Coverage
- **U.S. News**: Includes various topics such as Politics, Health, Science, Education, and Immigration. Recent headlines address critical issues like the Trump administration's detention policies and ongoing legal maneuvers regarding deportations.
- **World News**: Reports cover geopolitical dynamics such as the Russia-Ukraine conflict and tensions in the Middle East. There are features on significant historical events, contemporary crises, and diplomatic relationships.
- **Business News**: Focuses on economic updates, stock markets, technology, and policies impacting industries, including the implications of AI developments.
- **Lifestyle & Culture**: Offers insights into food, travel, style, and the arts, alongside features on wellness and modern life trends.
  
### Opinion Pieces
- NYT hosts a range of opinion articles discussing political strategies, societal issues, and cultural commentary. Notable contributors include recognized journalists and thought leaders.

### Multimedia Features
- The platform provides access to podcasts and newsletters, such as **The Daily** and **The Morning**, delivering succinct news summaries and analyses.

### Interactive Elements
- The website includes games and interactive puzzles such as crossword puzzles and word games, enhancing user engagement.

## Source Credibility Indicators
- **Reputation**: The New York Times is regarded as one of the leading newspapers globally, known for its rigorous journalistic standards and investigative reporting.
- **Awards**: It has received numerous Pulitzer Prizes, underscoring its commitment to high-quality journalism.
- **Expert Opinion**: Many articles feature insights or analyses from experts in respective fields, lending authority to the reporting.
- **Fact-Checking**: The NYT maintains a substantial reputation for accuracy, often citing sources and providing context to news stories, though, like all news organizations, it has faced scrutiny and claims of bias.

## Trends Identified
- **Geopolitical Tensions**: Continual coverage of evolving crises, particularly in Eastern Europe and the Middle East, reflects a global focus amid rising international conflicts.
- **Domestic Policies**: Increasing attention on U.S. domestic policies, especially regarding immigration and healthcare, highlights ongoing societal debates.
- **The Impact of Technology**: Reporting on AI and technology reflects a growing intersection between technological advancement and daily life, portraying its implications across various sectors.

## Conclusion
The New York Times serves as a vital resource for news and analysis across multiple domains. Its comprehensive reporting and diverse perspectives keep readers informed about both domestic and international matters while sustaining its commitment to journalistic integrity.

---
This summary presents an overview of the key features and credibility of The New York Times based on general knowledge of the publication. If you need specific articles analyzed from their website, please provide the details!

In [34]:
# Gradio Interface for AI Research Assistant
def research_interface(url: str = None, pdf_file=None, query: str = None) -> str:
    """Gradio callback: analyse an uploaded PDF or, failing that, a URL.

    Args:
        url: Text from the URL box; surrounding whitespace is ignored.
        pdf_file: Value delivered by the file-upload component. Either a
            path (``str`` / ``os.PathLike``) or an object exposing the path
            of the uploaded file through ``.name``. Takes precedence over
            ``url`` when present.
        query: Text from the question box; blank means "summarise".

    Returns:
        str: Markdown from ``research``, or a prompt asking for input when
        neither a PDF nor a URL was supplied.
    """
    question = (query or "").strip() or None
    if pdf_file:
        # The upload is already on disk; only its path is needed. Depending
        # on the Gradio version/configuration the value is the path itself
        # or a wrapper carrying it in `.name`, so accept both.
        if isinstance(pdf_file, (str, os.PathLike)):
            pdf_path = os.fspath(pdf_file)
        else:
            pdf_path = pdf_file.name
        return research(pdf_path=pdf_path, query=question)
    page_url = (url or "").strip()
    if page_url:
        return research(url=page_url, query=question)
    return "Please provide a valid URL or PDF file."

# Build the UI pieces first (two text boxes, one file upload, one Markdown
# panel), then wire them to research_interface.
url_box = gr.Textbox(label="Enter URL for Analysis (Optional)")
pdf_upload = gr.File(label="Upload PDF for Analysis (Optional)")
question_box = gr.Textbox(label="Ask a question about this content (optional)")
insights_panel = gr.Markdown(label="Research Insights")

interface = gr.Interface(
    fn=research_interface,
    inputs=[url_box, pdf_upload, question_box],
    outputs=insights_panel,
    title="AI-Powered Research Assistant",
    description="Paste a URL or upload a PDF and ask research questions about its content.",
)

# Start the app; share=True additionally requests a public share link.
interface.launch(share=True)

* Running on local URL:  http://127.0.0.1:7870

Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.


