In [1]:

import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI

In [6]:
# Load environment variables from a .env file
# NOTE(review): dotenv_path is an absolute path on one specific machine; anyone
# else running this notebook has to point it at their own .env file.

load_dotenv(dotenv_path="/Users/kalra/Documents/miniconda3/envs/condaenv_course2/PytorchExamples/AI-Projects-Imageclassifier/.env",override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Check the key
# The whitespace test runs before the prefix test: a key with leading spaces or
# tabs would otherwise fail startswith() and be reported as having the wrong
# prefix, hiding the real problem.

if not api_key:
    print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook")
else:
    print("API key found and looks good so far!")

API key found and looks good so far!


In [7]:
# Create the OpenAI client object that the later cells call via
# openai.chat.completions.create(...).
# NOTE(review): presumably the client reads OPENAI_API_KEY from the environment
# populated by load_dotenv above, since no key is passed here — confirm.
openai = OpenAI()

In [8]:
# Smoke test: send one user message to the chat model and print the reply text.
message = "Hello, GPT! This is my first ever message to you! Hi!"
response = openai.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {"role": "user", "content": message},
    ],
)
print(response.choices[0].message.content)

Hello! Welcome! I'm glad to have your first message here. How can I assist you today?


In [9]:
# HTTP headers passed to requests.get by the Website class; the User-Agent
# string is that of a desktop Chrome browser.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    )
}

class Website:
    """
    A web page fetched with requests and reduced to its title and visible text.

    Attributes:
        url: The URL that was requested.
        title: Text of the page's <title>, or "No title found" when the page
            has no <title> or its <title> has no single text string.
        text: Text of <body> with script/style/img/input elements removed,
            one text fragment per line; "" when the page has no <body>.
    """

    def __init__(self, url, timeout=30):
        """
        Create this Website object from the given url using the BeautifulSoup library

        Args:
            url: Address of the page to download.
            timeout: Seconds passed to requests.get as its timeout, so an
                unresponsive server raises instead of hanging the notebook.
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(response.content, 'html.parser')

        # soup.title.string is None when <title> is empty or has nested tags;
        # fall back to the placeholder so the title is never None.
        title_tag = soup.title
        self.title = title_tag.string if title_tag and title_tag.string else "No title found"

        # Documents without a <body> (fragments, some error responses) give
        # soup.body == None; treat that as a page with no text.
        body = soup.body
        if body is None:
            self.text = ""
            return

        for irrelevant in body(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = body.get_text(separator="\n", strip=True)

In [14]:
# Scrape the Wikipedia "Artificial intelligence" article and print the title
# and the extracted body text. `ed` is reused by later cells.
ed = Website("https://en.wikipedia.org/wiki/Artificial_intelligence")
print(ed.title)
print(ed.text)

Artificial intelligence - Wikipedia
Jump to content
Main menu
Main menu
move to sidebar
hide
Navigation
Main page
Contents
Current events
Random article
About Wikipedia
Contact us
Contribute
Help
Learn to edit
Community portal
Recent changes
Upload file
Special pages
Search
Search
Appearance
Donate
Create account
Log in
Personal tools
Donate
Create account
Log in
Pages for logged out editors
learn more
Contributions
Talk
Contents
move to sidebar
hide
(Top)
1
Goals
Toggle Goals subsection
1.1
Reasoning and problem-solving
1.2
Knowledge representation
1.3
Planning and decision-making
1.4
Learning
1.5
Natural language processing
1.6
Perception
1.7
Social intelligence
1.8
General intelligence
2
Techniques
Toggle Techniques subsection
2.1
Search and optimization
2.1.1
State space search
2.1.2
Local search
2.2
Logic
2.3
Probabilistic methods for uncertain reasoning
2.4
Classifiers and statistical learning methods
2.5
Artificial neural networks
2.6
Deep learning
2.7
GPT
2.8
Hardware and softw

In [15]:
# System prompt: tells the model what its role is and how to format the answer.
# Built from adjacent string literals, which Python joins into one string.

system_prompt = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown."
)

In [16]:
# A function that writes a User Prompt that asks for summaries of websites:

def user_prompt_for(website):
    """
    Build the user prompt asking for a summary of the given website.

    Args:
        website: Object exposing ``title`` and ``text`` attributes.

    Returns:
        A string made of a title line, the summarization instructions,
        a blank line, and then the website's text.
    """
    title_line = f"You are looking at a website titled {website.title}"
    instructions = (
        "\nThe contents of this website is as follows; "
        "please provide a short summary of this website in markdown. "
        "If it includes news or announcements, then summarize these too.\n\n"
    )
    return title_line + instructions + website.text

In [17]:
# Show the user prompt for the page scraped into `ed`: first printed as text,
# then left as the cell's last expression so the notebook also displays it.
print(user_prompt_for(ed))
user_prompt_for(ed)

You are looking at a website titled Artificial intelligence - Wikipedia
The contents of this website is as follows; please provide a short summary of this website in markdown. If it includes news or announcements, then summarize these too.

Jump to content
Main menu
Main menu
move to sidebar
hide
Navigation
Main page
Contents
Current events
Random article
About Wikipedia
Contact us
Contribute
Help
Learn to edit
Community portal
Recent changes
Upload file
Special pages
Search
Search
Appearance
Donate
Create account
Log in
Personal tools
Donate
Create account
Log in
Pages for logged out editors
learn more
Contributions
Talk
Contents
move to sidebar
hide
(Top)
1
Goals
Toggle Goals subsection
1.1
Reasoning and problem-solving
1.2
Knowledge representation
1.3
Planning and decision-making
1.4
Learning
1.5
Natural language processing
1.6
Perception
1.7
Social intelligence
1.8
General intelligence
2
Techniques
Toggle Techniques subsection
2.1
Search and optimization
2.1.1
State space search
2.



Messages
The API from OpenAI expects to receive messages in a particular structure. Many of the other APIs share this structure:

[
    {"role": "system", "content": "system message goes here"},
    {"role": "user", "content": "user message goes here"}
]


In [19]:
def messages_for(website):
    """
    Build the two-message chat payload for summarizing a website.

    Args:
        website: Object accepted by ``user_prompt_for``.

    Returns:
        A list of two dicts: the system message carrying ``system_prompt``,
        followed by the user message carrying the prompt for ``website``.
    """
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt_for(website)}
    return [system_message, user_message]

In [20]:
# Inspect the message list built for the page scraped into `ed`.
messages_for(ed)

[{'role': 'system',
  'content': 'You are an assistant that analyzes the contents of a website and provides a short summary, ignoring text that might be navigation related. Respond in markdown.'},
 {'role': 'user',

In [21]:
def summarize(url):
    """
    Scrape ``url`` with ``Website`` and ask the chat model to summarize it.

    Args:
        url: Address of the page to summarize.

    Returns:
        The content of the first choice in the model's response.
    """
    page = Website(url)
    completion = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=messages_for(page),
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [23]:
# Return the summary as a raw string; as the cell's last expression, the
# notebook displays it as-is rather than rendering it as Markdown.
summarize("https://en.wikipedia.org/wiki/Artificial_intelligence")

'# Summary of the Wikipedia Page on Artificial Intelligence\n\nThe Wikipedia article on **Artificial Intelligence (AI)** provides a comprehensive overview of AI, its goals, techniques, applications, ethical considerations, and future implications. Here’s a breakdown of the key sections:\n\n## Overview\n- **Definition**: AI refers to computational systems that perform tasks typically associated with human intelligence, including learning, reasoning, problem-solving, perception, and decision-making.\n- **History**: The field emerged in 1956 and has undergone multiple cycles of optimism and setbacks, known as "AI winters".\n\n## Goals and Techniques\n- **Goals**: AI research aims for capabilities like reasoning, learning, natural language processing, perception, and social intelligence.\n- **Techniques**: Modern AI employs various techniques, such as:\n  - **Machine Learning**: Enables programs to improve automatically through experience.\n  - **Deep Learning**: A subfield of machine lear

In [25]:
def display_summary(url):
    """
    Summarize the page at ``url`` and render the summary as Markdown.

    Args:
        url: Address of the page to summarize.
    """
    rendered = Markdown(summarize(url))
    display(rendered)

In [26]:
# Summarize the Wikipedia "Artificial intelligence" article and render it as Markdown.
display_summary("https://en.wikipedia.org/wiki/Artificial_intelligence")

# Summary of the Wikipedia Page on Artificial Intelligence

## Overview
Artificial intelligence (AI) refers to the computational systems capable of performing tasks typically attributed to human intelligence, including learning, reasoning, problem-solving, perception, and decision-making. The field has developed through various research methods that enable machines to understand their environments and optimize their actions to achieve specific goals.

## Goals of AI
AI research encompasses various goals, including:
- **Reasoning and Problem-Solving:** Emulating human logical deduction and handling uncertain information.
- **Knowledge Representation:** Structuring knowledge for AI systems to answer questions and make decisions.
- **Planning and Decision-Making:** Developing agents that can autonomously execute actions to fulfill given objectives.
- **Learning:** Improving performance over time through methods like supervised and unsupervised learning.
- **Natural Language Processing:** Allowing machines to interpret and generate human language.
- **Perception, Social Intelligence, and General Intelligence:** Targeting a broad range of human-like capabilities.

## Techniques
AI employs various techniques to achieve its goals:
- **Machine Learning:** A subset of AI focused on algorithms that improve as they are exposed to more data.
- **Deep Learning:** A specialized form of machine learning using neural networks with many layers.
- **Genetic Algorithms:** Optimization techniques inspired by natural selection.
- **Logic and Statistical Methods:** Used for reasoning and handling uncertain information.

## Applications
AI has diverse applications across multiple domains:
- **Healthcare:** Enhancing diagnostics and patient care through data analysis.
- **Finance:** Automating trading and risk assessment.
- **Military:** Improving command and control systems.
- **Games:** Developing AI for strategic games like chess and Go.
- **Generative AI:** Creating content across text, images, and videos, such as ChatGPT and various art generators.

## Ethical and Regulatory Considerations
As AI technology advances, there are significant ethical issues regarding privacy, bias, misinformation, and the potential for harmful applications, such as autonomous weapon systems. Ethical AI development seeks to ensure that AI aligns with human values and societal norms.

## Future of AI
Future discussions revolve around achieving artificial general intelligence (AGI) and managing risks associated with superintelligent AI. This includes considerations on existential risks posed by advanced AI systems, their impact on employment, and the need for regulatory frameworks to ensure AI benefits humanity.

## Recent Developments
- **Generative AI Boom:** The current era has seen advancements in generative AI techniques capable of creating realistic text and images.
- **Regulatory Responses:** International efforts to establish guidelines and frameworks for safe AI use, highlighted by recent global summits.

This summary encapsulates the major topics covered in the Wikipedia article on artificial intelligence without delving into navigation-related content or details beyond the main focus.

In [27]:
# Summarize the CNN home page and render the result as Markdown.
display_summary("https://cnn.com")

# CNN Website Summary

The CNN website serves as a major source of breaking news, covering the latest topics in various sectors including US and world news, politics, business, health, entertainment, sports, and science. Key features include:

- **Live Updates and Breaking News**: Coverage of ongoing events such as the Ukraine-Russia War and the Israel-Hamas War.
- **Trending Stories**: Current headlines like the trial of Sean ‘Diddy’ Combs, the death of actor Jonathan Joss, and ongoing developments regarding former President Donald Trump.
- **Diverse Topics**: Articles cover a wide range of interests from crime and justice to health and wellness, as well as features on travel and lifestyle.
- **Investigative Reports**: In-depth investigations on pressing issues like rising antisemitism and economic concerns linked to Trump’s policies.
- **Podcasts and Video Content**: A section dedicated to audio and visual content, including shows featuring CNN personalities and topic-focused deep dives.

### Notable Announcements
- **Political Pressures**: Recent actions by the Trump administration regarding bank regulations on firearms and healthcare guidelines have been highlighted.
- **Elections**: The website provides analysis and updates on upcoming elections, including the implications of current policies on the political landscape.
- **Health Alerts**: Discussions around public health concerns, including potential mental health impacts of restrictive diets, have been reported.

The CNN website is a comprehensive platform for users looking to stay informed on global and national happenings in real-time.

In [44]:
# Summarize the Yahoo home page and render the result as Markdown.
display_summary("https://yahoo.com")

# Summary of Yahoo Website

Yahoo serves as a comprehensive portal providing content across various categories including Mail, News, Sports, Finance, Entertainment, and more. The homepage encourages users to make Yahoo their default starting page to discover new content daily.

## Latest News Highlights
- **Canada's Economic Slowdown**: An economic watchdog predicts a mere 1% growth for Canada, marking one of the worst slowdowns since COVID.
- **Branding Changes**: Canadian Tire is set to take over Hudson's Bay branding rights, signaling a potential end of an era.
- **Airport Incident**: Ellen Pompeo reported being detained by TSA for carrying an "expensive" snack.
- **Courtroom Drama**: A woman was ejected from a courtroom after shouting obscenities during Sean "Diddy" Combs' trial.
- **Energy Independence Announcement**: The CEO of Hydro-Québec announced a new energy deal with Newfoundland and Labrador Hydro, showcasing Canadian capabilities in energy production.
- **Missing Persons Case**: A tragic update on three sisters who disappeared during a visit with their father—police have confirmed they are dead, and the father is still missing.

For more details and updates across various topics, users can explore Yahoo's offerings in finance, entertainment, and sports.

You may notice that if you try `display_summary("https://openai.com")`, it doesn't work! That's because OpenAI has a fancy website that uses JavaScript. There are many ways around this that some of you might be familiar with. For example, Selenium is a hugely popular framework that runs a browser behind the scenes, renders the page, and allows you to query it.

In [47]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import time

class ScrapeWebsite:
    """
    A web page rendered in headless Chrome and reduced to its title and visible text.

    Attributes:
        url: The URL that was loaded.
        title: Text of the page's <title>, or "No title found" when the page
            has no <title> or its <title> has no single text string.
        text: Text of <body> with script/style/img/input elements removed,
            one text fragment per line; "" when the page has no <body>.
    """

    def __init__(self, url, wait_seconds=5):
        """
        Create this Website object from the given URL using Selenium + BeautifulSoup
        Supports JavaScript-heavy and normal websites uniformly.

        Args:
            url: Address of the page to load.
            wait_seconds: How long to sleep after loading the page, giving its
                JavaScript time to run before the page source is read.
        """
        self.url = url

        # Configure headless Chrome
        options = Options()
        options.add_argument('--headless')
        options.add_argument('--no-sandbox')
        options.add_argument('--disable-dev-shm-usage')

        # Use webdriver-manager to manage ChromeDriver
        service = Service(ChromeDriverManager().install())

        # Initialize the Chrome WebDriver with the service and options
        driver = webdriver.Chrome(service=service, options=options)

        # The finally block guarantees the browser process is shut down even
        # when loading the page or reading its source raises.
        try:
            # Load the page
            driver.get(url)

            # Wait for JS to load (adjust via wait_seconds)
            time.sleep(wait_seconds)

            # Fetch the page source after JS execution
            page_source = driver.page_source
        finally:
            driver.quit()

        # Parse the HTML content with BeautifulSoup
        soup = BeautifulSoup(page_source, 'html.parser')

        # Extract title; soup.title.string is None when <title> is empty or
        # has nested tags, so fall back to the placeholder in that case too.
        title_tag = soup.title
        self.title = title_tag.string if title_tag and title_tag.string else "No title found"

        # A document without <body> gives soup.body == None; treat it as empty.
        body = soup.body
        if body is None:
            self.text = ""
            return

        # Remove unnecessary elements
        for irrelevant in body(["script", "style", "img", "input"]):
            irrelevant.decompose()

        # Extract the main text
        self.text = body.get_text(separator="\n", strip=True)

In [48]:
def summarize2(url):
    """
    Scrape ``url`` with ``ScrapeWebsite`` and ask the chat model to summarize it.

    Args:
        url: Address of the page to summarize.

    Returns:
        The content of the first choice in the model's response.
    """
    page = ScrapeWebsite(url)
    completion = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=messages_for(page),
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [49]:
def display_summary2(url):
    """
    Summarize the page at ``url`` via ``summarize2`` and render it as Markdown.

    Args:
        url: Address of the page to summarize.
    """
    rendered = Markdown(summarize2(url))
    display(rendered)

In [50]:
# Summarize the CNN home page using the Selenium-based scraper and render it as Markdown.
display_summary2("https://cnn.com")

# CNN Website Summary

CNN is a leading news website that provides up-to-date information and analysis across a wide range of topics including US and world news, politics, business, health, entertainment, sports, and science.

## Key Recent News Highlights
- **Sean 'Diddy' Combs Trial**: Ongoing coverage of the trial where hotel security claims Combs paid $100,000 for video evidence related to an assault.
- **Ukraine-Russia Conflict**: Ukraine is reportedly utilizing underwater explosives to strike a bridge connecting Crimea to Russia.
- **Israel-Hamas War**: Ongoing humanitarian and political situations are being closely monitored and reported.

### Featured Topics
CNN covers a variety of categories such as:
- **Politics**: Including discussions on the implications of Trump's current policies and Congressional actions.
- **Health**: Investigative reports concerning gender-affirming care and efforts aimed at child health in schools.
- **Climate**: Addressing current weather events related to wildfires in Canada and their effects on air quality.

### Entertainment News
Updates on celebrity events and personal developments, including:
- Hailee Steinfeld's marriage announcement
- Vanessa Kirby's pregnancy reveal at a red carpet event.

In addition to breaking news and important issues, CNN also features podcasts, video content, and various lifestyle articles aimed at broad audiences. The website is designed to keep users informed about critical developments both in the US and globally.