# Use Case: Summarizing Web Page Content with the OpenAI API

### Import required libraries

In [1]:
# imports required libraries

import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI

In [2]:
# Load environment variables from a file called .env (values in the file
# override any already set in the process, because override=True).

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Sanity-check the key and print a hint about what to fix.
# The whitespace test runs BEFORE the prefix test: a key with leading
# whitespace would otherwise fail startswith() and be reported as having
# the wrong prefix, which hides the real problem.

if not api_key:
    print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook")
else:
    print("API key found and looks good so far!")

API key found and looks good so far!


In [3]:
# Create the API client that the later cells call through `openai.chat.completions`.
# No key is passed explicitly; presumably the client picks up OPENAI_API_KEY from
# the environment loaded earlier - confirm against the OpenAI SDK documentation.
openai = OpenAI()

In [4]:
# Quick smoke test: send one user message to the chat endpoint and print the reply.
# If this cell fails, head over to the Troubleshooting notebook.

message = "Hello, GPT! This is my first ever message to you! Hi!"
response = openai.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {"role": "user", "content": message},
    ],
)
print(response.choices[0].message.content)

Hello! Welcome! I'm glad to hear from you. How can I assist you today?


In [5]:
# A class to represent a Webpage

# Request headers sent with every page fetch; some websites only respond
# properly when the request carries browser-like headers.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36",
}

class Website:
    """
    Snapshot of a web page: its URL, its title and its visible text.

    Attributes set by the constructor:
        url:   the address that was fetched
        title: text of the page's <title>, or "No title found" when the tag
               is missing or has no single string value
        text:  newline-separated text of the page with script, style, img
               and input elements removed
    """

    def __init__(self, url, timeout=30):
        """
        Create this Website object from the given url using the BeautifulSoup library

        Args:
            url: address of the page to download.
            timeout: seconds to wait for the server; passed straight to
                requests.get so a stalled server cannot hang the notebook.

        Raises:
            requests.exceptions.RequestException: if the download fails or
                times out.
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(response.content, 'html.parser')

        # soup.title.string is None when <title> is empty or wraps nested
        # tags, so test the string itself and not only the tag.
        title_tag = soup.title
        if title_tag is not None and title_tag.string:
            self.title = title_tag.string
        else:
            self.title = "No title found"

        # html.parser does not invent a <body> for documents that lack one;
        # fall back to the whole document instead of failing on None.
        root = soup.body if soup.body is not None else soup
        for irrelevant in root(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = root.get_text(separator="\n", strip=True)

In [16]:
# Fetch a sample page and show what was scraped: title first, then the body text.
# `client` holds a Website instance and is reused by later cells.
client = Website("https://vejr.tv2.dk/live/2024-12-02-vejr-og-klima")
print(client.title, client.text, sep="\n")

Vejr og klima lige nu | Få seneste nyt her - TV 2
Hop til indhold
Nyheder
Sport
Vejr
TV
TV 2 Play
Log ind
Login og indstillinger
Log ind
Opret TV 2 Login
Indstillinger
Farvetema
Log ind for at ændre farvetema
Auto
Vælg farvetema
Lys
Mørk
Auto
Med "Auto" skiftes der automatisk mellem lys og mørk tilstand baseret på din enheds indstillinger.
Nyhedsregion
TV 2 Kosmopol
Vælg nyhedsregion
TV 2 Kosmopol
TV 2 Fyn
tvSyd
TV 2/Bornholm
TV2 ØST
TV2 Østjylland
TV2 Nord
TV Midtvest
Genveje
TV 2 Play
Administrer TV 2 konto
Privatlivsindstillinger
TV 2 privatlivspolitik
Cookie-indstillinger
Menu
Søg, menu og genveje
TV 2 Play
Søg på tv2.dk
Sektioner
Nyheder
Seneste
Korte videoer
Trump i Det Hvide Hus
Krigen i Ukraine
Politik
Krimi
Samfund
Udland
Business
Penge
Tech
Klima
Sport
Seneste
Korte videoer
Sendeplan
Live og resultater
Turneringer
Tour de France
3F Superliga
Tour de France Femmes
Fodbold
Håndbold
Cykling
Badminton
NFL
Tennis
Basketball
Video
Vejr
Seneste
Korte videoer
Vejrudsigt
Radar
Elprise

### Define Prompts for OpenAI API

In [7]:
# System prompt for the summarizer - you can experiment with this later,
# e.g. by changing the last sentence to "Respond in markdown in Spanish."

system_prompt = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown."
)

In [8]:
# Build the user prompt that asks the model to summarize a website
def user_prompt_for(website):
    """
    Return the user prompt for `website`.

    The prompt is the page title, a fixed instruction paragraph, and then
    the page text, read from `website.title` and `website.text`.
    """
    heading = f"You are looking at a website titled {website.title}"
    instructions = (
        "\nThe contents of this website is as follows; "
        "please provide a short summary of this website in markdown. "
        "If it includes news or announcements, then summarize these too.\n\n"
    )
    return heading + instructions + website.text

In [9]:
# Preview the full user prompt generated for the sample page
print(user_prompt_for(client))

You are looking at a website titled Vejr og klima lige nu | Få seneste nyt her - TV 2
The contents of this website is as follows; please provide a short summary of this website in markdown. If it includes news or announcements, then summarize these too.

Hop til indhold
Nyheder
Sport
Vejr
TV
TV 2 Play
Log ind
Login og indstillinger
Log ind
Opret TV 2 Login
Indstillinger
Farvetema
Log ind for at ændre farvetema
Auto
Vælg farvetema
Lys
Mørk
Auto
Med "Auto" skiftes der automatisk mellem lys og mørk tilstand baseret på din enheds indstillinger.
Nyhedsregion
TV 2 Kosmopol
Vælg nyhedsregion
TV 2 Kosmopol
TV 2 Fyn
tvSyd
TV 2/Bornholm
TV2 ØST
TV2 Østjylland
TV2 Nord
TV Midtvest
Genveje
TV 2 Play
Administrer TV 2 konto
Privatlivsindstillinger
TV 2 privatlivspolitik
Cookie-indstillinger
Menu
Søg, menu og genveje
TV 2 Play
Søg på tv2.dk
Sektioner
Nyheder
Seneste
Korte videoer
Trump i Det Hvide Hus
Krigen i Ukraine
Politik
Krimi
Samfund
Udland
Business
Penge
Tech
Klima
Sport
Seneste
Korte videoer


### Prepare message for OpenAI API

In [10]:
# Assemble the chat payload: a system message followed by a user message

def messages_for(website):
    """Return the two-message list (system, then user) built for `website`."""
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt_for(website)}
    return [system_message, user_message]

In [11]:
# Inspect the message list built for the sample page
messages_for(client)

[{'role': 'system',
  'content': 'You are an assistant that analyzes the contents of a website and provides a short summary, ignoring text that might be navigation related. Respond in markdown.'},
 {'role': 'user',
  'content': 'You are looking at a website titled Vejr og klima lige nu | Få seneste nyt her - TV 2\nThe contents of this website is as follows; please provide a short summary of this website in markdown. If it includes news or announcements, then summarize these too.\n\nHop til indhold\nNyheder\nSport\nVejr\nTV\nTV 2 Play\nLog ind\nLogin og indstillinger\nLog ind\nOpret TV 2 Login\nIndstillinger\nFarvetema\nLog ind for at ændre farvetema\nAuto\nVælg farvetema\nLys\nMørk\nAuto\nMed "Auto" skiftes der automatisk mellem lys og mørk tilstand baseret på din enheds indstillinger.\nNyhedsregion\nTV 2 Kosmopol\nVælg nyhedsregion\nTV 2 Kosmopol\nTV 2 Fyn\ntvSyd\nTV 2/Bornholm\nTV2 ØST\nTV2 Østjylland\nTV2 Nord\nTV Midtvest\nGenveje\nTV 2 Play\nAdministrer TV 2 konto\nPrivatlivsindst

### Call OpenAI API to get the summary

In [12]:
# And now: call the OpenAI API. You will get very familiar with this!

def summarize(url, model="gpt-4o-mini"):
    """
    Fetch the page at `url` and return the model's summary of it.

    Args:
        url: address of the page to summarize.
        model: name of the chat model to use; defaults to the model this
            notebook was written against.

    Returns:
        The content of the first choice in the chat completion response.
    """
    website = Website(url)
    response = openai.chat.completions.create(
        model=model,
        messages=messages_for(website),
    )
    return response.choices[0].message.content

In [19]:
# Show the summary returned by OpenAI as a raw (unrendered) string
summarize("https://vejr.tv2.dk/live/2024-12-02-vejr-og-klima")



In [21]:
# Render a page summary as formatted markdown in the Jupyter output

def display_summary(url):
    """Summarize the page at `url` and display the result as rendered Markdown."""
    display(Markdown(summarize(url)))

In [22]:
# Summarize the sample page and render the result as markdown
display_summary("https://vejr.tv2.dk/live/2024-12-02-vejr-og-klima")

# Summary of "Vejr og klima lige nu | Få seneste nyt her - TV 2"

The website provides up-to-date information on weather and climate conditions in Denmark, managed by meteorologist Jonas Damsbo from TV 2 Vejret. Key highlights include:

- **Severe Weather Alerts**: There is a risk of thunderstorms accompanied by hail and strong winds today, particularly notable in Jutland. The storm could produce localized heavy rainfall, with predictions of over 30 millimeters in some areas during very short periods.

- **Temperature Information**: The website notes that current temperatures range from 19 to 24 degrees Celsius, with the potential for reaching the 17th meteorological summer day if temperatures exceed 25 degrees.

- **Safety Advice**: Recommendations are offered on how to prepare for potential flooding due to expected heavy rain. This includes securing valuables, maintaining drainage systems, and being aware of local flood risks.

- **Recent Weather Events**: The website discusses several recent weather impacts, such as road damage in Hovby after severe rainfall and flooding affecting various locations, leading to train cancellations on Fyn and warnings against swimming at several beaches due to contamination risks.

- **Public Interaction Encouraged**: Viewers are invited to share their weather stories or observations via uploads or email to enhance the reporting.

The overall focus is on providing timely weather updates, safety precautions, and interactive engagement with the public concerning significant weather events.