In [1]:
# imports

import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI

# If you get an error running this cell, then please head over to the troubleshooting notebook!

In [2]:
# Load environment variables in a file called .env

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Check the key

if not api_key:
    print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook")
else:
    print("API key found and looks good so far!")


API key found and looks good so far!


In [3]:
openai = OpenAI()


In [4]:
# A class to represent a Webpage
# If you're not familiar with Classes, check out the "Intermediate Python" notebook

# Some websites need you to use proper headers when fetching them:
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class Website:

    def __init__(self, url):
        """
        Create this Website object from the given url using the BeautifulSoup library
        """
        self.url = url
        response = requests.get(url, headers=headers)
        soup = BeautifulSoup(response.content, 'html.parser')
        self.title = soup.title.string if soup.title else "No title found"
        for irrelevant in soup.body(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = soup.body.get_text(separator="\n", strip=True)

In [5]:
# Let's try one out. Change the website and add print statements to follow along.

ed = Website("https://www.foxnews.com/opinion/im-gender-detransitioner-i-told-ftc-how-doctors-abused-power-over-me")
print(ed.title)
print(ed.text)

FTC seeks stories from those harmed by transgender medical care practices | Fox News
Fox News Media
Fox News Media
Fox Business
Fox Nation
Fox News Audio
Fox Weather
Outkick
Fox Noticias
Books
Fox News
U.S.
Crime
Immigration
Terror
True Crime
Bryan Kohberger
Luigi Mangione
Politics
Donald Trump
Senate
House
Judiciary
Foreign Policy
Fox News Polls
Elections
World
U.N.
Conflicts
Terrorism
Disasters
Global Economy
Environment
Religion
Scandals
Opinion
Media
Fox Nation Coverage
Entertainment
Exclusives
Royal Families
Country Music
Sylvester Stallone
Matthew McConaughey
Sports
NFL
College Football
MLB
NASCAR
INDYCAR
NBA
NHL
Golf
Tennis
FIFA WORLD CUP 26
UFC
WWE
Lifestyle
MAHA
Health
Travel
Food & Drink
Games & Quizzes
Health
Cancer
Weight Loss
Brain Health
Heart Health
Healthy Living
Video
AI
More
Expand / Collapse search
Log In
Watch
            TV
Menu
Games
Expand/Collapse Menu
5 Across the Letter Word Game
DownWords
Sudoku
Swap Words
Word Search
Icon Match
Daily Crossword Puzzle
Mini Cr

In [6]:
# Define our system prompt - you can experiment with this later, changing the last sentence to 'Respond in markdown in Spanish."

system_prompt = "You are a political bias rating assistant.  \
Your job is to analyze text and determine if it leans left or right politically.  \
You must always output percentages that add up to 100%, in this exact format:  \
- Left leaning: X%  \
- Right leaning: Y%  \
Do not explain or add any extra text. Just return the percentages.  \
"

In [7]:
# A function that writes a User Prompt that asks for summaries of websites:

def user_prompt_for(website):
    user_prompt = f"Analyze the following article and provide the left vs. right leaning percentages:  {website.title}"   
    user_prompt += website.text
    return user_prompt

In [8]:
print(user_prompt_for(ed))

Analyze the following article and provide the left vs. right leaning percentages:  FTC seeks stories from those harmed by transgender medical care practices | Fox NewsFox News Media
Fox News Media
Fox Business
Fox Nation
Fox News Audio
Fox Weather
Outkick
Fox Noticias
Books
Fox News
U.S.
Crime
Immigration
Terror
True Crime
Bryan Kohberger
Luigi Mangione
Politics
Donald Trump
Senate
House
Judiciary
Foreign Policy
Fox News Polls
Elections
World
U.N.
Conflicts
Terrorism
Disasters
Global Economy
Environment
Religion
Scandals
Opinion
Media
Fox Nation Coverage
Entertainment
Exclusives
Royal Families
Country Music
Sylvester Stallone
Matthew McConaughey
Sports
NFL
College Football
MLB
NASCAR
INDYCAR
NBA
NHL
Golf
Tennis
FIFA WORLD CUP 26
UFC
WWE
Lifestyle
MAHA
Health
Travel
Food & Drink
Games & Quizzes
Health
Cancer
Weight Loss
Brain Health
Heart Health
Healthy Living
Video
AI
More
Expand / Collapse search
Log In
Watch
            TV
Menu
Games
Expand/Collapse Menu
5 Across the Letter Word Game

In [9]:
# See how this function creates exactly the format above

def messages_for(website):
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt_for(website)}
    ]

In [10]:
messages_for(ed)

[{'role': 'system',
  'content': 'You are a political bias rating assistant.  Your job is to analyze text and determine if it leans left or right politically.  You must always output percentages that add up to 100%, in this exact format:  - Left leaning: X%  - Right leaning: Y%  Do not explain or add any extra text. Just return the percentages.  '},
 {'role': 'user',
  'content': 'Analyze the following article and provide the left vs. right leaning percentages:  FTC seeks stories from those harmed by transgender medical care practices | Fox NewsFox News Media\nFox News Media\nFox Business\nFox Nation\nFox News Audio\nFox Weather\nOutkick\nFox Noticias\nBooks\nFox News\nU.S.\nCrime\nImmigration\nTerror\nTrue Crime\nBryan Kohberger\nLuigi Mangione\nPolitics\nDonald Trump\nSenate\nHouse\nJudiciary\nForeign Policy\nFox News Polls\nElections\nWorld\nU.N.\nConflicts\nTerrorism\nDisasters\nGlobal Economy\nEnvironment\nReligion\nScandals\nOpinion\nMedia\nFox Nation Coverage\nEntertainment\nExc

In [11]:
# And now: call the OpenAI API. You will get very familiar with this!

def summarize(url):
    website = Website(url)
    response = openai.chat.completions.create(
        model = "gpt-4o-mini",
        messages = messages_for(website)
    )
    return response.choices[0].message.content

In [12]:
summarize("https://www.foxnews.com/opinion/im-gender-detransitioner-i-told-ftc-how-doctors-abused-power-over-me")

'- Left leaning: 20%  \n- Right leaning: 80%  '

In [13]:
# A function to display this nicely in the Jupyter output, using markdown

def display_summary(url):
    summary = summarize(url)
    display(Markdown(summary))

In [17]:
display_summary("https://www.ndtv.com/opinion/inside-trumps-quite-takeover-of-silicon-valley-9350071")

- Left leaning: 60%  
- Right leaning: 40%  