In [29]:
from dotenv import load_dotenv
import os
from openai import OpenAI
import requests
from bs4 import BeautifulSoup
from IPython.display import display, Markdown


## Connecting to OpenAI

In [2]:
# Load environment variables (python-dotenv reads them from a .env file) and
# sanity-check the OpenAI key before any API call is attempted.
load_dotenv()
api_key = os.getenv('OPENAI_API_KEY')

# The whitespace check must come before the prefix check: a key with a leading
# space or tab would otherwise be misreported as "doesn't start sk-proj-".
if not api_key:
    print("No API key was found")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start sk-proj-")
else:
    print("API key found and looks good so far!")

API key found and looks good so far!


In [3]:
# Create the client object used by the API calls in the rest of the notebook.
# NOTE(review): no key is passed here, so the client presumably reads
# OPENAI_API_KEY from the environment - confirm against the OpenAI SDK docs.
openai =  OpenAI()

### Quick test call to the OpenAI API

In [4]:
# Smoke test: send a single user message and show the raw response object.
message = "Hello chatGPT, This is my test message to you through API"
response = openai.chat.completions.create(
    model='gpt-4o-mini',
    messages=[dict(role='user', content=message)],
)
response

ChatCompletion(id='chatcmpl-AumrHcDbqJolf2fYQ9uhzC8bVsJdO', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Hello! It looks like your test message is working perfectly. How can I assist you today?', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1738097543, model='gpt-4o-mini-2024-07-18', object='chat.completion', service_tier='default', system_fingerprint='fp_bd83329f63', usage=CompletionUsage(completion_tokens=20, prompt_tokens=20, total_tokens=40, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))

In [5]:
# Drill into the response object to show only the reply text of the first choice.
response.choices[0].message.content

'Hello! It looks like your test message is working perfectly. How can I assist you today?'

### Create a class

#### Display website content 

In [6]:
class Website:
    """A fetched web page reduced to its title and visible body text.

    Construction performs the HTTP request and the HTML parsing. Failures are
    reported with ``print`` rather than raised, so callers should check whether
    ``title`` / ``text`` are still ``None`` afterwards.

    Attributes:
        url: The address that was requested.
        title: Text of the page's ``<title>``, ``'No Title Found'`` when the
            page has no usable title, or ``None`` if fetching failed.
        text: Newline-separated text of ``<body>`` with script, style, img and
            input elements removed; ``""`` when the page has no ``<body>``;
            ``None`` if fetching or processing failed.
    """

    def __init__(self, url, timeout=10):
        """Fetch ``url`` and extract its title and text.

        Args:
            url: Address of the page to download.
            timeout: Seconds to wait for the server, so an unresponsive site
                cannot hang the notebook indefinitely.
        """
        self.url = url
        self.title = None
        self.text = None

        # Set custom headers, especially User-Agent, to avoid issues with some
        # websites blocking the request.
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }
        try:
            # headers must be passed by keyword: the second positional argument
            # of requests.get is `params`, which would put the User-Agent into
            # the query string instead of sending it as a header.
            response = requests.get(url, headers=headers, timeout=timeout)
            response.raise_for_status()  # raise an exception for HTTP errors

            # Parse the website content using BeautifulSoup
            soup = BeautifulSoup(response.content, 'html.parser')

            # <title> may be absent, or present without a single string child
            # (in which case .string is None)
            title_tag = soup.title
            if title_tag is not None and title_tag.string:
                self.title = title_tag.string
            else:
                self.title = 'No Title Found'

            body = soup.body
            if body is None:
                # Nothing to extract from a page that has no <body>
                self.text = ""
            else:
                # Remove irrelevant tags
                for irrelevant in body(["script", "style", "img", "input"]):
                    irrelevant.decompose()

                # Extract text, stripping unnecessary whitespace and adding
                # newlines between elements
                self.text = body.get_text(separator="\n", strip=True)

        except requests.exceptions.RequestException as ex:
            print(f"Error fetching {url}: {ex}")
        except Exception as ex:
            print(f"Error processing the website content: {ex}")

    def get_title(self):
        """Return the title of the webpage (``None`` if fetching failed)."""
        return self.title

    def get_text(self):
        """Return the cleaned text content of the webpage (``None`` if fetching failed)."""
        return self.text
        

In [7]:
# Usage of the Website class: fetch the CNN home page and preview what was scraped.
mywebsite = Website("https://cnn.com/")
print("Title:", mywebsite.get_title())

text_content = mywebsite.get_text()
if not text_content:
    print("No text content available.")
else:
    # Only the first 500 characters are shown to keep the cell output short
    print("Text Content:", text_content[:500])

Title: Breaking News, Latest News and Videos | CNN
Text Content: CNN values your feedback
1. How relevant is this ad to you?
2. Did you encounter any technical issues?
Video player was slow to load content
Video content never loaded
Ad froze or did not finish loading
Video content did not start after ad
Audio on ad was too loud
Other issues
Ad never loaded
Ad prevented/slowed the page from loading
Content moved around while ad loaded
Ad was repetitive to ads I've seen previously
Other issues
Cancel
Submit
Thank You!
Your effort and contribution in providing t


### Prompt Engineering

#### Types of prompts

A <b>system prompt</b> -- tells the model what task it is performing and what tone it should use

A <b>user prompt</b> -- the conversation starter that the model should reply to

In [8]:
# Baseline system prompt: sets the summarizer role and asks for markdown output.
system_prompt = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown."
)

In [9]:
# Variant of the system prompt that additionally asks for the answer in French.
system_prompt1 = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown in French"
)

In [14]:
def user_prompt_for(mywebsite):
    """Build the user message asking for a summary of ``mywebsite``.

    Args:
        mywebsite: Object exposing ``title`` and ``text`` attributes (for
            example a ``Website`` instance).

    Returns:
        str: A line naming the page title, the summarization instructions, a
        blank line, and then the page text.
    """
    instructions = (
        "\nThe contents of this website is as follows; "
        "please provide a short summary of this website in markdown. "
        "If it includes news or announcements, then summarize these too.\n\n"
    )
    # Website leaves text as None when the page could not be fetched; treat that
    # as an empty page so building the prompt does not raise TypeError.
    page_text = mywebsite.text or ""
    return f"You are looking at a website titled {mywebsite.title}" + instructions + page_text

In [15]:
# Preview the prompt text built for the page fetched earlier (title line,
# instructions, then the scraped page text).
print(user_prompt_for(mywebsite))

You are looking at a website titled Breaking News, Latest News and Videos | CNN
The contents of this website is as follows; please provide a short summary of this website in markdown. If it includes news or announcements, then summarize these too.

CNN values your feedback
1. How relevant is this ad to you?
2. Did you encounter any technical issues?
Video player was slow to load content
Video content never loaded
Ad froze or did not finish loading
Video content did not start after ad
Audio on ad was too loud
Other issues
Ad never loaded
Ad prevented/slowed the page from loading
Content moved around while ad loaded
Ad was repetitive to ads I've seen previously
Other issues
Cancel
Submit
Thank You!
Your effort and contribution in providing this feedback is much
                                        appreciated.
Close
Ad Feedback
Close icon
US
World
Politics
Business
Health
Entertainment
Style
Travel
Sports
Science
Climate
Weather
Ukraine-Russia War
Israel-Hamas War
Underscored
Games
Mo

### Messages

The API from OpenAI expects to receive messages in a particular structure. Many of the other APIs share this structure:

In [16]:
# Minimal two-message conversation: a system instruction followed by one user turn.
messages = [
    dict(role="system", content="You are a snarky assistant"),
    dict(role="user", content="What is 2 + 2?"),
]

In [17]:
# Send the system + user messages to the model; only the reply text is printed,
# while the full response object stays available in `response`.

response = openai.chat.completions.create(
    model="gpt-4o-mini",
    messages=messages,
)
print(response.choices[0].message.content)

Oh, you’re really hitting me with the tough ones today! It's 4. Shocking, I know.


In [19]:
# Builds the same system/user structure as the hand-written `messages` list above.

def messages_for(mywebsite):
    """Return the [system, user] message list for summarizing ``mywebsite``.

    The system entry carries the module-level ``system_prompt``; the user entry
    carries the prompt built by ``user_prompt_for(mywebsite)``.
    """
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt_for(mywebsite)}
    return [system_message, user_message]

In [20]:
# Inspect the message list produced for the page fetched earlier.
messages_for(mywebsite)

[{'role': 'system',
  'content': 'You are an assistant that analyzes the contents of a website and provides a short summary, ignoring text that might be navigation related. Respond in markdown.'},
 {'role': 'user',
  'content': "You are looking at a website titled Breaking News, Latest News and Videos | CNN\nThe contents of this website is as follows; please provide a short summary of this website in markdown. If it includes news or announcements, then summarize these too.\n\nCNN values your feedback\n1. How relevant is this ad to you?\n2. Did you encounter any technical issues?\nVideo player was slow to load content\nVideo content never loaded\nAd froze or did not finish loading\nVideo content did not start after ad\nAudio on ad was too loud\nOther issues\nAd never loaded\nAd prevented/slowed the page from loading\nContent moved around while ad loaded\nAd was repetitive to ads I've seen previously\nOther issues\nCancel\nSubmit\nThank You!\nYour effort and contribution in providing thi

In [21]:
def messages_for_french(mywebsite):
    """Return the [system, user] message list using the French system prompt.

    Same structure as ``messages_for``, except that the system entry carries
    ``system_prompt1`` instead of ``system_prompt``.
    """
    system_message = {"role": "system", "content": system_prompt1}
    user_message = {"role": "user", "content": user_prompt_for(mywebsite)}
    return [system_message, user_message]

In [22]:
# Same inspection as for messages_for, but with the French system prompt.
messages_for_french(mywebsite)

[{'role': 'system',
  'content': 'You are an assistant that analyzes the contents of a website and provides a short summary, ignoring text that might be navigation related. Respond in markdown in French'},
 {'role': 'user',
  'content': "You are looking at a website titled Breaking News, Latest News and Videos | CNN\nThe contents of this website is as follows; please provide a short summary of this website in markdown. If it includes news or announcements, then summarize these too.\n\nCNN values your feedback\n1. How relevant is this ad to you?\n2. Did you encounter any technical issues?\nVideo player was slow to load content\nVideo content never loaded\nAd froze or did not finish loading\nVideo content did not start after ad\nAudio on ad was too loud\nOther issues\nAd never loaded\nAd prevented/slowed the page from loading\nContent moved around while ad loaded\nAd was repetitive to ads I've seen previously\nOther issues\nCancel\nSubmit\nThank You!\nYour effort and contribution in prov

In [23]:
# Fetch a page and ask the OpenAI API for a summary of it.

def summarize(url, model="gpt-4o-mini"):
    """Download ``url`` and return the model's summary of the page.

    Args:
        url: Address of the page to summarize.
        model: Name of the chat model to call; defaults to the model this
            notebook uses everywhere else.

    Returns:
        str: The content of the first choice in the chat completion.

    Raises:
        ValueError: If the page could not be fetched. Website reports the
            underlying error with print and leaves ``text`` as ``None``.
    """
    website = Website(url)
    # Fail early with a clear message instead of spending an API call on (or
    # crashing while building) a prompt that has no page content.
    if website.text is None:
        raise ValueError(f"Could not retrieve any content from {url}")
    response = openai.chat.completions.create(
        model=model,
        messages=messages_for(website),
    )
    return response.choices[0].message.content

In [25]:
# summarize returns a plain string, so the cell output shows it with literal \n escapes.
summarize("https://cnn.com")

"# CNN - Breaking News and Updates\n\nCNN's website provides a comprehensive hub for the latest news and videos from around the world, covering various categories including:\n\n- **US News**\n- **World Events**\n- **Politics**\n- **Business**\n- **Health**\n- **Entertainment**\n- **Sports**\n- **Science**\n\n## Recent Highlights\n\n- **Trump Administration Issues:** Reports indicate issues with federal grants being halted, leading to legal challenges by nonprofits. Organizations are struggling to access essential funds.\n  \n- **Ukraine-Russia Conflict:** The website features ongoing coverage of the conflict, including updates on North Korean soldiers reportedly fighting in Ukraine.\n\n- **Natural Disasters:** Coverage of the Los Angeles wildfires highlights the devastation and efforts made to rescue animals affected.\n\n- **Immigration Issues:** Reports include challenges faced by deported individuals and the impact of federal policies on various communities.\n\n- **Entertainment & Cu

In [30]:
 # A function to display this nicely in the Jupyter output, using markdown

def display_summary(url):
    """Summarize the page at ``url`` and render the result as Markdown in the cell output."""
    display(Markdown(summarize(url)))

In [31]:
# Same CNN page as in the summarize call, now rendered as Markdown rather than shown as a raw string.
display_summary("https://cnn.com")

# CNN Website Summary

The CNN website offers the latest news, analysis, and videos across a variety of topics, including U.S. and World news, Politics, Business, Health, Entertainment, Sports, Science, and Climate. It features live updates covering significant events such as the Ukraine-Russia War and the Israel-Hamas War. 

## Notable Headlines:
- **Federal Aid Freeze:** Live updates are provided on the Trump administration's federal aid freeze, which has left many organizations unable to access essential funds.
- **North Korean Soldiers:** Intelligence notes have surfaced regarding North Korean soldiers engaging in the Ukraine conflict, depicting their operational protocols.
- **Oklahoma Education:** The Oklahoma Board of Education has approved a proposal requiring parents to prove citizenship for student enrollment. 

## Additional Features:
- CNN offers various multimedia content, including video reports, podcasts, and trending topics.
- They also cover social issues, sports events, and provide shopping guides through CNN Underscored.
  
Overall, CNN serves as a comprehensive news source, summarizing current events and providing detailed analysis across diverse fields.

In [32]:
# Run the same summarizer against a second site.
display_summary("https://yahoo.com")

# Yahoo Overview

Yahoo is a comprehensive digital platform that offers a variety of services including email, news, finance, sports, and entertainment. The site features sections for:

- **News:** Daily updates covering US politics, global events, technology, health, and lifestyle topics. Key news releases include discussions on a federal funding freeze affecting safety-net programs and health services, alongside notable stories like a tragic incident at a high school track event.
  
- **Finance:** Real-time updates on stock market performance, personal finance tips, and investment strategies, with trending topics including significant market fluctuations attributed to competition in AI technology from China.

- **Sports:** In-depth coverage of major sports leagues such as the NFL, NBA, MLB, and NHL, including player updates, trade news, and game analyses. Recent stories include player suspensions and team strategies ahead of trade deadlines.

- **Lifestyle and Health:** Articles focusing on well-being, parenting, relationships, shopping guides, and health trends, including discussions on mental health and seasonal health issues.

- **Entertainment:** Content related to celebrities, movies, music, and shows, tracking box office performances and cultural trends.

Yahoo also integrates a weather feature, personalized finance portfolio management, and various interactive elements like games and horoscopes to engage users.

## Notable Recent Announcements
- **White House Memo on Funding Freeze:** Medicare, Medicaid, and social services have been noted as exempt from recent federal budget cuts, which highlight the implications for public health and education.
- **Nvidia Stock News:** The tech company's shares have seen significant volatility following concerns about competition in the AI industry, leading to substantial market analysis and forecasts.
- **Sports Updates:** Key sports figures have made headlines both for performance issues and trade possibilities as teams eye the upcoming deadlines.

With a robust mix of informative articles and engaging content, Yahoo remains a prominent hub for a diverse range of interests.

In [34]:
# Run the same summarizer against a third site.
display_summary("https://anthropic.com")

# Anthropic Website Summary

Anthropic is an AI safety and research company based in San Francisco, focusing on developing reliable and beneficial AI systems. The website features information about their AI model "Claude," with the latest version, Claude 3.5 Sonnet, now available. Anthropic emphasizes safety in AI by pushing for alignment and integrating user feedback.

## Key Highlights

- **Claude Models**: Introduction of Claude 3.5 Sonnet and Claude 3.5 Haiku as the newest iterations.
- **Recent Announcements**:
  - **Oct 22, 2024**: Launch of new Claude models and features including "computer use."
  - **Sep 4, 2024**: Overview of the Claude for Enterprise product.
  - Various research publications focused on AI safety and alignment.
  
## Core Mission
Anthropic's interdisciplinary team aims to lead in AI research and promote safety in AI technologies through extensive studies and the development of their products. The company invites applicants for open roles to contribute to its advancements in AI.