In [3]:
from dotenv import load_dotenv
import os
from openai import OpenAI
import requests
from bs4 import BeautifulSoup
from IPython.display import display, Markdown


## Connecting to OpenAI

In [5]:
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()
api_key = os.getenv('OPENAI_API_KEY')

# Sanity-check the key without ever printing its value.
# The whitespace check runs before the prefix check: a key with leading
# spaces/tabs would otherwise be misreported as having the wrong prefix.
if not api_key:
    print("No API key was found")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start sk-proj-")
else:
    print("API key found and looks good so far!")

API key found and looks good so far!


In [6]:
# Shared API client used by every call below. No key is passed explicitly,
# so the client is expected to pick it up from the environment loaded above.
openai = OpenAI()

### Quick API call to OpenAI API

In [7]:
# Smoke test: send a single user message and show the raw response object.
message = "Hello chatGPT, This is my test message to you through API"
response = openai.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": message}],
)
response

ChatCompletion(id='chatcmpl-BpFWEUEccZZCMC7Dix74miut83kbD', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Hello! It looks like your test message has come through successfully. How can I assist you today?', refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=None))], created=1751554082, model='gpt-4o-mini-2024-07-18', object='chat.completion', service_tier='default', system_fingerprint='fp_34a54ae93c', usage=CompletionUsage(completion_tokens=20, prompt_tokens=20, total_tokens=40, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))

In [9]:
# Re-display the response object returned by the call above.
response

ChatCompletion(id='chatcmpl-BpFWEUEccZZCMC7Dix74miut83kbD', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Hello! It looks like your test message has come through successfully. How can I assist you today?', refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=None))], created=1751554082, model='gpt-4o-mini-2024-07-18', object='chat.completion', service_tier='default', system_fingerprint='fp_34a54ae93c', usage=CompletionUsage(completion_tokens=20, prompt_tokens=20, total_tokens=40, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))

In [14]:
# Drill into the first entry of the response's `choices` list.
response.choices[0]

Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Hello! It looks like your test message has come through successfully. How can I assist you today?', refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=None))

In [13]:
# The `finish_reason` field of the first choice.
response.choices[0].finish_reason

'stop'

In [16]:
# The reply text itself: this is the field the rest of the notebook uses.
response.choices[0].message.content


'Hello! It looks like your test message has come through successfully. How can I assist you today?'

### Display Website content

#### Using class creation in Python

In [17]:
class Website:
    """Fetch a web page and hold its title and visible text.

    Attributes:
        url: The address that was requested.
        title: The page title, 'No Title Found' when the page has no usable
            title, or None when the page could not be fetched or parsed.
        text: Page text with script/style/img/input elements removed, or
            None when the page could not be fetched or parsed.
    """

    def __init__(self, url, timeout=10):
        """Download ``url`` and extract its title and text.

        Failures are reported with ``print`` instead of being raised, so a
        failed fetch leaves ``title`` and/or ``text`` as None.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds to wait for the server before giving up.
        """
        self.url = url
        self.title = None
        self.text = None

        # Set custom headers, especially user-agent, to avoid issues with
        # some websites blocking the request.
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }
        try:
            # Make an HTTP request to fetch page content.
            # `headers` must be passed by keyword: the second positional
            # argument of requests.get is `params` (query string), so a
            # positional dict would never be sent as request headers.
            response = requests.get(url, headers=headers, timeout=timeout)
            response.raise_for_status()  # raise an exception for HTTP errors

            # Parse the website content using BeautifulSoup
            soup = BeautifulSoup(response.content, 'html.parser')

            # Extract the title. `.string` is None for an empty <title> or
            # one containing nested tags, so fall back in that case too.
            raw_title = soup.title.string if soup.title else None
            self.title = (raw_title or "").strip() or 'No Title Found'

            # Some documents have no <body>; use the whole document then.
            root = soup.body if soup.body is not None else soup

            # Remove irrelevant tags
            for irrelevant in root(["script", "style", "img", "input"]):
                irrelevant.decompose()

            # Extract text, stripping unnecessary whitespace and adding
            # newlines between elements
            self.text = root.get_text(separator="\n", strip=True)

        except requests.exceptions.RequestException as ex:
            print(f"Error fetching {url}: {ex}")
        except Exception as ex:
            # Deliberate best-effort: report parsing problems, keep going.
            print(f"Error processing the website content: {ex}")

    def get_title(self):
        """Return the title of the webpage (None if the fetch failed)."""
        return self.title

    def get_text(self):
        """Return the cleaned text content of the webpage (None if the fetch failed)."""
        return self.text
        

In [18]:
# Try the Website class on a live page
mywebsite = Website("https://cnn.com/")
print("Title:", mywebsite.get_title())
text_content = mywebsite.get_text()

# Show a short preview only; the full page text is long.
if not text_content:
    print("No text content available.")
else:
    print("Text Content:", text_content[:500])  # first 500 characters

Title: Breaking News, Latest News and Videos | CNN
Text Content: CNN values your feedback
1. How relevant is this ad to you?
2. Did you encounter any technical issues?
Video player was slow to load content
Video content never loaded
Ad froze or did not finish loading
Video content did not start after ad
Audio on ad was too loud
Other issues
Ad never loaded
Ad prevented/slowed the page from loading
Content moved around while ad loaded
Ad was repetitive to ads I've seen previously
Other issues
Cancel
Submit
Thank You!
Your effort and contribution in providing t


### Prompt Engineering

#### Types of prompts

A <b>system prompt</b> -- tells the model what task it is performing and what tone it should use

A <b>user prompt</b> -- the conversation starter that the model should reply to

In [37]:
# System instructions for every summary request. Adjacent string literals
# are joined by Python, so this is a single one-line string.
system_prompt = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown."
)

In [38]:
# Variant of the system instructions that asks for the answer in French.
system_prompt1 = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown in French"
)

In [39]:
def user_prompt_for(mywebsite):
    """Build the user prompt asking for a summary of a fetched page.

    Args:
        mywebsite: Object exposing ``title`` and ``text`` attributes
            (for example a ``Website`` instance).

    Returns:
        str: The instruction text followed by the page text. When the page
        text is missing (``None``), nothing is appended after the
        instructions instead of raising ``TypeError``.
    """
    user_prompt = f"You are looking at a website titled {mywebsite.title}"
    user_prompt += (
        "\nThe contents of this website is as follows; "
        "please provide a short summary of this website in markdown. "
        "If it includes news or announcements, then summarize these too.\n\n"
    )
    # `text` stays None when fetching/parsing failed; str += None would raise.
    user_prompt += mywebsite.text or ""
    return user_prompt

In [40]:
# Preview the full prompt text that would be sent as the user message.
print(user_prompt_for(mywebsite))

You are looking at a website titled Breaking News, Latest News and Videos | CNN
The contents of this website is as follows; please provide a short summary of this website in markdown. If it includes news or announcements, then summarize these too.

CNN values your feedback
1. How relevant is this ad to you?
2. Did you encounter any technical issues?
Video player was slow to load content
Video content never loaded
Ad froze or did not finish loading
Video content did not start after ad
Audio on ad was too loud
Other issues
Ad never loaded
Ad prevented/slowed the page from loading
Content moved around while ad loaded
Ad was repetitive to ads I've seen previously
Other issues
Cancel
Submit
Thank You!
Your effort and contribution in providing this feedback is much
                                        appreciated.
Close
Ad Feedback
Close icon
US
World
Politics
Business
Health
Entertainment
Style
Travel
Sports
Science
Climate
Weather
Ukraine-Russia War
Israel-Hamas War
Underscored
Amazon P

### Messages

The API from OpenAI expects to receive messages in a particular structure. Many of the other APIs share this structure:

In [23]:
# Minimal conversation: one system instruction followed by the user's question.
messages = [
    dict(role="system", content="You are a snarky assistant"),
    dict(role="user", content="What is 2 + 5?"),
]

In [25]:
# Send the system + user messages defined above and print only the reply text.

response = openai.chat.completions.create(
    model="gpt-4o-mini",
    messages=messages,
)
print(response.choices[0].message.content)

Oh, a math genius in the house! That would be 7. You got any tougher ones, or should I just get my calculator?


In [27]:
# See how this function creates exactly the format above

def messages_for(mywebsite, system_message=None):
    """Build the chat message list used to summarize a page.

    Args:
        mywebsite: Object accepted by ``user_prompt_for`` (e.g. a ``Website``).
        system_message: Optional system prompt text. When omitted, the
            notebook-level ``system_prompt`` is used, as before.

    Returns:
        list[dict]: A system message followed by a user message, each with
        ``role`` and ``content`` keys.
    """
    if system_message is None:
        # Looked up at call time so later edits to the global are honoured.
        system_message = system_prompt
    return [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_prompt_for(mywebsite)}
    ]

In [29]:
# Inspect the message list built for the page fetched earlier.
messages_for(mywebsite)

[{'role': 'system',
  'content': 'You are an assistant that analyzes the contents of a website and provides a short summary, ignoring text that might be navigation related. Respond in markdown.'},
 {'role': 'user',
  'content': 'You are looking at a website titled Breaking News, Latest News and Videos | CNN\nThe contents of this website is as follows; please provide a short summary of this website in markdown. If it includes news or announcements, then summarize these too.\n\nCNN values your feedback\n1. How relevant is this ad to you?\n2. Did you encounter any technical issues?\nVideo player was slow to load content\nVideo content never loaded\nAd froze or did not finish loading\nVideo content did not start after ad\nAudio on ad was too loud\nOther issues\nAd never loaded\nAd prevented/slowed the page from loading\nContent moved around while ad loaded\nAd was repetitive to ads I\'ve seen previously\nOther issues\nCancel\nSubmit\nThank You!\nYour effort and contribution in providing th

In [30]:
def messages_for_french(mywebsite):
    """Build the summary message list using the French system prompt.

    Returns a two-element list: the system message carrying ``system_prompt1``
    and the user message produced by ``user_prompt_for(mywebsite)``.
    """
    system_message = {"role": "system", "content": system_prompt1}
    user_message = {"role": "user", "content": user_prompt_for(mywebsite)}
    return [system_message, user_message]

In [32]:
# Same inspection, this time with the French system prompt.
messages_for_french(mywebsite)

[{'role': 'system',
  'content': 'You are an assistant that analyzes the contents of a website and provides a short summary, ignoring text that might be navigation related. Respond in markdown in French'},
 {'role': 'user',
  'content': 'You are looking at a website titled Breaking News, Latest News and Videos | CNN\nThe contents of this website is as follows; please provide a short summary of this website in markdown. If it includes news or announcements, then summarize these too.\n\nCNN values your feedback\n1. How relevant is this ad to you?\n2. Did you encounter any technical issues?\nVideo player was slow to load content\nVideo content never loaded\nAd froze or did not finish loading\nVideo content did not start after ad\nAudio on ad was too loud\nOther issues\nAd never loaded\nAd prevented/slowed the page from loading\nContent moved around while ad loaded\nAd was repetitive to ads I\'ve seen previously\nOther issues\nCancel\nSubmit\nThank You!\nYour effort and contribution in pro

### Summarize the website now

In [41]:
# call the OpenAI API. 

def summarize(url, model="gpt-4o-mini"):
    """Fetch ``url`` and return the model's summary of its content.

    Args:
        url: Address of the page to summarize.
        model: Name of the chat model to call; defaults to the model used
            throughout this notebook.

    Returns:
        str: The ``content`` of the first choice in the chat completion.

    Raises:
        ValueError: If no text could be extracted from the page, so that no
            API call is made for an empty prompt.
    """
    website = Website(url)
    # Website reports fetch/parse errors by printing and leaving text unset.
    if not website.text:
        raise ValueError(f"Could not retrieve any text content from {url}")
    response = openai.chat.completions.create(
        model=model,
        messages=messages_for(website)
    )
    return response.choices[0].message.content

In [42]:
# The return value is a plain string, so the notebook shows the markdown source unrendered.
summarize("https://cnn.com")

'# CNN Website Summary\n\nCNN is a major news outlet providing up-to-date reporting on various topics including US and international news, politics, health, entertainment, business, sports, science, and climate issues. The site features live updates on significant events, breaking news articles, videos, and analysis.\n\n## Key News Highlights:\n\n- **Trump’s Megabill**: The House is preparing for a final vote on Trump’s proposed megabill, with Speaker of the House Mike Johnson addressing GOP excitement and the potential repercussions of the bill.\n  \n- **Death of Diogo Jota**: Liverpool and Portugal soccer star Diogo Jota tragically died at the age of 28 in a car crash. Many public figures, including FIFA President and UK Prime Minister, expressed their condolences.\n\n- **Job Growth**: The US economy added 147,000 jobs in June, indicating a better-than-expected unemployment rate.\n\n- **International Incidents**: Reports include fatal incidents such as a ferry sinking in Bali and esc

In [46]:
 # A function to display this nicely in the Jupyter output, using markdown

def display_summary(url):
    """Summarize ``url`` and render the result as markdown in the cell output."""
    display(Markdown(summarize(url)))

In [47]:
# Render the summary of the CNN front page as formatted markdown.
display_summary("https://cnn.com")

# Summary of CNN Website

CNN provides up-to-date breaking news and analysis on various topics, including:

- **US Politics:** Current developments in the US Congress, notably around Trump's proposed legislation, referred to as his "megabill." The House is gearing up for a final vote, with Speaker Johnson and others discussing the implications and disagreements surrounding it.
  
- **International News:** Reports on significant events such as the increasing violence in Gaza, updates on the Ukraine-Russia conflict, and a tragic ferry sinking near Bali with numerous casualties. 

- **Business and Economy:** Updates on employment statistics, revealing a higher-than-expected job creation number for June. 

- **Health and Safety:** Information on health-related issues, including a report detailing significant improvements on heart attack mortality and updates on national park funding.

- **Entertainment:** Noteworthy mentions include the tragic death of young soccer star Diogo Jota in a car crash, along with various pop culture stories. 

Furthermore, the website features a wide array of topics from health to travel, sports, and science. CNN also promotes interactive content, including quizzes and games, aimed at engaging readers in a variety of thematic areas. 

For more consistent updates, CNN emphasizes the importance of reader feedback and encourages subscriptions for exclusive content.

In [49]:
# Try the summarizer on a second site.
display_summary("https://isoft.guru")

# iSoft Overview

iSoft is a human resources provider based in Bridgewater, New Jersey, specializing in Consulting, Staffing, and Training services. The company aims to be the preferred choice for organizations worldwide.

## Services

- **Training**: iSoft offers high-quality and cost-effective workshops designed to develop workforce skills in various organizations.
- **Consulting**: The company provides consulting services tailored to meet the specific needs of its clients.

While the website does not highlight any news or announcements, it emphasizes its commitment to delivering unmatched services in the HR sector.

In [50]:
# Try the summarizer on a third site.
display_summary("https://anthropic.com")

# Anthropic Overview

The Anthropic website presents information about its AI products, services, and research initiatives focused on ethical and responsible AI development. The centerpiece of their offerings is **Claude**, a series of advanced AI models designed to assist in various applications, including coding and customer support.

## Key Features

- **Claude Models**: Information on different versions of Claude, including:
  - **Claude Opus 4**: The most advanced model available, catering to complex tasks and interactions.
  - **Claude Sonnet 4**: Another powerful option in the Claude family.
  - **Claude Haiku 3.5**: A prior version with specific capabilities.

- **Pricing Plans**: Various plans are available for individual and enterprise users, including Max, Team, Enterprise, and Education plans.

- **API and Development**: Resources for developers to integrate and build applications using Claude's capabilities, including access to developer documentation and support.

- **Educational Resources**: Anthropic Academy offers materials to learn how to build with Claude, including case studies and customer stories.

- **Commitments to Safety**: The website outlines Anthropic's focus on AI safety, transparency, and responsible scaling policy, ensuring AI serves humanity's long-term interests.

## News and Announcements

- **ISO 42001 Certification**: Anthropic announces its achievement of ISO 42001 certification, emphasizing its commitment to high standards in AI development and operation.

Overall, the Anthropic website serves as a comprehensive resource for exploring AI developments, pricing, and principles guiding the safe deployment of AI solutions.

## Exercise

In [52]:
# Step 1: Create your prompts
# The system prompt gets its own name here: rebinding the global
# `system_prompt` would silently change what messages_for()/summarize()
# send if any of the earlier cells were re-run after this one.

exercise_system_prompt = "you are a humourous assistant that makes fun of everyhing"
user_prompt = """
    Working remotely can be challenging, but there are many strategies to stay productive. 
This includes creating a dedicated workspace, having a set schedule, and taking regular breaks...
"""

# Step 2: Make the messages list
messages = [
    {"role": "system", "content": exercise_system_prompt},
    {"role": "user", "content": user_prompt}
]
# Step 3: Call OpenAI

response = openai.chat.completions.create(
    model="gpt-4o-mini",  # or "gpt-3.5-turbo", etc.
    messages=messages
)

# Step 4: print the result

print(response.choices[0].message.content)

Absolutely! Working remotely is like being in a relationship with your couch—comforting, yet ultimately a bad idea when it comes to productivity! Creating a dedicated workspace is great, but let’s be honest, it usually ends up being the kitchen table, surrounded by snack distractions and the occasional cat who thinks your laptop is a personal throne. 

And having a set schedule? Oh, that's adorable! It usually looks more like a flexible guideline shaped by your pajama pants and the siren call of Netflix's "Are you still watching?" pop-up.

As for regular breaks, let’s just admit that break time often extends into an uncharted territory where you find yourself watching videos of cats riding Roombas—turns out, there’s a fine line between "taking a break" and "whoa, I've just binged 3 seasons of a show about 19th-century sock knitting.”

But hey, at least those strategies make us feel productive while we’re trying to avoid becoming full-time snack reviewers!
