In [4]:
# imports

import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI

In [5]:
# Load environment variables from a file called .env

load_dotenv()
api_key = os.getenv('OPENAI_API_KEY')

# Check the key.
# Whitespace is tested before the prefix: a key with a leading space or tab
# also fails startswith(), and would otherwise be reported with the misleading
# "doesn't start sk-proj-" message instead of the whitespace hint.

if not api_key:
    print("No API key was found!")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start sk-proj-; please check you're using the right key")
else:
    print("API key found and looks good so far!")


API key found and looks good so far!


In [6]:
# Create the shared API client used by summarize() below.
# No key is passed explicitly; presumably the client picks up OPENAI_API_KEY
# from the environment populated by load_dotenv() above - confirm.
openai = OpenAI()

In [7]:
# A class to represent a Webpage

class Website:
    """
    A web page downloaded once and reduced to its title and visible text.

    Attributes:
        url: the address the page was requested from.
        title: string content of the <title> element, or "No title found".
        text: newline-separated text of the page with script, style, img and
            input elements removed.
    """

    def __init__(self, url, timeout=30):
        """
        Create this Website object from the given url using the BeautifulSoup library

        Args:
            url: address of the page to download.
            timeout: seconds passed to requests.get as its timeout, so a
                stalled server raises instead of hanging the notebook cell.
        """
        self.url = url
        response = requests.get(url, timeout=timeout)
        soup = BeautifulSoup(response.content, 'html.parser')

        # .string is None when <title> has no single string child (e.g. it is
        # empty); fall back so the literal text "None" never reaches the prompt.
        title = soup.title.string if soup.title else None
        self.title = title if title is not None else "No title found"

        # html.parser does not invent a <body>; for documents without one,
        # work on the whole parsed document instead of failing on None.
        root = soup.body if soup.body is not None else soup
        for irrelevant in root(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = root.get_text(separator="\n", strip=True)

In [8]:
# Let's try one out. Change the website and add print statements to follow along.

# Fetch one page, then show its title followed by its extracted text
ed = Website("https://ganitlabs.ai/")
print(ed.title, ed.text, sep="\n")

Home - Ganit Labs
Ganit Labs
Machine Learning, Deep Learning, AI, Data Science. Train the Machine to Think.
Home
About
Services
Case Studies
Contact
Machine Learning, Deep Learning, Artificial Intelligence
Train the machine to think
Sorry, your browser does not support inline SVG.
What we do
Our mission is to enable businesses adopt Artificial Intelligence (AI) to stay agile and competitive through incremental and transformative impacts.
Our objective is to help your business harness the power of Machine Learning (ML), Neural Nets, Deep Learning and statistical inference to enhance efficiencies and improve the bottom line.
We develop and implement
custom
AI solutions based on your specific business needs. And work with you from inception, development to final deployment.
LEARN MORE
AI, a data driven approach
Do machines have AI?
EXPERTISE
Customized, end-to-end, reliable, and scalable solution, from design to deployment
Data Science - AI & Machine Learning
Data Engineering
Cloud & Edge

In [9]:
# Define our system prompt - you can experiment with this later, changing the last sentence to "Respond in markdown in Spanish."

system_prompt = " ".join([
    "You are an assistant that analyzes the contents of a website",
    "and provides a short summary, ignoring text that might be navigation related.",
    "Respond in markdown.",
])

In [10]:
# A function that writes a User Prompt that asks for summaries of websites:

def user_prompt_for(website):
    """
    Build the user message asking for a markdown summary of `website`.

    Reads website.title and website.text and returns one string: a line
    naming the title, the fixed instructions, a blank line, then the page text.
    """
    heading = f"You are looking at a website titled {website.title}"
    instructions = (
        "\nThe contents of this website is as follows; "
        "please provide a short summary of this website in markdown. "
        "If it includes news or announcements, then summarize these too.\n\n"
    )
    return heading + instructions + website.text

In [11]:
# Inspect the assembled system prompt; the backslash continuations join it into a single line
system_prompt

'You are an assistant that analyzes the contents of a website and provides a short summary, ignoring text that might be navigation related. Respond in markdown.'

In [12]:
# Website defines no __repr__, so this shows only the default object repr
ed

<__main__.Website at 0x10619b090>

In [13]:
# print() rather than a bare expression, so the newlines in the prompt render as line breaks
print(user_prompt_for(ed))

You are looking at a website titled Home - Ganit Labs
The contents of this website is as follows; please provide a short summary of this website in markdown. If it includes news or announcements, then summarize these too.

Ganit Labs
Machine Learning, Deep Learning, AI, Data Science. Train the Machine to Think.
Home
About
Services
Case Studies
Contact
Machine Learning, Deep Learning, Artificial Intelligence
Train the machine to think
Sorry, your browser does not support inline SVG.
What we do
Our mission is to enable businesses adopt Artificial Intelligence (AI) to stay agile and competitive through incremental and transformative impacts.
Our objective is to help your business harness the power of Machine Learning (ML), Neural Nets, Deep Learning and statistical inference to enhance efficiencies and improve the bottom line.
We develop and implement
custom
AI solutions based on your specific business needs. And work with you from inception, development to final deployment.
LEARN MORE
AI

In [14]:
# See how this function creates exactly the format above

def messages_for(website):
    """
    Return the two-message chat list for `website`: the shared system prompt
    first, then the user prompt produced by user_prompt_for.
    """
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt_for(website)}
    return [system_message, user_message]

In [15]:
# Try this out, and then try for a few more websites

# Inspect the list of role/content dicts built for the page fetched earlier
messages_for(ed)

[{'role': 'system',
  'content': 'You are an assistant that analyzes the contents of a website and provides a short summary, ignoring text that might be navigation related. Respond in markdown.'},
 {'role': 'user',
  'content': 'You are looking at a website titled Home - Ganit Labs\nThe contents of this website is as follows; please provide a short summary of this website in markdown. If it includes news or announcements, then summarize these too.\n\nGanit Labs\nMachine Learning, Deep Learning, AI, Data Science. Train the Machine to Think.\nHome\nAbout\nServices\nCase Studies\nContact\nMachine Learning, Deep Learning, Artificial Intelligence\nTrain the machine to think\nSorry, your browser does not support inline SVG.\nWhat we do\nOur mission is to enable businesses adopt Artificial Intelligence (AI) to stay agile and competitive through incremental and transformative impacts.\nOur objective is to help your business harness the power of Machine Learning (ML), Neural Nets, Deep Learning

## Time to bring it together - the API for OpenAI is very simple!

In [16]:
def summarize(url, model="gpt-4o-mini"):
    """
    Fetch the page at `url` and return the model's summary of it.

    Args:
        url: address of the page to summarize.
        model: name of the chat model to request. Defaults to "gpt-4o-mini",
            the value this function previously hard-coded, so existing
            one-argument calls behave as before.

    Returns:
        The message content of the first choice in the chat completion response.
    """
    website = Website(url)
    response = openai.chat.completions.create(
        model=model,
        messages=messages_for(website)
    )
    return response.choices[0].message.content

In [17]:
# summarize() returns a plain string, so the cell output is its repr with escaped newlines
summarize("https://ganitlabs.ai/")

'# Summary of Ganit Labs Website\n\nGanit Labs is a technology company focused on enabling businesses to leverage Artificial Intelligence (AI) and related technologies to enhance efficiencies and maintain competitiveness. Their mission revolves around the adoption and implementation of Machine Learning (ML), Deep Learning, and Data Science tailored to specific business needs. The company offers customized solutions that cover the entire process from design to deployment.\n\n## Key Areas of Expertise:\n- **Data Science** with applications in AI and Machine Learning\n- **Data Engineering**\n- **Cloud & Edge Computing (IoT)**\n\n## Selected Projects:\nGanit Labs has worked on diverse case studies across various sectors, including:\n- **Food & Beverage**: Prediction of recipes\n- **Oil & Gas**: Predicting equipment failure\n- **Health Care & Genomics**: Implementation of ML in genomics\n\n## Global Impact:\nThe website discusses the transformative potential of AI, citing a McKinsey Global 

In [18]:
# A function to display this nicely in the Jupyter output, using markdown

def display_summary(url):
    """
    Summarize the page at `url` and render the result as Markdown in the cell output.
    """
    display(Markdown(summarize(url)))

In [19]:
# Render the summary as formatted Markdown instead of a raw string
display_summary("https://ganitlabs.ai/")

# Summary of Ganit Labs Website

Ganit Labs specializes in Machine Learning, Deep Learning, Artificial Intelligence, and Data Science, aiming to help businesses harness these technologies for improved efficiency and competitiveness. 

## Key Offerings
- **Custom AI Solutions**: Tailored AI implementations from inception to deployment based on specific business needs.
- **Expertise Areas**:
  - Data Science & Machine Learning
  - Data Engineering
  - Cloud & Edge (IoT)

## Case Studies
Ganit Labs showcases a variety of selected projects, including:
- **Prediction of Recipe** (Food & Beverage)
- **Predict Equipment Failure** (Oil & Gas)
- **Machine Learning in Genomics** (Health Care & Genomics)

## Industry Insight
The site emphasizes the transformative impact of AI, projecting that by 2030, a significant percentage of companies will adopt various AI technologies, contributing to substantial global economic growth.

## Client Testimonials
The company highlights its commitment to client satisfaction, with positive feedback on their customized AI solutions from various sectors, including oilfield services and beverage production.

For more information, visit their [About](#), [Services](#), or [Case Studies](#) pages.

In [19]:
# Try a different site: a news home page
display_summary("https://cnn.com")

# Summary of CNN Website Content

CNN’s website provides a comprehensive platform for breaking news, live updates, videos, and various topics ranging from politics to entertainment. It serves as a primary source for local and international news and engages users in a variety of media formats, including articles, videos, and podcasts.

## Key Topics Covered:
- **World Affairs**: Includes in-depth coverage of major global conflicts such as the Israel-Hamas war and the Ukraine-Russia war.
- **Politics**: Features news related to elections, U.S. government activities, and significant political events.
- **Health**: Articles on health-related issues, fitness advice, and studies addressing public health concerns.
- **Entertainment**: Highlights in movies, television, and celebrity news.
- **Science and Technology**: Reports on space exploration, technological advancements, and innovations.

## Recent News Highlights:
- **Military and Political Developments**: Analysis of military purges in China and President Putin's strategic communications relating to Ukraine.
- **Ukraine Updates**: Reports include insights into the ongoing conflict and its impact on infrastructure.
- **Cultural Events**: Coverage of global cultural events such as the emotional reunion in Gaza during harsh weather conditions.
- **Business News**: Updates on significant market changes and antitrust probes involving major companies like Microsoft.

The site encourages interactivity, inviting feedback on advertisements and user experience, while continuously updating to keep viewers informed about the latest happenings globally.

In [20]:
# And one more site
display_summary("https://anthropic.com")

# Anthropic Website Summary

Anthropic is an AI safety and research company based in San Francisco, focused on creating reliable and beneficial AI systems. The site prominently features their main product, **Claude**, highlighting the latest versions **Claude 3.5 Sonnet** and **Claude 3.5 Haiku**, which are touted as highly intelligent AI models.

## Key Features
- **Claude API**: Tools and resources for businesses to integrate Claude into their operations for efficiency and revenue growth.
- **Enterprise Solutions**: Tailored offerings for enterprises looking to leverage AI in their operations.

## Recent Announcements
- **New AI Models**: As of **October 22, 2024**, Anthropic announced the introduction of computer use, along with updates to their AI models **3.5 Sonnet** and **3.5 Haiku**.
- **Research Publications**: Previous publications include insights on AI safety principles and constitutional AI methodologies aimed at ensuring harmlessness in AI outputs.

## About Anthropic
The company is committed to safety in AI development and employs a diverse team with expertise in machine learning, policy, and physics, working collaboratively to advance the field. The website also lists career opportunities for individuals interested in joining their mission.

In [None]:
# Step 1: Create your prompts

system_prompt = "something here"
user_prompt = """
    Lots of text
    Can be pasted here
"""

# Step 2: Make the messages list

messages = [] # fill this in

# Step 3: Call OpenAI

response =

# Step 4: print the result

print(