In [41]:
import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time

In [42]:
# Load variables from a local .env file (override=True lets .env values win
# over ones already present in the environment) and sanity-check the OpenAI key.
load_dotenv(override=True)
api_key = os.getenv("OPENAI_API_KEY")

if not api_key:
    print("No API key was found!")
elif api_key.strip() != api_key:
    # Whitespace is checked before the prefix: a key with leading whitespace
    # would otherwise be misreported as having the wrong prefix.
    print("An API key was found, but it looks like it might have space or tab characters at the start or end!")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start with sk-proj-; please check the key!")
else:
    print("API key found, looks good so far!")

API key found, looks good so far!


In [43]:
# Quick preview: send one greeting to a frontier model and print its reply.
openai = OpenAI()
message = "Hello, GPT! This is my first ever message to you! Hi!"
response = openai.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": message}],
)
print(response.choices[0].message.content)

Hello! Welcome! I'm glad to hear from you. How can I assist you today?


In [44]:
# Browser-like User-Agent header.
# NOTE(review): nothing visible in this notebook section reads `headers`;
# confirm whether it is still needed.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    ),
}

class Website:
    """Snapshot of a web page rendered in headless Chrome.

    Attributes:
        url: The address that was loaded.
        title: Text of the page's <title>, or "No title found" when the page
            has no usable title.
        text: Text of the page's <body> with script, style, img and input
            tags removed; empty string when the page has no <body>.
    """

    def __init__(self, url, wait_seconds=3):
        """Load ``url`` in headless Chrome and extract its title and text.

        Args:
            url: Address of the page to load.
            wait_seconds: Seconds to sleep after navigation so that
                JavaScript-rendered content has time to appear.
        """
        self.url = url

        options = Options()
        options.add_argument("--headless")  # run browser in background
        driver = webdriver.Chrome(options=options)
        try:
            driver.get(url)
            time.sleep(wait_seconds)  # wait for JavaScript to finish loading
            page_source = driver.page_source
        finally:
            # Always shut the browser down, even if navigation fails, so no
            # Chrome/chromedriver process is left behind.
            driver.quit()

        soup = BeautifulSoup(page_source, 'html.parser')

        # .string is None when <title> is empty or contains nested markup.
        title_text = soup.title.string if soup.title else None
        self.title = title_text if title_text is not None else "No title found"

        # Drop elements that carry no readable text before extracting it.
        for tag in soup(["script", "style", "img", "input"]):
            tag.decompose()

        body = soup.body
        self.text = body.get_text(separator="\n", strip=True) if body else ""

In [45]:
# Smoke test: scrape one page and show the title that was extracted.
ed = Website(url="https://en.wikipedia.org/wiki/Perplexity_AI")
print(ed.title)

Exception managing chrome: error sending request for url (https://googlechromelabs.github.io/chrome-for-testing/last-known-good-versions-with-downloads.json)
Exception managing chrome: error sending request for url (https://googlechromelabs.github.io/chrome-for-testing/last-known-good-versions-with-downloads.json)
The chromedriver version (135.0.7049.84) detected in PATH at /usr/local/bin/chromedriver might not be compatible with the detected chrome version (); currently, chromedriver  is recommended for chrome .*, so it is advised to delete the driver in PATH and retry
There was an error managing chrome; using browser found in the cache


Perplexity AI - Wikipedia


In [46]:
# System instruction shared by every summarization request.
system_prompt = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown."
)

In [47]:
def user_prompt_for(website):
    """Build the user prompt asking for a markdown summary of ``website``.

    Args:
        website: Object exposing ``title`` and ``text`` attributes.

    Returns:
        A string made of a title line, the summarization instructions, and
        the page text, in that order.
    """
    heading = f"You are looking at a website titled {website.title}"
    instructions = (
        "\nThe contents of this website is as follows; "
        "please provide a short summary of this website in markdown. "
        "If it includes news or announcements, then summarize these too.\n\n"
    )
    return heading + instructions + website.text

In [48]:
# print(user_prompt_for(ed))

In [49]:
# Minimal system + user conversation used to try out a system prompt.
# NOTE(review): "assistent" is misspelled in the prompt text; kept verbatim here.
messages = [
    dict(role="system", content="You are a snarky assistent"),
    dict(role="user", content="What is 2 + 2 ?"),
]

In [50]:
# Send the conversation held in `messages` and print the model's reply.
response = openai.chat.completions.create(
    model="gpt-4o-mini",
    messages=messages,
)
print(response.choices[0].message.content)

Oh wow, we’re really diving into advanced math here! The answer is 4. But if you want me to go deeper, I can start discussing the meaning of life or something equally profound!


In [51]:
def messages_for(website):
    """Return the chat messages used to summarize ``website``.

    The list holds the shared system prompt followed by the user prompt
    built by ``user_prompt_for(website)``.
    """
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt_for(website)}
    return [system_message, user_message]

In [52]:
# messages_for(ed)

In [53]:
def summarize(url, model="gpt-4o-mini"):
    """Scrape ``url`` and return the model's summary of the page.

    Args:
        url: Address of the page to summarize.
        model: Name of the chat model to query; defaults to the model this
            function previously hard-coded.

    Returns:
        The content of the first choice in the chat completion response.
    """
    website = Website(url)
    response = openai.chat.completions.create(
        model=model,
        messages=messages_for(website),
    )
    return response.choices[0].message.content

In [54]:
# Summarize the Perplexity AI article; the returned string is the cell output.
summarize(url="https://en.wikipedia.org/wiki/Perplexity_AI")

'# Summary of Perplexity AI - Wikipedia\n\nPerplexity AI is an American web search engine that integrates a large language model to process user queries and deliver synthesized responses, complete with citations from web-based sources. Founded in August 2022 by Aravind Srinivas, Denis Yarats, Johnny Ho, and Andy Konwinski, the search engine was officially launched on December 7, 2022. As of December 2024, the company was valued at approximately $9 billion and has around 100 employees, with a headquarters in San Francisco, California.\n\n## Key Features and Services\n- **Conversational Interface**: Users can ask follow-up questions and receive answers with direct citations.\n- **Freemium Model**: A free version utilizes GPT-3.5, while a paid Pro subscription offers advanced LLM options and API access.\n- **Product Introduction**: \n  - **Shopping Hub** was launched in November 2024 to enhance user shopping experiences by displaying product cards based on search queries.\n  - **Internal 