In [123]:
# imports

import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI



In [124]:
# Load the environment variables from a file called .env

load_dotenv(override=True)
api_key = os.getenv('OpenAI_API_KeyB')

# Check the OpenAI key.
# Whitespace is checked before the prefix: a key with leading whitespace would
# otherwise fail the prefix test and be reported with a misleading message.

if not api_key:
    print("No API key was found")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start sk-proj-")
else:
    print("API key found and looks good so far!")


API key found and looks good so far!


In [134]:
import os
from openai import OpenAI

# Expose the key stored in .env under the variable name used below.
# Use the *value* of the OpenAI_API_KeyB entry, not its name: assigning the
# literal text "OpenAI_API_KeyB" would hand the client an unusable key.
openai_key = os.getenv("OpenAI_API_KeyB")
if not openai_key:
    # Fail here with a clear message instead of a confusing error later on.
    raise RuntimeError("OpenAI_API_KeyB is not set; add it to your .env file")
os.environ["OPENAI_API_KEY"] = openai_key

# Now you can initialize without explicitly passing the key
client = OpenAI()

In [135]:
# Client object used by the summarize functions below (they call
# openai.chat.completions.create). It is created without an explicit key.
# NOTE(review): presumably it relies on OPENAI_API_KEY set in the previous cell — confirm.
openai = OpenAI()



In [152]:
# A class to represent a Webpage

# Some websites require the use of proper headers when fetching them:

# Browser-like request headers sent with every fetch made by Website.
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class Website:
    """
    A fetched web page reduced to its title and visible text.

    Attributes:
        url:   the address that was fetched
        title: the page <title> text, or "No title found" when absent or empty
        text:  the text of <body> with script/style/img/input elements removed,
               one fragment per line; "" when the page has no <body>
    """

    def __init__(self, url, timeout=30):
        """
        Create this Website object from the given url using the BeautifulSoup library

        Args:
            url: address of the page to download
            timeout: seconds to wait for the server before requests gives up;
                     without it a stalled server would block the notebook forever
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(response.content, 'html.parser')

        # .string is None for an empty <title> or one with nested tags, so
        # guard on it too to avoid a title of None.
        title_tag = soup.title
        if title_tag is not None and title_tag.string:
            self.title = title_tag.string
        else:
            self.title = "No title found"

        # Documents without a <body> would otherwise raise AttributeError.
        body = soup.body
        if body is None:
            self.text = ""
            return

        for irrelevant in body(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = body.get_text(separator="\n", strip=True)

In [142]:
# Define a system prompt 

# System prompt sent with every summary request. Built with implicit string
# concatenation so no backslash continuations are needed inside the literal.
system_prompt = (
    "You are an ML Engineer who analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown."
)

In [143]:
# Creating a function that writes a User Prompt that requests for a summary of websites:

def user_prompt_for(website):
    """
    Build the user prompt asking for a markdown summary of a page.

    The prompt is the page title line, followed by the fixed instructions,
    followed by the page text (read from website.title and website.text).
    """
    title_line = f"You are looking at a website titled {website.title}"
    instructions = (
        "\nThe contents of this website is as follows; "
        "please provide a short summary of this website in markdown. "
        "If it includes news or announcements, then summarize these too.\n\n"
    )
    return "".join([title_line, instructions, website.text])

In [144]:
# This function creates the format above

def messages_for(website):
    """
    Return the two-message chat list for a page: the system prompt first,
    then the user prompt generated from the given website object.
    """
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt_for(website)}
    return [system_message, user_message]

In [145]:
# Calling the OpenAI API. 

def summarize(url):
    """
    Fetch the page at url with Website, send its prompts to the
    gpt-4o-mini chat model, and return the content of the first choice.
    """
    page = Website(url)
    completion = openai.chat.completions.create(
        messages=messages_for(page),
        model="gpt-4o-mini",
    )
    first_choice = completion.choices[0]
    return first_choice.message.content



In [146]:
# Function to display in the Jupyter output, using markdown

def display_summary(url):
    """Summarize the page at url and render the result as markdown in the cell output."""
    display(Markdown(summarize(url)))



In [158]:
# Summarize a job-listing page and render the result as markdown
display_summary("https://careers.zoom.us/jobs/machine-learning-engineer-aic-core-san-jose-california-united-states-remote")

# Summary of Machine Learning Engineer - AIC Core Job Listing

## Position Overview
The **Machine Learning Engineer** role is part of the **GenAI Engineering team** at Zoom, primarily focusing on developing and enhancing AI algorithms for the Zoom AI Companion product. This position is **remote**, based in the **United States**, with the opportunity to work closely with experienced researchers and engineers.

## Responsibilities
- Design and implement AI algorithms to solve complex business challenges.
- Collaborate with cross-functional teams for scalable engineering solutions.
- Engage in code reviews and technical presentations to ensure quality.
- Identify improvements in existing systems and propose innovative solutions.
- Stay updated on developments in GenAI research for continuous technical improvement.

## Qualifications
- Degree in Computer Science, AI, or related field.
- Proficiency in programming languages like Python, C, or C++, with experience in scalable software systems.
- Familiarity with deep learning frameworks (e.g., PyTorch, TensorFlow).
- Strong collaboration skills and ability to present technical concepts effectively.

## Salary Information
- **Salary Range**: Approximately **$127,700 to $255,400** based on qualifications and experience.

## Application Information
- Anticipated position close date: **July 4, 2025**.
- Applicants are encouraged to apply within a window of at least five days to ensure fair consideration. 

## Company Culture and Commitment
- Zoom emphasizes a supportive workplace culture, offering various benefits aimed at physical, mental, and financial health, alongside work-life balance.
- The company practices fair hiring, ensuring candidates are evaluated on their skills and potential. Accommodations are available for individuals with medical disabilities.

## Conclusion
This job listing presents an opportunity for individuals passionate about machine learning and AI to contribute to innovative engineering solutions in a dynamic team environment at Zoom.

# Handling JavaScript Website

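Summarizing a JavaScript-rendered site such as OpenAI with `display_summary("https://openai.com")` does not work, because the page content is produced by JavaScript, which `requests` does not execute. The Selenium framework (install `selenium` and `webdriver-manager`) can be used to load the page in a browser instead, as shown below.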

In [139]:
# A class to represent a Webpage
# If you're not familiar with Classes, check out the "Intermediate Python" notebook

# Some websites need you to use proper headers when fetching them:
# Import necessary modules
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import time

class ScrapeWebsite:
    """
    A web page loaded in headless Chrome and reduced to its title and text.

    Attributes:
        url:   the address that was loaded
        title: the page <title> text, or "No title found" when absent or empty
        text:  the text of <body> with script/style/img/input elements removed,
               one fragment per line; "" when the page has no <body>
    """

    def __init__(self, url, wait_seconds=3):
        """
        Create this Website object from the given URL using Selenium + BeautifulSoup
        Supports JavaScript-heavy and normal websites uniformly.

        Args:
            url: address of the page to load
            wait_seconds: fixed pause after navigation before the page source is
                          read, giving scripts time to run (adjust as needed)
        """
        self.url = url

        # Configure headless Chrome
        options = Options()
        options.add_argument('--headless')
        options.add_argument('--no-sandbox')
        options.add_argument('--disable-dev-shm-usage')

        # Use webdriver-manager to manage ChromeDriver
        service = Service(ChromeDriverManager().install())

        # Initialize the Chrome WebDriver with the service and options
        driver = webdriver.Chrome(service=service, options=options)

        # Always shut the browser down, even if navigation fails; otherwise
        # each failed call would leave a Chrome process running.
        try:
            driver.get(url)

            # Wait for JS to load
            time.sleep(wait_seconds)

            # Fetch the page source after JS execution
            page_source = driver.page_source
        finally:
            driver.quit()

        # Parse the HTML content with BeautifulSoup
        soup = BeautifulSoup(page_source, 'html.parser')

        # Extract title; .string is None for an empty <title> or one with
        # nested tags, so guard on it too to avoid a title of None.
        title_tag = soup.title
        if title_tag is not None and title_tag.string:
            self.title = title_tag.string
        else:
            self.title = "No title found"

        # Documents without a <body> would otherwise raise AttributeError.
        body = soup.body
        if body is None:
            self.text = ""
            return

        # Remove unnecessary elements
        for irrelevant in body(["script", "style", "img", "input"]):
            irrelevant.decompose()

        # Extract the main text
        self.text = body.get_text(separator="\n", strip=True)


In [132]:
def summarize_js_website(url):
    """
    Load the page at url with ScrapeWebsite, send its prompts to the
    gpt-4o-mini chat model, and return the content of the first choice.
    """
    page = ScrapeWebsite(url)
    completion = openai.chat.completions.create(
        messages=messages_for(page),
        model="gpt-4o-mini",
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [162]:
# Summarize a JavaScript-rendered site via the Selenium-based scraper
summary = summarize_js_website("https://openai.com")

In [163]:
# Render the summary as markdown.
# NOTE(review): the recorded output below describes a "Just a moment..." page
# asking for JavaScript and cookies, not the site's real content — presumably
# an interstitial served to the headless browser; confirm and adjust the scraper.
display(Markdown(summary))

# Website Summary: Just a moment...

The website appears to be experiencing a temporary delay with a message indicating it is waiting for `openai.com` to respond. The content suggests to the user that enabling JavaScript and cookies is necessary to continue accessing the site. 

### Key Points:
- The page indicates a wait time for the response from `openai.com`.
- Users are prompted to enable JavaScript and cookies for full functionality.
- No additional news or announcements are provided on this page.

This website primarily serves as an intermediary notice while the user's request to `openai.com` is being processed.