In [44]:
# --- Imports ---
import os
import requests
from dotenv import load_dotenv, find_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI

# Load .env variables
# Find and load .env
env_path = find_dotenv()  # returns absolute path if found, else ''
print("Detected .env path:", env_path if env_path else "No .env file found")
# Force reload and override existing env vars
load_dotenv(env_path, override=True)


# Initialize OpenAI client
openai = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# --- Global user-agent headers (helps bypass basic bot filters) ---
DEFAULT_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    )
}


class Website:
    """
    Represents a web page and extracts its title and clean text content.

    Attributes:
        url (str): The URL of the web page.
        title (str): Page title or 'No title found'.
        text (str): Cleaned text content from the <body>.
    """

    def __init__(self, url: str, headers=None, timeout: int = 10):
        """
        Initialize the Website object and fetch + parse content.

        Args:
            url (str): The URL to scrape.
            headers (dict, optional): Custom request headers. Defaults to DEFAULT_HEADERS.
            timeout (int): Request timeout in seconds. Defaults to 10.
        """
        self.url = url
        self.title = "No title found"
        self.text = ""

        try:
            response = requests.get(url, headers=headers or DEFAULT_HEADERS, timeout=timeout)
            response.raise_for_status()
        except requests.RequestException as e:
            print(f"⚠️ Error fetching {url}: {e}")
            return

        soup = BeautifulSoup(response.content, "html.parser")

        # Extract title
        self.title = soup.title.string.strip() if soup.title else "No title found"

        # Remove irrelevant elements
        if soup.body:
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()

            # Extract text, clean up whitespace
            self.text = "\n".join(
                line.strip()
                for line in soup.body.get_text(separator="\n").splitlines()
                if line.strip()
            )
        else:
            self.text = "[No body content found]"

    def preview(self, lines: int = 5):
        """
        Show a Markdown preview (title + first few lines).
        """
        snippet = "\n".join(self.text.splitlines()[:lines])
        display(Markdown(f"### {self.title}\n\n{snippet}"))


# --- Usage Example ---
if __name__ == "__main__":
    site = Website("https://saadaziz.com")
    print(f"Title: {site.title}\n")
    print(site.text[:500])  # print first 500 chars
    site.preview(8)         # show first 8 lines as Markdown


Detected .env path: C:\users\saad0\Documents\source\careergpt_backend\careergpt-backend\.env
Title: Saad Aziz | Professional Website

SAAD AZIZ
About
Resume
Demo
Contact
Engineer •
Team builder
I turn complexity into clarity: crafting code, mentoring teams, and delivering solutions that matter.
CareerGPT
Demo
Contact me
About
Me
Engineer • Technical & Product Leader
I’m an engineer with 15+ years of experience building and scaling products, bridging the gap
between software development, product strategy, and team leadership. Passionate about
CI/CD, DevOps practices, and Observability
, I focus on creating systems that
empower 


### Saad Aziz | Professional Website

SAAD AZIZ
About
Resume
Demo
Contact
Engineer •
Team builder
I turn complexity into clarity: crafting code, mentoring teams, and delivering solutions that matter.

In [30]:
        # Let's try one out. Change the website and add print statements to follow along.

saad = Website("https://saadaziz.com")
print(ed.title)
print(ed.text)

Saad Aziz | Professional Website
SAAD AZIZ
About
Resume
Demo
Contact
Engineer •
Team builder
I turn complexity into clarity: crafting code, mentoring teams, and delivering solutions that matter.
CareerGPT
Demo
Contact me
About
Me
Engineer • Technical & Product Leader
I’m an engineer with 15+ years of experience building and scaling products, bridging the gap
between software development, product strategy, and team leadership. Passionate about
CI/CD, DevOps practices, and Observability
, I focus on creating systems that
empower teams to ship faster and with confidence.
When I’m not optimizing pipelines or refining product roadmaps, I’m exploring the
Pacific
Northwest trails and coastlines with my bulldogs
— finding inspiration for the next
challenge, both in tech and in life.
View My Resume
My
Qualifications
Education
Bachelor of Science - Computer Science
•
2007
Portland State University – Portland, OR
Specialized in software engineering, algorithms, and systems programming.
Senior cap

In [31]:
# Define our system prompt - you can experiment with this later, changing the last sentence to 'Respond in markdown in Spanish."

system_prompt = "You are an assistant that analyzes the contents of a website \
and provides a short summary, ignoring text that might be navigation related. \
Respond in markdown."

# A function that writes a User Prompt that asks for summaries of websites:

def user_prompt_for(website):
    user_prompt = f"You are looking at a website titled {website.title}"
    user_prompt += "\nThe contents of this website is as follows; \
please provide a short summary of this website in markdown. \
If it includes news or announcements, then summarize these too.\n\n"
    user_prompt += website.text
    return user_prompt

In [32]:
print(user_prompt_for(saad))

You are looking at a website titled Saad Aziz | Professional Website
The contents of this website is as follows; please provide a short summary of this website in markdown. If it includes news or announcements, then summarize these too.

SAAD AZIZ
About
Resume
Demo
Contact
Engineer •
Team builder
I turn complexity into clarity: crafting code, mentoring teams, and delivering solutions that matter.
CareerGPT
Demo
Contact me
About
Me
Engineer • Technical & Product Leader
I’m an engineer with 15+ years of experience building and scaling products, bridging the gap
between software development, product strategy, and team leadership. Passionate about
CI/CD, DevOps practices, and Observability
, I focus on creating systems that
empower teams to ship faster and with confidence.
When I’m not optimizing pipelines or refining product roadmaps, I’m exploring the
Pacific
Northwest trails and coastlines with my bulldogs
— finding inspiration for the next
challenge, both in tech and in life.
View My R

In [33]:
# See how this function creates exactly the format above

def messages_for(website):
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt_for(website)}
    ]

In [34]:
# And now: call the OpenAI API. You will get very familiar with this!

def summarize(url):
    website = Website(url)
    response = openai.chat.completions.create(
        model="gpt-4o-mini",  # or gpt-4o
        messages = messages_for(website)
    )
    return response.choices[0].message.content

In [48]:

# Explicitly load
load_dotenv(env_path)

True

In [49]:
print("Loaded key:", os.getenv("OPENAI_API_KEY"))

Loaded key: your-key-here


In [43]:
summarize("https://saadaziz.com")

AuthenticationError: Error code: 401 - {'error': {'message': 'Incorrect API key provided: your-key*here. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}