In [1]:
# --- Imports ---
import os
import requests
from dotenv import load_dotenv, find_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI

# Load .env variables
env_path = find_dotenv()  # returns absolute path if found, else ''
print("Detected .env path:", env_path if env_path else "No .env file found")
# Force reload and override existing env vars
load_dotenv(env_path, override=True)

# Initialize OpenAI client
openai = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# --- Global user-agent headers (helps bypass basic bot filters) ---
DEFAULT_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    )
}

class Website:
    """
    Represents a web page and extracts its title and clean text content.

    Attributes:
        url (str): The URL of the web page.
        title (str): Page title or 'No title found'.
        text (str): Cleaned text content from the <body>.
    """

    def __init__(self, url: str, headers=None, timeout: int = 10):
        """
        Initialize the Website object and fetch + parse content.

        Args:
            url (str): The URL to scrape.
            headers (dict, optional): Custom request headers. Defaults to DEFAULT_HEADERS.
            timeout (int): Request timeout in seconds. Defaults to 10.
        """
        self.url = url
        self.title = "No title found"
        self.text = ""

        try:
            response = requests.get(url, headers=headers or DEFAULT_HEADERS, timeout=timeout)
            response.raise_for_status()
        except requests.RequestException as e:
            print(f"⚠️ Error fetching {url}: {e}")
            return

        soup = BeautifulSoup(response.content, "html.parser")

        # Extract title
        self.title = soup.title.string.strip() if soup.title else "No title found"

        # Remove irrelevant elements
        if soup.body:
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()

            # Extract text, clean up whitespace
            self.text = "\n".join(
                line.strip()
                for line in soup.body.get_text(separator="\n").splitlines()
                if line.strip()
            )
        else:
            self.text = "[No body content found]"

    def preview(self, lines: int = 5):
        """
        Show a Markdown preview (title + first few lines).
        """
        snippet = "\n".join(self.text.splitlines()[:lines])
        display(Markdown(f"### {self.title}\n\n{snippet}"))

# --- Usage Example ---
if __name__ == "__main__":
    site = Website("https://saadaziz.com")
    print(f"Title: {site.title}\n")
    print(site.text[:500])  # print first 500 chars
    site.preview(8)         # show first 8 lines as Markdown


Detected .env path: C:\users\saad0\Documents\source\careergpt_backend\careergpt-backend\.env
Title: Saad Aziz | Professional Website

SAAD AZIZ
About
Resume
Demo
Contact
Engineer •
Team builder
I turn complexity into clarity: crafting code, mentoring teams, and delivering solutions that matter.
CareerGPT
Demo
Contact me
About
Me
Engineer • Technical & Product Leader
I’m an engineer with 15+ years of experience building and scaling products, bridging the gap
between software development, product strategy, and team leadership. Passionate about
CI/CD, DevOps practices, and Observability
, I focus on creating systems that
empower 


### Saad Aziz | Professional Website

SAAD AZIZ
About
Resume
Demo
Contact
Engineer •
Team builder
I turn complexity into clarity: crafting code, mentoring teams, and delivering solutions that matter.

In [3]:
# Example usage: Replace the URL below with any site you want to analyze.
# The print statements will show the page title and a text preview.

saad = Website("https://saadaziz.com")
print(saad.title)
print(saad.text)

Saad Aziz | Professional Website
SAAD AZIZ
About
Resume
Demo
Contact
Engineer •
Team builder
I turn complexity into clarity: crafting code, mentoring teams, and delivering solutions that matter.
CareerGPT
Demo
Contact me
About
Me
Engineer • Technical & Product Leader
I’m an engineer with 15+ years of experience building and scaling products, bridging the gap
between software development, product strategy, and team leadership. Passionate about
CI/CD, DevOps practices, and Observability
, I focus on creating systems that
empower teams to ship faster and with confidence.
When I’m not optimizing pipelines or refining product roadmaps, I’m exploring the
Pacific
Northwest trails and coastlines with my bulldogs
— finding inspiration for the next
challenge, both in tech and in life.
View My Resume
My
Qualifications
Education
Bachelor of Science - Computer Science
•
2007
Portland State University – Portland, OR
Specialized in software engineering, algorithms, and systems programming.
Senior cap

In [62]:
# --- System Prompt ---
# This sets the AI's behavior for summarizing websites.
# You can tweak the last line to change output format or language (e.g., "Respond in markdown in Spanish.")
system_prompt = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a concise summary, ignoring navigation or irrelevant elements. "
    "Respond in markdown."
)


def user_prompt_for(website):
    """
    Build the user-facing prompt that will be sent to the model.

    Args:
        website (Website): A Website object containing title and text.

    Returns:
        str: Formatted prompt combining the title and text of the website.
    """
    return (
        f"You are reviewing a website titled: {website.title}\n\n"
        "The content of this website is provided below. "
        "Please summarize it in markdown format, including any key news or announcements:\n\n"
        f"{website.text}"
    )


In [63]:
print(user_prompt_for(saad))

You are reviewing a website titled: Saad Aziz | Professional Website

The content of this website is provided below. Please summarize it in markdown format, including any key news or announcements:

SAAD AZIZ
About
Resume
Demo
Contact
Engineer •
Team builder
I turn complexity into clarity: crafting code, mentoring teams, and delivering solutions that matter.
CareerGPT
Demo
Contact me
About
Me
Engineer • Technical & Product Leader
I’m an engineer with 15+ years of experience building and scaling products, bridging the gap
between software development, product strategy, and team leadership. Passionate about
CI/CD, DevOps practices, and Observability
, I focus on creating systems that
empower teams to ship faster and with confidence.
When I’m not optimizing pipelines or refining product roadmaps, I’m exploring the
Pacific
Northwest trails and coastlines with my bulldogs
— finding inspiration for the next
challenge, both in tech and in life.
View My Resume
My
Qualifications
Education
Bache

In [33]:
# See how this function creates exactly the format above

def messages_for(website):
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt_for(website)}
    ]

In [64]:
# And now: call the OpenAI API. You will get very familiar with this!

def summarize(url):
    website = Website(url)
    response = openai.chat.completions.create(
        model="gpt-4o-mini",  # or gpt-4o
        messages = messages_for(website)
    )
    return response.choices[0].message.content

In [65]:

# Explicitly load
load_dotenv(env_path)

True

In [55]:
print("Loaded key:", os.getenv("OPENAI_API_KEY"))

Loaded key: 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


In [66]:
summarize("https://saadaziz.com")

'# Saad Aziz | Professional Website Summary\n\n## About Me\n- **Name**: Saad Aziz\n- **Profession**: Engineer • Technical & Product Leader\n- **Experience**: 15+ years in building and scaling products, focusing on software development, product strategy, and team leadership.\n- **Interests**: CI/CD, DevOps, and observability. Enjoys exploring the Pacific Northwest with his bulldogs.\n\n## Qualifications\n### Education\n- **Bachelor of Science in Computer Science** (2007)  \n  Portland State University, Oregon  \n  Specialized in software engineering and developed a dynamic screen reader for visually impaired users.\n\n### Experience Highlights\n- **Engineering Leader / Principal Engineer**: Expertise in building scalable SaaS platforms and leading teams.\n- **Full-Stack Developer**: Proficient in modern frameworks and high-impact enterprise applications.\n\n### Skills\n- **Technical Skills**: C#/.NET, Java, Python, Node.js, TypeScript, frameworks like Ember.js, Angular, cloud services (