In [47]:
from dotenv import load_dotenv
import os
from openai import OpenAI
import requests
import json
from bs4 import BeautifulSoup
from IPython.display import display, Markdown, update_display

In [19]:
load_dotenv(override = True)
apikey = os.getenv("OPENAI_API_KEY")

if not apikey:
    print("API Key not found.")
elif not apikey.startswith("sk-proj-"):
    print("API Key is invalid.")
else:
    print("API Key is valid.")

API Key is valid.


In [20]:
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class WebScrape:
    def __init__(self, url):
        self.url = url
        soup = BeautifulSoup(requests.get(url, headers=headers).content, 'html.parser')
        self.title = soup.title.string if soup.title else 'No title found'
        if soup.body:
            for irrelevant in soup.body(['script', 'style', 'img', 'input']):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator='\n', strip=True)
        else:
            self.text = ''

        links = [link.get('href') for link in soup.find_all('a')]
        self.links = [link for link in links if link and link.startswith('http')]

    def web_content(self):
        return f"WebPage Title:\n{self.title}\nWebpage Content: {self.text}\n\n"

In [21]:
ed = WebScrape("https://edwarddonner.com")
ed.links

['https://edwarddonner.com/',
 'https://edwarddonner.com/connect-four/',
 'https://edwarddonner.com/outsmart/',
 'https://edwarddonner.com/about-me-and-about-nebula/',
 'https://edwarddonner.com/posts/',
 'https://edwarddonner.com/',
 'https://news.ycombinator.com',
 'https://nebula.io/?utm_source=ed&utm_medium=referral',
 'https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html',
 'https://patents.google.com/patent/US20210049536A1/',
 'https://www.linkedin.com/in/eddonner/',
 'https://edwarddonner.com/2025/09/15/ai-in-production-gen-ai-and-agentic-ai-on-aws-at-scale/',
 'https://edwarddonner.com/2025/09/15/ai-in-production-gen-ai-and-agentic-ai-on-aws-at-scale/',
 'https://edwarddonner.com/2025/05/28/connecting-my-courses-become-an-llm-expert-and-leader/',
 'https://edwarddonner.com/2025/05/28/connecting-my-courses-become-an-llm-expert-and-leader/',
 'https://edwarddonner.com/2025/05/18/2025-ai-executive-briefing/',
 '

In [22]:
link_system_prompt = """You are provided with a list of links found on a webpage. 
You are able to decide which of the links would be most relevant to include in a brochure about the company, 
such as links to an About page, or a Company page, or Careers/Jobs pages.

You should respond in JSON as in this example:
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}

"""

In [23]:
def get_user_prompt(website):
    user_prompt = f"Here is the list of links on the website of {website.url} - "
    user_prompt += "please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. \n"
    
    user_prompt += "Do not include Terms of Service, Privacy, email links.\n"
    user_prompt += "Links (some might be relative links):\n"
    user_prompt += "\n".join(website.links)
    return user_prompt

In [24]:
print(get_user_prompt(ed))

Here is the list of links on the website of https://edwarddonner.com - please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. 
Do not include Terms of Service, Privacy, email links.
Links (some might be relative links):
https://edwarddonner.com/
https://edwarddonner.com/connect-four/
https://edwarddonner.com/outsmart/
https://edwarddonner.com/about-me-and-about-nebula/
https://edwarddonner.com/posts/
https://edwarddonner.com/
https://news.ycombinator.com
https://nebula.io/?utm_source=ed&utm_medium=referral
https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html
https://patents.google.com/patent/US20210049536A1/
https://www.linkedin.com/in/eddonner/
https://edwarddonner.com/2025/09/15/ai-in-production-gen-ai-and-agentic-ai-on-aws-at-scale/
https://edwarddonner.com/2025/09/15/ai-in-production-gen-ai-and-agentic-ai-on-aws-at-scale/
https://edwar

In [29]:
openai = OpenAI()
model = "gpt-4o-mini"

In [30]:
def find_relevant_links(url):
    website = WebScrape(url)
    message = [
        {"role": "system", "content": link_system_prompt},
        {"role": "user", "content": get_user_prompt(website)}
        ] 
    
    query = openai.chat.completions.create(
        model=model,
        messages=message,
        response_format={"type": "json_object"},
    )

    response = query.choices[0].message.content
    return json.loads(response)


In [31]:
url = "https://huggingface.co"
webSite = WebScrape(url)
webSite.links

['https://endpoints.huggingface.co',
 'https://apply.workable.com/huggingface/',
 'https://discuss.huggingface.co',
 'https://status.huggingface.co/',
 'https://github.com/huggingface',
 'https://twitter.com/huggingface',
 'https://www.linkedin.com/company/huggingface/']

In [32]:
find_relevant_links(url)

{'links': [{'type': 'careers page',
   'url': 'https://apply.workable.com/huggingface/'},
  {'type': 'Twitter page', 'url': 'https://twitter.com/huggingface'},
  {'type': 'LinkedIn page',
   'url': 'https://www.linkedin.com/company/huggingface/'}]}

In [35]:
def get_all_details(url):
    result = "Landing page:\n"
    result += WebScrape(url).web_content()
    links = find_relevant_links(url)
    print("Found links:", links)
    for link in links["links"]:
        result += f"\n\n{link['type']}\n"
        result += WebScrape(link["url"]).web_content()
    return result

In [None]:
print(get_all_details("https://huggingface.co"))

Creating Company Brochure Tutorial

In [37]:
system_prompt = """You are an assistant that analyzes the contents of several relevant pages from a company website and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown. Include details of company culture, customers and careers/jobs if you have the information."""

In [38]:
def get_brochure_user_prompt(company_name, url):
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += f"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    user_prompt += get_all_details(url)
    user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters
    return user_prompt

In [None]:
print(get_brochure_user_prompt("HuggingFace", "https://huggingface.co"))

In [43]:
def create_brochure(company_name, url):
    response = openai.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
          ],
    )
    result = response.choices[0].message.content
    display(Markdown(result))

In [49]:
create_brochure("HuggingFace", "https://huggingface.co")

Found links: {'links': [{'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'linkedin page', 'url': 'https://www.linkedin.com/company/huggingface/'}, {'type': 'twitter page', 'url': 'https://twitter.com/huggingface'}, {'type': 'github page', 'url': 'https://github.com/huggingface'}, {'type': 'discussion forum', 'url': 'https://discuss.huggingface.co'}]}


```markdown
# Welcome to Hugging Face! 😄🌟

### The AI Community Building The Future 🤖

At Hugging Face, we don’t just love machine learning; we hug it like a teddy bear! Join the fun as we collaborate with a staggering **50,000+ organizations** and help the world of AI embrace the future, one *hug* at a time. 

#### What Do We Offer? 

- **1M+ Models**: Just like an ice cream shop, but instead, we have models, datasets, and applications. Pick your flavor!
  
- **Collaborative Spaces**: We've got community vibes as cozy as a chilly winter evening. Build, discover, and dive deep into the hug-tastic world of machine learning!

- **Datasets Galore!**: Why limit your data to a few bites when we have **250k+ datasets**? It’s a buffet of data goodness! 🥳

- **Specialized Solutions**: Need enterprise-grade security? We offer tailored solutions for teams, along with dedicated support—no more "uh-oh" moments! 

#### Meet Our Customers! 🤝

With big names like **Google**, **Microsoft**, and **Amazon** in our family of users, it's clear that we’re not just hugging any one-trick ponies! Each of these enterprises have dug into our cool offerings—like trying a new flavor of ice cream every week! 

#### Join Us! 🚀

If you're thinking, “Wow, this sounds like a place I’d love to work!” then grab your favorite beverage and cozy up! We’re looking for tech enthusiasts, driven coders, and anyone who can appreciate a well-placed pun. 

- **Current Openings**: We have a range of positions to fit your skill set—whether you’re a seasoned developer or just your friendly neighborhood data enthusiast.  
  - **Job Positions**: Check out jobs ranging from *Machine Learning Models Wrangler* to *Data Alchemist*. 🧙‍♂️

### Company Culture - A Hugging Touch! 

- **Open Source**: We believe in sharing—like a good friend who shares their fries!  
- **Collaboration**: Whether remote or in-house, our teamwork makes the dream work. (If your dream involves AI, that is!) 
- **Hugging Face Friday**: On Fridays, we don our best “hug” gear—because who doesn't want a fluffy hoodie at work? 🥰

#### How to Reach Us 📨

Want to hop on this hugging adventure? Visit us at [Hugging Face](https://huggingface.co) or follow us on social media to stay updated. We promise, if you give us a hug, we’ll definitely return the sentiment! 😉

---

**Hugging Face**: Not just a name, but a feeling! Let’s build the future of AI together—where everyone is welcome for a hug… or 100! 🤗
```

How to Animate ChatGPT streaming answers?

In [45]:
def stream_brochure(company_name, url):
    stream = openai.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
          ],
        stream=True
    )
    
    response = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        response += chunk.choices[0].delta.content or ''
        response = response.replace("```","").replace("markdown", "")
        update_display(Markdown(response), display_id=display_handle.display_id)

In [48]:
stream_brochure("HuggingFace", "https://huggingface.co")

Found links: {'links': [{'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'company page', 'url': 'https://www.linkedin.com/company/huggingface/'}]}


# Welcome to Hugging Face: The AI Community Building the Future! 🤗

## Who are We? 
At **Hugging Face**, we're the community-driven masterminds behind groundbreaking AI innovations. Think of us as the friendly neighborhood superheroes of machine learning (minus the spandex suits... unless that’s your thing).

## What Do We Offer?
- **1 Million+ Models**: Whether you're in search of text, image, or video models, we’ve got a buffet of AI goodness waiting for you. Just don’t forget to leave room for dessert (read: datasets)!
- **Datasets Galore**: With over 250,000 datasets, we’ve got more data than your favorite café has beans! ☕️
- **Spaces**: Collaborate and run your applications with no stress! Just plug in and go, like your iPhone at 1 am (we know you do it).

## Who Uses Hugging Face?
We serve all kinds of wonderful folks: 
- Non-profits like Ai2 helping save the world (one model at a time).
- Giant enterprises, including Google and Microsoft, trying to make sense of their algorithms... or at least pretending to.
- Make sure to check your celebrity models like **baidu/ERNIE-4.5-21B-A3B-Thinking**—it’s quite the crowd-pleaser!

## Culture
We believe in an open-source universe where ideas float around like confetti at a party! 🥳 
- **Inclusivity**: Everyone’s invited to our community gathering, so bring your unique perspectives!
- **Collaboration**: Teamwork makes the dream work—let's build AI tools together!
- **Innovation**: If you have an idea, we say GO for it! Seriously, no one’s stopping you. Fly your AI flag high! 🏴‍☠️

## Careers at Hugging Face
Looking to Join Us? Here's your chance!  
We are a dynamic team of **51-200** amazing humans looking for more adrenaline junkies obsessed with AI. If your idea of a career involves machine learning, natural language processing, and working closely with **over 50,000 organizations** (yes, that's a lot), then what are you waiting for? 

- Explore our current openings and unleash your inner AI wizard! 🧙‍♂️

## Join Us Today!
Read more at [huggingface.co](https://huggingface.co) and start your journey with the coolest AI squad around.

*Disclaimer: Hugging Face is not responsible for sudden bursts of creativity or delusions of grandeur that may occur after joining.* 😂