In [1]:
import os
import json
from dotenv import load_dotenv
from IPython.display import Markdown, display, update_display
from scraper import fetch_website_links, fetch_website_contents
from openai import OpenAI

In [2]:
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:
    print("API key looks good so far")
else:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")
    
MODEL = 'gpt-5-nano'
openai = OpenAI()

API key looks good so far


In [3]:
links = fetch_website_links("https://edwarddonner.com")
links

['https://edwarddonner.com/',
 'https://edwarddonner.com/connect-four/',
 'https://edwarddonner.com/outsmart/',
 'https://edwarddonner.com/about-me-and-about-nebula/',
 'https://edwarddonner.com/posts/',
 'https://edwarddonner.com/',
 'https://news.ycombinator.com',
 'https://nebula.io/?utm_source=ed&utm_medium=referral',
 'https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html',
 'https://patents.google.com/patent/US20210049536A1/',
 'https://www.linkedin.com/in/eddonner/',
 'https://edwarddonner.com/2025/11/11/ai-live-event/',
 'https://edwarddonner.com/2025/11/11/ai-live-event/',
 'https://edwarddonner.com/2025/09/15/ai-in-production-gen-ai-and-agentic-ai-on-aws-at-scale/',
 'https://edwarddonner.com/2025/09/15/ai-in-production-gen-ai-and-agentic-ai-on-aws-at-scale/',
 'https://edwarddonner.com/2025/05/28/connecting-my-courses-become-an-llm-expert-and-leader/',
 'https://edwarddonner.com/2025/05/28/connecting-my-cou

In [4]:
link_system_prompt = """
You are provided with a list of links found on a webpage.
You are able to decide which of the links would be most relevant to include in a brochure about the company,
such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:

{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [5]:
def get_links_user_prompt(url):
    user_prompt = f"""
Here is the list of links on the website {url} -
Please decide which of these are relevant web links for a brochure about the company, 
respond with the full https URL in JSON format.
Do not include Terms of Service, Privacy, email links.

Links (some might be relative links):

"""
    links = fetch_website_links(url)
    user_prompt += "\n".join(links)
    return user_prompt

In [6]:
print(get_links_user_prompt("https://edwarddonner.com"))


Here is the list of links on the website https://edwarddonner.com -
Please decide which of these are relevant web links for a brochure about the company, 
respond with the full https URL in JSON format.
Do not include Terms of Service, Privacy, email links.

Links (some might be relative links):

https://edwarddonner.com/
https://edwarddonner.com/connect-four/
https://edwarddonner.com/outsmart/
https://edwarddonner.com/about-me-and-about-nebula/
https://edwarddonner.com/posts/
https://edwarddonner.com/
https://news.ycombinator.com
https://nebula.io/?utm_source=ed&utm_medium=referral
https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html
https://patents.google.com/patent/US20210049536A1/
https://www.linkedin.com/in/eddonner/
https://edwarddonner.com/2025/11/11/ai-live-event/
https://edwarddonner.com/2025/11/11/ai-live-event/
https://edwarddonner.com/2025/09/15/ai-in-production-gen-ai-and-agentic-ai-on-aws-at-scale/
htt

In [7]:
def select_relevant_links(url):
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(url)}
        ],
        response_format={"type": "json_object"}
    )
    result = response.choices[0].message.content
    links = json.loads(result)
    return links
    

In [8]:
select_relevant_links("https://edwarddonner.com")

{'links': [{'type': 'home page', 'url': 'https://edwarddonner.com/'},
  {'type': 'about page',
   'url': 'https://edwarddonner.com/about-me-and-about-nebula/'},
  {'type': 'portfolio page', 'url': 'https://edwarddonner.com/connect-four/'},
  {'type': 'portfolio page', 'url': 'https://edwarddonner.com/outsmart/'},
  {'type': 'blog page', 'url': 'https://edwarddonner.com/posts/'},
  {'type': 'company page',
   'url': 'https://nebula.io/?utm_source=ed&utm_medium=referral'},
  {'type': 'LinkedIn profile', 'url': 'https://www.linkedin.com/in/eddonner/'},
  {'type': 'Twitter profile', 'url': 'https://twitter.com/edwarddonner'},
  {'type': 'Facebook page',
   'url': 'https://www.facebook.com/edward.donner.52'}]}

In [9]:
def fetch_page_and_all_relevant_links(url):
    contents = fetch_website_contents(url)
    relevant_links = select_relevant_links(url)
    result = f"## Landing Page:\n\n{contents}\n## Relevant Links:\n"
    for link in relevant_links['links']:
        result += f"\n\n### Link: {link['type']}\n"
        result += fetch_website_contents(link["url"])
    return result

In [10]:
print(fetch_page_and_all_relevant_links("https://huggingface.co"))

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


## Landing Page:

Hugging Face ‚Äì The AI community building the future.

Hugging Face
Models
Datasets
Spaces
Community
Docs
Enterprise
Pricing
Log In
Sign Up
The AI community building the future.
The platform where the machine learning community collaborates on models, datasets, and applications.
Explore AI Apps
or
Browse 2M+ models
Trending on
this week
Models
zai-org/GLM-4.7
Updated
2 days ago
‚Ä¢
2.72k
‚Ä¢
818
Qwen/Qwen-Image-Layered
Updated
6 days ago
‚Ä¢
11.9k
‚Ä¢
688
Tongyi-MAI/Z-Image-Turbo
Updated
17 days ago
‚Ä¢
385k
‚Ä¢
3.41k
google/functiongemma-270m-it
Updated
6 days ago
‚Ä¢
24.9k
‚Ä¢
573
Shakker-Labs/AWPortrait-Z
Updated
11 days ago
‚Ä¢
6.73k
‚Ä¢
443
Browse 2M+ models
Spaces
Running
on
Zero
Featured
515
TRELLIS.2
üè¢
515
High-fidelity 3D Generation from images
Running
on
Zero
Featured
256
Qwen Image Layered
üöÄ
256
Decompose an image into layers and export as PPTX or ZIP
Running
on
CPU Upgrade
208
Omni Image Editor
üñº
208
Image edit, text to image, face swap, image up

In [11]:
brochure_system_prompt = """
You are an assistant that analyzes the contents of several relevant pages from a company website
and creates a short brochure about the company for prospective customers, investors and recruits.
Respond in markdown without code blocks.
Include details of company culture, customers and careers/jobs if you have the information.
"""

In [12]:
def get_brochure_user_prompt(company_name, url):
    user_prompt = f"""
You are looking at a company called: {company_name}
Here are the contents of its landing page and other relevant pages;
use this information to build a short brochure of the company in markdown without code blocks.\n\n
"""
    user_prompt += fetch_page_and_all_relevant_links(url)
    user_prompt = user_prompt[:5_000] 
    return user_prompt

In [None]:
get_brochure_user_prompt("HuggingFace", "https://huggingface.co")

In [13]:
def stream_brochure(company_name, url):
    stream = openai.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[
            {"role": "system", "content": brochure_system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
          ],
        stream=True
    )    
    response = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        response += chunk.choices[0].delta.content or ''
        update_display(Markdown(response), display_id=display_handle.display_id)

In [14]:
stream_brochure("HuggingFace", "https://huggingface.co")

# Hugging Face Brochure

---

## Who We Are  
Hugging Face is the global AI community and collaboration platform at the forefront of the machine learning revolution. We empower machine learning engineers, scientists, and enthusiasts to build, share, and experiment with open-source ML models, datasets, and applications. With a fast-growing community and a rich ecosystem of open-source tools and libraries, Hugging Face stands as the hub for building a more open, ethical, and innovative AI future.

---

## Our Platform  
- **Models:** Access and contribute to over 2 million machine learning models covering text, image, video, audio, and even 3D modalities.  
- **Datasets:** Browse and share from a collection of more than 500,000 datasets enabling diverse machine learning use cases.  
- **Spaces:** Host and explore thousands of AI applications and demos, providing accessible interfaces for ML models.  
- **Community:** Collaborate with thousands of ML practitioners worldwide, share your projects, and build your professional portfolio on our platform.  

---

## Enterprise Solutions  
Hugging Face offers robust enterprise and team plans designed to scale ML efforts securely and efficiently:  
- **Enterprise-grade security:** Single Sign-On (SSO), granular access controls, private storage, and audit logs.  
- **Advanced compute options:** Including ZeroGPU quota boosts and flexible compute scalability tailored for organizational needs.  
- **Collaboration tools:** Private dataset viewers and centralized token management enhance teamwork and governance.  
- **Analytics & Monitoring:** Track repository usage, API calls, and spending with comprehensive dashboards.  

Plans start at $20 per user/month, with customizable enterprise contracts to fit your business needs.

---

## Company Culture  
At Hugging Face, we champion openness, collaboration, and ethical AI development. Our vibrant community thrives on knowledge sharing and co-creation, fostering an environment where innovators can push boundaries together. We prioritize diversity, learning, and inclusive growth ‚Äî welcoming everyone passionate about shaping the future of AI.

---

## Careers  
Join a leading AI company shaping the future of technology. Hugging Face offers exciting opportunities for:  
- Machine Learning Engineers  
- Data Scientists  
- Research Scientists  
- Software Developers  
- Community & Developer Advocates  

We seek talented, mission-driven individuals who want to make a real-world impact on the AI landscape. Embrace a culture of continuous learning and contribute directly to open-source and enterprise-grade AI tools that millions rely on.

Explore current openings on our [Careers page](https://huggingface.co/careers).

---

## Why Choose Hugging Face?  
- **Leading AI ecosystem** with unmatched open-source resources.  
- **Collaborative platform** connecting ML practitioners worldwide.  
- **Innovative enterprise solutions** combining security, scalability, and ease of use.  
- **Commitment to ethical AI** and building a community-driven AI future.  

---

For more information:  
Website: [huggingface.co](https://huggingface.co)  
Community Hub: Join us on [Discord](https://discord.gg/huggingface) and [GitHub](https://github.com/huggingface)  
Contact Sales: [enterprise@huggingface.co](mailto:enterprise@huggingface.co)

---

*Hugging Face ‚Äì Building the future of AI together.*