In [3]:
from dotenv import load_dotenv
import os
from scraper import fetch_website_contents, fetch_website_links
from openai import OpenAI
import json
from IPython.display import Markdown, display, update_display

In [4]:
load_dotenv(override=True)

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

if OPENAI_API_KEY:
    print(f"Openai api key loaded successfully and starts with {OPENAI_API_KEY[:8]}.")
else:
    print("Failed to load Openai api key. Please check .env file.")

Openai api key loaded successfully and starts with sk-proj-.


In [41]:
links = fetch_website_links("https://huggingface.co")

In [12]:
link_system_prompt = """
You are provided with a list of links found on a webpage.
You are able to decide which of the links would be most relevant to include in a brochure about the company,
such as link to an About page, or a Caompany page, or a Career/Job pages.
You should respond in a JSON as in this example:

{
    "links": [
        {"type": "about Page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.url/careers"}
    ]
}
"""

In [24]:
def get_links_user_prompt(url):
    user_prompt = f"""
Here is the list of links found on webpage {url} - 
Please decide which of these is relevant weblinks for brochure of the company,
respond with full https url in JSON format.
Do not include Terms of Service, Privacy, email links.

Links (some might be relevant links):

"""
    links = fetch_website_links(url)
    user_prompt += "\n".join(links)
    return user_prompt

In [25]:
print(get_links_user_prompt("https://huggingface.co"))


Here is the list of links found on webpage https://huggingface.co - 
Please decide which of these is relevant weblinks for brochure of the company,
respond with full https url in JSON format.
Do not include Terms of Service, Privacy, email links.

Links (some might be relevant links):

/
/models
/datasets
/spaces
/docs
/enterprise
/pricing
/login
/join
/spaces
/models
/zai-org/GLM-OCR
/moonshotai/Kimi-K2.5
/openbmb/MiniCPM-o-4_5
/ACE-Step/Ace-Step1.5
/mistralai/Voxtral-Mini-4B-Realtime-2602
/models
/spaces/ACE-Step/Ace-Step-v1.5
/spaces/Qwen/Qwen3-TTS
/spaces/hadadxyz/ai
/spaces/mrfakename/Z-Image-Turbo
/spaces/Wan-AI/Wan2.2-Animate
/spaces
/datasets/openbmb/UltraData-Math
/datasets/sojuL/RubricHub_v1
/datasets/tencent/CL-bench
/datasets/FutureMa/EvasionBench
/datasets/tencent/HY3D-Bench
/datasets
/join
/enterprise
/enterprise
/enterprise
/enterprise
/enterprise
/enterprise
/enterprise
/inference/models
/pricing#endpoints
/pricing#spaces
/pricing
/allenai
/facebook
/amazon
/google
/In

In [42]:
openai = OpenAI()
MODEL="gpt-5-nano"
def select_relevant_links(url):
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(url)}
        ],
        response_format={"type": "json_object"}
    )
    result = response.choices[0].message.content
    links = json.loads(result)
    return links

In [29]:
select_relevant_links("https://huggingface.co")

Log: Calling gpt-5-nano to get relevant links for brochure of https://huggingface.co
Log: Got 9 relevant links for brochure


{'links': [{'type': 'About Page', 'url': 'https://huggingface.co/brand'},
  {'type': 'Company page', 'url': 'https://huggingface.co/enterprise'},
  {'type': 'Pricing page', 'url': 'https://huggingface.co/pricing'},
  {'type': 'Careers page', 'url': 'https://apply.workable.com/huggingface/'},
  {'type': 'Endpoints product page',
   'url': 'https://endpoints.huggingface.co'},
  {'type': 'GitHub page', 'url': 'https://github.com/huggingface'},
  {'type': 'Blog', 'url': 'https://huggingface.co/blog'},
  {'type': 'Twitter page', 'url': 'https://twitter.com/huggingface'},
  {'type': 'LinkedIn page',
   'url': 'https://www.linkedin.com/company/huggingface/'}]}

In [31]:
print(fetch_website_contents("https://huggingface.co"))

Hugging Face ‚Äì The AI community building the future.

Hugging Face
Models
Datasets
Spaces
Community
Docs
Enterprise
Pricing
Log In
Sign Up
The AI community building the future.
The platform where the machine learning community collaborates on models, datasets, and applications.
Explore AI Apps
or
Browse 2M+ models
Trending on
this week
Models
zai-org/GLM-OCR
Updated
2 days ago
‚Ä¢
373k
‚Ä¢
952
moonshotai/Kimi-K2.5
Updated
7 days ago
‚Ä¢
504k
‚Ä¢
2.02k
openbmb/MiniCPM-o-4_5
Updated
about 7 hours ago
‚Ä¢
30.4k
‚Ä¢
770
ACE-Step/Ace-Step1.5
Updated
8 days ago
‚Ä¢
28.7k
‚Ä¢
512
mistralai/Voxtral-Mini-4B-Realtime-2602
Updated
about 17 hours ago
‚Ä¢
3.2k
‚Ä¢
467
Browse 2M+ models
Spaces
Running
on
A100
Featured
293
ACE-Step v1.5
üéµ
293
Music Generation Foundation Model v1.5
Running
on
Zero
Featured
1.37k
Qwen3-TTS Demo
üéô
1.37k
Generate speech from text with voice design, cloning, or speakers
Running
558
Demo Playground
‚ö°
558
Free platform to access multiple AI models
Running
on
Zero


In [33]:
def fetch_page_and_all_relevant_links(url):
    content = fetch_website_contents(url)
    relevant_links = select_relevant_links(url)
    result = f"\n\n## Landing Page:\n\n{content}\n## Relevant Links:\n"

    for link in relevant_links['links']:
        result += f"### Link: {link['type']}\n"
        result += fetch_website_contents(link['url'])
        
    return result

In [34]:
print(fetch_page_and_all_relevant_links("https://huggingface.co"))

Log: Calling gpt-5-nano to get relevant links for brochure of https://huggingface.co
Log: Got 27 relevant links for brochure


## Landing Page:

Hugging Face ‚Äì The AI community building the future.

Hugging Face
Models
Datasets
Spaces
Community
Docs
Enterprise
Pricing
Log In
Sign Up
The AI community building the future.
The platform where the machine learning community collaborates on models, datasets, and applications.
Explore AI Apps
or
Browse 2M+ models
Trending on
this week
Models
zai-org/GLM-OCR
Updated
3 days ago
‚Ä¢
373k
‚Ä¢
955
moonshotai/Kimi-K2.5
Updated
7 days ago
‚Ä¢
504k
‚Ä¢
2.02k
openbmb/MiniCPM-o-4_5
Updated
about 8 hours ago
‚Ä¢
30.4k
‚Ä¢
774
ACE-Step/Ace-Step1.5
Updated
8 days ago
‚Ä¢
28.7k
‚Ä¢
512
mistralai/Voxtral-Mini-4B-Realtime-2602
Updated
about 17 hours ago
‚Ä¢
3.2k
‚Ä¢
469
Browse 2M+ models
Spaces
Running
on
A100
Featured
293
ACE-Step v1.5
üéµ
293
Music Generation Foundation Model v1.5
Running
on
Zero
Featured
1.37k
Qwen3-TTS Demo
üéô
1.37k
Generate speech 

In [35]:
brochure_system_prompt = """
You are an assitant that analyzes the content of several relevant pages of a website
and creates a short borchure about the company for prospective customers, investors and recruits.
Respond in markdown withour code blocks.
Include details of company culture, customers and careers/jobs if you have.
"""

In [36]:
def get_brochure_user_prompt(company_name, url):
    brochure_user_prompt = f"""
You are looking at a company called: {company_name}.
Here are the contents of it's landing page and other relevant pages;
use this information to build a short brochure of the company in markdown without code blocks.\n\n
"""
    brochure_user_prompt += fetch_page_and_all_relevant_links(url)
    brochure_user_prompt = brochure_user_prompt[:5_000]
    return brochure_user_prompt

In [37]:
print(get_brochure_user_prompt("Hugging Face", "https://huggingface.co"))

Log: Calling gpt-5-nano to get relevant links for brochure of https://huggingface.co
Log: Got 20 relevant links for brochure

You are looking at a company called: Hugging Face.
Here are the contents of it's landing page and other relevant pages;
use this information to build a short brochure of the company in markdown without code blocks.




## Landing Page:

Hugging Face ‚Äì The AI community building the future.

Hugging Face
Models
Datasets
Spaces
Community
Docs
Enterprise
Pricing
Log In
Sign Up
The AI community building the future.
The platform where the machine learning community collaborates on models, datasets, and applications.
Explore AI Apps
or
Browse 2M+ models
Trending on
this week
Models
zai-org/GLM-OCR
Updated
3 days ago
‚Ä¢
373k
‚Ä¢
955
moonshotai/Kimi-K2.5
Updated
7 days ago
‚Ä¢
504k
‚Ä¢
2.02k
openbmb/MiniCPM-o-4_5
Updated
about 8 hours ago
‚Ä¢
30.4k
‚Ä¢
774
ACE-Step/Ace-Step1.5
Updated
8 days ago
‚Ä¢
28.7k
‚Ä¢
513
mistralai/Voxtral-Mini-4B-Realtime-2602
Updated
about 1

In [43]:
def create_brochure(company_name, url):
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": brochure_system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name=company_name, url=url)}
        ]
    )

    result = response.choices[0].message.content
    display(Markdown(result))

In [40]:
create_brochure("Hugging Face", "https://huggingface.co")

Log: Calling gpt-5-nano to build brochure for Hugging Face using url https://huggingface.co
Log: Calling gpt-5-nano to get relevant links for brochure of https://huggingface.co
Log: Got 16 relevant links for brochure
Here is the borchure:

---------------------




# Hugging Face ‚Äî The AI community building the future

A platform where the machine learning community collaborates on models, datasets, and applications. Open, collaborative, and focused on building an open and ethical AI future.

---

## What we are

- The home of machine learning collaboration: share, explore, discover, and experiment with open-source ML.
- A central hub for models, datasets, and applications that powers researchers, engineers, and end users.
- The team-friendly, enterprise-ready platform with secure, scalable options for organizations.

Key tagline: The AI community building the future.

---

## Platform and offerings

- Models, Datasets, Spaces, Community, Docs, Enterprise, Pricing
- Explore AI Apps and browse 2M+ models
- Spaces: live demos and applications running with high-performance infrastructure (examples include A100-powered spaces and various demos)
- Datasets: browse 500k+ datasets
- All modalities supported: text, image, video, audio, and even 3D
- Build your portfolio: share your work and create an ML profile
- Free and paid pathways: Demo Playground for free access to multiple AI models; paid Compute and Enterprise options for organizations

Highlights from the ecosystem
- Spaces examples: ACE-Step v1.5, Music Generation Foundation Model v1.5, Qwen3-TTS Demo, MCP, Z Image Turbo, Wan2.2 Animate
- Datasets to explore: openbmb/UltraData-Math, sojuL/RubricHub_v1, tencent CL-bench, FutureMa/EvasionBench, and more

---

## How we help customers

- Accelerate ML development with HF Open Source stack to move faster.
- Enterprise-ready solutions: paid compute options and Team & Enterprise offerings.
- Enterprise-grade security, access controls, and governance to support organizational needs.
- A scalable platform to host and collaborate on unlimited public models, datasets, and applications.

---

## Culture and community

- The AI community building the future: collaboration, openness, and shared progress.
- Open source at the core: learn, collaborate, and share work to push the boundaries of ML.
- A global, diverse community of ML engineers, scientists, and end users shaping an open and ethical AI future together.
- Emphasis on learning, collaboration, and responsible AI practices.

---

## Careers and opportunities

- A fast-growing platform with opportunities across product, research, engineering, operations, and enterprise teams.
- Join a community that values learning, contribution to open-source ML, and building impactful AI tools.
- For current openings and the chance to shape the future of ML, explore the careers path on Hugging Face‚Äôs site.

---

## Brand at a glance

- Brand promise: Hugging Face is the collaboration platform for the machine learning community.
- Core assets: a central hub (the Hugging Face Hub) for open-source ML discovery and experimentation.
- Brand colors: HF Yellow #FFD21E, Accent Orange #FF9D00, Neutral Gray #6B7280.
- Visual identity emphasizes openness, collaboration, and ethics in AI.

 logo and brand assets are provided for consistent branding across materials.

---

If you‚Äôd like, I can tailor this brochure for a specific audience (investors, potential customers, or prospective recruits) and adjust the emphasis accordingly.

In [44]:
def stream_brochure(company_name, url):
    stream = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": brochure_system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name=company_name, url=url)}
        ],
        stream=True
    )

    response = ""
    display_handle = display(Markdown(""), display_id=True)

    for chunk in stream:
        response += chunk.choices[0].delta.content or ""
        update_display(Markdown(response), display_id=display_handle.display_id)

In [45]:
stream_brochure("Hugging Face", "https://huggingface.co")

# Hugging Face: The AI community building the future

Hugging Face is the collaboration platform where the machine learning community comes together to create, share, and deploy AI. We host models, datasets, and apps, empowering researchers, engineers, and end users to learn, collaborate, and build an open and ethical AI future.

## What we offer
- Central hub for open-source ML: Models, Datasets, and Spaces (interactive apps and demos)
- Large, active ecosystem: Browse 2M+ models and 500k+ datasets
- All modalities supported: Text, image, video, audio, and even 3D
- Collaborative tooling: A modern open-source stack that speeds up ML development
- Community-driven ecosystem: Documentation, forums, and shared projects

## The platform at a glance
- Models: A vast collection of open models with frequent updates
- Datasets: A growing repository of ready-to-use data for training and evaluation
- Spaces: Create and run AI applications and demos (including platforms like Demo Playground)
- Open source ethos: Central place to share, explore, discover, and experiment with open-source ML
- Enterprise options: Paid compute and enterprise solutions for teams and organizations
- Branding and accessibility: Clear brand assets and a consistent color and design system

## Why it matters for customers
- Accelerate ML development: Access ready-to-use models, data, and apps to move faster
- Enterprise-grade options: Security, access controls, and dedicated compute for teams
- Flexible collaboration: Host and collaborate on unlimited public models, datasets, and applications
- Multimodal capabilities: Work across text, image, video, audio, and 3D within a single platform

## For developers, researchers, and builders
- Explore all modalities and build a portfolio: Share your work with the world and grow your ML profile
- Free and paid resources: Use free tools like the Demo Playground and access paid compute when needed
- Rich, collaborative environment: A thriving community of ML engineers, scientists, and end users

## For investors and partners
- A rapidly growing, community-driven ecosystem: A platform that powers open-source ML libraries and collaboration
- Scalable impact: A global hub where researchers and developers contribute models, data, and apps
- Open and ethical AI focus: Built to support transparent collaboration and responsible AI development

## Culture and careers
- The AI community building the future: We empower the next generation of ML engineers, scientists, and end users to learn, collaborate, and share their work
- Open, collaborative, and ethical: A commitment to open-source, shared knowledge, and responsible AI
- Join the movement: If you‚Äôre passionate about ML and open collaboration, Hugging Face is a place to grow your skills, contribute to meaningful projects, and build a career around cutting-edge AI

## Get involved
- Explore apps and models: Browse 2M+ models, 500k+ datasets, and Spaces to see what others are building
- Sign up or log in: Join the community, contribute, and start building
- Learn more: Dive into docs and community resources to maximize your impact

Hugging Face is where the machine learning community collaborates on models, datasets, and applications‚Äîcreating a faster, more open, and ethically minded AI future.