In [38]:
import os
import json
from dotenv import load_dotenv
from IPython.display import display, Markdown, update_display
from scraper import fetch_website_contents, fetch_website_links
from openai import OpenAI
load_dotenv()
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

In [5]:
links = fetch_website_links("https://luminal.com")
print(links)

['/', '/blog', 'https://github.com/luminal-ai/luminal', 'https://www.ycombinator.com/companies/luminal', 'https://forms.gle/sfwqY4hWgQpUzGet5', 'https://luminal.cloud/deepseek-ocr', 'https://forms.gle/sfwqY4hWgQpUzGet5', 'https://calendly.com/accounts-luminalai/30min', 'mailto:founders@luminalai.com', 'https://forms.gle/sfwqY4hWgQpUzGet5']


In [6]:
# first step: asking the GPT-5-nano model to figure out the relevant links

In [7]:
link_system_prompt = """
You are provided with a list of links found on a webpage.
You are able to decide which of the links would be most relevant to include in a brochure about the company,
such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:

{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [13]:
def get_links_user_prompt(url):
    user_prompt = f"""
Here is the list of links on the website {url} -
Please decide which of these are relevant web links for a brochure about the company, 
respond with the full https URL in JSON format.
Do not include Terms of Service, Privacy, email links.

Links (some might be relative links):
"""
    links = fetch_website_links(url)
    user_prompt += "\n".join(links)
    return user_prompt

In [14]:
print(get_links_user_prompt("https://luminal.com"))


Here is the list of links on the website https://luminal.com -
Please decide which of these are relevant web links for a brochure about the company, 
respond with the full https URL in JSON format.
Do not include Terms of Service, Privacy, email links.

Links (some might be relative links):
/
/blog
https://github.com/luminal-ai/luminal
https://www.ycombinator.com/companies/luminal
https://forms.gle/sfwqY4hWgQpUzGet5
https://luminal.cloud/deepseek-ocr
https://forms.gle/sfwqY4hWgQpUzGet5
https://calendly.com/accounts-luminalai/30min
mailto:founders@luminalai.com
https://forms.gle/sfwqY4hWgQpUzGet5


In [24]:
def select_relevant_links(url):
    response = client.chat.completions.create(
        model="gpt-5-nano",
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(url)}
        ],
        response_format={"type": "json_object"}
    )
    return json.loads(response.choices[0].message.content)

In [30]:
select_relevant_links("https://huggingface.co")

{'links': [{'type': 'homepage', 'url': 'https://huggingface.co/'},
  {'type': 'brand page', 'url': 'https://huggingface.co/brand'},
  {'type': 'enterprise page', 'url': 'https://huggingface.co/enterprise'},
  {'type': 'pricing page', 'url': 'https://huggingface.co/pricing'},
  {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'},
  {'type': 'blog', 'url': 'https://huggingface.co/blog'},
  {'type': 'learn resources', 'url': 'https://huggingface.co/learn'},
  {'type': 'docs page', 'url': 'https://huggingface.co/docs'},
  {'type': 'endpoints page', 'url': 'https://endpoints.huggingface.co'},
  {'type': 'community forums', 'url': 'https://discuss.huggingface.co'},
  {'type': 'status page', 'url': 'https://status.huggingface.co/'},
  {'type': 'GitHub', 'url': 'https://github.com/huggingface'},
  {'type': 'LinkedIn', 'url': 'https://www.linkedin.com/company/huggingface/'},
  {'type': 'Twitter', 'url': 'https://twitter.com/huggingface'}]}

In [31]:
def fetch_page_and_all_relevant_links(url):
    contents = fetch_website_contents(url)
    relevant_links = select_relevant_links(url)
    result = f"## Landing Page:\n\n{contents}\n## Relevant Links:\n"
    for link in relevant_links['links']:
        result += f"\n\n### Link: {link['type']}\n"
        result += fetch_website_contents(link["url"])
    return result

In [32]:
print(fetch_page_and_all_relevant_links("https://huggingface.co"))

## Landing Page:

Hugging Face ‚Äì The AI community building the future.

Hugging Face
Models
Datasets
Spaces
Community
Docs
Enterprise
Pricing
Log In
Sign Up
The AI community building the future.
The platform where the machine learning community collaborates on models, datasets, and applications.
Explore AI Apps
or
Browse 2M+ models
Trending on
this week
Models
Lightricks/LTX-2
Updated
about 15 hours ago
‚Ä¢
188k
‚Ä¢
581
tencent/HY-MT1.5-1.8B
Updated
8 days ago
‚Ä¢
7.29k
‚Ä¢
679
Qwen/Qwen-Image-2512
Updated
9 days ago
‚Ä¢
18.4k
‚Ä¢
533
IQuestLab/IQuest-Coder-V1-40B-Loop-Instruct
Updated
1 day ago
‚Ä¢
9.65k
‚Ä¢
289
MiniMaxAI/MiniMax-M2.1
Updated
12 days ago
‚Ä¢
200k
‚Ä¢
961
Browse 2M+ models
Spaces
Running
Featured
3.98k
Wan2.2 Animate
üëÅ
3.98k
Wan2.2 Animate
Running
on
Zero
1.16k
Z Image Turbo
üñº
1.16k
Generate stunning images from text descriptions in seconds
Running
on
Zero
MCP
Featured
264
Qwen-Image-Edit-2511-LoRAs-Fast
üéÉ
264
Demo of the Collection of Qwen Image Edit LoRAs


In [33]:
brochure_system_prompt = """
You are an assistant that analyzes the contents of several relevant pages from a company website
and creates a short brochure about the company for prospective customers, investors and recruits.
Respond in markdown without code blocks.
Include details of company culture, customers and careers/jobs if you have the information.
"""

In [34]:
def get_brochure_user_prompt(company_name, url):
    user_prompt = f"""
You are looking at a company called: {company_name}
Here are the contents of its landing page and other relevant pages;
use this information to build a short brochure of the company in markdown without code blocks.\n\n
"""
    user_prompt += fetch_page_and_all_relevant_links(url)
    user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters
    return user_prompt

In [35]:
get_brochure_user_prompt("HuggingFace", "https://huggingface.co")

'\nYou are looking at a company called: HuggingFace\nHere are the contents of its landing page and other relevant pages;\nuse this information to build a short brochure of the company in markdown without code blocks.\n\n\n## Landing Page:\n\nHugging Face ‚Äì The AI community building the future.\n\nHugging Face\nModels\nDatasets\nSpaces\nCommunity\nDocs\nEnterprise\nPricing\nLog In\nSign Up\nThe AI community building the future.\nThe platform where the machine learning community collaborates on models, datasets, and applications.\nExplore AI Apps\nor\nBrowse 2M+ models\nTrending on\nthis week\nModels\nLightricks/LTX-2\nUpdated\nabout 15 hours ago\n‚Ä¢\n188k\n‚Ä¢\n581\ntencent/HY-MT1.5-1.8B\nUpdated\n8 days ago\n‚Ä¢\n7.29k\n‚Ä¢\n679\nQwen/Qwen-Image-2512\nUpdated\n9 days ago\n‚Ä¢\n18.4k\n‚Ä¢\n533\nIQuestLab/IQuest-Coder-V1-40B-Loop-Instruct\nUpdated\n1 day ago\n‚Ä¢\n9.65k\n‚Ä¢\n289\nMiniMaxAI/MiniMax-M2.1\nUpdated\n12 days ago\n‚Ä¢\n200k\n‚Ä¢\n961\nBrowse 2M+ models\nSpaces\nRunning\nFe

In [39]:
def create_brochure(company_name, url):
    response = client.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[
            {"role": "system", "content": brochure_system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
        ],
    )
    result = response.choices[0].message.content
    display(Markdown(result))

In [40]:
create_brochure("HuggingFace", "https://huggingface.co")

# Hugging Face Brochure

---

## About Hugging Face

Hugging Face is the vibrant AI community building the future of machine learning. As a central collaboration platform, Hugging Face enables engineers, scientists, and developers worldwide to share, explore, and experiment with open-source machine learning models, datasets, and applications.

The platform hosts over 2 million machine learning models and 500,000 datasets spanning text, image, video, audio, and even 3D modalities. Users can create, discover, and collaborate on projects to accelerate the development and deployment of AI technologies.

With a fast-growing global community and some of the most widely used open-source ML libraries, Hugging Face empowers a new generation to build an open and ethical AI future together.

---

## What We Offer

### The Hugging Face Hub
- A central place to **host, share, and collaborate** on unlimited public machine learning models, datasets, and applications.
- Tools and libraries built on an open-source stack to help developers **move faster** and innovate.

### Explore AI Applications
- Access and run over **1 million AI applications** such as image generators, natural language processors, and multi-modal AI assistants.
- Featured Spaces like text-to-image generators, image editors, and custom AI demos run directly on the platform.

### Enterprise & Compute Solutions
- Paid compute resources and enterprise-grade solutions for teams and organizations requiring scalable and secure AI infrastructure.
  
---

## Company Culture

Hugging Face is driven by the mission to democratize good machine learning ‚Äì one commit at a time. The community-focused culture encourages collaboration, openness, and ethical AI development.

- **Collaborative:** An active community with over 76,000 followers and growing daily contributions.
- **Open Source and Ethical:** Committed to transparency and sharing tools and research openly.
- **Innovative:** A talented science team pushing the boundaries of technology at the edge of AI.
- **Learning and Growth:** Hosts papers, blogs, and learning resources such as the Hugging Face Fundamentals program in partnership with DataCamp.

---

## Our Customers & Community

Hugging Face serves a broad spectrum of users:
- Machine Learning Engineers and Researchers
- AI Developers and Data Scientists
- Enterprises scaling AI solutions
- Open-source enthusiasts contributing to AI innovation

The platform is a hub for collaboration and discovery, attracting individuals and organizations that believe in democratizing AI technology globally.

---

## Careers at Hugging Face

Join a passionate team shaping the future of AI! Hugging Face is constantly growing ‚Äî currently around 193 team members ‚Äî and offers opportunities in research, engineering, community, product, and enterprise roles.

Why Join?
- Work on cutting-edge AI technologies
- Be part of an open and inclusive culture
- Collaborate with world-class experts and an engaged global community
- Contribute to ethical AI development and open-source projects

Check out the Hugging Face **Careers** page on their website to explore open positions and apply.

---

## Get Involved

- **Sign Up** to create your ML profile and build your portfolio.
- Explore and contribute to the vast repository of models and datasets.
- Join the [Hugging Face Community on Discord](https://discord.gg/huggingface) and social channels on GitHub, Twitter, and LinkedIn.
- Use the platform for your AI experiments or scale up using enterprise solutions.

---

### Contact

For press inquiries or partnerships, reach out via the Hugging Face contact page on their website.

---

**Join Hugging Face today ‚Äì the community where AI innovation thrives and the future is built through collaboration.**

[Visit Hugging Face](https://huggingface.co) | [Explore Models](https://huggingface.co/models) | [Discover Datasets](https://huggingface.co/datasets) | [Try Spaces](https://huggingface.co/spaces)

---

**Brand Colors**  
Yellow: #FFD21E  
Orange: #FF9D00  
Gray: #6B7280

In [42]:
def stream_brochure(company_name, url):
    stream = client.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[
            {"role": "system", "content": brochure_system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
          ],
        stream=True
    )    
    response = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        response += chunk.choices[0].delta.content or ''
        update_display(Markdown(response), display_id=display_handle.display_id)

In [43]:
stream_brochure("HuggingFace", "https://huggingface.co")

# Hugging Face Brochure

---

## About Hugging Face

**Hugging Face** is the AI community building the future of machine learning. It serves as the premier collaboration platform where the global machine learning community comes together to share, discover, and build models, datasets, and applications.

The Hugging Face Hub is a central place enabling anyone‚Äîengineers, scientists, end users‚Äîto experiment with open-source machine learning (ML), empower innovation, and foster an open and ethical AI future.

---

## What We Offer

- **2M+ Machine Learning Models**: Explore an extensive collection of models spanning multiple modalities including text, image, video, audio, and even 3D.
- **500k+ Datasets:** Gain access to a vast array of datasets updated continuously to fuel your AI experiments.
- **1M+ Applications (Spaces):** Run and create AI-powered applications with ease.
- **Collaboration Platform:** Host unlimited public models, datasets, and applications, building an accessible ML portfolio.
- **Open-Source Stack:** Tools and libraries designed to accelerate your research and product development.
- **Enterprise Solutions:** Paid Compute and Enterprise offerings designed to scale and support teams and organizations.

---

## Platform Highlights

- **Trending Models:** Stay up to date with the most recently updated and popular models driven by community contributions.
- **Spaces:** Interactive AI applications where users can demo and build real-time ML-powered experiences.
- **Multi-Modality Support:** Work seamlessly across text, images, audio, video, and 3D data.
- **Community Driven:** Millions of users collaborate to push the boundaries of ML innovation and ethics.

---

## Community & Culture

At its core, Hugging Face is a **vibrant and welcoming community** dedicated to open science, transparency, and ethical AI development. The platform connects researchers, developers, and enterprises in pursuit of democratizing machine learning to make it accessible and positive for society.

Hugging Face fosters a culture that embraces collaboration, continuous learning, and sharing knowledge. It empowers the next generation of machine learning professionals to grow their careers while contributing to meaningful projects that shape the future of AI.

---

## Careers at Hugging Face

Joining Hugging Face means becoming part of a forward-thinking company that blends cutting-edge technology with a community-first philosophy. The company values:

- Innovation and open-source contributions
- Ethical AI practices
- Cross-functional teamwork
- Impactful work with real-world applications

Whether you're an engineer, researcher, product specialist, or community advocate, Hugging Face offers opportunities to grow your skills and make a difference in an inclusive, collaborative environment.

Keep an eye on the official website for current openings and join a company building the future of machine learning!

---

## Who Uses Hugging Face?

- **AI Researchers & Scientists:** For rapid prototyping, collaboration, and open-source contributions.
- **Machine Learning Engineers & Developers:** Leverage pre-trained models and datasets to build powerful applications.
- **Enterprises:** Scale AI efforts with team and enterprise solutions for production workloads.
- **Educators & Students:** Build portfolios, learn, and share work in a globally connected environment.
- **AI Enthusiasts:** Explore and experiment with state-of-the-art models and applications.

---

## Get Started

Explore AI apps and browse over 2 million models on [huggingface.co](https://huggingface.co).

Sign up today to:

- Host and share your ML models and datasets
- Build and deploy AI-powered applications
- Join a community shaping the future of AI responsibly

---

## Hugging Face Brand Colors

- Bright Yellow: #FFD21E
- Orange: #FF9D00
- Gray: #6B7280

---

**Hugging Face** ‚Äî The AI community building the future.

---