In [None]:
import os
import json
from dotenv import load_dotenv
from IPython.display import Markdown, display, update_display
from openai import OpenAI
from scrapper import fetch_website_contents, fetch_website_links

In [None]:
# Load variables from the .env file, then run an advisory sanity check on the
# OpenAI key: the check only prints a hint and never raises.
load_dotenv(override=True)
api_key = os.getenv("OPENAI_API_KEY")

if not (api_key and api_key.startswith("sk-proj-") and len(api_key) > 10):
    print("there might be a problem with your OpenAI api key")
else:
    print("key looks good")

# MODEL is the model name used by the link-selection call; `openai` is the
# shared client object used by every request in this notebook.
MODEL = 'gpt-5-nano'
openai = OpenAI()

In [None]:
# Try fetch_website_links on a sample site; the bare name on the last line
# makes the notebook display the returned value.
links = fetch_website_links("https://edwarddonner.com")
links

In [6]:
# System prompt for the link-selection request: it tells the model to pick the
# links worth including in a company brochure and shows the JSON shape
# ({"links": [{"type": ..., "url": ...}]}) that the reply should follow.
# NOTE(review): the prompt text contains typos ("provide", "responsd"); they are
# part of the runtime string sent to the model and are left untouched here.
link_system_prompt = """
    You are provide with a list of links found on a webpage. You are able to 
    decide which of the links would be most relevant to include in a brochure about the company,
    such as links to an About page, or a company page, or Careers/Jobs pages. 
    You should responsd in JSON as in this example:

    {
        "links": [
            {"type": "about page", "url": "https://full.url/pages/here/about"},
            {"type": "career page", "url": "https://full.url/pages/careers"}
        ]
    }
"""

In [11]:

def get_user_prompt(url):
    """Build the user message for the link-selection request.

    Args:
        url: Address of the website. It is interpolated into the instruction
            text and passed to ``fetch_website_links``.

    Returns:
        The instruction text followed by the fetched links, one per line.
    """
    instructions = f"""
        Here is the list of link of the website {url} - Please
        decide which of these are relevant web links for a brochure about the company,
        respond with the full https URL in JSON format. Do not include
        Terms of Service, Privacy, email links
        
        Links (some might be relative links)
    """

    # The links are appended exactly as returned, separated by newlines.
    return instructions + "\n".join(fetch_website_links(url))

In [None]:
# Preview the user prompt that get_user_prompt builds for a sample site.
print(get_user_prompt("https://edwarddonner.com"))

In [24]:
def select_relevant_links(url):
    """Ask the model which links of ``url`` belong in a company brochure.

    Sends ``link_system_prompt`` plus the prompt from ``get_user_prompt(url)``
    to ``MODEL`` with a JSON-object response format and decodes the reply.

    Args:
        url: Address of the website whose links should be filtered.

    Returns:
        The decoded reply as a dict. It always contains a ``"links"`` key
        holding a list; the list is empty when the reply has no usable
        ``"links"`` entry. Per the system prompt, each item is expected to be
        a dict with ``"type"`` and ``"url"`` keys.

    Raises:
        json.JSONDecodeError: If the reply text is not valid JSON.
    """
    print(f"Selecting relevant links for {url} by calling {MODEL}")

    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_user_prompt(url)}
        ],
        response_format={"type": "json_object"}
    )

    result = response.choices[0].message.content

    # The message content can be empty or None; treat that like an empty
    # object instead of letting json.loads fail on a non-string.
    links = json.loads(result) if result else {}

    # Normalise the shape so callers can always iterate over links["links"],
    # even when the model left the key out or returned something else there.
    if not isinstance(links, dict):
        links = {}
    if not isinstance(links.get("links"), list):
        links["links"] = []

    print(f"Found {len(links['links'])} relevant links")
    return links

In [25]:
# Run the link selection on a sample site; the returned dict is shown as the
# cell output.
select_relevant_links("https://huggingface.co")

Selecting relevant links for https://huggingface.co by calling gpt-5-nano
Found 11 relevant links


{'links': [{'type': 'homepage', 'url': 'https://huggingface.co/'},
  {'type': 'brand page', 'url': 'https://huggingface.co/brand'},
  {'type': 'enterprise page', 'url': 'https://huggingface.co/enterprise'},
  {'type': 'career page', 'url': 'https://apply.workable.com/huggingface/'},
  {'type': 'blog', 'url': 'https://huggingface.co/blog'},
  {'type': 'community forum', 'url': 'https://discuss.huggingface.co'},
  {'type': 'Discord channel', 'url': 'https://huggingface.co/join/discord'},
  {'type': 'GitHub', 'url': 'https://github.com/huggingface'},
  {'type': 'Twitter', 'url': 'https://twitter.com/huggingface'},
  {'type': 'LinkedIn', 'url': 'https://www.linkedin.com/company/huggingface/'},
  {'type': 'Zhihu', 'url': 'https://www.zhihu.com/org/huggingface'}]}

In [30]:
def fetch_page_and_all_relevant_links(url):
    """Collect the landing page text and the text of every selected link.

    Args:
        url: Address of the website's landing page.

    Returns:
        One string: a "Landing Page" section with the landing page contents,
        followed by one "Link" section per entry chosen by
        ``select_relevant_links``, each holding that page's contents.
    """
    landing_text = fetch_website_contents(url)
    selection = select_relevant_links(url)

    # Gather the pieces in order and join once at the end.
    sections = [f"## Landing Page: \n\n{landing_text}\n## Relevant Links:\n"]
    for entry in selection['links']:
        sections.append(
            f"\n\n### Link: {entry['type']}\n" + fetch_website_contents(entry["url"])
        )
    return "".join(sections)

In [33]:
def get_brochure_user_prompt(company_name, url, max_chars=5000):
    """Build the user prompt asking the model to write a company brochure.

    Args:
        company_name: Name of the company, interpolated into the instructions.
        url: Address of the company's landing page; passed to
            ``fetch_page_and_all_relevant_links`` to gather the page texts.
        max_chars: Maximum length of the returned prompt in characters.
            Defaults to 5000. Pass ``None`` to disable truncation.

    Returns:
        The instruction text followed by the gathered page contents, cut to
        at most ``max_chars`` characters.
    """
    user_prompt = f"""
        You are looking at a company called: {company_name}
        Here are the contents of its landing page and other relevant pages;
        use this information to build a short brochure of the company in markdown without 
        code blocks. \n\n
    """

    user_prompt += fetch_page_and_all_relevant_links(url)

    # The cap applies to the whole prompt (instructions included), so text
    # from the pages appended last is what gets dropped first.
    # Slicing with None keeps the full string.
    return user_prompt[:max_chars]


In [34]:
# Build the brochure prompt for a sample company; the resulting string is
# shown as the cell output.
get_brochure_user_prompt("HuggingFace", "https://huggingface.co")

Selecting relevant links for https://huggingface.co by calling gpt-5-nano
Found 14 relevant links


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


'\n        You are looking at a company called: HuggingFace\n        Here are the contents of its landing page and other relevant pages;\n        use this information to build a short brochure of the company in markdown without \n        code blocks. \n\n\n    ## Landing Page: \n\nHugging Face ‚Äì The AI community building the future.\n\nHugging Face\nModels\nDatasets\nSpaces\nCommunity\nDocs\nEnterprise\nPricing\nLog In\nSign Up\nThe AI community building the future.\nThe platform where the machine learning community collaborates on models, datasets, and applications.\nExplore AI Apps\nor\nBrowse 1M+ models\nTrending on\nthis week\nModels\nTongyi-MAI/Z-Image-Turbo\nUpdated\n6 days ago\n‚Ä¢\n278k\n‚Ä¢\n2.69k\nmicrosoft/VibeVoice-Realtime-0.5B\nUpdated\n2 days ago\n‚Ä¢\n131k\n‚Ä¢\n820\nzai-org/GLM-4.6V-Flash\nUpdated\n5 days ago\n‚Ä¢\n67.7k\n‚Ä¢\n410\nmistralai/Devstral-Small-2-24B-Instruct-2512\nUpdated\nabout 1 hour ago\n‚Ä¢\n15.8k\n‚Ä¢\n329\nzai-org/GLM-4.6V\nUpdated\n6 days ago\n‚Ä¢

In [39]:
def create_brochure(company_name, url):
    """Generate a brochure in a single request and render it as Markdown.

    Args:
        company_name: Name of the company the brochure is about.
        url: Address of the company's landing page.

    Returns:
        None. The model's reply is displayed in the notebook.
    """
    completion = openai.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[
            {
                "role": "user",
                "content": get_brochure_user_prompt(company_name=company_name, url=url),
            },
        ],
    )
    # Render the first choice's text as Markdown in the cell output.
    display(Markdown(completion.choices[0].message.content))

In [None]:
# Generate and display a brochure for a sample company.
create_brochure("HuggingFace", "https://huggingface.co")

In [43]:
def stream_brochure_response(company_name, url):
    """Generate a brochure and render it incrementally as the reply streams in.

    Args:
        company_name: Name of the company the brochure is about.
        url: Address of the company's landing page.

    Returns:
        None. The accumulated Markdown is shown in a single display area that
        is refreshed whenever new text arrives.
    """
    stream = openai.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
        ],
        stream=True
    )

    response = ""
    display_handle = display(Markdown(""), display_id=True)

    for chunk in stream:
        # A chunk may arrive with an empty `choices` list; indexing it would
        # raise IndexError, so skip such chunks.
        if not chunk.choices:
            continue

        delta_text = chunk.choices[0].delta.content
        # Chunks without new text leave the output unchanged; skip the redraw.
        if not delta_text:
            continue

        response += delta_text
        update_display(Markdown(response), display_id=display_handle.display_id)

In [44]:
# Stream a brochure for a sample company into the cell output.
stream_brochure_response("HuggingFace", "https://huggingface.co")

Selecting relevant links for https://huggingface.co by calling gpt-5-nano
Found 12 relevant links


# Hugging Face Brochure

## About Hugging Face  
Hugging Face is the AI community building the future of machine learning. It serves as a vibrant collaboration platform where machine learning engineers, scientists, and enthusiasts come together to share, explore, and create open-source AI models, datasets, and applications.

## What We Offer  
- **Models:** Access and contribute to over 1 million open-source machine learning models, spanning modalities such as text, image, video, audio, and even 3D.  
- **Datasets:** Explore a rich catalog of 250,000+ curated datasets to power your AI projects and research.  
- **Spaces:** Deploy and run thousands of AI applications or create your own interactive AI apps with ease.  
- **Community:** Join a fast-growing, engaged community that supports collaboration and ethical AI development.  
- **Enterprise & Compute:** Benefit from scalable paid compute solutions and enterprise tools designed to accelerate AI innovation within teams and organizations.

## Key Features  
- **Open Collaboration Platform:** Host and collaborate on unlimited public models, datasets, and applications—all in one centralized Hub.  
- **Multi-Modality Support:** Work across various AI modalities including natural language, computer vision, speech, and 3D.  
- **Build Your Portfolio:** Share your work globally and build your professional machine learning profile.  
- **Explore Trending AI:** Stay up-to-date by browsing trending models and applications, such as image generators, speech synthesis models, and dynamic video tools.  

## Why Choose Hugging Face?  
- Fast, open-source tools and libraries that streamline AI development.  
- Access to a diverse and ever-expanding AI community fostering collaboration and innovation.  
- Enterprise-grade solutions enabling teams to scale AI projects effectively.  

## Join Us  
Whether you’re an AI researcher, developer, or business leader, Hugging Face empowers you to create, discover, and collaborate on machine learning projects smarter and faster.  

**Sign up today and be part of the AI community building the future.**  

Visit: [huggingface.co](https://huggingface.co)  

---

### Brand Colors  
- Yellow: #FFD21E  
- Orange: #FF9D00  
- Gray: #6B7280  

---

Hugging Face – The Home of Machine Learning Collaboration and Innovation.