In [1]:
# -------------------------------
# Standard library imports
# -------------------------------
import os        # For accessing environment variables and OS-level operations
import json      # For handling structured data (JSON format)

# -------------------------------
# Third-party library imports
# -------------------------------
from dotenv import load_dotenv
# Loads environment variables from a .env file (used to keep API keys secure)

import gradio as gr
# Builds the web interfaces for the brochure generators further down

from IPython.display import Markdown, display, update_display
# Used to render Markdown output dynamically in Jupyter notebooks

from openai import OpenAI
# Official OpenAI client for interacting with Large Language Models (LLMs)

# -------------------------------
# Local application imports
# -------------------------------
from scraper import fetch_website_links, fetch_website_contents
# Custom web scraping utilities:
# - fetch_website_links: extracts relevant links from a company website
# - fetch_website_contents: fetches and processes page content for brochure generation


In [2]:
# Configuration: load secrets from .env, sanity-check the key, create the client.

load_dotenv(override=True)  # values in .env take precedence over the existing environment
api_key = os.getenv('OPENAI_API_KEY')

# Cheap format check only (prefix and minimum length); it does not call the API.
if not api_key or not api_key.startswith('sk-proj-') or len(api_key) <= 10:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")
else:
    print("API key looks good so far")

MODEL = 'gpt-5-nano'  # model used for the link-selection step
openai = OpenAI()     # created without explicit arguments; api_key above is only used for the check

API key looks good so far


In [5]:
# Fetch the links found on the landing page; the bare `links` expression
# on the last line shows the list as the cell output.
links = fetch_website_links("https://huggingface.com")
links

['/',
 '/models',
 '/datasets',
 '/spaces',
 '/docs',
 '/enterprise',
 '/pricing',
 '/login',
 '/join',
 '/spaces',
 '/models',
 '/zai-org/GLM-Image',
 '/zai-org/GLM-4.7-Flash',
 '/google/translategemma-4b-it',
 '/Lightricks/LTX-2',
 '/kyutai/pocket-tts',
 '/models',
 '/spaces/multimodalart/qwen-image-multiple-angles-3d-camera',
 '/spaces/mrfakename/Z-Image-Turbo',
 '/spaces/black-forest-labs/FLUX.2-klein-9B',
 '/spaces/prithivMLmods/Qwen-Image-Edit-2511-LoRAs-Fast',
 '/spaces/Wan-AI/Wan2.2-Animate',
 '/spaces',
 '/datasets/Alibaba-Apsara/Superior-Reasoning-SFT-gpt-oss-120b',
 '/datasets/MiniMaxAI/OctoCodingBench',
 '/datasets/HuggingFaceFW/finetranslations',
 '/datasets/HuggingFaceFW/finepdfs',
 '/datasets/facebook/action100m-preview',
 '/datasets',
 '/join',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/inference/models',
 '/pricing#endpoints',
 '/pricing#spaces',
 '/pricing',
 '/allenai',
 '/facebook',
 '/amazon',


In [6]:
# System prompt for the link-selection step: tells the model to pick
# brochure-relevant links and fixes the JSON shape ({"links": [{"type", "url"}]})
# that the rest of the notebook reads.
link_system_prompt = """
You are provided with a list of links found on a webpage.
You are able to decide which of the links would be most relevant to include in a brochure about the company,
such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:

{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [7]:
def get_links_user_prompt(url):
    """Build the user prompt asking the model to pick brochure-worthy links.

    Args:
        url: Address of the website whose links are fetched with
            ``fetch_website_links``.

    Returns:
        The instruction text followed by the page's links, one per line.
        Repeated links are listed once (first occurrence wins) and empty
        entries are dropped, so menus that repeat the same href do not
        inflate the prompt.
    """
    user_prompt = f"""
Here is the list of links on the website {url} -
Please decide which of these are relevant web links for a brochure about the company, 
respond with the full https URL in JSON format.
Do not include Terms of Service, Privacy, email links.

Links (some might be relative links):

"""
    links = fetch_website_links(url) or []
    # dict.fromkeys de-duplicates while keeping first-seen order.
    unique_links = dict.fromkeys(link for link in links if link)
    user_prompt += "\n".join(unique_links)
    return user_prompt

In [8]:
def select_relevant_links(url):
    """Ask the model which links on ``url`` belong in a company brochure.

    Sends ``link_system_prompt`` and the prompt from ``get_links_user_prompt``
    to ``MODEL`` with a JSON-object response format.

    Args:
        url: Address of the website to analyse.

    Returns:
        The model's JSON reply parsed into a dict. The dict always contains a
        ``"links"`` list (possibly empty), so callers can iterate
        ``result["links"]`` even when the reply has no content or omits the key.

    Raises:
        json.JSONDecodeError: If the reply has content that is not valid JSON.
    """
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(url)}
        ],
        response_format={"type": "json_object"}
    )
    result = response.choices[0].message.content
    # An empty/None content would make json.loads fail; treat it as "no links".
    links = json.loads(result) if result else {}
    if not isinstance(links, dict):
        links = {}
    # Normalise the shape the rest of the notebook relies on.
    if not isinstance(links.get("links"), list):
        links["links"] = []
    return links

In [9]:
# Try the link selection on one site; the returned dict is shown as the cell output.
select_relevant_links("https://huggingface.com")

{'links': [{'type': 'company homepage', 'url': 'https://huggingface.co'},
  {'type': 'brand/about page', 'url': 'https://huggingface.co/brand'},
  {'type': 'enterprise page', 'url': 'https://huggingface.co/enterprise'},
  {'type': 'pricing page', 'url': 'https://huggingface.co/pricing'},
  {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'},
  {'type': 'blog page', 'url': 'https://huggingface.co/blog'},
  {'type': 'community forum', 'url': 'https://discuss.huggingface.co'},
  {'type': 'GitHub', 'url': 'https://github.com/huggingface'},
  {'type': 'LinkedIn', 'url': 'https://www.linkedin.com/company/huggingface/'},
  {'type': 'Twitter', 'url': 'https://twitter.com/huggingface'}]}

## Second step: make the brochure

Assemble all the details into a second prompt for the model (the code below sends it to `gpt-4.1-mini`).

In [10]:
def fetch_page_and_all_relevant_links(url):
    """Collect the landing page text plus the text of each relevant linked page.

    Args:
        url: Address of the company's landing page.

    Returns:
        A markdown-formatted string: a "## Landing Page:" section with the
        landing page contents, then a "## Relevant Links:" section holding one
        "### Link: <type>" sub-section per link chosen by
        ``select_relevant_links``.

    The link list comes from a model reply, so it is read defensively:
    entries that are not dicts or have no "url" are skipped, and a missing
    "type" falls back to "page". A linked page that fails to download is
    noted in the output instead of aborting the whole run.
    """
    contents = fetch_website_contents(url)
    relevant_links = select_relevant_links(url)
    # Collect pieces and join once instead of growing a string in the loop.
    sections = [f"## Landing Page:\n\n{contents}\n## Relevant Links:\n"]
    for link in relevant_links.get("links") or []:
        if not isinstance(link, dict) or not link.get("url"):
            continue
        link_url = link["url"]
        sections.append(f"\n\n### Link: {link.get('type', 'page')}\n")
        try:
            sections.append(fetch_website_contents(link_url))
        except Exception as exc:
            # Model-chosen URLs may be unreachable; record the failure and carry on.
            sections.append(f"(could not fetch {link_url}: {exc})")
    return "".join(sections)


# Print the combined landing-page and linked-page text to inspect what the
# brochure prompt will be built from.
print(fetch_page_and_all_relevant_links("https://huggingface.com"))


## Landing Page:

Hugging Face – The AI community building the future.

Hugging Face
Models
Datasets
Spaces
Community
Docs
Enterprise
Pricing
Log In
Sign Up
The AI community building the future.
The platform where the machine learning community collaborates on models, datasets, and applications.
Explore AI Apps
or
Browse 2M+ models
Trending on
this week
Models
zai-org/GLM-Image
Updated
6 days ago
‚Ä¢
8.71k
‚Ä¢
901
zai-org/GLM-4.7-Flash
Updated
about 17 hours ago
‚Ä¢
15.2k
‚Ä¢
789
google/translategemma-4b-it
Updated
6 days ago
‚Ä¢
35.6k
‚Ä¢
402
Lightricks/LTX-2
Updated
1 day ago
‚Ä¢
1.74M
‚Ä¢
1.22k
kyutai/pocket-tts
Updated
1 day ago
‚Ä¢
31.8k
‚Ä¢
337
Browse 2M+ models
Spaces
Running
on
Zero
Featured
1.01k
Qwen Image Multiple Angles 3D Camera
üé•
1.01k
Adjust camera angles in images using 3D controls or sliders
Running
on
Zero
MCP
1.66k
Z Image Turbo
üñº
1.66k
Generate stunning images from text descriptions in seconds
Running
on
Zero
MCP
Featured
160
FLUX.2 [Klein] 9B
üíª
160
Gener

In [11]:
# System prompt for the brochure step: sets the audience (customers, investors,
# recruits) and asks for plain markdown output without code fences.
brochure_system_prompt = """
You are an assistant that analyzes the contents of several relevant pages from a company website
and creates a short brochure about the company for prospective customers, investors and recruits.
Respond in markdown without code blocks.
Include details of company culture, customers and careers/jobs if you have the information.
"""


def get_brochure_user_prompt(company_name, url, max_chars=5_000):
    """Build the user prompt for the brochure-writing request.

    Args:
        company_name: Name of the company, inserted into the instructions.
        url: Address of the company's landing page; its text and the text of
            the relevant linked pages are appended via
            ``fetch_page_and_all_relevant_links``.
        max_chars: Maximum length of the returned prompt in characters.
            Defaults to 5,000 (the previous hard-coded limit); pass ``None``
            to disable truncation.

    Returns:
        The instructions followed by the scraped page text, cut to
        ``max_chars`` characters.
    """
    user_prompt = f"""
You are looking at a company called: {company_name}
Here are the contents of its landing page and other relevant pages;
use this information to build a short brochure of the company in markdown without code blocks.\n\n
"""
    user_prompt += fetch_page_and_all_relevant_links(url)
    # Slicing with None keeps the whole string, so None means "no limit".
    return user_prompt[:max_chars]


In [12]:
def create_brochure(company_name, url):
    """Generate a markdown brochure for a company from its website.

    Sends ``brochure_system_prompt`` and the prompt built by
    ``get_brochure_user_prompt`` to the "gpt-4.1-mini" chat model.

    Args:
        company_name: Name of the company the brochure is about.
        url: Address of the company's landing page.

    Returns:
        The content of the first choice in the model's reply. It is returned
        rather than displayed so callers can render or post-process it.
    """
    conversation = [
        {"role": "system", "content": brochure_system_prompt},
        {"role": "user", "content": get_brochure_user_prompt(company_name, url)},
    ]
    completion = openai.chat.completions.create(
        model="gpt-4.1-mini",
        messages=conversation,
    )
    return completion.choices[0].message.content

In [13]:
# Generate the English brochure once, keep the markdown string, and render it in the notebook.
english_brochure = create_brochure("HuggingFace", "https://huggingface.com")
display(Markdown(english_brochure))

# Hugging Face Company Brochure

---

## About Hugging Face

Hugging Face is the vibrant AI community and collaboration platform dedicated to building the future of machine learning. It serves as a central hub where machine learning engineers, scientists, and developers can share, explore, discover, and experiment with open-source ML models, datasets, and applications. Hugging Face empowers next-generation AI innovation through a strong commitment to open, transparent, and ethical AI development.

With a fast-growing global community and flagship open-source tools and libraries, Hugging Face is at the heart of the AI revolution — fueling research, development, and real-world applications alike.

---

## What We Offer

### A Collaborative ML Ecosystem

- **Models Hub**  
  Access over **2 million** machine learning models spanning diverse tasks like text generation, image-to-image, speech synthesis, text-to-video, and more across multiple libraries and frameworks (PyTorch, TensorFlow, JAX, etc.).

- **Datasets**  
  Explore and contribute to over **500,000** public datasets facilitating breakthroughs in various AI domains.

- **Spaces**  
  Host and showcase **1 million+** AI-powered applications and demos in a collaborative, shareable environment.

- **Tools & Libraries**  
  Support for state-of-the-art libraries including Transformers, Diffusers, sentence-transformers, ONNX, and more, powering rapid experimentation and deployment.

- **Enterprise & Compute**  
  Dedicated paid compute infrastructure and enterprise-grade solutions tailored for scalable AI development and deployment.

---

## Community & Culture

Hugging Face thrives on **collaboration, openness, and inclusivity**. The company fosters an energetic community where sharing knowledge, peer learning, and ethical practices are the norm. Members from academia, industry, and independent enthusiasts come together to push the boundaries of AI technology.

Values include:

- **Transparency & Openness:** Open source at the core of its mission to democratize AI.
- **Innovation:** Continuously exploring the frontiers of machine learning research.
- **Ethical AI:** Commitment to building responsible, fair, and trustworthy AI systems.
- **Support & Growth:** Empowering users to build their portfolios and ML presence through public sharing and community feedback.

---

## Customers & Partners

Hugging Face serves a wide spectrum of users including:

- AI researchers and academic institutions
- Machine learning engineers and developers
- Enterprises seeking cutting-edge AI infrastructure and tools
- Open-source contributors and hobbyists

Notable integrations with leading companies and collaborations across the AI ecosystem highlight its central role in accelerating AI adoption and innovation.

---

## Career Opportunities

Join Hugging Face if you want to be at the **cutting edge of AI technology** in an environment that values creativity, collaboration, and impact. Careers span roles in:

- Machine Learning Research
- Software Engineering and DevOps
- Data Science and Applied AI
- Community Management and Developer Advocacy
- Product Management and Design

With a culture emphasizing **continuous learning and open collaboration**, Hugging Face offers a unique opportunity to shape the future of AI alongside some of the brightest minds.

---

## Contact & Get Involved

- Visit: [huggingface.co](https://huggingface.co)  
- Join the community on GitHub, Discord, Twitter, and LinkedIn  
- Explore models, datasets, and apps, or contribute your own today!  
- Sign up to create your ML portfolio and accelerate your AI projects

---

### Hugging Face — The AI community building the future.  
_Innovate together. Build openly. Shape AI responsibly._

## Using Gradio to create a web interface for the brochure generator


In [16]:
# Web UI for the English brochure: company name and URL in, rendered markdown out.
# `gr` (gradio) comes from the notebook's import cell rather than a mid-notebook import.
view = gr.Interface(
    fn=create_brochure,
    inputs=[
        gr.Textbox(label="Company Name"),
        gr.Textbox(label="Website URL")
    ],
    outputs=gr.Markdown(label="English Brochure"),
    title="Company Brochure Generator",
    examples=[
        ["HuggingFace", "https://huggingface.com"],
        ["OpenAI", "https://openai.com"],
        ["Google", "https://google.com"]
    ],
    flagging_mode="never"
)

view.launch()

* Running on local URL:  http://127.0.0.1:7862
* To create a public link, set `share=True` in `launch()`.




In [18]:
# System prompt for the translation step: English-to-French only, keeping the
# input's structure and returning nothing but the translated text.
translate_system_prompt = """
You are a professional language translation assistant.

Your task is to translate content from English to French.

Rules:
- Translate ALL English text into French.
- Preserve the original structure, formatting, indentation, and line breaks.
- Do NOT add, remove, or reorder any content.
- Do NOT explain the translation.
- Do NOT include any text outside the translated content.
- Keep code blocks, technical terms, and proper nouns unchanged unless they are commonly translated.

Output:
- Return only the translated French content.
- The output must match the input format exactly.
"""

In [19]:
def translate_user_prompt(company_name, url):
    """Produce the text to translate: a freshly generated English brochure.

    Args:
        company_name: Name of the company the brochure is about.
        url: Address of the company's landing page.

    Returns:
        The brochure from ``create_brochure``, cut to at most 5,000
        characters. No instructions are added; the translation rules live in
        the system prompt.

    Raises:
        ValueError: If ``create_brochure`` returns something other than a string.
    """
    english_text = create_brochure(company_name, url)

    if isinstance(english_text, str):
        # Cap the length of the text sent for translation.
        return english_text[:5_000]

    raise ValueError("create_brochure() must return a string")

In [20]:
def translate_brochure(company_name, url):
    """Generate a company brochure and return it translated by the model.

    Sends ``translate_system_prompt`` and the text from
    ``translate_user_prompt`` to the "gpt-4.1-mini" chat model.

    Args:
        company_name: Name of the company the brochure is about.
        url: Address of the company's landing page.

    Returns:
        The content of the first choice in the model's reply. As a side
        effect the same text is rendered as Markdown in the notebook.
    """
    conversation = [
        {"role": "system", "content": translate_system_prompt},
        {"role": "user", "content": translate_user_prompt(company_name, url)},
    ]
    completion = openai.chat.completions.create(
        model="gpt-4.1-mini",
        messages=conversation,
    )
    translated = completion.choices[0].message.content
    # Rendering here is a convenience for Jupyter; callers still get the string.
    display(Markdown(translated))
    return translated


In [21]:
# Web UI for the French brochure: same inputs as the English app, but backed by
# translate_brochure. `gr` comes from the notebook's import cell; the duplicate
# mid-notebook import was removed.
view = gr.Interface(
    fn=translate_brochure,
    inputs=[
        gr.Textbox(label="Company Name"),
        gr.Textbox(label="Website URL")
    ],
    outputs=gr.Markdown(label="French Brochure"),
    title="Company Brochure Generator",
    examples=[
        ["HuggingFace", "https://huggingface.com"],
        ["OpenAI", "https://openai.com"],
        ["Google", "https://google.com"]
    ],
    flagging_mode="never"
)

view.launch()

* Running on local URL:  http://127.0.0.1:7863
* To create a public link, set `share=True` in `launch()`.




# Hugging Face - La communauté IA qui construit le futur

---

## À propos de Hugging Face

Hugging Face est une plateforme de collaboration de premier plan dédiée à la communauté de l'apprentissage automatique (ML). Elle sert de centre névralgique où les professionnels, chercheurs et passionnés de ML partagent, explorent, découvrent et expérimentent des modèles, ensembles de données et applications d'apprentissage automatique open source.

Ancré dans un engagement envers l'ouverture et l'éthique, Hugging Face donne les moyens à la prochaine génération d'ingénieurs, scientifiques et utilisateurs ML de collaborer à l'échelle mondiale et d’accélérer le développement des innovations en IA.

---

## Ce que propose Hugging Face

- **Dépôt de modèles**  
  Accédez à plus de 2 millions de modèles d’apprentissage automatique couvrant diverses modalités incluant texte, image, vidéo, audio et même 3D. Les modèles sont continuellement mis à jour et sélectionnés, présentant des applications de pointe telles que le traitement du langage, la génération d’image, et plus encore.

- **Hub de datasets**  
  Parcourez plus de 500 000 ensembles de données divers supportant la recherche et le développement de pointe, avec des mises à jour fréquentes par des contributeurs du monde entier.

- **Spaces**  
  Explorez et déployez plus d’un million d’applications propulsées par ML hébergées sur Hugging Face, permettant aux utilisateurs de tester et de présenter en temps réel des démos et outils d’IA innovants.

- **Écosystème open source**  
  Profitez de la pile open source de Hugging Face pour accélérer les cycles de développement, favorisant la transparence, la reproductibilité et l’amélioration communautaire.

- **Solutions entreprises**  
  Pour les entreprises, Hugging Face propose des ressources de calcul payantes et des solutions personnalisables de niveau entreprise conçues pour faire évoluer les projets d’IA de manière sécurisée et efficace.

---

## Communauté & Culture

Hugging Face soutient une communauté dynamique, inclusive et mondiale où la collaboration est primordiale. La plateforme encourage les utilisateurs à :

- Partager librement modèles, ensembles de données et applications  
- Construire et présenter leur portfolio ML au monde entier  
- Participer à l’échange de connaissances via les forums, la documentation et les événements  
- Contribuer à un écosystème IA éthique respectant l’ouverture et l’innovation responsable  

Cette culture d’ouverture accélère l’apprentissage et le progrès collectif en technologie IA, accueillant des milliers de contributeurs et utilisateurs actifs à travers le monde.

---

## Clients & Utilisateurs

Hugging Face sert un large public incluant :

- Chercheurs indépendants et amateurs  
- Ingénieurs IA et data scientists de premier plan dans l’industrie  
- Institutions académiques et centres de recherche  
- Entreprises recherchant une infrastructure IA évolutive et une expertise dédiée  

La plateforme supporte un large éventail de cas d’usage, allant de la compréhension du langage naturel à la vision par ordinateur et au traitement de la parole, en faisant un centre incontournable pour l’innovation en ML.

---

## Carrières chez Hugging Face

Rejoignez une entreprise avant-gardiste à l’intersection de l’innovation en IA et de la construction communautaire. Hugging Face propose des carrières dans :

- Ingénierie en apprentissage automatique  
- Science des données et recherche  
- Développement logiciel et infrastructure  
- Gestion communautaire et relations développeurs  
- Solutions entreprises et réussite client  

Les employés évoluent dans une culture valorisant l’ouverture, la créativité, l’apprentissage continu et la collaboration — avec pour mission de contribuer à l’avenir d’une IA éthique.

---

## Pourquoi choisir Hugging Face ?

- Accédez et contribuez à l’un des plus grands dépôts ML open source au monde.  
- Participez à une communauté IA dynamique et mondiale façonnant ensemble le futur.  
- Utilisez des outils et infrastructures de pointe pour accélérer vos projets IA.  
- Construisez un profil et portfolio ML professionnel visible par des millions.  
- Engagez-vous dans des pratiques transparentes et éthiques de développement et déploiement IA.  

---

## Connectez-vous & Explorez

- Visitez [huggingface.co](https://huggingface.co) pour parcourir modèles, datasets et Spaces.  
- Rejoignez la communauté pour commencer à partager votre travail et apprendre des experts du monde entier.  
- Explorez les options entreprises pour intégrer l’IA de manière fluide dans votre activité.  

---

**Hugging Face — Donner le pouvoir à la communauté IA pour construire le futur.**