In [5]:
import os
import json
from dotenv import load_dotenv
from scraper import fetch_website_contents, fetch_website_links
from openai import OpenAI
from IPython.display import display, Markdown, update_display



In [6]:
load_dotenv(override=True)

GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
GEMINI_BASE = "https://generativelanguage.googleapis.com/v1beta/openai/"
GROQ_BASE = "https://api.groq.com/openai/v1"

groq_client = OpenAI(api_key=GROQ_API_KEY, base_url=GROQ_BASE)
google_client = OpenAI(api_key=GOOGLE_API_KEY, base_url=GEMINI_BASE)

In [7]:
link_system_prompt = """
You are provided with a list of links found on a webpage.
You are able to decide which of the links would be most relevant to include in a brochure about the company,
such as links to an About page, or a Company page, or Careers/Jobs pages. Include All the links that were passed into relevant type.
You should respond in JSON as in this example:

{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [8]:
def get_links_user_prompt(url):
    user_prompt = f"""
Here is the list of links on the website {url} -
Please decide which of these are relevant web links for a brochure about the company, 
respond with the full https URL in JSON format.
Do not include Terms of Service, Privacy, email links.

Links (some might be relative links):

"""
    links = fetch_website_links(url)
    user_prompt += "\n".join(links)
    return user_prompt

In [9]:
def select_relevant_links(model,url):
    print(f"Selecting relevant links for {url} by calling {model}")
    if model == "openai/gpt-oss-20b":
        openai = groq_client
    elif model == "gemini-2.5-flash":
        openai = google_client

    response = openai.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(url)}
        ],
        response_format={"type": "json_object"}
    )
    result = response.choices[0].message.content
    links = json.loads(result)
    print(f"Found {len(links['links'])} relevant links")
    return links

In [10]:
def fetch_page_and_all_relevant_links(model,url):
    contents = fetch_website_contents(url)
    print(contents)
    relevant_links = select_relevant_links(model,url)
    result = f"## Landing Page:\n\n{contents}\n## Relevant Links:\n"
    for link in relevant_links['links']:
        result += f"\n\n### Link: {link['type']}\n"
        result += fetch_website_contents(link["url"])
    return result

In [11]:
brochure_system_prompt = """
You are an assistant that analyzes the contents of several relevant pages from a company website
and creates a short brochure about the company for prospective customers, investors and recruits.
Respond in markdown without code blocks.
Include details of company culture, customers and careers/jobs if you have the information.
"""

In [12]:
def get_brochure_user_prompt(company_name, model,url):
    user_prompt = f"""
You are looking at a company called: {company_name}
Here are the contents of its landing page and other relevant pages;
use this information to build a short brochure of the company in markdown without code blocks.\n\n
"""
    user_prompt += fetch_page_and_all_relevant_links(model,url)
    user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters
    return user_prompt

In [13]:
def create_brochure(company_name, Model, url):
    if Model == "Groq":
        openai = groq_client
        model = "openai/gpt-oss-20b"
    elif Model == "Gemini":
        openai = google_client
        model = "gemini-2.5-flash"
    response = openai.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": brochure_system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, model, url)}
        ],
    )
    result = response.choices[0].message.content
    display(Markdown(result))

In [14]:
create_brochure("HuggingFace", "Groq","https://huggingface.co")

Hugging Face ‚Äì The AI community building the future.

Hugging Face
Models
Datasets
Spaces
Community
Docs
Enterprise
Pricing
Log In
Sign Up
The AI community building the future.
The platform where the machine learning community collaborates on models, datasets, and applications.
Explore AI Apps
or
Browse 2M+ models
Trending on
this week
Models
zai-org/GLM-Image
Updated
about 13 hours ago
‚Ä¢
2.44k
‚Ä¢
685
fal/Qwen-Image-Edit-2511-Multiple-Angles-LoRA
Updated
8 days ago
‚Ä¢
44.1k
‚Ä¢
681
Lightricks/LTX-2
Updated
1 day ago
‚Ä¢
1.19M
‚Ä¢
1.04k
openbmb/AgentCPM-Explore
Updated
1 day ago
‚Ä¢
315
‚Ä¢
287
Kijai/LTXV2_comfy
Updated
1 day ago
‚Ä¢
41.1k
‚Ä¢
279
Browse 2M+ models
Spaces
Running
on
Zero
Featured
787
Qwen Image Multiple Angles 3D Camera
üé•
787
Adjust camera angles in images using 3D controls or sliders
Running
on
Zero
MCP
1.47k
Z Image Turbo
üñº
1.47k
Generate stunning images from text descriptions in seconds
Running
Featured
4.2k
Wan2.2 Animate
üëÅ
4.2k
Wan2.2 Animate
Running

# Hugging‚ÄØFace ‚Äì The AI Community Building the Future  

---

## Who We Are  
Hugging‚ÄØFace is the world‚Äôs first open‚Äësource collaboration hub for machine‚Äëlearning.  
- **Platform** where researchers, developers, and companies share, discover, and improve models, datasets, and end‚Äëto‚Äëend applications.  
- Home to **over 2‚ÄØmillion models, 1‚ÄØmillion+ apps (‚ÄúSpaces‚Äù), and 500‚ÄØk+ datasets** spanning text, image, video, audio, and 3D.  
- Built on the **HF Open‚ÄëSource stack** ‚Äì no‚Äëcode, zero‚Äëcompute demos, and the ability to spin up paid compute for production workloads.

---

## What We Offer  

| Feature | Highlights |
|---------|------------|
| **Models** | Browse, fine‚Äëtune, and deploy state‚Äëof‚Äëthe‚Äëart models (e.g., GLM‚ÄëImage, Qwen‚ÄëImage‚ÄëEdit, LTX‚Äë2). |
| **Datasets** | Curated collections from academia, industry, and the community; fast, easy loading in Hugging‚ÄØFace ü§ó. |
| **Spaces** | Instant demos: 3D image editing, text‚Äëto‚Äëimage, video generation, and more‚Äîall running on free ‚ÄúZero‚Äù compute. |
| **Enterprise** | Managed hosting, security, governance, and integration for large‚Äëscale ML deployments. |
| **Docs & Community** | Rich tutorials, forums, Discord, and Hugging‚ÄØFace‚ÄØHub for collaborative learning. |

---

## Culture & Community  

- **Open‚Äësource first** ‚Äì every model, dataset, or Space can be forked, improved, and shared back to the community.  
- **Collaboration at scale** ‚Äì contributors from research labs, startups, and Fortune‚ÄØ500 firms build together.  
- **Inclusivity & Accessibility** ‚Äì multilingual documentation, low‚Äëlatency demos, and support for under‚Äërepresented regions.  
- **Fast‚Äëforward Innovation** ‚Äì rapid iteration cycles, community‚Äëcurated benchmarks, and a commitment to responsible AI.

---

## Who Uses Hugging‚ÄØFace  

| Sector | Example Use Cases |
|--------|-------------------|
| **Research** | Benchmarking NLP, CV, and multimodal models; reproducible experiments. |
| **Start‚Äëups** | Rapid prototyping of AI products; embedding models into SaaS. |
| **Enterprise** | Customer‚Äësupport chatbots, content moderation, medical imaging diagnostics. |
| **Education** | Teaching ML concepts with hands‚Äëon code; building student portfolios. |

---

## Careers at Hugging‚ÄØFace  

Hugging‚ÄØFace is growing fast ‚Äì we‚Äôre hiring engineers, researchers, product managers, and community builders.  
- **Open Positions** ‚Äì Explore current opportunities on our careers page and join a team that thrives on experimentation and impact.  
- **What We Value** ‚Äì Curiosity, ownership, and a passion for building tools that make AI accessible to all.  
- **Benefits** ‚Äì Remote‚Äëfirst flexibility, equity, generous leave, and a culture that celebrates learning and collaboration.

---

## Ready to Build the Future Together?  

- **Sign‚ÄØUp** ‚Äì Join the Hugging‚ÄØFace Hub, upload a model, dataset, or Space.  
- **Explore** ‚Äì Browse 2‚ÄØmillion+ models or 1‚ÄØmillion+ apps to spark ideas.  
- **Collaborate** ‚Äì Contribute to the community, fork projects, and share your work.

**Hugging‚ÄØFace** ‚Äì Where machine‚Äëlearning innovators unite to create, share, and accelerate AI for everyone.