In [1]:
import os
import json
from scrapper2 import get_contents, get_website_links
from IPython.display import Markdown, display, update_display

from dotenv import load_dotenv
from openai import OpenAI


In [2]:
# Load variables from a local .env file; override=True lets values from the
# file replace ones already present in the process environment.
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Surface a missing key here instead of as an authentication error on the
# first API call further down the notebook.
if not api_key:
    print("OPENAI_API_KEY is not set - add it to your .env file before calling the API")

# Step 1: narrowing down to only relevant links

### building the prompts

In [3]:
# System prompt for the link-selection call. The example below must itself be
# valid JSON (no trailing commas) because the model is asked to imitate it.
link_system_prompt = """
You are provided with a list of links found on a webpage.
You are able to decide which of the links would be most relevant to include in a brochure about the company,
such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:

{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://full.url/careers"}
    ]
}
"""


def get_links_user_prompt(url):
    """Build the user message for the link-selection call.

    The instruction header names ``url`` and is followed by the links
    returned by ``get_website_links(url)``, joined one per line.
    """
    instructions = f"""
    Here is the list of links on the website {url} -
    Please decide which of these are relevant web links for a brochure about the company,
    respond with the full https URL in JSON format.
    Do not include Terms of SErvice, Privacy, email links.

    Links (some might be relative links):
    """

    link_lines = "\n".join(get_website_links(url))
    return instructions + link_lines

### making the call to openai

In [4]:
def select_relevant_links(url):
    """Ask the model which links on ``url`` belong in a company brochure.

    Sends ``link_system_prompt`` together with the link list built by
    ``get_links_user_prompt(url)`` to the chat completions API in JSON mode
    and parses the reply.

    Args:
        url: Address of the website whose links should be filtered.

    Returns:
        dict: The parsed JSON reply. It always carries a ``"links"`` list
        (possibly empty), so callers can iterate over it unconditionally.

    Raises:
        json.JSONDecodeError: If the reply text is not valid JSON.
    """
    openai = OpenAI()
    response = openai.chat.completions.create(
        model="gpt-5-nano",
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(url)}
        ],
        response_format={"type": "json_object"}
    )

    result = response.choices[0].message.content
    links = json.loads(result)
    # JSON mode only promises syntactically valid JSON, not the schema shown
    # in the system prompt, so normalise a missing or malformed "links" entry
    # instead of failing with a KeyError here or in the caller.
    if not isinstance(links.get("links"), list):
        links["links"] = []
    print(f"Found {len(links['links'])} relevant links")
    return links


#select_relevant_links("https://edwarddonner.com")

## Step2: making the brochure

In [5]:
def fetch_page_and_all_relevant_links(url):
    """Collect the landing page of ``url`` plus every brochure-relevant page.

    Args:
        url: Address of the company's landing page.

    Returns:
        str: The landing-page contents under a "## Landing page:" heading,
        followed by one "### Link: <type>" section for each link chosen by
        ``select_relevant_links(url)``.
    """
    # The landing page must come from the same site as the selected links,
    # so fetch ``url`` itself.
    contents = get_contents(url)
    relevant_links = select_relevant_links(url)
    sections = [f"## Landing page:\n\n{contents}\n## Relevant Links:\n"]
    for link in relevant_links['links']:
        sections.append(f"\n\n### Link: {link['type']}\n")
        sections.append(get_contents(link["url"]))
    # Join once at the end rather than growing a string inside the loop.
    return "".join(sections)

In [6]:
#print(fetch_page_and_all_relevant_links("https://huggingface.co"))

## Step3: creating the prompts

In [7]:
# System prompt that fixes the brochure's audience, output format and content.
brochure_system_prompt = """
You are an assistant that analyzes the contents of several relevant pages from a company website
and creates a short brochure about the company for prospective customers, investors and recruits.
Respond in markdown without code blocks.
Include details of company culture, customers and careers/jobs if you have the information
"""

## or uncomment the lines below for a more humorous brochure - this demonstrates how easy it is to incorporate the 'tone':

# brochure_system_prompt = """
# You are an assistant that analyzes the contents of several relevant pages from a company website
# and creates a short, humorous, entertaining, witty brochure about the company for prospective customers, investors and recruits.
# Respond in markdown without code blocks.
# Include details of company culture, customers and careers/jobs if you have the information
# """


In [8]:
def get_brochure_user_prompt(company_name, url, max_chars=5_000):
    """Build the user message for the brochure-writing call.

    Args:
        company_name: Name of the company, interpolated into the prompt.
        url: Landing-page address passed to
            ``fetch_page_and_all_relevant_links``.
        max_chars: Upper bound on the length of the returned prompt
            (defaults to 5,000 characters). Pass ``None`` to disable
            truncation.

    Returns:
        str: The instruction header followed by the fetched page contents,
        cut to at most ``max_chars`` characters.
    """
    user_prompt = f"""
    You are looking at a company called: {company_name}
    Here are the contents of its landing page and other relevant pages;
    use this information to build a short brochure of the company in markdown without code blocks.\n\n
    """

    user_prompt += fetch_page_and_all_relevant_links(url)
    # Slicing with ``None`` as the stop keeps the whole string.
    return user_prompt[:max_chars]

In [9]:
## checking what the user_prompt looks like
#get_brochure_user_prompt("HuggingFace", "https://huggingface.co")

## Step4: creating the brochure

In [10]:
def create_brochure(company_name, url):
    """Generate a brochure for ``company_name`` and render it as Markdown.

    The completion is requested in a single, non-streaming call and is
    displayed once the full reply is available. Nothing is returned.
    """
    client = OpenAI()
    conversation = [
        {"role": "system", "content": brochure_system_prompt},
        {"role": "user", "content": get_brochure_user_prompt(company_name, url)},
    ]
    completion = client.chat.completions.create(
        model="gpt-4.1-mini",
        messages=conversation,
    )
    brochure_markdown = completion.choices[0].message.content
    display(Markdown(brochure_markdown))

In [11]:
#create_brochure("HuggingFace", "https://huggingface.co")

### creating stream_brochure

The previous function displayed the output once, after the whole response was available. Here we explore how to display the response as it streams in.

In [12]:
def stream_brochure(company_name, url):
    """Generate a brochure and render it incrementally while it streams in.

    Args:
        company_name: Name of the company the brochure is about.
        url: Landing-page address used to build the user prompt.

    Returns:
        None. A single Markdown display is created and then updated in place
        with the text accumulated so far.
    """
    openai = OpenAI()
    stream = openai.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[
            {"role": "system", "content": brochure_system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
        ],
        stream=True
    )

    response = ""
    # display_id=True returns a handle whose id lets later calls redraw the
    # same output area instead of appending new ones.
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        # A streamed chunk can arrive with an empty ``choices`` list (for
        # example a usage-only chunk); it carries no text, so skip it rather
        # than fail on ``choices[0]``.
        if not chunk.choices:
            continue
        # ``delta.content`` may be None on chunks without text.
        response += chunk.choices[0].delta.content or ''
        update_display(Markdown(response), display_id=display_handle.display_id)

In [13]:
# Run the streaming variant end to end for Hugging Face; this calls the OpenAI API.
stream_brochure("HuggingFace", "https://huggingface.co")

Found 9 relevant links


# Hugging Face: Democratizing Machine Learning for Everyone

---

### Who We Are

Hugging Face is the collaboration platform at the heart of the machine learning (ML) community. We provide an open, ethical space where engineers, scientists, and end-users can share, explore, discover, and experiment with open-source ML models, datasets, and tools. Our mission is to democratize **good** machine learning, making advanced AI technology accessible and useful for all.

---

### What We Offer

- **Hugging Face Hub:** A central platform to host and discover thousands of open-source models and datasets.
- **Spaces:** An interactive environment where users can deploy and share ML applications.
- **Enterprise Solutions:** Tools and scalable services tailored for business needs.
- **State-of-the-Art Research:** Our talented science team pushes the boundaries of what AI can do.
- **Open-Source Libraries:** Widely used tools like Transformers and Diffusers trusted by millions worldwide.

---

### Our Community

With a fast-growing community of over 67,000 followers and hundreds of active contributors, Hugging Face fosters collaboration and knowledge sharing. From AI researchers to hobbyists, everyone finds value in our inclusive and supportive environment.

- Active datasets and models continuously updated by community members.
- A lively social presence on GitHub, Twitter, LinkedIn, and Discord.
- Educational content like tutorials, blog articles, and new learning tracks (e.g., Hugging Face Fundamentals with DataCamp).

---

### Company Culture

Our culture is rooted in openness, innovation, and shared learning. We believe AI should be developed responsibly and ethically — "one commit at a time." We encourage curiosity, collaboration, and contribution from all levels of expertise. If transforming the future of AI together excites you, there's a place for you here.

---

### Careers at Hugging Face

Join a team of 195+ passionate professionals shaping the AI frontier. We're always looking for talented individuals in engineering, research, product, and community roles.

- Work with cutting-edge ML technologies and proprietary research.
- Collaborate in a diverse and inclusive environment.
- Grow your skills through continuous learning and contribution.
  
For job opportunities and application details, visit our [Jobs Page](https://huggingface.co/jobs).

---

### Why Choose Hugging Face?

- **Trusted by Innovators:** Hugging Face powers numerous AI-driven products and research projects worldwide.
- **Award-Winning Platform:** Known for seamless user experience in open-source ML exploration.
- **Ethical AI Commitment:** Developing technologies that prioritize transparency and fairness.
- **Vibrant Ecosystem:** Integrates seamlessly with major ML frameworks and cloud providers.

---

### Connect With Us

- Website: [huggingface.co](https://huggingface.co)
- GitHub: github.com/huggingface
- Twitter: @huggingface
- LinkedIn: Hugging Face
- Discord community for real-time collaboration

---

**Hugging Face**  
Democratizing Machine Learning — One Commit at a Time

---

*For press and business inquiries, email: contact@huggingface.co*