In [5]:
import os
import json
from dotenv import load_dotenv
from IPython.display import Markdown, display, update_display
from scraper import Website, fetch_website_links, fetch_website_contents
from openai import OpenAI

In [12]:
# Load environment variables, sanity-check the OpenRouter key, and build the shared client

load_dotenv(override=True)
api_key = os.environ.get('OPENROUTER_API_KEY')

# Shape check only (prefix + minimum length); it does not prove the key is valid.
print(
    "API key looks good so far"
    if api_key and api_key.startswith('sk-or-') and len(api_key) > 10
    else "There might be a problem with your API key? Please visit the troubleshooting notebook!"
)

# Alternative model: 'openai/gpt-4o-mini'
MODEL = 'anthropic/claude-haiku-4-5'

# OpenAI SDK client pointed at the OpenRouter endpoint; reused by every cell below.
openai = OpenAI(api_key=api_key, base_url="https://openrouter.ai/api/v1")


# resp = openai.chat.completions.create(
#     model=MODEL,
#     messages=[
#         {"role": "system", "content": system_prompt},
#         {"role": "user", "content": user_prompt},
#     ],
# )

# print(resp.choices[0].message.content)



API key looks good so far


In [13]:
from urllib.parse import urljoin, urlparse

# System prompt for the link-selection request made in select_relevant_links():
# it tells the model to pick brochure-worthy links and to answer with a JSON
# object shaped like the example embedded below ({"links": [{"type", "url"}, ...]}).
system_prompt = """
You are provided with a list of links found on a webpage.
You are able to decide which of the links would be most relevant to include in a brochure about the company,
such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:

{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

def get_links_user_prompt(url):
    """Build the user prompt that lists the http(s) links found on ``url``.

    The links returned by ``fetch_website_links(url)`` are resolved to absolute
    URLs, filtered down to the ``http``/``https`` schemes, de-duplicated (first
    occurrence wins, order preserved) and appended one per line to a fixed
    instruction header.

    Args:
        url: Address of the page whose links should be offered to the model.

    Returns:
        The complete user prompt as a single string.
    """
    absolute_links = []
    for link in fetch_website_links(url):
        # Skip missing/blank hrefs: they would only resolve back to the page itself.
        if not isinstance(link, str) or not link.strip():
            continue
        try:
            # Resolve against the page URL (not just scheme://host) so that
            # document-relative hrefs such as "team" or "../jobs" keep their path.
            absolute = urljoin(url, link.strip())
            scheme = urlparse(absolute).scheme
        except ValueError:
            # urllib raises ValueError for malformed hrefs (e.g. a broken IPv6 host).
            continue
        # Exact scheme match drops mailto:, javascript:, tel:, etc.
        if scheme in ("http", "https"):
            absolute_links.append(absolute)
    unique_links = list(dict.fromkeys(absolute_links))  # deduplicate, preserve order

    # Written line by line so the trailing space after "company," stays visible.
    user_prompt = (
        f"Here is the list of links on the website {url} -\n"
        "Please decide which of these are relevant web links for a brochure about the company, \n"
        "respond with the full https URL in JSON format.\n"
        "Do not include Terms of Service, Privacy, email links.\n"
        "\n"
        "Links:\n"
        "\n"
    )
    user_prompt += "\n".join(unique_links)
    return user_prompt


In [14]:
def _parse_json_object(text):
    """Decode the JSON object embedded in a model reply (fences/prose tolerated)."""
    cleaned = (text or "").strip()
    if not cleaned:
        raise ValueError("Model returned an empty response; expected a JSON object")
    # Strip markdown code fences if the model wraps JSON in ```json ... ```
    if cleaned.startswith("```"):
        cleaned = cleaned.split("```")[1]
        if cleaned.startswith("json"):
            cleaned = cleaned[4:]
        cleaned = cleaned.strip()
    # Keep only the outermost {...} span in case there is surrounding prose
    start, end = cleaned.find("{"), cleaned.rfind("}")
    if start != -1 and end > start:
        cleaned = cleaned[start:end + 1]
    parsed = json.loads(cleaned)
    if not isinstance(parsed, dict):
        raise ValueError(f"Expected a JSON object from the model, got {type(parsed).__name__}")
    return parsed


def select_relevant_links(url):
    """Ask the model which links on ``url`` belong in a company brochure.

    Sends ``system_prompt`` plus the prompt built by ``get_links_user_prompt(url)``
    to ``MODEL`` and decodes the JSON object found in the reply.

    Args:
        url: Address of the page whose links should be classified.

    Returns:
        The decoded dict. It always contains a ``"links"`` list; the list is
        empty when the model omitted the key or supplied a non-list value.

    Raises:
        ValueError: If the reply is empty, is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass), or its
            top-level value is not an object.
    """
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_links_user_prompt(url)}
        ],
    )
    # message.content can be None; the helper turns that into a clear ValueError.
    parsed = _parse_json_object(response.choices[0].message.content)
    # Callers iterate result["links"], so guarantee the key holds a list.
    if not isinstance(parsed.get("links"), list):
        parsed["links"] = []
    return parsed
    

In [None]:
select_relevant_links("https://huggingface.co")

In [15]:
def fetch_page_and_all_relevant_links(url):
    """Return the landing-page text followed by the text of each selected link.

    The landing page is fetched with ``fetch_website_contents(url)``; the pages
    to append come from ``select_relevant_links(url)["links"]``, each entry
    supplying a ``"type"`` (used as the section heading) and a ``"url"``.

    Args:
        url: Address of the company's landing page.

    Returns:
        One markdown-style string: a "Landing Page" section, then one
        "### Link: <type>" section per selected link.
    """
    landing_page = fetch_website_contents(url)
    selection = select_relevant_links(url)

    # Collect the pieces and join once instead of growing a string in the loop.
    sections = [f"## Landing Page:\n\n{landing_page}\n## Relevant Links:\n"]
    for entry in selection['links']:
        sections.append(f"\n\n### Link: {entry['type']}\n")
        sections.append(fetch_website_contents(entry["url"]))
    return "".join(sections)

In [62]:
print(fetch_page_and_all_relevant_links("https://edwarddonner.com"))

## Landing Page:

Home - Edward Donner

Home
AI Curriculum
Proficient AI Engineer
Connect Four
Outsmart
An arena that pits LLMs against each other in a battle of diplomacy and deviousness
About
Posts
Well, hi there.
I’m Ed. I like writing code and experimenting with LLMs, and hopefully you’re here because you do too. I also enjoy amateur electronic music production (
very
amateur) and losing myself in
Hacker News
, nodding my head sagely to things I only half understand.
I’m the co-founder and CTO of
Nebula.io
. We’re applying AI to a field where it can make a massive, positive impact: helping people discover their potential and pursue their reason for being. I’m previously the founder and CEO of AI startup untapt,
acquired in 2021
.
I will happily drone on for hours about LLMs to anyone in my vicinity. My friends got fed up with my impromptu lectures, and convinced me to make some Udemy courses. To my total joy (and shock) they’ve become best-selling, top-rated courses, with 400,000 e

In [68]:
# System prompt for the brochure-writing requests made by create_brochure() and
# stream_brochure(): asks for a short markdown brochure (no code fences) built
# from the scraped page contents supplied in the user prompt.
brochure_system_prompt = """
You are an assistant that analyzes the contents of several relevant pages from a company website
and creates a short brochure about the company for prospective customers, investors and recruits.
Respond in markdown without code blocks.
Include details of company culture, customers and careers/jobs if you have the information.
"""

# Or uncomment the lines below for a more humorous brochure - this demonstrates how easy it is to incorporate 'tone':

# brochure_system_prompt = """
# You are an assistant that analyzes the contents of several relevant pages from a company website
# and creates a short, humorous, entertaining, witty brochure about the company for prospective customers, investors and recruits.
# Respond in markdown without code blocks.
# Include details of company culture, customers and careers/jobs if you have the information.
# """

In [16]:
def get_brochure_user_prompt(company_name, url, max_chars=5_000):
    """Build the user prompt for brochure generation.

    The prompt is a short instruction header naming the company, followed by
    the text returned by ``fetch_page_and_all_relevant_links(url)``, cut off
    after ``max_chars`` characters to bound the request size.

    Args:
        company_name: Name inserted into the instruction header.
        url: Landing-page address whose contents are appended.
        max_chars: Maximum length of the returned prompt, header included.
            Defaults to 5,000 (the previous hard-coded limit); pass ``None``
            to disable truncation.

    Returns:
        The prompt string, at most ``max_chars`` characters long.

    Raises:
        ValueError: If ``max_chars`` is negative.
    """
    if max_chars is not None and max_chars < 0:
        # A negative slice bound would silently trim from the end instead.
        raise ValueError("max_chars must be non-negative or None")
    user_prompt = f"""
You are looking at a company called: {company_name}
Here are the contents of its landing page and other relevant pages;
use this information to build a short brochure of the company in markdown without code blocks.\n\n
"""
    user_prompt += fetch_page_and_all_relevant_links(url)
    return user_prompt[:max_chars]  # [:None] keeps the whole string

In [None]:
get_brochure_user_prompt("HuggingFace", "https://huggingface.co")

In [17]:
def create_brochure(company_name, url):
    """Generate a brochure for the company and render it as Markdown in the notebook.

    Builds the user prompt with ``get_brochure_user_prompt``, sends it together
    with ``brochure_system_prompt`` to ``MODEL``, and displays the reply.

    Args:
        company_name: Name of the company the brochure is about.
        url: Landing-page address used to gather the source material.

    Returns:
        None. The brochure is shown via ``display`` rather than returned.
    """
    brochure_request = get_brochure_user_prompt(company_name, url)
    conversation = [
        {"role": "system", "content": brochure_system_prompt},
        {"role": "user", "content": brochure_request},
    ]
    completion = openai.chat.completions.create(model=MODEL, messages=conversation)
    display(Markdown(completion.choices[0].message.content))

In [69]:
create_brochure("HuggingFace", "https://huggingface.co")

# Hugging Face Brochure

## About Hugging Face

Hugging Face is the leading AI community platform building the future of machine learning. We provide a collaborative ecosystem where the global ML community creates, discovers, and shares innovative AI models, datasets, and applications.

## What We Offer

### Core Platform Features

**Models & Applications**
- Access to 2M+ pre-trained machine learning models
- Browse 1M+ AI applications and demonstrations
- Host and collaborate on unlimited public models, datasets, and applications

**Comprehensive Coverage**
- Support for all modalities: text, image, video, audio, and 3D
- Trending models updated daily from leading organizations (Qwen, NVIDIA, MiniMax, and more)
- 500k+ datasets available for training and research

**Developer Tools**
- HuggingChat Omni for conversational AI
- Inference capabilities deployable in seconds
- Open source ML stack for accelerated development
- Documentation and community support

### Paid Solutions
- Compute resources for model training and deployment
- Enterprise solutions for organizational scaling

## Community & Culture

Hugging Face is built on collaboration and openness. With 82,800+ followers and active community engagement, we foster a culture where:

- Developers build their ML portfolios and gain recognition
- Researchers share cutting-edge papers and findings
- Organizations contribute and benefit from shared resources
- The global AI community accelerates innovation together

## Getting Started

Whether you're exploring AI applications, training custom models, or deploying production systems, Hugging Face provides free and paid options to match your needs. Join the community, browse our extensive model hub, and start building the future of AI today.

### Typewriter Animation

In [18]:
def stream_brochure(company_name, url):
    """Generate a brochure and render it incrementally as the reply streams in.

    Sends ``brochure_system_prompt`` plus the prompt from
    ``get_brochure_user_prompt`` to ``MODEL`` with ``stream=True`` and
    refreshes a single notebook display each time new text arrives.

    Args:
        company_name: Name of the company the brochure is about.
        url: Landing-page address used to gather the source material.

    Returns:
        None. The brochure is shown via ``display``/``update_display``.
    """
    stream = openai.chat.completions.create(
        # Use the notebook-wide MODEL so streaming matches the other calls
        # instead of a separately hard-coded model id.
        model=MODEL,
        messages=[
            {"role": "system", "content": brochure_system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
        ],
        stream=True
    )
    response = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        # Some chunks carry no choices at all; indexing [0] on them would raise.
        if not chunk.choices:
            continue
        piece = chunk.choices[0].delta.content
        if not piece:
            # Role-only / finish chunks have no text; nothing new to render.
            continue
        response += piece
        update_display(Markdown(response), display_id=display_handle.display_id)

In [72]:
stream_brochure("HuggingFace", "https://huggingface.co")

# Hugging Face Brochure

---

## About Hugging Face

**Hugging Face** is a vibrant AI community and platform dedicated to building the future of machine learning (ML). It is a central hub where researchers, developers, and enterprises collaborate to create, share, and innovate across AI models, datasets, and applications. With a mission to democratize machine learning, Hugging Face provides a powerful open-source stack and enterprise solutions that accelerate ML development and deployment.

---

## What We Offer

### Collaborative Platform  
- **Models:** Access and contribute to over 2 million AI models spanning various modalities such as text, image, video, audio, and even 3D.  
- **Datasets:** Discover and share from a library of over 500,000 datasets curated for diverse AI research and applications.  
- **Spaces:** Host and run applications, demos, and AI-powered experiments with an easy-to-use platform supporting community creations.  
- **Apps:** Explore 1 million+ AI applications built by the community to solve real-world problems.

### Enterprise Solutions  
- Scalable **paid Compute and Enterprise solutions** tailored for teams and businesses seeking dependable AI infrastructure.  
- Streamlined tools and integrations to accelerate ML workflows for enterprise-grade projects.

---

## Company Culture

- Built on **open collaboration**, inclusion, and transparency with a strong focus on community engagement.  
- Encourages **building portfolios & profiles** to showcase innovators' and developers' contributions within the AI ecosystem.  
- Promotes **continuous innovation** through open source projects and active community activities such as dataset contributions and open research.  
- Embraces a fast-paced environment, leveraging the latest developments like GGML and llama.cpp integrations for cutting-edge AI capabilities. 

---

## Community and Customers

- Over **82,000+ followers** engaged in AI and ML interests worldwide.  
- Users range from individual researchers and hobbyists to large organizations and enterprises seeking AI model deployments.  
- Active community contributions constantly updating datasets, models, and ML applications to keep pace with advancing AI research.  
- Popular models and tools include **Qwen/Qwen3.5-397B-A17B**, **nvidia/personaplex-7b-v1**, and interactive AI chat solutions like **HuggingChat Omni**.

---

## Careers and Opportunities

- Join a **global AI community at the forefront of innovation** in machine learning and AI technology.  
- Opportunities to contribute to open-source AI projects or work on enterprise solutions that impact real-world customers.  
- Work in a culture that values collaboration, lifelong learning, and diversity.  
- Build a meaningful career by sharing your expertise and growing your profile with a worldwide audience in the AI space.  
- Stay tuned for career openings by signing up or logging into the Hugging Face platform.

---

## Get Started

- **Explore AI models, datasets, and apps:** Visit [huggingface.co](https://huggingface.co)  
- **Sign up for free:** Create your profile and start collaborating with the leading AI community.  
- **Enterprise Solutions:** Contact Hugging Face for tailored solutions to accelerate your team’s AI transformation.

---

## Join Us

Become part of the AI revolution with Hugging Face — The AI community building the future.  
Create, discover, and collaborate on machine learning that shapes tomorrow.

---

For more information, visit: [huggingface.co](https://huggingface.co)

In [19]:
import re
from pathlib import Path
from fpdf import FPDF

# ── Colour palette ───────────────────────────────────────────────
# RGB triples, unpacked with * into the FPDF set_fill_color / set_text_color /
# set_draw_color calls inside create_brochure_generate_pdf().
C_NAVY   = (22,  60, 110)    # header banner
C_BLUE   = (41,  98, 200)    # H2 accent
C_TEAL   = (0,  150, 136)    # H3 + rules
C_ORANGE = (210,  90,  20)   # bullet marker
C_DARK   = (40,  44,  52)    # body text
C_WHITE  = (255, 255, 255)

# System prompt for the PDF brochure: tighter than brochure_system_prompt, it
# caps the length and prescribes the heading/bullet structure that the
# line-by-line renderer in create_brochure_generate_pdf() styles
# ("# ", "## ", "### ", bullet prefixes).
BROCHURE_SYSTEM_PROMPT = """
You are an assistant that analyzes the contents of several relevant pages from a company website
and creates a concise, precise brochure (300-400 words max) for prospective customers, investors and recruits.
Respond in markdown without code blocks. Be brief and impactful.
Use exactly one H1 (company name), H2 section headings, H3 sub-headings, bullet lists, and short paragraphs.
Cover: what the company does, company culture, key customers, and careers - only if info is available.
"""

# Typographic characters outside Latin-1 (the encoding safe() falls back to),
# mapped to plain stand-ins so they are not rendered as "?" in the PDF.
_PDF_CHAR_FALLBACKS = str.maketrans({
    "\u2013": "-",     # en dash
    "\u2014": "--",    # em dash
    "\u2018": "'",     # left single quote
    "\u2019": "'",     # right single quote / apostrophe
    "\u201c": '"',     # left double quote
    "\u201d": '"',     # right double quote
    "\u2026": "...",   # ellipsis
    "\u2022": "-",     # bullet
    "\u2192": "->",    # right arrow
    "\u2122": "(TM)",  # trademark sign
})


def create_brochure_generate_pdf(company_name, url):
    """Generate a brochure with the LLM, show it in the notebook, and save it as a PDF.

    Steps:
      1. Ask ``MODEL`` for a markdown brochure using ``BROCHURE_SYSTEM_PROMPT``
         and the prompt from ``get_brochure_user_prompt``; display the markdown.
      2. Render the markdown line by line into an FPDF document: H1 lines are
         skipped (the company name is already in the banner), ``##``/``###``
         become styled headings, ``-``/``*``/``+`` lines become bullets,
         ``---`` becomes a rule, and anything else is a body paragraph.
      3. Write ``<company_name>.pdf`` into a ``brochures`` folder next to the
         current working directory and print the saved path.

    Args:
        company_name: Shown in the banner and footer; also used (sanitised)
            as the output file name.
        url: Landing-page address used to gather the source material.

    Returns:
        None. Side effects: notebook display, a PDF written to disk, a printed path.
    """
    # 1. Generate brochure via LLM
    user_prompt = get_brochure_user_prompt(company_name, url)
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": BROCHURE_SYSTEM_PROMPT},
            {"role": "user", "content": user_prompt},
        ],
    )
    # content can be None; fall back to an empty brochure rather than crash on splitlines().
    md_content = response.choices[0].message.content or ""
    display(Markdown(md_content))

    # 2. Helpers
    def safe(text):
        """Return text restricted to Latin-1, with common typography transliterated first."""
        plain = text.translate(_PDF_CHAR_FALLBACKS)
        return plain.encode("latin-1", errors="replace").decode("latin-1")

    def strip_md(text):
        """Remove inline markdown markers (bold, italic, code, links), keeping the visible text."""
        text = re.sub(r'\*\*(.*?)\*\*', r'\1', text)
        text = re.sub(r'\*(.*?)\*',     r'\1', text)
        text = re.sub(r'`(.*?)`',       r'\1', text)
        text = re.sub(r'\[([^\]]*)\]\([^)]*\)', r'\1', text)
        return text.strip()

    # 3. Build PDF
    pdf = FPDF()
    pdf.set_margins(20, 20, 20)
    pdf.set_auto_page_break(auto=True, margin=20)
    pdf.add_page()
    W = pdf.w - pdf.l_margin - pdf.r_margin  # usable text width between the margins

    # --- Full-width header banner ---
    pdf.set_fill_color(*C_NAVY)
    pdf.rect(0, 0, pdf.w, 40, style="F")
    pdf.set_xy(0, 9)
    pdf.set_font("Helvetica", "B", 26)
    pdf.set_text_color(*C_WHITE)
    pdf.cell(pdf.w, 13, safe(company_name.upper()), align="C")
    pdf.set_xy(0, 24)
    pdf.set_font("Helvetica", "I", 11)
    pdf.set_text_color(180, 210, 255)
    pdf.cell(pdf.w, 8, safe("Company Brochure"), align="C")
    pdf.ln(26)

    # --- Teal rule under banner ---
    pdf.set_draw_color(*C_TEAL)
    pdf.set_line_width(1.0)
    pdf.line(pdf.l_margin, pdf.get_y(), pdf.w - pdf.r_margin, pdf.get_y())
    pdf.ln(6)

    # 4. Render markdown lines
    for line in md_content.splitlines():
        if line.startswith("# "):
            continue                        # already in banner

        elif line.startswith("## "):
            pdf.ln(4)
            y = pdf.get_y()
            # coloured left accent bar
            pdf.set_fill_color(*C_BLUE)
            pdf.rect(pdf.l_margin, y, 3, 10, style="F")
            pdf.set_xy(pdf.l_margin + 6, y)
            pdf.set_font("Helvetica", "B", 14)
            pdf.set_text_color(*C_BLUE)
            # Drop the "## " marker; strip_md only removes inline markup.
            pdf.multi_cell(W - 6, 10, safe(strip_md(line[3:])))
            # thin underline
            pdf.set_draw_color(*C_BLUE)
            pdf.set_line_width(0.3)
            pdf.line(pdf.l_margin, pdf.get_y(), pdf.l_margin + W, pdf.get_y())
            pdf.ln(3)

        elif line.startswith("### "):
            pdf.ln(2)
            pdf.set_font("Helvetica", "B", 12)
            pdf.set_text_color(*C_TEAL)
            # Drop the "### " marker for the same reason as above.
            pdf.multi_cell(W, 7, safe(strip_md(line[4:])))
            pdf.ln(1)

        elif line.startswith(("- ", "* ", "+ ")):
            pdf.set_font("Helvetica", "B", 11)
            pdf.set_text_color(*C_ORANGE)
            pdf.set_x(pdf.l_margin)
            pdf.cell(6, 7, safe(">"))
            pdf.set_font("Helvetica", "", 11)
            pdf.set_text_color(*C_DARK)
            pdf.multi_cell(W - 6, 7, safe(strip_md(line[2:])))

        elif line.strip() == "---":
            pdf.ln(2)
            pdf.set_draw_color(*C_TEAL)
            pdf.set_line_width(0.4)
            pdf.line(pdf.l_margin, pdf.get_y(), pdf.l_margin + W, pdf.get_y())
            pdf.ln(4)

        elif line.strip():
            pdf.set_font("Helvetica", "", 11)
            pdf.set_text_color(*C_DARK)
            # A preceding bullet's multi_cell may leave the cursor away from the
            # left margin (depends on the FPDF version), so reset x explicitly.
            pdf.set_x(pdf.l_margin)
            pdf.multi_cell(W, 7, safe(strip_md(line)))
            pdf.ln(1)

        else:
            pdf.ln(3)

    # Footer (last page only). It sits inside the 20-unit bottom margin, so auto
    # page break must be off first; otherwise the cell would trigger a page
    # break and the footer would land at the top of a new page.
    pdf.set_auto_page_break(auto=False)
    pdf.set_y(-14)
    pdf.set_font("Helvetica", "I", 8)
    pdf.set_text_color(150, 150, 150)
    pdf.cell(0, 6, safe(f"{company_name}  |  AI-generated brochure"), align="C")

    # 5. Save to brochures/ folder at project root
    # Fall back to a fixed stem when the name has no filename-safe characters.
    safe_name = re.sub(r'[^\w\s-]', '', company_name).strip().replace(' ', '_') or "brochure"
    brochures_dir = Path.cwd().parent / "brochures"
    brochures_dir.mkdir(exist_ok=True)
    output_path = brochures_dir / f"{safe_name}.pdf"
    pdf.output(str(output_path))
    print(f"PDF saved: {output_path}")

In [20]:
create_brochure_generate_pdf("Edward Donner", "https://edwarddonner.com")

# Edward Donner

## Who We Are

Edward Donner is an AI entrepreneur, educator, and technologist passionate about applying artificial intelligence to solve meaningful problems. As co-founder and CTO of **Nebula.io**, he leads a team building next-generation AI-powered talent solutions. Previously, he founded and sold AI startup untapt in 2021.

## What We Do

### Nebula.io – AI-Powered Talent Platform
Our patented matching engine uses Generative AI and machine learning to connect people with roles aligned to their potential. We help recruiters source, understand, engage, and manage talent with greater accuracy and speed than traditional methods—without requiring keyword searches.

**Our Mission:** Help people discover their potential and pursue their reason for being (inspired by *Ikigai*), ultimately raising human prosperity and engagement at work.

### Educational Leadership
Edward's best-selling Udemy courses on AI engineering have reached 400,000 learners across 190 countries. The curriculum covers:
- AI engineering fundamentals
- Building with LLMs and agentic systems
- MLOps and production deployment
- n8n integration and voice agents

## Culture & Philosophy

Edward embodies a hands-on, collaborative approach rooted in genuine passion for LLMs and AI innovation. He believes in sharing knowledge openly—from impromptu lectures that inspired his online courses to maintaining regular contact with the community.

## Why It Matters

With 77% of workers lacking inspiration at work, our mission addresses a critical human challenge. By matching talent to fulfilling roles with unprecedented accuracy, we're creating measurable impact on both individual careers and organizational success.

**Connect:** ed [at] edwarddonner [dot] com | www.edwarddonner.com

PDF saved: c:\Users\maina\Desktop\andela\ai_engineering\brochures\Edward_Donner.pdf
