### BUSINESS CHALLENGE:

#### Create a product that builds a Brochure for a company to be used for prospective clients, investors and potential recruits.


In [63]:
# Import all libraires

import os
import requests
import json
from openai import OpenAI 
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display

In [22]:
# Initialise the essential constants

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:
    print("API key looks good so far")
else:
    print("There might be problem with API key")

MODEL = 'gpt-4o-mini'
openai = OpenAI()

API key looks good so far


In [36]:
# A class to represent a webpage

headers = {
   "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class Website:
    """
    A utility class to represent a web site that we have scraped, now with links
    """
    def __init__(self, url):
        self.url = url
        response = requests.get(url, headers=headers)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')
        self.title = soup.title.string if soup.title else "No title found"
        if soup.body:
            for irrelevant in soup.body(["script","style","img","input"]):
                irrelevant.decompose()
            self.text = soup. body.get_text(separator="\n", strip=True)
        else:
            self.text = ""
        links = [link.get('href') for link in soup.find_all('a')]
        self.links = [link for link in links if link]
    def get_contents(self):
        return f"ebpage Title:\n{self.title}\nwebpage content:\n{self.text}\n\n"

ed = Website("https://edwarddonner.com")
print(ed.links)

## Give clear instruction to system prompt
### Check the valid links

In [40]:
link_system_prompt = "You are provided with a list of links found on a webpage. \
You are able to decide which of the links would be most relevant to include in a brochure about the company, \
such as links to an About page, or a Company page, or Careers/Jobs pages.\n"

link_system_prompt += "You should respond in JSON as in this example:"
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page": "url": "https://another.full.url/careers"}
    ]
}
"""

In [41]:
print(link_system_prompt)

You are provided with a list of links found on a webpage. You are able to decide which of the links would be most relevant to include in a brochure about the company, such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page": "url": "https://another.full.url/careers"}
    ]
}



In [42]:
def get_links_user_prompt(website):
    user_prompt = f"Here is the list of links on the website of {website.url} - "
    user_prompt += "please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. \
Do not include Terms of Service, Privacy, email links.\n"
    user_prompt += "Links (some might be relative links):\n"
    user_prompt += "\n".join(website.links)
    return user_prompt

In [43]:
print(get_links_user_prompt(ed))

Here is the list of links on the website of https://edwarddonner.com - please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. Do not include Terms of Service, Privacy, email links.
Links (some might be relative links):
https://edwarddonner.com/
https://edwarddonner.com/connect-four/
https://edwarddonner.com/outsmart/
https://edwarddonner.com/about-me-and-about-nebula/
https://edwarddonner.com/posts/
https://edwarddonner.com/
https://news.ycombinator.com
https://nebula.io/?utm_source=ed&utm_medium=referral
https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html
https://patents.google.com/patent/US20210049536A1/
https://www.linkedin.com/in/eddonner/
https://edwarddonner.com/2025/01/23/llm-workshop-hands-on-with-agents-resources/
https://edwarddonner.com/2025/01/23/llm-workshop-hands-on-with-agents-resources/
https://edwarddonner.com/2024/12/21/

In [50]:
def get_links(url):
    website = Website(url)
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system","content": link_system_prompt},
            {"role": "user","content": get_links_user_prompt(website)}
        ],
        response_format = {"type": "json_object"}
    )

    result = response.choices[0].message.content
    return json.loads(result)

In [52]:
get_links("https://en.wikipedia.org/wiki/Main_Page")

{'links': [{'type': 'about page',
   'url': 'https://en.wikipedia.org/wiki/Wikipedia:About'},
  {'type': 'company page', 'url': 'https://wikimediafoundation.org/'},
  {'type': 'careers page',
   'url': 'https://wikimediafoundation.org/our-work/wikimedia-projects/'}]}

## Second step: make the brochure!

Assemble all the details into another prompt to GPT4-o

In [53]:
def get_all_details(url):
    result = "Landing page:\n"
    result += Website(url).get_contents()
    links = get_links(url)
    print("Found links:", links)
    for link in links["links"]:
        result += f"\n\n{link['type']}\n"
        result += Website(link["url"]).get_contents()
    return result

In [54]:
print(get_all_details("https://en.wikipedia.org/wiki/Main_Page"))

Found links: {'links': [{'type': 'about page', 'url': 'https://en.wikipedia.org/wiki/Wikipedia:About'}, {'type': 'company page', 'url': 'https://wikimediafoundation.org/'}, {'type': 'foundation page', 'url': 'https://foundation.wikimedia.org/wiki/Home'}, {'type': 'careers page', 'url': 'https://wikimediafoundation.org/our-work/wikimedia-projects/'}]}
Landing page:
ebpage Title:
Wikipedia, the free encyclopedia
webpage content:
Jump to content
Main menu
Main menu
move to sidebar
hide
Navigation
Main page
Contents
Current events
Random article
About Wikipedia
Contact us
Contribute
Help
Learn to edit
Community portal
Recent changes
Upload file
Special pages
Search
Search
Appearance
Donate
Create account
Log in
Personal tools
Donate
Create account
Log in
Pages for logged out editors
learn more
Contributions
Talk
Main Page
Main Page
Talk
English
Read
View source
View history
Tools
Tools
move to sidebar
hide
Actions
Read
View source
View history
General
What links here
Related changes
Upload

In [76]:
# system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
# and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
# Include details of company culture, customers and careers/jobs if you have the information."

system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
Include details of company culture, customers and careers/jobs if you have the information. I need a small image in each broucher"

In [77]:
def get_brochure_user_prompt(company_name, url):
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += f"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    user_prompt += get_all_details(url)
    user_prompt = user_prompt[:20_000] # Truncate if more than 5,000 characters
    return user_prompt

In [78]:
get_brochure_user_prompt("WikiPedia", "https://en.wikipedia.org/wiki/Main_Page")

Found links: {'links': [{'type': 'about page', 'url': 'https://en.wikipedia.org/wiki/Wikipedia:About'}, {'type': 'company page', 'url': 'https://wikimediafoundation.org/'}, {'type': 'careers page', 'url': 'https://wikimediafoundation.org/'}]}


'You are looking at a company called: WikiPedia\nHere are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\nLanding page:\nebpage Title:\nWikipedia, the free encyclopedia\nwebpage content:\nJump to content\nMain menu\nMain menu\nmove to sidebar\nhide\nNavigation\nMain page\nContents\nCurrent events\nRandom article\nAbout Wikipedia\nContact us\nContribute\nHelp\nLearn to edit\nCommunity portal\nRecent changes\nUpload file\nSpecial pages\nSearch\nSearch\nAppearance\nDonate\nCreate account\nLog in\nPersonal tools\nDonate\nCreate account\nLog in\nPages for logged out editors\nlearn more\nContributions\nTalk\nMain Page\nMain Page\nTalk\nEnglish\nRead\nView source\nView history\nTools\nTools\nmove to sidebar\nhide\nActions\nRead\nView source\nView history\nGeneral\nWhat links here\nRelated changes\nUpload file\nPermanent link\nPage information\nCite this page\nGet shortened URL\nDownload QR code\nPrint/expor

In [79]:
def create_brochure(company_name, url):
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
          ],
    )
    result = response.choices[0].message.content
    display(Markdown(result))

In [80]:
create_brochure("WikiPedia", "https://en.wikipedia.org/wiki/Main_Page")

Found links: {'links': [{'type': 'about page', 'url': 'https://en.wikipedia.org/wiki/Wikipedia:About'}, {'type': 'company page', 'url': 'https://wikimediafoundation.org/'}, {'type': 'careers page', 'url': 'https://wikimediafoundation.org/our-work/wikimedia-projects/'}, {'type': 'community portal', 'url': 'https://meta.wikimedia.org/wiki/Community_portal'}, {'type': 'help page', 'url': 'https://en.wikipedia.org/wiki/Help:Contents'}]}


```markdown
# Welcome to Wikipedia: The Encyclopedia That Can’t Keep Quiet!

![Wikipedia Logo](https://upload.wikimedia.org/wikipedia/commons/thumb/8/80/Wikipedia-logo-v2.svg/800px-Wikipedia-logo-v2.svg.png)

### What is Wikipedia?
Wikipedia is not just your go-to source for finding out who played Aunt Mildred in that obscure 90s sitcom—it's the **free online encyclopedia** that everyone can edit (yes, even your neighbor's tortoise if it learns to type). Created in 2001, we now rely on a motley crew of over **125,000 active editors** who add some serious brainpower to **6,969,146 articles**... and counting!

### Our Culture: Collaboration and Community!
At Wikipedia, we believe everyone should have access to knowledge—unless you were planning to write a memoir about your cat’s life, then let’s talk! Our employees (well, actually they are volunteers!) enjoy a culture emphasizing collective wisdom, free access to information, and a sprinkle of digital teamwork. 

- *Fun Fact*: Wikipedia is available in **more than 300 languages**, competing with your multilingual uncle in family gatherings!

### Who Uses Wikipedia?
You do! In fact, over **a billion visitors** each month rely on Wikipedia—from students frantically finishing their essays at midnight to that one uncle who can’t stop correcting everyone about **Hindenburg's exploding fate.** Our readers are as diverse as the articles, and trust us, we’ve got something for everyone!

### Career Opportunities and Community Involvement
Thinking of joining the ranks of our amazing editors? Dive into the world of editing with a few clicks! Even if you don't have an extensive knowledge of the Rosetta Stone, you can learn along the way. Remember, all you need to know is that *editing* is basically **typing, not time traveling!**

- *Becoming a Contributor*: Click the "Edit" button, and BAM! You're in. Just avoid using the term "epic" unless it’s about the Battle of Thermopylae.

### Join Us; Knowledge Awaits!
Imagine a world where anyone can get their facts straight without needing a library card or a PhD. That’s our mission! 

**So why wait?** Whether you’re curious about the stars or want to know how Romaine lettuce made a comeback this year, Wikipedia is your go-to!

### Donate (Pretty Please!)
Since we’re a nonprofit, we appreciate any and every donation, even if it’s just enough for a cup of coffee (or two). Donations allow us to keep rolling out knowledge that's user-generated (and yes, that includes your car-loving cousin's entry on *Rides*). 

![Donation](https://upload.wikimedia.org/wikipedia/commons/thumb/7/7c/Donate.svg/1000px-Donate.svg.png)

### In Conclusion:
Join the knowledge revolution today! Wikipedia is not just about facts; it’s about bringing out the hidden knowledge in each and every one of us—even if it’s how to perfectly microwave a burrito!

---
*Wikipedia: because who wants to crawl through a dusty old library when you can just type "cat videos" and land on a whole universe of knowledge!*
```


# Wikipedia Brochure

## Welcome to Wikipedia: The Free Encyclopedia

**Wikipedia** is a collaborative online encyclopedia that allows anyone to edit and contribute to a wealth of human knowledge. We are proud to host over **6,969,094 articles** in English alone, and our community includes over **125,967 active editors** who help keep information accurate and up to date.

### What We Offer

- **Free and Open Access:** Our content is available to anyone, anywhere, without charge.
- **Diverse Topics:** From historical events to scientific discoveries, the breadth of information covers countless subjects, including current events, biographies, geography, and more.
- **Community Engagement:** Wikipedia thrives on partnership and contributions from volunteers and experts around the globe. Our community is enriched by diverse perspectives and languages.

### Our Culture

At Wikipedia, we foster a culture of **transparency, collaboration, and respect**. We encourage open dialogue and constructive feedback to ensure the encyclopedia remains a reliable resource. Our environment is inclusive, allowing people from all walks of life to contribute and share their knowledge. We value the input of our users, tirelessly striving to make improvements based on community feedback.

### Careers at Wikipedia

Join our mission to provide free knowledge to the world! We are constantly looking for passionate individuals to become part of our team. Opportunities range from technical roles focusing on software development and IT infrastructure, to positions in community management and outreach.

- **Open Positions:** Discover various career paths that support our goal of making knowledge accessible to everyone.
- **Internships and Volunteer Opportunities:** Gain valuable experience while contributing to a global initiative.

### Get Involved

- **Edit Articles:** Anyone can become an editor—just create an account and start contributing!
- **Donate:** Support our efforts to keep Wikipedia ad-free and accessible to all by making a donation.
- **Spread the Word:** Share Wikipedia with your friends and family to promote our vision of a world with free knowledge.

For more information, visit [Wikipedia](https://www.wikipedia.org).

---

**Join us in our mission to share the world's knowledge, enriching lives and fostering a better-informed society!**

## Some Update and Streaming

In [81]:
def stream_brochure(company_name, url):
    stream = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
          ],
        stream=True
    )
    
    response = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        response += chunk.choices[0].delta.content or ''
        response = response.replace("```","").replace("markdown", "")
        update_display(Markdown(response), display_id=display_handle.display_id)

In [None]:
stream_brochure("HuggingFace", "https://huggingface.co")