In [1]:
import os
import requests
import json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI


In [30]:
# Configuration: load variables from .env and create the shared OpenAI client.
load_dotenv()
api_key = os.environ.get("OPENAI_API_KEY")

# Report whether a non-empty key was found; the key itself is never printed.
print("success" if api_key else "OpenAi key invalid")

MODEL = "gpt-4o-mini"  # model name passed to every chat completion call below
Openai = OpenAI()

success


In [31]:
# Class to represent a webpage

class Website:
    """A fetched web page reduced to its title, visible text and outgoing links.

    All attributes are populated by ``__init__``, which downloads and parses
    the page.
    """

    url: str          # address the page was requested from
    title: str        # text of the <title> tag, or "No title found"
    body: bytes       # raw response payload (``response.content``)
    links: List[str]  # non-empty href values of the page's <a> tags
    text: str         # text of <body> joined with newlines ("" if there is no <body>)

    def __init__(self, url, timeout=30):
        """Download ``url`` and extract its title, text and links.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds handed to ``requests.get``. Without a timeout a
                stalled server would block the notebook indefinitely.

        Raises:
            requests.RequestException: Propagated from ``requests.get`` when
                the request fails or times out.
        """
        self.url = url
        response = requests.get(url, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')
        self.title = self._title_of(soup)
        if soup.body:
            # Remove elements that contribute no readable prose before
            # extracting the text.
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator='\n', strip=True)
        else:
            self.text = ""

        hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
        self.links = [href for href in hrefs if href]

    @staticmethod
    def _title_of(soup):
        """Return the page title, or "No title found" when it is absent or empty.

        ``soup.title.string`` can be ``None`` even when a <title> tag exists,
        so both cases fall back to the placeholder.
        """
        title_tag = soup.title
        title_text = title_tag.string if title_tag else None
        return title_text if title_text else "No title found"

    def get_content(self):
        """Return the title and text formatted as one string for use in a prompt."""
        return f"Webpage Tile:\n{self.title}\nWebpage Content:\n {self.text}"


    


In [32]:
# Smoke-test the scraper: fetch one page and print the hrefs it collected.
ed = Website("https://edwarddonner.com")
print(ed.links)

['https://edwarddonner.com/', 'https://edwarddonner.com/outsmart/', 'https://edwarddonner.com/about-me-and-about-nebula/', 'https://edwarddonner.com/posts/', 'https://edwarddonner.com/', 'https://news.ycombinator.com', 'https://nebula.io/?utm_source=ed&utm_medium=referral', 'https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html', 'https://patents.google.com/patent/US20210049536A1/', 'https://www.linkedin.com/in/eddonner/', 'https://edwarddonner.com/2025/01/23/llm-workshop-hands-on-with-agents-resources/', 'https://edwarddonner.com/2025/01/23/llm-workshop-hands-on-with-agents-resources/', 'https://edwarddonner.com/2024/12/21/llm-resources-superdatascience/', 'https://edwarddonner.com/2024/12/21/llm-resources-superdatascience/', 'https://edwarddonner.com/2024/11/13/llm-engineering-resources/', 'https://edwarddonner.com/2024/11/13/llm-engineering-resources/', 'https://edwarddonner.com/2024/10/16/from-software-engineer-to

#### First step: Have GPT-4o-mini figure out which links are relevant

In [33]:
# System prompt for the link-selection call: describes the task and shows the
# JSON shape the reply should follow.
link_system_prompt = "You are provided with a list of links found on a webpage. \
You are able to decide which of the links would be most relevant to include in a brochure about the company, \
such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
link_system_prompt += "You should respond in JSON as in this example:"
# The example must itself be valid JSON (a comma between the "type" and "url"
# members); otherwise the model is shown a malformed template to imitate.
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [34]:
def get_links_user_prompt(website):
    """Build the user message that lists a page's links for the model to filter.

    Args:
        website: Object exposing ``url`` (str) and ``links`` (iterable of str).

    Returns:
        The instruction text followed by the links, one per line.
    """
    instructions = (
        f"Here is the list of links on the website of {website.url} - "
        "please decide which of these are relevant web links for a brochure about the company, "
        "respond with the full https URL in JSON format. "
        "Do not include Terms of Service, Privacy, email links.\n"
        "Links (some might be relative links):\n"
    )
    return instructions + "\n".join(website.links)


In [35]:
# print(get_links_user_prompt(ed))

In [38]:
def get_links(url):
    """Ask the model which links on the page at ``url`` belong in a brochure.

    Fetches the page, sends its links together with the link-selection system
    prompt, and returns the model's reply parsed from JSON.
    """
    page = Website(url)
    completion = Openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(page)},
        ],
        response_format={"type": "json_object"},
    )
    return json.loads(completion.choices[0].message.content)

In [39]:
# Try the link selection on a live site; the cell displays the parsed reply.
get_links("https://anthropic.com")

{'links': [{'type': 'about page', 'url': 'https://www.anthropic.com/company'},
  {'type': 'careers page', 'url': 'https://www.anthropic.com/careers'},
  {'type': 'team page', 'url': 'https://www.anthropic.com/team'},
  {'type': 'research page', 'url': 'https://www.anthropic.com/research'},
  {'type': 'products page', 'url': 'https://www.anthropic.com/claude'},
  {'type': 'news page', 'url': 'https://www.anthropic.com/news'}]}

### Step 2: Make the brochure

In [40]:
def get_all_details(url):
    """Collect the text of the landing page and of every link the model selected.

    Args:
        url: Address of the company's landing page.

    Returns:
        One string: the landing page content followed by one section per
        selected link, each headed by the link's "type".
    """
    from urllib.parse import urljoin  # stdlib; imported here so the cell is self-contained

    sections = ["Landing Page:\n" + Website(url).get_content()]
    links = get_links(url)
    print("found links:\n", links)
    # The reply is model-generated JSON, so the "links" key and the per-entry
    # fields are treated as optional rather than indexed blindly.
    for link in links.get("links") or []:
        link_url = link.get("url") if isinstance(link, dict) else None
        if not link_url:
            continue
        # The user prompt warns that some links may be relative; resolve them
        # against the landing page (absolute URLs pass through unchanged).
        page_url = urljoin(url, link_url)
        try:
            page = Website(page_url)
        except requests.RequestException as error:
            # One unreachable page should not discard everything gathered so far.
            print(f"skipping {page_url}: {error}")
            continue
        sections.append(f"\n\n{link.get('type', 'page')}\n" + page.get_content())

    return "".join(sections)



In [41]:
# Print everything gathered for one site: the landing page plus each selected page.
print(get_all_details("https://anthropic.com"))

found links:
 {'links': [{'type': 'about page', 'url': 'https://www.anthropic.com/company'}, {'type': 'careers page', 'url': 'https://www.anthropic.com/careers'}, {'type': 'team page', 'url': 'https://www.anthropic.com/team'}, {'type': 'research page', 'url': 'https://www.anthropic.com/research'}, {'type': 'enterprise page', 'url': 'https://www.anthropic.com/enterprise'}, {'type': 'api page', 'url': 'https://www.anthropic.com/api'}, {'type': 'pricing page', 'url': 'https://www.anthropic.com/pricing'}, {'type': 'news page', 'url': 'https://www.anthropic.com/news'}]}
Landing Page:
Webpage Tile:
Home \ Anthropic
Webpage Content:
 Claude
Overview
Team
Enterprise
API
Pricing
Research
Company
Careers
News
Try Claude
AI
research
and
products
that put safety at the frontier
Claude.ai
Meet Claude 3.5 Sonnet
Claude 3.5 Sonnet, our most intelligent AI model, is now available.
Talk to Claude
API
Build with Claude
Create AI-powered applications and custom experiences using Claude.
Learn more
Announ

In [42]:
# System prompt for the brochure-writing call.
# Each continued line ends with a space so that sentences do not run together
# once the backslash-newline pairs are dropped from the string.
system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown. \
Include details of company culture, customers and careers/jobs if you have the information."

In [43]:
def get_brochure_user_prompt(company_name, url):
    """Build the user message for the brochure request.

    Args:
        company_name: Name inserted into the opening line of the prompt.
        url: Landing page whose content (and selected sub-pages) is appended.

    Returns:
        The prompt text, cut off after 20,000 characters.
    """
    intro = (
        f"You are looking at a company called: {company_name}\n"
        "Here are the contents of its landing page and other relevant pages; "
        "use this information to build a short brochure of the company in markdown.\n"
    )
    # Truncate if more than 20,000 characters
    return (intro + get_all_details(url))[:20_000]

In [44]:
# Preview the assembled user prompt; the company name is inserted verbatim
# into the prompt, so it must be spelled correctly.
get_brochure_user_prompt("Anthropic", "https://anthropic.com")

found links:
 {'links': [{'type': 'about page', 'url': 'https://www.anthropic.com/company'}, {'type': 'careers page', 'url': 'https://www.anthropic.com/careers'}, {'type': 'team page', 'url': 'https://www.anthropic.com/team'}, {'type': 'research page', 'url': 'https://www.anthropic.com/research'}, {'type': 'enterprise page', 'url': 'https://www.anthropic.com/enterprise'}, {'type': 'api page', 'url': 'https://www.anthropic.com/api'}, {'type': 'claude page', 'url': 'https://www.anthropic.com/claude'}, {'type': 'news page', 'url': 'https://www.anthropic.com/news'}]}


'You are looking at a company called: Antropic\nHere are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\nLanding Page:\nWebpage Tile:\nHome \\ Anthropic\nWebpage Content:\n Claude\nOverview\nTeam\nEnterprise\nAPI\nPricing\nResearch\nCompany\nCareers\nNews\nTry Claude\nAI\nresearch\nand\nproducts\nthat put safety at the frontier\nClaude.ai\nMeet Claude 3.5 Sonnet\nClaude 3.5 Sonnet, our most intelligent AI model, is now available.\nTalk to Claude\nAPI\nBuild with Claude\nCreate AI-powered applications and custom experiences using Claude.\nLearn more\nAnnouncements\nIntroducing computer use, a new Claude 3.5 Sonnet, and Claude 3.5 Haiku\nOct 22, 2024\nModel updates\n3.5 Sonnet\n3.5 Haiku\nOur Work\nProduct\nClaude for Enterprise\nSep 4, 2024\nAlignment\n·\nResearch\nConstitutional AI: Harmlessness from AI Feedback\nDec 15, 2022\nAnnouncements\nCore Views on AI Safety: When, Why, What, and How\nMar 8, 2

In [45]:
def create_brochure(company_name, url):
    """Generate a brochure for the company, render it as Markdown and return it.

    Args:
        company_name: Name of the company, passed into the user prompt.
        url: Landing page of the company's website.

    Returns:
        The brochure text exactly as returned by the model.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": get_brochure_user_prompt(company_name, url)},
    ]
    completion = Openai.chat.completions.create(model=MODEL, messages=conversation)
    brochure = completion.choices[0].message.content
    display(Markdown(brochure))
    return brochure

In [46]:
# Build and render the brochure; the name is sent to the model as written.
create_brochure("Anthropic", "https://anthropic.com")

found links:
 {'links': [{'type': 'about page', 'url': 'https://www.anthropic.com/company'}, {'type': 'careers page', 'url': 'https://www.anthropic.com/careers'}, {'type': 'team page', 'url': 'https://www.anthropic.com/team'}, {'type': 'research page', 'url': 'https://www.anthropic.com/research'}, {'type': 'enterprise page', 'url': 'https://www.anthropic.com/enterprise'}, {'type': 'api page', 'url': 'https://www.anthropic.com/api'}, {'type': 'pricing page', 'url': 'https://www.anthropic.com/pricing'}, {'type': 'news page', 'url': 'https://www.anthropic.com/news'}]}


# Anthropic Brochure

---

## About Us

**Anthropic** is an AI safety and research company based in San Francisco. We are dedicated to building reliable, interpretable, and steerable AI systems that prioritize safety in their deployment. Our mission is focused on ensuring that AI is a transformative force for good, helping individuals and society flourish.

### Our Core Values
- **Here for the Mission**: We are committed to shaping AI technology for a safe and beneficial future.
- **Unusually High Trust**: We foster an environment of mutual respect where honesty and kindness are paramount.
- **One Big Team**: Collaboration is at the heart of our culture; every employee contributes toward our shared goals.
- **Pragmatic Solutions**: We value simple, effective approaches to complex problems.

---

## Our AI Products

### Meet Claude
Our flagship product, **Claude**, is a sophisticated AI model designed for a variety of applications:
- **Claude for Enterprise**: Tailored AI solutions for businesses to enhance productivity.
- **API Access**: Seamlessly integrate Claude's capabilities into your applications and workflows.
  
Claude empowers teams to:
1. Draft documents and code efficiently.
2. Generate insights from data quickly.
3. Collaborate creatively across departments.

---

## Our Team

We are an **interdisciplinary team** of researchers, engineers, policy experts, and business leaders, united by a common goal: to develop safe and reliable AI solutions. Our diverse backgrounds foster innovation and collaboration, making Anthropic a dynamic place to work.

### Team Highlights
- Expertise spans across **Machine Learning**, **Physics**, **Public Policy**, and **Business**.
- A **high-trust environment** where everyone's input is valued.
- Employees from various prominent organizations including NASA, startups, and the armed forces.

---

## Customers

Our clients include businesses, nonprofits, and government institutions aiming to leverage AI for impactful outcomes. Feedback from our users highlights Claude's ability to enhance workflows dramatically, leading to **5x faster** content generation and more insightful project management.

---

## Careers at Anthropic

Joining Anthropic means becoming part of a passionate team that's shaping the future of AI. We offer:
- **Flexible Work Arrangements**: Hybrid models that allow for both remote work and in-office collaboration.
- **Comprehensive Benefits**: Including competitive salaries, health benefits, flexible PTO, and more.
- **Inclusive Culture**: We encourage applicants from all backgrounds, and value the diverse perspectives they bring.

**Join Us**: Be a part of our mission to advance AI safely. Explore open roles on our **Careers Page**!

---

## Connect with Us

- **Website**: [anthropic.com](http://www.anthropic.com)
- **LinkedIn**: [LinkedIn Anthropic](https://www.linkedin.com/company/anthropic)
- **Twitter**: [@AnthropicAI](https://twitter.com/AnthropicAI)

---

### Together, Let's Build a Safer Future with AI!

"# Anthropic Brochure\n\n---\n\n## About Us\n\n**Anthropic** is an AI safety and research company based in San Francisco. We are dedicated to building reliable, interpretable, and steerable AI systems that prioritize safety in their deployment. Our mission is focused on ensuring that AI is a transformative force for good, helping individuals and society flourish.\n\n### Our Core Values\n- **Here for the Mission**: We are committed to shaping AI technology for a safe and beneficial future.\n- **Unusually High Trust**: We foster an environment of mutual respect where honesty and kindness are paramount.\n- **One Big Team**: Collaboration is at the heart of our culture; every employee contributes toward our shared goals.\n- **Pragmatic Solutions**: We value simple, effective approaches to complex problems.\n\n---\n\n## Our AI Products\n\n### Meet Claude\nOur flagship product, **Claude**, is a sophisticated AI model designed for a variety of applications:\n- **Claude for Enterprise**: Tailo

### Minor improvement: stream the brochure while it is being generated

In [47]:
def _strip_markdown_fence(text):
    """Remove the ```markdown / ``` fence markers a model may wrap its reply in.

    Only the fence markers are removed; the word "markdown" elsewhere in the
    text is left alone.
    """
    return text.replace("```markdown", "").replace("```", "")


def stream_brochure(company_name, url):
    """Generate a brochure and render it incrementally as the reply streams in.

    Args:
        company_name: Name of the company, passed into the user prompt.
        url: Landing page of the company's website.
    """
    stream = Openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)},
        ],
        stream=True,
    )

    response = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        if not chunk.choices:
            # NOTE(review): some streamed chunks may arrive without choices;
            # there is no text to append in that case.
            continue
        response += chunk.choices[0].delta.content or ''
        # Clean a copy of the accumulated text so a fence split across two
        # chunks is still recognised on the next update.
        update_display(Markdown(_strip_markdown_fence(response)), display_id=display_handle.display_id)


In [48]:
# Stream a brochure for Hugging Face; the company name must match the site
# being scraped because it is inserted into the prompt as written.
stream_brochure("Hugging Face", "https://huggingface.co")

found links:
 {'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'enterprise page', 'url': 'https://huggingface.co/enterprise'}, {'type': 'pricing page', 'url': 'https://huggingface.co/pricing'}, {'type': 'blog page', 'url': 'https://huggingface.co/blog'}]}


# Hugging Face Brochure

---

## About Us

**Hugging Face** is at the forefront of democratizing machine learning. Our mission is to make state-of-the-art AI accessible to everyone, one commit at a time. We are the home for a vibrant community where innovators in AI collaborate, share, and build together. Our platform provides users unprecedented access to an extensive library of machine learning models, datasets, and applications.

---

## What We Offer

- **Models & Datasets**: Collaborate on and access over **400,000 models** and **100,000 datasets** for various tasks, from text and image to audio and 3D.
  
- **Enterprise Solutions**: Our enterprise platform provides advanced features with enterprise-grade security, ensuring your organization can harness the power of AI effectively. 

- **Compute Resources**: Accelerate your projects with flexible compute options starting from **$0.60/hour**. Our services make it easy to deploy machine learning solutions in the cloud.

- **Community Support**: Join over **50,000 organizations** already using Hugging Face. Collaborate and learn with our engaged community of developers, researchers, and businesses.

---

## Company Culture

At Hugging Face, we pride ourselves on fostering a culture of open collaboration and innovation. Our team of **over 223 members** thrives on experimentation and collective problem-solving. We believe in the open-source ethos, constantly encouraging contributions and feedback from our users, which enhances the technology we provide.

---

## Careers at Hugging Face

We’re always looking for new talent to join our mission of democratizing AI. Here are some roles we offer:
- Machine Learning Engineers
- Data Scientists
- Software Developers
- Product Managers

If you are passionate about AI and want to be part of a dynamic and mission-driven team, explore our **current openings** on our [Careers Page](https://huggingface.co/jobs).

---

## Join Our Community

Collaborate, learn, and create with us. Whether you are a seasoned practitioner or just starting your AI journey, Hugging Face offers the tools and community support to enhance your work. 

### Connect With Us 
- **Website**: [Hugging Face](https://huggingface.co/)
- **GitHub**: [Hugging Face GitHub](https://github.com/huggingface)
- **Follow us** on [Twitter](https://twitter.com/huggingface), [LinkedIn](https://www.linkedin.com/company/huggingface), and [Discord](https://discord.gg/huggingface).

---

Join us in building the future of AI!