In [1]:
import os
import requests
import json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI

In [3]:
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")

if api_key and api_key[:8] == "sk-proj-":
    print("API Key seems to be OK so far")
else:
    print("There is some error with the API Key provided. Please check again")


API Key seems to be OK so far


In [4]:
model = "gpt-4o-mini"
openai = OpenAI()

In [11]:
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}


In [13]:
class Website:
    """
        A Utility class to represent the website to be scraped
    """
    url: str
    title: str
    body: str
    links: List[str]
    text: str

    def __init__(self, url):
        self.url = url
        response = requests.get(url, headers=headers)
        self.body = response.content
        soup = BeautifulSoup(self.body, "html.parser")
        self.title = soup.title.string if soup.title else "No Title"
        if soup.body:
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)

        else:
            self.text = ""
        links = [link.get('href') for link in soup.find_all("a")]
        self.links = [link for link in links if link]
    
    def get_content(self):
        return f"Webpage Title:\n{self.title}\nWebpage Content:\n{self.text}\n\n"





In [15]:
link_system_prompt = "You are provided with a list of links found on a webpage. \
You are able to decide which of the links would be most relevant to include in a brochure about the company, \
such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
link_system_prompt += "You should respond in JSON as in this example:"
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page": "url": "https://another.full.url/careers"}
    ]
}
"""

In [16]:
print(link_system_prompt)

You are provided with a list of links found on a webpage. You are able to decide which of the links would be most relevant to include in a brochure about the company, such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page": "url": "https://another.full.url/careers"}
    ]
}



In [30]:
def get_links_user_prompt(website):
    user_prompt = f"Here is the list of links on the website of {website.url}: "
    user_prompt += "Please decide which of these links are relevant for making a marketing brochure for a company. \
                    respond with the full https URL in JSON Format. \
                    Do not include Terms of Service, Privacy, email links, news articles, blogs, testimonials etc. \n "
    user_prompt += "Links (some might be relative links): \n"
    user_prompt += "\n".join(website.links)

    return user_prompt


In [31]:
def get_links(url):
    website = Website(url)
    response = openai.chat.completions.create(
        model = model,
        messages = [
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(website)},
        ],
        response_format = {"type": "json_object"}
    )

    result = response.choices[0].message.content

    return json.loads(result)



In [39]:
anthropic ="https://anthropic.com"

In [40]:
get_links(anthropic)

{'links': [{'type': 'about page', 'url': 'https://www.anthropic.com/company'},
  {'type': 'careers page', 'url': 'https://www.anthropic.com/careers'},
  {'type': 'team page', 'url': 'https://www.anthropic.com/team'},
  {'type': 'enterprise page', 'url': 'https://www.anthropic.com/enterprise'},
  {'type': 'api page', 'url': 'https://www.anthropic.com/api'},
  {'type': 'pricing page', 'url': 'https://www.anthropic.com/pricing'},
  {'type': 'research page', 'url': 'https://www.anthropic.com/research'}]}

In [33]:
def get_all_details(url):
    result = "Landing page: \n"
    result += Website(url).get_content()
    links = get_links(url)
    
    for link in links["links"]:
        result += f"\n\n{link['type']}\n"
        result += Website(link["url"]).get_content()
    
    return result


In [41]:
print(get_all_details(anthropic))

Landing page: 
Webpage Title:
Home \ Anthropic
Webpage Content:
Claude
Overview
Team
Enterprise
API
Pricing
Research
Company
Careers
News
Try Claude
AI
research
and
products
that put safety at the frontier
Claude.ai
Meet Claude 3.5 Sonnet
Claude 3.5 Sonnet, our most intelligent AI model, is now available.
Talk to Claude
API
Build with Claude
Create AI-powered applications and custom experiences using Claude.
Learn more
Announcements
Introducing computer use, a new Claude 3.5 Sonnet, and Claude 3.5 Haiku
Oct 22, 2024
Model updates
3.5 Sonnet
3.5 Haiku
Our Work
Product
Claude for Enterprise
Sep 4, 2024
Alignment
·
Research
Constitutional AI: Harmlessness from AI Feedback
Dec 15, 2022
Announcements
Core Views on AI Safety: When, Why, What, and How
Mar 8, 2023
Work with Anthropic
Anthropic is an AI safety and research company based in San Francisco. Our interdisciplinary team has experience across ML, physics, policy, and product. Together, we generate research and create reliable, benefic

In [35]:
system_prompt = """ You are an assistant that analyzes the contents of several relevant pages of a website 
                    and creates a short brochure about the company for prospective customers, investors and recruits.
                    Respond in markdown. Include details of company culture, customers and career/jobs if you have the information
                """

In [43]:
def get_user_brochure_prompt(company_name, url):
    user_prompt = "Based on the content of a website's landing pages and other relevant pages,\
                    as given below, generate a marketing brochure \
                    for the company named {company_name}: \n"
    user_prompt += f"Website Content:\n {get_all_details(url)}"
    user_prompt = user_prompt[:20000]

    return user_prompt

In [44]:
get_user_brochure_prompt("Anthropic", anthropic)

'Based on the content of a website\'s landing pages and other relevant pages,                    as given below, generate a marketing brochure                     for the company named {company_name}: \nWebsite Content:\n Landing page: \nWebpage Title:\nHome \\ Anthropic\nWebpage Content:\nClaude\nOverview\nTeam\nEnterprise\nAPI\nPricing\nResearch\nCompany\nCareers\nNews\nTry Claude\nAI\nresearch\nand\nproducts\nthat put safety at the frontier\nClaude.ai\nMeet Claude 3.5 Sonnet\nClaude 3.5 Sonnet, our most intelligent AI model, is now available.\nTalk to Claude\nAPI\nBuild with Claude\nCreate AI-powered applications and custom experiences using Claude.\nLearn more\nAnnouncements\nIntroducing computer use, a new Claude 3.5 Sonnet, and Claude 3.5 Haiku\nOct 22, 2024\nModel updates\n3.5 Sonnet\n3.5 Haiku\nOur Work\nProduct\nClaude for Enterprise\nSep 4, 2024\nAlignment\n·\nResearch\nConstitutional AI: Harmlessness from AI Feedback\nDec 15, 2022\nAnnouncements\nCore Views on AI Safety: Whe

In [47]:
def create_brochure(company_name, url):
    response = openai.chat.completions.create(
        model = model,
        messages= [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_user_brochure_prompt(company_name, url)},
        ],
    )
    result = response.choices[0].message.content
    display(Markdown(result))

In [48]:
create_brochure("Anthropic", anthropic)

# Welcome to Anthropic!

At **Anthropic**, we are pioneering the future of artificial intelligence through our commitment to safety, reliability, and transparency. Our mission is to build AI systems that people can trust and to research the profound impact of these technologies on society. 

## About Us

Located in the heart of San Francisco, Anthropic is an **AI safety and research company** focused on developing advanced AI models such as **Claude**. We prioritize creating reliable, interpretable, and steerable products that meet the growing demands of businesses, nonprofits, and civil society.

### Our Vision
At Anthropic, we recognize that AI has the potential to fundamentally change the world. Our dedication lies in ensuring that these transformations benefit humanity and help society flourish rather than pose risks. 

### Our Approach
- **Safety as Science**: We treat AI safety as a scientific process involving rigorous research, practical application, and continuous learning.
- **Interdisciplinary Collaboration**: Our diverse team brings expertise from fields including machine learning, physics, policy studies, and business operations. Together, we aim to address the complex challenges and opportunities presented by AI.

## Company Culture

### Our Values
1. **Mission-Driven**: We prioritize our mission at every level of decision-making.
2. **High Trust Environment**: We cultivate transparency, honesty, and kindness in our workspace.
3. **Collaboration**: Every team member contributes to our goals, and together we foster a sense of one big team.
4. **Pragmatism and Empiricism**: We believe in testing simple solutions before delving into complexities.

### Benefits for Our Team
- **Health & Wellness**: Comprehensive health, dental, and vision insurance; flexible paid time off; generous parental leave; and mental health support.
- **Compensation & Support**: Competitive salaries, equity packages, and retirement plans.
- **Work-Life Balance**: Flexible work options, relocation support, and daily meals/snacks in the office.
- **Growth Opportunities**: We encourage continuous learning with annual education stipends and home office stipends.

## Our Customers

We cater to a diverse array of clients, ranging from enterprise businesses to community-focused organizations, all benefiting from our AI solutions. By building products that prioritize safety, we empower our customers to harness the full potential of AI while mitigating risks.

## Careers at Anthropic

We are always on the lookout for **talented individuals** who share our vision and values! Regardless of your background in academic qualifications or prior experience in machine learning, if you are passionate about AI safety and innovation, we invite you to apply.

### Why Join Us?
- Be part of a company that is setting the industry standard for AI safety.
- Work alongside industry leaders in a dynamic and supportive environment.
- Contribute to meaningful projects that impact society and guide the future of AI.

Explore our open roles and join us in shaping a safer AI-driven world!

## Connect With Us

Follow **Anthropic** for the latest updates, insights, and job opportunities on [Twitter](#), [LinkedIn](#), and [YouTube](#).

---

**Together, let's build a brighter future with AI!**  
For more information, visit our website or contact us directly.

## Using OpenAi Streaming

In [49]:
def stream_brochure(company_name, url):
    stream = openai.chat.completions.create(
        model = model,
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_user_brochure_prompt(company_name, url)},
        ],
        stream = True
    )

    response = "" 
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        response += chunk.choices[0].delta.content or ""
        response = response.replace("```", "").replace("markdown", "")
        update_display(Markdown(response), display_id=display_handle.display_id)


In [50]:
stream_brochure("Antropic", anthropic)

# Anthropic Company Brochure

## About Us
Welcome to **Anthropic**, a pioneering AI safety and research company based in San Francisco. Our mission is to create reliable, interpretable, and steerable AI systems that benefit humanity. With our flagship product, **Claude**, we are at the forefront of AI technology, ensuring that safety remains our top priority as we build solutions that can fundamentally transform how the world works.

## What We Value
At Anthropic, our culture thrives on **collaboration, trust, and a commitment to safety**. We maintain an unusually high-trust environment that fosters honest communication, enabling our interdisciplinary team to make effective decisions together. Our core values include:

- **Mission-Driven Focus**: Every action is aligned with the goal of ensuring transformative AI contributes positively to society.
- **Trust and Integrity**: We assume good intentions and prioritize open dialogue among colleagues.
- **Simplicity and Pragmatism**: We embrace straightforward solutions, focusing on what really works.
- **User-Centric Approach**: Prioritizing kindness and generosity in all interactions with users, customers, and stakeholders.

## Our Team
Our diverse team comprises researchers, engineers, policy experts, and operational leaders who bring a wealth of experience from multiple domains, including physics, machine learning, and public policy. At Anthropic, community matters: we regularly share insights with one another and work collaboratively on projects, regardless of our roles.

### Current Opportunities
We are always looking for passionate individuals to join us in making AI safer. Explore our **open roles** to see how you might fit into our mission-driven team. We value different backgrounds and experiences and especially encourage those who haven't worked directly in AI/ML to apply!

## Our Customers
We serve a wide array of clients from various sectors, including businesses, nonprofits, and civil society organizations. Our AI solutions, powered by **Claude**, help streamline work processes, spark creativity, and drive innovation