In [1]:
# imports

import os
import requests
import json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI

In [2]:
# Read variables from a local .env file into the process environment
# (presumably where the OpenAI API key lives - confirm against your setup).
_ = load_dotenv()
# Shared client used by every chat-completion call in this notebook.
openai = OpenAI()
# Chat model used for both link selection and brochure generation.
MODEL = "gpt-4o-mini"

In [5]:
class Website():
    """A fetched web page reduced to its title, visible text and outgoing links.

    All attributes are populated in ``__init__``. When the page cannot be used
    (non-200 status or empty body) ``title`` and ``text`` are empty strings and
    ``links`` is an empty list, so callers can read every attribute safely.
    """
    url: str          # address the page was requested from
    title: str        # <title> text, "No title found" when absent, "" on failed fetch
    body: bytes       # raw response payload (requests' ``response.content``)
    links: List[str]  # non-empty href values of every <a> tag, in document order
    text: str         # text of <body> after removing script/style/img/input tags

    def __init__(self, url):
        """Download ``url`` and parse it with BeautifulSoup.

        Args:
            url: Address of the page to fetch.
        """
        self.url = url
        response = requests.get(url)
        self.body = response.content
        # Defaults first so every attribute exists even when the fetch fails;
        # the link prompt builder reads ``links`` unconditionally.
        self.title = ""
        self.text = ""
        self.links = []
        # ``content`` is bytes, so test truthiness instead of comparing to "".
        if response.status_code != 200 or not self.body:
            return

        soup = BeautifulSoup(self.body, 'html.parser')
        # ``soup.title`` is None when the page has no <title>; ``.string`` is
        # None when the tag is empty or contains nested markup.
        if soup.title is not None and soup.title.string:
            self.title = soup.title.string
        else:
            self.title = "No title found"
        # Documents without a <body> element leave ``soup.body`` as None.
        if soup.body is not None:
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(strip=True, separator='\n')
        hrefs = [anchor.get('href') for anchor in soup.find_all('a')]
        self.links = [href for href in hrefs if href]

    def get_contents(self):
        """Return the page title and text formatted as one prompt-ready block."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

# Live smoke test: fetch one real page and show what the scraper extracted.
# `ed` is kept around because a later cell reuses it to preview the link prompt.
ed = Website("https://edwarddonner.com")
print(ed.get_contents())
print(ed.links)
        

Webpage Title:
Home - Edward Donner
Webpage Contents:
Home
Outsmart
An arena that pits LLMs against each other in a battle of diplomacy and deviousness
About
Posts
Well, hi there.
I’m Ed. I like writing code and experimenting with LLMs, and hopefully you’re here because you do too. I also enjoy DJing (but I’m badly out of practice), amateur electronic music production (
very
amateur) and losing myself in
Hacker News
, nodding my head sagely to things I only half understand.
I’m the co-founder and CTO of
Nebula.io
. We’re applying AI to a field where it can make a massive, positive impact: helping people discover their potential and pursue their reason for being. Recruiters use our product today to source, understand, engage and manage talent. I’m previously the founder and CEO of AI startup untapt,
acquired in 2021
.
We work with groundbreaking, proprietary LLMs verticalized for talent, we’ve
patented
our matching model, and our award-winning platform has happy customers and tons of pr

In [6]:
# System prompt for the link-selection request. The example the model is asked
# to imitate must itself be valid JSON, so every key is double-quoted.
link_system_prompt = (
    "You are provided with a list of links found on a website. "
    "You are able to find relevant links that should be part of the company's brochure, "
    "such as careers, jobs, about us etc. "
    "You should respond in JSON as in this example:"
)
link_system_prompt += """
    {
        "links": [
            {"type": "about page", "url": "https://full_url/about"},
            {"type": "career page", "url": "https://another_full_url/careers/jobs"}
        ]
    }
"""

def get_links_user_prompt(website):
    """Build the user message asking the model to pick brochure-worthy links.

    Args:
        website: Object exposing ``url`` and ``links`` (an iterable of strings).

    Returns:
        The instruction text followed by the page's links, one per line.
    """
    instructions = (
        f"Here is the list of links on the website of {website.url} - "
        "please decide which of these are relevant web links for a brochure about the company, "
        "respond with the full https URL in JSON format. "
        "Do not include Terms of Service, Privacy, email links.\n"
        "Links (some might be relative links):\n"
    )
    link_lines = "\n".join(website.links)
    return instructions + link_lines

In [7]:
# Preview the exact user prompt that would be built for the page held in `ed`.
print(get_links_user_prompt(ed))

Here is the list of links on the website of https://edwarddonner.com - please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. Do not include Terms of Service, Privacy, email links.
Links (some might be relative links):
https://edwarddonner.com/
https://edwarddonner.com/outsmart/
https://edwarddonner.com/about-me-and-about-nebula/
https://edwarddonner.com/posts/
https://edwarddonner.com/
https://news.ycombinator.com
https://nebula.io/?utm_source=ed&utm_medium=referral
https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html
https://patents.google.com/patent/US20210049536A1/
https://www.linkedin.com/in/eddonner/
https://edwarddonner.com/2024/10/16/from-software-engineer-to-ai-data-scientist-resources/
https://edwarddonner.com/2024/10/16/from-software-engineer-to-ai-data-scientist-resources/
https://edwarddonner.com/2024/08/06/outsmart/
https://e

In [13]:
def get_links(url):
    """Ask the model which links on ``url`` belong in a company brochure.

    Args:
        url: Address of the page whose links should be classified.

    Returns:
        The model's reply decoded from JSON. The system prompt asks for a
        ``{"links": [{"type": ..., "url": ...}, ...]}`` shape.
    """
    page = Website(url)
    conversation = [
        {"role": "system", "content": link_system_prompt},
        {"role": "user", "content": get_links_user_prompt(page)},
    ]
    # JSON mode is requested so the reply can be handed straight to json.loads.
    completion = openai.chat.completions.create(
        model=MODEL,
        messages=conversation,
        response_format={"type": "json_object"},
    )
    reply_text = completion.choices[0].message.content
    return json.loads(reply_text)

In [14]:
# Live check: fetches the page and calls the chat model, so the selected links can differ between runs.
get_links("https://anthropic.com")

{'links': [{'type': 'about page', 'url': 'https://anthropic.com/company'},
  {'type': 'career page', 'url': 'https://anthropic.com/careers'},
  {'type': 'team page', 'url': 'https://anthropic.com/team'},
  {'type': 'news page', 'url': 'https://anthropic.com/news'},
  {'type': 'research page', 'url': 'https://anthropic.com/research'},
  {'type': 'enterprise page', 'url': 'https://anthropic.com/enterprise'},
  {'type': 'pricing page', 'url': 'https://anthropic.com/pricing'},
  {'type': 'API page', 'url': 'https://anthropic.com/api'}]}

Assemble the brochure input: fetch the landing page, then fetch and append every relevant child link.

In [15]:
def get_all(url):
    """Collect the text of the landing page and of every brochure-relevant link.

    Args:
        url: Landing-page URL of the company website.

    Returns:
        One string: the landing page contents followed by a titled section for
        each selected page that could be fetched.
    """
    # Local import keeps this cell self-contained; urljoin is standard library.
    from urllib.parse import urljoin

    result = "Landing Page\n"
    result += Website(url).get_contents()
    links = get_links(url)
    print("Found links:", links)
    # The model's JSON is not guaranteed to carry a "links" key.
    for link in links.get("links", []):
        href = link.get("url")
        if not href:
            continue
        # The prompt admits links may be relative; resolve against the landing page.
        page_url = urljoin(url, href)
        try:
            page = Website(page_url)
        except requests.RequestException as error:
            # One unreachable page should not discard everything gathered so far.
            print(f"Skipping {page_url}: {error}")
            continue
        result += f"\n\n{link.get('type', 'page')}\n"
        result += page.get_contents()
    return result
        

In [17]:
# Live check: performs network fetches and a model call, then prints the combined page text.
print(get_all("https://anthropic.com"))

Found links: {'links': [{'type': 'about page', 'url': 'https://anthropic.com/company'}, {'type': 'career page', 'url': 'https://anthropic.com/careers'}, {'type': 'team page', 'url': 'https://anthropic.com/team'}, {'type': 'research page', 'url': 'https://anthropic.com/research'}, {'type': 'news page', 'url': 'https://anthropic.com/news'}, {'type': 'enterprise page', 'url': 'https://anthropic.com/enterprise'}, {'type': 'pricing page', 'url': 'https://anthropic.com/pricing'}]}
Landing Page
Webpage Title:
Home \ Anthropic
Webpage Contents:
Claude
Overview
Team
Enterprise
API
Pricing
Research
Company
Careers
News
AI
research
and
products
that put safety at the frontier
New
Meet Claude 3.5 Sonnet
Claude 3.5 Sonnet, our most intelligent AI model, is now available.
Talk to Claude
API
Build with Claude
Start using Claude to drive efficiency and create new revenue streams.
Get started now
Our Work
Announcements
Claude 3.5 Sonnet
Jun 21, 2024
Alignment
·
Research
Constitutional AI: Harmlessness 

In [18]:
# System prompt for brochure generation in a formal tone.
system_prompt = (
    "You are an assistant who analyses a company's data by reading several pages from its website "
    "and creates a short company brochure for future customers, investors, recruits etc. "
    "Respond in markdown in a formal tone and include company culture, vision, future projects, "
    "jobs, policies etc. if you have that info."
)

In [23]:
def get_brochure_user_prompt(company_name, url, max_chars=20_000):
    """Build the user message for brochure generation.

    Args:
        company_name: Name of the company, quoted in the prompt's first line.
        url: Landing-page URL whose content (and selected child pages) is appended.
        max_chars: Upper bound on the returned prompt length in characters;
            defaults to the 20,000 used originally.

    Returns:
        The prompt text, cut to at most ``max_chars`` characters.
    """
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += "Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    user_prompt += get_all(url)
    # Truncate so a very large site cannot produce an unbounded prompt.
    return user_prompt[:max_chars]

In [24]:
def get_broucher(company_name, url, system_message=None):
    """Generate a company brochure with the chat model and render it as Markdown.

    Args:
        company_name: Name of the company the brochure is about.
        url: Landing-page URL used to gather the source material.
        system_message: Optional system prompt overriding the module-level
            ``system_prompt`` (e.g. to request a different tone) without
            having to redefine this function.

    Returns:
        None. The brochure is shown in the notebook via ``display``.
    """
    # Resolve the default at call time so later edits to system_prompt are picked up.
    if system_message is None:
        system_message = system_prompt
    chat = openai.chat.completions.create(
        messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
        ],
        model=MODEL
    )
    brochure_markdown = chat.choices[0].message.content
    display(Markdown(brochure_markdown))

In [25]:
# Live run: generate and render the brochure with the get_broucher defined in the previous cell.
get_broucher("Anthropic", "https://anthropic.com")

Found links: {'links': [{'type': 'about page', 'url': 'https://anthropic.com/company'}, {'type': 'career page', 'url': 'https://anthropic.com/careers'}, {'type': 'team page', 'url': 'https://anthropic.com/team'}, {'type': 'news page', 'url': 'https://anthropic.com/news'}, {'type': 'research page', 'url': 'https://anthropic.com/research'}, {'type': 'pricing page', 'url': 'https://anthropic.com/pricing'}, {'type': 'enterprise page', 'url': 'https://anthropic.com/enterprise'}, {'type': 'API page', 'url': 'https://anthropic.com/api'}, {'type': 'jobs page', 'url': 'https://anthropic.com/jobs'}]}


# Anthropic Company Brochure

## Overview
Anthropic is a pioneering AI safety and research company based in San Francisco, committed to developing reliable, interpretable, and steerable AI systems. With a unique interdisciplinary team composed of researchers, engineers, policy experts, and business leaders, Anthropic is at the forefront of generating AI systems that prioritize safety and benefit society.

## Vision
At Anthropic, we envision a world where AI systems are designed to enhance human potential while minimizing risks. We believe in the transformative power of AI and dedicate our efforts to ensure these technologies contribute positively to humanity.

## Company Culture
Anthropic fosters an exceptionally collaborative and high-trust environment. Our cultural values include:

- **Mission-Driven**: Focused on ensuring that AI transforms society positively.
- **High Trust**: Prioritizing honesty and emotional maturity in all interactions.
- **Collaboration**: Emphasizing teamwork across diverse disciplines.
- **Pragmatism and Empiricism**: Celebrating practical solutions and evidence-based approaches.

## Current Projects
Our notable project includes the latest iteration of our AI model, **Claude 3.5 Sonnet**, designed to drive efficiency and create revenue opportunities for users. We also engage in various research initiatives focusing on:

- Constitutional AI for harmlessness via feedback mechanisms.
- An in-depth exploration of AI safety issues including interpretability and policy impacts.

## Future Initiatives
Looking ahead, Anthropic is paving the way for further innovations to enhance AI capabilities in various sectors. Recent announcements include:

- Collaboration with Salesforce to boost Einstein capabilities via Claude.
- The introduction of the **Message Batches API** to streamline processes.
- Expansion of our model safety bug bounty program to enhance the robustness of our offerings.

## Careers
Anthropic is constantly looking for passionate individuals to join our team. We offer competitive compensation packages, generous benefits including unlimited PTO, and a supportive work environment. Our hiring process aims to identify candidates who align with our mission and values, valuing practical skills and collaborative spirit.

### Current Job Openings
Explore our diverse roles in engineering, research, policy, and operations. Visit our [Careers Page](https://www.anthropic.com/careers) for open roles and application details.

## Policies
As a Public Benefit Corporation, Anthropic is committed to responsible AI development. Our governance structure supports long-term benefits for society, with a focus on transparent practices. We uphold:

- **Privacy Policies**: Ensuring candidate and user data protection.
- **Usage Policies**: Clarifying acceptable use of our products.
- **Responsible Scaling Policies**: A recent update focused on sustainable and ethical growth.

## Join Us
If you are excited about contributing to the future of safe AI and want to be part of a mission-driven team that prioritizes collaboration and innovation, get in touch with us or explore our careers page.

## Contact Us
For inquiries or to learn more about our work, partnerships, or media relations, please reach out to our team at:

- [press@anthropic.com](mailto:press@anthropic.com) for press inquiries.
- [support.anthropic.com](http://support.anthropic.com) for general support.

Together, let’s build AI systems you can rely on.

In [26]:
# System prompt for brochure generation in a humorous tone.
system_prompt_funny = (
    "You are an assistant who analyses a company's data by reading several pages from its website "
    "and creates a short company brochure for future customers, investors, recruits etc. "
    "Respond in markdown in a funny tone and include company culture, vision, future projects, "
    "jobs, policies etc. if you have that info."
)

In [27]:
def get_broucher(company_name, url):
    """Generate a brochure using ``system_prompt_funny`` and render it as Markdown.

    Note: this rebinds the name ``get_broucher``; once this cell has run, the
    earlier definition with the same name is no longer reachable.

    Args:
        company_name: Name of the company the brochure is about.
        url: Landing-page URL used to gather the source material.

    Returns:
        None. The brochure is shown in the notebook via ``display``.
    """
    conversation = [
        {"role": "system", "content": system_prompt_funny},
        {"role": "user", "content": get_brochure_user_prompt(company_name, url)},
    ]
    completion = openai.chat.completions.create(model=MODEL, messages=conversation)
    brochure_markdown = completion.choices[0].message.content
    display(Markdown(brochure_markdown))

In [28]:
# Live run: get_broucher now refers to the redefinition in the previous cell, which uses system_prompt_funny.
get_broucher("Anthropic", "https://anthropic.com")

Found links: {'links': [{'type': 'about page', 'url': 'https://anthropic.com/company'}, {'type': 'career page', 'url': 'https://anthropic.com/careers'}, {'type': 'team page', 'url': 'https://anthropic.com/team'}, {'type': 'news page', 'url': 'https://anthropic.com/news'}, {'type': 'research page', 'url': 'https://anthropic.com/research'}, {'type': 'pricing page', 'url': 'https://anthropic.com/pricing'}, {'type': 'enterprise page', 'url': 'https://anthropic.com/enterprise'}, {'type': 'api page', 'url': 'https://anthropic.com/api'}]}


# 🎉 Welcome to Anthropic: The Playground for Safe AI! 🎉

## 👩‍💻 Who are we?
Anthropic is not just any AI company; we’re your friendly neighborhood AI safety and research superheroes, saving the world one reliable AI model at a time! 🌍 Based in fabulous San Francisco, our diverse team of experts in **machine learning**, **physics**, **policy**, and **business** collaborates to craft AI systems that *trustworthiness* and *safety* is the name of the game. 

## 🎯 Our Mission
We believe that **AI will radically transform** our world, so we're hell-bent on creating systems you can actually rely on! Our ultimate goal? Help people and society flourish. 🌈✨

## 🌟 Meet Claude!
Introducing our most intelligent AI sidekick: **Claude 3.5 Sonnet**! 🎶 Whether you're looking to speed up your workflow, brainstorm new ideas, or simply chat, Claude is here to assist. Get ready to launch ideas into the stratosphere! 🚀

## 🔍 Safety Is Our Middle Name
We treat AI safety like a systematic sci-fi novel—filled with exciting adventures! We're all about conducting cutting-edge research and sharing our insights, working with everyone from governments 🌐 to nonprofits 💼 so that we can truly embrace the wild world of AI together! 

## 🎊 Company Culture
Anthropic isn’t just a workplace; it’s a family of innovators! Our **values** are baked into everything we do:
1. **Here for the Mission**: We're all about building a future where AI serves humanity! 
2. **Unusually High Trust**: Kind disagreements welcome! We value honesty and empathy like a warm hug. 🤗  
3. **One Big Team**: Many voices, one vision! 🎤  
4. **Do The Simple Thing That Works**: Keep it simple, folks!

## 🔮 Future Projects
Catch a glimpse into our crystal ball! We’re whipping up:
- **Contextual Retrieval**: Get info quicker than a coffee break! ☕
- **Message Batches API**: Automate your chit-chats!
- Enhanced capabilities that make Claude an indispensable asset for everyone, including businesses, nonprofits, and even your pet goldfish (just kidding, maybe)!

## 🌈 Open Roles
Want to join our team? We’re looking for superheroes in various roles, including:
- Research wizards
- Engineering magicians
- Policy wrights
- Operations ninjas 

Check out our [Careers Page](#) for the latest job openings and join our quest to build safe and reliable AI!

## 💖 Perks & Benefits
Because we care like a golden retriever:
- Comprehensive health, dental, and vision insurance for you and your **optional furry friends!**
- Unlimited Paid Time Off (PTO): Seriously—who doesn’t love a good vacation? 🌴
- Flexible wellness stipend and training perks because self-care is essential!
- **Team lunches** that might just include your favorite taco truck! 🌮😉

## 📏 Policies
We mean business, but we also care about you:
- 🎤 **Open Communication**: Talk to us about everything from your career trajectory to next week’s pizza order!
- **Remote Work Options**: Work from your favorite comfy chair! 
- **Diversity & Inclusion**: Bring your whole self to work! 

## 💌 Get in Touch
Join us in making AI systems you can rely on! Follow us on [Twitter](#), [LinkedIn](#), or [YouTube](#), and let’s collaborate to elevate the world of AI together! 🌍✨

**Anthropic: Where Safety Meets AI Awesomeness!**