### Make the Broucher for the company based on its URL and relevant links

In [16]:
import os
import requests
import json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI

In [17]:
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

if api_key and api_key.startswith('sk-proj-') and len(api_key) > 10:
    print('API KEY looks good so far')
else:
    print("There moght be problem with your API key")

MODEL = 'gpt-4o-mini'
openai = OpenAI()

API KEY looks good so far


In [18]:
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

In [19]:
class Website:
    def __init__(self,url):
        self.url = url
        response = requests.get(url, headers=headers)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')
        self.title = soup.title.string if soup.title else 'Np title found'
        if soup.body:
            for irrelevant in soup.body(['script', 'img', 'style', 'input']):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator='\n', strip = True)
        else:
            self.text = ''
        links = [link.get('href') for link in soup.find_all('a')]
        self.links = [link for link in links if link]

    def get_contents(self):
        return f"Webpage Title:\n{self.title}\n Webpage Contents:\n{self.text}\n\n"

In [20]:
mywebsite = Website("https://isoft.guru")
mywebsite.links

['company.htm',
 'services.htm',
 'partners.htm',
 'clients.htm',
 'careers.htm',
 'contact.htm',
 'services.htm',
 'company.htm',
 'termsofuse.htm']

In [21]:
link_system_prompt = "You are provided with a list of links found on a webpage. \
You are able to decide which of the links would be most relevant to include in a brochure about the company, \
such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
link_system_prompt += "You should respond in JSON as in this example:"
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page": "url": "https://another.full.url/careers"}
    ]
}
"""

In [22]:
print(link_system_prompt)

You are provided with a list of links found on a webpage. You are able to decide which of the links would be most relevant to include in a brochure about the company, such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page": "url": "https://another.full.url/careers"}
    ]
}



In [23]:
def get_links_user_prompt(website):
    user_prompt = f"Here is the list of links on the website of {website.url} - "
    user_prompt += "please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. \
Do not include Terms of Service, Privacy, email links.\n"
    user_prompt += "Links (some might be relative links):\n"
    user_prompt += "\n".join(website.links)
    return user_prompt

In [24]:
print(get_links_user_prompt(mywebsite))


Here is the list of links on the website of https://isoft.guru - please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. Do not include Terms of Service, Privacy, email links.
Links (some might be relative links):
company.htm
services.htm
partners.htm
clients.htm
careers.htm
contact.htm
services.htm
company.htm
termsofuse.htm


In [25]:
def get_links(url):
    website = Website(url)
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(website)}
      ],
        response_format={"type": "json_object"}
    )
    result = response.choices[0].message.content
    return json.loads(result)

In [26]:
huggingface = Website("https://huggingface.co")
huggingface.links

['/',
 '/models',
 '/datasets',
 '/spaces',
 '/docs',
 '/enterprise',
 '/pricing',
 '/login',
 '/join',
 '/spaces',
 '/models',
 '/black-forest-labs/FLUX.1-Kontext-dev',
 '/tencent/Hunyuan-A13B-Instruct',
 '/google/gemma-3n-E4B-it',
 '/THUDM/GLM-4.1V-9B-Thinking',
 '/OmniGen2/OmniGen2',
 '/models',
 '/spaces/enzostvs/deepsite',
 '/spaces/black-forest-labs/FLUX.1-Kontext-Dev',
 '/spaces/ilcve21/Sparc3D',
 '/spaces/OmniGen2/OmniGen2',
 '/spaces/tencent/Hunyuan3D-2.1',
 '/spaces',
 '/datasets/fka/awesome-chatgpt-prompts',
 '/datasets/facebook/seamless-interaction',
 '/datasets/HuggingFaceFW/fineweb-2',
 '/datasets/FreedomIntelligence/ShareGPT-4o-Image',
 '/datasets/black-forest-labs/kontext-bench',
 '/datasets',
 '/join',
 '/pricing#endpoints',
 '/pricing#spaces',
 '/pricing',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/allenai',
 '/facebook',
 '/amazon',
 '/google',
 '/Intel',
 '/microsoft',
 '/grammarly',
 '/Writer',

In [27]:
get_links("https://huggingface.co")

{'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'},
  {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'},
  {'type': 'company page', 'url': 'https://huggingface.co/enterprise'},
  {'type': 'pricing page', 'url': 'https://huggingface.co/pricing'},
  {'type': 'blog page', 'url': 'https://huggingface.co/blog'},
  {'type': 'documentation', 'url': 'https://huggingface.co/docs'}]}

In [28]:
get_links("http://isoft.guru")

{'links': [{'type': 'about page', 'url': 'http://isoft.guru/company.htm'},
  {'type': 'services page', 'url': 'http://isoft.guru/services.htm'},
  {'type': 'careers page', 'url': 'http://isoft.guru/careers.htm'},
  {'type': 'contact page', 'url': 'http://isoft.guru/contact.htm'}]}

In [30]:
def get_all_details(url):
    result = "Landing page:\n"
    result += Website(url).get_contents()
    links = get_links(url)
    print("Found links:", links)
    for link in links["links"]:
        result += f"\n\n{link['type']}\n"
        result += Website(link["url"]).get_contents()
    return result

In [31]:
print(get_all_details("https://huggingface.co"))

Found links: {'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'enterprise page', 'url': 'https://huggingface.co/enterprise'}, {'type': 'pricing page', 'url': 'https://huggingface.co/pricing'}, {'type': 'blog page', 'url': 'https://huggingface.co/blog'}, {'type': 'docs page', 'url': 'https://huggingface.co/docs'}]}
Landing page:
Webpage Title:
Hugging Face – The AI community building the future.
 Webpage Contents:
Hugging Face
Models
Datasets
Spaces
Community
Docs
Enterprise
Pricing
Log In
Sign Up
The AI community building the future.
The platform where the machine learning community collaborates on models, datasets, and applications.
Explore AI Apps
or
Browse 1M+ models
Trending on
this week
Models
black-forest-labs/FLUX.1-Kontext-dev
Updated
6 days ago
•
131k
•
1.25k
tencent/Hunyuan-A13B-Instruct
Updated
2 days ago
•
7.81k
•
692
google/gemma-3n-E4B-it
Updated
about 9 hou

In [35]:
print(get_all_details("http://isoft.guru"))

Found links: {'links': [{'type': 'about page', 'url': 'http://isoft.guru/company.htm'}, {'type': 'services page', 'url': 'http://isoft.guru/services.htm'}, {'type': 'careers page', 'url': 'http://isoft.guru/careers.htm'}, {'type': 'contact page', 'url': 'http://isoft.guru/contact.htm'}]}
Landing page:
Webpage Title:
i Soft
 Webpage Contents:
Home
Company
Services
Partners
Clients
Careers
Contact
To be the preferred choice in Consulting, Staffing and Training to organizations worldwide.
Services
Training
iSoft develops high caliber, time bound and cost-effective workshops to train workforce in organizations....
More...
iSoft Consulting Inc. is an emerging human resources provider in the USA in the domain of Staffing, Training and Consulting. Headquartered in Bridgewater, New Jersey, the company has been providing unmatched services...
More...
Partners
© Copyright 2015 iSoft Consulting inc. All Rights Reserved.
Terms of Use



about page
Webpage Title:
Company
 Webpage Contents:
Home
Com

In [41]:
system_prompt1 = "You are an assistant that analyzes the contents of several relevant pages from a company website \
and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
Include details of company culture, customers and careers/jobs if you have the information."

In [40]:
system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
# and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
# Include details of company culture, customers and careers/jobs if you have the information."

In [42]:
def get_brochure_user_prompt(company_name, url):
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += f"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    user_prompt += get_all_details(url)
    user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters
    return user_prompt

In [43]:
get_brochure_user_prompt("HuggingFace", "https://huggingface.co")

Found links: {'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'enterprise page', 'url': 'https://huggingface.co/enterprise'}, {'type': 'pricing page', 'url': 'https://huggingface.co/pricing'}, {'type': 'blog page', 'url': 'https://huggingface.co/blog'}, {'type': 'community page', 'url': 'https://discuss.huggingface.co'}, {'type': 'GitHub page', 'url': 'https://github.com/huggingface'}, {'type': 'LinkedIn page', 'url': 'https://www.linkedin.com/company/huggingface/'}, {'type': 'Twitter page', 'url': 'https://twitter.com/huggingface'}]}


'You are looking at a company called: HuggingFace\nHere are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\nLanding page:\nWebpage Title:\nHugging Face – The AI community building the future.\n Webpage Contents:\nHugging Face\nModels\nDatasets\nSpaces\nCommunity\nDocs\nEnterprise\nPricing\nLog In\nSign Up\nThe AI community building the future.\nThe platform where the machine learning community collaborates on models, datasets, and applications.\nExplore AI Apps\nor\nBrowse 1M+ models\nTrending on\nthis week\nModels\nblack-forest-labs/FLUX.1-Kontext-dev\nUpdated\n6 days ago\n•\n131k\n•\n1.25k\ntencent/Hunyuan-A13B-Instruct\nUpdated\n2 days ago\n•\n7.81k\n•\n692\ngoogle/gemma-3n-E4B-it\nUpdated\nabout 9 hours ago\n•\n148k\n•\n411\nTHUDM/GLM-4.1V-9B-Thinking\nUpdated\nabout 14 hours ago\n•\n1.96k\n•\n160\nOmniGen2/OmniGen2\nUpdated\n10 days ago\n•\n33.6k\n•\n331\nBrowse 1M+ models\nSpaces\nRunning\n9.15

In [44]:
get_brochure_user_prompt("isoft", "http://isoft.guru")

Found links: {'links': [{'type': 'about page', 'url': 'http://isoft.guru/company.htm'}, {'type': 'services page', 'url': 'http://isoft.guru/services.htm'}, {'type': 'careers page', 'url': 'http://isoft.guru/careers.htm'}]}


"You are looking at a company called: isoft\nHere are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\nLanding page:\nWebpage Title:\ni Soft\n Webpage Contents:\nHome\nCompany\nServices\nPartners\nClients\nCareers\nContact\nTo be the preferred choice in Consulting, Staffing and Training to organizations worldwide.\nServices\nTraining\niSoft develops high caliber, time bound and cost-effective workshops to train workforce in organizations....\nMore...\niSoft Consulting Inc. is an emerging human resources provider in the USA in the domain of Staffing, Training and Consulting. Headquartered in Bridgewater, New Jersey, the company has been providing unmatched services...\nMore...\nPartners\n© Copyright 2015 iSoft Consulting inc. All Rights Reserved.\nTerms of Use\n\n\n\nabout page\nWebpage Title:\nCompany\n Webpage Contents:\nHome\nCompany\nServices\nPartners\nClients\nCareers\nContact\nCompany\nis an eme

In [45]:
def create_brochure(company_name, url):
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
          ],
    )
    result = response.choices[0].message.content
    display(Markdown(result))

In [46]:
create_brochure("HuggingFace", "https://huggingface.com")

Found links: {'links': [{'type': 'about page', 'url': 'https://huggingface.com/'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'enterprise page', 'url': 'https://huggingface.com/enterprise'}, {'type': 'pricing page', 'url': 'https://huggingface.com/pricing'}, {'type': 'blog page', 'url': 'https://huggingface.com/blog'}, {'type': 'community page', 'url': 'https://discuss.huggingface.co'}, {'type': 'GitHub page', 'url': 'https://github.com/huggingface'}, {'type': 'Twitter page', 'url': 'https://twitter.com/huggingface'}, {'type': 'LinkedIn page', 'url': 'https://www.linkedin.com/company/huggingface/'}]}


# Welcome to the Hugging Face Brochure!

## Hugs for Everyone (Even Computers)

At **Hugging Face**, we believe in building a brighter future with our AI community, one model at a time! We're like the friendly neighborhood Spider-Man, except instead of webs, we weave together models, datasets, and applications. And instead of saving the world from bad guys, we're teaching machines how to understand the world (and how to create adorable cat pictures).

### What We Do
- **Models Galore!** 
  Explore over 1 million models! We've got more models than there are cat videos on the internet. From the latest in deep learning to mind-boggling applications, we've got it all—our models are trending faster than a viral TikTok!

- **Datasets, Datasets, and More Datasets!**
  Need data? You're in luck! We have a treasure trove with over 250,000 datasets. Not only will they help you become smarter, but they also travel better than the average tourist! 

- **Spaces: A Playground for Coders**
  Think of "Spaces" like a playground, but for machine learning applications. Swing by and let your creativity run wild! It's like Minecraft for AI nerds. 🎮

### Who's Hugging Us?
Over **50,000 organizations** are already in our warm embrace, including tech giants like Google, Microsoft, and Amazon, and even some friendly neighborhood non-profits! 

Your ex may not have followed you on social media, but our models sure have followers. We have more followers than a celebrity with a million selfies! 

### Join the Team!
At Hugging Face, we're not just about AI; we're about community and innovation. Our company culture is as vibrant as a box of crayons (and likely just as colorful). We encourage:
- **Collaboration:** Because sometimes you need a superhero sidekick!
- **Learning:** We provide plenty of opportunities to practice your skills, whether it’s ML, Python, or deciphering the office coffee preferences.
- **Future Focused:** We’re building tomorrow’s tech today! Plus, our office snack game is STRONG.

### Careers with a Hug
Feeling inspired? Ready to jump into the AI deep end? We are always on the lookout for talent as bright as a supernova! Positions range from data scientists to community managers—and yes, we have a position for the office “Snack Coordinator.” (Kudos to this job largely entails taste-testing.)

### Can AI Have Fun?
Absolutely! At Hugging Face, we take our work seriously, but not ourselves. Our employees know how to throw a fine pizza party or a virtual game night! 😄 

### Join Us!
Ready to hug it out with us? Whether you're a prospective customer, an eager recruit, or an investor looking for a warm machine learning embrace, come join the Hugging Face community! 

---

**Hugging Face - The AI community building the future.**  
Where everyone gets a hug, even computers! 

In [47]:
create_brochure("isoft", "http://isoft.guru")

Found links: {'links': [{'type': 'about page', 'url': 'http://isoft.guru/company.htm'}, {'type': 'services page', 'url': 'http://isoft.guru/services.htm'}, {'type': 'careers page', 'url': 'http://isoft.guru/careers.htm'}, {'type': 'contact page', 'url': 'http://isoft.guru/contact.htm'}]}


# Welcome to iSoft: Your HR Superheroes!
*Bringing Light to Your HR Darkness Since 2012!*

---

## Who We Are  
At iSoft, we like to think of ourselves as the superheroes of the human resources world. Based in the bustling metropolis of Bridgewater, New Jersey, we've been saving organizations from the clutches of staffing and training chaos since 2012. Armed with vibrant professionals, advanced search techniques, and a passion for HR (we're basically HR ninjas), we’re here to help businesses big and small find their perfect match!

---

## Our Mission (with a Side of Humor)
- Be the **preferred choice** in Consulting, Staffing, and Training.  
- Maintain high standards and leave no resume unturned.
- Build lifelong friendships (who doesn’t love a good HR buddy?).
- Make you go from "meh" to "marvelous" in record time!

---

## Our Services: What We Offer (No Capes Required)

### 1. **Consulting**  
Need a map for your HR journey? We’ve got the GPS!  
- **Cost Consulting:** Because saving money is always in style.
- **HR Consulting:** The secret sauce for all your people problems.
- **Strategy Consulting:** Outsmart your competitors with our cunning plans.  
- **IT Consulting:** Where tech meets talent – think of us as the best IT matchmakers.

### 2. **Training**  
Our workshops are designed to make your workforce feel like the Avengers!  
- **Corporate Training:** Empower your employees and lift their spirits.  
- **On-Demand IT Training:** Because your team should always be on the cutting edge.
- **Customized Programs:** Just like ordering a pizza, but way more productive. 
- **Induction Programs for Freshers:** Because everyone deserves a warm welcome!

### 3. **Staffing**  
Do you have a staffing problem? Not on our watch!  
- **Temporary Staffing:** When you need help *just* until you find your new best friend.  
- **Permanent Placement:** Because sometimes you just know it’s meant to be.  
- **Vendor Management Solutions:** For when you want to manage the chaos with style!

---

## Company Culture: Fun and Dynamic!  
At iSoft, we love to mix business with pleasure! Our culture encourages open communication, creative collaboration, and just the right amount of quirky. We believe a happy workplace is a productive workplace – just ask our office pets! (No, seriously, they answer emails better than half of us.) 

---

## Join Us: Careers at iSoft  
Think you have what it takes to join our HR superhero squad? We have room for vibrant, dynamic souls looking to make a difference! Whether you’re a seasoned pro or just starting your HR journey, iSoft is your launching pad to success!

*Get ready to embark on an adventure where the coffee is strong, the teamwork is unbeatable, and the job satisfaction is through the roof!*

---

## Our Clients: Big and Small  
From corporate giants to local legends, we work with organizations that trust us to save the day! Plus, we have a knack for leaving our clients with smiles bigger than their quarterly profits. 

---

**So what are you waiting for?**  
Become a part of the iSoft family where work is just a part of the fun! Visit our **[Careers page](#)** to join our adventure, and let’s make HR magical together!

*Contact us today – we promise we don’t bite… unless you bring donuts!* 🍩  

---  
*© 2015 iSoft Consulting Inc. All Superhero Rights Reserved.*   
*Terms of Use: Laughter & productivity required; HR crises preferred.*