In [13]:
# imports
# If these fail, please check you're running from an 'activated' environment with (llms) in the command prompt

import os
import requests
import json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI
# Initialize and constants

# Load variables from a local .env file; override=True lets the file win over
# values already present in the process environment.
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Cheap sanity check on the key's shape only - it does not contact the API.
if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:
    print("API key looks good so far")
else:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")

# Hosted model and its client.
GPT_MODEL = 'gpt-4o-mini'
gpt_openai = OpenAI()

# Local Ollama endpoint and model name.
OLLAMA_API = "http://localhost:11434/api/chat"
OLLAMA_MODEL = "llama3.2"

# Second OpenAI client pointed at the local Ollama server's /v1 endpoint.
# NOTE(review): 'ollama' looks like a placeholder rather than a real credential - confirm.
ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')


API key looks good so far


In [8]:
# A class to represent a Webpage

# Request headers sent with every fetch; some websites need a proper
# browser-style User-Agent before they will serve the page.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    ),
}

class Website:
    """
    A utility class to represent a Website that we have scraped, now with links.

    Attributes set by the constructor:
        url:   the address that was requested.
        body:  the raw response content.
        title: text of the <title> tag, or "No title found".
        text:  text of <body> with script/style/img/input elements removed,
               or "" when the page has no <body>.
        links: every non-empty href found on an <a> tag, exactly as written in
               the page (so entries may be relative).
    """

    def __init__(self, url, timeout=30):
        """
        Fetch `url` and parse it with BeautifulSoup.

        Args:
            url: address of the page to fetch.
            timeout: seconds passed to requests.get; without one a stalled
                server would block the notebook indefinitely.

        Raises:
            requests.RequestException: propagated from requests.get (connection
                failure, timeout, invalid URL, ...). The HTTP status code is not
                checked, so error pages are parsed like any other page.
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')
        # .string is None when <title> is empty or contains nested tags; fall
        # back to the placeholder instead of storing None.
        title = soup.title.string if soup.title else None
        self.title = title if title else "No title found"
        if soup.body:
            # Drop elements that carry no readable text before extracting it.
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""
        links = [link.get('href') for link in soup.find_all('a')]
        # Anchors without an href (or with an empty one) are discarded.
        self.links = [link for link in links if link]

    def get_contents(self):
        """Return the page title and text formatted as one prompt-ready string."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [9]:
# System prompt for the link-selection call: tells the model what kind of links
# matter and shows the JSON shape the reply must follow.
link_system_prompt = "You are provided with a list of links found on a webpage. \
You are able to decide which of the links would be most relevant to include in a brochure about the company, \
such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
link_system_prompt += "You should respond in JSON as in this example:"
# The example has to be valid JSON itself, since the model is asked to imitate it.
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

# System prompt for the brochure-writing call. Adjacent string literals are
# concatenated, so every fragment carries its own trailing space - this keeps
# "Respond in markdown." from running straight into the next sentence.
system_prompt = (
    "You are an assistant that analyzes the contents of several relevant pages from a company website "
    "and creates a short brochure about the company for prospective customers, investors and recruits. "
    "Respond in markdown. "
    "Include details of company culture, customers and careers/jobs if you have the information."
)

# Or uncomment the lines below for a more humorous brochure - this demonstrates how easy it is to incorporate 'tone':

# system_prompt = (
#     "You are an assistant that analyzes the contents of several relevant pages from a company website "
#     "and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. "
#     "Respond in markdown. "
#     "Include details of company culture, customers and careers/jobs if you have the information."
# )


In [17]:
def get_links_user_prompt(website):
    """
    Build the user message for the link-selection call.

    Reads `website.url` and `website.links` and returns one string: the
    instructions followed by the links, one per line.
    """
    sections = [
        f"Here is the list of links on the website of {website.url} - ",
        (
            "please decide which of these are relevant web links for a brochure about the company, "
            "respond with the full https URL in JSON format. "
            "Do not include Terms of Service, Privacy, email links.\n"
        ),
        "Links (some might be relative links):\n",
        "\n".join(website.links),
    ]
    return "".join(sections)

def get_links(url):
    """
    Scrape `url` and ask the chat model which of its links belong in a brochure.

    Uses the notebook-level `openai` client and `MODEL` name, requests a JSON
    object reply, and returns that reply parsed with json.loads.
    """
    page = Website(url)
    conversation = [
        {"role": "system", "content": link_system_prompt},
        {"role": "user", "content": get_links_user_prompt(page)},
    ]
    completion = openai.chat.completions.create(
        model=MODEL,
        messages=conversation,
        response_format={"type": "json_object"},
    )
    reply_text = completion.choices[0].message.content
    return json.loads(reply_text)

def get_all_details(url):
    """
    Collect the text of the landing page plus every page the model selected.

    Args:
        url: address of the company's landing page.

    Returns:
        One string: a "Landing page:" section followed by one section per
        selected link, each headed by the link's type.

    Selected links are resolved against `url`, so relative paths returned by
    the model still fetch. Entries without a URL, and pages whose fetch raises
    requests.RequestException, are skipped so that a single bad link does not
    discard the whole result.
    """
    from urllib.parse import urljoin  # stdlib; imported here so this cell stands alone

    result = "Landing page:\n"
    result += Website(url).get_contents()
    links = get_links(url)
    print("Found links:", links)
    # The reply is asked to look like {"links": [...]}, but nothing enforces it.
    for link in links.get("links", []):
        target = link.get("url")
        if not target:
            continue
        # Absolute URLs pass through urljoin unchanged; relative ones gain the site's base.
        target = urljoin(url, target)
        try:
            page = Website(target)
        except requests.RequestException as error:
            print(f"Skipping {target}: {error}")
            continue
        result += f"\n\n{link.get('type', 'page')}\n"
        result += page.get_contents()
    return result

def get_brochure_user_prompt(company_name, url, max_chars=5_000):
    """
    Build the user message for the brochure-writing call.

    Args:
        company_name: name inserted into the opening line of the prompt.
        url: landing page handed to get_all_details for scraping.
        max_chars: upper bound on the length of the returned prompt; the text
            is cut at this many characters. Pass None to disable truncation.

    Returns:
        The prompt string, at most `max_chars` characters long.
    """
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += "Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    user_prompt += get_all_details(url)
    # Slicing with None keeps the whole string, so max_chars=None means "no limit".
    return user_prompt[:max_chars]


def create_brochure(company_name, url):
    """
    Generate a brochure for the company in one request and render it.

    Sends the brochure system prompt plus the scraped-content user prompt to
    the notebook-level `openai` client using `MODEL`, then displays the reply
    as Markdown. Nothing is returned.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": get_brochure_user_prompt(company_name, url)},
    ]
    completion = openai.chat.completions.create(model=MODEL, messages=conversation)
    brochure_markdown = completion.choices[0].message.content
    display(Markdown(brochure_markdown))

def stream_brochure(company_name, url):
    """
    Generate a brochure for the company and render it as it streams in.

    Sends the same messages as create_brochure but with stream=True, and
    refreshes a single Markdown display with the text received so far after
    each chunk. Nothing is returned.
    """
    stream = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
          ],
        stream=True
    )

    # Keep the untouched stream text and derive the displayed text from it on
    # every update. That way a fence split across chunks ("```" + "markdown")
    # is still recognised once both halves have arrived.
    raw = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        # Skip chunks that carry no choices instead of failing on choices[0].
        if not chunk.choices:
            continue
        raw += chunk.choices[0].delta.content or ''
        # Remove only the code-fence markers; the word "markdown" appearing in
        # ordinary brochure text is left alone.
        rendered = raw.replace("```markdown", "").replace("```", "")
        update_display(Markdown(rendered), display_id=display_handle.display_id)

In [18]:
# Select the hosted GPT model and client for the brochure functions, which read
# the notebook-level names MODEL and openai when they are called.
MODEL = GPT_MODEL
openai = gpt_openai
# Non-streaming alternative:
# create_brochure("HuggingFace", "https://huggingface.co")
stream_brochure("HuggingFace", "https://huggingface.co")

Found links: {'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'enterprise page', 'url': 'https://huggingface.co/enterprise'}, {'type': 'pricing page', 'url': 'https://huggingface.co/pricing'}, {'type': 'blog page', 'url': 'https://huggingface.co/blog'}, {'type': 'community page', 'url': 'https://discuss.huggingface.co'}, {'type': 'GitHub page', 'url': 'https://github.com/huggingface'}, {'type': 'LinkedIn page', 'url': 'https://www.linkedin.com/company/huggingface/'}, {'type': 'Twitter page', 'url': 'https://twitter.com/huggingface'}]}


# Hugging Face: Building the Future of AI Together

## About Us

Hugging Face is the premier community-driven platform dedicated to advancing machine learning (ML) technology. Our mission is to democratize machine learning by making it accessible to everyone – from researchers and developers to enterprises. We are the home of open-source ML tooling that fosters innovation and collaboration.

---

## Our Offerings

- **Cutting-Edge Models**: Explore over 1 million models and 250,000 datasets designed for multiple applications including text, image, and audio processing.
- **Collaboration Spaces**: Create and host projects alongside a vibrant community using Spaces, where ideas are nurtured, and solutions are born.
- **Enterprise Solutions**: Robust services tailored for organizations, ensuring enterprise-grade security and dedicated support at competitive pricing.

---

## Community and Collaboration

Join a vast network of over 50,000 organizations employing Hugging Face technologies, including industry leaders like Google, Microsoft, and Amazon. Our collaborative platform enables users to share and improve upon models and datasets, creating a unique environment where innovation thrives.

- **Trending Models**: Stay up to date with the latest advancements in AI through our trending models, continually updated to reflect the most significant contributions to the community.
  
- **Open Source Commitment**: We are committed to open-source principles, with tools like Transformers, Diffusers, and tokenizers all developed collaboratively with the community.

---

## Company Culture

At Hugging Face, we believe in fostering a supportive and inclusive environment where everyone's contributions matter. Our culture is deeply rooted in collaboration, transparency, and the belief that together we can create groundbreaking technology. We encourage experimentation and continuous learning, making Hugging Face not just a workplace, but a learning community.

- **Team Spirit**: We celebrate diversity and creativity, encouraging our team members to explore new ideas and challenge the status quo.
- **Mission-Driven**: Every member of our team is united by a common goal – to make machine learning available to all.

---

## Careers at Hugging Face

Join us in shaping the future of AI! We are always on the lookout for passionate individuals to join our team across various roles, including engineering, research, and community management. 

- **Current Openings**: Check our jobs page for opportunities to contribute to exciting projects in an innovative workspace.
- **Perks and Benefits**: We offer a supportive work environment with flexible hours, remote work options, and a range of employee benefits tailored to ensure work-life balance.

---

## Connect With Us

To learn more about us, explore our offerings, or become part of our innovative community, visit our website at [Hugging Face](https://huggingface.co)

---

Together, let’s build the future of AI!

In [19]:
# Uncomment the next two lines to run the brochure against the local Ollama
# model and client. While they stay commented out, MODEL and openai keep
# whatever values a previously executed cell assigned.
# MODEL = OLLAMA_MODEL
# openai = ollama_via_openai
create_brochure("HuggingFace", "https://huggingface.co")

Found links: {'links': [{'type': 'about page', 'url': 'https://huggingface.co'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'enterprise page', 'url': 'https://huggingface.co/enterprise'}, {'type': 'blog page', 'url': 'https://huggingface.co/blog'}, {'type': 'community forum', 'url': 'https://discuss.huggingface.co'}, {'type': 'GitHub page', 'url': 'https://github.com/huggingface'}, {'type': 'LinkedIn page', 'url': 'https://www.linkedin.com/company/huggingface/'}, {'type': 'Twitter page', 'url': 'https://twitter.com/huggingface'}]}


# Hugging Face Company Brochure

## Welcome to Hugging Face
**The AI community building the future.**  
At Hugging Face, we are dedicated to creating a collaborative space for the machine learning community to thrive. Our platform empowers individuals and organizations to work together on models, datasets, and applications.

---

## Our Offerings

### Collaboration Platform
- **Models**: Access and collaborate on over **1M+ models** that cater to various machine learning tasks.
- **Datasets**: Browse through **250k+ datasets** to find the right resources for your projects.
- **Spaces**: Create and showcase your applications within a running environment.

### Compute Services
- **Inference Endpoints**: Deploy optimized applications with just a few clicks.
- **Enterprise Solutions**: Tailored services for organizations looking to leverage AI with enterprise-grade security and support.

### Open Source Initiatives
We are building the foundation of machine learning tooling with community-driven efforts, including:
- **Transformers**: State-of-the-art ML libraries for PyTorch, TensorFlow, and JAX.
- **Diffusers**: Advanced diffusion models for various applications.
- **SafeTensors**: Secure methods to store and distribute neural network weights.

Explore all our tools and libraries through our hub to enhance your machine learning experience!

---

## Who We Serve
Hugging Face is trusted by over **50,000 organizations** including:
- **Meta**
- **Amazon**
- **Google**
- **Microsoft**
- **Intel**

Join a diverse community where innovation meets collaboration!

---

## Company Culture
At Hugging Face, we embrace a culture of **inclusivity, creativity, and continuous learning**. Our team fosters an environment where collaboration and knowledge-sharing are at the forefront, encouraging every employee to contribute their unique talents and ideas. 

### Workplaces Benefits
- Flexible work hours
- Opportunities for professional development
- A supportive community that values work-life balance

---

## Careers at Hugging Face
Join us in building the future of AI! We are constantly on the lookout for passionate innovators who are eager to make a difference. 

### Current Job Opportunities
- Machine Learning Engineers
- Data Scientists
- Community Managers
- Software Developers

Explore our [Careers page](#) for open positions and to learn more about life at Hugging Face.

---

## Connect With Us
Stay updated on our latest models, datasets, and developments by following us on our social media channels:
- [GitHub](#)
- [Twitter](#)
- [LinkedIn](#)
- [Discord](#)

For more information, visit our [website](#) and join the Hugging Face community in shaping the future of AI.

---

### Together, let's accelerate the journey of machine learning and build something remarkable!

In [21]:
# Try link selection on a LinkedIn jobs URL; the recorded run returned an empty list of links.
# NOTE(review): presumably the page needs a signed-in session, so little usable HTML is fetched - confirm.
get_links("https://www.linkedin.com/jobs/collections/recommended/?currentJobId=4092073336&discover=recommended&discoveryOrigin=JOBS_HOME_JYMBII")

{'links': []}