In [2]:
import requests
import json
from typing import List
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
import ollama
import gradio as gr

In [3]:
MODEL = "mistral:7b"

In [4]:
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class Website:

    def __init__(self, url):
        self.url = url
        response = requests.get(url, headers)
        self.body = response.content
        soup = BeautifulSoup(self.body, "html.parser")
        self.title = soup.title.string if soup.title else "No title found"
        if soup.body:
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""
        links = [link.get('href') for link in soup.find_all('a')]
        self.links = [link for link in links if link]
        
    def get_contents(self):
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [5]:
link_system_prompt = "You are provided with a list of links found on a webpage. \
You are able to decide which of the links would be most relevant to include in a brochure about the company, \
such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
link_system_prompt += "You should respond in JSON as in this example:"
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [6]:
def get_links_user_prompt(website):
    user_prompt = f"Here is the list of links on the website of {website.url} - "
    user_prompt += "please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. \
Do not include Terms of Service, Privacy, email links.\n"
    user_prompt += "Links (some might be relative links):\n"
    user_prompt += "\n".join(website.links)
    return user_prompt

In [7]:
def get_links(url):
    website = Website(url)
    response= ollama.chat(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(website)}
      ]
    )
    result = response['message']['content']
    return json.loads(result)



In [8]:
def get_all_details (url):
    result = "Landing page:\n"
    result += Website(url).get_contents()
    links = get_links(url)
    print("Found links:", links)
    for link in links["links"]:
        result += f"\n\n{link['type']}\n"
        result += Website(link["url"]).get_contents()
    return result


In [9]:
system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
Include details of company culture, customers and careers/jobs if you have the information."

In [10]:
def get_brochure_user_prompt(company_name, url):
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += f"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    user_prompt += get_all_details(url)
    user_prompt = user_prompt[:5_000]
    return user_prompt

In [11]:
def create_brochure(company_name, url):
    response = ollama.chat(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
          ]
    )
    result = response['message']['content']
    display(Markdown(result))
    return result


In [13]:
force_dark_mode = """
() => {
    const url = new URL(window.location);
    if (url.searchParams.get('__theme') !== 'dark') {
        url.searchParams.set('__theme', 'dark');
        window.location.href = url.href;
    }
}
"""

gr.Interface(
    fn=create_brochure,
    inputs=["textbox", "textbox"],
     outputs=[gr.Markdown(label="Response:")],
    js=force_dark_mode
).launch()

* Running on local URL:  http://127.0.0.1:7861
* To create a public link, set `share=True` in `launch()`.




In [14]:
create_brochure("HuggingFace", "https://huggingface.co")

Found links: {'links': [{'type': 'about page', 'url': 'https://huggingface.co/'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'company page', 'url': 'https://www.linkedin.com/company/huggingface/'}, {'type': 'docs', 'url': 'https://huggingface.co/docs'}, {'type': 'blog', 'url': 'https://huggingface.co/blog'}, {'type': 'discussion forum', 'url': 'https://discuss.huggingface.co/'}, {'type': 'status page', 'url': 'https://status.huggingface.co/'}, {'type': 'github', 'url': 'https://github.com/huggingface'}, {'type': 'twitter', 'url': 'https://twitter.com/huggingface'}, {'type': 'join discord', 'url': 'https://discord.gg/huggingface'}]}


 # Hugging Face Brochure

Welcome to **Hugging Face**, the AI community building the future.

**Company Overview**

Hugging Face is a platform that brings together the machine learning community, fostering collaboration on models, datasets, and applications. Our mission is to make AI accessible to everyone by providing an open-source stack that enables exploration of various modalities such as text, image, video, audio, and 3D.

**Culture**

We are a vibrant, inclusive community driven by the shared passion for AI and its potential. We believe in collaboration, transparency, and the power of open source. Our platform hosts over 50,000 organizations, from startups to tech giants like Meta, Google, Microsoft, and more.

**Customers**

Our platform caters to a diverse range of users, from individual AI enthusiasts to large enterprises seeking to accelerate their machine learning efforts. Our customers include companies such as Grammarly and Writer, as well as non-profit organizations like AI2.

**Careers/Jobs**

Join our team and be part of the future of AI! We offer a wide range of opportunities for engineers, designers, researchers, and more. With us, you'll work on cutting-edge projects, collaborate with top minds in the field, and make a real impact.

**Products & Services**

1. **Models**: Access and use over 1 million pre-trained models from various developers on our platform. Our trending models include openai/gpt-oss-120b and openai/gpt-oss-20b.

2. **Datasets**: Browse and share over 250,000 datasets for various machine learning tasks. Examples include spatialverse/InteriorGS and nvidia/Nemotron-Post-Training-Dataset-v1.

3. **Spaces**: Build and deploy your AI applications on our platform with ease. Our currently running spaces include DeepSite v2, Qwen Image, FLUX.1 Krea Dev, and more.

4. **Enterprise Solutions**: For businesses needing more advanced capabilities, we offer paid compute solutions with dedicated support and enterprise-grade security. Our pricing starts at $0.60/hour for GPU.

5. **Open Source Projects**: We are committed to building the foundation of ML tooling with the community. Our open source projects include Transformers, Tokenizers, Datasets, and more.

**Getting Started**

Start your journey with Hugging Face today! Sign up for free and get started on your AI adventure. Whether you're a seasoned professional or just starting out, there's something for everyone at Hugging Face.

Join us in building the future of AI! 🚀

" # Hugging Face Brochure\n\nWelcome to **Hugging Face**, the AI community building the future.\n\n**Company Overview**\n\nHugging Face is a platform that brings together the machine learning community, fostering collaboration on models, datasets, and applications. Our mission is to make AI accessible to everyone by providing an open-source stack that enables exploration of various modalities such as text, image, video, audio, and 3D.\n\n**Culture**\n\nWe are a vibrant, inclusive community driven by the shared passion for AI and its potential. We believe in collaboration, transparency, and the power of open source. Our platform hosts over 50,000 organizations, from startups to tech giants like Meta, Google, Microsoft, and more.\n\n**Customers**\n\nOur platform caters to a diverse range of users, from individual AI enthusiasts to large enterprises seeking to accelerate their machine learning efforts. Our customers include companies such as Grammarly and Writer, as well as non-profit org