# Project 1 : Web Summarizer (Text Summarization)

In [3]:
# imports

import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
#from IPython.display import Markdown, display
from openai import OpenAI
import json
from typing import List
import anthropic



In [4]:
# Load environment variables in a file called .env
# Print the key prefixes to help with any debugging

# Read .env (values there override any already-set environment variables).
load_dotenv(override=True)

# Pull each provider's key out of the environment; a missing key is None.
openai_api_key = os.environ.get('OPENAI_API_KEY')
anthropic_api_key = os.environ.get('ANTHROPIC_API_KEY')
google_api_key = os.environ.get('GOOGLE_API_KEY')

# Report, per provider, either the first few characters of the key or that it is unset.
print(
    f"OpenAI API Key exists and begins {openai_api_key[:8]}"
    if openai_api_key
    else "OpenAI API Key not set"
)
print(
    f"Anthropic API Key exists and begins {anthropic_api_key[:7]}"
    if anthropic_api_key
    else "Anthropic API Key not set"
)
print(
    f"Google API Key exists and begins {google_api_key[:8]}"
    if google_api_key
    else "Google API Key not set"
)

OpenAI API Key exists and begins sk-proj-
Anthropic API Key exists and begins sk-ant-
Google API Key exists and begins AIzaSyAA


In [6]:
# Create one API client per provider; no arguments are passed to either constructor.
# NOTE(review): presumably both SDKs read their API keys from the environment
# variables loaded above — confirm against the SDK documentation.
openai = OpenAI()
claude = anthropic.Anthropic()

In [6]:
# A class to represent a Webpage

# Some websites need you to use proper headers when fetching them:
# this dict carries a browser-style User-Agent and is passed to requests.get by Website.
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class Website:
    """
    A scraped web page: its URL, its title and the text of its <body>.

    Attributes:
        url: The address that was fetched.
        title: Text of the <title> tag, or "No title found" when the tag is
            missing or has no single string value.
        text: Newline-separated text of <body>, or "" when the page has no <body>.

    Note: a later cell in this notebook defines another class that is also
    named Website; once that cell runs, it replaces this definition.
    """

    def __init__(self, url, timeout=30):
        """
        Create this Website object from the given url using the BeautifulSoup library

        Args:
            url: Address of the page to download.
            timeout: Seconds passed to requests.get as its timeout, so a
                non-responding server raises instead of blocking indefinitely.
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(response.content, 'html.parser')
        self.title = self._title_of(soup)
        # script/style/img/input elements are not removed here, so any text
        # they contain ends up in self.text as well.
        # Pages without a <body> get empty text rather than an AttributeError.
        self.text = soup.body.get_text(separator="\n", strip=True) if soup.body else ""

    @staticmethod
    def _title_of(soup):
        """Return the page title string, or "No title found" if absent or empty."""
        title_tag = soup.title
        # .string is None when <title> is empty or holds more than one child.
        return (title_tag.string if title_tag else None) or "No title found"

In [7]:
# Let's try one out. Change the website and add print statements to follow along.
# NOTE: the second print emits the page's entire body text, which can be very long.

isaignani = Website("https://en.wikipedia.org/wiki/Ilaiyaraaja")
print(isaignani.title)
print(isaignani.text)

Ilaiyaraaja - Wikipedia
Jump to content
Main menu
Main menu
move to sidebar
hide
Navigation
Main page
Contents
Current events
Random article
About Wikipedia
Contact us
Contribute
Help
Learn to edit
Community portal
Recent changes
Upload file
Special pages
Search
Search
Appearance
Donate
Create account
Log in
Personal tools
Donate
Create account
Log in
Pages for logged out editors
learn more
Contributions
Talk
Contents
move to sidebar
hide
(Top)
1
Early life
Toggle Early life subsection
1.1
Initial exposure to music
2
Career
Toggle Career subsection
2.1
Session musician and orchestrator
2.2
Film score composer
2.3
Non-cinematic output
2.4
Live performances
3
Musical style and sensibility
4
Honours and legacy
5
Personal life
6
Legal issues and controversies
7
Discography
8
See also
9
Notes
10
References
Toggle References subsection
10.1
Bibliography
11
Further reading
12
External links
Toggle the table of contents
Ilaiyaraaja
24 languages
العربية
अवधी
تۆرکجه
বাংলা
Español
فارسی
Français


In [8]:
# Define our system prompt - you can experiment with this later, e.g. by changing
# the last sentence to "Respond in markdown in Spanish."

system_prompt = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown."
)

In [9]:
# A function that writes a User Prompt that asks for summaries of websites:

def user_prompt_for(website):
    """
    Build the user message asking for a markdown summary of a scraped page.

    Args:
        website: Object exposing `title` and `text` attributes.

    Returns:
        A string made of a title line, the fixed summarisation instructions
        and finally the page text.
    """
    title_line = f"You are looking at a website titled {website.title}"
    instructions = (
        "\nThe contents of this website is as follows; "
        "please provide a short summary of this website in markdown. "
        "If it includes news or announcements, then summarize these too.\n\n"
    )
    return title_line + instructions + website.text

In [10]:
# Print the full user prompt built for the page scraped above (title line, instructions, page text).
print(user_prompt_for(isaignani))

You are looking at a website titled Ilaiyaraaja - Wikipedia
The contents of this website is as follows; please provide a short summary of this website in markdown. If it includes news or announcements, then summarize these too.

Jump to content
Main menu
Main menu
move to sidebar
hide
Navigation
Main page
Contents
Current events
Random article
About Wikipedia
Contact us
Contribute
Help
Learn to edit
Community portal
Recent changes
Upload file
Special pages
Search
Search
Appearance
Donate
Create account
Log in
Personal tools
Donate
Create account
Log in
Pages for logged out editors
learn more
Contributions
Talk
Contents
move to sidebar
hide
(Top)
1
Early life
Toggle Early life subsection
1.1
Initial exposure to music
2
Career
Toggle Career subsection
2.1
Session musician and orchestrator
2.2
Film score composer
2.3
Non-cinematic output
2.4
Live performances
3
Musical style and sensibility
4
Honours and legacy
5
Personal life
6
Legal issues and controversies
7
Discography
8
See also
9
No

In [11]:
# See how this function creates exactly the format above

def messages_for(website):
    """
    Assemble the two-message chat payload for summarising a page.

    The module-level `system_prompt` is read at call time, so the system
    message reflects whatever value that name holds when this function runs.

    Args:
        website: Page object accepted by `user_prompt_for`.

    Returns:
        A list with a "system" message followed by a "user" message, each a
        dict with "role" and "content" keys.
    """
    system_turn = {"role": "system", "content": system_prompt}
    user_turn = {"role": "user", "content": user_prompt_for(website)}
    return [system_turn, user_turn]

In [12]:
# Display the system/user message list built for the page scraped above.
messages_for(isaignani)

[{'role': 'system',
  'content': 'You are an assistant that analyzes the contents of a website and provides a short summary, ignoring text that might be navigation related. Respond in markdown.'},
 {'role': 'user',
  'content': 'You are looking at a website titled Ilaiyaraaja - Wikipedia\nThe contents of this website is as follows; please provide a short summary of this website in markdown. If it includes news or announcements, then summarize these too.\n\nJump to content\nMain menu\nMain menu\nmove to sidebar\nhide\nNavigation\nMain page\nContents\nCurrent events\nRandom article\nAbout Wikipedia\nContact us\nContribute\nHelp\nLearn to edit\nCommunity portal\nRecent changes\nUpload file\nSpecial pages\nSearch\nSearch\nAppearance\nDonate\nCreate account\nLog in\nPersonal tools\nDonate\nCreate account\nLog in\nPages for logged out editors\nlearn more\nContributions\nTalk\nContents\nmove to sidebar\nhide\n(Top)\n1\nEarly life\nToggle Early life subsection\n1.1\nInitial exposure to music\n

In [13]:
# call the OpenAI API

def summarize(url):
    """
    Scrape `url` and return the model's summary of it.

    Args:
        url: Address of the page to fetch and summarise.

    Returns:
        The content of the first choice's message in the chat completion
        returned for model "gpt-4o-mini".
    """
    page = Website(url)
    completion = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=messages_for(page),
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [14]:
# Summarise the Wikipedia article; the returned markdown string is shown as the cell's raw output.
summarize("https://en.wikipedia.org/wiki/Ilaiyaraaja")

'```markdown\n# Ilaiyaraaja - Wikipedia Summary\n\nIlaiyaraaja, born R. Gnanathesikan on June 3, 1943, is a renowned Indian composer, playback singer, and musician known for his prolific contributions to Indian cinema, especially in Tamil films. With a career spanning over 49 years, he has composed more than 8,600 songs and provided scores for 1,523 feature films in nine languages.\n\n## Early Life\nIlaiyaraaja was born in Pannaipuram, Tamil Nadu, and grew up in a rural setting where he was exposed to Tamil folk music. His musical journey began with a traveling musical troupe, leading him to Chennai for formal training in music.\n\n## Career Highlights\n- **Film Score Composer**: He revolutionized the Tamil film music scene by incorporating Western classical music elements into his compositions. His debut came in 1976 with the film *Annakili* which became a major hit.\n- **Innovations**: He was the first Indian composer to record using a computer and to perform a full Western classical

In [15]:
# A function to display this nicely in the Jupyter output, using markdown

def display_summary(url):
    """
    Summarise the page at `url` and render the result as Markdown.

    Args:
        url: Address handed to `summarize`.

    Returns:
        None; the summary is shown through IPython's `display`.
    """
    display(Markdown(summarize(url)))

In [16]:
# Render the summary of the Wikipedia article as Markdown.
display_summary("https://en.wikipedia.org/wiki/Ilaiyaraaja")

# Let us try some different websites (uncomment one line at a time)
# display_summary("https://cnn.com")
# display_summary("https://anthropic.com")
# display_summary("https://www.youtube.com")
# display_summary("https://www.wikipedia.org")
# display_summary("https://www.amazon.com")
# display_summary("https://www.twitter.com")
# display_summary("https://www.facebook.com")
# display_summary("https://www.instagram.com")
# display_summary("https://www.linkedin.com")
# display_summary("https://www.github.com")

# Summary of Ilaiyaraaja - Wikipedia Page

## Overview
Ilaiyaraaja, born as R. Gnanathesikan on June 3, 1943, in Pannaipuram, is a prominent Indian musician, composer, and playback singer, primarily active in Tamil cinema but also working in Telugu, Malayalam, Kannada, and Hindi films. Widely regarded as one of the most prolific film composers, he has created over 8,600 songs and scored music for approximately 1,523 feature films across nine languages. Ilaiyaraaja is known for incorporating Western classical music harmonies into Indian film music, earning him the titles "Isaignani" (the musical sage) and "Maestro."

## Early Life
Ilaiyaraaja grew up in a rural Tamil community and was influenced by folk music from a young age. Joining a traveling music troupe at age 14, he honed his skills in various musical instruments and eventually moved to Chennai to further his musical education.

## Career Highlights
- **Compositional Work**: He produced his first film score for *Annakili* (1976), using a blend of contemporary cinema music with traditional Tamil folk themes.
- **Innovations**: Became the first Indian to compose a full Western classical symphony and recorded soundtracks utilizing computers in film scoring.
- **Collaborations**: Worked with renowned poets and lyricists across South Indian cinema and has had a significant impact on the musical landscape.

## Musical Style
Ilaiyaraaja's music merges Indian folk traditions with Western classical elements, and he is noted for his use of diverse instruments in his compositions. His works have touched on various genres, showcasing a broad emotional range and narrative depth.

## Honors and Awards
His achievements have been recognized with numerous accolades, including the Padma Bhushan and Padma Vibhushan awards, as well as five National Film Awards. He was nominated for the Rajya Sabha in July 2022 as a member representing the arts.

## Recent News
A biographical film about Ilaiyaraaja's life was announced on March 20, 2024, expected to further highlight his historical contributions to music.

## Personal Life
Ilaiyaraaja was married to Jeeva Rajayya and has three children, all of whom are involved in the music industry. His daughter Bhavatharini passed away from liver cancer in January 2024.


# Project 2: Build a Brochure for a Company

### A full business solution

### Now we will take our project from Day 1 to the next level

### Business Challenge:

Create a product that builds a Brochure for a company to be used for prospective clients, investors and potential recruits.

We will be provided a company name and their primary website.

See the end of this notebook for examples of real-world business applications.

And remember: I'm always available if you have problems or ideas! Please do reach out.

In [7]:
# A class to represent a Webpage

# Some websites need you to use proper headers when fetching them:
# this dict carries a browser-style User-Agent and is passed to requests.get by Website.
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class Website:
    """
    A utility class to represent a Website that we have scraped, now with links

    Attributes:
        url: The address that was fetched.
        body: Raw response content as returned by requests.
        title: Text of the <title> tag, or "No title found" when the tag is
            missing or has no single string value.
        text: Newline-separated text of <body> after script, style, img and
            input elements are removed; "" when the page has no <body>.
        links: Every non-empty href found on an <a> tag, in document order
            (duplicates and relative links are kept as-is).
    """

    def __init__(self, url, timeout=30):
        """
        Download and parse the page at `url`.

        Args:
            url: Address of the page to download.
            timeout: Seconds passed to requests.get as its timeout, so a
                non-responding server raises instead of blocking indefinitely.
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')
        self.title = self._title_of(soup)
        if soup.body:
            # Drop elements whose content is not readable page text.
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""
        hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
        # Anchors without an href (or with an empty one) are skipped.
        self.links = [href for href in hrefs if href]

    @staticmethod
    def _title_of(soup):
        """Return the page title string, or "No title found" if absent or empty."""
        title_tag = soup.title
        # .string is None when <title> is empty or holds more than one child.
        return (title_tag.string if title_tag else None) or "No title found"

    def get_contents(self):
        """Return the title and text formatted as one labelled block ending in a blank line."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [18]:
# Scrape the article with the link-aware Website class and display the hrefs it collected.
hf = Website("https://en.wikipedia.org/wiki/Ilaiyaraaja")
hf.links

['#bodyContent',
 '/wiki/Main_Page',
 '/wiki/Wikipedia:Contents',
 '/wiki/Portal:Current_events',
 '/wiki/Special:Random',
 '/wiki/Wikipedia:About',
 '//en.wikipedia.org/wiki/Wikipedia:Contact_us',
 '/wiki/Help:Contents',
 '/wiki/Help:Introduction',
 '/wiki/Wikipedia:Community_portal',
 '/wiki/Special:RecentChanges',
 '/wiki/Wikipedia:File_upload_wizard',
 '/wiki/Special:SpecialPages',
 '/wiki/Main_Page',
 '/wiki/Special:Search',
 'https://donate.wikimedia.org/?wmf_source=donate&wmf_medium=sidebar&wmf_campaign=en.wikipedia.org&uselang=en',
 '/w/index.php?title=Special:CreateAccount&returnto=Ilaiyaraaja',
 '/w/index.php?title=Special:UserLogin&returnto=Ilaiyaraaja',
 'https://donate.wikimedia.org/?wmf_source=donate&wmf_medium=sidebar&wmf_campaign=en.wikipedia.org&uselang=en',
 '/w/index.php?title=Special:CreateAccount&returnto=Ilaiyaraaja',
 '/w/index.php?title=Special:UserLogin&returnto=Ilaiyaraaja',
 '/wiki/Help:Introduction',
 '/wiki/Special:MyContributions',
 '/wiki/Special:MyTalk',

## First step: Have GPT-4o-mini figure out which links are relevant

### Use a call to gpt-4o-mini to read the links on a webpage, and respond in structured JSON.  
It should decide which links are relevant, and replace relative links such as "/about" with "https://company.com/about".  
We will use "one shot prompting" in which we provide an example of how it should respond in the prompt.

This is an excellent use case for an LLM, because it requires nuanced understanding. Imagine trying to code this without LLMs by parsing and analyzing the webpage - it would be very hard!

In [19]:
# System prompt for the link-selection call: describes the task and shows one
# example of the JSON shape the reply should follow.
link_system_prompt = (
    "You are provided with a list of links found on a webpage. "
    "You are able to decide which of the links would be most relevant to include in a brochure about the company, "
    "such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
    "You should respond in JSON as in this example:"
    """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""
)

In [None]:
#print(link_system_prompt)

In [20]:
def get_links_user_prompt(website):
    """
    Build the user message that lists a page's links for the model to filter.

    Args:
        website: Object exposing `url` and `links` (an iterable of strings).

    Returns:
        The instructions followed by the links, one per line.
    """
    sections = [
        f"Here is the list of links on the website of {website.url} - ",
        "please decide which of these are relevant web links for a brochure about the company, "
        "respond with the full https URL in JSON format. "
        "Do not include Terms of Service, Privacy, email links.\n",
        "Links (some might be relative links):\n",
        "\n".join(website.links),
    ]
    return "".join(sections)

In [None]:
#print(get_links_user_prompt(ed))

In [21]:
# Model name used by the chat-completion calls that reference MODEL.
MODEL = 'gpt-4o-mini'

def get_links(url):
    """
    Ask the model which links on the page at `url` belong in a brochure.

    Args:
        url: Address of the page whose links should be classified.

    Returns:
        The model's reply parsed with json.loads (the request asks for a
        JSON object via response_format).
    """
    page = Website(url)
    conversation = [
        {"role": "system", "content": link_system_prompt},
        {"role": "user", "content": get_links_user_prompt(page)},
    ]
    completion = openai.chat.completions.create(
        model=MODEL,
        messages=conversation,
        response_format={"type": "json_object"},
    )
    return json.loads(completion.choices[0].message.content)

In [27]:
# Try the link scraper on a different site: the CNN home page.
# NOTE: the variable is called `wikipedia`, but the page fetched here is cnn.com.

wikipedia = Website("https://www.cnn.com/")
wikipedia.links

['https://www.cnn.com',
 'https://www.cnn.com/us',
 'https://www.cnn.com/world',
 'https://www.cnn.com/politics',
 'https://www.cnn.com/business',
 'https://www.cnn.com/health',
 'https://www.cnn.com/entertainment',
 'https://www.cnn.com/cnn-underscored',
 'https://www.cnn.com/style',
 'https://www.cnn.com/travel',
 'https://www.cnn.com/sports',
 'https://www.cnn.com/science',
 'https://www.cnn.com/climate',
 'https://www.cnn.com/weather',
 'https://www.cnn.com/world/europe/ukraine',
 'https://www.cnn.com/world/middleeast/israel',
 'https://www.cnn.com/games',
 'https://www.cnn.com/us',
 'https://www.cnn.com/world',
 'https://www.cnn.com/politics',
 'https://www.cnn.com/business',
 'https://www.cnn.com/health',
 'https://www.cnn.com/entertainment',
 'https://www.cnn.com/cnn-underscored',
 'https://www.cnn.com/style',
 'https://www.cnn.com/travel',
 'https://www.cnn.com/sports',
 'https://www.cnn.com/science',
 'https://www.cnn.com/climate',
 'https://www.cnn.com/weather',
 'https://www

In [28]:
# Ask the model to pick the brochure-relevant links from the CNN home page; the parsed JSON is displayed.
get_links("https://www.cnn.com/")

{'links': [{'type': 'about page', 'url': 'https://www.cnn.com/about'},
  {'type': 'careers page', 'url': 'https://careers.wbd.com/cnnjobs'},
  {'type': 'company profile', 'url': 'https://www.cnn.com/profiles'},
  {'type': 'leadership page',
   'url': 'https://www.cnn.com/profiles/cnn-leadership'}]}

## Second step: make the brochure!

Assemble all the details into another prompt to GPT-4o-mini

In [29]:
def get_all_details(url):
    """
    Collect the text of a landing page plus the pages the model judged relevant.

    The link list comes from model-generated JSON, so it is treated as
    unreliable: a missing "links" list or an entry without a "url" is skipped,
    and a linked page that cannot be fetched is reported and skipped rather
    than aborting the whole collection.

    Args:
        url: Address of the company's landing page.

    Returns:
        One string: "Landing page:" with that page's contents, followed by a
        labelled section for every linked page that was fetched.
    """
    result = "Landing page:\n"
    result += Website(url).get_contents()
    links = get_links(url)
    print("Found links:", links)
    for link in links.get("links") or []:
        link_url = link.get("url")
        if not link_url:
            continue
        try:
            page = Website(link_url)
        except requests.RequestException as error:
            # Covers connection failures, timeouts and malformed/relative URLs.
            print(f"Skipping {link_url}: {error}")
            continue
        result += f"\n\n{link.get('type', 'page')}\n"
        result += page.get_contents()
    return result

In [30]:
# Print the combined landing-page and linked-page text gathered for CNN.
#print(get_all_details("https://en.wikipedia.org/wiki/Ilaiyaraaja"))
print(get_all_details("https://www.cnn.com/"))

Found links: {'links': [{'type': 'about page', 'url': 'https://www.cnn.com/about'}, {'type': 'careers page', 'url': 'https://careers.wbd.com/cnnjobs'}, {'type': 'company page', 'url': 'https://www.cnn.com/profiles'}, {'type': 'leadership page', 'url': 'https://www.cnn.com/profiles/cnn-leadership'}, {'type': 'newsletters', 'url': 'https://www.cnn.com/newsletters'}]}
Landing page:
Webpage Title:
Breaking News, Latest News and Videos | CNN
Webpage Contents:
CNN values your feedback
1. How relevant is this ad to you?
2. Did you encounter any technical issues?
Video player was slow to load content
Video content never loaded
Ad froze or did not finish loading
Video content did not start after ad
Audio on ad was too loud
Other issues
Ad never loaded
Ad prevented/slowed the page from loading
Content moved around while ad loaded
Ad was repetitive to ads I've seen previously
Other issues
Cancel
Submit
Thank You!
Your effort and contribution in providing this feedback is much
                    

In [31]:
# System prompt for the brochure writer.
# NOTE: this rebinds `system_prompt`, the same module-level name that the web
# summarizer's messages_for() reads, so summaries requested after this cell runs
# will use the brochure prompt.
# Each sentence ends with a space before the line break so the sentences do not
# run together ("markdown.Include") in the final string.
system_prompt = (
    "You are an assistant that analyzes the contents of several relevant pages from a company website "
    "and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown. "
    "Include details of company culture, customers and careers/jobs if you have the information."
)

# Or uncomment the lines below for a more humorous brochure - this demonstrates how easy it is to incorporate 'tone':

# system_prompt = (
#     "You are an assistant that analyzes the contents of several relevant pages from a company website "
#     "and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown. "
#     "Include details of company culture, customers and careers/jobs if you have the information."
# )


In [32]:
def get_brochure_user_prompt(company_name, url, max_chars=5_000):
    """
    Build the user message for the brochure request.

    Args:
        company_name: Name inserted into the first line of the prompt.
        url: Landing page address handed to `get_all_details`.
        max_chars: Maximum length of the returned prompt in characters; the
            prompt is cut at this length. Pass None to keep the full text.

    Returns:
        The instructions followed by the scraped page details, truncated to
        `max_chars` characters.
    """
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += "Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    user_prompt += get_all_details(url)
    # Slicing with None as the bound leaves the string untouched.
    return user_prompt[:max_chars]

In [33]:
# Preview the (truncated) user prompt assembled for Hugging Face; the string is shown as the cell output.
get_brochure_user_prompt("HuggingFace", "https://huggingface.co")

Found links: {'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'company page', 'url': 'https://www.linkedin.com/company/huggingface/'}]}


'You are looking at a company called: HuggingFace\nHere are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\nLanding page:\nWebpage Title:\nHugging Face – The AI community building the future.\nWebpage Contents:\nHugging Face\nModels\nDatasets\nSpaces\nCommunity\nDocs\nEnterprise\nPricing\nLog In\nSign Up\nThe AI community building the future.\nThe platform where the machine learning community collaborates on models, datasets, and applications.\nExplore AI Apps\nor\nBrowse 1M+ models\nTrending on\nthis week\nModels\nQwen/Qwen-Image-Edit\nUpdated\n5 days ago\n•\n28.2k\n•\n1.12k\ndeepseek-ai/DeepSeek-V3.1-Base\nUpdated\n1 day ago\n•\n12.5k\n•\n868\ndeepseek-ai/DeepSeek-V3.1\nUpdated\n1 day ago\n•\n14.3k\n•\n468\ngoogle/gemma-3-270m\nUpdated\n9 days ago\n•\n63.8k\n•\n609\nByteDance-Seed/Seed-OSS-36B-Instruct\nUpdated\n1 day ago\n•\n3.13k\n•\n259\nBrowse 1M+ models\nSpaces\nRunning\n12.2k\n12.2k\nDeepSite

In [34]:
def create_brochure(company_name, url):
    """
    Generate a brochure for a company and render it as Markdown.

    Args:
        company_name: Name of the company, used in the user prompt.
        url: Address of the company's landing page.

    Returns:
        None; the brochure text is shown through IPython's `display`.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": get_brochure_user_prompt(company_name, url)},
    ]
    completion = openai.chat.completions.create(model=MODEL, messages=conversation)
    brochure_markdown = completion.choices[0].message.content
    display(Markdown(brochure_markdown))

In [39]:
# Generate and render the Hugging Face brochure in one (non-streaming) call.
create_brochure("HuggingFace", "https://huggingface.co")

Found links: {'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'company page', 'url': 'https://www.linkedin.com/company/huggingface/'}, {'type': 'blog page', 'url': 'https://huggingface.co/blog'}, {'type': 'docs page', 'url': 'https://huggingface.co/docs'}]}


# Hugging Face Brochure

## Welcome to Hugging Face
**The AI community building the future.**  
At Hugging Face, we are pioneering collaboration within the machine learning community by providing innovative platforms for models, datasets, and applications. Join us in shaping the future of AI!

---

## Who We Are
Hugging Face is dedicated to making machine learning more accessible and efficient. With a focus on open-source technology, we facilitate collaboration among thousands of organizations and developers. Our resources help users quickly deploy and implement machine learning applications in various modalities, including text, image, video, audio, and 3D.

---

## What We Offer

### **Cutting-Edge Models**
Explore our growing repository featuring **1M+ models** such as:
- Qwen/Qwen-Image-Edit
- DeepSeek-V3.1
- Google's Gemma
  
### **Diverse Datasets**
Utilize **250k+ datasets** for your machine learning projects. We provide comprehensive datasets to tackle any ML task.

### **Interactive Spaces**
Experiment and build applications using our collaborative platform featuring over **400k applications**.

### **Enterprise Solutions**
Our enterprise-grade solutions provide your team with the tools necessary for successful AI development, including:
- **Security audits**
- **Dedicated support**
- **Single Sign-On** access

---

## Our Community
More than **50,000 organizations** use Hugging Face, including industry leaders like Google, Microsoft, Amazon, and Intel. We thrive on community engagement and encourage contributions to our open-source projects.

### **Join the Community**
Connect with us to share your models, datasets, and projects. Together, we can accelerate innovation in the AI space!

---

## Company Culture
At Hugging Face, diversity and inclusivity are fundamental to our mission. Our culture promotes:
- **Collaboration:** Working together on impactful projects.
- **Innovation:** Encouraging creativity and new ideas.
- **Education:** Actively sharing knowledge and learning from one another.

We believe in democratizing ML, making it available to everyone, and providing a welcoming atmosphere for all enthusiasts.

---

## Careers at Hugging Face
We are on the lookout for passionate individuals to join our dynamic team! Opportunities span across various domains including engineering, product management, community engagement, and more.

### **Why Work with Us?**
- Engage in a passionate and innovative environment.
- Collaborate with experts in AI and machine learning.
- Contribute to meaningful projects that influence the future.

**Explore current job openings [here](https://huggingface.co/jobs).**

---

## Join Us!
Whether you're a developer, researcher, or an AI enthusiast, Hugging Face welcomes you to explore, collaborate, and contribute to the future of machine learning. Let's build something incredible together!

**Connect with us:**
- [Hugging Face Website](https://huggingface.co)
- [GitHub](https://github.com/huggingface)
- [Twitter](https://twitter.com/huggingface)
- [Discord](https://discord.com/invite/huggingface)

--- 

**Thank you for considering Hugging Face as your partner in AI innovation!**

## Finally - a minor improvement

With a small adjustment, we can change this so that the results stream back from OpenAI,
with the familiar typewriter animation

In [36]:
def stream_brochure(company_name, url):
    """
    Generate a brochure and update the notebook display as chunks arrive.

    The raw streamed text is accumulated unchanged; before each display
    update only code-fence markers ("```markdown" and "```") are removed, so
    the word "markdown" inside the brochure text itself is preserved.

    Args:
        company_name: Name of the company, used in the user prompt.
        url: Address of the company's landing page.

    Returns:
        None; output is rendered through IPython's display/update_display.
    """
    stream = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
          ],
        stream=True
    )

    raw = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        # Skip chunks that carry no choices instead of failing on choices[0].
        if not chunk.choices:
            continue
        raw += chunk.choices[0].delta.content or ''
        # Clean a copy: a fence split across chunks is still recognised once
        # the rest of it arrives, because `raw` keeps the unmodified text.
        shown = raw.replace("```markdown", "").replace("```", "")
        update_display(Markdown(shown), display_id=display_handle.display_id)

In [37]:
# Stream the Hugging Face brochure into the cell output using the two-argument stream_brochure defined above.
stream_brochure("HuggingFace", "https://huggingface.co")

Found links: {'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'blog', 'url': 'https://huggingface.co/blog'}, {'type': 'company page', 'url': 'https://www.linkedin.com/company/huggingface/'}, {'type': 'community forum', 'url': 'https://discuss.huggingface.co'}]}


# Hugging Face Brochure

## Company Overview
**Hugging Face** is a vibrant community and platform revolutionizing the world of AI and machine learning. Driven by the mission to democratize AI, we invite professionals, enthusiasts, and newcomers to collaborate, learn, and build at the forefront of this innovative field. 

## What We Offer
- **Models:** Access to over **1 million models** across diverse modalities like text, image, audio, and video. From state-of-the-art transformers to custom AI applications, explore the latest advancements in AI technology.
  
- **Datasets:** Browse **over 250,000 datasets** that fuel machine learning research and applications. This rich repository empowers the community to create high-quality AI models.

- **Spaces:** A collaborative environment to create, share, and deploy machine learning applications seamlessly. Launch your project in just a few clicks.

- **Enterprise Solutions:** Tailored offerings for organizations requiring advanced platforms, enterprise-grade security, dedicated support, and scalable compute resources.

## Community & Culture
At Hugging Face, we foster a **collaborative culture** that values creativity, diversity, and openness. Our community-driven philosophy encourages:
- **Collaborative Learning:** Individuals from various backgrounds come together to learn, share insights, and grow their AI expertise.
- **Open Source Contribution:** As a strong proponent of open-source technologies, we welcome contributions that enhance our tools and services.
- **Innovation:** Regularly updated models and applications reinforce our commitment to being at the cutting-edge of AI development.

We believe in creating a supportive environment where ideas can flourish and everyone’s contribution counts.

## Our Users
Hugging Face is proud to support more than **50,000 organizations**, including notable enterprises like:
- Meta
- Google
- Amazon
- Microsoft
- Intel
- Grammarly

These organizations leverage our resources to enhance their AI capabilities and drive innovation in their respective fields.

## Careers at Hugging Face
Join our dynamic team and be part of a mission to democratize AI! We are looking for innovative, passionate individuals who thrive in a collaborative environment. Whether you’re a developer, researcher, or community advocate, there’s a place for you at Hugging Face. 

### Why Work With Us?
- **Growth Opportunities:** Develop your skills and grow with the community.
- **Inclusive Environment:** Celebrate diversity and engage with a passionate team.
- **Flexibility:** Work remotely and balance your career and personal life effectively.

## Connect with Us!
Join us on our journey as we build the AI applications of tomorrow. Explore our offerings, engage with our community, and consider embarking on a career with Hugging Face!

- **Website:** [Hugging Face](https://huggingface.co)
- **Social Media:** GitHub, Twitter, LinkedIn, Discord

**Hugging Face – The AI Community Building the Future!**

## Gradio - We can build User Interfaces using the outrageously simple Gradio framework.
### Implement the Company Brochure project using a Gradio UI

In [5]:
import gradio as gr

In [10]:
# With massive thanks to Bill G. who noticed that a prior version of this had a bug! Now fixed.

# System prompt shared by the GPT and Claude streaming helpers below.
system_message = (
    "You are an assistant that analyzes the contents of a company website landing page "
    "and creates a short brochure about the company for prospective customers, investors and recruits. "
    "Respond in markdown."
)

In [8]:
# Let's create a call that streams back results
# If you'd like a refresher on Generators (the "yield" keyword),
# Please take a look at the Intermediate Python notebook in week1 folder.

def stream_gpt(prompt):
    """
    Stream a reply from "gpt-4o-mini", yielding the text accumulated so far.

    Args:
        prompt: Content of the user message; `system_message` is sent as the
            system message.

    Yields:
        After every received chunk, the concatenation of all chunk contents
        so far (a chunk without content adds nothing but still yields).
    """
    stream = openai.chat.completions.create(
        model='gpt-4o-mini',
        messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": prompt},
        ],
        stream=True,
    )
    text_so_far = ""
    for chunk in stream:
        delta_text = chunk.choices[0].delta.content
        if delta_text:
            text_so_far = text_so_far + delta_text
        yield text_so_far

In [9]:
def stream_claude(prompt):
    """
    Stream a reply from "claude-3-haiku-20240307", yielding the text so far.

    Args:
        prompt: Content of the single user message; `system_message` is sent
            as the system prompt.

    Yields:
        After every item of the stream's `text_stream`, the concatenation of
        all text received so far.
    """
    user_turn = {"role": "user", "content": prompt}
    accumulated = ""
    with claude.messages.stream(
        model="claude-3-haiku-20240307",
        max_tokens=1000,
        temperature=0.7,
        system=system_message,
        messages=[user_turn],
    ) as stream:
        for text in stream.text_stream:
            accumulated += text or ""
            yield accumulated

In [11]:
def stream_brochure(company_name, url, model):
    """
    Generator used by the Gradio UI: stream a brochure from the chosen model.

    This three-argument definition has the same name as the earlier
    two-argument notebook helper and replaces it once this cell runs.

    Args:
        company_name: Name of the company, inserted into the prompt.
        url: Landing page address; its contents are appended to the prompt.
        model: "GPT" or "Claude".

    Yields:
        An empty string first, then every value produced by the selected
        streaming helper.

    Raises:
        ValueError: If `model` is neither "GPT" nor "Claude" (raised after the
            initial empty yield and after the landing page has been fetched).
    """
    # NOTE(review): presumably this blanks the output while the page loads — confirm.
    yield ""
    intro = f"Please generate a company brochure for {company_name}. Here is their landing page:\n"
    prompt = intro + Website(url).get_contents()
    if model not in ("GPT", "Claude"):
        raise ValueError("Unknown model")
    streamer = stream_gpt if model == "GPT" else stream_claude
    yield from streamer(prompt)

In [12]:
# Build the Gradio UI: two text boxes and a model dropdown supply the three
# arguments of stream_brochure, and its yielded strings go to a Markdown output.
view = gr.Interface(
    fn=stream_brochure,
    inputs=[
        gr.Textbox(label="Company name:"),
        gr.Textbox(label="Landing page URL including http:// or https://"),
        gr.Dropdown(["GPT", "Claude"], label="Select model")],
    outputs=[gr.Markdown(label="Brochure:")],
    flagging_mode="never"
)
# Start the app; the captured output below shows it being served on a local URL.
view.launch()

* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.


