In [28]:
import os
import requests
from bs4 import BeautifulSoup
import json 
from dotenv import load_dotenv
from typing import List
from IPython.display import Markdown, display, update_display
from openai import OpenAI

In [29]:
# Read settings from the local .env file (values there override anything
# already present in the environment) and sanity-check the key's shape.
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

key_looks_valid = bool(api_key) and api_key.startswith('sk-proj-') and len(api_key) > 10
print("API key wrking fine" if key_looks_valid else "Check api key again")

# Chat model used by the completion calls in this notebook, and the shared
# client. OpenAI() is constructed without arguments; the key is not passed
# explicitly.
MODEL = 'gpt-4o-mini'
openai = OpenAI()

API key wrking fine


In [30]:
# Headers sent with every page fetch. The User-Agent is a desktop Chrome
# string, so the request presents itself as a regular browser.
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36'
header = {'User-Agent': _USER_AGENT}

class Website:
    """A fetched web page reduced to its title, visible text and link targets.

    The constructor populates these attributes eagerly:
      url   -- the address that was requested
      body  -- raw response bytes
      title -- text of the <title> tag, or "No title found"
      text  -- text of <body> with script/style/img/input elements removed
               ("" when the document has no <body>)
      links -- every non-empty href found on <a> tags, unmodified (may be relative)
    """

    def __init__(self, url, timeout=10):
        """Download `url` and parse it with BeautifulSoup.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds to wait on the server before requests gives up.
                Without a timeout a stalled server blocks the notebook forever.

        Raises:
            requests.exceptions.RequestException: connection failure or timeout.
        """
        self.url = url
        response = requests.get(url, headers=header, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')
        self.title = self._extract_title(soup)
        if soup.body:
            # Drop elements that carry no readable text before extracting it.
            for irrelevant in soup.body(['script', 'style', 'img', 'input']):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator='\n', strip=True)
        else:
            self.text = ""
        hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
        self.links = [href for href in hrefs if href]

    @staticmethod
    def _extract_title(soup):
        """Return the page title, falling back when it is missing or empty."""
        title_tag = soup.title
        # `.string` is None when <title> is empty or wraps nested tags; using it
        # directly would put the literal text "None" into the prompt.
        if title_tag is None or not title_tag.string:
            return "No title found"
        return title_tag.string

    def get_contents(self):
        """Return the title and visible text formatted as a prompt section."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [31]:
# System prompt for the link-selection request issued by get_links().
# The example must be valid JSON because the model is asked to mirror it.
# NOTE: a later cell rebinds `system_prompt` to the brochure-writing prompt and
# get_links() reads this global when it is called, so cell run order matters.
system_prompt = (
    "You are provided with a list of links found on a webpage. "
    "You are able to decide which of the links would be most relevant to include in a brochure about the company, "
    "such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
)

system_prompt += 'You should respond in JSON as in this example:'
system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [32]:
def get_user_prompt(website):
    """Build the user message asking the model to pick brochure-worthy links.

    Args:
        website: Object exposing `url` and `links` (an iterable of href
            strings), e.g. a Website instance.

    Returns:
        The prompt text; it ends with the links, one per line.
    """
    user_prompt=f'Here is the list of links on the website of {website.url} - '
    user_prompt+="please decide which of these are relevant web links  for a brochure about the company, respond with the full https URL in JSON format. \
        do not include Terms of Service, Privacy, email links.\n"
    # NOTE(review): "python list form" sits oddly next to the JSON instruction
    # above; the wording is kept as-is, only its position is changed.
    user_prompt+="Give the output strictly in python list form\n"
    # The header goes directly above the list and ends with a newline, so the
    # first link starts on its own line instead of being glued to prose.
    user_prompt+="Links (some might be relative links):\n"
    user_prompt+="\n".join(website.links)

    return user_prompt

In [33]:
def get_links(url, system_message=None):
    """Ask the model which links found on `url` belong in a company brochure.

    Args:
        url: Page whose links are offered to the model.
        system_message: System prompt for the request. When omitted, the
            notebook-level `system_prompt` is used *as bound at call time*.
            NOTE: a later cell rebinds `system_prompt` to the brochure-writing
            prompt, so calls made after that cell has run no longer send the
            link-selection instructions unless they are passed in here.

    Returns:
        The model's JSON reply decoded into Python objects, or
        {"links": []} when the reply carries no content.
    """
    if system_message is None:
        system_message = system_prompt
    website = Website(url)
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": get_user_prompt(website)},
        ],
        response_format={"type": "json_object"},
    )
    result = response.choices[0].message.content
    if not result:
        # content can be None or empty; json.loads(None) would raise TypeError.
        return {"links": []}
    return json.loads(result)
    

In [34]:
class ConnectTimeout(Exception):
    """Notebook-local exception type denoting a connection timeout."""


In [35]:
import time

def retry(url, retries=5, delay=2):
    """Fetch a page's formatted contents, retrying on any error.

    Args:
        url: Address handed to Website().
        retries: Maximum number of attempts.
        delay: Seconds to pause between two attempts.

    Returns:
        Website(url).get_contents() from the first attempt that succeeds, or a
        "Failed to retrieve contents for ..." line when every attempt fails.
        Errors raised while fetching are reported via print, never re-raised.
    """
    for attempt in range(1, retries + 1):
        try:
            return Website(url).get_contents()
        # Exception already covers timeout and connection errors, so listing
        # narrower types next to it adds nothing.
        except Exception as e:
            print(f"Error occurred for {url}: {e}. Attempt {attempt} of {retries}.")
            # Pause only when another attempt follows; sleeping after the last
            # failure would just delay the caller.
            if attempt < retries:
                time.sleep(delay)
    print(f"Skipping {url} after {retries} failed attempts.")
    return f"Failed to retrieve contents for {url}\n"

def get_details(url):
    """Collect the landing page plus every model-selected page as one text blob.

    The reply from get_links() is accepted in either shape seen in practice:
    the list may sit under "links" or "relevant_links", and each entry may be
    a plain URL string or a dict carrying the address under "url".

    Args:
        url: Landing page of the site.

    Returns:
        "Landing page:\n" followed by the landing page contents, then one
        section per selected link (headed by the entry as the model returned
        it). "\nNo relevant links found.\n" is appended when nothing usable
        came back.
    """
    from urllib.parse import urljoin

    result = "Landing page:\n"
    result += retry(url)
    links = get_links(url)
    print("Found links:", links)

    # Look the list up by name instead of trusting the first key of the reply.
    entries = []
    if isinstance(links, dict):
        for key in ('links', 'relevant_links'):
            if isinstance(links.get(key), list):
                entries = links[key]
                break

    fetched_any = False
    for link in entries:
        target = link.get("url") if isinstance(link, dict) else link
        if not isinstance(target, str) or not target:
            continue  # nothing fetchable in this entry
        result += f"\n\n{link}\n"
        # Absolute URLs come back from urljoin as absolute; relative ones are
        # resolved against the landing page.
        result += retry(urljoin(url, target))
        fetched_any = True

    if not fetched_any:
        result += "\nNo relevant links found.\n"

    return result


In [36]:
# System prompt for the brochure-writing requests (create_brochure / stream_brochure).
# NOTE: this rebinds the global that the link-selection cell assigned earlier.
# get_links() reads `system_prompt` when it is called, so once this cell has
# run it sends this brochure prompt as well.
system_prompt = (
    "You are an assistant that analyzes the contents of several relevant pages from a website ( it can be company website, college club website, someone's portfolio) so be prepared "
    "and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown. "
    "Include details of company culture, customers and careers/jobs if you have the information."
)

In [37]:
def brochure_user_prompt(company_name, url, max_chars=5_000):
    """Assemble the user message for the brochure request.

    Args:
        company_name: Name inserted into the opening line of the prompt.
        url: Landing page; its contents and those of the selected links are
            gathered via get_details().
        max_chars: Upper bound on the prompt length in characters. Everything
            beyond it is cut off, instructions included if the bound is very
            small. None disables the cap.

    Returns:
        The prompt text, at most `max_chars` characters long.
    """
    sections = [
        f"You are looking at a company called: {company_name}\n",
        "Here are the contents of its landing page and other relevant pages; use this information to build a professional brief brochure of the website in markdown.At footer always give the contacts link of their social media handles (except phone number) if available\n",
        get_details(url),
    ]
    # Scraped pages can be arbitrarily long; cap what is sent to the model.
    return "".join(sections)[:max_chars]

In [38]:
def create_brochure(company_name, url):
    """Generate the brochure in one non-streaming request and render it.

    Sends the notebook-level `system_prompt` together with the prompt built by
    brochure_user_prompt(), then displays the reply as Markdown. Returns None.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": brochure_user_prompt(company_name, url)},
    ]
    completion = openai.chat.completions.create(model=MODEL, messages=conversation)
    brochure_markdown = completion.choices[0].message.content
    display(Markdown(brochure_markdown))

In [39]:
def stream_brochure(company_name, url):
    """Generate the brochure with a streaming request, rendering it as it grows.

    Args:
        company_name: Name handed to brochure_user_prompt().
        url: Landing page handed to brochure_user_prompt().

    Returns:
        None. The brochure is shown through a display handle that is updated
        after every received chunk.
    """
    stream = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": brochure_user_prompt(company_name, url)}
          ],
        stream=True
    )

    raw = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        if not chunk.choices:
            # Some chunks carry no choices; there is no text to add.
            continue
        raw += chunk.choices[0].delta.content or ''
        # Strip only code-fence markers, recomputed from the untouched raw text
        # each time. That way a fence split across chunks is still recognised
        # and the word "markdown" inside the brochure itself is preserved.
        rendered = raw.replace("```markdown", "").replace("```", "")
        update_display(Markdown(rendered), display_id=display_handle.display_id)

In [42]:
# Demo run: stream a brochure for the Hugging Face landing page.
stream_brochure(company_name="HuggingFace", url="https://huggingface.co/")

Found links: {'relevant_links': ['https://huggingface.co/models', 'https://huggingface.co/datasets', 'https://huggingface.co/spaces', 'https://huggingface.co/posts', 'https://huggingface.co/docs', 'https://huggingface.co/enterprise', 'https://huggingface.co/pricing', 'https://huggingface.co/deepseek-ai/DeepSeek-R1', 'https://huggingface.co/hexgrad/Kokoro-82M', 'https://huggingface.co/openbmb/MiniCPM-o-2_6', 'https://huggingface.co/spaces/hexgrad/Kokoro-TTS', 'https://huggingface.co/spaces/tencent/Hunyuan3D-2', 'https://huggingface.co/spaces/lllyasviel/iclight-v2', 'https://huggingface.co/spaces/JeffreyXiang/TRELLIS', 'https://huggingface.co/spaces/FaceOnLive/Face-Search-Online', 'https://huggingface.co/datasets/fka/awesome-chatgpt-prompts', 'https://huggingface.co/datasets/HumanLLMs/Human-Like-DPO-Dataset', 'https://huggingface.co/datasets/yale-nlp/MMVU', 'https://huggingface.co/datasets/NovaSky-AI/Sky-T1_data_17k', 'https://huggingface.co/datasets/bespokelabs/Bespoke-Stratos-17k', 'ht

# Hugging Face: The Future of AI, One Hug at a Time!

**Welcome to Hugging Face**, where AI enthusiasts all around the globe come together to cuddle with cutting-edge models and datasets. We're not just a tech company; we're a cozy community dedicated to hugging machines (that...hopefully won't take us over)!

---

### 🌟 What We Offer:
- **Models Galore**: Over **400k+ models** just waiting for you to give them a big ol’ hug! From creating emojis to predicting the weather (okay, maybe not that), we have it all!
- **Datasets for Days**: Want to play with **100k+ datasets**? We got 'em! (No hugs needed, just knowledge).
- **Spaces for Creativity**: Our virtual playground hosts over **150k applications** where your imagination can run wild (just no running in the hallways, please!).

---

### 💼 Careers at Hugging Face:
Join our incredible team to help build the next generation of AI, while basking in the warmth of our unique company culture. We believe in:
- **Collaboration Over Competition**: We’re here to lift each other up, and yes, occasionally share the last slice of pizza during lunch.
- **Supportive Coworkers**: Everyone here is just one "hug" (or Slack message) away from giving you a helping hand!
- **Remote Inclusivity**: Whether you want to work in comfy pajamas or business casual, we’ve got your back (and your front)!

---

### 🤝 Who Uses Hugging Face?
Forget about your lonely algorithms! We have an extensive lineup of **over 50,000 organizations** using Hugging Face, including giants like:
- **Google**
- **Amazon Web Services**
- **Microsoft**
- And countless others who want to show their algorithms some love!

---

### 🚀 Join the Hugging Face Revolution!
Our prices start at just **$20/user/month** for teams eager to scale their AI talents—save a dime while snuggling with the best! Plus, GPUs are available for a mere **$0.60/hour**. 

Sign up today and become part of our hug-tastic community, whether you're an investor eager to cash in, a curious customer, or a recruit ready to embrace an exciting career!

---

### 💌 Stay Connected:
Follow us for the latest updates, memes, and AI breakthroughs!

- [GitHub](https://github.com/huggingface)
- [Twitter](https://twitter.com/huggingface)
- [LinkedIn](https://linkedin.com/company/huggingface)
- [Discord](https://discord.gg/huggingface)

---

**Hugging Face**: Because who doesn't need a digital hug from an AI every now and then? 🤗

In [3]:
from openai import OpenAI
# Vision request: send one image URL plus a question to gpt-4o and print the
# first returned choice (the whole Choice object, not just its text).
IMAGE_URL = "https://www.treehugger.com/thmb/VG2npeTMAl7EyduCgt4QCEPGbnk=/750x0/filters:no_upscale():max_bytes(150000):strip_icc():format(webp)/GettyImages-139804274-571a88323df78c56403e1954.jpg"
QUESTION = "What is in this image and classify each material you see if its garbage classify what type of waste is this?"

client = OpenAI()

# A single user turn whose content mixes a text part and an image part.
user_message = {
    "role": "user",
    "content": [
        {"type": "text", "text": QUESTION},
        {"type": "image_url", "image_url": {"url": IMAGE_URL}},
    ],
}

response = client.chat.completions.create(
    model="gpt-4o",
    messages=[user_message],
    max_tokens=300,
)

print(response.choices[0])

Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='This image depicts a landfill with various types of waste. Here’s a general classification of materials visible in the image:\n\n1. **Plastic**: Bags, wrappers, and other plastic items. \n   - **Type**: Non-biodegradable, recyclable (if sorted properly).\n\n2. **Paper**: Pieces of paper and cardboard.\n   - **Type**: Biodegradable, recyclable (if not contaminated).\n\n3. **Metal**: Possible metallic items or containers.\n   - **Type**: Non-biodegradable, recyclable.\n\n4. **Organic Waste**: Various decaying organic materials.\n   - **Type**: Biodegradable, compostable.\n\n5. **Textiles**: Pieces of clothing or fabric.\n   - **Type**: Biodegradable (natural fibers), recyclable (synthetic).\n\nThis kind of landfill waste is mixed, containing both recyclable and non-recyclable materials, as well as biodegradable and non-biodegradable waste. Proper sorting and recycling processes can help manage suc