In [28]:
import os
import requests
from bs4 import BeautifulSoup
import json 
from dotenv import load_dotenv
from typing import List
from IPython.display import Markdown, display, update_display
from openai import OpenAI

In [29]:
# Load settings from a .env file, then read the OpenAI key from the environment.
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Shape check only (prefix and minimum length); nothing here contacts the API.
key_looks_valid = bool(api_key) and api_key.startswith('sk-proj-') and len(api_key) > 10
print("API key wrking fine" if key_looks_valid else "Check api key again")

# Model name and client object used by the cells below.
# The client is built without arguments; `api_key` above is only used for the check.
MODEL = 'gpt-4o-mini'
openai = OpenAI()

API key wrking fine


In [30]:
# Browser-like User-Agent sent with every page request.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36'
}


class Website:
    """A fetched web page reduced to its title, visible text and link targets.

    Attributes:
        url: The address that was requested.
        body: Raw response bytes.
        title: Text of the <title> element, or "No title found".
        text: Body text with script/style/img/input elements removed,
            one fragment per line; empty when the page has no <body>.
        links: Every non-empty href found on <a> elements, as written in the
            page (relative links are not resolved).
    """

    def __init__(self, url, timeout=10):
        """Download and parse `url`.

        Args:
            url: Page address to fetch.
            timeout: Seconds to wait for the server before `requests` raises.
                Without a timeout a stalled server would block forever and the
                caller's retry logic would never get a chance to run.
        """
        self.url = url
        response = requests.get(url, headers=header, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')

        # `.string` is None when <title> is empty or has nested markup, so
        # fall back in that case as well as when <title> is missing.
        title_text = soup.title.string if soup.title else None
        self.title = title_text or "No title found"

        if soup.body:
            # Drop elements that carry no readable text before extracting it.
            for tag in soup.body.find_all(['script', 'style', 'img', 'input']):
                tag.decompose()
            self.text = soup.body.get_text(separator='\n', strip=True)
        else:
            self.text = ""

        hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
        self.links = [href for href in hrefs if href]

    def get_contents(self):
        """Return the title and text formatted as a block for an LLM prompt."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [31]:
# System prompt for the link-selection request.
# NOTE(review): a later cell assigns the brochure prompt to this same name, so
# code that reads the global `system_prompt` at call time sees whichever
# assignment ran last.
system_prompt = (
    "You are provided with a list of links found on a webpage. "
    "You are able to decide which of the links would be most relevant to include in a brochure about the company, "
    "such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
)

system_prompt += 'You should respond in JSON as in this example:'
# The example must itself be valid JSON: each object separates its
# "type" and "url" members with a comma.
system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [32]:
def get_user_prompt(website):
    """Build the user message that asks the model to pick brochure-worthy links.

    Args:
        website: Object exposing `url` (str) and `links` (iterable of str).

    Returns:
        The instruction text followed by the links, one per line.
    """
    user_prompt = f'Here is the list of links on the website of {website.url} - '
    user_prompt += (
        "please decide which of these are relevant web links  for a brochure about the company, "
        "respond with the full https URL in JSON format. "
        "        do not include Terms of Service, Privacy, email links.\n"
    )
    # The trailing newline keeps the first link on its own line instead of
    # fusing it onto the end of the instruction sentence.
    user_prompt += "Links (some might be relative links):\nGive the output strictly in python list form\n"
    user_prompt += "\n".join(website.links)

    return user_prompt

In [33]:
def get_links(url):
    """Ask the chat model which links on the page at `url` belong in a brochure.

    The page is scraped with `Website`, its links are sent to the model with a
    JSON-object response format, and the reply is decoded with `json.loads`.

    NOTE(review): `system_prompt` is a notebook global looked up when this
    function is called, and a later cell assigns the brochure prompt to the
    same name - confirm which prompt is meant to be sent here.

    Returns:
        The decoded JSON reply.
    """
    page = Website(url)
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": get_user_prompt(page)},
    ]
    completion = openai.chat.completions.create(
        model=MODEL,
        messages=conversation,
        response_format={"type": "json_object"},
    )
    return json.loads(completion.choices[0].message.content)
    

In [34]:
class ConnectTimeout(Exception):
    """Signals that connecting to a site timed out."""


In [35]:
import time

def retry(url, retries=5, delay=2):
    """Fetch a page's formatted contents, retrying on any error.

    Args:
        url: Address handed to `Website`.
        retries: Maximum number of attempts.
        delay: Seconds to wait between attempts.

    Returns:
        `Website(url).get_contents()` from the first attempt that succeeds, or
        a one-line failure notice when every attempt raised. Errors are
        reported with `print` and never propagate to the caller.
    """
    for attempt in range(retries):
        try:
            return Website(url).get_contents()
        # `Exception` already covers ConnectTimeout and TimeoutError, so a
        # single clause keeps the best-effort behaviour.
        except Exception as e:
            print(f"Error occurred for {url}: {e}. Attempt {attempt + 1} of {retries}.")
            # Only wait when another attempt will follow; sleeping after the
            # last failure just delays the caller.
            if attempt + 1 < retries:
                time.sleep(delay)
    print(f"Skipping {url} after {retries} failed attempts.")
    return f"Failed to retrieve contents for {url}\n"

def _extract_link_urls(links):
    """Return the URL strings contained in a decoded link-selection reply."""
    if not isinstance(links, dict):
        return []

    # Prefer the two keys seen so far, wherever they sit in the reply, then
    # fall back to the first list-valued entry under any other key.
    entries = None
    for key in ("links", "relevant_links"):
        if isinstance(links.get(key), list):
            entries = links[key]
            break
    if entries is None:
        entries = next((value for value in links.values() if isinstance(value, list)), [])

    urls = []
    for entry in entries:
        # Entries are either plain URL strings or {"type": ..., "url": ...} objects.
        target = entry.get("url") if isinstance(entry, dict) else entry
        if isinstance(target, str) and target:
            urls.append(target)
    return urls


def get_details(url):
    """Collect the landing page and every model-selected page as one text blob.

    Args:
        url: Landing page address.

    Returns:
        "Landing page:" followed by that page's contents, then for each
        selected link the link itself and its contents. A notice is appended
        instead when the reply yields no usable links. Fetch failures show up
        as the failure text returned by `retry`.
    """
    result = "Landing page:\n"
    result += retry(url)
    links = get_links(url)
    print("Found links:", links)

    link_urls = _extract_link_urls(links)
    if not link_urls:
        result += "\nNo relevant links found.\n"
    for link in link_urls:
        result += f"\n\n{link}\n"
        result += retry(link)

    return result


In [36]:
# System prompt for the brochure-writing request.
# NOTE(review): this rebinds `system_prompt`, replacing the link-selection
# prompt an earlier cell stored under the same name; any function that reads
# the global after this cell runs gets the brochure prompt.
system_prompt = (
    "You are an assistant that analyzes the contents of several relevant pages from a website ( it can be company website, college club website, someone's portfolio) so be prepared "
    "and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown. "
    # The space after "markdown." above keeps the two sentences from fusing.
    "Include details of company culture, customers and careers/jobs if you have the information."
)

In [37]:
def brochure_user_prompt(company_name, url, max_chars=5_000):
    """Build the user message for the brochure request.

    Args:
        company_name: Name shown to the model as the brochure subject.
        url: Landing page address; its contents and those of the selected
            links are gathered with `get_details`.
        max_chars: Upper bound on the prompt length in characters. Pass a
            non-negative int, or None to send the prompt untruncated.

    Returns:
        The instruction header followed by the scraped text, cut to `max_chars`.
    """
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += "Here are the contents of its landing page and other relevant pages; use this information to build a professional brief brochure of the website in markdown. At footer always give the contacts link of their social media handles (except phone number) if available\n"
    user_prompt += get_details(url)
    # Truncate so a large site cannot blow up the request size.
    return user_prompt[:max_chars]

In [38]:
def create_brochure(company_name, url):
    """Generate a brochure for `company_name` from `url` and render it.

    Sends one non-streaming chat request and displays the reply as Markdown
    in the notebook. Nothing is returned.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": brochure_user_prompt(company_name, url)},
    ]
    completion = openai.chat.completions.create(model=MODEL, messages=conversation)
    brochure = completion.choices[0].message.content
    display(Markdown(brochure))

In [39]:
def _strip_markdown_fence(text):
    """Remove a code fence that wraps the whole reply, leaving the body intact."""
    stripped = text.lstrip()
    if not stripped.startswith("```"):
        return text
    # Drop the opening fence line ("```" or "```markdown"). Until its newline
    # has been streamed there is nothing to show yet.
    first_newline = stripped.find("\n")
    if first_newline == -1:
        return ""
    inner = stripped[first_newline + 1:]
    # Drop the matching closing fence once it has arrived.
    if inner.rstrip().endswith("```"):
        inner = inner.rstrip()[:-3]
    return inner


def stream_brochure(company_name, url):
    """Generate a brochure for `company_name` from `url`, rendering it as it streams.

    The reply is accumulated chunk by chunk and the notebook output is
    refreshed after each chunk. Only a fence wrapping the whole reply is
    removed; the word "markdown" and fences inside the brochure are kept.
    Nothing is returned.
    """
    stream = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": brochure_user_prompt(company_name, url)}
          ],
        stream=True
    )

    response = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        # Skip chunks that carry no choices rather than failing on choices[0].
        if not chunk.choices:
            continue
        response += chunk.choices[0].delta.content or ''
        update_display(Markdown(_strip_markdown_fence(response)), display_id=display_handle.display_id)

In [41]:
stream_brochure("Google", "https://google.com/")

Found links: {'relevant_links': ['https://about.google/?fg=1&utm_source=google-IN&utm_medium=referral&utm_campaign=hp-header', 'https://store.google.com/IN?utm_source=hp_header&utm_medium=google_ooo&utm_campaign=GS100042&hl=en-IN', 'https://www.google.co.in/intl/en/about/products', 'https://www.google.com/intl/en_in/ads/?subid=ww-ww-et-g-awa-a-g_hpafoot1_1!o2&utm_source=google.com&utm_medium=referral&utm_campaign=google_hpafooter&fg=1', 'https://www.google.com/services/?subid=ww-ww-et-g-awa-a-g_hpbfoot1_1!o2&utm_source=google.com&utm_medium=referral&utm_campaign=google_hpbfooter&fg=1', 'https://google.com/search/howsearchworks/?fg=1', 'https://www.google.com/preferences?hl=en-IN&fg=1', 'https://support.google.com/websearch/?p=ws_results_help&hl=en-IN&fg=1']}


# Welcome to Google: The World's Smartest Friend! 

## About Us
At Google, we have one mission: to **organize the world’s information** and make it **universally accessible and useful**. It's like being a librarian for the internet but without the shushing!

---

## Our Culture: The Tinkerers and Thinkers
- **Innovative Atmosphere:** We embrace weird ideas (seriously, have you met Google Lens?).
- **Team Spirit:** Ever tried playing a game of ping pong during lunch? We do! 
- **Diversity**: We speak every language, from Hindi to Klingon. It’s a language buffet! 🍽️

---

## Products That Make Your Life Easier (and Cooler!)
Discover an array of **Google Products** that can even impress your grandma. They've got the Wi-Fi capability of a wizard!

- **Google Workspace:** Because we all need a little help pretending to be productive. 
- **Nest Devices:** Smart home gadgets that listen better than your best friend!
- **Pixel Phones:** Takes pictures so good, even your selfies will get a gallery showing. 

---

## Our Committed Customers: A Global Family
From students to CEOs, we've got all kinds under our belt. It’s like the world’s largest family reunion—everyone’s welcome and we promise not to ask too many awkward questions.

### Who We Serve:
- **Educators** looking to spark creativity.
- **Businesses** wanting to confuse competitors (legally, of course).
- **Tech Enthusiasts** who need the latest and greatest gadgets!

---

## Careers: Join the Fun!
Want to work where innovation meets caffeine? We're hiring! And yes, we do have a treadmill desk. 🚶‍♂️🏃‍♀️

### Why Join Us:
- **Flexible Work Environments**: Work from a hammock or a mountain top. We cater to your kind of zen!
- **Promote Diversity**: Be part of a team that celebrates your quirks (yes, all ten of them).
- **Career Development**: Who needs GPS? We help you find your path in the tech wilderness!

---

### Connect With Us
Follow us on our social channels and let’s stay virtual friends!  
- [Facebook](https://www.facebook.com/Google)  
- [Twitter](https://twitter.com/google)  
- [Instagram](https://www.instagram.com/google/)  
- [LinkedIn](https://www.linkedin.com/company/google)

---

Join Google, where we magically turn dreams into code! ✨