In [13]:
import os
import requests
from bs4 import BeautifulSoup
import json 
from dotenv import load_dotenv
from typing import List
from IPython.display import Markdown, display, update_display
from openai import OpenAI

In [14]:
# Pull settings from a local .env file, letting its values win over variables
# that are already set in the process environment.
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Cheap sanity check on the key's shape (project-key prefix, non-trivial length);
# this does not contact the API.
print(
    "API key wrking fine"
    if api_key and api_key.startswith('sk-proj-') and len(api_key)>10
    else "Check api key again"
)

# Model name and client shared by every completion call below.
MODEL = 'gpt-4o-mini'
openai = OpenAI()

API key wrking fine


In [15]:
# Browser-like User-Agent sent with every request made by Website.
header={
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36'
}

class Website:
    """A fetched web page reduced to its title, visible text and link targets.

    Attributes (all set by ``__init__``):
        url:   the URL that was requested.
        body:  raw response bytes.
        title: text of the ``<title>`` tag, or "No title found".
        text:  newline-separated text of ``<body>`` after removing
               script/style/img/input elements ("" when there is no body).
        links: non-empty ``href`` values of every ``<a>`` tag, exactly as
               written in the page (so they may be relative).
    """

    def __init__(self,url,timeout=10):
        """Download and parse ``url``.

        Args:
            url: address of the page to fetch.
            timeout: seconds to wait on the server before ``requests`` gives
                up. Without it a stalled host would block forever.

        Raises:
            requests.exceptions.RequestException: on connection failure or
                timeout.
        """
        self.url=url
        response=requests.get(url, headers=header, timeout=timeout)
        self.body=response.content
        soup=BeautifulSoup(self.body, 'html.parser')

        # `.string` is None when <title> is empty or wraps other tags, so fall
        # back in that case as well as when the tag is missing.
        title_tag=soup.title
        title_text=title_tag.string if title_tag is not None else None
        self.title=title_text if title_text else "No title found"

        if soup.body:
            # Drop elements that contribute no readable text.
            for faaltu in soup.body(['script', 'style','img','input']):
                faaltu.decompose()
            self.text=soup.body.get_text(separator='\n', strip=True)
        else:
            self.text=""

        links=[link.get('href') for link in soup.find_all('a')]
        self.links=[link for link in links if link]

    def get_contents(self):
        """Return the title and text formatted as a block for use in a prompt."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [16]:
# System prompt for the link-selection request: describes the task and shows
# the JSON shape the model should answer with.
system_prompt="You are provided with a list of links found on a webpage. \
You are able to decide which of the links would be most relevant to include in a brochure about the company, \
such as links to an About page, or a Company page, or Careers/Jobs pages.\n"

system_prompt+='You should respond in JSON as in this example:'
# The example has to be valid JSON itself, otherwise the model is shown a
# malformed target format.
system_prompt+="""
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [17]:
def get_user_prompt(website):
    """Build the user message asking the model to pick brochure-worthy links.

    Args:
        website: object exposing ``url`` (str) and ``links`` (iterable of
            str), e.g. a ``Website`` instance.

    Returns:
        The prompt text: the instructions followed by one link per line.
    """
    user_prompt=f'Here is the list of links on the website of {website.url} - '
    user_prompt+="please decide which of these are relevant web links  for a brochure about the company, respond with the full https URL in JSON format. \
        do not include Terms of Service, Privacy, email links.\n"
    # The trailing "\n" keeps the first link on its own line instead of being
    # glued onto the word "form".
    user_prompt+="Links (some might be relative links):\nGive the output strictly in python list form\n"
    user_prompt+="\n".join(website.links)

    return user_prompt

In [18]:
def get_links(url):
    """Ask the model which links on ``url`` belong in a brochure.

    Fetches the page, sends its link list to the chat model with JSON output
    requested, and returns the reply parsed with ``json.loads``.

    NOTE(review): the system message is the module-level ``system_prompt``
    looked up at call time, so whichever cell assigned that name most recently
    decides which prompt is sent here.
    """
    page=Website(url)
    chat_messages=[
        {"role": "system", "content":system_prompt},
        {"role":"user", "content": get_user_prompt(page)},
    ]
    completion=openai.chat.completions.create(
        model=MODEL,
        messages=chat_messages,
        response_format={"type":"json_object"},
    )
    return json.loads(completion.choices[0].message.content)
    

In [25]:
class ConnectTimeout(Exception):
    """Notebook-local exception type for a connection timeout.

    This is its own class derived from ``Exception``; it is not the class of
    the same name that lives in ``requests.exceptions``.
    """


In [26]:
import time

def retry(url, retries=5, delay=2, fetch=None):
    """Fetch a page's contents, retrying on any error, without ever raising.

    Args:
        url: page to fetch.
        retries: maximum number of attempts.
        delay: seconds to pause between two attempts.
        fetch: optional callable ``fetch(url) -> str`` used to get the
            contents; defaults to ``Website(url).get_contents()``.

    Returns:
        The fetched contents, or a "Failed to retrieve contents for <url>"
        line once every attempt has failed.
    """
    if fetch is None:
        def fetch(target):
            return Website(target).get_contents()

    for attempt in range(retries):
        try:
            return fetch(url)
        # Deliberately broad: any failure (timeout, TLS, parsing, ...) should
        # skip this page rather than abort the whole run.
        except Exception as e:
            print(f"Error occurred for {url}: {e}. Attempt {attempt + 1} of {retries}.")
            # Only wait when another attempt will follow.
            if attempt + 1 < retries:
                time.sleep(delay)
    print(f"Skipping {url} after {retries} failed attempts.")
    return f"Failed to retrieve contents for {url}\n"

def _link_entries(links):
    """Return the list of link entries from a parsed model reply, whatever key it used."""
    if isinstance(links, list):
        return links
    if isinstance(links, dict):
        for value in links.values():
            if isinstance(value, list):
                return value
    return []


def _entry_target(entry):
    """Return ``(heading, url)`` for one link entry, or None if it has no usable URL."""
    if isinstance(entry, str):
        return (entry, entry) if entry else None
    if isinstance(entry, dict):
        target = entry.get("url")
        if isinstance(target, str) and target:
            label = entry.get("type")
            return (f"{label}: {target}" if label else target), target
    return None


def get_details(url):
    """Collect the text of the landing page and of every model-selected link.

    The model's reply is a JSON object whose key name varies between runs
    ("links", "relevant_links", ...) and whose entries may be plain URL
    strings or ``{"type": ..., "url": ...}`` objects; all of these are
    accepted. The landing page and repeated URLs are fetched only once.

    Args:
        url: landing page of the site.

    Returns:
        One string: the landing page contents followed by a headed section per
        fetched link, or a "No relevant links found." line when there is none.
    """
    result = "Landing page:\n"
    result += retry(url)
    links = get_links(url)
    print("Found links:", links)

    seen = {url}  # the landing page is already in `result`
    fetched_any = False
    for entry in _link_entries(links):
        target = _entry_target(entry)
        if target is None:
            continue
        heading, link = target
        if link in seen:
            continue
        seen.add(link)
        result += f"\n\n{heading}\n"
        result += retry(link)
        fetched_any = True

    if not fetched_any:
        result += "\nNo relevant links found.\n"

    return result


In [27]:
# System prompt for the brochure-writing requests.
# NOTE(review): this rebinds the same module-level `system_prompt` that
# get_links() reads at call time, so once this cell has run the link-selection
# request is sent with this brochure prompt too.
# Each continued line ends with a space before the backslash so the sentences
# do not run together.
system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a website ( it can be company website, college club website, someone's portfolio) so be prepared \
and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown. \
Include details of company culture, customers and careers/jobs if you have the information."

In [28]:
def brochure_user_prompt(company_name, url):
    """Assemble the user message for the brochure request.

    The message names the company, states the task, and appends the page
    contents gathered by ``get_details(url)``. The whole message is cut to its
    first 5,000 characters.
    """
    sections = [
        f"You are looking at a company called: {company_name}\n",
        "Here are the contents of its landing page and other relevant pages; use this information to build a professional brief brochure of the website in markdown.At footer always give the contacts link of their social media handles (except phone number) if available\n",
        get_details(url),
    ]
    return "".join(sections)[:5_000]

In [29]:
def create_brochure(company_name, url):
    """Generate a brochure for the site in one request and render it as Markdown.

    Nothing is returned; the model's reply is shown in the cell output.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": brochure_user_prompt(company_name, url)},
    ]
    completion = openai.chat.completions.create(model=MODEL, messages=conversation)
    display(Markdown(completion.choices[0].message.content))

In [30]:
def _strip_markdown_fence(text):
    """Remove a ```markdown / ``` fence wrapping the whole reply; leave the rest untouched."""
    body = text.lstrip()
    if not body.startswith("```"):
        return text
    body = body[3:]
    if body.startswith("markdown"):
        body = body[len("markdown"):]
    trimmed = body.rstrip()
    if trimmed.endswith("```"):
        body = trimmed[:-3]
    return body


def stream_brochure(company_name, url):
    """Generate a brochure for the site and render it live as the reply streams in.

    The accumulated reply is re-rendered as Markdown after every chunk. Only a
    code fence wrapping the entire reply is removed; the word "markdown" and
    any code fences inside the brochure text are kept.

    Nothing is returned; the brochure is shown in the cell output.
    """
    stream = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": brochure_user_prompt(company_name, url)}
          ],
        stream=True
    )

    response = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        # Defensive: skip a chunk that carries no choices rather than fail on [0].
        if not chunk.choices:
            continue
        response += chunk.choices[0].delta.content or ''
        update_display(Markdown(_strip_markdown_fence(response)), display_id=display_handle.display_id)

In [31]:
# Demo run: stream a brochure for MIT's public website.
stream_brochure(
    company_name="Massachusetts Institute of Technology",
    url="https://www.mit.edu/",
)

Found links: {'relevant_links': ['https://www.mit.edu/', 'https://www.mit.edu/education', 'https://www.mit.edu/research', 'https://www.mit.edu/innovation', 'https://www.mit.edu/admissions-aid', 'https://www.mit.edu/campus-life', 'https://news.mit.edu/', 'https://www.mit.edu/alumni', 'https://www.mit.edu/about', 'https://www.mit.edu/building-a-better-world', 'https://www.google.com/maps/place/Massachusetts+Institute+of+Technology/@42.360091,-71.09416,15z/data=!4m5!3m4!1s0x0:0xd0e08ea5b308203c!8m2!3d42.360091!4d-71.09416', 'https://www.mit.edu/visitmit', 'https://whereis.mit.edu/', 'https://calendar.mit.edu/', 'https://careers.mit.edu/', 'https://socialmediahub.mit.edu/', 'https://twitter.com/mit', 'https://www.facebook.com/MITnews', 'https://www.youtube.com/mit', 'https://www.instagram.com/mit/']}
Error occurred for https://whereis.mit.edu/: HTTPSConnectionPool(host='whereis.mit.edu', port=443): Max retries exceeded with url: / (Caused by SSLError(SSLError(1, '[SSL: DH_KEY_TOO_SMALL] dh

# Welcome to the MIT Universe: Where Science Meets Snacks!

---

## **Massachusetts Institute of Technology (MIT)**
> *Education, Research, Innovation, and Weird Coffee* 

---

### **Life at MIT: Crunching Numbers and Snacking on Knowledge!**
At MIT, education is not just a serious affair; it’s an adventure! Picture this: late-night study sessions fueled by an endless supply of pizza and enough caffeine to power a small city. Students often pull all-nighters—fine-tuning robots, designing the next solar-powered toaster, or unraveling the mysteries of dark matter—while occasionally debating the merits of pineapple on pizza. Spoiler alert: It's not a popular opinion! 🍕 

### **Research: Where the Magic Happens (with a dash of caffeine)!**
Our researchers don’t just think outside the box; they build entire universities inside it! From AI models that could save the world to cracking the mysteries of supermassive black holes (and some that just aim to make a mean cup of joe), innovation is our bread and butter. All while making sure to keep some time free for our famous annual **MIT Mystery Hunt** - because who doesn’t like puzzling through sleep deprivation?

### **Campus Life: Let's Get This Party Started!**
- **Puzzles and Prizes:** Just when you thought puzzles were for kids, MIT throws three days of intense mind-bending fun your way! Get ready for brain teasers, sleep deprivation, and possibly some unexpected victory dances.
- **Cool Clubs and Crazy Creations:** From makerspaces to rocketry clubs, don’t be surprised if you end up building a life-sized R2-D2 on a Wednesday night… that's a Tuesday too if time allows!
  
### **Customers: The World’s Problems!**
Our students, alumni, and researchers tackle real-world issues like sustainable energy and blockchain technology—with the occasional superhero cape thrown in for good measure! 🌎🦸‍♂️ That's right! Whether it is a startup like NALA connecting artists with art buyers or our students venturing to Tsinghua University to change the course of global affairs, when we say we build a better world, we mean all hands on deck!

### **Careers: Your Future Awaits (With Free Wi-Fi!)**
Thinking about joining us? We are always on the lookout for innovative thinkers, problem solvers, and a few coffee lovers. Whether you’re a prospective student, an aspiring researcher, or someone who can code while singing show tunes, there’s a place for you at MIT. 

---

### **Join Us!** 
Ready to jump into a world of science, social change, and sustenance? Apply now! Remember, the only thing we take seriously at MIT is… well, some things… *mostly*.

---

### **Connect with Us!**
Got questions? Or you just really want to share those “I’m at MIT” selfies? Follow us on social media:

- Twitter: [@mit](https://twitter.com/mit)
- Facebook: [MIT](https://www.facebook.com/MIT)
- Instagram: [@mit](https://www.instagram.com/mit)
- YouTube: [MIT](https://www.youtube.com/mit)

> *Disclaimer: MIT is not responsible for any spontaneous ideas that might make you want to revolutionize your kitchen—with robots!*