In [1]:
import os
import requests
from bs4 import BeautifulSoup
import json 
from dotenv import load_dotenv
from typing import List
from IPython.display import Markdown, display, update_display
from openai import OpenAI

In [2]:
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:
    print("API key wrking fine")
else:
    print("Check api key again")
    
MODEL = 'gpt-4o-mini'
openai = OpenAI()

API key wrking fine


In [3]:
header={
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36'
}

class Website:
    def __init__(self,url):
        self.url=url
        response=requests.get(url, headers=header)
        self.body=response.content
        soup=BeautifulSoup(self.body, 'html.parser')
        self.title=soup.title.string if soup.title else "No title found"
        if soup.body:
            for faaltu in soup.body(['script', 'style','img','input']):
                faaltu.decompose()
            self.text=soup.body.get_text(separator='\n', strip=True)
        else:
            self.text=""
        links=[link.get('href') for link in soup.find_all('a')]
        self.links=[link for link in links if link]

    def get_contents(self):
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [4]:
system_prompt="You are provided with a list of links found on a webpage. \
You are able to decide which of the links would be most relevant to include in a brochure about the company, \
such as links to an About page, or a Company page, or Careers/Jobs pages.\n"

system_prompt+='You should respond in JSON as in this example:'
system_prompt+="""
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page": "url": "https://another.full.url/careers"}
    ]
}
"""

In [5]:
def get_user_prompt(website):
    user_prompt=f'Here is the list of links on the website of {website.url} - '
    user_prompt+="please decide which of these are relevant web links  for a brochure about the company, respond with the full https URL in JSON format. \
        do not include Terms of Service, Privacy, email links.\n"
    user_prompt+="Links (some might be relative links):\nGive the output strictly in python list form"
    user_prompt+="\n".join(website.links)

    return user_prompt

In [6]:
def get_links(url):
    website=Website(url)
    response=openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content":system_prompt},
            {"role":"user", "content": get_user_prompt(website)}
        ],

            response_format={"type":"json_object"}
    )
    result=response.choices[0].message.content
    return json.loads(result)
    

In [13]:
import time

def retry(url, retries=5, delay=2):
    for attempt in range(retries):
        try:
            return Website(url).get_contents()  
        except (ConnectTimeout, TimeoutError, Exception) as e:
            print(f"Error occurred for {url}: {e}. Attempt {attempt + 1} of {retries}.")
            time.sleep(delay)  
    print(f"Skipping {url} after {retries} failed attempts.")
    return f"Failed to retrieve contents for {url}\n"

def get_details(url):
    result = "Landing page:\n"
    result += retry(url)  
    links = get_links(url)
    print("Found links:", links)
    
    key_to_check = next(iter(links), None) 
    
    if key_to_check == 'links':
        for link in links["links"]:
            result += f"\n\n{link}\n"
            result += retry(link)  
    elif key_to_check == 'relevant_links':
        for link in links["relevant_links"]:
            result += f"\n\n{link}\n"
            result += retry(link)  
    else:
        result += "\nNo relevant links found.\n"
    
    return result


In [14]:
system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a website ( it can be company website, college club website, someone's portfolio) so be prepared \
and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
Include details of company culture, customers and careers/jobs if you have the information."

In [15]:
def brochure_user_prompt(company_name, url):
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += f"Here are the contents of its landing page and other relevant pages; use this information to build a professional brief brochure of the website in markdown.At footer always give the contacts link of their social media handles (except phone number) if available\n"
    user_prompt += get_details(url)
    user_prompt = user_prompt[:5_000] 
    return user_prompt

In [17]:
def create_brochure(company_name, url):
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": brochure_user_prompt(company_name, url)}
          ],
    )
    result = response.choices[0].message.content
    display(Markdown(result))

In [18]:
def stream_brochure(company_name, url):
    stream = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": brochure_user_prompt(company_name, url)}
          ],
        stream=True
    )
    
    response = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        response += chunk.choices[0].delta.content or ''
        response = response.replace("```","").replace("markdown", "")
        update_display(Markdown(response), display_id=display_handle.display_id)

In [19]:
stream_brochure("VIT University", "https://vit.ac.in")

Found links: {'relevant_links': ['https://vit.ac.in/vit-2025-26-applications-open-for-ug-pg-nri-foreign-research-programmes', 'https://admissions.vit.ac.in/ugapplications/bdes-industrial-design-2025-applications/login', 'https://admissions.vit.ac.in/ugapplications/b-arch-2025-applications/login', 'https://admissions.vit.ac.in/ugapplications/bsc-hons-agri-2025-applications/login', 'https://admissions.vit.ac.in/ugapplications/bsc-bcom-bba-2025-applications/login', 'https://admissions.vit.ac.in/ugnriapplication/login', 'https://results.vit.ac.in/VITREE2025INT', 'https://careers.vit.ac.in/#!/', 'https://vit.ac.in/about-vit', 'https://vit.ac.in/about/vision-mission', 'https://vit.ac.in/vit-milestones', 'https://vit.ac.in/about/leadership', 'https://vit.ac.in/governance', 'https://vit.ac.in/about/infrastructure', 'https://vit.ac.in/about/sustainability', 'https://vit.ac.in/about/community-outreach', 'https://vit.ac.in/all-events', 'https://vit.ac.in/national-institutional-ranking-framework-n

# Welcome to VIT University: The Place Where Dreams Go to Graduate!

---

## 🤖 About Us
VIT University, located amidst the serene landscapes of Vellore, is not just an "engineering institution." It's where students turn caffeine into code, innovation into invention, and education into exploration! We're not saying our dreamers will change the world, but last time we checked, world-changers attend classes here. 

Built on a foundation of sustainability, community engagement, and academic excellence, our vision is to produce not just engineers but *engineers of change* (and a few coffee enthusiasts too).

---

## 🎉 Student Life
One word: Fests! 📅
From cultural fests that rival most festivals to sports days where the only injury is from excessive high-fiving, our campus life is vibrant and engaging. Not to mention, our hostels are where friendships turn into family (and sometimes study groups at 2 AM).

### Health Services 😷
We care for our students. If your study binge turns into a sneeze binge, our health services are available for all your needs.

---

## 🚀 Academics
We offer 71 Undergraduate and 58 Postgraduate programs because why limit yourself? Our curriculum not only makes you industry-ready but also builds your coffee-making skills for all-nighters.

### Research Opportunities 🔬
Ready to dive into the unknown? Look no further than VIT! Our passionate team encourages students to explore academia and research. Ph.D. applicants, welcome to the club! 

**All Programs Offered:**
- B.Tech.: Start your engineering journey!
- MBA: For the aspiring business tycoon.
- Research & Ph.D.: Get ready to earn that title of "Dr."

---

## 💼 Career Development
If you’re here to just attend classes and not prepare for the future, this isn’t the place for you. 
- **Placement Highlights:** History shows every student here is committed to securing jobs that pay well enough to justify their student loans! (Spoiler: Highest CTC ₹1.02 Crore!)
- **Top Recruiters:** We have a star-studded list spanning from tech giants to innovative startups.

---

## VIT Culture: Laughter in Learning
- **Inclusivity:** Everyone is welcome! Well, except for pranksters who take things too far. Kindly exit stage left. 
- **Coffee Culture:** We have coffee breaks built into our schedule because caffeine is the true fuel of curiosity!
- **Green Initiative:** We’re all about going green, not just with envy, but for sustainability too.

---

## 🎓 Careers at VIT
So, you think you can teach? Become part of our academic family! We are always on the lookout for passionate faculty members and staff. Join us, and you could make a difference in the lives of future innovators (or just help them navigate through busy schedules).

---

### ✉️ Stay Connected!
We’re social! Follow us on our platforms for the latest updates and even more hilarious memes about student life.

- [Facebook](#)
- [Twitter](#)
- [Instagram](#)
- [LinkedIn](#)

---

VIT University: **Where learning is fun, enriching, and possibly involves more snacks than you anticipated!**