https://github.com/skchandrappa/dsa-cookbook.git


In [23]:
! pip install python-dotenv



In [24]:
import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI



class Website:
  url : str
  title: str
  text : str

  def __init__(self, url: str):
    self.url = url

    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    self.title = soup.title.string if soup.title else None

    for irrelevant in soup.body(["script", "style","img","input"]):
      irrelevant.decompose()

    self.text = soup.body.get_text(separator="\n", strip=True)



In [25]:
# ndtv = Website("https://www.ndtv.com/")
# # print(ndtv.title)
# # print(ndtv.text)

In [47]:
load_dotenv()
api_key = 'KEY'
print(api_key)
# Check the key

if not api_key:
    print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")
elif api_key[:8]!="sk-proj-":
    print("An API key was found, but it doesn't start sk-proj-; please check you're using the right key")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them")
else:
    print("API key found and looks good so far!")

KEY
An API key was found, but it doesn't start sk-proj-; please check you're using the right key


In [41]:
openai = OpenAI(api_key=api_key)

In [42]:
# Define our system prompt - you can experiment with this later, changing the last sentence to 'Respond in markdown in Spanish."

system_prompt = "You are an assistant that analyzes the contents of a website \
and provides a short summary, ignoring text that might be navigation related. \
Respond in markdown."

In [43]:
def user_prompt_for(website):
    user_prompt = f"You are looking at a website titled {website.title}"
    user_prompt += "The contents of this website is as follows; \
please provide a short summary of this website in markdown. \
If it includes news or announcements, then summarize these too.\n\n"
    user_prompt += website.text
    return user_prompt

In [44]:
def messages_for(website):
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt_for(website)}
    ]

In [45]:
import time
def summarize(url):
    website = Website(url)
    try:
      response = openai.chat.completions.create(
          model = "gpt-4o-mini",
          messages = messages_for(website)
      )
      return response.choices[0].message.content
    except openai.error.RateLimitError:
      print("Rate limit exceeded. Waiting for 60 seconds...")
      time.sleep(60)  # Wait for 60 seconds before retrying
      return summarize(url)

In [46]:
summarize("https://www.ndtv.com/")

"# NDTV.com Summary\n\nNDTV.com is a prominent news website in India that provides the latest reports on national and international events, covering a wide range of topics including politics, sports, entertainment, education, and health. \n\n## Key News Highlights\n- **Air India and Vistara Merger**: The first flight of the integrated Air India-Vistara is set to operate on Tuesday.\n- **Kerala Police Controversy**: Actor Siddique claims the Kerala Police are fabricating stories in a sex assault case.\n- **Tejashwi Yadav's Critique**: Tejashwi Yadav criticizes Nitish Kumar for misappropriating Mahatma Gandhi's name for political gains.\n- **Haryana Stubble Burning Policy**: The state has doubled fines for stubble burning, accumulating Rs 1.65 lakh in penalties.\n- **Weather Update**: Kashmir experiences its first snowfall of the season, ending a prolonged dry spell.\n\n## Education Updates\n- **Delhi University Elections**: The Delhi High Court has mandated the announcement of DU's stud