In [40]:
from openai import AzureOpenAI
import os
from dotenv import load_dotenv
import json

from IPython.display import Markdown, display

## Load LLM Configurations

### Azure OpenAI LLM

In [18]:
# Pull the Azure OpenAI settings from the environment. override=True is passed
# so that values loaded by python-dotenv take precedence over existing variables.
load_dotenv(override=True)
azure_oai_endpoint = os.environ.get("AZURE_OAI_ENDPOINT")
llm_api_key = os.environ.get("AZURE_OPENAI_KEY")

In [3]:
# Azure OpenAI client used by the chat-completion calls below.
# NOTE(review): the variable is named `openai`, the same as the SDK package;
# it is kept because later cells refer to it by this name.
openai = AzureOpenAI(
    api_key=llm_api_key,
    api_version="2025-01-01-preview",
    azure_endpoint=azure_oai_endpoint,
)

#### Sample Test usage

In [33]:
# Smoke test 1: a lone user message, no system prompt.
message = "Hi"
response = openai.chat.completions.create(
    messages=[dict(role="user", content=message)],
    model="gpt-4o-mini",
)
print(response.choices[0].message.content)

# Smoke test 2: a system prompt that sets the tone of the reply.
messages = [
    dict(role="system", content="You are a sarcastic assistant"),
    dict(role="user", content="How is the weather?"),
]
response = openai.chat.completions.create(
    messages=messages,
    model="gpt-4o-mini",
)
print(messages[1]["content"])  # echo the question ahead of the answer
print(response.choices[0].message.content)

Hello! How can I assist you today?
How is the weather?
Oh, you know, just the usual—sunshine and rainbows or perhaps a monsoon. It really depends on whether you checked the forecast or just decided to wing it. Why bother with a weather app when you can step outside and see for yourself? It's like a surprise party but for your day!


## Website Summarizer

A website is scraped and its contents are parsed. The parsed text is then fed to the LLM to produce a summary.

In [6]:
from bs4 import BeautifulSoup
import requests

In [7]:
class Website:
    """Fetch a web page and expose its title and visible body text.

    Attributes set by ``__init__``:
        url: The URL that was requested.
        title: Contents of the ``<title>`` tag, or ``""`` when the tag is
            missing or has no single string child.
        body: Text of ``<body>`` with script/style/img/input elements removed,
            fragments joined by newlines; ``""`` when there is no ``<body>``.
    """

    # Browser-like User-Agent sent with every request.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
    }

    # Tags stripped from the body before its text is extracted.
    _IRRELEVANT_TAGS = ["script", "style", "img", "input"]

    def __init__(self, url, timeout=30):
        """Download ``url`` and parse it.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds passed to ``requests.get`` as ``timeout``.

        Raises:
            requests.exceptions.RequestException: Propagated from
                ``requests.get`` (connection failure, timeout, ...).

        Note:
            The HTTP status code is not checked, so an error page returned by
            the server is parsed like any other page.
        """
        self.url = url
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.get(url, headers=Website.headers, timeout=timeout)
        parser = BeautifulSoup(response.content, 'html.parser')

        # A response without a <body> (e.g. non-HTML content) yields None here.
        body = parser.body
        if body is not None:
            # Get rid of the irrelevant stuff from the body.
            for element in body.find_all(self._IRRELEVANT_TAGS):
                element.decompose()

        title_tag = parser.title
        # .string is None when <title> is empty or holds more than one child.
        if title_tag is not None and title_tag.string is not None:
            self.title = title_tag.string
        else:
            self.title = ""

        self.body = body.get_text(separator="\n", strip=True) if body is not None else ""
        

In [15]:
# Scrape the CNN front page as a sample input for the summarizer.
website = Website(url="https://edition.cnn.com/")

In [16]:
# Title, a blank separator line, then the extracted body text.
print(website.title, "", website.body, sep="\n")

Breaking News, Latest News and Videos | CNN

CNN values your feedback
1. How relevant is this ad to you?
2. Did you encounter any technical issues?
Video player was slow to load content
Video content never loaded
Ad froze or did not finish loading
Video content did not start after ad
Audio on ad was too loud
Other issues
Ad never loaded
Ad prevented/slowed the page from loading
Content moved around while ad loaded
Ad was repetitive to ads I've seen previously
Other issues
Cancel
Submit
Thank You!
Your effort and contribution in providing this feedback is much
                                        appreciated.
Close
Ad Feedback
Close icon
US
World
Politics
Business
Health
Entertainment
Style
Travel
Sports
Science
Climate
Weather
Ukraine-Russia War
Israel-Hamas War
Games
More
US
World
Politics
Business
Health
Entertainment
Style
Travel
Sports
Science
Climate
Weather
Ukraine-Russia War
Israel-Hamas War
Games
Watch
Listen
Live TV
Subscribe
Sign in
My Account
Settings
Newsletters
Topics Y

### AI Summarization

We will summarize the contents of a website with the help of an LLM.

In [56]:
def create_summarization_system_prompt():
    """Return the system prompt telling the model how to summarize page text.

    The prompt fixes the output layout (Title / Overview / Summary), asks the
    model to skip navigation content, and requests raw markdown without fences.
    """
    # Leading spaces are part of the prompt text and are kept verbatim.
    prompt_lines = (
        "You are an assistant which specializes in summarizing text content. You will be provided text content from a website and your task is to write a short summary and follow the below instructions:",
        "    - The summary format should be:",
        "        - Title",
        "        - Overview",
        "        - Summary",
        "    - Ignore any navigation related contents.",
        "    - Output should be in raw markdown format only, without the ```.",
    )
    return "\n".join(prompt_lines)

def create_summarization_user_prompt(website: Website):
    """Build the user prompt embedding the title and body text of ``website``.

    Args:
        website: Object whose ``title`` and ``body`` attributes are interpolated.

    Returns:
        The prompt string; continuation lines keep their four-space indent and
        the text ends with a newline followed by four spaces.
    """
    return (
        f"Here is a website with title: {website.title}.\n"
        "    Summarize the contents of this website and provide summary in markdown.\n"
        "    The contents of the website are as follows:\n"
        f"    {website.body}\n"
        "    "
    )

def create_summarization_messages(website: Website):
    """Return the two-message chat payload (system, then user) for ``website``."""
    system_message = {
        "role": "system",
        "content": create_summarization_system_prompt(),
    }
    user_message = {
        "role": "user",
        "content": create_summarization_user_prompt(website),
    }
    return [system_message, user_message]

def summarize_website(url: str, model: str = "gpt-4o-mini"):
    """Scrape ``url`` and return the model's summary of the page.

    Args:
        url: Address of the page to summarize.
        model: Value passed as ``model`` to the chat-completions call.
            Defaults to "gpt-4o-mini", the value that was previously
            hard-coded, so existing callers are unaffected.

    Returns:
        The ``content`` of the first choice in the completion response.
    """
    website = Website(url)
    response = openai.chat.completions.create(
        model=model,
        messages=create_summarization_messages(website),
    )

    return response.choices[0].message.content

def display_markdown(content: str):
    """Render ``content`` as markdown in the notebook output area."""
    rendered = Markdown(content)
    display(rendered)
    

In [61]:
# Summarize the CNN front page; the bare name below shows the raw string.
summary = summarize_website(url="https://edition.cnn.com/")
summary

"# Breaking News, Latest News and Videos | CNN\n\n## Overview\nCNN's website features a comprehensive aggregation of the latest global news, videos, and analysis across various topics including politics, business, health, and entertainment.\n\n## Summary\nThe CNN website offers breaking news and in-depth analysis on current events globally. It covers various categories such as US and world news, politics, business, health, and entertainment. The platform also features numerous video segments, podcasts, and newsletters to keep users updated on significant stories, trends, and investigative reports. Additionally, CNN presents interactive content including games like crosswords and quizzes, enhancing user engagement. Users can also submit feedback on advertisements and utilize options for personalized news through account settings."

In [59]:
# Render the summary string as formatted markdown.
display_markdown(content=summary)

# Breaking News, Latest News and Videos | CNN

## Overview
CNN is a leading news platform offering a wide range of content that includes breaking news, in-depth analysis, and video coverage across various topics. The content spans global headlines, politics, business, health, entertainment, science, and sports. 

## Summary
CNN provides up-to-the-minute news reports on significant global and national events. The website features a multitude of sections including politics, business news, health updates, entertainment highlights, and sports coverage. Current stories cover impactful topics such as the Ukraine-Russia war and international conflicts like the Israel-Hamas situation. Additionally, CNN offers specialized sections dedicated to various interests, including travel, science, and technology, alongside interactive elements like games and video content. Users are encouraged to provide feedback on ads and other experiences, reflecting CNN's commitment to enhancing user engagement and experience.