# Summary of a Wikipedia page
- Read the main content of this wiki page by using BeautifulSoup -> https://fa.wikipedia.org/wiki/%D8%AD%D8%A7%D9%81%D8%B8
- Summarize this content using one of the OpenAI models, e.g. gpt-4o-mini
- Can you make the summary very clean and professional?


In [None]:
import requests
from bs4 import BeautifulSoup
from openai import OpenAI

In [None]:
# A class to represent a Webpage

# Some websites need proper headers on the request before they will serve a
# page. This dict supplies a desktop-Chrome User-Agent string and is passed
# to requests.get() by the Website class.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    ),
}

class Website:
    """A fetched web page reduced to its title and visible body text.

    Attributes:
        url: The address that was requested.
        title: Text of the page's <title>, or "No title found".
        text: Body text with script/style/img/input elements removed,
            truncated to MAX_TEXT_CHARS characters ("" if the page has
            no <body>).
    """

    # Upper bound on the number of characters of body text that is kept.
    MAX_TEXT_CHARS = 60000

    # Seconds to wait on the server before giving up; without a timeout
    # requests.get() can block forever on an unresponsive host.
    REQUEST_TIMEOUT = 30

    def __init__(self, url):
        """
        Create this Website object from the given url using the BeautifulSoup library

        Args:
            url: Address of the page to download.

        Raises:
            requests.exceptions.HTTPError: The server answered with a 4xx/5xx
                status, so there is no real page content to summarize.
            requests.exceptions.RequestException: The request failed or
                timed out.
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=self.REQUEST_TIMEOUT)
        # Fail loudly instead of treating an error page as the article.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # .string is None when <title> is empty or contains nested tags, so
        # fall back in that case as well as when the tag is missing.
        title = soup.title.string if soup.title else None
        self.title = title if title else "No title found"

        body = soup.body
        if body is None:
            # Nothing to extract (e.g. a non-HTML or fragment response).
            self.text = ""
            return

        # Drop elements that carry no readable prose before extracting text.
        for irrelevant in body.find_all(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = body.get_text(separator="\n", strip=True)[:self.MAX_TEXT_CHARS]

In [None]:
# Try the scraper on the Persian Wikipedia article for Hafez (the URL path is
# the percent-encoded title). Swap in another URL to experiment.
hafez_wiki = Website("https://fa.wikipedia.org/wiki/%D8%AD%D8%A7%D9%81%D8%B8")

# Show the page title, then the extracted text on the following lines.
print(hafez_wiki.title, hafez_wiki.text, sep="\n")

In [None]:
# System prompt sent with every request. You can experiment with this later,
# e.g. changing the last sentence to "Respond in markdown in Spanish."
system_prompt = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown."
)

In [None]:
# A function that writes a User Prompt that asks for summaries of websites:

def user_prompt_for(website):
    """Build the user message asking for a Persian summary of a page.

    Args:
        website: Object exposing ``title`` and ``text`` attributes.

    Returns:
        A string made of a heading naming the page title, the summary
        instructions, and finally the page text.
    """
    heading = f"You are looking at a website titled {website.title}"
    instructions = (
        "\nThe contents of this website is as follows; "
        "please provide a short summary of this website in markdown. "
        "The summary should be in Persian and be no longer than 200 words. "
        "If it includes news or announcements, then summarize these too.\n\n"
    )
    # The page text goes last so the instructions precede the content.
    return heading + instructions + website.text

In [None]:
# Preview the complete user prompt (heading, instructions, then the scraped
# page text) before it is sent to the model.
print(user_prompt_for(hafez_wiki))

In [None]:
# No credentials are passed here, so the client has to find them on its own
# (presumably the OPENAI_API_KEY environment variable — confirm for your setup).
client = OpenAI()

# One request: the system message sets the assistant's role, the user message
# carries the summary instructions plus the scraped page text.
response = client.responses.create(
    model="gpt-4o-mini",
    input=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt_for(hafez_wiki)},
    ],
)

# Print the text of the model's reply.
print(response.output_text)