# Website Summarizer Using the Gemini Free API

Install required libraries

In [None]:
!pip install google-generativeai beautifulsoup4 requests



Import libraries and configure Gemini

In [None]:

import os

import google.generativeai as genai
import requests
from bs4 import BeautifulSoup

# Take the key from the GEMINI_API_KEY environment variable when it is set, so
# the secret does not have to be pasted into (and saved with) the notebook.
# The original placeholder remains the fallback value.
genai.configure(api_key=os.environ.get("GEMINI_API_KEY", "your api key"))

# Shared model handle; summarize_website() sends its prompts through it.
model = genai.GenerativeModel("gemini-2.5-flash")




All support for the `google.generativeai` package has ended. It will no longer be receiving 
updates or bug fixes. Please switch to the `google.genai` package as soon as possible.
See README for more details:

https://github.com/google-gemini/deprecated-generative-ai-python/blob/main/README.md

  loader.exec_module(module)


In [None]:
# Browser-like User-Agent sent with the practice request below.
header = {
    "user-agent": (
        "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/144.0.0.0 Mobile Safari/537.36"
    ),
}
# Always pass a timeout: without one, requests waits indefinitely on a server
# that never answers. 10 s matches the other requests in this notebook.
response = requests.get("https://www.wikipedia.org", headers=header, timeout=10)

from bs4 import BeautifulSoup

# Self-contained practice page for the BeautifulSoup examples below. It mixes
# headings, paragraphs (including non-ASCII text), links, images and
# non-content tags (style/script/noscript).
htm = """<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Web Scraping Practice Page</title>

    <style>
        body {
            font-family: Arial;
            background-color: #f4f4f4;
        }
    </style>

    <script>
        console.log("This is a script tag for practice");
    </script>
</head>

<body>

    <h1>Main Heading - Web Scraping</h1>
    <h2>Sub Heading - BeautifulSoup Practice</h2>
    <h3>Minor Heading - HTML Parsing</h3>

    <p>This is the first paragraph for web scraping practice.</p>
    <p>This paragraph contains special symbols like ₹, ©, ™.</p>
    <p>This paragraph contains Indian language text: नमस्ते भारत</p>

    <a href="https://www.google.com">Google</a><br>
    <a href="https://www.github.com">GitHub</a><br>
    <a href="https://www.python.org">Python</a><br>

    <img src="image1.jpg" alt="Sample Image 1">
    <img src="https://example.com/image2.png" alt="Sample Image 2">

    <noscript>
        This content should be ignored while scraping clean text.
    </noscript>

    <footer>
        <p>Footer paragraph for testing.</p>
    </footer>

</body>
</html>
"""
# Parse the practice page once; every lookup below works on this tree.
soup = BeautifulSoup(htm, "html.parser")
# Other lookups worth trying:
# print(soup.title.text)
# print(soup.head)
# print(soup.find("h1"))

# Anchors: the first match, the whole result list, each link text, the count.
anchor_list = soup.find_all("a")
print(soup.find("a").text)
print(anchor_list)
for anchor in anchor_list:
    print(anchor.text)
print(len(anchor_list))

# Headings, grouped by level (all h1 first, then h2, then h3).
for tag in ("h1", "h2", "h3"):
    for heading in soup.find_all(tag):
        print(heading.text)

# Save every paragraph's text, one per line. The explicit UTF-8 encoding keeps
# the non-ASCII paragraphs intact regardless of the platform default.
paragraph_list = soup.find_all("p")
with open("output.txt", "w", encoding="utf-8") as f:
    f.writelines(f"{paragraph.text}\n" for paragraph in paragraph_list)

# Link text together with its target. The anchors were already collected
# above, so reuse them instead of searching the tree again.
hyperlink_list = anchor_list
for link in hyperlink_list:
    # Single quotes inside the f-string: reusing the outer double quotes is a
    # SyntaxError on Python versions before 3.12.
    print(f"text {link.text} link {link.get('href')}")

Google
[<a href="https://www.google.com">Google</a>, <a href="https://www.github.com">GitHub</a>, <a href="https://www.python.org">Python</a>]
Google
GitHub
Python
3
Main Heading - Web Scraping
Sub Heading - BeautifulSoup Practice
Minor Heading - HTML Parsing
text Google link https://www.google.com
text GitHub link https://www.github.com
text Python link https://www.python.org


Extract website text

In [None]:
def extract_website_text(url, timeout=10, verbose=False):
    """Download a web page and return its text as one whitespace-normalized string.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server, passed to ``requests.get``.
        verbose: When true, also print the parsed document (before cleanup)
            and its text (after cleanup). These dumps used to be emitted on
            every call; they are now opt-in debugging output.

    Returns:
        The page text with ``<script>``, ``<style>`` and ``<noscript>``
        elements removed and every run of whitespace collapsed to one space.

    Raises:
        requests.HTTPError: If the server responds with an error status.
        requests.RequestException: For other request failures such as
            connection errors or timeouts.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (compatible; EducationalSummarizer/1.0)"
    }

    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()

    # Parse the raw bytes instead of response.text: when the server declares no
    # charset, requests falls back to ISO-8859-1 for text content and garbles
    # UTF-8 pages. Given bytes, BeautifulSoup detects the encoding itself
    # (e.g. from a <meta charset>); a charset the server did declare is still
    # honoured through from_encoding.
    content_type = response.headers.get("Content-Type", "")
    declared = response.encoding if "charset" in content_type.lower() else None
    soup = BeautifulSoup(response.content, "html.parser", from_encoding=declared)

    if verbose:
        print(soup)

    # Drop elements whose content is code or fallback markup, not page text.
    for tag in soup(["script", "style", "noscript"]):
        tag.decompose()

    if verbose:
        print(soup.get_text())

    # split() with no arguments breaks on any whitespace run, so joining with
    # single spaces flattens newlines, tabs and repeated blanks.
    text = soup.get_text(separator=" ")
    return " ".join(text.split())



In [None]:
# Fetch the Wikipedia portal with a browser-like User-Agent and list the tags
# that the text extractor strips out.
header = {
    "user-agent": (
        "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/144.0.0.0 Mobile Safari/537.36"
    ),
}
res = requests.get(
    "https://www.wikipedia.org",
    headers=header,
    timeout=10,
)
print(res)

soup = BeautifulSoup(res.text, "html.parser")
# find_all with a list of names matches any of them (same as calling soup(...)).
for tag in soup.find_all(["script", "style", "noscript"]):
    print(tag)


In [None]:
# Smoke-test the extractor on the Wikipedia portal page.
extract_website_text("https://www.wikipedia.org")



Summarize using Gemini

In [None]:

def summarize_website(text, max_chars=8000):
    """Ask the Gemini model for a simple-language summary of website text.

    Uses the module-level ``model`` object to generate the summary.

    Args:
        text: Extracted page text to summarize.
        max_chars: Maximum number of leading characters of ``text`` placed in
            the prompt; the rest is dropped. Defaults to 8000, the cap this
            function previously hard-coded.

    Returns:
        The ``text`` attribute of the model's response.
    """
    # Only the head of the page is sent (presumably to keep the prompt within
    # the model's input limits; confirm before raising the cap).
    excerpt = text[:max_chars]
    prompt = f"""Summarize the following website content in simple language:

{excerpt}
"""
    response = model.generate_content(prompt)
    return response.text


Provide URL and generate summary

In [None]:

# End-to-end run on a fixed page: fetch and clean the article text, then have
# the model summarize it.
url = "https://en.wikipedia.org/wiki/Artificial_intelligence"
content = extract_website_text(url)
summary = summarize_website(content)

print(summary)


In [None]:
# Interactive run: ask for a URL, extract its text and print the summary.
url = input("🔗 Enter website URL to summarize: ").strip()
if not url:
    # Fail here with a clear message instead of deep inside requests.
    raise ValueError("No URL entered; nothing to summarize.")
if "://" not in url:
    # requests rejects scheme-less addresses, so treat a bare "example.com"
    # as an HTTPS URL.
    url = f"https://{url}"

# NOTE(review): no robots.txt or permission check runs before this message is
# printed; confirm whether one was intended.
print("Access allowed. Extracting content...\n")
content = extract_website_text(url)

print("Generating summary...\n")
summary = summarize_website(content)
print("WEBSITE SUMMARY:\n")
print(summary)
