In [28]:
%pip install requests beautifulsoup4 google-generativeai python-dotenv

import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
import google.generativeai as genai
from urllib.parse import urljoin, urlparse
import time
import sys
import textwrap

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [16]:
load_dotenv()
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))

In [17]:
class Website:
    def __init__(self, url: str):
        self.url = url
        resp = requests.get(url)
        resp.raise_for_status()  # Raise an exception for failed HTTP requests
        soup = BeautifulSoup(resp.content, 'html.parser')
        self.title = soup.title.string if soup.title else 'No title'
        
        # Remove unwanted tags
        for tag in soup.body(['script', 'style', 'img', 'input']):
            tag.decompose()
        self.text = soup.body.get_text(separator='\n', strip=True)
        
        # Extract internal and external links
        self.domain = urlparse(url).netloc
        self.internal_links = []
        self.external_links = []
        for a_tag in soup.find_all('a', href=True):
            href = a_tag['href']
            # Resolve relative URLs to absolute
            absolute_url = urljoin(url, href)
            # Validate URL
            parsed = urlparse(absolute_url)
            if parsed.scheme in ('http', 'https') and parsed.netloc:
                if parsed.netloc == self.domain:
                    self.internal_links.append(absolute_url)
                else:
                    self.external_links.append(absolute_url)
        # Remove duplicates while preserving order
        self.internal_links = list(dict.fromkeys(self.internal_links))
        self.external_links = list(dict.fromkeys(self.external_links))

In [35]:
system_prompt = (
    "You are an assistant summarizing a website. Provide a concise plain text summary, "
    "ignoring navigation text, and include any news or announcements. "
    "List up to 3 example internal and external links from the website, if available, "
    "in the format:\n\n"
    "Internal Links:\n"
    "- [Link Text](URL)\n\n"
    "External Links:\n"
    "- [Link Text](URL)\n\n"
    "Ensure the summary is brief, multi-line, and fits within half the vertical display space."
)

In [36]:
def user_prompt_for(site: Website) -> str:
    # Get up to 3 example links to match the original format
    internal_links = site.internal_links[:3]
    external_links = site.external_links[:3]
    
    internal_links_text = "\n".join([f"- [{link}]({link})" for link in internal_links]) if internal_links else "- None"
    external_links_text = "\n".join([f"- [{link}]({link})" for link in external_links]) if external_links else "- None"
    
    return (
        f"Website title: {site.title}\n\n"
        "Content:\n\n"
        f"{site.text[:2000]}\n\n"  # Keep sufficient context
        "Internal Links:\n"
        f"{internal_links_text}\n\n"
        "External Links:\n"
        f"{external_links_text}"
    )

In [21]:
def summarize_with_gemini(url: str, model: str = "gemini-1.5-flash") -> str:
    site = Website(url)
    model = genai.GenerativeModel(model)
    prompt = f"{system_prompt}\n\n{user_prompt_for(site)}"
    response = model.generate_content(prompt)
    return response.text

In [37]:
def display_summary(url: str, delay: float = 0.03, width: int = 80):
    summary = summarize_with_gemini(url)
    # Wrap the entire summary at once to preserve multi-line structure
    wrapped_summary = textwrap.fill(summary, width=width, break_long_words=False, replace_whitespace=False)
    
    # Display with typing effect, line by line
    for line in wrapped_summary.split('\n'):
        for char in line:
            sys.stdout.write(char)
            sys.stdout.flush()
            time.sleep(delay)  # Typing effect with delay
        sys.stdout.write('\n')  # New line after each wrapped line
    
    sys.stdout.write('\n')  # Extra newline at the end

In [38]:
# Example usage
display_summary("https://www.geeksforgeeks.org/")

GeeksforGeeks is an all-in-one learning portal offering courses, tutorials, and
practice problems in various domains like Data Structures & Algorithms (DSA),
Web Development, AI/ML, and more.  They feature numerous courses with ratings
and thousands of interested users, covering topics from beginner to advanced
levels.  Job opportunities and a placement training program are also advertised.
The site promotes several courses, including a Full Stack Development course and
a DSA course.

Internal Links:
- [Homepage](https://www.geeksforgeeks.org/)
-
[DSA Tutorial](https://www.geeksforgeeks.org/learn-data-structures-and-
algorithms-dsa-tutorial/)
- [Explore](https://www.geeksforgeeks.org/explore)
External Links:
- [DSA Self-Paced
Course](https://practice.geeksforgeeks.org/courses/dsa-self-paced)
- [Facebook
Page](https://www.facebook.com/geeksforgeeks.org/)
- [Instagram
Page](https://www.instagram.com/geeks_for_geeks/)

