In [49]:
from bs4 import BeautifulSoup
import requests
from transformers import pipeline

In [62]:
def scrape_and_summarize(url, timeout=10):
    """Download a web page, pull out its title, headings and paragraphs, and summarize the text.

    Parameters
    ----------
    url : str
        Address of the page to fetch.
    timeout : float or tuple, optional
        Forwarded to ``requests.get`` so that an unresponsive server cannot
        block the notebook forever. Defaults to 10 seconds.

    Returns
    -------
    dict or str
        On success, a dict with the keys ``"title"`` (str), ``"headings"``
        (list of str) and ``"summary"`` (str). On failure, a plain message
        string describing the problem (fetch error, nothing to summarize,
        or summarization error). Callers distinguish the two with
        ``isinstance(result, dict)``.
    """
    # Step 1: fetch the web page content.
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        html_content = response.text
    except requests.exceptions.RequestException as e:
        return f"Error fetching the URL: {e}"

    # Step 2: parse the HTML and extract the pieces we care about.
    soup = BeautifulSoup(html_content, 'html.parser')

    # `.string` is None when <title> is empty or holds nested markup, so read
    # the tag's text instead and fall back to the placeholder when it is blank.
    title = soup.title.get_text(strip=True) if soup.title else ""
    if not title:
        title = "No title found"

    # Keep only non-empty entries so blank tags do not show up as "- " later.
    heading_texts = (h.get_text(strip=True) for h in soup.find_all(['h1', 'h2', 'h3']))
    headings = [text for text in heading_texts if text]
    paragraph_texts = (p.get_text(strip=True) for p in soup.find_all('p'))
    paragraphs = [text for text in paragraph_texts if text]

    content = "\n".join(headings + paragraphs)

    if not content.strip():
        return "No meaningful content extracted from the webpage."

    # Cheap pre-cut by whitespace-separated words. This is NOT a token count
    # (one word can be several tokens); the real limit is enforced by
    # `truncation=True` in the summarizer call below.
    max_words = 1024
    content = " ".join(content.split()[:max_words])

    # Step 3: summarize the content with an open-source NLP model.
    try:
        summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
        summary = summarizer(
            content,
            max_length=150,   # upper bound on summary length, in tokens
            min_length=100,   # lower bound on summary length, in tokens
            do_sample=False,
            truncation=True,  # clip input to the model's maximum input length
        )[0]['summary_text']
    except Exception as e:
        return f"Error during summarization: {e}"

    # Output the results.
    return {
        "title": title,
        "headings": headings,
        "summary": summary
    }

In [64]:
if __name__ == "__main__":
    # Prompt for a page address and run the scrape-and-summarize step on it.
    url = input("Enter the URL: ")
    result = scrape_and_summarize(url)

    if not isinstance(result, dict):
        # Anything that is not a dict is a failure message; show it verbatim.
        print(result)
    else:
        # Success: report the title, one bullet per heading, then the summary.
        print("Title:", result["title"])

        print("\nHeadings:")
        for heading in result["headings"]:
            print(f"- {heading}")

        print("\nSummary:")
        print(result["summary"])

Enter the URL:  https://www.formula1.com/en/drivers/lewis-hamilton


Device set to use cpu


Title: Lewis Hamilton - F1 Driver for Ferrari

Headings:
- Lewis Hamilton
- Biography
- Results
- Gaming

Summary:
Lewis Hamilton is one of the best drivers in the history of Formula One. The Briton has risen to the top of the all-time pole positions list ahead of his hero Ayrton Senna. He has also surged into first place in the wins column surpassing the inimitable Michael Schumacher, and then matched the legendary German’s seven world titles. Is he the G.O.A.T? Few would deny that he's in the conversation – and what's more he's got there his way.
