In [None]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urlparse
import csv
import os

# Function to fetch and summarize a webpage
def summarize_webpage(url, timeout=10):
    """Fetch *url* and return the text of its fourth and fifth paragraphs.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up; passed
            to ``requests.get``. Defaults to 10.

    Returns:
        The text of the fourth and fifth ``<p>`` elements joined by a single
        space (an empty string when the page has fewer than four
        paragraphs). This function never raises: on any failure it returns
        a string of the form ``"An error occurred: <details>"``.
    """
    try:
        # Without a timeout, requests waits indefinitely on an unresponsive
        # host, which would stall processing of every remaining URL.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')
        paragraphs = soup.find_all('p')

        # Only paragraphs at indices 3 and 4 (the fourth and fifth) are used.
        return ' '.join(p.get_text() for p in paragraphs[3:5])
    except Exception as e:
        # Deliberately broad: this is a best-effort helper whose contract is
        # to report failures as text rather than propagate them.
        return f"An error occurred: {e}"

    
# Read list of URLs from a text file
def read_urls_from_file(filename):
    """Return the URLs listed in *filename*, one per line.

    Leading and trailing whitespace is removed from every line, and lines
    that are empty after stripping are skipped, so blank lines in the file
    do not come back as empty "URLs".

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of non-empty, whitespace-stripped lines in file order.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Explicit encoding so the result does not depend on the platform locale.
    with open(filename, 'r', encoding='utf-8') as file:
        stripped_lines = (line.strip() for line in file)
        return [url for url in stripped_lines if url]

# Create a directory to store summaries if it does not exist yet.
# exist_ok=True avoids the check-then-create race of testing
# os.path.exists() first.
output_directory = 'summaries'
os.makedirs(output_directory, exist_ok=True)

# Read URLs from the input file
input_filename = 'urls.txt'
urls = read_urls_from_file(input_filename)

# Create and open a CSV file to store the summaries
csv_filename = 'summaries.csv'
csv_path = os.path.join(output_directory, csv_filename)
# newline='' lets the csv module control line endings itself.
with open(csv_path, 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(['URL', 'Summary'])  # Write header

    # Process each URL: one row per URL, holding either the summary text or
    # the error string returned by summarize_webpage.
    for url in urls:
        summary = summarize_webpage(url)
        csv_writer.writerow([url, summary])

        print(f"Summary for {url} has been added to {csv_filename}")
