In [None]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

# Function to fetch and parse a webpage
def fetch_page(url, timeout=10):
    """Download *url* and parse the response body as HTML.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Requests
            applies no timeout unless one is passed, so without this a
            single unresponsive host could stall the caller indefinitely.

    Returns:
        A ``BeautifulSoup`` tree built with the ``html.parser`` backend, or
        ``None`` if the request failed (connection problem, timeout, or an
        HTTP error status). Failures are reported on stdout.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Turn 4xx/5xx responses into HTTPError
    except requests.exceptions.RequestException as e:
        print(f"Error fetching {url}: {e}")
        return None
    return BeautifulSoup(response.text, 'html.parser')

# Function to extract all URLs from a webpage
def extract_urls(soup, base_url):
    """Collect the absolute form of every hyperlink target in a parsed page.

    Args:
        soup: Parsed document exposing ``find_all`` the way BeautifulSoup does.
        base_url: URL used to resolve relative ``href`` values.

    Returns:
        A set of absolute URL strings; identical targets are collapsed.
    """
    anchors = soup.find_all('a', href=True)  # only <a> tags that carry an href
    return {urljoin(base_url, anchor['href']) for anchor in anchors}

# Function to crawl a given URL
def crawl(url, max_pages=10):
    """Crawl outward from *url*, following links found on each fetched page.

    Args:
        url: Seed URL where the crawl starts.
        max_pages: Maximum number of successfully fetched pages; failed
            fetches do not count towards this limit.

    Progress lines and a final summary are printed to stdout; nothing is
    returned.
    """
    pages_crawled = 0
    urls_to_crawl = {url}
    visited_urls = set()     # every URL already attempted, success or failure
    discovered_urls = {url}  # the seed plus every link seen on a fetched page

    while urls_to_crawl and pages_crawled < max_pages:
        # set.pop() removes an arbitrary element, so visit order is unspecified.
        current_url = urls_to_crawl.pop()
        # Record the attempt before fetching so that a URL whose fetch fails
        # is not queued again whenever another page links to it. Because only
        # unvisited URLs are ever queued, no membership check is needed here.
        visited_urls.add(current_url)

        print(f"Crawling: {current_url}")
        soup = fetch_page(current_url)
        if soup is None:  # fetch failed; fetch_page already reported why
            continue

        new_urls = extract_urls(soup, current_url)
        discovered_urls.update(new_urls)
        urls_to_crawl.update(new_urls - visited_urls)
        pages_crawled += 1

    print("\nCrawling finished.")
    print(f"Pages crawled: {pages_crawled}")
    # Report everything discovered, not just the pages that were fetched.
    print(f"Total unique URLs found: {len(discovered_urls)}")

# Script entry point: start the crawler when this file is run directly
if __name__ == "__main__":
    # Replace the URL below with the site you want to crawl.
    crawl("https://example.com")
