In [None]:
from collections import deque
from urllib.parse import urlparse, urljoin

import requests
from bs4 import BeautifulSoup

def crawl_web(seed_url, max_depth=3, timeout=10):
    """Crawl pages breadth-first from ``seed_url`` and print each page's title.

    Every page that answers with HTTP 200 is reported on stdout (depth, URL
    and ``<title>`` text) and its ``<a href>`` links are scheduled for a visit
    one level deeper. Each distinct URL is scheduled at most once, and only
    ``http``/``https`` links are followed.

    Args:
        seed_url: URL the crawl starts from (depth 0).
        max_depth: Deepest link level to fetch; a negative value fetches
            nothing.
        timeout: Seconds to wait on a server before abandoning a request.

    Returns:
        None. Results and errors are printed.
    """
    # Every URL ever scheduled. Checking on enqueue (rather than on fetch)
    # keeps duplicates out of the queue and stops failing URLs from being
    # requested again each time another page links to them.
    scheduled = {seed_url}
    # FIFO of (url, depth); deque pops from the left in O(1).
    queue = deque([(seed_url, 0)])

    while queue:
        url, depth = queue.popleft()

        if depth > max_depth:
            continue

        # Best-effort crawl: any failure on one page is reported and the
        # crawl moves on to the next queued URL.
        try:
            response = requests.get(url, timeout=timeout)
            if response.status_code != 200:
                continue

            print(f"Depth {depth}: Crawling {url}")

            soup = BeautifulSoup(response.text, 'html.parser')

            # .string is None for an empty <title> or one containing nested
            # tags, so guard before stripping.
            title_text = soup.title.string if soup.title else None
            page_title = title_text.strip() if title_text else "No Title"
            print(f"Title: {page_title}")

            if depth >= max_depth:
                # Children would sit beyond max_depth; don't bother queuing.
                continue

            for link in soup.find_all('a'):
                href = link.get('href')
                if not href or href.startswith('#'):
                    continue

                parts = urlparse(urljoin(url, href))
                if parts.scheme not in ('http', 'https'):
                    # mailto:, javascript:, tel: ... cannot be fetched.
                    continue

                # Drop the fragment so page#a and page#b count as one page.
                absolute_url = parts._replace(fragment='').geturl()
                if absolute_url not in scheduled:
                    scheduled.add(absolute_url)
                    queue.append((absolute_url, depth + 1))

        except Exception as e:
            print(f"Error while crawling {url}: {str(e)}")

if __name__ == "__main__":
    # Starting point of the crawl; swap in any site you want to explore.
    seed_url = "https://google.com"
    # Pages deeper than this many link levels from the seed are skipped.
    max_depth = 3

    crawl_web(seed_url, max_depth=max_depth)


Depth 0: Crawling https://google.com
Title: Google
Depth 1: Crawling https://www.google.com/imghp?hl=en&tab=wi
Title: Google Images
Depth 1: Crawling https://maps.google.co.in/maps?hl=en&tab=wl
Title: Google Maps
Depth 1: Crawling https://play.google.com/?hl=en&tab=w8
Title: Android Apps on Google Play
Depth 1: Crawling https://www.youtube.com/?tab=w1
Title: YouTube
Depth 1: Crawling https://news.google.com/?tab=wn
Title: Google News
Depth 1: Crawling https://mail.google.com/mail/?tab=wm
Title: Gmail
Depth 1: Crawling https://drive.google.com/?tab=wo
Title: Google Drive: Sign-in
Depth 1: Crawling https://www.google.co.in/intl/en/about/products?tab=wh
Title: Browse All of Google's Products & Services - Google
Depth 1: Crawling http://www.google.co.in/history/optout?hl=en
Title: Google - Search Customization
Depth 1: Crawling https://google.com/preferences?hl=en
Title: Search Settings
Depth 1: Crawling https://accounts.google.com/ServiceLogin?hl=en&passive=true&continue=https://www.googl