In [1]:

import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

def simple_web_crawler(start_url, max_pages=5):
    visited = set()
    to_visit = [start_url]
    index = {}

    while to_visit and len(visited) < max_pages:
        url = to_visit.pop(0)
        if url not in visited:
            try:
                response = requests.get(url)
                if response.status_code == 200:
                    soup = BeautifulSoup(response.text, 'html.parser')
                    title = soup.title.string if soup.title else 'No Title'
                    index[url] = title
                    print(f"Crawled: {url} -> {title}")

                    # Find all links
                    for link in soup.find_all('a', href=True):
                        absolute_link = urljoin(url, link['href'])
                        if absolute_link not in visited:
                            to_visit.append(absolute_link)
                visited.add(url)
            except Exception as e:
                print(f"Failed to crawl {url}: {e}")

    return index


start_url = 'https://www.youtube.com/'
indexed_pages = simple_web_crawler(start_url)

print("\nIndexed Pages:")
for url, title in indexed_pages.items():
    print(url, ":", title)


Crawled: https://www.youtube.com/ -> YouTube
Crawled: https://www.youtube.com/about/ -> About YouTube - YouTube
Crawled: https://www.youtube.com/about/press/ -> 
          
            
              Official YouTube Blog for Latest YouTube News & Insights
            
          
          
        
Crawled: https://www.youtube.com/about/copyright/ -> Beleid voor auteursrecht en redelijk gebruik op YouTube - Hoe YouTube werkt
Crawled: https://www.youtube.com/t/contact_us/ -> Contact opnemen

Indexed Pages:
https://www.youtube.com/ : YouTube
https://www.youtube.com/about/ : About YouTube - YouTube
https://www.youtube.com/about/press/ : 
          
            
              Official YouTube Blog for Latest YouTube News & Insights
            
          
          
        
https://www.youtube.com/about/copyright/ : Beleid voor auteursrecht en redelijk gebruik op YouTube - Hoe YouTube werkt
https://www.youtube.com/t/contact_us/ : Contact opnemen
