In [1]:
!pip install beautifulsoup4
!pip install requests



In [2]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse

In [3]:
visited = set()
unique_links = set()

def get_all_links(url, base_url, max_depth, current_depth=0):
    if current_depth > max_depth or url in visited:
        return
    
    visited.add(url)
    try:
        # Send a GET request to the URL
        response = requests.get(url)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Failed to fetch {url}: {e}")
        return
    
    # Parse the content of the request with BeautifulSoup
    soup = BeautifulSoup(response.content, 'html.parser')
    
    # Find all links
    links = soup.find_all('a')
    
    for link in links:
        href = link.get('href')
        if href:
            full_url = urljoin(base_url, href)
            if urlparse(full_url).netloc == urlparse(base_url).netloc:
                if full_url not in unique_links:
                    unique_links.add(full_url)
                    print(full_url)
                    get_all_links(full_url, base_url, max_depth, current_depth + 1)

# URL of the website to scrape
website_url = 'https://u.ae/en/'
base_url = 'https://u.ae'

get_all_links(website_url, base_url, max_depth=2)

https://u.ae/
https://u.ae/en
https://u.ae/ar-ae
https://u.ae
https://u.ae/en/footer/accessibility
https://u.ae/en/help
https://u.ae/en/
https://u.ae/en/information-and-services
https://u.ae/en/about-the-uae
https://u.ae/en/participate
https://u.ae/en/media
https://u.ae#homeServices
https://u.ae/en/information-and-services/top-government-services
https://u.ae/en/information-and-services/visa-and-emirates-id
https://u.ae/en/information-and-services/jobs
https://u.ae/en/information-and-services/education
https://u.ae/en/information-and-services/business
https://u.ae/en/information-and-services/moving-to-the-uae
https://u.ae/en/information-and-services/justice-safety-and-the-law
https://u.ae/en/information-and-services/visiting-and-exploring-the-uae
https://u.ae/en/media/news
https://u.ae#2024-2030
https://u.ae#2050-2117
https://u.ae/en/about-the-uae/uae-in-the-future/uae-future#2024-2030
https://u.ae/en/about-the-uae/uae-in-the-future/uae-future#2050-2117
https://u.ae/en/about-the-uae/ua

In [5]:
# List of URLs to scrape
UAE_website_urls = [
    "https://u.ae/en/#/",
    "https://u.ae/en/information-and-services",
    "https://u.ae/en/about-the-uae",
    "https://u.ae/en/participate",
    "https://u.ae/en/media",
    "https://u.ae/en/information-and-services/top-government-services",
    "https://u.ae/en/information-and-services/visa-and-emirates-id",
    "https://u.ae/en/information-and-services/visa-and-emirates-id/residence-visas",
    "https://u.ae/en/information-and-services/visa-and-emirates-id/residence-visas/golden-visa",
    "https://u.ae/",
    "https://u.ae/en",
    "https://u.ae",
    "https://u.ae/en/footer/accessibility",
    "https://u.ae/en/help",
    "https://u.ae/en/",
    "https://u.ae/en/information-and-services",
    "https://u.ae/en/about-the-uae",
    "https://u.ae/en/participate",
    "https://u.ae/en/media",
    "https://u.ae#homeServices",
    "https://u.ae/en/information-and-services/top-government-services",
    "https://u.ae/en/information-and-services/visa-and-emirates-id",
    "https://u.ae/en/information-and-services/jobs",
    "https://u.ae/en/information-and-services/education",
    "https://u.ae/en/information-and-services/business",
    "https://u.ae/en/information-and-services/moving-to-the-uae",
    "https://u.ae/en/information-and-services/justice-safety-and-the-law",
    "https://u.ae/en/information-and-services/visiting-and-exploring-the-uae",
    "https://u.ae/en/media/news",
    "https://u.ae#2024-2030",
    "https://u.ae#2050-2117",
    "https://u.ae/en/about-the-uae/uae-in-the-future/uae-future#2024-2030",
    "https://u.ae/en/about-the-uae/uae-in-the-future/uae-future#2050-2117",
    "https://u.ae/en/about-the-uae/uae-in-the-future/uae-future",
    "https://u.ae/en/about-the-uae/strategies-initiatives-and-awards/strategies-plans-and-visions/business/uae-unified-industrial-brand-identity-make-it-in-the-emirates",
    "https://u.ae/en/about-the-uae/science-and-technology/key-sectors-in-science-and-technology/space-science-and-technology",
    "https://u.ae/en/about-the-uae/founders-of-the-union#abu-dhabi",
    "https://u.ae/en/about-the-uae/uae-competitiveness/imd-world-competitiveness-yearbook/the-uae-performance",
    "https://u.ae#polls",
    "https://u.ae#consultations",
    "https://u.ae/en/information-and-services/justice-safety-and-the-law/handling-emergencies",
    "https://u.ae/en/help/contact-us/this-portal",
    "https://u.ae/en/help/contact-us/the-government",
    "https://u.ae/en/more/contact-officials",
    "https://u.ae/en/resources/faqs",
    "https://u.ae/en/resources",
    "https://u.ae/en/footer/map",
    "https://u.ae/en/footer/about-this-portal",
    "https://u.ae/en/footer/newsletter",
    "https://u.ae/uaepasstoken?url=https://sharik.ae",
    "https://u.ae/en/footer/disclaimer",
    "https://u.ae/en/footer/citizen-charter",
    "https://u.ae/en/footer/digital-participation-policy",
    "https://u.ae/en/footer/sitemap",
    "https://u.ae/en/statistics",
    "https://u.ae/en/footer/privacy-policy",
    "https://u.ae/en/footer/copyright",
    "https://u.ae/en/footer/terms-and-conditions"
]

# Function to scrape content from a UAE website URL
def scrape_content(url):
    try:
        response = requests.get(url)
        response.raise_for_status()  # Raise an error for bad status codes
        soup = BeautifulSoup(response.content, 'html.parser')
        return soup.get_text()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching {url}: {e}")
        return ""

# Scrape content from all UAE website URLs
UAE_website_text = {url: scrape_content(url) for url in UAE_website_urls}

# Display the UAE website text (for demonstration, displaying the first few characters of each)
for url, text in UAE_website_text.items():
    print(f"URL: {url}\nContent Preview: {text[:100]}\n")

# Scrape content from all UAE webstie urls
UAE_website_text = {url: scrape_content(url) for url in UAE_website_urls
                }

# Display Tthe UAE website text
UAE_website_text

URL: https://u.ae/en/#/
Content Preview: 













Home | The Official Portal of the UAE Government












استمع
















الع

URL: https://u.ae/en/information-and-services
Content Preview: 













Information and services | The Official Portal of the UAE Government












استمع

URL: https://u.ae/en/about-the-uae
Content Preview: 













About the UAE | The Official Portal of the UAE Government












استمع












URL: https://u.ae/en/participate
Content Preview: 













Your voice | The Official Portal of the UAE Government












استمع















URL: https://u.ae/en/media
Content Preview: 













Media | The Official Portal of the UAE Government












استمع















الع

URL: https://u.ae/en/information-and-services/top-government-services
Content Preview: 













top-government-services | The Official Portal of the UAE Government












استمع


URL: https://u.ae/en/information-and-services/visa-

{'https://u.ae/en/#/': "\n\n\n\n\n\n\n\n\n\n\n\n\n\nHome | The Official Portal of the UAE Government\n\n\n\n\n\n\n\n\n\n\n\n\nاستمع\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nالعربية\n\n\n\n\nOther languages\n\n\n\n\nAccessibility\n\n\n\n\nHelp\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nDisclaimer: You are using Google Translate. The UAE mGovernment is not responsible for the accuracy of information in the translated language.\nPowered by Google\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nHome\n\n\nInformation and services\n\n\nAbout the UAE\n\n\nYour voice\n\n\nMedia\n\n\n\nUAsk\n\n\r\n                                Beta\r\n                            \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nWelcome to\nThe United Arab Emirates'\r\nGovernment portalThe unified channel to transform how\r\nyou live, work, visit and invest in the UAE\n\n\nDiscover more\n\n\n\n\n\n\nInformation and services\n\n\n\n\n\nServices around the clock\n\nTop digital services provided by governm