5) Develop code showcasing a networked program, including retrieving an
image over HTTP, parsing HTML, and scraping the web.


In [12]:
# Install necessary libraries
# !pip install requests beautifulsoup4

import requests
from bs4 import BeautifulSoup
import os
import shutil

# Step 1: Define the URL to scrape
url = "https://en.wikipedia.org/wiki/Main_Page"  # Wikipedia main page

# Step 2: Make an HTTP GET request to fetch the HTML content.
# A timeout is passed because requests waits indefinitely by default.
# Connection-level failures (DNS errors, refused connections, timeouts) are
# caught so they are reported like a bad status code instead of a traceback.
try:
    response = requests.get(url, timeout=10)
except requests.RequestException as exc:
    print(f"Failed to fetch the website: {exc}")
    raise SystemExit(1)

if response.status_code == 200:
    print("Website fetched successfully.")
else:
    print(f"Failed to fetch the website. Status code: {response.status_code}")
    # SystemExit is raised directly: the exit() helper is injected by the
    # site module and is not available in every interpreter setup. A non-zero
    # status marks the run as failed.
    raise SystemExit(1)

# Step 3: Parse the HTML using BeautifulSoup
soup = BeautifulSoup(response.content, 'html.parser')

# Step 4: Extract the first image source (you can modify to target specific elements)
# src=True restricts the search to <img> tags that carry a src attribute, so
# an earlier <img> without one no longer hides the images that follow it.
image_tag = soup.find('img', src=True)
if image_tag is None:
    print("No image found on the page.")
    # Raised directly instead of calling exit(), which is only injected by
    # the site module; the non-zero status marks the run as failed.
    raise SystemExit(1)

# urljoin returns absolute URLs unchanged and resolves relative and
# protocol-relative ("//host/path") ones against the page URL, so no
# separate "does it start with http" check is needed.
image_url = requests.compat.urljoin(url, image_tag['src'])
print(f"Image URL: {image_url}")

# Step 5: Retrieve the image over HTTP
# The response is used as a context manager so the streamed connection is
# released even if writing the file fails; the timeout prevents an
# indefinite hang on an unresponsive server.
try:
    with requests.get(image_url, stream=True, timeout=10) as image_response:
        if image_response.status_code == 200:
            print("Image retrieved successfully.")
            # Step 6: Save the image locally
            # Derive the filename from the URL path only, so a query string
            # ("logo.png?v=2") does not end up in the name; fall back to a
            # fixed name when the path ends in "/" and has no basename.
            url_path = requests.compat.urlparse(image_url).path
            image_filename = os.path.basename(url_path) or "downloaded_image"
            # response.raw is the undecoded byte stream; ask urllib3 to undo
            # any gzip/deflate Content-Encoding while it is being copied.
            image_response.raw.decode_content = True
            with open(image_filename, 'wb') as f:
                shutil.copyfileobj(image_response.raw, f)
            print(f"Image saved as {image_filename}.")
        else:
            print(f"Failed to fetch the image. Status code: {image_response.status_code}")
except requests.RequestException as exc:
    print(f"Failed to fetch the image: {exc}")

# Step 7: Additional scraping -- print every hyperlink on the page
print("\nExtracting all hyperlinks from the page:")
# href=True keeps only the <a> tags that actually have an href attribute.
anchors = soup.find_all('a', href=True)
for anchor in anchors:
    # Resolve each href against the page URL so relative links print as
    # absolute URLs.
    print(requests.compat.urljoin(url, anchor['href']))


Website fetched successfully.
Image URL: https://en.wikipedia.org/static/images/icons/wikipedia.png
Image retrieved successfully.
Image saved as wikipedia.png.

Extracting all hyperlinks from the page:
https://en.wikipedia.org/wiki/Main_Page#bodyContent
https://en.wikipedia.org/wiki/Main_Page
https://en.wikipedia.org/wiki/Wikipedia:Contents
https://en.wikipedia.org/wiki/Portal:Current_events
https://en.wikipedia.org/wiki/Special:Random
https://en.wikipedia.org/wiki/Wikipedia:About
https://en.wikipedia.org/wiki/Wikipedia:Contact_us
https://en.wikipedia.org/wiki/Help:Contents
https://en.wikipedia.org/wiki/Help:Introduction
https://en.wikipedia.org/wiki/Wikipedia:Community_portal
https://en.wikipedia.org/wiki/Special:RecentChanges
https://en.wikipedia.org/wiki/Wikipedia:File_upload_wizard
https://en.wikipedia.org/wiki/Main_Page
https://en.wikipedia.org/wiki/Special:Search
https://donate.wikimedia.org/?wmf_source=donate&wmf_medium=sidebar&wmf_campaign=en.wikipedia.org&uselang=en
https://en