In [None]:
# import library
import requests
from bs4 import BeautifulSoup

# Q1: Scrape Title

**Objective:**
Scrape all the titles of the books listed on the Books to Scrape website (http://books.toscrape.com/).

**Instructions:**
- Use requests to send a GET request to the Books to Scrape homepage.
- Parse the HTML content using BeautifulSoup.
- Extract the book titles, which are inside "h3" tags with a nested "a" tag, and print them.

In [None]:
# URL to scrape: the Books to Scrape homepage named in the objective above
url = 'http://books.toscrape.com/'

# Send GET request to the website; the timeout keeps a stalled connection
# from hanging the cell indefinitely
response = requests.get(url, timeout=10)

# Check if the request was successful (status code 200)
if response.status_code == 200:
    # Parse the page content using BeautifulSoup. Passing the raw bytes
    # (response.content) lets BeautifulSoup work out the encoding itself.
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all the title links: each book title is an <a> nested in an <h3>
    titles = soup.select('h3 > a')

    # Print each title
    if titles:
        for title in titles:
            # The visible link text is shortened for long titles on this site;
            # prefer the link's "title" attribute when it is present and fall
            # back to the link text otherwise.
            print(title.get('title') or title.text.strip())
    else:
        print("No titles found!")
else:
    print(f"Request failed with status code: {response.status_code}")


# Q2: Web Scraping (headlines)
**Objective:**
Scrape headlines from the website https://news.ycombinator.com/.

**Task:**
- Send a GET request to the URL using requests.
- Parse the HTML content with BeautifulSoup.
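- Extract and print the headlines inside `<a>` tags with class `storylink` (on the current Hacker News markup the headline links sit inside `<span class="titleline">` instead).
- Handle cases where no headlines are found.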

In [None]:
# URL of the news site to scrape
url = 'https://news.ycombinator.com/'

# Send GET request to the website; the timeout keeps a stalled connection
# from hanging the cell indefinitely
response = requests.get(url, timeout=10)

# Check if the request was successful (status code 200)
if response.status_code == 200:
    print("Request successful!")
else:
    print(f"Request failed with status code: {response.status_code}")

# Parse the page content using BeautifulSoup. Passing the raw bytes
# (response.content) lets BeautifulSoup work out the encoding itself.
soup = BeautifulSoup(response.content, 'html.parser')

# Debugging: Print the raw HTML content to see what we are working with
print(soup.prettify()[:1000])  # Print first 1000 characters to inspect the HTML

# Find all the headline links. The task describes <a> tags with class
# 'storylink'; that is tried first. Hacker News' markup has changed over time,
# so when nothing matches, fall back to the links nested directly inside
# <span class="titleline"> (the layout at the time of writing).
headlines = soup.find_all('a', class_='storylink')
if not headlines:
    headlines = soup.select('span.titleline > a')

# Print each headline
if headlines:
    for headline in headlines:
        print(headline.text)
else:
    print("No headlines found!")


# Q3. Web Scraping (links)
**Objective:**
Scrape all the links (from `<a>` tags) from the Wikipedia homepage (https://www.wikipedia.org/).

**Instructions:**
- Use requests to send a GET request to the Wikipedia homepage.
- Parse the HTML content using BeautifulSoup.
- Extract all the links (`href` attributes) from `<a>` tags and print them.

In [None]:
# URL to scrape
url = 'https://www.wikipedia.org/'

# Send GET request to the website. Wikimedia's User-Agent policy asks clients
# to identify themselves, and generic library defaults may be rejected, so a
# descriptive User-Agent is sent. The timeout keeps a stalled connection from
# hanging the cell indefinitely.
response = requests.get(
    url,
    headers={"User-Agent": "web-scraping-exercise/1.0 (educational notebook)"},
    timeout=10,
)

# Check if the request was successful (status code 200)
if response.status_code == 200:
    # Parse the page content using BeautifulSoup. Passing the raw bytes
    # (response.content) lets BeautifulSoup work out the encoding itself.
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all the anchor tags (<a>) which contain links
    links = soup.find_all('a')

    # Print all the links
    for link in links:
        # .get() returns None for anchors without an href instead of raising
        href = link.get('href')
        if href:
            print(href)
else:
    print(f"Request failed with status code: {response.status_code}")


# Q4. Web Scraping (quote texts)
**Objective:**
Scrape all the quote texts from the Quotes to Scrape website (https://quotes.toscrape.com/).

**Instructions:**
- Use requests to send a GET request to the Quotes to Scrape homepage.
- Parse the HTML content using BeautifulSoup.
- Extract all the quote texts, which are inside `<span>` tags with the class `text`, and print them.

In [None]:
# URL of the quotes site to scrape
url = 'https://quotes.toscrape.com/'

# Send GET request to the website; the timeout keeps a stalled connection
# from hanging the cell indefinitely
response = requests.get(url, timeout=10)

# Check if the request was successful (status code 200)
if response.status_code == 200:
    # Parse the page content using BeautifulSoup. Passing the raw bytes
    # (response.content) lets BeautifulSoup work out the encoding itself.
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all the quote texts (they are in <span> tags with class 'text').
    # The variable keeps the name `titles` from the exercise template.
    titles = soup.find_all('span', class_='text')

    # Print each quote text
    if titles:
        for title in titles:
            print(title.text)
    else:
        print("No quotes found!")
else:
    print(f"Request failed with status code: {response.status_code}")


# Q5. Scraping a Simple Website for Titles and Links
- We'll scrape a simpler, publicly accessible website like Quotes to Scrape, which is designed for learning purposes.
- URL: http://quotes.toscrape.com/

In [None]:
# URL of the quotes website
url = 'http://quotes.toscrape.com/'

# Send GET request to the website; the timeout keeps a stalled connection
# from hanging the cell indefinitely
response = requests.get(url, timeout=10)

# Check if the request was successful
if response.status_code == 200:
    print("Request successful!")
else:
    print(f"Request failed with status code: {response.status_code}")

# Parse the page content using BeautifulSoup. Passing the raw bytes
# (response.content) lets BeautifulSoup work out the encoding itself.
soup = BeautifulSoup(response.content, 'html.parser')

# Find all quote containers (they are in <div> tags with class 'quote')
quotes = soup.find_all('div', class_='quote')

# Loop through each quote and extract the text and author
for quote in quotes:
    # The quote body is a <span class="text"> and the author a
    # <small class="author"> inside each container.
    text_tag = quote.find('span', class_='text')
    author_tag = quote.find('small', class_='author')

    # find() returns None when a tag is missing, so fall back to a default
    # instead of raising AttributeError on `.text`
    text = text_tag.text.strip() if text_tag else 'No Text'
    author = author_tag.text.strip() if author_tag else 'Unknown Author'

    print(f"Quote: {text}")
    print(f"Author: {author}")
    print('-' * 40)

# Make an empty result visible rather than printing nothing at all
if not quotes:
    print("No quotes found!")


# Q6: Web Scraping eBay for Product Listings

**Objective:** Write a Python script to scrape eBay search results for a specific product (e.g., laptops), extract product details, and display them.

**Requirements:**

- Use the requests library to send an HTTP GET request to eBay’s search results page for a given product (e.g., "laptop").
- Use the BeautifulSoup library to parse the HTML content of the response.
- Extract the following details for each product listing:
  - Title: The name or description of the product.
  - Price: The price of the product.
  - Hit Count: The number of views or watchers for the product.
- Display the extracted data in a readable format with product index, title, price, and hit count.
- Handle cases where some data may be missing by providing default values.

In [None]:
# Define the URL to scrape (Example: eBay search results for "laptop")
url = "https://www.ebay.com/sch/i.html?_nkw=laptop"

# Set headers to mimic a real browser request
headers = {"User-Agent": "Mozilla/5.0"}

# Send an HTTP GET request to the website; the timeout keeps a stalled
# connection from hanging the cell indefinitely
response = requests.get(url, headers=headers, timeout=10)

# Check if the request was successful
if response.status_code == 200:
    # Parse the HTML content using BeautifulSoup. Passing the raw bytes
    # (response.content) lets BeautifulSoup work out the encoding itself.
    soup = BeautifulSoup(response.content, 'html.parser')

    # Extract product listings
    products = soup.find_all('li', class_='s-item')

    extracted_data = []
    for idx, product in enumerate(products, start=1):
        # NOTE(review): these class names are assumed from eBay's "s-item"
        # listing markup and may change without notice; confirm against the
        # live page. Matching on class only (no tag name) tolerates the
        # element type differing between listings.
        title_tag = product.find(class_='s-item__title')
        price_tag = product.find(class_='s-item__price')
        hit_count_tag = product.find(class_='s-item__hotness')  # eBay shows watchers/views here

        # find() returns None for a missing element, so substitute defaults
        title = title_tag.text.strip() if title_tag else 'No Title'
        price = price_tag.text.strip() if price_tag else 'No Price'
        hits = hit_count_tag.text.strip() if hit_count_tag else 'No Hit Count'

        extracted_data.append({"index": idx, "title": title, "price": price, "hits": hits})

    # Print extracted data
    if extracted_data:
        for data in extracted_data:
            print(f"{data['index']}. {data['title']} | Price: {data['price']} | Hits: {data['hits']}")
    else:
        # A 200 response with no matching <li class="s-item"> elements
        print("No product listings found.")
else:
    print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
