# Step 1: Web Scraping Using requests and BeautifulSoup

## Importing necessary libraries

In [25]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

## URL of the target website

In [26]:
# Root page of the site to scrape.
url = "http://books.toscrape.com/"

## Sending a request to the website

In [27]:
# requests applies no timeout by default, so without one this cell could hang
# indefinitely if the server never answers.
response = requests.get(url, timeout=10)

# Fail here on a 4xx/5xx reply instead of silently parsing an error page.
response.raise_for_status()

## Parse HTML content

In [28]:
# Pass the raw bytes so BeautifulSoup can determine the encoding from the
# document itself. response.text depends on the charset requests derives from
# the HTTP headers, which can garble non-ASCII characters such as the pound
# sign when the header carries no charset.
soup = BeautifulSoup(response.content, 'html.parser')

## Extracting data

In [49]:
# Collect every <article class="product_pod"> element from the parsed page.
books = soup.find_all('article', class_='product_pod')

# One dict per book; turned into a DataFrame in a later cell.
books_data_beautifulsoup = []

for book in books:
    # Title: read from the "title" attribute of the link inside the <h3>.
    title = book.h3.a['title']

    # Price: text of the <p class="price_color"> element, kept as a string.
    price = book.find('p', class_='price_color').text

    # Availability: select on both CSS classes independently. Passing
    # class_='instock availability' only matches that exact attribute string,
    # so it would stop matching if the class order changed or a class was added.
    availability = book.select_one('p.instock.availability').text.strip()

    # Append the data
    books_data_beautifulsoup.append({
        'Title': title,
        'Price': price,
        'Availability': availability
    })

## Converting the data into a DataFrame

In [50]:
# Name the columns explicitly so the frame keeps the same three-column schema
# even when the scrape produced no rows (an empty list would otherwise yield a
# frame with no columns at all).
df = pd.DataFrame(
    books_data_beautifulsoup,
    columns=['Title', 'Price', 'Availability'],
)

## Saving data to CSV

In [51]:
# Write the scraped rows to disk; index=False leaves the row index out of the file.
df.to_csv(
    'books_data_beautifulsoup.csv',
    index=False,
)

## Preview data

In [52]:
# Show the first five rows as a quick sanity check of the scrape.
df.head(n=5)

Unnamed: 0,Title,Price,Availability
0,A Light in the Attic,£51.77,In stock
1,Tipping the Velvet,£53.74,In stock
2,Soumission,£50.10,In stock
3,Sharp Objects,£47.82,In stock
4,Sapiens: A Brief History of Humankind,£54.23,In stock


# Step 2: Web Scraping Using Scrapy

In this step, we implemented web scraping using the Scrapy framework. The code for this step is in a separate file named `books.py` rather than in this notebook.

# Step 3: Web Scraping Using Selenium

## Importing necessary libraries

In [33]:
import logging
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

## Setting logging level to WARNING to suppress debug logs

In [34]:
# Raise the threshold of the 'selenium' logger so only warnings and above are emitted.
selenium_logger = logging.getLogger('selenium')
selenium_logger.setLevel(logging.WARNING)

## Initializing WebDriver 

In [35]:
# Build the driver step by step: install() yields the value handed to Service,
# and that Service is what Chrome is started with.
chromedriver_path = ChromeDriverManager().install()
chrome_service = Service(chromedriver_path)
driver = webdriver.Chrome(service=chrome_service)

## Navigate to the page

In [36]:
# Reuse the `url` defined in Step 1 instead of repeating the address, so both
# scrapers are guaranteed to read the same page.
driver.get(url)

## Get the page source and parse it using BeautifulSoup

In [37]:
# Take the HTML as the browser currently holds it, then parse it with the
# same parser used in Step 1.
page_html = driver.page_source
soup = BeautifulSoup(page_html, 'html.parser')

## Extracting book data 

In [38]:
# Collect every <article class="product_pod"> element from the parsed page.
books = soup.find_all('article', class_='product_pod')

# One dict per book; turned into a DataFrame in a later cell.
book_data = []
for book in books:
    # Title comes from the "title" attribute of the link inside the <h3>.
    title = book.h3.a['title']
    # Price is the text of the <p class="price_color"> element, kept as a string.
    price = book.find('p', class_='price_color').text
    # Select on both CSS classes independently. Passing
    # class_='instock availability' only matches that exact attribute string,
    # so it would stop matching if the class order changed or a class was added.
    availability = book.select_one('p.instock.availability').text.strip()

    book_data.append({
        'Title': title,
        'Price': price,
        'Availability': availability
    })

## Converting to DataFrame and saving as CSV

In [39]:
# Name the columns explicitly so the frame and the CSV header keep the same
# three-column schema even when the scrape produced no rows.
df_selenium = pd.DataFrame(
    book_data,
    columns=['Title', 'Price', 'Availability'],
)

# index=False leaves the row index out of the file.
df_selenium.to_csv('books_data_selenium.csv', index=False)

## Closing the browser

In [40]:
# The page source has already been parsed into `soup`, so the browser session
# is no longer needed; shut the driver down.
driver.quit()

## Displaying the first few rows of the DataFrame

In [43]:
# Show the first ten rows of the Selenium-scraped data.
df_selenium.head(n=10)

Unnamed: 0,Title,Price,Availability
0,A Light in the Attic,£51.77,In stock
1,Tipping the Velvet,£53.74,In stock
2,Soumission,£50.10,In stock
3,Sharp Objects,£47.82,In stock
4,Sapiens: A Brief History of Humankind,£54.23,In stock
5,The Requiem Red,£22.65,In stock
6,The Dirty Little Secrets of Getting Your Dream...,£33.34,In stock
7,The Coming Woman: A Novel Based on the Life of...,£17.93,In stock
8,The Boys in the Boat: Nine Americans and Their...,£22.60,In stock
9,The Black Maria,£52.15,In stock
