In [173]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import StaleElementReferenceException
from selenium.common.exceptions import NoSuchElementException
import pandas as pd 
import requests
from word2number import w2n

In [174]:
# Landing page of the site to scrape, and the category names to collect.
url = "http://books.toscrape.com"
categories = [
    "Travel",
    "Mystery",
    "Historical Fiction",
    "Sequential Art",
    "Classics",
]

# Start a Chrome session and open the landing page so that later cells
# can look up category links on it.
driver = webdriver.Chrome()
driver.get(url)

In [175]:
_GBP_RATES_URL = "https://api.exchangerate-api.com/v4/latest/GBP"
_RATE_REQUEST_TIMEOUT_S = 10
# Holds the CZK rate after the first successful fetch so that converting many
# prices in a loop does not issue one HTTP request per price.
_czk_rate_cache = {}


def _gbp_to_czk_rate():
    """Return the CZK rate from the GBP rates endpoint, fetching it at most once.

    Raises:
        requests.RequestException: on timeout, connection failure or a
            non-success HTTP status (via ``raise_for_status``).
        KeyError: if the response payload has no ``rates`` / ``CZK`` entry.
    """
    if "CZK" not in _czk_rate_cache:
        response = requests.get(_GBP_RATES_URL, timeout=_RATE_REQUEST_TIMEOUT_S)
        # Fail loudly on HTTP errors instead of trying to parse an error body.
        response.raise_for_status()
        _czk_rate_cache["CZK"] = response.json()["rates"]["CZK"]
    return _czk_rate_cache["CZK"]


def price_to_czk(value):
    """Convert ``value`` to CZK using the rate published for GBP.

    Args:
        value: numeric amount to convert (multiplied by the CZK rate).

    Returns:
        ``value`` multiplied by the cached CZK rate.

    The rate is downloaded on the first call and reused afterwards, so every
    price converted in one session uses the same rate.
    """
    return value * _gbp_to_czk_rate()


In [176]:
# Accumulates one tuple per scraped book across all find_category() calls:
# (book_name, category, price_czk, in_stock, stock_amount, rating).
result = []


def _parse_stock(stock_text):
    """Split availability text of the form ``status (amount)`` into two strings.

    Returns ``(status, amount)`` with surrounding whitespace and the closing
    parenthesis removed. If the text contains no "(", ``amount`` is "" rather
    than raising on tuple unpacking.
    """
    status, _, remainder = stock_text.partition("(")
    amount = remainder.strip()
    if amount.endswith(")"):
        amount = amount[:-1].strip()
    return status.strip(), amount


def _read_rating():
    """Return the numeric rating encoded as a word in the ``.star-rating`` class list."""
    selector = ".star-rating"
    try:
        css_classes = driver.find_element(By.CSS_SELECTOR, selector).get_attribute("class")
    except StaleElementReferenceException:
        # Retry once: the element may have been replaced between lookup and read.
        css_classes = driver.find_element(By.CSS_SELECTOR, selector).get_attribute("class")
    # The last class token is the rating word, converted to an int by w2n.
    return w2n.word_to_num(css_classes.split()[-1])


def _scrape_book(category):
    """Read the currently loaded book page and return its record tuple."""
    book_name = driver.find_element(By.CSS_SELECTOR, "h1").text
    price_str = driver.find_element(By.CSS_SELECTOR, ".price_color").text
    # Drop the leading currency symbol before parsing the number.
    price_czk = price_to_czk(float(price_str[1:]))
    stock_text = driver.find_element(By.CSS_SELECTOR, ".instock.availability").text
    in_stock, stock_amount = _parse_stock(stock_text)
    rating = _read_rating()
    return (book_name, category, price_czk, in_stock, stock_amount, rating)


def find_category(category):
    """Scrape every book of ``category`` and append the records to ``result``.

    The driver must currently show a page containing a link whose text
    includes ``category``. The function clicks that link, walks all listing
    pages, visits each book and appends one tuple per book to the
    module-level ``result`` list. On return the driver is back on the last
    listing page of the category.

    Args:
        category: text used to locate the category link (partial match).

    Raises:
        NoSuchElementException: if the category link or an expected element
            on a listing/book page is missing.
    """
    driver.find_element(By.PARTIAL_LINK_TEXT, category).click()

    try:
        pager_text = driver.find_element(By.CSS_SELECTOR, ".current").text
        # The last whitespace-separated token of the pager text is the page count.
        total_pages = int(pager_text.split()[-1])
    except NoSuchElementException:
        # No pager element means the category fits on a single page.
        total_pages = 1
    print(category, total_pages)

    for page in range(1, total_pages + 1):
        listing_url = driver.current_url

        # Collect plain URLs up front: element handles taken from the listing
        # can go stale once the browser navigates to a book page.
        book_urls = [
            link.get_attribute("href")
            for link in driver.find_elements(By.CSS_SELECTOR, ".product_pod h3 a")
        ]

        for book_url in book_urls:
            driver.get(book_url)
            result.append(_scrape_book(category))

        # Return to the listing so pagination (and the caller) see the same
        # page the loop started from.
        driver.get(listing_url)

        if page < total_pages:
            driver.find_element(By.CSS_SELECTOR, ".next a").click()
        

In [177]:
# Scrape every configured category. The browser is always closed, even when
# a category fails part-way, so no Chrome process is left running.
try:
    for category in categories:
        # Start each category from the landing page, where the category
        # links are looked up.
        driver.get(url)
        find_category(category)
finally:
    driver.quit()
    

Travel 1
Mystery 2
Historical Fiction 2
Sequential Art 4
Classics 1


In [180]:
# Column labels, in the same order as the fields of each tuple in `result`.
column_names = [
    "Book Name",
    "Category",
    "Price(in CZK)",
    "Stock Status",
    "Stock Amount",
    "Rating",
]
df = pd.DataFrame(data=result, columns=column_names)

# Persist the scraped table without the row index.
df.to_csv("output.csv", index=False)

In [183]:
# Note: not the last expression of the cell, so this preview is not rendered.
df.head()

# Print each column name together with its dtype.
for column, dtype in df.dtypes.items():
    print(f"{column}: {dtype}")

Book Name: object
Category: object
Price(in CZK): float64
Stock Status: object
Stock Amount: object
Rating: int64
