# Scraping a book website

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

## Preparation

In [4]:
# Column buffers for the dataframe: one independent, initially empty list per column.
titles_col, prices_col, availability_col, rating_col = [], [], [], []

# Lookup table that converts the book-rating strings ('One' .. 'Five') to the numbers 1 .. 5.
rating_map = {
    word: stars
    for stars, word in enumerate(('One', 'Two', 'Three', 'Four', 'Five'), start=1)
}

### Scraping the Data

In [6]:
# Scrape every catalogue page and append one entry per book to the column lists
# (titles_col, prices_col, availability_col, rating_col) created in the preparation cell.
BASE_URL = "https://books.toscrape.com/catalogue/category/books_1/page-{}.html"
FIRST_PAGE, LAST_PAGE = 1, 50
REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled connection hangs the cell forever

# start from empty lists so that re-running this cell does not append duplicate rows
for column in (titles_col, prices_col, availability_col, rating_col):
    column.clear()

# a single session reuses the underlying connection for all page requests
with requests.Session() as session:

    # iterating through multiple pages on website
    for i in range(FIRST_PAGE, LAST_PAGE + 1):

        # get html data and create parser; raise on HTTP errors instead of
        # silently parsing an error page (which would just yield zero books)
        response = session.get(BASE_URL.format(i), timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        # pass the raw bytes so the parser detects the page encoding itself
        soup = BeautifulSoup(response.content, "lxml")

        # obtaining the specific part of the code I need
        books = soup.find_all('article', class_='product_pod')

        # iterating through the required tags and adding data to the lists
        for book in books:
            title = book.h3.a['title'].strip()
            price = book.find('p', class_='price_color').text.strip()
            # match on the single 'availability' class: this does not depend on
            # the order or presence of a second class such as 'instock'
            availability = book.find('p', class_='availability').text.strip()
            # the rating tag carries two classes: 'star-rating' plus the rating
            # word (e.g. 'Three'); select the tag explicitly rather than relying
            # on it being the first <p>, then take the class that is the word
            rating_classes = book.find('p', class_='star-rating')['class']
            rating = next(c for c in rating_classes if c != 'star-rating')

            titles_col.append(title)
            prices_col.append(price)
            availability_col.append(availability)
            rating_col.append(rating)

### Dataframe & CSV

In [8]:
# creating the dataframe
# max_colwidth=None makes pandas display full cell contents (long book titles) untruncated
pd.set_option('display.max_colwidth', None)
df = pd.DataFrame(data = {'Book_title': titles_col, 'Book_prices': prices_col, 'Availability': availability_col, 'Rating': rating_col})

# cleaning the dataframe
# prices: keep only digits and the decimal point, so the currency symbol (or any
# stray mis-decoded character in front of it) is removed before converting to float
df['Book_prices'] = df['Book_prices'].str.replace(r'[^\d.]', '', regex=True).astype(float)
# ratings: convert the rating words to numbers via rating_map
df['Rating'] = df['Rating'].map(rating_map)

# exporting the dataframe as a csv file
# index=False: the default integer index carries no information and would
# otherwise be written as an extra, unnamed first column in the file
df.to_csv('Book_scraping.csv', index=False)

In [15]:
# overview of the cleaned dataframe; as the last expression of the cell it is
# rendered as a table (the rendered output elides the middle rows with "...")
df

Unnamed: 0,Book_title,Book_prices,Availability,Rating
0,A Light in the Attic,51.77,In stock,3
1,Tipping the Velvet,53.74,In stock,1
2,Soumission,50.10,In stock,1
3,Sharp Objects,47.82,In stock,4
4,Sapiens: A Brief History of Humankind,54.23,In stock,5
...,...,...,...,...
995,Alice in Wonderland (Alice's Adventures in Wonderland #1),55.53,In stock,1
996,"Ajin: Demi-Human, Volume 1 (Ajin: Demi-Human #1)",57.06,In stock,4
997,A Spy's Devotion (The Regency Spies of London #1),16.97,In stock,5
998,1st to Die (Women's Murder Club #1),53.98,In stock,1
