# Web Scraping with Python

## Use the requests library to download web pages

In [2]:
!pip install requests --upgrade --quiet

In [3]:
import requests

In [25]:
# Address of the page that gets downloaded: the books listing under
# /bestsellers on amazon.co.uk.
books_url = "https://www.amazon.co.uk/bestsellers/books"

In [26]:
# Download the page. requests applies no timeout unless one is passed, so
# without it this cell could block forever if the server never answers.
response = requests.get(books_url, timeout=30)

In [27]:
# HTTP status code of the download; the recorded output for this cell is 200.
response.status_code

200

In [28]:
# Length of the downloaded page text.
len(response.text)

217621

In [30]:
# Keep the page text under its own name; it is both written to disk and parsed.
page_contents = response.text

In [33]:
# Save a local copy of the page. The encoding is given explicitly: when it is
# omitted, open() falls back to the platform default, which may be unable to
# encode some characters in the page and would then raise UnicodeEncodeError.
with open('webpage.html', 'w', encoding='utf-8') as f:
    f.write(page_contents)

## Use Beautiful Soup to parse and extract information

In [34]:
!pip install beautifulsoup4 --upgrade --quiet

In [36]:
from bs4 import BeautifulSoup

In [38]:
# Parse the downloaded HTML with the 'html.parser' backend.
doc = BeautifulSoup(page_contents, features='html.parser')

In [39]:
# Show the type of the parsed document; the recorded output is bs4.BeautifulSoup.
type(doc)

bs4.BeautifulSoup

In [147]:
# Every <div> carrying the CSS class 'p13n-sc-truncate'; their text is used
# as the book titles.
title_tags = doc.find_all('div', class_='p13n-sc-truncate')

In [238]:
# The selector is a comma-separated group, so an element matches if it is either
#   - an <a> with both classes 'a-size-small' and 'a-link-child', or
#   - a <span> with both classes 'a-size-small' and 'a-color-base'.
# NOTE(review): presumably author names appear as a link for some books and as
# a plain span for others - confirm against the page markup.
author_tags = doc.select("a.a-size-small.a-link-child, span.a-size-small.a-color-base")

In [239]:
# Number of author tags matched; the recorded output is 50.
len(author_tags)

50

In [203]:
# Every <span> carrying the CSS class 'p13n-sc-price'; their text is used as
# the book prices.
price_tags = doc.find_all('span', class_='p13n-sc-price')

In [206]:
# Every <span> carrying the CSS class 'a-icon-alt'; their text is used as the
# book ratings.
rating_tags = doc.find_all('span', class_='a-icon-alt')

In [207]:
# Number of rating tags matched; the recorded output is 45, fewer than the 50
# author tags counted earlier.
len(rating_tags)

45

In [228]:
# Text of each title tag, with leading/trailing whitespace stripped.
title = [title_tag.text.strip() for title_tag in title_tags]

In [240]:
# Text of each author tag, taken as-is (no whitespace stripping).
author = [author_tag.text for author_tag in author_tags]

In [234]:
# Text of each price tag, taken as-is; values stay strings.
price = [price_tag.text for price_tag in price_tags]

In [229]:
# Text of each rating tag, taken as-is.
rating = [rating_tag.text for rating_tag in rating_tags]

In [221]:
import pandas as pd

In [242]:
# Column name -> list of scraped values, in the order the columns should appear.
# `rating` is not included here.
# NOTE(review): the recorded counts show 45 rating tags versus 50 author tags,
# so ratings could not be lined up row-for-row with the other columns as-is.
books_dict = dict(title=title, author=author, price=price)

In [243]:
# Build the table: one column per key of books_dict, one row per list position.
books_df = pd.DataFrame(data=books_dict)

In [244]:
# Display the assembled table (columns: title, author, price) as the cell output.
books_df

Unnamed: 0,title,author,price
0,"Pinch of Nom Comfort Food: 100 Slimming, Satis...",Kay Featherstone,£10.00
1,Murder Investigation Team: How Scotland Yard R...,Steven Keogh,£8.99
2,Why Has Nobody Told Me This Before?: The No 1 ...,Dr Julie Smith,£10.00
3,"Slimming Eats: Healthy, delicious recipes – 10...",Siobhan Wightman,£12.00
4,It Ends With Us: The top five Sunday Times bes...,Colleen Hoover,£5.00
5,"Pinch of Nom Quick & Easy: 100 Delicious, Slim...",Kay Featherstone,£10.00
6,Manifest: The Sunday Times bestseller that wil...,Roxie Nafousi,£7.49
7,The Midnight Library: The No.1 Sunday Times be...,Matt Haig,£4.50
8,Atomic Habits: The life-changing million copy ...,James Clear,£12.00
9,Everything You Need to Know About the Menopaus...,Kate Muir,£13.59
