In [417]:
import requests
from bs4 import BeautifulSoup

### Part 1: Inspect

- How do the URLs change when you navigate to the next results page?

- Using the Etsy website for this task: when you navigate to the next results page, the `page` value at the end of the URL is incremented by one.

This denotes the first page: `https://www.etsy.com/search?q=blanket&ref=pagination&page=1`
This follows as the second page: `https://www.etsy.com/search?q=blanket&ref=pagination&page=2`

- When a different keyword is searched, the URL changes through its query parameters: the value of `q` (here `q=blanket`) is replaced with the new keyword.
 


`https://www.etsy.com/search?q=blanket&ref=pagination&page=1`

- **Base URL**
    - `https://www.etsy.com/search`
- **Query Parameters**
    - Start & Separators: `?`, `&`
    - Information: `q=blanket` (search keyword), `ref=pagination`, `page=1` (results page number)

### Part 2: Scrape


In [455]:
# Etsy search URL for the keyword "blanket". It deliberately ends with "page="
# so that a page number can be appended to it.
base_url = "https://www.etsy.com/search?q=blanket&ref=pagination&page="

In [456]:
# Fetch one page of the Etsy search results.
def get_url(page_num, timeout=10):
    """Request the search-results page numbered ``page_num``.

    The page number is appended to the module-level ``base_url``, which ends
    with ``page=``.

    Parameters
    ----------
    page_num : int or str
        Results page to request.
    timeout : float or tuple, optional
        Seconds to wait for the server, forwarded to ``requests.get``.
        Without a timeout a stalled connection would block the notebook
        indefinitely.

    Returns
    -------
    requests.Response
        The raw response. The HTTP status code is not checked here.

    Raises
    ------
    requests.exceptions.Timeout
        If the server does not respond within ``timeout`` seconds.
    """
    url = base_url + str(page_num)
    return requests.get(url, timeout=timeout)


In [457]:
# Fetch the page explicitly so this cell works on a fresh kernel instead of
# relying on a `site_url` variable left over from an earlier session.
# NOTE(review): page 1 is assumed to be the page inspected below - confirm.
site_url = get_url(1)
site_content = site_url.content
# Preview only the beginning; the full page source is very large.
site_content[:500]



In [458]:
# Parse the downloaded HTML using the "html.parser" backend.
soup = BeautifulSoup(site_content, "html.parser")
# Display the parsed document (the output is long).
soup

<!DOCTYPE html>

<html class="ui-toolkit" lang="en-US" xml:lang="en-US" xmlns="http://www.w3.org/1999/xhtml" xmlns:fb="https://www.facebook.com/2008/fbml" xmlns:og="http://ogp.me/ns#">
<head>
<meta content="text/html; charset=utf-8" http-equiv="content-type"/>
<meta content="IE=edge" http-equiv="X-UA-Compatible">
<meta content="width=1024" name="viewport"/>
<meta content="Is code your craft? https://www.etsy.com/careers" name="X-Recruiting"/>
<meta content="3:1610400546:m1OPGV_ZGI38OtR2oXXchdCFxZbm:ebdce22cb08ca65d940c459d7bf8e150dbbcc35a168d9eb1af9a4ca5c70ea9a5" name="csrf_nonce"/>
<meta content="3:1610400546:Ywi_JxCZrUsRsoK6afZ0OCSXZncc:e7465a7ce24190136abcef7c815cd85c270567a0e33dae6e2935bbd77c92fe25" name="uaid_nonce"/>
<meta content="89186614300" property="fb:app_id"/>
<meta content="Etsy" property="og:site_name"/>
<meta content="en_US" property="og:locale"/>
<script charset="utf-8" type="text/javascript">
!function(n,e){var t,o,i,c=[],f={passive:!0,capture:!0},r=new Date,a="pointe

In [459]:
# Narrow the search to the element whose id is "content", then collect every
# listing-card info <div> inside it.
results = soup.find(id='content')
items = results.find_all("div",class_="v2-listing-card__info")
len(items) #50 listing cards were found on this page in the recorded run

50

In [460]:
# Inspect the first listing card to see which tags hold the name, price and rating.
items[0]

<div class="v2-listing-card__info">
<div>
<h3 class="text-gray text-truncate mb-xs-0 text-body">
                    Curious George Blanket
                </h3>
<p>
<div class="v2-listing-card__shop">
<p aria-hidden="true" class="text-gray-lighter text-body-smaller display-inline-block">KuddleBugCorner</p>
<p class="screen-reader-only">From shop KuddleBugCorner</p>
<span class="v2-listing-card__rating icon-t-2 display-block">
<span class="stars-svg stars-smaller">
<input name="initial-rating" type="hidden" value="5"/>
<input name="rating" type="hidden" value="5"/>
<span class="screen-reader-only">5 out of 5 stars</span>
<span aria-hidden="true" class="rating lit rating-first icon-b-2" data-rating="1">
<span class="etsy-icon stars-svg-star"><svg aria-hidden="true" focusable="false" viewbox="3 3 18 18" xmlns="http://www.w3.org/2000/svg"><path d="M19.985,10.36a0.5,0.5,0,0,0-.477-0.352H14.157L12.488,4.366a0.5,0.5,0,0,0-.962,0l-1.67,5.642H4.5a0.5,0.5,0,0,0-.279.911L8.53,13.991l-1.5,5.328a0

In [461]:
# Product name: the text of the card's <h3>, with surrounding whitespace stripped.
name = items[0].find("h3").text.strip()
name


'Curious George Blanket'

In [425]:
# Price: the text of the first <span class="currency-value"> inside the card.
price = items[0].find('span', class_="currency-value").text
price

'29.00'

### Part 3: Parse

- Sieve through your HTML soup to pick out only the variables required
- Format the results in a readable format (e.g. JSON)
- Save the results to a file

In [447]:
def _first_token(tags, index):
    """Return the first whitespace-separated word of ``tags[index].text``.

    Returns ``None`` when ``tags`` has no element at ``index`` or when that
    element's text is empty.
    """
    if index >= len(tags):
        return None
    tokens = tags[index].text.split()
    return tokens[0] if tokens else None


def parse_etsy(soup):
    """Extract name, price and rating data from one Etsy results page.

    Parameters
    ----------
    soup : BeautifulSoup
        Parsed HTML of a search-results page.

    Returns
    -------
    list of dict
        One dict per listing card with the keys ``"Product Name"``,
        ``"Price"``, ``"Review"`` and ``"Number of reviews"``. Values are the
        strings found in the page (e.g. ``'1,676'``); a field that is missing
        from a card is ``None``. An empty list is returned when the page has
        no element with id ``content``.
    """
    results = soup.find(id='content')
    if results is None:
        return []

    result = []
    for blkt in results.find_all("div", class_="v2-listing-card__info"):
        name_tag = blkt.find("h3")
        price_tag = blkt.find('span', class_="currency-value")
        # The first screen-reader span carries the rating text ("5 out of 5
        # stars") and the second the review count. Both are looked up per
        # card, so a card without them gets None instead of an error or the
        # previous card's values.
        spans = blkt.find_all('span', class_="screen-reader-only")
        result.append({
            "Product Name": name_tag.text.strip() if name_tag is not None else None,
            "Price": price_tag.text if price_tag is not None else None,
            "Review": _first_token(spans, 0),
            "Number of reviews": _first_token(spans, 1),
        })
    return result

# Try the parser on the page that was loaded into `soup`.
parse_etsy(soup)


[{'Product Name': 'Curious George Blanket',
  'Price': '29.00',
  'Review': '5',
  'Number of reviews': '94'},
 {'Product Name': 'German Flag Blanket / German print blanket / Flagge Deutschlands / Cosy Fleece blanket',
  'Price': '29.99',
  'Review': '4.5',
  'Number of reviews': '118'},
 {'Product Name': 'AUTISIM Awareness Tied Fleece Blanket',
  'Price': '35.00',
  'Review': '5',
  'Number of reviews': '26'},
 {'Product Name': 'Custom Blanket, Boat Blanket, Lake House Blanket, Beach House Blanket, Beach House Gift, Lake House Gift, Personalized Throw Blanket',
  'Price': '58.00',
  'Review': '5',
  'Number of reviews': '1,676'},
 {'Product Name': 'Block Sherpa Blanket, Block Blanket, Block Gift, Kids Blanket, Cool Kids Blanket, Funny Blanket',
  'Price': '54.99',
  'Review': '5',
  'Number of reviews': '213'},
 {'Product Name': 'Standard Size Custom Stadium Sherpa Fleece Hockey Blanket Blanket Economy- Any Color, Any Name, Any Team',
  'Price': '65.00',
  'Review': '5',
  'Number of 

In [448]:
# Scrape configuration
qty_page = 50   # listings per results page
num_pro = 500   # total number of products wanted

# Number of pages to open: the ceiling of num_pro / qty_page. Plain
# `num_pro // qty_page + 1` would request one page too many whenever num_pro
# is an exact multiple of qty_page (11 pages instead of 10 for these values).
page_num = (num_pro + qty_page - 1) // qty_page

def tot_page(page_num):
    """Scrape results pages 1 through ``page_num`` and merge their products.

    Parameters
    ----------
    page_num : int
        Number of results pages to download, starting from page 1.

    Returns
    -------
    list of dict
        The product records returned by ``parse_etsy`` for every page, in
        page order.
    """
    pro_tot = []
    for num in range(1, page_num + 1):
        site_url = get_url(num)
        # Name the parser explicitly so every page is parsed the same way,
        # regardless of which optional parsers are installed.
        soup = BeautifulSoup(site_url.content, "html.parser")
        pro_tot.extend(parse_etsy(soup))
    return pro_tot
        
# Scrape every page and keep the combined list of product records.
Blanket = tot_page(page_num)


In [452]:
#import necessary modules
import pandas as pd


# Tabulate the scraped records with a fixed column order.
etsy = pd.DataFrame(Blanket, columns=["Product Name","Price","Review", "Number of reviews"])

# Preview the first rows.
etsy.head()

Unnamed: 0,Product Name,Price,Review,Number of reviews
0,"Customizable Photo Blanket, Photo Collage Gift...",29.99,4.5,1378
1,"Chunky blanket, Chunky knit throw, Chunky knit...",22.39,4.5,2320
2,Chunky knit blanket merino wool blanket arm kn...,20.56,5.0,1699
3,"Custom Photo Sherpa Fleece Blanket, Personaliz...",46.95,5.0,728
4,Personalized Custom Soft Blanket with SCRIPT N...,25.0,5.0,990


In [454]:
# Save the table as etsy.csv (relative path).
# NOTE(review): with default arguments the row index is presumably written as
# an extra first column - pass index=False if that is not wanted.
etsy.to_csv("etsy.csv")