This repository has been archived by the owner on Oct 4, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
PumaScraper.py
60 lines (48 loc) · 1.62 KB
/
PumaScraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
from IScraper import IScraper
from Shoe import Shoe
import urllib
import requests
import json
import re
from bs4 import BeautifulSoup
from bs4 import NavigableString
class PumaScraper(IScraper):
    """Scraper that builds Puma US store URLs and parses shoe listings."""

    def __init__(self):
        # Base URL that every search and category URL is built on.
        self.domain = 'https://us.puma.com/en/us'

    def getUrl(self, name, gender, sport):
        """Return the listing URL for the given search criteria.

        A non-empty ``name`` yields a search URL restricted to the
        ``Footwear`` product division, with ``gender`` and ``sport`` added as
        extra refinements when they are non-empty. An empty ``name`` yields a
        ``<gender>/shoes/<sport>`` category URL instead.
        """
        # Imported here so the method does not rely on some other module
        # having loaded the ``urllib.parse`` submodule; a bare
        # ``import urllib`` does not guarantee that it is available.
        from urllib.parse import urlencode

        if name == '':
            return '%s/%s/shoes/%s#pagesize=128' % (self.domain, gender, sport)

        params = {
            'q': name,
            'prefn1': 'productDivision',
            'prefv1': 'Footwear',
            'pagesize': 128,
        }
        if gender != '':
            params['prefn2'] = 'gender'
            params['prefv2'] = gender
        if sport != '':
            params['prefn3'] = 'sportName'
            params['prefv3'] = sport
        return '%s/search?%s' % (self.domain, urlencode(params))

    def getShoes(self, name, gender='', sport=''):
        """Return the list of ``Shoe`` objects parsed from the listing page.

        Returns an empty list when ``IScraper.getData`` yields ``None`` or the
        page has no ``product-grid`` element. Tiles that lack the expected
        markup, or that list no colour swatches, are skipped rather than
        aborting the whole scrape.
        """
        soup = IScraper.getData(self, name, gender, sport)
        if soup is None:
            return []
        grid = soup.find('div', {'class': 'product-grid'})
        if grid is None:
            return []

        shoes = []
        for tile in grid.find_all('div', {'class': 'product-tile'}):
            shoe = self._parseTile(tile, gender)
            if shoe is not None:
                shoes.append(shoe)
        return shoes

    @staticmethod
    def _parseTile(tile, gender):
        """Build a ``Shoe`` from one product tile, or return None to skip it."""
        body = tile.find('div', {'class': 'tile-body'})
        if body is None:
            return None
        link = body.find('div', {'class': 'pdp-link'})
        if link is None:
            return None

        # Tiles without any colour swatch are not reported.
        swatches = body.find('div', {'class': 'swatches'})
        if swatches is None:
            return None
        colors = len(swatches.find_all('a', {'class': 'swatch__container'}))
        if colors == 0:
            return None

        # The price stays 'N/A' unless the tile carries a price value element.
        price = 'N/A'
        priceDiv = body.find('div', {'class': 'price'})
        if priceDiv is not None:
            value = priceDiv.find('span', {'class': 'value'})
            if value is not None:
                price = value.text.strip()

        # Only newlines, carriage returns and tabs are trimmed from the title.
        title = link.text.strip('\n\r\t')
        return Shoe(title, gender, price, colors, 'Puma')