-
Notifications
You must be signed in to change notification settings - Fork 3
/
get_details.py
95 lines (83 loc) · 4.08 KB
/
get_details.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import json
import random
import requests
from lxml import html
# Pool of desktop browser User-Agent strings. get_details() picks one at
# random for every request it sends.
# NOTE(review): the macOS Chrome/84 entry appears twice in this list, so it is
# twice as likely to be chosen as any other entry -- confirm whether that is
# intentional.
user_agents = [
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:78.0) Gecko/20100101 Firefox/78.0',
'Mozilla/5.0 (X11; Linux i686; rv:78.0) Gecko/20100101 Firefox/78.0',
'Mozilla/5.0 (Linux x86_64; rv:78.0) Gecko/20100101 Firefox/78.0',
'Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:78.0) Gecko/20100101 Firefox/78.0',
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:78.0) Gecko/20100101 Firefox/78.0',
'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:78.0) Gecko/20100101 Firefox/78.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Trident/7.0; rv:11.0) like Gecko',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36 Edg/84.0.522.44',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36 Edg/84.0.522.44'
]
class InvalidURLException(Exception):
    """Raised when a URL cannot be fetched or is not a supported product page.

    The message passed to the constructor describes which check failed.
    """
def get_amazon_details(tree):
    """Extract the title and price from a parsed amazon.in product page.

    Args:
        tree: Parsed document exposing an lxml-style ``xpath(query)`` method
            that returns a list of matches.

    Returns:
        dict: ``{'price': ..., 'title': ...}``. ``price`` is ``None`` when the
        page says the product is "Currently unavailable"; otherwise it is the
        deal price, sale price, or regular price (the first one present, in
        that order) truncated to a whole number.

    Raises:
        InvalidURLException: If the title is missing, or the product is not
            marked unavailable and no parseable price is found.
    """
    try:
        title = tree.xpath('//span[@id="productTitle"]/text()')[0].strip()

        # The availability block may be absent, and its first text node may
        # be plain whitespace, so scan every text node rather than indexing
        # [0]. A page without the block is simply treated as available.
        availability = tree.xpath('//div[@id="availability"]/span/text()')
        if any('Currently unavailable' in text for text in availability):
            return {
                'price': None,
                'title': title,
            }

        # Price precedence: deal price, then sale price, then regular price.
        price_nodes = (
            tree.xpath('//span[@id="priceblock_dealprice"]/text()')
            or tree.xpath('//span[@id="priceblock_saleprice"]/text()')
            or tree.xpath('//span[@id="priceblock_ourprice"]/text()')
        )
        # Drop thousands separators and the rupee sign before converting;
        # going through float() accepts values such as "1299.00".
        price_text = price_nodes[0].translate(str.maketrans('', '', ',₹')).strip()
        price = int(float(price_text))
    except (IndexError, ValueError, OverflowError) as exc:
        # IndexError: an expected node is missing.
        # ValueError / OverflowError: the price text is not a usable number.
        raise InvalidURLException('url is not a valid amazon.in product page') from exc

    return {
        'price': price,
        'title': title,
    }
def get_flipkart_details(tree):
    """Extract the title and price from a parsed flipkart.com product page.

    Args:
        tree: Parsed document exposing an lxml-style ``xpath(query)`` method
            that returns a list of matches.

    Returns:
        dict: ``{'price': int, 'title': str}`` with the price truncated to a
        whole number.

    Raises:
        InvalidURLException: If the price or title node is missing, or the
            price text is not a usable number.
    """
    # NOTE(review): the class names below are hard-coded and presumably tied
    # to one specific Flipkart page layout -- confirm they are still current.
    try:
        raw_price = tree.xpath('//div[@class="_1vC4OE _3qQ9m1"]/text()')[0]
        cleaned = raw_price.replace(',', '').strip()
        # Drop one leading currency symbol only when there actually is one;
        # slicing unconditionally would eat the first digit of a bare number.
        if cleaned and not cleaned[0].isdigit():
            cleaned = cleaned[1:]
        # Going through float() accepts values such as "1299.00", matching
        # how the Amazon parser in this module converts prices.
        price = int(float(cleaned))
        title = tree.xpath('//span[@class="_35KyD6"]/text()')[0].strip()
    except (IndexError, ValueError, OverflowError) as exc:
        # IndexError: an expected node is missing.
        # ValueError / OverflowError: the price text is not a usable number.
        raise InvalidURLException('url is not a valid flipkart.com product page') from exc

    return {
        'price': price,
        'title': title,
    }
def get_details(url, timeout=10):
    """Fetch a product page and return its price, canonical URL and title.

    Args:
        url: Address of an amazon.in or flipkart.com product page.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Without it a stalled connection
            would block the caller indefinitely.

    Returns:
        dict: ``{'price': ..., 'url': ..., 'title': ...}`` where ``url`` is
        the page's canonical link and ``price`` / ``title`` come from the
        site-specific parser.

    Raises:
        InvalidURLException: If the URL does not mention a supported site,
            cannot be fetched, has no canonical link, or is rejected by the
            site-specific parser.
    """
    # Local import keeps this function self-contained; lxml is already a
    # dependency of this module.
    from lxml.etree import ParserError

    # Cheap pre-filter before any network traffic. This is intentionally a
    # substring test: the authoritative check is the canonical link below.
    if 'amazon.in' not in url and 'flipkart.com' not in url:
        raise InvalidURLException('url is not a valid amazon.in or flipkart.com product page')

    try:
        page = requests.get(
            url,
            headers={'User-Agent': random.choice(user_agents)},
            timeout=timeout,
        )
    except Exception as exc:
        # Deliberately broad: any failure to fetch (bad scheme, DNS, timeout,
        # connection reset, ...) is reported to callers the same way.
        raise InvalidURLException('url is not accessible') from exc

    try:
        tree = html.fromstring(page.content)
        canonical_link = tree.xpath('//link[@rel="canonical"]/@href')[0]
    except (IndexError, ParserError) as exc:
        # IndexError: no canonical <link>; ParserError: empty response body.
        raise InvalidURLException(
            'url is not a valid amazon.in or flipkart.com product page'
        ) from exc

    # Dispatch on the canonical link rather than the user-supplied URL, so
    # the site is chosen from what the fetched page itself declares.
    if 'www.amazon.in' in canonical_link:
        details = get_amazon_details(tree)
    elif 'www.flipkart.com' in canonical_link:
        details = get_flipkart_details(tree)
    else:
        raise InvalidURLException('url is not a valid amazon.in or flipkart.com product page')

    return {
        'price': details['price'],
        'url': canonical_link,
        'title': details['title'],
    }
if __name__ == '__main__':
    # Interactive entry point: prompt for a product URL and print the scraped
    # details as indented JSON.
    url = input('[*] Enter URL: ').strip()
    try:
        details = get_details(url)
    except InvalidURLException as exc:
        # An unusable URL is an expected outcome; exit with status 1 and a
        # one-line message on stderr instead of dumping a traceback.
        raise SystemExit('[!] {}'.format(exc))
    print(json.dumps(details, indent=2))