-
Notifications
You must be signed in to change notification settings - Fork 0
/
try.py
98 lines (73 loc) · 2.96 KB
/
try.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import urllib2
import re
from subprocess import check_output
from selenium import webdriver
from PIL import Image
import logging
from selenium.webdriver.common.keys import Keys
import ctypes
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
def kbb_request():
    """Download the KBB market-meter image for a fixed listing and OCR it.

    Opens a hard-coded kbb.com printable listing URL in PhantomJS, reads the
    ``src`` attribute of the element with id ``market-meter-widget-image-1``,
    decodes the base64 payload that follows ``base64,`` and writes it to
    ``output.png``.  The file is then cropped by ``crop_image()`` and passed
    to ``tess1()``.

    Raises:
        ValueError: if the element's ``src`` contains no ``base64,`` payload.
    """
    import base64  # local import: no other function in this file needs it

    image_file_name = 'output.png'
    url = 'http://www.kbb.com/honda/accord/2001-honda-accord/dx-sedan-4d/?vehicleid=4387&intent=buy-used&category=sedan&condition=good&mileage=130000&pricetype=private-party&printable=true'

    # driver = webdriver.Firefox()
    driver = webdriver.PhantomJS(executable_path=r'phantomjs-2.0.0-windows\bin\phantomjs.exe')
    try:
        driver.get(url)
        widget = driver.find_element_by_id("market-meter-widget-image-1")
        img_src = widget.get_attribute('src')
    finally:
        # quit() ends the whole driver session instead of only closing the
        # current window, and the finally block runs it even when loading the
        # page or locating the element fails.
        driver.quit()

    # get_attribute() may return None when the attribute is absent.
    payload = re.search(r'base64,(.*)', img_src or '')
    if payload is None:
        raise ValueError('market-meter image src has no base64 payload')

    with open(image_file_name, 'wb') as output:
        output.write(base64.b64decode(payload.group(1)))

    crop_image()
    tess1(image_file_name)
def to_int(price):
    """Convert a price string such as ``'12,345'`` or ``'12.345'`` to an int.

    Every comma and every period is removed before conversion, so both are
    treated purely as digit-group separators.
    """
    digits = price
    for separator in (',', '.'):
        digits = digits.replace(separator, '')
    return int(digits)
def tesseract():
    """OCR ``new.png`` with the installed Tesseract and print a price from it.

    Runs Tesseract with output base ``numbers`` and reads ``numbers.txt``.
    Every ``$``-prefixed amount containing a ``,`` or ``.`` separator is
    converted with ``to_int``; the second-smallest value is printed.

    Returns:
        The printed price as an int.

    Raises:
        ValueError: if fewer than two prices are found in the OCR output.
    """
    # The command string is kept verbatim; the embedded quotes are how the
    # Windows shell is given the path containing spaces.
    check_output(r'C:"\Program Files (x86)"\Tesseract-OCR\tesseract.exe new.png numbers', shell=True)

    path = "numbers.txt"
    with open(path, 'r') as fd:
        text = fd.read()

    # Inside a character class '|' is a literal pipe, not alternation; a match
    # such as '12|345' could never be parsed by to_int, so only ',' and '.'
    # are accepted as separators.
    prices = sorted(to_int(amount) for amount in re.findall(r"\$(\d+[,.]\d+)", text))

    # NOTE(review): index 1 deliberately skips the lowest amount; the reason
    # is not visible in this file -- confirm against the image layout.
    if len(prices) < 2:
        raise ValueError('expected at least two prices in %s, found %d' % (path, len(prices)))

    m = prices[1]
    print(m)
    return m
def tess1(image_file_name):
    """OCR *image_file_name* with the bundled Tesseract and print its price.

    Runs ``tess1\\tesseract.exe`` with language ``kbb`` and output base
    ``numbers``, reads ``numbers.txt`` and prints the first ``$``-prefixed
    amount converted with ``to_int``.  Unlike ``tesseract()``, the comma
    separator is optional here, so amounts such as ``$950`` also match.

    Args:
        image_file_name: path of the image to recognise.

    Returns:
        The printed price as an int.

    Raises:
        ValueError: if no price is found in the OCR output.
    """
    # An argument list (no shell) keeps file names containing spaces or shell
    # metacharacters intact as a single argument.
    check_output([r'tess1\tesseract.exe', '-l', 'kbb', image_file_name, 'numbers'])

    path = "numbers.txt"
    with open(path, 'r') as fd:
        text = fd.read()

    amounts = re.findall(r"\$(\d+,*\d*)", text)
    if not amounts:
        raise ValueError('no price found in %s' % path)

    m = to_int(amounts[0])
    print(m)
    return m
def crop_image(path="output.png", box=(45, 48, 122, 68)):
    """Crop the image at *path* to *box* and overwrite the file with the result.

    Args:
        path: image file to crop in place.  Defaults to ``output.png``.
        box: ``(left, upper, right, lower)`` tuple passed to ``Image.crop``.
            Defaults to ``(45, 48, 122, 68)``.

    Returns:
        Whatever ``Image.save`` returns.
    """
    im = Image.open(path)
    crop = im.crop(box)
    return crop.save(path)
def test_ghost_driver():
    """Smoke-test PhantomJS: load google.com and print its title and URL."""
    driver = webdriver.PhantomJS(executable_path=r'phantomjs-2.0.0-windows\bin\phantomjs.exe')
    try:
        driver.get("http://www.google.com")
        print(driver.title)
        print(driver.current_url)
    finally:
        # Always end the driver session so no headless browser is left running.
        driver.quit()
def test_page_download(url):
    """Fetch *url* with a browser User-Agent and extract its ``defaultprice``.

    The full page body is printed, then searched for the text
    ``defaultprice': '<digits>'``.

    Args:
        url: address of the page to download.

    Returns:
        The captured digit string when the pattern is found (it may be empty,
        because the pattern allows zero digits); otherwise the integer ``0``.
    """
    from contextlib import closing  # local import: only needed here

    user_agent = 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.472.63 Safari/534.3'
    headers = {'User-Agent': user_agent}
    req = urllib2.Request(url, None, headers)

    # closing() releases the connection even if read() raises.
    with closing(urllib2.urlopen(req)) as response:
        page = response.read()
    print(page)

    m = re.search(r"defaultprice': '(\d*)'", page)
    if m is None:
        return 0
    return m.group(1)
def main():
    """Download one fixed KBB listing page and print the price extracted from it."""
    listing_url = 'https://www.kbb.com/toyota/camry/2001/ce-sedan-4d/?vehicleid=5120&intent=buy-used&mileage=100000&condition=good&pricetype=private-party'
    price = test_page_download(listing_url)
    print(price)
    # Other manual checks, currently disabled:
    #tess1('output.png')
    #tess1("cropped_output.png")
    #test_ghost_driver()
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()