# coding=utf-8
import urllib
import urllib2
import json
from bs4 import BeautifulSoup
# # Request headers (for a previous target, kept for reference)
# headers = {'User-Agent': 'android 4.1.2;867746015013717;zys 8.1.2;MI 1S zhjkyzYs safari webkit',
#            'X-Requested-With': 'XMLHttpRequest'}
# # Target URL
# baseUrl = "http://m.120ask.com/kuaiwen/site/doctor"
# # POST data
# data = {'id': 13599, 'showwxpaytitle': 1}
class Spider(object):
    def __init__(self):
        self.headers = {'User-Agent': 'new_doctorpda/4.2.2 (iPhone; iOS 9.3; Scale/2.00) doctorpda',
                        'Cookie': 'JSESSIONID=CE1D48E03C95B4D5D841F79AEE55B895; Hm_lvt_bc9d4fa6469686fe63002104880688b1=1460551175,1460717756; JSESSIONID=CE1D48E03C95B4D5D841F79AEE55B895; login_id=15221131593; secret_token=84bc894eada38d0b24fbcfd6163b914b'}
        self.data = {'data': 'xCtl8j6pbBDaw53TMFbZMSDgBDoxuv57m9vj5INOfTU%3D'}
        self.baseUrl = 'http://api.doctorpda.cn/api/v2/case/index?app_key=f1c18i2otirc0004&client_id=7c01ag7q3qcc0112&access_token=72ba7500-7562-40a0-a17e-f3cb465e3839&net=wifi&versionName=4.2.2&versionCode=85&source=app'
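
    # A minimal cookielib-based sketch of session handling, as an alternative
    # to the hard-coded Cookie header above. It assumes the server sets
    # JSESSIONID itself on the first response; untested against this API.
    # import cookielib
    # cookie_jar = cookielib.CookieJar()
    # opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie_jar))
    # urllib2.install_opener(opener)  # later urllib2.urlopen calls reuse the cookies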
    # Build the POST parameters
    def constructPostData(self):
        postData = {'id': 13599, 'showwxpaytitle': 1}
        # self.dataTag += 1
        return postData
    # Fetch the page data
    def getPage(self, _data):
        # Encode the POST body (the encrypted self.data payload is sent;
        # the _data argument is currently unused)
        req_data = urllib.urlencode(self.data)
        # Build the HTTP request
        request = urllib2.Request(self.baseUrl, req_data, self.headers)
        print request
        # Send the POST request
        try:
            # Get a handle on the response
            handler = urllib2.urlopen(request)
            # Print and return the page data
            response = handler.read()
            print response
            return response
        # Catch HTTP errors
        except urllib2.HTTPError, e:
            if hasattr(e, "code"):
                print e.code
            if hasattr(e, "reason"):
                print e.reason
        # Catch other URL errors
        except urllib2.URLError, e:
            if hasattr(e, "code"):
                print e.code
            if hasattr(e, "reason"):
                print e.reason
    # Clean the response string (strip \n, \t, \r) and extract the span tags
    def dealWithString(self, response):
        response_ = response.replace('\r', '').replace('\t', '').replace('\n', '')
        jsonStr = json.loads(response_)
        jsonData = json.dumps(jsonStr['html'], ensure_ascii=False)
        soup = BeautifulSoup(jsonData, "lxml")
        return soup.find_all("span")
    # Write results to a file (placeholder, not yet implemented)
    def writeToFile(self):
        return None
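
    # A minimal sketch of what writeToFile might do, assuming it is handed the
    # span tags returned by dealWithString; the signature and filename here
    # are hypothetical, not part of the original script.
    # def writeToFile(self, spans, filename='output.txt'):
    #     with open(filename, 'w') as f:
    #         for span in spans:
    #             if span.string:
    #                 f.write(span.string.encode('utf-8') + '\n')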
    # Entry point: fetch the page and (optionally) parse it
    def start(self):
        pageData = self.getPage(self.constructPostData())
        # dataStr = self.dealWithString(pageData)
        # for data in dataStr:
        #     print data.string
        # print dataStr
if __name__ == "__main__":
    spider = Spider()
    spider.start()