Skip to content

Commit ddb8fd6

Browse files
committed
add WebSpiderSeed/req_helper
1 parent efeab66 commit ddb8fd6

File tree

1 file changed

+74
-0
lines changed

1 file changed

+74
-0
lines changed
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
'''
2+
Author: herongwei
3+
Date: 2021-06-26 11:32:42
4+
LastEditTime: 2021-06-26 11:45:12
5+
LastEditors: Please set LastEditors
6+
Description: 请求帮助类
7+
FilePath: /WebSpiderSeed/lib/req_helper.py
8+
'''
9+
import requests
10+
import json
11+
import time
12+
from comlib.common_helper import CommonHelper
13+
from comlib.log_helper import LogHelper
14+
from retrying import retry
15+
from comlib.config_helper import ConfigHelper
16+
import traceback
17+
18+
class ReqHelper(object):
    """HTTP request helper built on one shared ``requests`` session.

    Exposes a retried JSON POST (``post_json``) and a retried HTML GET
    (``get_html``) with optional proxy support and per-request timing logs.
    """

    # Simulated desktop (IE8) and Android (Nexus 5) user-agent strings.
    g_pc_ua = 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 1.1.4322; CIBA'
    g_wap_ua = 'Mozilla/5.0 (Linux; U; Android 4.4.4; Nexus 5 Build/KTU84P) AppleWebkit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30'
    # Default headers for JSON POST bodies.
    headers = {'Content-Type': 'application/json;charset=utf8'}
    get_headers = {}
    # One shared session so TCP connections are pooled across calls.
    s = requests.session()
    # Proxy mapping loaded once from config; also re-read per request in
    # __get_html so config changes are picked up.
    http_proxies = ConfigHelper.get_config().PROXIES

    @staticmethod
    def get_url(url):
        """Hook for rewriting/prefixing URLs; currently returns *url* unchanged."""
        return url

    @staticmethod
    @retry(stop_max_attempt_number=3, wait_incrementing_increment=1200)
    def post_json(url, data):
        """POST *data* as a JSON body and return the decoded JSON response.

        Retried up to 3 times (any exception, including JSON decode errors,
        triggers a retry).

        :param url: target URL (passed through ``get_url``).
        :param data: JSON-serializable payload.
        :return: the response body parsed as JSON.
        """
        t1 = CommonHelper.get_time_millis()
        # Fix: the prepared Content-Type header was never sent, so servers
        # could misinterpret the JSON body.
        response = ReqHelper.s.post(ReqHelper.get_url(url),
                                    data=json.dumps(data),
                                    headers=ReqHelper.headers)
        t2 = CommonHelper.get_time_millis()
        # Fix: t1/t2 were computed but unused; log timing like __get_html does.
        LogHelper.debug("req url %s cost_time: %s", url, t2 - t1)
        return response.json()

    @staticmethod
    @retry(stop_max_attempt_number=3, wait_incrementing_increment=1200)
    # Fetch a web page (internal; retried, raises on non-200).
    def __get_html(url, ua=None, use_proxy=0):
        """GET *url* and return the response body as text.

        :param url: page URL.
        :param ua: user-agent string; defaults to the desktop UA.
        :param use_proxy: 0 = direct connection; anything else = use the
            proxies from config.
        :raises Exception: on a non-200 status, so ``@retry`` re-attempts.
        """
        if not ua:
            ua = ReqHelper.g_pc_ua
        headers = {'user-agent': ua}
        t1 = CommonHelper.get_time_millis()
        # NOTE(review): verify=False disables TLS certificate validation —
        # common for crawlers, but confirm this is intentional.
        if use_proxy == 0:
            response = ReqHelper.s.get(url, headers=headers, timeout=30, verify=False)
        else:
            # Re-read proxies from config so updates take effect per request.
            http_proxies = ConfigHelper.get_config().PROXIES
            response = ReqHelper.s.get(url, headers=headers, timeout=30, verify=False, proxies=http_proxies)
        # Decode using the encoding detected from the body rather than the
        # (often wrong or missing) Content-Type charset.
        response.encoding = response.apparent_encoding
        t2 = CommonHelper.get_time_millis()
        LogHelper.debug("req url %s cost_time: %s", url, t2 - t1)
        if response.status_code != 200:
            err_info = 'req %s status_code %s' % (url, response.status_code)
            LogHelper.error(err_info)
            raise Exception(err_info)
        return response.text

    @staticmethod
    def get_html(url, ua=None, use_proxy=0):
        """Safe wrapper over ``__get_html``: page text, or None on failure.

        Swallows any exception (after the retries inside __get_html are
        exhausted), logging it with a traceback.
        """
        try:
            text = ReqHelper.__get_html(url, ua, use_proxy)
            return text
        except Exception as e:
            LogHelper.error("get_html url:%s err:%s trace_back:%s", url, e, traceback.format_exc())
            return None

0 commit comments

Comments
 (0)