1+ '''
2+ Author: herongwei
3+ Date: 2021-06-26 11:32:42
4+ LastEditTime: 2021-06-26 11:45:12
5+ LastEditors: Please set LastEditors
6+ Description: 请求帮助类
7+ FilePath: /WebSpiderSeed/lib/req_helper.py
8+ '''
9+ import requests
10+ import json
11+ import time
12+ from comlib .common_helper import CommonHelper
13+ from comlib .log_helper import LogHelper
14+ from retrying import retry
15+ from comlib .config_helper import ConfigHelper
16+ import traceback
17+
class ReqHelper(object):
    """HTTP request helper built around a single shared ``requests`` session.

    All methods are static; the class is used as a namespace and is never
    instantiated by the code in this file.
    """

    # Simulated desktop (PC) and Android user-agent strings.
    g_pc_ua = 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 1.1.4322; CIBA'
    g_wap_ua = 'Mozilla/5.0 (Linux; U; Android 4.4.4; Nexus 5 Build/KTU84P) AppleWebkit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30'
    # Headers sent with JSON POST requests (see post_json).
    headers = {'Content-Type': 'application/json;charset=utf8'}
    # Not referenced inside this class; kept because external code may read it.
    get_headers = {}
    # One session shared by every request; created when the class is defined.
    s = requests.session()
    # Proxy mapping read from configuration once, when the class is defined.
    http_proxies = ConfigHelper.get_config().PROXIES

    @staticmethod
    def get_url(url):
        """Return ``url`` unchanged.

        Acts as a hook where a base URL could be prepended; at present it is
        a pass-through.
        """
        return url

    @staticmethod
    @retry(stop_max_attempt_number=3, wait_incrementing_increment=1200)
    def post_json(url, data):
        """POST ``data`` serialized as JSON and return the decoded JSON reply.

        Args:
            url: Target URL (passed through ``get_url``).
            data: Any object accepted by ``json.dumps``.

        Returns:
            The value produced by ``response.json()``.

        Raises:
            Exception: Whatever the final attempt raised. The ``retry``
                decorator re-invokes this function (up to three attempts in
                total) on any exception, including a failure to decode the
                response body, so the POST itself may be sent more than once.
        """
        started = CommonHelper.get_time_millis()
        response = ReqHelper.s.post(
            ReqHelper.get_url(url),
            data=json.dumps(data),
            # Declare the body as JSON; without this header the payload is
            # sent with no Content-Type at all.
            headers=ReqHelper.headers,
            # Same limit as __get_html, so a stalled server cannot block forever.
            timeout=30,
        )
        elapsed = CommonHelper.get_time_millis() - started
        LogHelper.debug("req url %s cost_time: %s", url, elapsed)
        return response.json()

    @staticmethod
    @retry(stop_max_attempt_number=3, wait_incrementing_increment=1200)
    def __get_html(url, ua=None, use_proxy=0):
        """Fetch a web page and return its text; raise on a non-200 status.

        Args:
            url: Page URL to request.
            ua: User-agent string; falls back to ``g_pc_ua`` when falsy.
            use_proxy: ``0`` requests directly; any other value routes the
                request through the proxies from the current configuration.

        Returns:
            The response body decoded with the encoding detected from its
            content.

        Raises:
            Exception: If the status code is not 200, or whatever the request
                itself raised on the final attempt (the ``retry`` decorator
                allows up to three attempts).
        """
        request_kwargs = {
            'headers': {'user-agent': ua if ua else ReqHelper.g_pc_ua},
            'timeout': 30,
            # NOTE(review): TLS certificate verification is disabled here.
            # Kept as in the original; confirm this is intentional.
            'verify': False,
        }
        started = CommonHelper.get_time_millis()
        if use_proxy != 0:
            # Re-read the configuration on each call instead of using the
            # class-level http_proxies snapshot.
            request_kwargs['proxies'] = ConfigHelper.get_config().PROXIES
        response = ReqHelper.s.get(url, **request_kwargs)
        # Trust the encoding detected from the body over the declared one.
        response.encoding = response.apparent_encoding
        elapsed = CommonHelper.get_time_millis() - started
        LogHelper.debug("req url %s cost_time: %s", url, elapsed)
        if response.status_code != 200:
            err_info = 'req %s status_code %s' % (url, response.status_code)
            LogHelper.error(err_info)
            # Raising lets the retry decorator schedule another attempt.
            raise Exception(err_info)
        return response.text

    @staticmethod
    def get_html(url, ua=None, use_proxy=0):
        """Fetch a page, returning its text or ``None`` if every attempt failed.

        Wraps ``__get_html`` so callers never see an exception: any failure
        is logged together with its traceback and reported as ``None``.

        Args:
            url: Page URL to request.
            ua: Optional user-agent string.
            use_proxy: ``0`` for a direct request, any other value to go
                through the configured proxies.
        """
        try:
            return ReqHelper.__get_html(url, ua, use_proxy)
        except Exception as e:
            # Deliberate best-effort boundary: log and signal failure as None.
            LogHelper.error("get_html url:%s err:%s trace_back:%s", url, e, traceback.format_exc())
            return None
0 commit comments