-
Notifications
You must be signed in to change notification settings - Fork 18
/
curl_loop.py
105 lines (92 loc) · 3.62 KB
/
curl_loop.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import asyncio as aio
import atexit
import pycurl
from falsy.loader.func import load
from falsy.netboy.curl_result import curl_result
class CurlLoop:
    """Bridge a shared ``pycurl.CurlMulti`` stack to asyncio futures.

    All state lives on the class: a single multi stack and a single
    ``handle -> future`` map are shared by every caller.  Callers await
    ``handler_ready(handle)`` while some other coroutine repeatedly calls
    ``perform()`` to drive the transfers and resolve the futures.
    """

    class CurlException(Exception):
        """Delivered to a waiting future when a transfer is reported as failed.

        Attributes:
            code: error number reported by ``CurlMulti.info_read()``.
            desc: error message reported alongside ``code``.
            data: result dict assembled for the failed handle.
        """

        def __init__(self, code, desc, data):
            # Pass code/desc up so str()/repr() of the exception is not empty.
            super().__init__(code, desc)
            self.code = code
            self.desc = desc
            self.data = data

    # Single multi stack shared by all transfers; closed at interpreter exit.
    _multi = pycurl.CurlMulti()
    _multi.setopt(pycurl.M_PIPELINING, 1)
    atexit.register(_multi.close)
    # In-flight curl handle -> future awaited by handler_ready().
    _futures = {}

    @staticmethod
    def _error_reply(c, state, code, desc):
        """Build the dict handler_ready() returns for a failed transfer."""
        return {
            'url': c._raw_url,
            'id': c._raw_id,
            'payload': c._raw_payload,
            'spider': 'pycurl',
            'state': state,
            'error_code': code,
            'error_desc': desc,
        }

    @staticmethod
    def _tagged_result(c, state):
        """Return ``curl_result(c)`` tagged with the handle's url/id and state."""
        result = curl_result(c)
        result['url'] = c._raw_url
        result['id'] = c._raw_id
        result['state'] = state
        result['spider'] = 'pycurl'
        return result

    @staticmethod
    def _deliver(future, result=None, error=None):
        """Resolve ``future`` unless it is missing or already finished.

        A waiter that was cancelled leaves a finished (cancelled) future
        behind; calling set_result()/set_exception() on it would raise
        ``InvalidStateError`` and abort the caller of perform().
        """
        if future is None or future.done():
            return
        if error is not None:
            future.set_exception(error)
        else:
            future.set_result(result)

    @classmethod
    async def handler_ready(cls, c):
        """Run the transfer configured on ``c`` and return its result dict.

        Args:
            c: a curl handle carrying ``_raw_url``, ``_raw_id`` and
                ``_raw_payload`` attributes, which are echoed into the
                returned dict.

        Returns:
            The dict produced by ``perform()`` on success (``state`` is
            ``'normal'``).  If the transfer failed, a dict with ``state``
            ``'error'`` and the reported code/description.  For any other
            ``Exception`` raised while waiting, a dict with ``state``
            ``'critical'`` and ``error_code`` ``-1``.
        """
        future = aio.Future()
        # Attach to the multi stack first: if add_handle() raises, no orphan
        # future is left behind and an existing entry is not overwritten.
        cls._multi.add_handle(c)
        cls._futures[c] = future
        try:
            try:
                return await future
            except CurlLoop.CurlException as e:
                return cls._error_reply(c, 'error', e.code, e.desc)
            except Exception as e:
                return cls._error_reply(
                    c, 'critical', -1, "{} - {}".format(type(e), str(e)))
        finally:
            # perform() pops the entry when it resolves the future; on any
            # other exit (e.g. cancellation) drop it here so it cannot leak.
            cls._futures.pop(c, None)
            cls._multi.remove_handle(c)

    @classmethod
    def perform(cls):
        """Drive pending transfers and resolve the futures of finished ones.

        Does nothing when no future is registered.  Otherwise calls
        ``CurlMulti.perform()`` until it stops asking to be called again,
        then drains ``CurlMulti.info_read()``: successful handles resolve
        their future with a result dict, failed handles resolve it with a
        ``CurlException``.
        """
        if not cls._futures:
            return

        while True:
            status, _ = cls._multi.perform()
            if status != pycurl.E_CALL_MULTI_PERFORM:
                break

        while True:
            num_ready, success, fail = cls._multi.info_read()
            for c in success:
                future = cls._futures.pop(c, None)
                result = cls._tagged_result(c, 'normal')
                result['payload'] = c._raw_payload
                cls._deliver(future, result=result)
            for c, err_num, err_msg in fail:
                print('error:', err_num, err_msg, c.getinfo(pycurl.EFFECTIVE_URL))
                future = cls._futures.pop(c, None)
                result = cls._tagged_result(c, 'error')
                result['error_code'] = err_num
                result['error_desc'] = err_msg
                result['payload'] = c._raw_payload
                cls._deliver(
                    future,
                    error=CurlLoop.CurlException(
                        code=err_num, desc=err_msg, data=result))
            # info_read() reports how many messages are still queued.
            if num_ready == 0:
                break
async def curl_loop(interval=0):
    """Pump ``CurlLoop.perform()`` forever from inside the event loop.

    Args:
        interval: seconds passed to ``asyncio.sleep`` between pumps.  The
            default ``0`` only yields control to the event loop on each
            iteration (the historical behaviour), which spins continuously;
            pass a small positive value to reduce idle CPU usage at the
            cost of some latency.

    This coroutine never returns on its own; it ends only when its task is
    cancelled or ``CurlLoop.perform()`` raises.
    """
    while True:
        await aio.sleep(interval)
        CurlLoop.perform()