connections.py
import asyncio
import functools
import logging
from dataclasses import dataclass
from datetime import datetime, timedelta
from operator import attrgetter
from ssl import SSLContext
from typing import Any, List, Optional, Tuple, Union
from uuid import uuid4

import aioredis
from aioredis import MultiExecError, Redis

from .constants import default_queue_name, job_key_prefix, result_key_prefix
from .jobs import Deserializer, Job, JobDef, JobResult, Serializer, deserialize_job, serialize_job
from .utils import timestamp_ms, to_ms, to_unix_ms

logger = logging.getLogger('arq.connections')
@dataclass
class RedisSettings:
    """
    No-Op class used to hold redis connection settings.

    Used by :func:`arq.connections.create_pool` and :class:`arq.worker.Worker`.
    """

    host: Union[str, List[Tuple[str, int]]] = 'localhost'
    port: int = 6379
    database: int = 0
    password: Optional[str] = None
    ssl: Union[bool, None, SSLContext] = None
    conn_timeout: int = 1
    conn_retries: int = 5
    conn_retry_delay: int = 1

    sentinel: bool = False
    sentinel_master: str = 'mymaster'

    def __repr__(self):
        return '<RedisSettings {}>'.format(' '.join(f'{k}={v}' for k, v in self.__dict__.items()))
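
# Usage sketch (illustrative, not part of the library): the host names below are
# assumptions. For sentinel, ``host`` becomes a list of ``(host, port)`` pairs.
#
#     settings = RedisSettings(host='localhost', port=6379, database=0)
#     sentinel_settings = RedisSettings(
#         host=[('sentinel-1', 26379), ('sentinel-2', 26379)], sentinel=True, sentinel_master='mymaster'
#     )
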
# extra time after the job is expected to start when the job key should expire, 1 day in ms
expires_extra_ms = 86_400_000
class ArqRedis(Redis):
    """
    Thin subclass of ``aioredis.Redis`` which adds :func:`arq.connections.enqueue_job`.

    :param pool_or_conn: connection or pool of connections passed directly to ``aioredis.Redis``.
    :param job_serializer: a function that serializes Python objects to bytes, defaults to pickle.dumps
    :param job_deserializer: a function that deserializes bytes into Python objects, defaults to pickle.loads
    :param kwargs: keyword arguments directly passed to ``aioredis.Redis``.
    """

    def __init__(
        self,
        pool_or_conn,
        job_serializer: Optional[Serializer] = None,
        job_deserializer: Optional[Deserializer] = None,
        **kwargs,
    ) -> None:
        self.job_serializer = job_serializer
        self.job_deserializer = job_deserializer
        super().__init__(pool_or_conn, **kwargs)
    async def enqueue_job(
        self,
        function: str,
        *args: Any,
        _job_id: Optional[str] = None,
        _queue_name: str = default_queue_name,
        _defer_until: Optional[datetime] = None,
        _defer_by: Union[None, int, float, timedelta] = None,
        _expires: Union[None, int, float, timedelta] = None,
        _job_try: Optional[int] = None,
        **kwargs: Any,
    ) -> Optional[Job]:
        """
        Enqueue a job.

        :param function: Name of the function to call
        :param args: args to pass to the function
        :param _job_id: ID of the job, can be used to enforce job uniqueness
        :param _queue_name: queue of the job, can be used to create job in different queue
        :param _defer_until: datetime at which to run the job
        :param _defer_by: duration to wait before running the job
        :param _expires: if the job still hasn't started after this duration, do not run it
        :param _job_try: useful when re-enqueueing jobs within a job
        :param kwargs: any keyword arguments to pass to the function
        :return: :class:`arq.jobs.Job` instance or ``None`` if a job with this ID already exists
        """
        job_id = _job_id or uuid4().hex
        job_key = job_key_prefix + job_id
        assert not (_defer_until and _defer_by), "use either 'defer_until' or 'defer_by' or neither, not both"

        defer_by_ms = to_ms(_defer_by)
        expires_ms = to_ms(_expires)

        with await self as conn:
            # watch the job key, then check whether the job or its result already exists;
            # if either does, don't enqueue a duplicate
            pipe = conn.pipeline()
            pipe.unwatch()
            pipe.watch(job_key)
            job_exists = pipe.exists(job_key)
            job_result_exists = pipe.exists(result_key_prefix + job_id)
            await pipe.execute()
            if await job_exists or await job_result_exists:
                return

            # the sorted-set score is the unix timestamp (in ms) at which the job should run
            enqueue_time_ms = timestamp_ms()
            if _defer_until is not None:
                score = to_unix_ms(_defer_until)
            elif defer_by_ms:
                score = enqueue_time_ms + defer_by_ms
            else:
                score = enqueue_time_ms

            expires_ms = expires_ms or score - enqueue_time_ms + expires_extra_ms

            job = serialize_job(function, args, kwargs, _job_try, enqueue_time_ms, serializer=self.job_serializer)
            tr = conn.multi_exec()
            tr.psetex(job_key, expires_ms, job)
            tr.zadd(_queue_name, score, job_id)
            try:
                await tr.execute()
            except MultiExecError:
                # job got enqueued since we checked 'job_exists'
                # https://github.com/samuelcolvin/arq/issues/131, avoid warnings in log
                await asyncio.gather(*tr._results, return_exceptions=True)
                return
        return Job(job_id, redis=self, _queue_name=_queue_name, _deserializer=self.job_deserializer)
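    # Usage sketch for enqueue_job (illustrative, not part of the library): the pool would
    # normally come from create_pool(), and 'download_content' is an assumed name of a
    # function registered with the worker.
    #
    #     redis = await create_pool(RedisSettings())
    #     job = await redis.enqueue_job('download_content', 'https://example.com', _defer_by=timedelta(seconds=10))
    #     if job is None:
    #         ...  # a job with this ID already exists or has a stored result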
    async def _get_job_result(self, key) -> JobResult:
        job_id = key[len(result_key_prefix) :]
        job = Job(job_id, self, _deserializer=self.job_deserializer)
        r = await job.result_info()
        r.job_id = job_id
        return r

    async def all_job_results(self) -> List[JobResult]:
        """
        Get results for all jobs in redis.
        """
        keys = await self.keys(result_key_prefix + '*')
        results = await asyncio.gather(*[self._get_job_result(k) for k in keys])
        return sorted(results, key=attrgetter('enqueue_time'))

    async def _get_job_def(self, job_id, score) -> JobDef:
        v = await self.get(job_key_prefix + job_id, encoding=None)
        jd = deserialize_job(v, deserializer=self.job_deserializer)
        jd.score = score
        return jd

    async def queued_jobs(self, *, queue_name: str = default_queue_name) -> List[JobDef]:
        """
        Get information about queued jobs, mostly useful when testing.
        """
        jobs = await self.zrange(queue_name, withscores=True)
        return await asyncio.gather(*[self._get_job_def(job_id, score) for job_id, score in jobs])
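
# Inspection sketch (illustrative): queued_jobs() and all_job_results() are mainly useful
# in tests, to assert what has been enqueued or what has completed.
#
#     queued = await redis.queued_jobs()       # List[JobDef], in sorted-set (score) order
#     results = await redis.all_job_results()  # List[JobResult], sorted by enqueue_time
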
async def create_pool(
    settings: RedisSettings = None,
    *,
    retry: int = 0,
    job_serializer: Optional[Serializer] = None,
    job_deserializer: Optional[Deserializer] = None,
) -> ArqRedis:
    """
    Create a new redis pool, retrying up to ``conn_retries`` times if the connection fails.

    Similar to ``aioredis.create_redis_pool`` except it returns a :class:`arq.connections.ArqRedis` instance,
    thus allowing job enqueuing.
    """
    settings = settings or RedisSettings()

    assert not (
        type(settings.host) is str and settings.sentinel
    ), "str provided for 'host' but 'sentinel' is true; list of sentinels expected"

    if settings.sentinel:
        addr = settings.host

        async def pool_factory(*args, **kwargs):
            client = await aioredis.sentinel.create_sentinel_pool(*args, ssl=settings.ssl, **kwargs)
            return client.master_for(settings.sentinel_master)

    else:
        pool_factory = functools.partial(
            aioredis.create_pool, create_connection_timeout=settings.conn_timeout, ssl=settings.ssl
        )
        addr = settings.host, settings.port

    try:
        pool = await pool_factory(addr, db=settings.database, password=settings.password, encoding='utf8')
        pool = ArqRedis(pool, job_serializer=job_serializer, job_deserializer=job_deserializer)

    except (ConnectionError, OSError, aioredis.RedisError, asyncio.TimeoutError) as e:
        if retry < settings.conn_retries:
            logger.warning(
                'redis connection error %s %s %s, %d retries remaining...',
                addr,
                e.__class__.__name__,
                e,
                settings.conn_retries - retry,
            )
            await asyncio.sleep(settings.conn_retry_delay)
        else:
            raise
    else:
        if retry > 0:
            logger.info('redis connection successful')
        return pool

    # recursively attempt to create the pool outside the except block to avoid
    # "During handling of the above exception..." madness
    return await create_pool(
        settings, retry=retry + 1, job_serializer=job_serializer, job_deserializer=job_deserializer
    )
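
# Usage sketch (illustrative): create_pool is typically awaited once at application startup.
# With no arguments it uses the RedisSettings defaults (localhost:6379, db 0) and retries
# up to conn_retries times before the final exception propagates; the host and password
# below are assumptions.
#
#     redis = await create_pool(RedisSettings(host='redis.example.com', password='secret'))
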
async def log_redis_info(redis, log_func):
    with await redis as r:
        info, key_count = await asyncio.gather(r.info(), r.dbsize())
    log_func(
        f'redis_version={info["server"]["redis_version"]} '
        f'mem_usage={info["memory"]["used_memory_human"]} '
        f'clients_connected={info["clients"]["connected_clients"]} '
        f'db_keys={key_count}'
    )
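
# Usage sketch (illustrative): any callable taking a single string works as log_func,
# e.g. a bound logger method.
#
#     await log_redis_info(redis, logger.info)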