/
job.py
583 lines (482 loc) · 19.3 KB
/
job.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
# -*- coding: utf-8 -*-
from __future__ import (absolute_import, division, print_function,
unicode_literals)
import inspect
import warnings
from functools import partial
from uuid import uuid4
from rq.compat import as_text, decode_redis_hash, string_types, text_type
from .connections import resolve_connection
from .exceptions import NoSuchJobError, UnpickleError
from .local import LocalStack
from .utils import enum, import_attribute, utcformat, utcnow, utcparse
try:
import cPickle as pickle
except ImportError: # noqa
import pickle
# Serialize pickle dumps using the highest pickle protocol (binary, default
# uses ascii)
dumps = partial(pickle.dumps, protocol=pickle.HIGHEST_PROTOCOL)
loads = pickle.loads
JobStatus = enum(
'JobStatus',
QUEUED='queued',
FINISHED='finished',
FAILED='failed',
STARTED='started',
DEFERRED='deferred'
)
# Sentinel value to mark that some of our lazily evaluated properties have not
# yet been evaluated.
UNEVALUATED = object()
def unpickle(pickled_string):
"""Unpickles a string, but raises a unified UnpickleError in case anything
fails.
This is a helper method to not have to deal with the fact that `loads()`
potentially raises many types of exceptions (e.g. AttributeError,
IndexError, TypeError, KeyError, etc.)
"""
try:
obj = loads(pickled_string)
except Exception as e:
raise UnpickleError('Could not unpickle', pickled_string, e)
return obj
def cancel_job(job_id, connection=None):
"""Cancels the job with the given job ID, preventing execution. Discards
any job info (i.e. it can't be requeued later).
"""
Job.fetch(job_id, connection=connection).cancel()
def requeue_job(job_id, connection=None):
"""Requeues the job with the given job ID. If no such job exists, just
remove the job ID from the failed queue, otherwise the job ID should refer
to a failed job (i.e. it should be on the failed queue).
"""
from .queue import get_failed_queue
fq = get_failed_queue(connection=connection)
fq.requeue(job_id)
def get_current_job(connection=None):
"""Returns the Job instance that is currently being executed. If this
function is invoked from outside a job context, None is returned.
"""
job_id = _job_stack.top
if job_id is None:
return None
return Job.fetch(job_id, connection=connection)
class Job(object):
"""A Job is just a convenient datastructure to pass around job (meta) data.
"""
# Job construction
@classmethod
def create(cls, func, args=None, kwargs=None, connection=None,
result_ttl=None, ttl=None, status=None, description=None,
depends_on=None, timeout=None, id=None, origin=None, meta=None):
"""Creates a new Job instance for the given function, arguments, and
keyword arguments.
"""
if args is None:
args = ()
if kwargs is None:
kwargs = {}
if not isinstance(args, (tuple, list)):
raise TypeError('{0!r} is not a valid args list'.format(args))
if not isinstance(kwargs, dict):
raise TypeError('{0!r} is not a valid kwargs dict'.format(kwargs))
job = cls(connection=connection)
if id is not None:
job.set_id(id)
if origin is not None:
job.origin = origin
# Set the core job tuple properties
job._instance = None
if inspect.ismethod(func):
job._instance = func.__self__
job._func_name = func.__name__
elif inspect.isfunction(func) or inspect.isbuiltin(func):
job._func_name = '{0}.{1}'.format(func.__module__, func.__name__)
elif isinstance(func, string_types):
job._func_name = as_text(func)
elif not inspect.isclass(func) and hasattr(func, '__call__'): # a callable class instance
job._instance = func
job._func_name = '__call__'
else:
raise TypeError('Expected a callable or a string, but got: {}'.format(func))
job._args = args
job._kwargs = kwargs
# Extra meta data
job.description = description or job.get_call_string()
job.result_ttl = result_ttl
job.ttl = ttl
job.timeout = timeout
job._status = status
job.meta = meta or {}
# dependency could be job instance or id
if depends_on is not None:
job._dependency_id = depends_on.id if isinstance(depends_on, Job) else depends_on
return job
def get_status(self):
self._status = as_text(self.connection.hget(self.key, 'status'))
return self._status
def _get_status(self):
warnings.warn(
"job.status is deprecated. Use job.get_status() instead",
DeprecationWarning
)
return self.get_status()
def set_status(self, status, pipeline=None):
self._status = status
self.connection._hset(self.key, 'status', self._status, pipeline)
def _set_status(self, status):
warnings.warn(
"job.status is deprecated. Use job.set_status() instead",
DeprecationWarning
)
self.set_status(status)
status = property(_get_status, _set_status)
@property
def is_finished(self):
return self.get_status() == JobStatus.FINISHED
@property
def is_queued(self):
return self.get_status() == JobStatus.QUEUED
@property
def is_failed(self):
return self.get_status() == JobStatus.FAILED
@property
def is_started(self):
return self.get_status() == JobStatus.STARTED
@property
def dependency(self):
"""Returns a job's dependency. To avoid repeated Redis fetches, we cache
job.dependency as job._dependency.
"""
if self._dependency_id is None:
return None
if hasattr(self, '_dependency'):
return self._dependency
job = Job.fetch(self._dependency_id, connection=self.connection)
job.refresh()
self._dependency = job
return job
@property
def func(self):
func_name = self.func_name
if func_name is None:
return None
if self.instance:
return getattr(self.instance, func_name)
return import_attribute(self.func_name)
def _unpickle_data(self):
self._func_name, self._instance, self._args, self._kwargs = unpickle(self.data)
@property
def data(self):
if self._data is UNEVALUATED:
if self._func_name is UNEVALUATED:
raise ValueError('Cannot build the job data')
if self._instance is UNEVALUATED:
self._instance = None
if self._args is UNEVALUATED:
self._args = ()
if self._kwargs is UNEVALUATED:
self._kwargs = {}
job_tuple = self._func_name, self._instance, self._args, self._kwargs
self._data = dumps(job_tuple)
return self._data
@data.setter
def data(self, value):
self._data = value
self._func_name = UNEVALUATED
self._instance = UNEVALUATED
self._args = UNEVALUATED
self._kwargs = UNEVALUATED
@property
def func_name(self):
if self._func_name is UNEVALUATED:
self._unpickle_data()
return self._func_name
@func_name.setter
def func_name(self, value):
self._func_name = value
self._data = UNEVALUATED
@property
def instance(self):
if self._instance is UNEVALUATED:
self._unpickle_data()
return self._instance
@instance.setter
def instance(self, value):
self._instance = value
self._data = UNEVALUATED
@property
def args(self):
if self._args is UNEVALUATED:
self._unpickle_data()
return self._args
@args.setter
def args(self, value):
self._args = value
self._data = UNEVALUATED
@property
def kwargs(self):
if self._kwargs is UNEVALUATED:
self._unpickle_data()
return self._kwargs
@kwargs.setter
def kwargs(self, value):
self._kwargs = value
self._data = UNEVALUATED
@classmethod
def exists(cls, job_id, connection=None):
"""Returns whether a job hash exists for the given job ID."""
conn = resolve_connection(connection)
return conn.exists(cls.key_for(job_id))
@classmethod
def fetch(cls, id, connection=None):
"""Fetches a persisted job from its corresponding Redis key and
instantiates it.
"""
job = cls(id, connection=connection)
job.refresh()
return job
def __init__(self, id=None, connection=None):
self.connection = resolve_connection(connection)
self._id = id
self.created_at = utcnow()
self._data = UNEVALUATED
self._func_name = UNEVALUATED
self._instance = UNEVALUATED
self._args = UNEVALUATED
self._kwargs = UNEVALUATED
self.description = None
self.origin = None
self.enqueued_at = None
self.started_at = None
self.ended_at = None
self._result = None
self.exc_info = None
self.timeout = None
self.result_ttl = None
self.ttl = None
self._status = None
self._dependency_id = None
self.meta = {}
def __repr__(self): # noqa
return 'Job({0!r}, enqueued_at={1!r})'.format(self._id, self.enqueued_at)
# Data access
def get_id(self): # noqa
"""The job ID for this job instance. Generates an ID lazily the
first time the ID is requested.
"""
if self._id is None:
self._id = text_type(uuid4())
return self._id
def set_id(self, value):
"""Sets a job ID for the given job."""
if not isinstance(value, string_types):
raise TypeError('id must be a string, not {0}'.format(type(value)))
self._id = value
id = property(get_id, set_id)
@classmethod
def key_for(cls, job_id):
"""The Redis key that is used to store job hash under."""
return b'rq:job:' + job_id.encode('utf-8')
@classmethod
def dependents_key_for(cls, job_id):
"""The Redis key that is used to store job hash under."""
return 'rq:job:{0}:dependents'.format(job_id)
@property
def key(self):
"""The Redis key that is used to store job hash under."""
return self.key_for(self.id)
@property
def dependents_key(self):
"""The Redis key that is used to store job hash under."""
return self.dependents_key_for(self.id)
@property
def result(self):
"""Returns the return value of the job.
Initially, right after enqueueing a job, the return value will be
None. But when the job has been executed, and had a return value or
exception, this will return that value or exception.
Note that, when the job has no return value (i.e. returns None), the
ReadOnlyJob object is useless, as the result won't be written back to
Redis.
Also note that you cannot draw the conclusion that a job has _not_
been executed when its return value is None, since return values
written back to Redis will expire after a given amount of time (500
seconds by default).
"""
if self._result is None:
rv = self.connection.hget(self.key, 'result')
if rv is not None:
# cache the result
self._result = loads(rv)
return self._result
"""Backwards-compatibility accessor property `return_value`."""
return_value = result
# Persistence
def refresh(self): # noqa
"""Overwrite the current instance's properties with the values in the
corresponding Redis key.
Will raise a NoSuchJobError if no corresponding Redis key exists.
"""
key = self.key
obj = decode_redis_hash(self.connection.hgetall(key))
if len(obj) == 0:
raise NoSuchJobError('No such job: {0}'.format(key))
def to_date(date_str):
if date_str is None:
return
else:
return utcparse(as_text(date_str))
try:
self.data = obj['data']
except KeyError:
raise NoSuchJobError('Unexpected job format: {0}'.format(obj))
self.created_at = to_date(as_text(obj.get('created_at')))
self.origin = as_text(obj.get('origin'))
self.description = as_text(obj.get('description'))
self.enqueued_at = to_date(as_text(obj.get('enqueued_at')))
self.started_at = to_date(as_text(obj.get('started_at')))
self.ended_at = to_date(as_text(obj.get('ended_at')))
self._result = unpickle(obj.get('result')) if obj.get('result') else None # noqa
self.exc_info = as_text(obj.get('exc_info'))
self.timeout = int(obj.get('timeout')) if obj.get('timeout') else None
self.result_ttl = int(obj.get('result_ttl')) if obj.get('result_ttl') else None # noqa
self._status = as_text(obj.get('status') if obj.get('status') else None)
self._dependency_id = as_text(obj.get('dependency_id', None))
self.ttl = int(obj.get('ttl')) if obj.get('ttl') else None
self.meta = unpickle(obj.get('meta')) if obj.get('meta') else {}
def to_dict(self):
"""Returns a serialization of the current job instance"""
obj = {}
obj['created_at'] = utcformat(self.created_at or utcnow())
obj['data'] = self.data
if self.origin is not None:
obj['origin'] = self.origin
if self.description is not None:
obj['description'] = self.description
if self.enqueued_at is not None:
obj['enqueued_at'] = utcformat(self.enqueued_at)
if self.started_at is not None:
obj['started_at'] = utcformat(self.started_at)
if self.ended_at is not None:
obj['ended_at'] = utcformat(self.ended_at)
if self._result is not None:
obj['result'] = dumps(self._result)
if self.exc_info is not None:
obj['exc_info'] = self.exc_info
if self.timeout is not None:
obj['timeout'] = self.timeout
if self.result_ttl is not None:
obj['result_ttl'] = self.result_ttl
if self._status is not None:
obj['status'] = self._status
if self._dependency_id is not None:
obj['dependency_id'] = self._dependency_id
if self.meta:
obj['meta'] = dumps(self.meta)
if self.ttl:
obj['ttl'] = self.ttl
return obj
def save(self, pipeline=None):
"""Persists the current job instance to its corresponding Redis key."""
key = self.key
connection = pipeline if pipeline is not None else self.connection
connection.hmset(key, self.to_dict())
self.cleanup(self.ttl, pipeline=connection)
def cancel(self):
"""Cancels the given job, which will prevent the job from ever being
ran (or inspected).
This method merely exists as a high-level API call to cancel jobs
without worrying about the internals required to implement job
cancellation.
"""
from .queue import Queue, get_failed_queue
pipeline = self.connection._pipeline()
if self.origin:
q = (get_failed_queue(connection=self.connection)
if self.is_failed
else Queue(name=self.origin, connection=self.connection))
q.remove(self, pipeline=pipeline)
pipeline.execute()
def delete(self, pipeline=None, remove_from_queue=True):
"""Cancels the job and deletes the job hash from Redis."""
if remove_from_queue:
self.cancel()
connection = pipeline if pipeline is not None else self.connection
connection.delete(self.key)
connection.delete(self.dependents_key)
# Job execution
def perform(self): # noqa
"""Invokes the job function with the job arguments."""
self.connection.persist(self.key)
self.ttl = -1
_job_stack.push(self.id)
try:
self._result = self.func(*self.args, **self.kwargs)
finally:
assert self.id == _job_stack.pop()
return self._result
def get_ttl(self, default_ttl=None):
"""Returns ttl for a job that determines how long a job will be
persisted. In the future, this method will also be responsible
for determining ttl for repeated jobs.
"""
return default_ttl if self.ttl is None else self.ttl
def get_result_ttl(self, default_ttl=None):
"""Returns ttl for a job that determines how long a jobs result will
be persisted. In the future, this method will also be responsible
for determining ttl for repeated jobs.
"""
return default_ttl if self.result_ttl is None else self.result_ttl
# Representation
def get_call_string(self): # noqa
"""Returns a string representation of the call, formatted as a regular
Python function invocation statement.
"""
if self.func_name is None:
return None
arg_list = [as_text(repr(arg)) for arg in self.args]
kwargs = ['{0}={1}'.format(k, as_text(repr(v))) for k, v in self.kwargs.items()]
# Sort here because python 3.3 & 3.4 makes different call_string
arg_list += sorted(kwargs)
args = ', '.join(arg_list)
return '{0}({1})'.format(self.func_name, args)
def cleanup(self, ttl=None, pipeline=None, remove_from_queue=True):
"""Prepare job for eventual deletion (if needed). This method is usually
called after successful execution. How long we persist the job and its
result depends on the value of ttl:
- If ttl is 0, cleanup the job immediately.
- If it's a positive number, set the job to expire in X seconds.
- If ttl is negative, don't set an expiry to it (persist
forever)
"""
if ttl == 0:
self.delete(remove_from_queue=remove_from_queue)
elif not ttl:
return
elif ttl > 0:
connection = pipeline if pipeline is not None else self.connection
connection.expire(self.key, ttl)
def register_dependency(self, pipeline=None):
"""Jobs may have dependencies. Jobs are enqueued only if the job they
depend on is successfully performed. We record this relation as
a reverse dependency (a Redis set), with a key that looks something
like:
rq:job:job_id:dependents = {'job_id_1', 'job_id_2'}
This method adds the job in its dependency's dependents set
and adds the job to DeferredJobRegistry.
"""
from .registry import DeferredJobRegistry
registry = DeferredJobRegistry(self.origin, connection=self.connection)
registry.add(self, pipeline=pipeline)
connection = pipeline if pipeline is not None else self.connection
connection.sadd(Job.dependents_key_for(self._dependency_id), self.id)
def __str__(self):
return '<Job {0}: {1}>'.format(self.id, self.description)
# Job equality
def __eq__(self, other): # noqa
return isinstance(other, self.__class__) and self.id == other.id
def __hash__(self):
return hash(self.id)
_job_stack = LocalStack()