-
Notifications
You must be signed in to change notification settings - Fork 82
/
scheduler.go
603 lines (539 loc) · 16.4 KB
/
scheduler.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
package quartz
import (
"context"
"errors"
"fmt"
"math"
"sync"
"time"
"github.com/reugn/go-quartz/logger"
)
// ScheduledJob represents a scheduled Job with the Trigger associated
// with it and the next run epoch time.
type ScheduledJob interface {
	// JobDetail returns the details of the scheduled job.
	JobDetail() *JobDetail

	// Trigger returns the Trigger associated with the scheduled job.
	Trigger() Trigger

	// NextRunTime returns the next epoch time the job is due to run,
	// in nanoseconds (compared against NowNano elsewhere in this file).
	NextRunTime() int64
}
// Scheduler represents a Job orchestrator.
// Schedulers are responsible for executing Jobs when their associated
// Triggers fire (when their scheduled time arrives).
type Scheduler interface {
	// Start starts the scheduler. The scheduler will run until
	// the Stop method is called or the context is canceled. Use
	// the Wait method to block until all running jobs have completed.
	Start(context.Context)

	// IsStarted determines whether the scheduler has been started.
	IsStarted() bool

	// ScheduleJob schedules a job using a specified trigger.
	ScheduleJob(jobDetail *JobDetail, trigger Trigger) error

	// GetJobKeys returns the keys of scheduled jobs.
	// For a job key to be returned, the job must satisfy all of the
	// matchers specified.
	// Given no matchers, it returns the keys of all scheduled jobs.
	GetJobKeys(...Matcher[ScheduledJob]) ([]*JobKey, error)

	// GetScheduledJob returns the scheduled job with the specified key.
	GetScheduledJob(jobKey *JobKey) (ScheduledJob, error)

	// DeleteJob removes the job with the specified key from the
	// scheduler's execution queue.
	DeleteJob(jobKey *JobKey) error

	// PauseJob suspends the job with the specified key from being
	// executed by the scheduler.
	PauseJob(jobKey *JobKey) error

	// ResumeJob restarts the suspended job with the specified key.
	ResumeJob(jobKey *JobKey) error

	// Clear removes all of the scheduled jobs.
	Clear() error

	// Wait blocks until the scheduler stops running and all jobs have
	// returned, or until the context passed to Wait expires. The
	// scheduler stops when the context passed to Start is canceled or
	// Stop is called directly.
	Wait(context.Context)

	// Stop shuts down the scheduler.
	Stop()
}
// StdScheduler implements the quartz.Scheduler interface.
type StdScheduler struct {
	mtx       sync.Mutex     // guards the lifecycle state: started and cancel
	wg        sync.WaitGroup // tracks the execution loop, workers, and in-flight jobs
	queue     JobQueue       // scheduled jobs, prioritized by next run time
	queueMtx  sync.Locker    // guards all operations on queue
	interrupt chan struct{}  // buffered(1); signals the loop to recalculate the next tick
	cancel    context.CancelFunc
	feeder    chan ScheduledJob // NOTE(review): appears unused in this file — verify against the full repository
	dispatch  chan ScheduledJob // hands jobs off to the worker pool when WorkerLimit > 0
	started   bool
	opts      StdSchedulerOptions
}
// StdSchedulerOptions configures the behavior of a StdScheduler.
type StdSchedulerOptions struct {
	// When true, the scheduler will run jobs synchronously, waiting
	// for each execution instance of the job to return before starting
	// the next execution. Running with this option effectively serializes
	// all job execution.
	BlockingExecution bool

	// When greater than 0, all jobs will be dispatched to a pool of
	// goroutines of WorkerLimit size to limit the total number of processes
	// usable by the scheduler. If all worker threads are in use, job
	// scheduling will wait till a job can be dispatched.
	// If BlockingExecution is set, then WorkerLimit is ignored.
	WorkerLimit int

	// When the scheduler attempts to execute a job, if the time elapsed
	// since the job's scheduled execution time is less than or equal to the
	// configured threshold, the scheduler will execute the job.
	// Otherwise, the job will be rescheduled as outdated. By default,
	// NewStdScheduler sets the threshold to 100ms.
	//
	// As a rule of thumb, your OutdatedThreshold should always be
	// greater than 0, but less than the shortest interval used by
	// your job or jobs.
	OutdatedThreshold time.Duration

	// This retry interval will be used if the scheduler fails to
	// calculate the next time to interrupt for job execution. By default,
	// the NewStdScheduler constructor sets this interval to 100
	// milliseconds. Changing the default value may be beneficial when
	// using a custom implementation of the JobQueue, where operations
	// may timeout or fail.
	RetryInterval time.Duration

	// MisfiredChan allows the creation of event listeners to handle jobs that
	// have failed to be executed on time and have been skipped by the scheduler.
	//
	// Misfires can occur due to insufficient resources or scheduler downtime.
	// Adjust OutdatedThreshold to establish an acceptable delay time and
	// ensure regular job execution.
	MisfiredChan chan ScheduledJob
}
// Verify StdScheduler satisfies the Scheduler interface.
var _ Scheduler = (*StdScheduler)(nil)

// NewStdScheduler returns a new StdScheduler with the default configuration.
func NewStdScheduler() Scheduler {
	// 100ms defaults for both the outdated threshold and the retry interval.
	defaults := StdSchedulerOptions{
		OutdatedThreshold: 100 * time.Millisecond,
		RetryInterval:     100 * time.Millisecond,
	}
	return NewStdSchedulerWithOptions(defaults, nil, nil)
}
// NewStdSchedulerWithOptions returns a new StdScheduler configured as specified.
//
// A custom [JobQueue] implementation may be provided to manage scheduled jobs.
// This is useful when distributed mode is required, so that jobs can be stored
// in persistent storage. Pass in nil to use the internal in-memory implementation.
//
// A custom [sync.Locker] may also be provided to ensure that scheduler operations
// on the job queue are atomic when used in distributed mode. Pass in nil to use
// the default [sync.Mutex].
func NewStdSchedulerWithOptions(
	opts StdSchedulerOptions,
	jobQueue JobQueue,
	jobQueueMtx sync.Locker,
) *StdScheduler {
	// Fall back to the in-memory defaults when no custom queue or locker
	// is supplied.
	queue := jobQueue
	if queue == nil {
		queue = NewJobQueue()
	}
	queueLock := jobQueueMtx
	if queueLock == nil {
		queueLock = &sync.Mutex{}
	}
	return &StdScheduler{
		queue:     queue,
		queueMtx:  queueLock,
		interrupt: make(chan struct{}, 1), // buffered so Reset never blocks
		feeder:    make(chan ScheduledJob),
		dispatch:  make(chan ScheduledJob),
		opts:      opts,
	}
}
// ScheduleJob schedules a Job using a specified Trigger.
// It validates the arguments, computes the job's first run time
// (suspended jobs are parked at the lowest priority), and pushes
// the job onto the queue.
func (sched *StdScheduler) ScheduleJob(
	jobDetail *JobDetail,
	trigger Trigger,
) error {
	sched.queueMtx.Lock()
	defer sched.queueMtx.Unlock()

	// validate the arguments
	switch {
	case jobDetail == nil:
		return illegalArgumentError("jobDetail is nil")
	case jobDetail.jobKey == nil:
		return illegalArgumentError("jobDetail.jobKey is nil")
	case jobDetail.jobKey.name == "":
		return illegalArgumentError("empty key name is not allowed")
	case trigger == nil:
		return illegalArgumentError("trigger is nil")
	}

	// a suspended job never fires: park it at the lowest priority
	var (
		nextRunTime int64 = math.MaxInt64
		err         error
	)
	if !jobDetail.opts.Suspended {
		if nextRunTime, err = trigger.NextFireTime(NowNano()); err != nil {
			return err
		}
	}

	toSchedule := &scheduledJob{
		job:      jobDetail,
		trigger:  trigger,
		priority: nextRunTime,
	}
	if err = sched.queue.Push(toSchedule); err != nil {
		return err
	}
	logger.Debugf("Successfully added job %s.", jobDetail.jobKey)
	if sched.started {
		sched.Reset()
	}
	return nil
}
// Start starts the StdScheduler execution loop and, when configured,
// its worker pool. Calling Start on a running scheduler is a no-op.
func (sched *StdScheduler) Start(ctx context.Context) {
	sched.mtx.Lock()
	defer sched.mtx.Unlock()

	if sched.started {
		logger.Info("Scheduler is already running.")
		return
	}

	// stop the scheduler when the provided context is canceled
	ctx, sched.cancel = context.WithCancel(ctx)
	go func() {
		<-ctx.Done()
		sched.Stop()
	}()

	// start scheduler execution loop
	sched.wg.Add(1)
	go sched.startExecutionLoop(ctx)

	// starts worker pool when WorkerLimit is greater than 0
	sched.startWorkers(ctx)

	sched.started = true
}
// Wait blocks until the scheduler shuts down and all tracked goroutines
// have returned, or until the provided context expires.
func (sched *StdScheduler) Wait(ctx context.Context) {
	done := make(chan struct{})
	go func() {
		defer close(done)
		sched.wg.Wait()
	}()
	select {
	case <-done:
	case <-ctx.Done():
	}
}
// IsStarted determines whether the scheduler has been started.
func (sched *StdScheduler) IsStarted() bool {
	sched.mtx.Lock()
	started := sched.started
	sched.mtx.Unlock()
	return started
}
// GetJobKeys returns the keys of scheduled jobs.
// For a job key to be returned, the job must satisfy all of the matchers specified.
// Given no matchers, it returns the keys of all scheduled jobs.
func (sched *StdScheduler) GetJobKeys(matchers ...Matcher[ScheduledJob]) ([]*JobKey, error) {
	sched.queueMtx.Lock()
	defer sched.queueMtx.Unlock()

	matched, err := sched.queue.ScheduledJobs(matchers)
	if err != nil {
		return nil, err
	}
	jobKeys := make([]*JobKey, len(matched))
	for i, scheduled := range matched {
		jobKeys[i] = scheduled.JobDetail().jobKey
	}
	return jobKeys, nil
}
// GetScheduledJob returns the ScheduledJob with the specified key.
func (sched *StdScheduler) GetScheduledJob(jobKey *JobKey) (ScheduledJob, error) {
	if jobKey == nil {
		return nil, illegalArgumentError("jobKey is nil")
	}
	sched.queueMtx.Lock()
	defer sched.queueMtx.Unlock()
	return sched.queue.Get(jobKey)
}
// DeleteJob removes the Job with the specified key if present.
func (sched *StdScheduler) DeleteJob(jobKey *JobKey) error {
	sched.queueMtx.Lock()
	defer sched.queueMtx.Unlock()

	if jobKey == nil {
		return illegalArgumentError("jobKey is nil")
	}
	if _, err := sched.queue.Remove(jobKey); err != nil {
		return err
	}
	logger.Debugf("Successfully deleted job %s.", jobKey)
	if sched.started {
		// make the execution loop recalculate its next tick
		sched.Reset()
	}
	return nil
}
// PauseJob suspends the job with the specified key from being
// executed by the scheduler. The job is re-queued at the lowest
// priority so it never comes due.
func (sched *StdScheduler) PauseJob(jobKey *JobKey) error {
	sched.queueMtx.Lock()
	defer sched.queueMtx.Unlock()

	if jobKey == nil {
		return illegalArgumentError("jobKey is nil")
	}
	job, err := sched.queue.Get(jobKey)
	if err != nil {
		return err
	}
	if job.JobDetail().opts.Suspended {
		return illegalStateError(fmt.Sprintf("job %s is suspended", jobKey))
	}
	if job, err = sched.queue.Remove(jobKey); err != nil {
		return err
	}
	job.JobDetail().opts.Suspended = true
	paused := &scheduledJob{
		job:      job.JobDetail(),
		trigger:  job.Trigger(),
		priority: int64(math.MaxInt64), // parked: never due
	}
	if err = sched.queue.Push(paused); err != nil {
		return err
	}
	logger.Debugf("Successfully paused job %s.", jobKey)
	if sched.started {
		sched.Reset()
	}
	return nil
}
// ResumeJob restarts the suspended job with the specified key.
// The job is removed from the queue, marked active, and re-queued
// with its next fire time as the priority.
//
// Returns an error if jobKey is nil, the job cannot be found, the job
// is not suspended, or a queue operation fails.
func (sched *StdScheduler) ResumeJob(jobKey *JobKey) error {
	sched.queueMtx.Lock()
	defer sched.queueMtx.Unlock()

	if jobKey == nil {
		return illegalArgumentError("jobKey is nil")
	}
	job, err := sched.queue.Get(jobKey)
	if err != nil {
		return err
	}
	if !job.JobDetail().opts.Suspended {
		return illegalStateError(fmt.Sprintf("job %s is active", jobKey))
	}
	job, err = sched.queue.Remove(jobKey)
	if err != nil {
		return err
	}
	job.JobDetail().opts.Suspended = false

	// BUG FIX: the previous version declared nextRunTime with := inside a
	// nested block, shadowing err; a subsequent queue.Push failure was
	// assigned to the shadowed variable and silently dropped by the final
	// return. The flattened early-return flow keeps a single err variable.
	nextRunTime, err := job.Trigger().NextFireTime(NowNano())
	if err != nil {
		return err
	}
	resumed := &scheduledJob{
		job:      job.JobDetail(),
		trigger:  job.Trigger(),
		priority: nextRunTime,
	}
	if err = sched.queue.Push(resumed); err != nil {
		return err
	}
	logger.Debugf("Successfully resumed job %s.", jobKey)
	if sched.started {
		sched.Reset()
	}
	return nil
}
// Clear removes all of the scheduled jobs.
func (sched *StdScheduler) Clear() error {
	sched.queueMtx.Lock()
	defer sched.queueMtx.Unlock()

	// reset the job queue
	if err := sched.queue.Clear(); err != nil {
		return err
	}
	logger.Debug("Successfully cleared job queue.")
	if sched.started {
		sched.Reset()
	}
	return nil
}
// Stop exits the StdScheduler execution loop by canceling the
// context created in Start. Calling Stop on a stopped scheduler
// is a no-op.
func (sched *StdScheduler) Stop() {
	sched.mtx.Lock()
	defer sched.mtx.Unlock()

	if !sched.started {
		logger.Info("Scheduler is not running.")
		return
	}

	logger.Info("Closing the StdScheduler.")
	sched.cancel()
	sched.started = false
}
// startExecutionLoop drives job execution: each iteration arms a timer
// for the next due job (or parks indefinitely on an empty queue, or
// retries after a queue error), then waits for the tick, an interrupt
// from Reset, or context cancellation.
func (sched *StdScheduler) startExecutionLoop(ctx context.Context) {
	defer sched.wg.Done()
	const maxTimerDuration = time.Duration(1<<63 - 1)
	timer := time.NewTimer(maxTimerDuration)
	defer timer.Stop()
	for {
		queueSize, err := sched.queue.Size()
		switch {
		case err != nil:
			logger.Errorf("Failed to fetch queue size: %s", err)
			timer.Reset(sched.opts.RetryInterval)
		case queueSize == 0:
			logger.Trace("Queue is empty.")
			// park until Reset signals a queue change
			timer.Reset(maxTimerDuration)
		default:
			timer.Reset(sched.calculateNextTick())
		}
		select {
		case <-timer.C:
			logger.Trace("Tick.")
			sched.executeAndReschedule(ctx)
		case <-sched.interrupt:
			logger.Trace("Interrupted waiting for next tick.")
			// BUG FIX: if the timer fired concurrently with the interrupt,
			// its tick stays buffered in timer.C; Stop() returns false in
			// that case, so drain the channel to prevent the next Reset
			// from producing a spurious immediate tick.
			if !timer.Stop() {
				select {
				case <-timer.C:
				default:
				}
			}
		case <-ctx.Done():
			logger.Info("Exit the execution loop.")
			timer.Stop()
			return
		}
	}
}
// startWorkers launches a pool of WorkerLimit goroutines that execute
// jobs received on the dispatch channel. It is a no-op when
// WorkerLimit is not positive.
func (sched *StdScheduler) startWorkers(ctx context.Context) {
	workers := sched.opts.WorkerLimit
	if workers <= 0 {
		return
	}
	logger.Debugf("Starting %d scheduler workers.", workers)
	sched.wg.Add(workers)
	for i := 0; i < workers; i++ {
		go func() {
			defer sched.wg.Done()
			for {
				select {
				case <-ctx.Done():
					return
				case scheduled := <-sched.dispatch:
					executeWithRetries(ctx, scheduled.JobDetail())
				}
			}
		}()
	}
}
// calculateNextTick returns the duration until the queue head is due.
// It returns zero when the queue is empty or the head is already due,
// and RetryInterval when the head cannot be fetched.
func (sched *StdScheduler) calculateNextTick() time.Duration {
	head, err := sched.queue.Head()
	if err != nil {
		if errors.Is(err, ErrQueueEmpty) {
			logger.Debug("Queue is empty")
			return 0
		}
		logger.Errorf("Failed to calculate next tick: %s", err)
		return sched.opts.RetryInterval
	}
	var tick time.Duration
	now := NowNano()
	if runTime := head.NextRunTime(); runTime > now {
		tick = time.Duration(runTime - now)
	}
	logger.Tracef("Next tick is for %s in %s.", head.JobDetail().jobKey,
		tick)
	return tick
}
// executeAndReschedule pops the head job from the queue, puts it back
// with its next run time, and — when the job is due — runs it according
// to the configured execution mode.
func (sched *StdScheduler) executeAndReschedule(ctx context.Context) {
	// fetch a job for processing
	scheduled, valid := sched.fetchAndReschedule()
	if !valid {
		return
	}
	logger.Debugf("Job %s is about to be executed.", scheduled.JobDetail().jobKey)
	switch {
	case sched.opts.BlockingExecution:
		// run inline, serializing all job execution
		executeWithRetries(ctx, scheduled.JobDetail())
	case sched.opts.WorkerLimit > 0:
		// hand off to the worker pool; abort on cancellation
		select {
		case sched.dispatch <- scheduled:
		case <-ctx.Done():
		}
	default:
		// run in a dedicated goroutine tracked by the wait group
		sched.wg.Add(1)
		go func() {
			defer sched.wg.Done()
			executeWithRetries(ctx, scheduled.JobDetail())
		}()
	}
}
// executeWithRetries runs the job and, on failure, retries up to
// MaxRetries times with RetryInterval between attempts. A panic during
// execution is recovered and logged; context cancellation stops the
// retries.
func executeWithRetries(ctx context.Context, jobDetail *JobDetail) {
	// recover from unhandled panics that may occur during job execution
	defer func() {
		if r := recover(); r != nil {
			logger.Errorf("Job %s panicked: %s", jobDetail.jobKey, r)
		}
	}()

	err := jobDetail.job.Execute(ctx)
	if err == nil {
		return
	}
	for attempt := 1; attempt <= jobDetail.opts.MaxRetries; attempt++ {
		timer := time.NewTimer(jobDetail.opts.RetryInterval)
		canceled := false
		select {
		case <-timer.C:
		case <-ctx.Done():
			timer.Stop()
			canceled = true
		}
		if canceled {
			break // err keeps the last failure for the log below
		}
		logger.Tracef("Job %s retry %d", jobDetail.jobKey, attempt)
		if err = jobDetail.job.Execute(ctx); err == nil {
			break
		}
	}
	if err != nil {
		logger.Warnf("Job %s terminated with error: %s", jobDetail.jobKey, err)
	}
}
// validateJob reports whether the job is due to run now and returns a
// function that computes the job's next run time. Suspended jobs stay
// parked; misfired jobs are reported on MisfiredChan (non-blocking)
// and rescheduled from the current time.
func (sched *StdScheduler) validateJob(job ScheduledJob) (bool, func() (int64, error)) {
	detail := job.JobDetail()
	if detail.opts.Suspended {
		return false, func() (int64, error) { return math.MaxInt64, nil }
	}
	now := NowNano()
	runTime := job.NextRunTime()
	switch {
	case runTime < now-sched.opts.OutdatedThreshold.Nanoseconds():
		// past the outdated threshold: misfire
		logger.Infof("Job %s is outdated %s.", detail.jobKey, time.Duration(now-runTime))
		select {
		case sched.opts.MisfiredChan <- job:
		default:
		}
		return false, func() (int64, error) { return job.Trigger().NextFireTime(now) }
	case runTime > now:
		// not due yet: keep the current run time
		logger.Debugf("Job %s is not due to run yet.", detail.jobKey)
		return false, func() (int64, error) { return job.NextRunTime(), nil }
	}
	// due now: the next run time advances from the scheduled time
	return true, func() (int64, error) { return job.Trigger().NextFireTime(job.NextRunTime()) }
}
// fetchAndReschedule pops the queue head, pushes it back with its
// recalculated run time, and reports whether the popped job is due for
// execution now. It returns (nil, false) when the queue is empty or
// cannot be read.
func (sched *StdScheduler) fetchAndReschedule() (ScheduledJob, bool) {
	sched.queueMtx.Lock()
	defer sched.queueMtx.Unlock()

	// fetch a job for processing
	job, err := sched.queue.Pop()
	if err != nil {
		if errors.Is(err, ErrQueueEmpty) {
			logger.Debug("Queue is empty")
		} else {
			logger.Errorf("Failed to fetch a job from the queue: %s", err)
		}
		return nil, false
	}

	// validate the job and compute its next run time
	valid, nextRunTimeExtractor := sched.validateJob(job)
	nextRunTime, err := nextRunTimeExtractor()
	if err != nil {
		// the trigger has no further fire times: drop the job
		logger.Infof("Job %s exited the execution loop: %s.", job.JobDetail().jobKey, err)
		return job, valid
	}

	// put the job back with the updated priority
	rescheduled := &scheduledJob{
		job:      job.JobDetail(),
		trigger:  job.Trigger(),
		priority: nextRunTime,
	}
	if pushErr := sched.queue.Push(rescheduled); pushErr != nil {
		logger.Errorf("Failed to reschedule job %s, err: %s",
			rescheduled.JobDetail().jobKey, pushErr)
		return job, valid
	}
	logger.Tracef("Successfully rescheduled job %s", rescheduled.JobDetail().jobKey)
	sched.Reset()
	return job, valid
}
// Reset is called internally to recalculate the closest job timing when there
// is an update to the job queue by the scheduler. In cluster mode with a shared
// queue, it can be triggered manually to synchronize with remote changes if one
// of the schedulers fails.
func (sched *StdScheduler) Reset() {
	// Non-blocking send on the buffered(1) interrupt channel: if a signal
	// is already pending, the execution loop will recalculate anyway.
	select {
	case sched.interrupt <- struct{}{}:
	default:
	}
}