-
Notifications
You must be signed in to change notification settings - Fork 9
Re try failed jobs #119
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Re try failed jobs #119
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -76,6 +76,7 @@ workflows: | |
- develop | ||
- feat/ai-workflows | ||
- pm-1955_2 | ||
- re-try-failed-jobs | ||
|
||
|
||
- 'build-prod': | ||
|
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,6 +14,7 @@ import { policies, Queue } from 'pg-boss'; | |
export class QueueSchedulerService implements OnModuleInit, OnModuleDestroy { | ||
private readonly logger: Logger = new Logger(QueueSchedulerService.name); | ||
private boss: PgBoss; | ||
private $start; | ||
|
||
private jobsHandlersMap = new Map< | ||
string, | ||
|
@@ -46,7 +47,7 @@ export class QueueSchedulerService implements OnModuleInit, OnModuleDestroy { | |
return; | ||
} | ||
|
||
await this.boss.start(); | ||
await (this.$start = this.boss.start()); | ||
} | ||
|
||
async onModuleDestroy() { | ||
|
@@ -114,14 +115,35 @@ export class QueueSchedulerService implements OnModuleInit, OnModuleDestroy { | |
return; | ||
} | ||
|
||
if (resolution === 'fail') { | ||
// IMPORTANT! | ||
// thes 4 operations will update the cache for the active singletons in the database | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Typo in comment: 'thes' should be 'these'. |
||
// and will allow the jobs queue to go next or retry | ||
await this.boss.cancel(queueName, jobId); | ||
await this.boss.getQueueStats(queueName); | ||
await this.boss.supervise(queueName); | ||
await this.boss.resume(queueName, jobId); | ||
} | ||
|
||
if (this.jobsHandlersMap.has(jobId)) { | ||
this.logger.log( | ||
`Found job handler for ${jobId}. Calling with '${resolution}' resolution.`, | ||
); | ||
this.jobsHandlersMap.get(jobId)?.call(null, resolution); | ||
this.jobsHandlersMap.delete(jobId); | ||
this.logger.log('JobHandlers left:', [...this.jobsHandlersMap.keys()]); | ||
} else { | ||
await this.boss[resolution](queueName, jobId); | ||
} | ||
|
||
this.logger.log(`Job ${jobId} ${resolution} called.`); | ||
|
||
if (resolution === 'fail') { | ||
const bossJob = await this.boss.getJobById(queueName, jobId); | ||
if (bossJob && bossJob.retryCount >= bossJob.retryLimit) { | ||
throw new Error('Job failed! Retry limit reached!'); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Consider logging a message before throwing an error to provide more context about the failure. |
||
} | ||
} | ||
} | ||
|
||
async handleWorkForQueues<T>( | ||
|
@@ -135,7 +157,7 @@ export class QueueSchedulerService implements OnModuleInit, OnModuleDestroy { | |
return; | ||
} | ||
|
||
await this.boss.start(); | ||
await this.$start; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The line |
||
return Promise.all( | ||
queuesNames.map(async (queueName) => { | ||
const queue = await this.boss.getQueue(queueName); | ||
|
@@ -155,6 +177,7 @@ export class QueueSchedulerService implements OnModuleInit, OnModuleDestroy { | |
jobId: string, | ||
handler: (resolution?: string, result?: any) => void, | ||
) { | ||
this.logger.log(`Registering job handler for job ${jobId}.`); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Consider adding error handling or validation to ensure that |
||
this.jobsHandlersMap.set(jobId, handler); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -100,14 +100,19 @@ export class WorkflowQueueHandler implements OnModuleInit { | |
// return not-resolved promise, | ||
// this will put a pause on the job | ||
// until it is marked as completed via webhook call | ||
return new Promise<void>((resolve, reject) => { | ||
await new Promise<void>((resolve, reject) => { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The change from |
||
this.scheduler.registerJobHandler( | ||
job.id, | ||
(resolution: string = 'complete', result: any) => { | ||
this.logger.log( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Consider providing more context in the log message for better traceability. For example, include the job ID in the log message to make it easier to identify which job the log entry pertains to. |
||
`Job handler called with ${resolution} and ${result}`, | ||
); | ||
(resolution === 'fail' ? reject : resolve)(result); | ||
}, | ||
); | ||
}); | ||
|
||
this.logger.log(`Job ${job.id} promise finished.`); | ||
} | ||
|
||
async handleWorkflowRunEvents(event: { | ||
|
@@ -263,24 +268,6 @@ export class WorkflowQueueHandler implements OnModuleInit { | |
break; | ||
} | ||
|
||
if (conclusion === 'FAILURE') { | ||
await this.scheduler.completeJob( | ||
(aiWorkflowRun as any).workflow.gitWorkflowId, | ||
aiWorkflowRun.scheduledJobId as string, | ||
'fail', | ||
); | ||
|
||
this.logger.log({ | ||
message: 'Workflow job failed. Calling retry.', | ||
aiWorkflowRunId: aiWorkflowRun.id, | ||
gitRunId: event.workflow_job.run_id, | ||
jobId: event.workflow_job.id, | ||
status: conclusion, | ||
timestamp: new Date().toISOString(), | ||
}); | ||
break; | ||
} | ||
|
||
await this.prisma.aiWorkflowRun.update({ | ||
where: { id: aiWorkflowRun.id }, | ||
data: { | ||
|
@@ -289,13 +276,31 @@ export class WorkflowQueueHandler implements OnModuleInit { | |
completedJobs: { increment: 1 }, | ||
}, | ||
}); | ||
await this.scheduler.completeJob( | ||
(aiWorkflowRun as any).workflow.gitWorkflowId, | ||
aiWorkflowRun.scheduledJobId as string, | ||
); | ||
|
||
try { | ||
await this.scheduler.completeJob( | ||
(aiWorkflowRun as any).workflow.gitWorkflowId, | ||
aiWorkflowRun.scheduledJobId as string, | ||
conclusion === 'FAILURE' ? 'fail' : 'complete', | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The |
||
); | ||
|
||
if (conclusion === 'FAILURE') { | ||
this.logger.log({ | ||
message: `Workflow job ${aiWorkflowRun.id} failed. Retrying!`, | ||
aiWorkflowRunId: aiWorkflowRun.id, | ||
gitRunId: event.workflow_job.run_id, | ||
jobId: event.workflow_job.id, | ||
status: conclusion, | ||
timestamp: new Date().toISOString(), | ||
}); | ||
return; | ||
} | ||
} catch (e) { | ||
this.logger.log(aiWorkflowRun.id, e.message); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Consider providing more context in the error logging. Currently, only the |
||
} | ||
|
||
this.logger.log({ | ||
message: 'Workflow job completed', | ||
message: `Workflow job ${aiWorkflowRun.id} completed with conclusion: ${conclusion}`, | ||
aiWorkflowRunId: aiWorkflowRun.id, | ||
gitRunId: event.workflow_job.run_id, | ||
jobId: event.workflow_job.id, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Consider renaming the variable `$start` to a more descriptive name that indicates its purpose or usage within the service. Using a more descriptive name can improve code readability and maintainability.