From 91908d9e10ee16d1e8f2abc0b9aba22ea67c36c4 Mon Sep 17 00:00:00 2001 From: Jean Schmidt Date: Tue, 18 Oct 2022 09:17:59 +0200 Subject: [PATCH 1/3] FIX: Don't remove EC2 instance when fails to remove githubRunner --- .../runners/src/scale-runners/scale-down.ts | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts index ed60525978..48f89d2598 100644 --- a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts +++ b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts @@ -97,21 +97,22 @@ export async function scaleDown(): Promise { removedRunners += 1; + // removeGithubRunner[Org || Repo] already call terminateRunner if successful if (ghRunner !== undefined) { if (Config.Instance.enableOrganizationRunners) { await removeGithubRunnerOrg(ec2runner, ghRunner.id, ec2runner.org as string, metrics); } else { await removeGithubRunnerRepo(ec2runner, ghRunner.id, getRepo(ec2runner.repo as string), metrics); } - } - - console.info(`Runner '${ec2runner.instanceId}' [${ec2runner.runnerType}] will be removed.`); - try { - await terminateRunner(ec2runner, metrics); - metrics.runnerTerminateSuccess(ec2runner); - } catch (e) { - metrics.runnerTerminateFailure(ec2runner); - console.error(`Runner '${ec2runner.instanceId}' [${ec2runner.runnerType}] cannot be removed: ${e}`); + } else { + console.info(`Runner '${ec2runner.instanceId}' [${ec2runner.runnerType}] will be removed.`); + try { + await terminateRunner(ec2runner, metrics); + metrics.runnerTerminateSuccess(ec2runner); + } catch (e) { + metrics.runnerTerminateFailure(ec2runner); + console.error(`Runner '${ec2runner.instanceId}' [${ec2runner.runnerType}] cannot be removed: ${e}`); + } } } } From 068524b7ca8093069ff57988b36b543c5fe60cec Mon Sep 17 00:00:00 2001 From: Jean Schmidt Date: Tue, 18 Oct 2022 11:19:58 +0200 Subject: [PATCH 2/3] Fix scaleDown tests, and improved code-flow * [FIX] scale-down-test.ts * [IMPROVE] removeGithubRunner does not terminate EC2 runner * [IMPROVE] application-level metrics for GH runner de-registers --- .../src/scale-runners/gh-runners.test.ts | 27 +----- .../runners/src/scale-runners/gh-runners.ts | 90 ++++++++----------- .../runners/src/scale-runners/metrics.ts | 59 ++++++++++++ .../src/scale-runners/scale-down.test.ts | 88 +++++++++--------- .../runners/src/scale-runners/scale-down.ts | 57 ++++++++++-- 5 files changed, 195 insertions(+), 126 deletions(-) diff --git a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/gh-runners.test.ts b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/gh-runners.test.ts index 52866a34c5..64d546d5e9 100644 --- a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/gh-runners.test.ts +++ b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/gh-runners.test.ts @@ -14,7 +14,6 @@ import { removeGithubRunnerRepo, resetGHRunnersCaches, } from './gh-runners'; -import { RunnerInfo } from './utils'; import { createGithubAuth, createOctoClient } from './gh-auth'; import { ScaleUpMetrics } from './metrics'; @@ -390,12 +389,6 @@ describe('listGithubRunners', () => { }); describe('removeGithubRunnerRepo', () => { - const repo = { owner: 'owner', repo: 'repo' }; - const irrelevantRunnerInfo: RunnerInfo = { - ...repo, - instanceId: '113', - }; - it('succeeds', async () => { const runnerId = 33; const repo = { owner: 'owner', repo: 'repo' }; @@ -419,16 +412,13 @@ describe('removeGithubRunnerRepo', () => { mockCreateOctoClient.mockReturnValueOnce(mockedOctokit as unknown as Octokit); resetGHRunnersCaches(); - await removeGithubRunnerRepo(irrelevantRunnerInfo, runnerId, repo, metrics); + await removeGithubRunnerRepo(runnerId, repo, metrics); expect(mockedOctokit.actions.deleteSelfHostedRunnerFromRepo).toBeCalledWith({ ...repo, runner_id: runnerId, }); expect(getRepoInstallation).toBeCalled(); - expect(mockEC2.terminateInstances).toBeCalledWith({ - InstanceIds: [irrelevantRunnerInfo.instanceId], - }); }); it('fails', async () => { @@ -454,23 +444,18 @@ describe('removeGithubRunnerRepo', () => { mockCreateOctoClient.mockReturnValueOnce(mockedOctokit as unknown as Octokit); resetGHRunnersCaches(); - await removeGithubRunnerRepo(irrelevantRunnerInfo, runnerId, repo, metrics); + await expect(removeGithubRunnerRepo(runnerId, repo, metrics)).rejects.toThrow(); expect(mockedOctokit.actions.deleteSelfHostedRunnerFromRepo).toBeCalledWith({ ...repo, runner_id: runnerId, }); expect(getRepoInstallation).toBeCalled(); - expect(mockEC2.terminateInstances).not.toBeCalled(); }); }); describe('removeGithubRunnerOrg', () => { const org = 'mockedOrg'; - const irrelevantRunnerInfo: RunnerInfo = { - org: org, - instanceId: '113', - }; it('succeeds', async () => { const runnerId = 33; @@ -494,16 +479,13 @@ describe('removeGithubRunnerOrg', () => { mockCreateOctoClient.mockReturnValueOnce(mockedOctokit as unknown as Octokit); resetGHRunnersCaches(); - await removeGithubRunnerOrg(irrelevantRunnerInfo, runnerId, org, metrics); + await removeGithubRunnerOrg(runnerId, org, metrics); expect(mockedOctokit.actions.deleteSelfHostedRunnerFromOrg).toBeCalledWith({ org: org, runner_id: runnerId, }); expect(getOrgInstallation).toBeCalled(); - expect(mockEC2.terminateInstances).toBeCalledWith({ - InstanceIds: [irrelevantRunnerInfo.instanceId], - }); }); it('fails', async () => { @@ -528,14 +510,13 @@ describe('removeGithubRunnerOrg', () => { mockCreateOctoClient.mockReturnValueOnce(mockedOctokit as unknown as Octokit); resetGHRunnersCaches(); - await removeGithubRunnerOrg(irrelevantRunnerInfo, runnerId, org, metrics); + await expect(removeGithubRunnerOrg(runnerId, org, metrics)).rejects.toThrow(); expect(mockedOctokit.actions.deleteSelfHostedRunnerFromOrg).toBeCalledWith({ org: org, runner_id: runnerId, }); expect(getOrgInstallation).toBeCalled(); - expect(mockEC2.terminateInstances).not.toBeCalled(); }); }); diff --git a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/gh-runners.ts b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/gh-runners.ts index 0730846987..b5bc3b34ce 100644 --- a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/gh-runners.ts +++ b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/gh-runners.ts @@ -1,5 +1,5 @@ -import { Repo, RunnerInfo, getRepoKey } from './utils'; -import { RunnerType, terminateRunner } from './runners'; +import { Repo, getRepoKey } from './utils'; +import { RunnerType } from './runners'; import { createGithubAuth, createOctoClient } from './gh-auth'; import { Config } from './config'; @@ -127,60 +127,46 @@ type UnboxPromise = T extends Promise ? U : T; export type GhRunners = UnboxPromise>['data']['runners']; -export async function removeGithubRunnerRepo(ec2runner: RunnerInfo, ghRunnerId: number, repo: Repo, metrics: Metrics) { - try { - const githubAppClient = await createGitHubClientForRunnerRepo(repo, metrics); - const result = await metrics.trackRequest( - metrics.deleteSelfHostedRunnerFromRepoGHCallSuccess, - metrics.deleteSelfHostedRunnerFromRepoGHCallFailure, - () => { - return githubAppClient.actions.deleteSelfHostedRunnerFromRepo({ - ...repo, - runner_id: ghRunnerId, - }); - }, - ); - - /* istanbul ignore next */ - if (result?.status == 204) { - await terminateRunner(ec2runner, metrics); - console.info( - `AWS runner instance '${ec2runner.instanceId}' [${ec2runner.runnerType}] is terminated ` + - `and GitHub runner is de-registered. (removeGithubRunnerRepo)`, - ); - } - } catch (e) { - console.warn( - `Error scaling down (removeGithubRunnerRepo) '${ec2runner.instanceId}' [${ec2runner.runnerType}]: ${e}`, +export async function removeGithubRunnerRepo(ghRunnerId: number, repo: Repo, metrics: Metrics) { + const githubAppClient = await createGitHubClientForRunnerRepo(repo, metrics); + const result = await metrics.trackRequest( + metrics.deleteSelfHostedRunnerFromRepoGHCallSuccess, + metrics.deleteSelfHostedRunnerFromRepoGHCallFailure, + () => { + return githubAppClient.actions.deleteSelfHostedRunnerFromRepo({ + ...repo, + runner_id: ghRunnerId, + }); + }, + ); + + /* istanbul ignore next */ + if ((result?.status ?? 0) != 204) { + throw ( + `Request deleteSelfHostedRunnerFromRepoGHCallSuccess returned status code different than 204: ` + + `${result?.status ?? 0} for ${repo} ${ghRunnerId}` ); } } -export async function removeGithubRunnerOrg(ec2runner: RunnerInfo, ghRunnerId: number, org: string, metrics: Metrics) { - try { - const githubAppClient = await createGitHubClientForRunnerOrg(org, metrics); - const result = await metrics.trackRequest( - metrics.deleteSelfHostedRunnerFromOrgGHCallSuccess, - metrics.deleteSelfHostedRunnerFromOrgGHCallFailure, - () => { - return githubAppClient.actions.deleteSelfHostedRunnerFromOrg({ - org: org, - runner_id: ghRunnerId, - }); - }, - ); - - /* istanbul ignore next */ - if (result?.status == 204) { - await terminateRunner(ec2runner, metrics); - console.info( - `AWS runner instance '${ec2runner.instanceId}' [${ec2runner.runnerType}] is terminated ` + - `and GitHub runner is de-registered. (removeGithubRunnerOrg)`, - ); - } - } catch (e) { - console.warn( - `Error scaling down (removeGithubRunnerOrg) '${ec2runner.instanceId}' [${ec2runner.runnerType}]: ${e}`, +export async function removeGithubRunnerOrg(ghRunnerId: number, org: string, metrics: Metrics) { + const githubAppClient = await createGitHubClientForRunnerOrg(org, metrics); + const result = await metrics.trackRequest( + metrics.deleteSelfHostedRunnerFromOrgGHCallSuccess, + metrics.deleteSelfHostedRunnerFromOrgGHCallFailure, + () => { + return githubAppClient.actions.deleteSelfHostedRunnerFromOrg({ + org: org, + runner_id: ghRunnerId, + }); + }, + ); + + /* istanbul ignore next */ + if ((result?.status ?? 0) != 204) { + throw ( + `Request deleteSelfHostedRunnerFromRepoGHCallSuccess returned status code different than 204: ` + + `${result?.status ?? 0} for ${org} ${ghRunnerId}` ); } } diff --git a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/metrics.ts b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/metrics.ts index 9d8eff82c4..bd012a6329 100644 --- a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/metrics.ts +++ b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/metrics.ts @@ -691,6 +691,48 @@ export class ScaleDownMetrics extends Metrics { this.countEntry(`run.ec2runners.${org}.${ec2Runner.runnerType}.notFound`); } + /* istanbul ignore next */ + runnerGhTerminateSuccessOrg(org: string, ec2runner: RunnerInfo) { + this.countEntry(`run.ghRunner.byOrg.${org}.terminate.success`); + this.countEntry(`run.ghRunner.byOrg.${org}.byType.${ec2runner.runnerType}.terminate.success`); + this.countEntry(`run.ghRunner.byType.${ec2runner.runnerType}.terminate.success`); + } + + /* istanbul ignore next */ + runnerGhTerminateSuccessRepo(repo: Repo, ec2runner: RunnerInfo) { + this.countEntry(`run.ghRunner.byRepo.${repo.owner}.${repo.repo}.terminate.success`); + this.countEntry(`run.ghRunner.byRepo.${repo.owner}.${repo.repo}.byType.${ec2runner.runnerType}.terminate.success`); + this.countEntry(`run.ghRunner.byType.${ec2runner.runnerType}.terminate.success`); + } + + /* istanbul ignore next */ + runnerGhTerminateFailureOrg(org: string, ec2runner: RunnerInfo) { + this.countEntry(`run.ghRunner.byOrg.${org}.terminate.failure`); + this.countEntry(`run.ghRunner.byOrg.${org}.byType.${ec2runner.runnerType}.terminate.failure`); + this.countEntry(`run.ghRunner.byType.${ec2runner.runnerType}.terminate.failure`); + } + + /* istanbul ignore next */ + runnerGhTerminateFailureRepo(repo: Repo, ec2runner: RunnerInfo) { + this.countEntry(`run.ghRunner.byRepo.${repo.owner}.${repo.repo}.terminate.failure`); + this.countEntry(`run.ghRunner.byRepo.${repo.owner}.${repo.repo}.byType.${ec2runner.runnerType}.terminate.failure`); + this.countEntry(`run.ghRunner.byType.${ec2runner.runnerType}.terminate.failure`); + } + + /* istanbul ignore next */ + runnerGhTerminateNotFoundOrg(org: string, ec2runner: RunnerInfo) { + this.countEntry(`run.ghRunner.byOrg.${org}.terminate.notfound`); + this.countEntry(`run.ghRunner.byOrg.${org}.byType.${ec2runner.runnerType}.terminate.notfound`); + this.countEntry(`run.ghRunner.byType.${ec2runner.runnerType}.terminate.notfound`); + } + + /* istanbul ignore next */ + runnerGhTerminateNotFoundRepo(repo: Repo, ec2runner: RunnerInfo) { + this.countEntry(`run.ghRunner.byRepo.${repo.owner}.${repo.repo}.terminate.notfound`); + this.countEntry(`run.ghRunner.byRepo.${repo.owner}.${repo.repo}.byType.${ec2runner.runnerType}.terminate.notfound`); + this.countEntry(`run.ghRunner.byType.${ec2runner.runnerType}.terminate.notfound`); + } + /* istanbul ignore next */ runnerTerminateSuccess(ec2Runner: RunnerInfo) { this.countEntry('run.ec2Runners.terminate.success'); @@ -724,6 +766,23 @@ export class ScaleDownMetrics extends Metrics { this.countEntry(`run.ec2runners.${repo.owner}.${repo.repo}.terminate.failure`); } } + + /* istanbul ignore next */ + runnerTerminateSkipped(ec2Runner: RunnerInfo) { + this.countEntry('run.ec2Runners.terminate.skipped'); + this.countEntry(`run.ec2runners.${ec2Runner.runnerType}.terminate.skipped`); + + if (ec2Runner.org !== undefined) { + this.countEntry(`run.ec2runners.${ec2Runner.org}.${ec2Runner.runnerType}.terminate.skipped`); + this.countEntry(`run.ec2runners.${ec2Runner.org}.terminate.skipped`); + } + + if (ec2Runner.repo !== undefined) { + const repo = getRepo(ec2Runner.repo as string); + this.countEntry(`run.ec2runners.${repo.owner}.${repo.repo}.${ec2Runner.runnerType}.terminate.skipped`); + this.countEntry(`run.ec2runners.${repo.owner}.${repo.repo}.terminate.skipped`); + } + } } export interface sendMetricsTimeoutVars { diff --git a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-down.test.ts b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-down.test.ts index a737742275..a84173d72a 100644 --- a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-down.test.ts +++ b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-down.test.ts @@ -163,7 +163,7 @@ describe('scale-down', () => { mockRunner({ id: '0004', name: 'keep-this-is-busy-02', busy: true }), mockRunner({ id: '0005', name: 'keep-this-not-min-time-03', busy: false }), mockRunner({ id: '0006', name: 'keep-this-is-busy-03', busy: true }), - mockRunner({ id: '0007', name: 'remove-ephemeral-01', busy: false }), + mockRunner({ id: '0007', name: 'remove-ephemeral-01-fail-ghr', busy: false }), mockRunner({ id: '0008', name: 'keep-min-runners-not-oldest-01', busy: false }), mockRunner({ id: '0009', name: 'keep-min-runners-oldest-01', busy: false }), mockRunner({ id: '0010', name: 'keep-min-runners-not-oldest-02', busy: false }), @@ -264,7 +264,7 @@ describe('scale-down', () => { }, { runnerType: 'a-ephemeral-runner', - instanceId: 'remove-ephemeral-01', // X + instanceId: 'remove-ephemeral-01-fail-ghr', // X org: theOrg, launchTime: dateRef .clone() @@ -380,6 +380,14 @@ describe('scale-down', () => { mockedListRunners.mockResolvedValueOnce(listRunnersRet); mockedListGithubRunnersOrg.mockResolvedValue(ghRunners); mockedGetRunnerTypes.mockResolvedValue(runnerTypes); + mockedRemoveGithubRunnerOrg.mockImplementation( + // eslint-disable-next-line @typescript-eslint/no-unused-vars + async (runnerId: number, org: string, metrics: MetricsModule.Metrics) => { + if (runnerId == 7) { + throw 'Failure'; + } + }, + ); await scaleDown(); @@ -395,46 +403,36 @@ describe('scale-down', () => { expect(mockedRemoveGithubRunnerOrg).toBeCalledTimes(3); { const { awsR, ghR } = getRunnerPair('keep-min-runners-oldest-02'); - expect(mockedRemoveGithubRunnerOrg).toBeCalledWith(awsR, ghR.id, awsR.org as string, metrics); + expect(mockedRemoveGithubRunnerOrg).toBeCalledWith(ghR.id, awsR.org as string, metrics); } { const { awsR, ghR } = getRunnerPair('keep-min-runners-oldest-01'); - expect(mockedRemoveGithubRunnerOrg).toBeCalledWith(awsR, ghR.id, awsR.org as string, metrics); + expect(mockedRemoveGithubRunnerOrg).toBeCalledWith(ghR.id, awsR.org as string, metrics); } { - const { awsR, ghR } = getRunnerPair('remove-ephemeral-01'); - expect(mockedRemoveGithubRunnerOrg).toBeCalledWith(awsR, ghR.id, awsR.org as string, metrics); + const { awsR, ghR } = getRunnerPair('remove-ephemeral-01-fail-ghr'); + expect(mockedRemoveGithubRunnerOrg).toBeCalledWith(ghR.id, awsR.org as string, metrics); } - expect(mockedTerminateRunner).toBeCalledTimes(6); - { - /* eslint-disable-next-line @typescript-eslint/no-unused-vars */ - const { awsR, ghR } = getRunnerPair('keep-lt-min-no-ghrunner-no-ghr-02'); - expect(mockedTerminateRunner).toBeCalledWith(awsR, metrics); - } + expect(mockedTerminateRunner).toBeCalledTimes(5); { - /* eslint-disable-next-line @typescript-eslint/no-unused-vars */ - const { awsR, ghR } = getRunnerPair('keep-lt-min-no-ghrunner-no-ghr-01'); + const { awsR } = getRunnerPair('keep-lt-min-no-ghrunner-no-ghr-02'); expect(mockedTerminateRunner).toBeCalledWith(awsR, metrics); } { - /* eslint-disable-next-line @typescript-eslint/no-unused-vars */ - const { awsR, ghR } = getRunnerPair('keep-min-runners-oldest-02'); + const { awsR } = getRunnerPair('keep-lt-min-no-ghrunner-no-ghr-01'); expect(mockedTerminateRunner).toBeCalledWith(awsR, metrics); } { - /* eslint-disable-next-line @typescript-eslint/no-unused-vars */ - const { awsR, ghR } = getRunnerPair('keep-min-runners-oldest-01'); + const { awsR } = getRunnerPair('keep-min-runners-oldest-02'); expect(mockedTerminateRunner).toBeCalledWith(awsR, metrics); } { - /* eslint-disable-next-line @typescript-eslint/no-unused-vars */ - const { awsR, ghR } = getRunnerPair('remove-ephemeral-02'); + const { awsR } = getRunnerPair('keep-min-runners-oldest-01'); expect(mockedTerminateRunner).toBeCalledWith(awsR, metrics); } { - /* eslint-disable-next-line @typescript-eslint/no-unused-vars */ - const { awsR, ghR } = getRunnerPair('remove-ephemeral-01'); + const { awsR } = getRunnerPair('remove-ephemeral-02'); expect(mockedTerminateRunner).toBeCalledWith(awsR, metrics); } }); @@ -460,7 +458,7 @@ describe('scale-down', () => { mockRunner({ id: '0004', name: 'keep-this-is-busy-02', busy: true }), mockRunner({ id: '0005', name: 'keep-this-not-min-time-03', busy: false }), mockRunner({ id: '0006', name: 'keep-this-is-busy-03', busy: true }), - mockRunner({ id: '0007', name: 'remove-ephemeral-01', busy: false }), + mockRunner({ id: '0007', name: 'remove-ephemeral-01-fail-ghr', busy: false }), mockRunner({ id: '0008', name: 'keep-min-runners-not-oldest-01', busy: false }), mockRunner({ id: '0009', name: 'keep-min-runners-oldest-01', busy: false }), mockRunner({ id: '0010', name: 'keep-min-runners-not-oldest-02', busy: false }), @@ -561,7 +559,7 @@ describe('scale-down', () => { }, { runnerType: 'a-ephemeral-runner', - instanceId: 'remove-ephemeral-01', // X + instanceId: 'remove-ephemeral-01-fail-ghr', // X repo: theRepo, launchTime: dateRef .clone() @@ -676,6 +674,14 @@ describe('scale-down', () => { mockedListRunners.mockResolvedValueOnce(listRunnersRet); mockedListGithubRunnersRepo.mockResolvedValue(ghRunners); mockedGetRunnerTypes.mockResolvedValue(runnerTypes); + mockedRemoveGithubRunnerRepo.mockImplementation( + // eslint-disable-next-line @typescript-eslint/no-unused-vars + async (runnerId: number, repo: Repo, metrics: MetricsModule.Metrics) => { + if (runnerId == 7) { + throw 'Failure'; + } + }, + ); await scaleDown(); @@ -690,47 +696,37 @@ describe('scale-down', () => { expect(mockedRemoveGithubRunnerRepo).toBeCalledTimes(3); { - const { awsR, ghR } = getRunnerPair('keep-min-runners-oldest-02'); - expect(mockedRemoveGithubRunnerRepo).toBeCalledWith(awsR, ghR.id, repo, metrics); + const { ghR } = getRunnerPair('keep-min-runners-oldest-02'); + expect(mockedRemoveGithubRunnerRepo).toBeCalledWith(ghR.id, repo, metrics); } { - const { awsR, ghR } = getRunnerPair('keep-min-runners-oldest-01'); - expect(mockedRemoveGithubRunnerRepo).toBeCalledWith(awsR, ghR.id, repo, metrics); + const { ghR } = getRunnerPair('keep-min-runners-oldest-01'); + expect(mockedRemoveGithubRunnerRepo).toBeCalledWith(ghR.id, repo, metrics); } { - const { awsR, ghR } = getRunnerPair('remove-ephemeral-01'); - expect(mockedRemoveGithubRunnerRepo).toBeCalledWith(awsR, ghR.id, repo, metrics); + const { ghR } = getRunnerPair('remove-ephemeral-01-fail-ghr'); + expect(mockedRemoveGithubRunnerRepo).toBeCalledWith(ghR.id, repo, metrics); } - expect(mockedTerminateRunner).toBeCalledTimes(6); - { - /* eslint-disable-next-line @typescript-eslint/no-unused-vars */ - const { awsR, ghR } = getRunnerPair('keep-lt-min-no-ghrunner-no-ghr-02'); - expect(mockedTerminateRunner).toBeCalledWith(awsR, metrics); - } + expect(mockedTerminateRunner).toBeCalledTimes(5); { - /* eslint-disable-next-line @typescript-eslint/no-unused-vars */ - const { awsR, ghR } = getRunnerPair('keep-lt-min-no-ghrunner-no-ghr-01'); + const { awsR } = getRunnerPair('keep-lt-min-no-ghrunner-no-ghr-02'); expect(mockedTerminateRunner).toBeCalledWith(awsR, metrics); } { - /* eslint-disable-next-line @typescript-eslint/no-unused-vars */ - const { awsR, ghR } = getRunnerPair('keep-min-runners-oldest-02'); + const { awsR } = getRunnerPair('keep-lt-min-no-ghrunner-no-ghr-01'); expect(mockedTerminateRunner).toBeCalledWith(awsR, metrics); } { - /* eslint-disable-next-line @typescript-eslint/no-unused-vars */ - const { awsR, ghR } = getRunnerPair('keep-min-runners-oldest-01'); + const { awsR } = getRunnerPair('keep-min-runners-oldest-02'); expect(mockedTerminateRunner).toBeCalledWith(awsR, metrics); } { - /* eslint-disable-next-line @typescript-eslint/no-unused-vars */ - const { awsR, ghR } = getRunnerPair('remove-ephemeral-02'); + const { awsR } = getRunnerPair('keep-min-runners-oldest-01'); expect(mockedTerminateRunner).toBeCalledWith(awsR, metrics); } { - /* eslint-disable-next-line @typescript-eslint/no-unused-vars */ - const { awsR, ghR } = getRunnerPair('remove-ephemeral-01'); + const { awsR } = getRunnerPair('remove-ephemeral-02'); expect(mockedTerminateRunner).toBeCalledWith(awsR, metrics); } }); diff --git a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts index 48f89d2598..6274f8caee 100644 --- a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts +++ b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts @@ -95,16 +95,61 @@ export async function scaleDown(): Promise { break; } - removedRunners += 1; - - // removeGithubRunner[Org || Repo] already call terminateRunner if successful + let shouldRemoveEC2 = true; if (ghRunner !== undefined) { if (Config.Instance.enableOrganizationRunners) { - await removeGithubRunnerOrg(ec2runner, ghRunner.id, ec2runner.org as string, metrics); + console.info( + `GH Runner instance '${ghRunner.id}'[${ec2runner.org}] for EC2 '${ec2runner.instanceId}' ` + + `[${ec2runner.runnerType}] will be removed.`, + ); + try { + await removeGithubRunnerOrg(ghRunner.id, ec2runner.org as string, metrics); + metrics.runnerGhTerminateSuccessOrg(ec2runner.org as string, ec2runner); + console.info( + `GH Runner instance '${ghRunner.id}'[${ec2runner.org}] for EC2 '${ec2runner.instanceId}' ` + + `[${ec2runner.runnerType}] successfuly removed.`, + ); + } catch (e) { + console.warn( + `GH Runner instance '${ghRunner.id}'[${ec2runner.org}] for EC2 '${ec2runner.instanceId}' ` + + `[${ec2runner.runnerType}] failed to be removed.`, + ); + metrics.runnerGhTerminateFailureOrg(ec2runner.org as string, ec2runner); + shouldRemoveEC2 = false; + } } else { - await removeGithubRunnerRepo(ec2runner, ghRunner.id, getRepo(ec2runner.repo as string), metrics); + const repo = getRepo(ec2runner.repo as string); + console.info( + `GH Runner instance '${ghRunner.id}'[${ec2runner.repo}] for EC2 '${ec2runner.instanceId}' ` + + `[${ec2runner.runnerType}] will be removed.`, + ); + try { + await removeGithubRunnerRepo(ghRunner.id, repo, metrics); + metrics.runnerGhTerminateSuccessRepo(repo, ec2runner); + console.info( + `GH Runner instance '${ghRunner.id}'[${ec2runner.repo}] for EC2 '${ec2runner.instanceId}' ` + + `[${ec2runner.runnerType}] successfuly removed.`, + ); + } catch (e) { + console.warn( + `GH Runner instance '${ghRunner.id}'[${ec2runner.repo}] for EC2 '${ec2runner.instanceId}' ` + + `[${ec2runner.runnerType}] failed to be removed.`, + ); + metrics.runnerGhTerminateFailureRepo(repo, ec2runner); + shouldRemoveEC2 = false; + } } } else { + if (Config.Instance.enableOrganizationRunners) { + metrics.runnerGhTerminateNotFoundOrg(ec2runner.org as string, ec2runner); + } else { + metrics.runnerGhTerminateFailureRepo(getRepo(ec2runner.repo as string), ec2runner); + } + } + + if (shouldRemoveEC2) { + removedRunners += 1; + console.info(`Runner '${ec2runner.instanceId}' [${ec2runner.runnerType}] will be removed.`); try { await terminateRunner(ec2runner, metrics); @@ -113,6 +158,8 @@ export async function scaleDown(): Promise { metrics.runnerTerminateFailure(ec2runner); console.error(`Runner '${ec2runner.instanceId}' [${ec2runner.runnerType}] cannot be removed: ${e}`); } + } else { + metrics.runnerTerminateSkipped(ec2runner); } } } From c236425cc6a2c32dc797275878722ba13b08b997 Mon Sep 17 00:00:00 2001 From: Jean Schmidt Date: Wed, 19 Oct 2022 11:40:01 +0200 Subject: [PATCH 3/3] Improved loggind, add expBackOff for some high throughput functions in gh-runners.ts --- .../runners/src/scale-runners/gh-runners.ts | 107 ++++++++++-------- .../runners/src/scale-runners/scale-down.ts | 8 +- 2 files changed, 61 insertions(+), 54 deletions(-) diff --git a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/gh-runners.ts b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/gh-runners.ts index b5bc3b34ce..d5e9d257ec 100644 --- a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/gh-runners.ts +++ b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/gh-runners.ts @@ -1,4 +1,4 @@ -import { Repo, getRepoKey } from './utils'; +import { Repo, getRepoKey, expBackOff } from './utils'; import { RunnerType } from './runners'; import { createGithubAuth, createOctoClient } from './gh-auth'; @@ -129,16 +129,18 @@ export type GhRunners = UnboxPromise { - return githubAppClient.actions.deleteSelfHostedRunnerFromRepo({ - ...repo, - runner_id: ghRunnerId, - }); - }, - ); + const result = await expBackOff(() => { + return metrics.trackRequest( + metrics.deleteSelfHostedRunnerFromRepoGHCallSuccess, + metrics.deleteSelfHostedRunnerFromRepoGHCallFailure, + () => { + return githubAppClient.actions.deleteSelfHostedRunnerFromRepo({ + ...repo, + runner_id: ghRunnerId, + }); + }, + ); + }); /* istanbul ignore next */ if ((result?.status ?? 0) != 204) { @@ -151,16 +153,18 @@ export async function removeGithubRunnerRepo(ghRunnerId: number, repo: Repo, met export async function removeGithubRunnerOrg(ghRunnerId: number, org: string, metrics: Metrics) { const githubAppClient = await createGitHubClientForRunnerOrg(org, metrics); - const result = await metrics.trackRequest( - metrics.deleteSelfHostedRunnerFromOrgGHCallSuccess, - metrics.deleteSelfHostedRunnerFromOrgGHCallFailure, - () => { - return githubAppClient.actions.deleteSelfHostedRunnerFromOrg({ - org: org, - runner_id: ghRunnerId, - }); - }, - ); + const result = await expBackOff(() => { + return metrics.trackRequest( + metrics.deleteSelfHostedRunnerFromOrgGHCallSuccess, + metrics.deleteSelfHostedRunnerFromOrgGHCallFailure, + () => { + return githubAppClient.actions.deleteSelfHostedRunnerFromOrg({ + org: org, + runner_id: ghRunnerId, + }); + }, + ); + }); /* istanbul ignore next */ if ((result?.status ?? 0) != 204) { @@ -231,7 +235,7 @@ async function listGithubRunners(key: string, listCallback: () => Promise { - return client.actions.getSelfHostedRunnerForRepo({ - ...repo, - runner_id: runnerID as unknown as number, - }); - }, - ) + await expBackOff(() => { + return metrics.trackRequest( + metrics.getSelfHostedRunnerForRepoGHCallSuccess, + metrics.getSelfHostedRunnerForRepoGHCallFailure, + () => { + return client.actions.getSelfHostedRunnerForRepo({ + ...repo, + runner_id: runnerID as unknown as number, + }); + }, + ); + }) ).data; } catch (e) { console.warn(`[getRunnerRepo ]: ${e}`); @@ -269,16 +275,18 @@ export async function getRunnerOrg(org: string, runnerID: string, metrics: Metri try { return ( - await metrics.trackRequest( - metrics.getSelfHostedRunnerForOrgGHCallSuccess, - metrics.getSelfHostedRunnerForOrgGHCallFailure, - () => { - return client.actions.getSelfHostedRunnerForOrg({ - org: org, - runner_id: runnerID as unknown as number, - }); - }, - ) + await expBackOff(() => { + return metrics.trackRequest( + metrics.getSelfHostedRunnerForOrgGHCallSuccess, + metrics.getSelfHostedRunnerForOrgGHCallFailure, + () => { + return client.actions.getSelfHostedRunnerForOrg({ + org: org, + runner_id: runnerID as unknown as number, + }); + }, + ); + }) ).data; } catch (e) { console.warn(`[getRunnerOrg ]: ${e}`); @@ -295,8 +303,9 @@ export async function getRunnerTypes( try { const runnerTypeKey = getRepoKey(repo); - if (runnerTypeCache.get(runnerTypeKey) !== undefined) { - return runnerTypeCache.get(runnerTypeKey) as Map; + const runnerTypeCacheActualContent = runnerTypeCache.get(runnerTypeKey); + if (runnerTypeCacheActualContent !== undefined) { + return runnerTypeCacheActualContent as Map; } console.debug(`[getRunnerTypes] cache miss for ${runnerTypeKey}`); @@ -306,16 +315,14 @@ export async function getRunnerTypes( ? await createGitHubClientForRunnerOrg(repo.owner, metrics) : await createGitHubClientForRunnerRepo(repo, metrics); - const response = await metrics.trackRequest( - metrics.reposGetContentGHCallSuccess, - metrics.reposGetContentGHCallFailure, - () => { + const response = await expBackOff(() => { + return metrics.trackRequest(metrics.reposGetContentGHCallSuccess, metrics.reposGetContentGHCallFailure, () => { return githubAppClient.repos.getContent({ ...repo, path: filepath, }); - }, - ); + }); + }); /* istanbul ignore next */ const { content } = { ...(response?.data || {}) }; diff --git a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts index 6274f8caee..275d4711ac 100644 --- a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts +++ b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts @@ -98,7 +98,7 @@ export async function scaleDown(): Promise { let shouldRemoveEC2 = true; if (ghRunner !== undefined) { if (Config.Instance.enableOrganizationRunners) { - console.info( + console.debug( `GH Runner instance '${ghRunner.id}'[${ec2runner.org}] for EC2 '${ec2runner.instanceId}' ` + `[${ec2runner.runnerType}] will be removed.`, ); @@ -112,14 +112,14 @@ export async function scaleDown(): Promise { } catch (e) { console.warn( `GH Runner instance '${ghRunner.id}'[${ec2runner.org}] for EC2 '${ec2runner.instanceId}' ` + - `[${ec2runner.runnerType}] failed to be removed.`, + `[${ec2runner.runnerType}] failed to be removed. ${e}`, ); metrics.runnerGhTerminateFailureOrg(ec2runner.org as string, ec2runner); shouldRemoveEC2 = false; } } else { const repo = getRepo(ec2runner.repo as string); - console.info( + console.debug( `GH Runner instance '${ghRunner.id}'[${ec2runner.repo}] for EC2 '${ec2runner.instanceId}' ` + `[${ec2runner.runnerType}] will be removed.`, ); @@ -133,7 +133,7 @@ export async function scaleDown(): Promise { } catch (e) { console.warn( `GH Runner instance '${ghRunner.id}'[${ec2runner.repo}] for EC2 '${ec2runner.instanceId}' ` + - `[${ec2runner.runnerType}] failed to be removed.`, + `[${ec2runner.runnerType}] failed to be removed. ${e}`, ); metrics.runnerGhTerminateFailureRepo(repo, ec2runner); shouldRemoveEC2 = false;