-
Notifications
You must be signed in to change notification settings - Fork 580
/
scale-down.ts
247 lines (219 loc) · 9.19 KB
/
scale-down.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
import { Octokit } from '@octokit/rest';
import { createChildLogger } from '@terraform-aws-github-runner/aws-powertools-util';
import moment from 'moment';
import { createGithubAppAuth, createGithubInstallationAuth, createOctoClient } from '../gh-auth/gh-auth';
import { bootTimeExceeded, listEC2Runners, terminateRunner } from './../aws/runners';
import { RunnerInfo, RunnerList } from './../aws/runners.d';
import { GhRunners, githubCache } from './cache';
import { ScalingDownConfig, getEvictionStrategy, getIdleRunnerCount } from './scale-down-config';
// Module-level child logger, created with the name 'scale-down'; used for all log output in this file.
const logger = createChildLogger('scale-down');
/**
 * Returns an installation-authenticated Octokit client for the owner of the given runner.
 *
 * Clients are looked up in `githubCache.clients` under `runner.owner`. On a cache miss an
 * app-level client is created first to resolve the installation id (org installation for
 * `Org` runners, repo installation otherwise, with `runner.owner` split on `/` into owner
 * and repo). An installation-level client is then created, stored in the cache and returned.
 *
 * @param runner - EC2 runner whose `owner` and `type` select the GitHub installation.
 * @returns The cached or newly created Octokit client.
 */
async function getOrCreateOctokit(runner: RunnerInfo): Promise<Octokit> {
  const cacheKey = runner.owner;
  const existingClient = githubCache.clients.get(cacheKey);
  if (existingClient) {
    logger.debug(`[createGitHubClientForRunner] Cache hit for ${cacheKey}`);
    return existingClient;
  }
  logger.debug(`[createGitHubClientForRunner] Cache miss for ${cacheKey}`);

  // When GHES_URL is set the API lives under `/api/v3`; otherwise an empty URL is passed on.
  const ghesBaseUrl = process.env.GHES_URL;
  const ghesApiUrl = ghesBaseUrl ? `${ghesBaseUrl}/api/v3` : '';

  // Step 1: app-level client, only used to look up the installation id.
  const appAuth = await createGithubAppAuth(undefined, ghesApiUrl);
  const appClient = await createOctoClient(appAuth.token, ghesApiUrl);

  let installationId: number;
  if (runner.type === 'Org') {
    const orgInstallation = await appClient.apps.getOrgInstallation({
      org: runner.owner,
    });
    installationId = orgInstallation.data.id;
  } else {
    const [owner, repo] = runner.owner.split('/');
    const repoInstallation = await appClient.apps.getRepoInstallation({
      owner,
      repo,
    });
    installationId = repoInstallation.data.id;
  }

  // Step 2: installation-level client, cached for subsequent lookups with the same owner.
  const installationAuth = await createGithubInstallationAuth(installationId, ghesApiUrl);
  const installationClient = await createOctoClient(installationAuth.token, ghesApiUrl);
  githubCache.clients.set(cacheKey, installationClient);
  return installationClient;
}
/**
 * Fetches a single self-hosted runner from GitHub and reports its `busy` flag.
 *
 * `Org` runners are queried through the organisation endpoint; all other runners through the
 * repository endpoint, with `ec2runner.owner` split on `/` into owner and repo.
 *
 * @param client - Octokit client used for the request.
 * @param ec2runner - EC2 runner the GitHub runner belongs to (provides `type`, `owner`, `instanceId`).
 * @param runnerId - GitHub runner id to query.
 * @returns The `busy` value from the GitHub response.
 */
async function getGitHubRunnerBusyState(client: Octokit, ec2runner: RunnerInfo, runnerId: number): Promise<boolean> {
  let busy: boolean;
  if (ec2runner.type === 'Org') {
    const orgRunner = await client.actions.getSelfHostedRunnerForOrg({
      runner_id: runnerId,
      org: ec2runner.owner,
    });
    busy = orgRunner.data.busy;
  } else {
    const [owner, repo] = ec2runner.owner.split('/');
    const repoRunner = await client.actions.getSelfHostedRunnerForRepo({
      runner_id: runnerId,
      owner,
      repo,
    });
    busy = repoRunner.data.busy;
  }

  logger.info(`Runner '${ec2runner.instanceId}' - GitHub Runner ID '${runnerId}' - Busy: ${busy}`);
  return busy;
}
/**
 * Lists the self-hosted GitHub runners registered for the owner of the given EC2 runner.
 *
 * Results are looked up in `githubCache.runners` under `runner.owner`; on a miss the paginated
 * org or repo listing (100 per page) is fetched, stored in the cache and returned.
 *
 * @param runner - EC2 runner whose `owner` and `type` select which listing to fetch.
 * @returns The cached or freshly fetched list of GitHub runners.
 */
async function listGitHubRunners(runner: RunnerInfo): Promise<GhRunners> {
  const cacheKey = runner.owner as string;
  const cached = githubCache.runners.get(cacheKey);
  if (cached) {
    logger.debug(`[listGithubRunners] Cache hit for ${cacheKey}`);
    return cached;
  }
  logger.debug(`[listGithubRunners] Cache miss for ${cacheKey}`);

  const octokit = await getOrCreateOctokit(runner);

  let ghRunners: GhRunners;
  if (runner.type === 'Org') {
    ghRunners = await octokit.paginate(octokit.actions.listSelfHostedRunnersForOrg, {
      org: runner.owner,
      per_page: 100,
    });
  } else {
    const [owner, repo] = runner.owner.split('/');
    ghRunners = await octokit.paginate(octokit.actions.listSelfHostedRunnersForRepo, {
      owner,
      repo,
      per_page: 100,
    });
  }

  githubCache.runners.set(cacheKey, ghRunners);
  logger.debug(`[listGithubRunners] Cache set for ${cacheKey}`);
  logger.debug(`[listGithubRunners] Runners: ${JSON.stringify(ghRunners)}`);
  return ghRunners;
}
/**
 * Tells whether a runner has been up longer than the configured minimum running time.
 *
 * The minimum is read from the `MINIMUM_RUNNING_TIME_IN_MINUTES` environment variable and added,
 * in minutes, to the runner's launch time; the result is compared against the current time.
 *
 * @param runner - EC2 runner whose `launchTime` is evaluated.
 * @returns `true` when launch time plus the minimum lies strictly before now.
 */
function runnerMinimumTimeExceeded(runner: RunnerInfo): boolean {
  const minimumMinutes = process.env.MINIMUM_RUNNING_TIME_IN_MINUTES;
  const earliestRemoval = moment(runner.launchTime).utc().add(minimumMinutes, 'minutes');
  const currentTime = moment(new Date()).utc();
  return currentTime > earliestRemoval;
}
/**
 * De-registers the given GitHub runners and, if that succeeds, terminates the EC2 instance.
 *
 * Steps, all inside one try/catch that logs (and does not rethrow) any error:
 *  1. query the busy state of every GitHub runner id;
 *  2. if any runner is busy, log and stop;
 *  3. delete every runner from the org or repo, collecting the HTTP statuses;
 *  4. terminate the instance only when every delete returned 204, otherwise log an error.
 *
 * @param ec2runner - EC2 runner instance to remove.
 * @param ghRunnerIds - Ids of the GitHub runners registered for that instance.
 */
async function removeRunner(ec2runner: RunnerInfo, ghRunnerIds: number[]): Promise<void> {
  const octokit = await getOrCreateOctokit(ec2runner);

  // Deletes one GitHub runner via the org or repo endpoint and yields the response status.
  const deregister = async (ghRunnerId: number): Promise<number> => {
    if (ec2runner.type === 'Org') {
      const orgResponse = await octokit.actions.deleteSelfHostedRunnerFromOrg({
        runner_id: ghRunnerId,
        org: ec2runner.owner,
      });
      return orgResponse.status;
    }
    const repoResponse = await octokit.actions.deleteSelfHostedRunnerFromRepo({
      runner_id: ghRunnerId,
      owner: ec2runner.owner.split('/')[0],
      repo: ec2runner.owner.split('/')[1],
    });
    return repoResponse.status;
  };

  try {
    // Query the busy state directly instead of reusing the output of listGitHubRunners(...)
    // to minimize the race condition window.
    const busyStates = await Promise.all(
      ghRunnerIds.map((ghRunnerId) => getGitHubRunnerBusyState(octokit, ec2runner, ghRunnerId)),
    );
    if (busyStates.some((busy) => busy !== false)) {
      logger.info(`Runner '${ec2runner.instanceId}' cannot be de-registered, because it is still busy.`);
      return;
    }

    const statuses = await Promise.all(ghRunnerIds.map((ghRunnerId) => deregister(ghRunnerId)));
    if (statuses.some((status) => status != 204)) {
      logger.error(`Failed to de-register GitHub runner: ${statuses}`);
      return;
    }

    await terminateRunner(ec2runner.instanceId);
    logger.info(`AWS runner instance '${ec2runner.instanceId}' is terminated and GitHub runner is de-registered.`);
  } catch (e) {
    logger.error(`Runner '${ec2runner.instanceId}' cannot be de-registered. Error: ${e}`, {
      error: e as Error,
    });
  }
}
/**
 * Walks all EC2 runners, grouped by owner tag, and decides for each instance whether to keep
 * it idle, de-register and terminate it, or terminate it as an orphan.
 *
 * Per owner tag the instances are sorted with the comparator selected by the eviction strategy
 * and then processed in order:
 *  - instance has matching GitHub runners (name ends with the instance id) and its minimum
 *    running time is exceeded: it consumes one idle slot if any are left, otherwise
 *    `removeRunner` is attempted;
 *  - instance has no matching GitHub runner and its boot time is exceeded: it is treated as
 *    an orphan and terminated;
 *  - otherwise it is left alone.
 *
 * @param ec2Runners - EC2 runner instances to evaluate.
 * @param scaleDownConfigs - Scale-down configuration used to derive the idle count and eviction strategy.
 */
async function evaluateAndRemoveRunners(
  ec2Runners: RunnerInfo[],
  scaleDownConfigs: ScalingDownConfig[],
): Promise<void> {
  // Declared once, outside the per-owner loop, so the idle budget is shared across owner tags.
  let idleCounter = getIdleRunnerCount(scaleDownConfigs);
  const evictionStrategy = getEvictionStrategy(scaleDownConfigs);
  const ownerTags = new Set(ec2Runners.map((runner) => runner.owner));

  for (const ownerTag of ownerTags) {
    // `filter` yields a fresh array, so sorting here does not reorder the caller's list.
    const ec2RunnersFiltered = ec2Runners
      .filter((runner) => runner.owner === ownerTag)
      .sort(evictionStrategy === 'oldest_first' ? oldestFirstStrategy : newestFirstStrategy);
    logger.debug(`Found: '${ec2RunnersFiltered.length}' active GitHub runners with owner tag: '${ownerTag}'`);
    logger.debug(`Active GitHub runners with owner tag: '${ownerTag}': ${JSON.stringify(ec2RunnersFiltered)}`);

    for (const ec2Runner of ec2RunnersFiltered) {
      const ghRunners = await listGitHubRunners(ec2Runner);
      const ghRunnersFiltered = ghRunners.filter((runner: { name: string }) =>
        runner.name.endsWith(ec2Runner.instanceId),
      );
      logger.debug(
        `Found: '${ghRunnersFiltered.length}' GitHub runners for AWS runner instance: '${ec2Runner.instanceId}'`,
      );
      logger.debug(
        `GitHub runners for AWS runner instance: '${ec2Runner.instanceId}': ${JSON.stringify(ghRunnersFiltered)}`,
      );

      if (ghRunnersFiltered.length) {
        if (runnerMinimumTimeExceeded(ec2Runner)) {
          if (idleCounter > 0) {
            // Instances earlier in the sorted order use up the idle slots and are kept.
            idleCounter--;
            logger.info(`Runner '${ec2Runner.instanceId}' will be kept idle.`);
          } else {
            logger.info(`Will try to terminate runners that are not busy`);
            await removeRunner(
              ec2Runner,
              ghRunnersFiltered.map((runner: { id: number }) => runner.id),
            );
          }
        }
      } else if (bootTimeExceeded(ec2Runner)) {
        logger.info(`Runner '${ec2Runner.instanceId}' is orphaned and will be removed.`);
        // Await the termination: a floating promise may still be pending when this function
        // (and the invocation driving it) completes, so the orphan could survive the run.
        await terminateOrphan(ec2Runner.instanceId);
      } else {
        logger.debug(`Runner ${ec2Runner.instanceId} has not yet booted.`);
      }
    }
  }
}
/**
 * Best-effort termination of an EC2 instance that has no matching GitHub runner.
 *
 * Failures are logged and never rethrown, so a single failing instance does not abort the
 * caller's loop. The caught error is attached to the log entry so the failure can be diagnosed.
 *
 * @param instanceId - Id of the EC2 instance to terminate.
 */
async function terminateOrphan(instanceId: string): Promise<void> {
  try {
    await terminateRunner(instanceId);
  } catch (e) {
    logger.debug(`Orphan runner '${instanceId}' cannot be removed.`, {
      error: e as Error,
    });
  }
}
/**
 * Sort comparator used for the `oldest_first` eviction strategy.
 *
 * Orders runners by descending `launchTime` (the most recently launched runner sorts first,
 * the oldest last). Runners without a `launchTime` sort after all runners that have one, and
 * two runners without a `launchTime` compare as equal, so the comparator is consistent
 * regardless of argument order.
 *
 * @param a - First runner to compare.
 * @param b - Second runner to compare.
 * @returns `1` if `a` sorts after `b`, `-1` if `a` sorts before `b`, `0` if they are equal.
 */
function oldestFirstStrategy(a: RunnerInfo, b: RunnerInfo): number {
  // Missing launch times: keep the result antisymmetric (swap the arguments, flip the sign).
  if (a.launchTime === undefined) return b.launchTime === undefined ? 0 : 1;
  if (b.launchTime === undefined) return -1;
  if (a.launchTime < b.launchTime) return 1;
  if (a.launchTime > b.launchTime) return -1;
  return 0;
}
/**
 * Sort comparator that is the exact inverse of `oldestFirstStrategy`.
 *
 * @param a - First runner to compare.
 * @param b - Second runner to compare.
 * @returns The negated result of `oldestFirstStrategy(a, b)`.
 */
function newestFirstStrategy(a: RunnerInfo, b: RunnerInfo): number {
  const oldestFirstOrder = oldestFirstStrategy(a, b);
  return -oldestFirstOrder;
}
/**
 * Lists the EC2 runners of one environment by delegating to `listEC2Runners`.
 *
 * @param environment - Environment name passed through as the `environment` filter.
 * @returns Whatever `listEC2Runners` resolves to for that filter.
 */
async function listRunners(environment: string) {
  const filter = { environment };
  const ec2Runners = await listEC2Runners(filter);
  return ec2Runners;
}
/**
 * Keeps only the runners that carry a truthy `type` and returns them as `RunnerInfo`.
 *
 * @param ec2runners - Runners as listed from EC2.
 * @returns A new array with the entries whose `type` is truthy, in their original order.
 */
function filterRunners(ec2runners: RunnerList[]): RunnerInfo[] {
  const typedRunners: RunnerInfo[] = [];
  ec2runners.forEach((candidate) => {
    if (candidate.type) {
      typedRunners.push(candidate as RunnerInfo);
    }
  });
  return typedRunners;
}
/**
 * Entry point of the scale-down run.
 *
 * Resets the GitHub cache, reads the scale-down configuration from `SCALE_DOWN_CONFIG` (JSON)
 * and the environment name from `ENVIRONMENT`, lists the EC2 runners of that environment and,
 * when there are any, evaluates them for removal. The number of runners is logged before and
 * after the clean-up.
 *
 * @throws Whatever `JSON.parse` throws when `SCALE_DOWN_CONFIG` is not valid JSON.
 */
export async function scaleDown(): Promise<void> {
  githubCache.reset();

  // The configuration is a list of entries, so assert an array type rather than a one-element tuple.
  const scaleDownConfigs = JSON.parse(process.env.SCALE_DOWN_CONFIG) as ScalingDownConfig[];
  const environment = process.env.ENVIRONMENT;

  const ec2Runners = await listRunners(environment);
  const activeEc2RunnersCount = ec2Runners.length;
  logger.info(`Found: '${activeEc2RunnersCount}' active GitHub EC2 runner instances before clean-up.`);
  logger.debug(`Active GitHub EC2 runner instances: ${JSON.stringify(ec2Runners)}`);

  if (activeEc2RunnersCount === 0) {
    logger.debug(`No active runners found for environment: '${environment}'`);
    return;
  }

  const runners = filterRunners(ec2Runners);
  await evaluateAndRemoveRunners(runners, scaleDownConfigs);

  // List again so the logged count reflects the state after the clean-up.
  const activeEc2RunnersCountAfter = (await listRunners(environment)).length;
  logger.info(`Found: '${activeEc2RunnersCountAfter}' active GitHub EC2 runners instances after clean-up.`);
}