Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,10 @@ describe('Config', () => {
process.env.KMS_KEY_ID = 'KMS_KEY_ID';
process.env.LAMBDA_TIMEOUT = '113';
process.env.LAUNCH_TEMPLATE_NAME_LINUX = 'LAUNCH_TEMPLATE_NAME_LINUX';
process.env.LAUNCH_TEMPLATE_NAME_LINUX_NVIDIA = 'LAUNCH_TEMPLATE_NAME_LINUX_NVIDIA';
process.env.LAUNCH_TEMPLATE_NAME_WINDOWS = 'LAUNCH_TEMPLATE_NAME_WINDOWS';
process.env.LAUNCH_TEMPLATE_VERSION_LINUX = 'LAUNCH_TEMPLATE_VERSION_LINUX';
process.env.LAUNCH_TEMPLATE_VERSION_LINUX_NVIDIA = 'LAUNCH_TEMPLATE_VERSION_LINUX_NVIDIA';
process.env.LAUNCH_TEMPLATE_VERSION_WINDOWS = 'LAUNCH_TEMPLATE_VERSION_WINDOWS';
process.env.MINIMUM_RUNNING_TIME_IN_MINUTES = '33';
process.env.MIN_AVAILABLE_RUNNERS = '113';
Expand Down Expand Up @@ -55,8 +57,10 @@ describe('Config', () => {
expect(Config.Instance.kmsKeyId).toBe('KMS_KEY_ID');
expect(Config.Instance.lambdaTimeout).toBe(113);
expect(Config.Instance.launchTemplateNameLinux).toBe('LAUNCH_TEMPLATE_NAME_LINUX');
expect(Config.Instance.launchTemplateNameLinuxNvidia).toBe('LAUNCH_TEMPLATE_NAME_LINUX_NVIDIA');
expect(Config.Instance.launchTemplateNameWindows).toBe('LAUNCH_TEMPLATE_NAME_WINDOWS');
expect(Config.Instance.launchTemplateVersionLinux).toBe('LAUNCH_TEMPLATE_VERSION_LINUX');
expect(Config.Instance.launchTemplateVersionLinuxNvidia).toBe('LAUNCH_TEMPLATE_VERSION_LINUX_NVIDIA');
expect(Config.Instance.launchTemplateVersionWindows).toBe('LAUNCH_TEMPLATE_VERSION_WINDOWS');
expect(Config.Instance.minAvailableRunners).toBe(113);
expect(Config.Instance.minimumRunningTimeInMinutes).toBe(33);
Expand Down Expand Up @@ -92,8 +96,10 @@ describe('Config', () => {
delete process.env.KMS_KEY_ID;
delete process.env.LAMBDA_TIMEOUT;
process.env.LAUNCH_TEMPLATE_NAME_LINUX = 'LAUNCH_TEMPLATE_NAME_LINUX';
process.env.LAUNCH_TEMPLATE_NAME_LINUX_NVIDIA = 'LAUNCH_TEMPLATE_NAME_LINUX_NVIDIA';
process.env.LAUNCH_TEMPLATE_NAME_WINDOWS = 'LAUNCH_TEMPLATE_NAME_WINDOWS';
process.env.LAUNCH_TEMPLATE_VERSION_LINUX = 'LAUNCH_TEMPLATE_VERSION_LINUX';
process.env.LAUNCH_TEMPLATE_VERSION_LINUX_NVIDIA = 'LAUNCH_TEMPLATE_VERSION_LINUX_NVIDIA';
process.env.LAUNCH_TEMPLATE_VERSION_WINDOWS = 'LAUNCH_TEMPLATE_VERSION_WINDOWS';
delete process.env.MIN_AVAILABLE_RUNNERS;
delete process.env.MUST_HAVE_ISSUES_LABELS;
Expand All @@ -120,8 +126,10 @@ describe('Config', () => {
expect(Config.Instance.kmsKeyId).toBeUndefined();
expect(Config.Instance.lambdaTimeout).toEqual(600);
expect(Config.Instance.launchTemplateNameLinux).toBe('LAUNCH_TEMPLATE_NAME_LINUX');
expect(Config.Instance.launchTemplateNameLinuxNvidia).toBe('LAUNCH_TEMPLATE_NAME_LINUX_NVIDIA');
expect(Config.Instance.launchTemplateNameWindows).toBe('LAUNCH_TEMPLATE_NAME_WINDOWS');
expect(Config.Instance.launchTemplateVersionLinux).toBe('LAUNCH_TEMPLATE_VERSION_LINUX');
expect(Config.Instance.launchTemplateVersionLinuxNvidia).toBe('LAUNCH_TEMPLATE_VERSION_LINUX_NVIDIA');
expect(Config.Instance.launchTemplateVersionWindows).toBe('LAUNCH_TEMPLATE_VERSION_WINDOWS');
expect(Config.Instance.minAvailableRunners).toBe(10);
expect(Config.Instance.minimumRunningTimeInMinutes).toBe(10);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@ export class Config {
readonly kmsKeyId: string | undefined;
readonly lambdaTimeout: number;
readonly launchTemplateNameLinux: string | undefined;
readonly launchTemplateNameLinuxNvidia: string | undefined;
readonly launchTemplateNameWindows: string | undefined;
readonly launchTemplateVersionLinux: string | undefined;
readonly launchTemplateVersionLinuxNvidia: string | undefined;
readonly launchTemplateVersionWindows: string | undefined;
readonly minAvailableRunners: number;
readonly minimumRunningTimeInMinutes: number;
Expand Down Expand Up @@ -46,8 +48,10 @@ export class Config {
this.kmsKeyId = process.env.KMS_KEY_ID;
this.lambdaTimeout = Number(process.env.LAMBDA_TIMEOUT || '600');
this.launchTemplateNameLinux = process.env.LAUNCH_TEMPLATE_NAME_LINUX;
this.launchTemplateNameLinuxNvidia = process.env.LAUNCH_TEMPLATE_NAME_LINUX_NVIDIA;
this.launchTemplateNameWindows = process.env.LAUNCH_TEMPLATE_NAME_WINDOWS;
this.launchTemplateVersionLinux = process.env.LAUNCH_TEMPLATE_VERSION_LINUX;
this.launchTemplateVersionLinuxNvidia = process.env.LAUNCH_TEMPLATE_VERSION_LINUX_NVIDIA;
this.launchTemplateVersionWindows = process.env.LAUNCH_TEMPLATE_VERSION_WINDOWS;

/* istanbul ignore next */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,12 @@ function createExpectedRunInstancesLinux(runnerParameters: RunnerInputParameters
MaxCount: 1,
MinCount: 1,
LaunchTemplate: {
LaunchTemplateName: Config.Instance.launchTemplateNameLinux,
Version: Config.Instance.launchTemplateVersionLinux,
LaunchTemplateName: runnerParameters.runnerType.runnerTypeName.includes('.nvidia.gpu')
? Config.Instance.launchTemplateNameLinuxNvidia
: Config.Instance.launchTemplateNameLinux,
Version: runnerParameters.runnerType.runnerTypeName.includes('.nvidia.gpu')
? Config.Instance.launchTemplateVersionLinuxNvidia
: Config.Instance.launchTemplateVersionLinux,
},
InstanceType: runnerParameters.runnerType.instance_type,
BlockDeviceMappings: [
Expand Down Expand Up @@ -345,7 +349,7 @@ describe('create runner', () => {
os: 'linux',
max_available: 200,
disk_size: 100,
runnerTypeName: 'linuxCpu',
runnerTypeName: 'linuxCpu.nvidia.gpu',
is_ephemeral: true,
},
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,18 @@ async function addSSMParameterRunnerConfig(
console.debug(`Created SSM Parameters(s): ${createdSSMParams.join(',')}`);
}

/**
 * Picks the EC2 launch template (name and version) for the runner being created.
 *
 * Selection order:
 *  1. `os` is not `'linux'`                                  -> Windows template
 *  2. `os` is `'linux'` and the runner type name contains
 *     `.nvidia.gpu`                                          -> Linux NVIDIA template
 *  3. any other Linux runner                                 -> Linux template
 *
 * Despite the name, this returns both the template name and its version so the
 * two are always taken from the same template.
 *
 * @param runnerParameters - Runner request; only `runnerType.os` and
 *   `runnerType.runnerTypeName` are read.
 * @returns A fixed-length `[name, version]` pair. It is typed as a tuple rather
 *   than a loose array (as suggested in review) so callers that destructure it
 *   are checked for arity. Either entry may be `undefined`, because the
 *   corresponding `Config` fields are declared as `string | undefined`.
 */
function getLaunchTemplateName(
  runnerParameters: RunnerInputParameters,
): [string | undefined, string | undefined] {
  const config = Config.Instance;
  const runnerType = runnerParameters.runnerType;

  if (runnerType.os !== 'linux') {
    return [config.launchTemplateNameWindows, config.launchTemplateVersionWindows];
  }

  // GPU runner types are recognised purely by a naming convention on the runner type.
  if (runnerType.runnerTypeName.includes('.nvidia.gpu')) {
    return [config.launchTemplateNameLinuxNvidia, config.launchTemplateVersionLinuxNvidia];
  }

  return [config.launchTemplateNameLinux, config.launchTemplateVersionLinux];
}

export async function createRunner(runnerParameters: RunnerInputParameters, metrics: Metrics): Promise<void> {
try {
console.debug('Runner configuration: ' + JSON.stringify(runnerParameters));
Expand Down Expand Up @@ -256,6 +268,8 @@ export async function createRunner(runnerParameters: RunnerInputParameters, metr
});
}

const [launchTemplateName, launchTemplateVersion] = getLaunchTemplateName(runnerParameters);

const runInstancesResponse = await expBackOff(() => {
return metrics.trackRequest(
metrics.ec2RunInstancesAWSCallSuccess,
Expand All @@ -266,14 +280,8 @@ export async function createRunner(runnerParameters: RunnerInputParameters, metr
MaxCount: 1,
MinCount: 1,
LaunchTemplate: {
LaunchTemplateName:
runnerParameters.runnerType.os === 'linux'
? Config.Instance.launchTemplateNameLinux
: Config.Instance.launchTemplateNameWindows,
Version:
runnerParameters.runnerType.os === 'linux'
? Config.Instance.launchTemplateVersionLinux
: Config.Instance.launchTemplateVersionWindows,
LaunchTemplateName: launchTemplateName,
Version: launchTemplateVersion,
},
InstanceType: runnerParameters.runnerType.instance_type,
BlockDeviceMappings: [
Expand Down
31 changes: 31 additions & 0 deletions terraform-aws-github-runner/modules/runners/logging.tf
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,26 @@ resource "aws_ssm_parameter" "cloudwatch_agent_config_runner_linux" {
tags = local.tags
}

# SSM parameter holding the CloudWatch agent configuration for the Linux NVIDIA runners.
#
# The resource uses `count`, so it only exists when var.enable_cloudwatch_agent is true;
# anything that references it with `[0]` must be guarded by that same flag.
resource "aws_ssm_parameter" "cloudwatch_agent_config_runner_linux_nvidia" {
count = var.enable_cloudwatch_agent ? 1 : 0
name = "${var.environment}-cloudwatch_agent_config_runner_linux_nvidia"
type = "String"
# The value is templates/cloudwatch_config.json rendered with the region, the environment,
# the Linux log file list (local.logfiles_linux) and a `metrics_collected` section taken
# from templates/cloudwatch_config_linux_nvidia.json (rendered with no variables).
# The jsondecode -> jsonencode round trip means the rendered text must parse as JSON and
# that the stored value is Terraform's re-serialized form of it.
value = jsonencode(
jsondecode(
templatefile(
"${path.module}/templates/cloudwatch_config.json",
{
aws_region = var.aws_region
environment = var.environment
logfiles = jsonencode(local.logfiles_linux)
metrics_collected = templatefile("${path.module}/templates/cloudwatch_config_linux_nvidia.json", {})
}
)
)
)
tags = local.tags
}

resource "aws_cloudwatch_log_group" "gh_runners_linux" {
count = length(local.loggroups_names_linux)
name = local.loggroups_names_linux[count.index]
Expand All @@ -95,6 +115,17 @@ resource "aws_iam_role_policy" "cloudwatch_linux" {
)
}

# Inline policy on the runner role, rendered from policies/instance-cloudwatch-policy.json
# with the ARN of the NVIDIA CloudWatch agent config parameter.
#
# The policy references aws_ssm_parameter.cloudwatch_agent_config_runner_linux_nvidia[0],
# which only exists when var.enable_cloudwatch_agent is true. It is therefore gated on BOTH
# flags: gating on var.enable_ssm_on_runners alone makes the `[0]` index invalid whenever
# SSM is enabled but the CloudWatch agent is not.
resource "aws_iam_role_policy" "cloudwatch_linux_nvidia" {
  count = (var.enable_ssm_on_runners && var.enable_cloudwatch_agent) ? 1 : 0

  # The spelling "Loggin" is kept as-is: changing the name would replace the policy.
  name = "CloudWatchLogginAndMetricsLinuxNvidia"
  role = aws_iam_role.runner.name

  policy = templatefile("${path.module}/policies/instance-cloudwatch-policy.json",
    {
      ssm_parameter_arn = aws_ssm_parameter.cloudwatch_agent_config_runner_linux_nvidia[0].arn
    }
  )
}

resource "aws_ssm_parameter" "cloudwatch_agent_config_runner_windows" {
count = var.enable_cloudwatch_agent ? 1 : 0
name = "${var.environment}-cloudwatch_agent_config_runner_windows"
Expand Down
46 changes: 46 additions & 0 deletions terraform-aws-github-runner/modules/runners/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,52 @@ resource "aws_launch_template" "linux_runner" {
tags = local.tags
}

# Launch template for Linux runners with NVIDIA GPUs.
#
# scale-up.tf passes this template's `name` and `latest_version` to the scale-up Lambda as
# LAUNCH_TEMPLATE_NAME_LINUX_NVIDIA and LAUNCH_TEMPLATE_VERSION_LINUX_NVIDIA.
resource "aws_launch_template" "linux_runner_nvidia" {
name = "${var.environment}-action-linux-runner-nvidia"

iam_instance_profile {
name = aws_iam_instance_profile.runner.name
}

# Instances are terminated, not stopped, when they shut themselves down.
instance_initiated_shutdown_behavior = "terminate"

# AMI comes from the data source data.aws_ami.runner_ami_linux.
image_id = data.aws_ami.runner_ami_linux.id
instance_type = var.instance_type
key_name = var.key_name

# Instances and their volumes both get the shared tags plus a "Name" tag.
tag_specifications {
resource_type = "instance"
tags = merge(
local.tags,
{
"Name" = format("%s", local.name_runner)
},
)
}

tag_specifications {
resource_type = "volume"
tags = merge(
local.tags,
{
"Name" = format("%s", local.name_runner)
},
)
}

# User data is rendered from local.userdata_template. When the CloudWatch agent is enabled
# it receives the name of the NVIDIA-specific agent config parameter; otherwise it gets an
# empty string, which also avoids indexing a parameter that was never created.
user_data = base64encode(templatefile(local.userdata_template, {
environment = var.environment
pre_install = var.userdata_pre_install
post_install = var.userdata_post_install
enable_cloudwatch_agent = var.enable_cloudwatch_agent
ssm_key_cloudwatch_agent_config = var.enable_cloudwatch_agent ? aws_ssm_parameter.cloudwatch_agent_config_runner_linux_nvidia[0].name : ""
ghes_url = var.ghes_url
install_config_runner = local.install_config_runner_linux
}))

tags = local.tags
}

resource "aws_launch_template" "windows_runner" {
name = "${var.environment}-action-windows-runner"

Expand Down
43 changes: 22 additions & 21 deletions terraform-aws-github-runner/modules/runners/scale-up.tf
Original file line number Diff line number Diff line change
Expand Up @@ -30,25 +30,27 @@ resource "aws_lambda_function" "scale_up" {

environment {
variables = {
AWS_REGION_INSTANCES = join(",", var.aws_region_instances)
CANT_HAVE_ISSUES_LABELS = join(",", var.cant_have_issues_labels)
ENABLE_ORGANIZATION_RUNNERS = var.enable_organization_runners
ENVIRONMENT = var.environment
GITHUB_APP_CLIENT_ID = var.github_app.client_id
GITHUB_APP_CLIENT_SECRET = local.github_app_client_secret
GITHUB_APP_ID = var.github_app.id
GITHUB_APP_KEY_BASE64 = local.github_app_key_base64
KMS_KEY_ID = var.encryption.kms_key_id
LAMBDA_TIMEOUT = var.lambda_timeout_scale_up
LAUNCH_TEMPLATE_NAME_LINUX = aws_launch_template.linux_runner.name
LAUNCH_TEMPLATE_NAME_WINDOWS = aws_launch_template.windows_runner.name
LAUNCH_TEMPLATE_VERSION_LINUX = aws_launch_template.linux_runner.latest_version
LAUNCH_TEMPLATE_VERSION_WINDOWS = aws_launch_template.windows_runner.latest_version
MUST_HAVE_ISSUES_LABELS = join(",", var.must_have_issues_labels)
RUNNER_EXTRA_LABELS = var.runner_extra_labels
SECRETSMANAGER_SECRETS_ID = var.secretsmanager_secrets_id
SECURITY_GROUP_IDS = join(",", concat([aws_security_group.runner_sg.id], var.runner_additional_security_group_ids))
SUBNET_IDS = join(",", var.subnet_ids)
AWS_REGION_INSTANCES = join(",", var.aws_region_instances)
CANT_HAVE_ISSUES_LABELS = join(",", var.cant_have_issues_labels)
ENABLE_ORGANIZATION_RUNNERS = var.enable_organization_runners
ENVIRONMENT = var.environment
GITHUB_APP_CLIENT_ID = var.github_app.client_id
GITHUB_APP_CLIENT_SECRET = local.github_app_client_secret
GITHUB_APP_ID = var.github_app.id
GITHUB_APP_KEY_BASE64 = local.github_app_key_base64
KMS_KEY_ID = var.encryption.kms_key_id
LAMBDA_TIMEOUT = var.lambda_timeout_scale_up
LAUNCH_TEMPLATE_NAME_LINUX = aws_launch_template.linux_runner.name
LAUNCH_TEMPLATE_NAME_LINUX_NVIDIA = aws_launch_template.linux_runner_nvidia.name
LAUNCH_TEMPLATE_NAME_WINDOWS = aws_launch_template.windows_runner.name
LAUNCH_TEMPLATE_VERSION_LINUX = aws_launch_template.linux_runner.latest_version
LAUNCH_TEMPLATE_VERSION_LINUX_NVIDIA = aws_launch_template.linux_runner_nvidia.latest_version
LAUNCH_TEMPLATE_VERSION_WINDOWS = aws_launch_template.windows_runner.latest_version
MUST_HAVE_ISSUES_LABELS = join(",", var.must_have_issues_labels)
RUNNER_EXTRA_LABELS = var.runner_extra_labels
SECRETSMANAGER_SECRETS_ID = var.secretsmanager_secrets_id
SECURITY_GROUP_IDS = join(",", concat([aws_security_group.runner_sg.id], var.runner_additional_security_group_ids))
SUBNET_IDS = join(",", var.subnet_ids)
}
}

Expand All @@ -62,7 +64,6 @@ resource "aws_lambda_function" "scale_up" {
}

resource "aws_lambda_alias" "scale_up_lambda_alias" {
count = var.scale_up_provisioned_concurrent_executions > 0 ? 1 : 0
name = "provisioned-${aws_lambda_function.scale_up.function_name}"
description = "Alias for provisioned instances of ${aws_lambda_function.scale_up.function_name}"
function_name = aws_lambda_function.scale_up.function_name
Expand All @@ -73,7 +74,7 @@ resource "aws_lambda_provisioned_concurrency_config" "scale_up_provisioned_concu
count = var.scale_up_provisioned_concurrent_executions > 0 ? 1 : 0
function_name = aws_lambda_alias.scale_up_lambda_alias.function_name
provisioned_concurrent_executions = var.scale_up_provisioned_concurrent_executions
qualifier = aws_lambda_alias.scale_up_lambda_alias.version
qualifier = aws_lambda_alias.scale_up_lambda_alias.name
}

resource "aws_cloudwatch_log_group" "scale_up" {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
{
"cpu": {
"measurement": [
"cpu_usage_idle",
"cpu_usage_iowait",
"cpu_usage_user",
"cpu_usage_system"
],
"metrics_collection_interval": 10
},
"disk": {
"measurement": [
"free",
"total",
"used",
"used_percent",
"inodes_free",
"inodes_total"
],
"metrics_collection_interval": 10,
"resources": [
"*"
]
},
"diskio": {
"measurement": [
"io_time"
],
"metrics_collection_interval": 10,
"resources": [
"/"
]
},
"mem": {
"measurement": [
"total",
"used",
"free",
"used_percent"
],
"metrics_collection_interval": 10
},
"swap": {
"measurement": [
"swap_used_percent"
],
"metrics_collection_interval": 10
},
"nvidia_gpu": {
"measurement": [
"utilization_gpu",
"utilization_memory",
"memory_total",
"memory_used",
"memory_free"
]
}
}