Skip to content

Commit

Permalink
fix the validation credentials issue (aws-observability#68)
Browse files Browse the repository at this point in the history
* add testcases folder

* refactor

* fix the false code

* fix the testing_id issue

* add mock type test, so that we can run it in pr build

* reformat the code

* build image in mock test

* fix the build issue in the mock test's docker compose

* fix the env var issue in mock test

* add xrayreceiver mock test

* edit readme

* update readme

* resolve comments on pr

* add mock server into ecs

* add gitignore for nodejs

* remove aemm-metadata-default-values.json for imds

* add mock server into eks test

* add mocked server into ec2 test

* in mock test, build all the image from sources

* modify the trace template in validator

* add faked cert in basiccomponent

* switch mocked server validation to the sample app

* add mocked server in docker compose for ec2 test

* refactor

* add a ca bundle

* add a validation module

* modify readme

* refactor

* refactor

* fix mock server

* fix the validation config

* fix ecs test

* refactor soaking

* fix soaking test

* fix validation issue
  • Loading branch information
wyTrivail committed Nov 13, 2020
1 parent 5cd7ae5 commit b24c822
Show file tree
Hide file tree
Showing 7 changed files with 49 additions and 14 deletions.
1 change: 1 addition & 0 deletions terraform/soaking/amis.tf
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ variable "ami_family" {
cwagent_download_command = "sudo rpm -Uvh https://s3.amazonaws.com/amazoncloudwatch-agent/amazon_linux/amd64/latest/amazon-cloudwatch-agent.rpm"
cwagent_start_command = "sudo /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a fetch-config -c file:/tmp/cwagent-config.json -s"
soaking_cpu_metric_name = "procstat_cpu_usage"
soaking_mem_metric_name = "procstat_memory_rss"
}
windows = {
login_user = "Administrator"
Expand Down
36 changes: 32 additions & 4 deletions terraform/soaking/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ module "ec2_setup" {
region = var.region
testcase = var.testcase
sample_app_image = var.soaking_data_emitter_image
validation_config = var.validation_config
skip_validation = true

# soaking test config
Expand Down Expand Up @@ -129,18 +128,47 @@ resource "aws_cloudwatch_metric_alarm" "cpu_alarm" {
}
}

# mem alarm
# CloudWatch alarm on the memory metric of the aws-otel-collector process during the soaking test.
# It alarms when the per-period average of the metric is greater than or equal to the
# threshold for 2 evaluation periods.
resource "aws_cloudwatch_metric_alarm" "mem_alarm" {
# create the alarm only after the time_sleep.wait_2_minutes resource
depends_on = [time_sleep.wait_2_minutes]
# the testing id is part of the alarm name, so each test run gets its own alarm
alarm_name = "otel-soaking-mem-alarm-${module.common.testing_id}"
comparison_operator = "GreaterThanOrEqualToThreshold"
evaluation_periods = 2
# NOTE(review): the unit of this threshold depends on the unit of the metric selected by
# soaking_mem_metric_name (e.g. procstat_memory_rss) - confirm 300 is the intended value
threshold = "300"

metric_query {
id = "mem"
# this query is the one whose result the alarm evaluates
return_data = true

metric {
# the metric name comes from the ami_family map, so it can differ per AMI family
metric_name = local.ami_family["soaking_mem_metric_name"]
namespace = var.soaking_metric_namespace
# period is expressed in seconds; "Average" is computed over each period
period = 60
stat = "Average"

# use this dimension to identify each test
dimensions = {
InstanceId = module.ec2_setup.collector_instance_id
exe = "aws-otel-collector"
process_name = "aws-otel-collector"
}
}
}
}

##########################################
# Validation
##########################################
module "validator" {
source = "../validation"

validation_config = var.validation_config
validation_config = "alarm-pulling-validation.yml"
region = var.region
testing_id = module.common.testing_id
alarm_names = aws_cloudwatch_metric_alarm.cpu_alarm.alarm_name
cpu_alarm = aws_cloudwatch_metric_alarm.cpu_alarm.alarm_name
mem_alarm = aws_cloudwatch_metric_alarm.mem_alarm.alarm_name

depends_on = [aws_cloudwatch_metric_alarm.cpu_alarm]
depends_on = [aws_cloudwatch_metric_alarm.cpu_alarm, aws_cloudwatch_metric_alarm.mem_alarm]
}

# for debug
Expand Down
4 changes: 0 additions & 4 deletions terraform/soaking/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,6 @@ variable "soaking_data_type" {
default = "otlp"
}

variable "validation_config" {
default = "alarm-pulling-validation.yml"
}

variable "testing_ami" {
default = "soaking_linux"
}
Expand Down
5 changes: 4 additions & 1 deletion terraform/templates/defaults/validator_docker_compose.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ services:
validator:
build:
../../validator
volumes:
- ~/.aws:/root/.aws
command:
- "-c=${validation_config}"
- "-t=${testing_id}"
Expand All @@ -19,5 +21,6 @@ services:
- "ecsTaskDefFamily=${ecs_taskdef_family}"
- "--ecs-context"
- "ecsTaskDefVersion=${ecs_taskdef_version}"
- "--alarm-names=${alarm_names}"
- "--alarm-names=${cpu_alarm}"
- "--alarm-names=${mem_alarm}"

3 changes: 2 additions & 1 deletion terraform/validation/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ data "template_file" "docker_compose" {
ecs_taskdef_version = var.ecs_taskdef_version

# alarm related
alarm_names = var.alarm_names
cpu_alarm = var.cpu_alarm
mem_alarm = var.mem_alarm
}

}
Expand Down
6 changes: 5 additions & 1 deletion terraform/validation/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ variable "ecs_taskdef_version" {
default = ""
}

variable "alarm_names" {
variable "cpu_alarm" {
default = ""
}

variable "mem_alarm" {
default = ""
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import com.amazonaws.services.cloudwatch.model.MetricAlarm;
import lombok.extern.log4j.Log4j2;

import java.util.Collections;
import java.util.Comparator;
import java.util.List;

Expand All @@ -36,6 +37,7 @@ public void init(

@Override
public void validate() throws Exception {
Collections.sort(context.getAlarmNameList());
RetryHelper.retry(
this.pullTimes,
this.pullDuration * 1000,
Expand All @@ -45,8 +47,7 @@ public void validate() throws Exception {
this.cloudWatchAlarmService.listAlarms(context.getAlarmNameList());

// compare the alarm name
context.getAlarmNameList().sort(String::compareTo);
alarmList.sort(Comparator.comparing(MetricAlarm::getMetricName));
alarmList.sort(Comparator.comparing(MetricAlarm::getAlarmName));
for (int i = 0; i != context.getAlarmNameList().size(); ++i) {
if (!context.getAlarmNameList().get(i).equals(alarmList.get(i).getAlarmName())) {
log.error("alarm {} can not be found", context.getAlarmNameList().get(i));
Expand All @@ -56,11 +57,12 @@ public void validate() throws Exception {

// check the status of the alarms, exit if one of them is alarming
for (MetricAlarm metricAlarm : alarmList) {
log.info(metricAlarm.getStateValue());
if (metricAlarm.getStateValue().equals("ALARM")) {
log.error(
"alarm {} is alarming, metric is {}, failing to bake",
metricAlarm.getAlarmName(),
metricAlarm.getMetricName());
metricAlarm.getMetrics());
System.exit(1);
}
}
Expand Down

0 comments on commit b24c822

Please sign in to comment.