Skip to content

Commit

Permalink
[aws-ecs-metrics-receiver] refactoring for better test coverage and error handling (#1738)
Browse files Browse the repository at this point in the history

* add null check to avoid seg fault

* check if memory and cpu limits are nil

* add unit tests for better coverage

Signed-off-by: Rayhan Hossain <hossain.rayhan@outlook.com>
  • Loading branch information
hossain-rayhan committed Dec 2, 2020
1 parent d1485f2 commit f043f14
Show file tree
Hide file tree
Showing 9 changed files with 445 additions and 173 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"time"

"go.opentelemetry.io/collector/consumer/pdata"
"go.uber.org/zap"
)

// metricDataAccumulator defines the accumulator
Expand All @@ -26,17 +27,26 @@ type metricDataAccumulator struct {
}

// getMetricsData generates OT Metrics data from task metadata and docker stats
func (acc *metricDataAccumulator) getMetricsData(containerStatsMap map[string]ContainerStats, metadata TaskMetadata) {
func (acc *metricDataAccumulator) getMetricsData(containerStatsMap map[string]*ContainerStats, metadata TaskMetadata, logger *zap.Logger) {

taskMetrics := ECSMetrics{}
timestamp := pdata.TimeToUnixNano(time.Now())
taskResource := taskResource(metadata)

for _, containerMetadata := range metadata.Containers {
stats := containerStatsMap[containerMetadata.DockerID]
containerMetrics := getContainerMetrics(stats)
containerMetrics.MemoryReserved = *containerMetadata.Limits.Memory
containerMetrics.CPUReserved = *containerMetadata.Limits.CPU
stats, ok := containerStatsMap[containerMetadata.DockerID]
if !ok || stats == nil {
continue
}

containerMetrics := getContainerMetrics(stats, logger)
if containerMetadata.Limits.Memory != nil {
containerMetrics.MemoryReserved = *containerMetadata.Limits.Memory
}

if containerMetadata.Limits.CPU != nil {
containerMetrics.CPUReserved = *containerMetadata.Limits.CPU
}

if containerMetrics.CPUReserved > 0 {
containerMetrics.CPUUtilized = (containerMetrics.CPUUtilized / containerMetrics.CPUReserved)
Expand Down Expand Up @@ -68,7 +78,9 @@ func (acc *metricDataAccumulator) getMetricsData(containerStatsMap map[string]Co
// at least in one place (either in task level or in container level). If the
// task level CPULimit is not present, we calculate it from the summation of
// all container CPU limits.
taskMetrics.CPUUtilized = ((taskMetrics.CPUUsageInVCPU / taskMetrics.CPUReserved) * 100)
if taskMetrics.CPUReserved > 0 {
taskMetrics.CPUUtilized = ((taskMetrics.CPUUsageInVCPU / taskMetrics.CPUReserved) * 100)
}

acc.accumulate(convertToOTLPMetrics(TaskPrefix, taskMetrics, taskResource, timestamp))
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,18 @@ import (
"testing"

"github.com/stretchr/testify/require"
"go.opentelemetry.io/collector/consumer/pdata"
"go.uber.org/zap"
)

func TestGetMetricsData(t *testing.T) {
v := uint64(1)
f := 1.0
var (
v = uint64(1)
f = 1.0
floatZero = float64(0)
logger = zap.NewNop()

memStats := make(map[string]uint64)
memStats["cache"] = v
memStats = map[string]uint64{"cache": v}

mem := MemoryStats{
mem = MemoryStats{
Usage: &v,
MaxUsage: &v,
Limit: &v,
Expand All @@ -37,16 +38,15 @@ func TestGetMetricsData(t *testing.T) {
Stats: memStats,
}

disk := DiskStats{
disk = DiskStats{
IoServiceBytesRecursives: []IoServiceBytesRecursive{
{Op: "Read", Value: &v},
{Op: "Write", Value: &v},
{Op: "Total", Value: &v},
},
}

net := make(map[string]NetworkStats)
net["eth0"] = NetworkStats{
networkStat = NetworkStats{
RxBytes: &v,
RxPackets: &v,
RxErrors: &v,
Expand All @@ -56,38 +56,39 @@ func TestGetMetricsData(t *testing.T) {
TxErrors: &v,
TxDropped: &v,
}
net = map[string]NetworkStats{"eth0": networkStat}

netRate := NetworkRateStats{
netRate = NetworkRateStats{
RxBytesPerSecond: &f,
TxBytesPerSecond: &f,
}

percpu := []*uint64{&v, &v}
cpuUsage := CPUUsage{
percpu = []*uint64{&v, &v}
cpuUsage = CPUUsage{
TotalUsage: &v,
UsageInKernelmode: &v,
UsageInUserMode: &v,
PerCPUUsage: percpu,
}

cpuStats := CPUStats{
CPUUsage: cpuUsage,
cpuStats = CPUStats{
CPUUsage: &cpuUsage,
OnlineCpus: &v,
SystemCPUUsage: &v,
CPUUtilized: &v,
CPUReserved: &v,
}
containerStats := ContainerStats{
containerStats = ContainerStats{
Name: "test",
ID: "001",
Memory: mem,
Disk: disk,
Memory: &mem,
Disk: &disk,
Network: net,
NetworkRate: netRate,
CPU: cpuStats,
NetworkRate: &netRate,
CPU: &cpuStats,
}

tm := TaskMetadata{
tm = TaskMetadata{
Cluster: "cluster-1",
TaskARN: "arn:aws:some-value/001",
Family: "task-def-family-1",
Expand All @@ -98,15 +99,118 @@ func TestGetMetricsData(t *testing.T) {
Limits: Limit{CPU: &f, Memory: &v},
}

cstats := make(map[string]ContainerStats)
cstats["001"] = containerStats
cstats = map[string]*ContainerStats{"001": &containerStats}
acc = metricDataAccumulator{
mds: nil,
}
)

// TestGetMetricsDataAllValid verifies that metrics are generated when every
// container stat and limit in the shared fixtures is populated.
func TestGetMetricsDataAllValid(t *testing.T) {
	// Use a fresh accumulator: the package-level one is shared across tests,
	// so asserting on it could pass vacuously with data from earlier tests.
	acc := metricDataAccumulator{}
	acc.getMetricsData(cstats, tm, logger)
	require.Less(t, 0, len(acc.mds))
}

// TestGetMetricsDataMissingContainerStats verifies that a container whose
// DockerID has no entry in the stats map is skipped, while task-level
// metrics are still produced.
func TestGetMetricsDataMissingContainerStats(t *testing.T) {
	tm.Containers = []ContainerMetadata{
		{ContainerName: "container-1", DockerID: "001-Missing", DockerName: "docker-container-1", Limits: Limit{CPU: &f, Memory: &v}},
	}

	// Fresh accumulator so the assertion reflects only this call, not
	// metrics accumulated by previously-run tests.
	acc := metricDataAccumulator{}
	acc.getMetricsData(cstats, tm, logger)
	require.Less(t, 0, len(acc.mds))
}

var md []pdata.Metrics
func TestGetMetricsDataNilContainerStats(t *testing.T) {
cstats = map[string]*ContainerStats{"001": nil}

acc := metricDataAccumulator{
mds: md,
acc.getMetricsData(cstats, tm, logger)
require.Less(t, 0, len(acc.mds))
}

// TestGetMetricsDataMissingContainerLimit verifies that a container with no
// Limits at all (nil CPU and Memory pointers) does not cause a nil-pointer
// dereference.
func TestGetMetricsDataMissingContainerLimit(t *testing.T) {
	tm.Containers = []ContainerMetadata{
		{ContainerName: "container-1", DockerID: "001", DockerName: "docker-container-1"},
	}

	// Fresh accumulator so the assertion reflects only this call, not
	// metrics accumulated by previously-run tests.
	acc := metricDataAccumulator{}
	acc.getMetricsData(cstats, tm, logger)
	require.Less(t, 0, len(acc.mds))
}

// TestGetMetricsDataContainerLimitCpuNil verifies that a nil container-level
// CPU limit is tolerated (the CPUReserved assignment is skipped).
func TestGetMetricsDataContainerLimitCpuNil(t *testing.T) {
	tm.Containers = []ContainerMetadata{
		{ContainerName: "container-1", DockerID: "001", DockerName: "docker-container-1", Limits: Limit{CPU: nil, Memory: &v}},
	}

	// Fresh accumulator so the assertion reflects only this call, not
	// metrics accumulated by previously-run tests.
	acc := metricDataAccumulator{}
	acc.getMetricsData(cstats, tm, logger)
	require.Less(t, 0, len(acc.mds))
}

// TestGetMetricsDataContainerLimitMemoryNil verifies that a nil
// container-level memory limit is tolerated (the MemoryReserved assignment
// is skipped).
func TestGetMetricsDataContainerLimitMemoryNil(t *testing.T) {
	tm.Containers = []ContainerMetadata{
		{ContainerName: "container-1", DockerID: "001", DockerName: "docker-container-1", Limits: Limit{CPU: &f, Memory: nil}},
	}

	// Fresh accumulator so the assertion reflects only this call, not
	// metrics accumulated by previously-run tests.
	acc := metricDataAccumulator{}
	acc.getMetricsData(cstats, tm, logger)
	require.Less(t, 0, len(acc.mds))
}

// TestGetMetricsDataMissingTaskLimit verifies that a task with no Limits
// field at all is handled; task-level reservations then fall back to the
// sum of container limits.
func TestGetMetricsDataMissingTaskLimit(t *testing.T) {
	tm = TaskMetadata{
		Cluster:  "cluster-1",
		TaskARN:  "arn:aws:some-value/001",
		Family:   "task-def-family-1",
		Revision: "task-def-version",
		Containers: []ContainerMetadata{
			{ContainerName: "container-1", DockerID: "001", DockerName: "docker-container-1", Limits: Limit{CPU: &f, Memory: &v}},
		},
	}

	// Fresh accumulator so the assertion reflects only this call, not
	// metrics accumulated by previously-run tests.
	acc := metricDataAccumulator{}
	acc.getMetricsData(cstats, tm, logger)
	require.Less(t, 0, len(acc.mds))
}

// TestGetMetricsDataTaskLimitCpuNil verifies that a nil task-level CPU limit
// is tolerated while the memory limit is present.
func TestGetMetricsDataTaskLimitCpuNil(t *testing.T) {
	tm = TaskMetadata{
		Cluster:  "cluster-1",
		TaskARN:  "arn:aws:some-value/001",
		Family:   "task-def-family-1",
		Revision: "task-def-version",
		Containers: []ContainerMetadata{
			{ContainerName: "container-1", DockerID: "001", DockerName: "docker-container-1", Limits: Limit{CPU: &f, Memory: &v}},
		},
		Limits: Limit{CPU: nil, Memory: &v},
	}

	// Fresh accumulator so the assertion reflects only this call, not
	// metrics accumulated by previously-run tests.
	acc := metricDataAccumulator{}
	acc.getMetricsData(cstats, tm, logger)
	require.Less(t, 0, len(acc.mds))
}

// TestGetMetricsDataTaskLimitMemoryNil verifies that a nil task-level memory
// limit is tolerated while the CPU limit is present.
func TestGetMetricsDataTaskLimitMemoryNil(t *testing.T) {
	tm = TaskMetadata{
		Cluster:  "cluster-1",
		TaskARN:  "arn:aws:some-value/001",
		Family:   "task-def-family-1",
		Revision: "task-def-version",
		Containers: []ContainerMetadata{
			{ContainerName: "container-1", DockerID: "001", DockerName: "docker-container-1", Limits: Limit{CPU: &f, Memory: &v}},
		},
		Limits: Limit{CPU: &f, Memory: nil},
	}

	// Fresh accumulator so the assertion reflects only this call, not
	// metrics accumulated by previously-run tests.
	acc := metricDataAccumulator{}
	acc.getMetricsData(cstats, tm, logger)
	require.Less(t, 0, len(acc.mds))
}

// TestGetMetricsDataCpuReservedZero verifies that a zero CPU reservation at
// both task and container level does not cause a division by zero when
// computing CPU utilization.
func TestGetMetricsDataCpuReservedZero(t *testing.T) {
	tm = TaskMetadata{
		Cluster:  "cluster-1",
		TaskARN:  "arn:aws:some-value/001",
		Family:   "task-def-family-1",
		Revision: "task-def-version",
		Containers: []ContainerMetadata{
			{ContainerName: "container-1", DockerID: "001", DockerName: "docker-container-1", Limits: Limit{CPU: &floatZero, Memory: nil}},
		},
		Limits: Limit{CPU: &floatZero, Memory: &v},
	}

	// Fresh accumulator so the assertion reflects only this call, not
	// metrics accumulated by previously-run tests.
	acc := metricDataAccumulator{}
	acc.getMetricsData(cstats, tm, logger)
	require.Less(t, 0, len(acc.mds))
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@ type ContainerStats struct {
Read time.Time `json:"read"`
PreviousRead time.Time `json:"preread"`

Memory MemoryStats `json:"memory_stats,omitempty"`
Disk DiskStats `json:"blkio_stats,omitempty"`
Memory *MemoryStats `json:"memory_stats,omitempty"`
Disk *DiskStats `json:"blkio_stats,omitempty"`
Network map[string]NetworkStats `json:"networks,omitempty"`
NetworkRate NetworkRateStats `json:"network_rate_stats,omitempty"`
CPU CPUStats `json:"cpu_stats,omitempty"`
PreviousCPU CPUStats `json:"precpu_stats,omitempty"`
NetworkRate *NetworkRateStats `json:"network_rate_stats,omitempty"`
CPU *CPUStats `json:"cpu_stats,omitempty"`
PreviousCPU *CPUStats `json:"precpu_stats,omitempty"`
}

// MemoryStats defines the memory stats
Expand Down Expand Up @@ -82,9 +82,9 @@ type CPUUsage struct {

// CPUStats defines Cpu stats
type CPUStats struct {
CPUUsage CPUUsage `json:"cpu_usage,omitempty"`
OnlineCpus *uint64 `json:"online_cpus,omitempty"`
SystemCPUUsage *uint64 `json:"system_cpu_usage,omitempty"`
CPUUsage *CPUUsage `json:"cpu_usage,omitempty"`
OnlineCpus *uint64 `json:"online_cpus,omitempty"`
SystemCPUUsage *uint64 `json:"system_cpu_usage,omitempty"`
CPUUtilized *uint64
CPUReserved *uint64
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,13 @@ package awsecscontainermetrics

import (
"go.opentelemetry.io/collector/consumer/pdata"
"go.uber.org/zap"
)

// MetricsData generates OTLP metrics from endpoint raw data
func MetricsData(containerStatsMap map[string]ContainerStats, metadata TaskMetadata) []pdata.Metrics {
func MetricsData(containerStatsMap map[string]*ContainerStats, metadata TaskMetadata, logger *zap.Logger) []pdata.Metrics {
acc := &metricDataAccumulator{}
acc.getMetricsData(containerStatsMap, metadata)
acc.getMetricsData(containerStatsMap, metadata, logger)

return acc.mds
}
Loading

0 comments on commit f043f14

Please sign in to comment.