Skip to content

Commit

Permalink
Test adjustments to match re-implementation of Hot Shard RCA. (#301)
Browse files Browse the repository at this point in the history
Signed-off-by: Filip Drobnjakovic <drobnjakovicfilip@gmail.com>
  • Loading branch information
Tjofil committed Mar 13, 2023
1 parent 58ee0a0 commit a7d296e
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 83 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,12 @@
import org.jooq.Record;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import org.mockito.Mockito;
import org.opensearch.performanceanalyzer.grpc.FlowUnitMessage;
import org.opensearch.performanceanalyzer.grpc.HotShardSummaryMessage;
import org.opensearch.performanceanalyzer.rca.framework.core.GenericSummary;

@Ignore("Awaiting adjustments")
public class HotShardSummaryTest {
private final String INDEX_NAME = "index_1";
private final String SHARD_ID = "shard_1";
Expand Down Expand Up @@ -88,7 +86,7 @@ public void testGetTableName() {
@Test
public void testGetSqlSchema() {
List<Field<?>> schema = uut.getSqlSchema();
Assert.assertEquals(10, schema.size());
Assert.assertEquals(8, schema.size());
Assert.assertEquals(
HotShardSummary.HotShardSummaryField.INDEX_NAME_FIELD.getField(), schema.get(0));
Assert.assertEquals(
Expand All @@ -108,21 +106,21 @@ public void testGetSqlSchema() {
HotShardSummary.HotShardSummaryField.HEAP_ALLOC_RATE_THRESHOLD_FIELD.getField(),
schema.get(6));
Assert.assertEquals(
HotShardSummary.HotShardSummaryField.TIME_PERIOD_FIELD.getField(), schema.get(9));
HotShardSummary.HotShardSummaryField.TIME_PERIOD_FIELD.getField(), schema.get(7));
}

@Test
public void testGetSqlValue() {
List<Object> values = uut.getSqlValue();
Assert.assertEquals(10, values.size());
Assert.assertEquals(8, values.size());
Assert.assertEquals(INDEX_NAME, values.get(0));
Assert.assertEquals(SHARD_ID, values.get(1));
Assert.assertEquals(NODE_ID, values.get(2));
Assert.assertEquals(CPU_UTILIZATION, values.get(3));
Assert.assertEquals(CPU_UTILIZATION_THRESHOLD, values.get(4));
Assert.assertEquals(HEAP_ALLOC_RATE, values.get(5));
Assert.assertEquals(HEAP_ALLOC_RATE_THRESHOLD, values.get(6));
Assert.assertEquals(TIME_PERIOD, values.get(9));
Assert.assertEquals(TIME_PERIOD, values.get(7));
}

@Test
Expand Down Expand Up @@ -216,14 +214,14 @@ public void testBuildSummary() {
GenericSummary summary = HotShardSummary.buildSummary(testRecord);
Assert.assertNotNull(summary);
List<Object> values = summary.getSqlValue();
Assert.assertEquals(10, values.size());
Assert.assertEquals(8, values.size());
Assert.assertEquals(INDEX_NAME, values.get(0));
Assert.assertEquals(SHARD_ID, values.get(1));
Assert.assertEquals(NODE_ID, values.get(2));
Assert.assertEquals(CPU_UTILIZATION, values.get(3));
Assert.assertEquals(CPU_UTILIZATION_THRESHOLD, values.get(4));
Assert.assertEquals(HEAP_ALLOC_RATE, values.get(5));
Assert.assertEquals(HEAP_ALLOC_RATE_THRESHOLD, values.get(6));
Assert.assertEquals(TIME_PERIOD, values.get(9));
Assert.assertEquals(TIME_PERIOD, values.get(7));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
import org.opensearch.performanceanalyzer.AppContext;
import org.opensearch.performanceanalyzer.ClientServers;
Expand Down Expand Up @@ -658,7 +657,6 @@ public void construct() {
}

@Test
@Ignore("Awaiting adjustments")
public void testHotShardClusterApiResponse() throws Exception {
AnalysisGraph analysisGraph = new AnalysisGraphHotShard();
List<ConnectedComponent> connectedComponents =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
import java.util.List;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.opensearch.performanceanalyzer.AppContext;
Expand All @@ -28,7 +27,6 @@
import org.opensearch.performanceanalyzer.reader.ClusterDetailsEventProcessor;

@Category(GradleTaskForRca.class)
@Ignore("Awaiting adjustments")
public class HotShardClusterRcaTest {

private RcaTestHelper hotShardRca;
Expand Down Expand Up @@ -98,9 +96,8 @@ public void testOperateForEmptyFlowUnits() {
}

// 3. Healthy FlowUnits received, i.e :
// CPU_UTILIZATION < CPU_UTILIZATION_threshold
// and IO_THROUGHPUT < IO_THROUGHPUT_threshold
// and IO_SYSCALLRATE < IO_SYSCALLRATE_threshold
// CPU_UTILIZATION < CPU_UTILIZATION_threshold
// and HEAP_ALLOC_RATE < HEAP_ALLOC_RATE_threshold
@Test
public void testOperateForHealthyFlowUnits() {
// 3.1 Flow Units received from single node
Expand Down Expand Up @@ -241,8 +238,8 @@ public void testOperateForHotShardonSingleDimension() {
Assert.assertEquals(index.index_2.name(), nodeIndexShardInfo2[1]);
Assert.assertEquals(shard.shard_2.name(), nodeIndexShardInfo2[2]);

// 4.3 hot shards across multiple indices as per IO Total Throughput,
// ie. : IO_TOTAL_THROUGHPUT >= IO_TOTAL_THROUGHPUT_threshold
// 4.3 hot shards across multiple indices as per Heap alloc rate, CPU_Utilization
// ie. : HEAP_ALLOC_RATE >= HEAP_ALLOC_RATE_threshold
hotShardRca.mockFlowUnits(
Arrays.asList(
RcaTestHelper.generateFlowUnitForHotShard(
Expand Down Expand Up @@ -297,11 +294,9 @@ public void testOperateForHotShardonSingleDimension() {
List<Object> hotShard3 = nodeSummary.getNestedSummaryList().get(0).getSqlValue();
List<Object> hotShard4 = nodeSummary.getNestedSummaryList().get(1).getSqlValue();

// verify the resource type, IO total throughput, node ID, Index Name, shard ID
Assert.assertEquals(
ResourceUtil.IO_TOTAL_THROUGHPUT.getResourceEnumValue(), hotShard3.get(0));
Assert.assertEquals(
ResourceUtil.IO_TOTAL_THROUGHPUT.getResourceEnumValue(), hotShard4.get(0));
// verify the resource type, Heap alloc rate, node ID, Index Name, shard ID
Assert.assertEquals(ResourceUtil.HEAP_ALLOC_RATE.getResourceEnumValue(), hotShard3.get(0));
Assert.assertEquals(ResourceUtil.HEAP_ALLOC_RATE.getResourceEnumValue(), hotShard4.get(0));

Assert.assertEquals(550000.0, hotShard3.get(3));
String[] nodeIndexShardInfo3 = hotShard3.get(8).toString().split(" ");
Expand All @@ -315,23 +310,24 @@ public void testOperateForHotShardonSingleDimension() {
Assert.assertEquals(index.index_2.name(), nodeIndexShardInfo4[1]);
Assert.assertEquals(shard.shard_1.name(), nodeIndexShardInfo4[2]);

// 4.4 hot shards across multiple indices as per IO Total Throughput,
// ie. : IO_TOTAL_SYS_CALLRATE >= IO_TOTAL_SYS_CALLRATE_threshold
// 4.4 hot shards across indices as per Heap Alloc Rate,
// ie. : HEAP_ALLOC_RATE >= HEAP_ALLOC_RATE_threshold
// and CPU_UTILIZATION >= CPU_UTILIZATION_threshold
hotShardRca.mockFlowUnits(
Arrays.asList(
RcaTestHelper.generateFlowUnitForHotShard(
index.index_1.name(),
shard.shard_1.name(),
node.node_1.name(),
0.45,
490000,
0.80,
400000,
Resources.State.UNHEALTHY),
RcaTestHelper.generateFlowUnitForHotShard(
index.index_1.name(),
shard.shard_2.name(),
node.node_1.name(),
0.50,
400000,
370000,
Resources.State.UNHEALTHY),
RcaTestHelper.generateFlowUnitForHotShard(
index.index_1.name(),
Expand All @@ -345,14 +341,14 @@ public void testOperateForHotShardonSingleDimension() {
shard.shard_1.name(),
node.node_1.name(),
0.20,
350000,
250000,
Resources.State.UNHEALTHY),
RcaTestHelper.generateFlowUnitForHotShard(
index.index_2.name(),
shard.shard_2.name(),
node.node_2.name(),
0.25,
370000,
550000,
Resources.State.UNHEALTHY)));

ResourceFlowUnit flowUnit4 = hotShardClusterRca.operate();
Expand All @@ -364,19 +360,17 @@ public void testOperateForHotShardonSingleDimension() {
List<Object> hotShard5 = nodeSummary.getNestedSummaryList().get(0).getSqlValue();
List<Object> hotShard6 = nodeSummary.getNestedSummaryList().get(1).getSqlValue();

// verify the resource type, IO total sys callrate, node ID, Index Name, shard ID
Assert.assertEquals(
ResourceUtil.IO_TOTAL_SYS_CALLRATE.getResourceEnumValue(), hotShard5.get(0));
Assert.assertEquals(
ResourceUtil.IO_TOTAL_SYS_CALLRATE.getResourceEnumValue(), hotShard6.get(0));
// verify the resource type, cpu usage and heap alloc rate, node ID, Index Name, shard ID
Assert.assertEquals(ResourceUtil.CPU_USAGE.getResourceEnumValue(), hotShard5.get(0));
Assert.assertEquals(ResourceUtil.HEAP_ALLOC_RATE.getResourceEnumValue(), hotShard6.get(0));

Assert.assertEquals(0.75, hotShard5.get(3));
Assert.assertEquals(0.80, hotShard5.get(3));
String[] nodeIndexShardInfo5 = hotShard5.get(8).toString().split(" ");
Assert.assertEquals(node.node_1.name(), nodeIndexShardInfo5[0]);
Assert.assertEquals(index.index_1.name(), nodeIndexShardInfo5[1]);
Assert.assertEquals(shard.shard_1.name(), nodeIndexShardInfo5[2]);

Assert.assertEquals(0.50, hotShard6.get(3));
Assert.assertEquals(550000.0, hotShard6.get(3));
String[] nodeIndexShardInfo6 = hotShard6.get(8).toString().split(" ");
Assert.assertEquals(node.node_2.name(), nodeIndexShardInfo6[0]);
Assert.assertEquals(index.index_2.name(), nodeIndexShardInfo6[1]);
Expand All @@ -386,8 +380,8 @@ public void testOperateForHotShardonSingleDimension() {
// 5. UnHealthy FlowUnits received, hot shard identification on multiple Dimension
@Test
public void testOperateForHotShardonMultipleDimension() {
// CPU_UTILIZATION >= CPU_UTILIZATION_threshold, IO_TOTAL_SYS_CALLRATE >=
// IO_TOTAL_SYS_CALLRATE_threshold
// CPU_UTILIZATION >= CPU_UTILIZATION_threshold, HEAP_ALLOC_RATE >=
// HEAP_ALLOC_RATE_threshold
hotShardRca.mockFlowUnits(
Arrays.asList(
RcaTestHelper.generateFlowUnitForHotShard(
Expand Down Expand Up @@ -431,16 +425,12 @@ public void testOperateForHotShardonMultipleDimension() {

Assert.assertEquals(1, flowUnit2.getSummary().getNestedSummaryList().size());
GenericSummary nodeSummary = flowUnit2.getSummary().getNestedSummaryList().get(0);
Assert.assertEquals(3, nodeSummary.getNestedSummaryList().size());
Assert.assertEquals(2, nodeSummary.getNestedSummaryList().size());
List<Object> hotShard1 = nodeSummary.getNestedSummaryList().get(0).getSqlValue();
List<Object> hotShard2 = nodeSummary.getNestedSummaryList().get(1).getSqlValue();
List<Object> hotShard3 = nodeSummary.getNestedSummaryList().get(2).getSqlValue();

Assert.assertEquals(ResourceUtil.CPU_USAGE.getResourceEnumValue(), hotShard1.get(0));
Assert.assertEquals(
ResourceUtil.IO_TOTAL_THROUGHPUT.getResourceEnumValue(), hotShard2.get(0));
Assert.assertEquals(
ResourceUtil.IO_TOTAL_SYS_CALLRATE.getResourceEnumValue(), hotShard3.get(0));
Assert.assertEquals(ResourceUtil.HEAP_ALLOC_RATE.getResourceEnumValue(), hotShard2.get(0));

// verify the resource type, cpu utilization value, node ID, Index Name, shard ID
Assert.assertEquals(0.75, hotShard1.get(3));
Expand All @@ -449,18 +439,11 @@ public void testOperateForHotShardonMultipleDimension() {
Assert.assertEquals(index.index_1.name(), nodeIndexShardInfo1[1]);
Assert.assertEquals(shard.shard_1.name(), nodeIndexShardInfo1[2]);

// verify the resource type, IO total throughput, node ID, Index Name, shard ID
// verify the resource type, heap alloc rate, node ID, Index Name, shard ID
Assert.assertEquals(560000.0, hotShard2.get(3));
String[] nodeIndexShardInfo2 = hotShard2.get(8).toString().split(" ");
Assert.assertEquals(node.node_1.name(), nodeIndexShardInfo2[0]);
Assert.assertEquals(index.index_1.name(), nodeIndexShardInfo2[1]);
Assert.assertEquals(shard.shard_2.name(), nodeIndexShardInfo2[2]);

// verify the resource type, IO total sys callrate, node ID, Index Name, shard ID
Assert.assertEquals(0.50, hotShard3.get(3));
String[] nodeIndexShardInfo3 = hotShard3.get(8).toString().split(" ");
Assert.assertEquals(node.node_2.name(), nodeIndexShardInfo3[0]);
Assert.assertEquals(index.index_2.name(), nodeIndexShardInfo3[1]);
Assert.assertEquals(shard.shard_2.name(), nodeIndexShardInfo3[2]);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import java.util.List;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.opensearch.performanceanalyzer.AppContext;
Expand All @@ -32,7 +31,6 @@
import org.opensearch.performanceanalyzer.reader.ClusterDetailsEventProcessor;

@Category(GradleTaskForRca.class)
@Ignore("Awaiting adjustments")
public class HotShardRcaTest {

private HotShardRcaX hotShardRcaX;
Expand Down Expand Up @@ -93,8 +91,7 @@ public void testOperate() {
Clock constantClock = Clock.fixed(ofEpochMilli(0), ZoneId.systemDefault());

// ts = 0
// index = index_1, shard = shard_1, cpuUtilization = 0, ioTotThroughput = 0,
// ioTotSyscallRate = 0
// index = index_1, shard = shard_1, cpuUtilization = 0, heapAllocRate = 0
cpuUtilization.createTestFlowUnits(
columnName, Arrays.asList(index.index_1.toString(), "1", String.valueOf(0)));
heapAllocRate.createTestFlowUnits(
Expand All @@ -105,8 +102,7 @@ public void testOperate() {
Assert.assertFalse(flowUnit.getResourceContext().isUnhealthy());

// ts = 1
// index = index_1, shard = shard_1, cpuUtilization = 0.005, ioTotThroughput = 200000,
// ioTotSyscallRate = 0.005
// index = index_1, shard = shard_1, cpuUtilization = 0.005, heapAllocRate = 200000
cpuUtilization.createTestFlowUnits(
columnName, Arrays.asList(index.index_1.toString(), "1", String.valueOf(0.005)));
heapAllocRate.createTestFlowUnits(
Expand All @@ -117,8 +113,7 @@ public void testOperate() {
Assert.assertFalse(flowUnit.getResourceContext().isUnhealthy());

// ts = 2
// index = index_1, shard = shard_1, cpuUtilization = 0.75, ioTotThroughput = 200000,
// ioTotSyscallRate = 0.005
// index = index_1, shard = shard_1, cpuUtilization = 0.75, heapAllocRate = 200000
cpuUtilization.createTestFlowUnits(
columnName, Arrays.asList(index.index_1.toString(), "1", String.valueOf(0.75)));
heapAllocRate.createTestFlowUnits(
Expand All @@ -138,27 +133,22 @@ public void testOperate() {
Assert.assertEquals("node1", hotShardSummary1.getNodeId());

// ts = 3
// index = index_1, shard = shard_2, cpuUtilization = 0.75, ioTotThroughput = 400000,
// ioTotSyscallRate = 0.10
//
// and
// ts = 4
// index = index_1, shard = shard_2, cpuUtilization = 0.25, ioTotThroughput = 100000,
// ioTotSyscallRate = 0.10
cpuUtilization.createTestFlowUnits(
columnName, Arrays.asList(index.index_1.toString(), "2", String.valueOf(0.75)));
heapAllocRate.createTestFlowUnits(
columnName, Arrays.asList(index.index_1.toString(), "2", String.valueOf(400000)));
// index = index_1, shard = shard_2, cpuUtilization = 0.75, heapAllocRate = 100000
// index = index_1, shard = shard_3, cpuUtilization = 0.45, heapAllocRate = 1500000

hotShardRcaX.setClock(Clock.offset(constantClock, Duration.ofSeconds(3)));
flowUnit = hotShardRcaX.operate();
cpuUtilization.createTestFlowUnitsWithMultipleRows(
columnName,
Arrays.asList(
Arrays.asList(index.index_1.toString(), "2", String.valueOf(0.75)),
Arrays.asList(index.index_1.toString(), "3", String.valueOf(0.45))));
heapAllocRate.createTestFlowUnitsWithMultipleRows(
columnName,
Arrays.asList(
Arrays.asList(index.index_1.toString(), "2", String.valueOf(400000)),
Arrays.asList(index.index_1.toString(), "3", String.valueOf(1500000))));

cpuUtilization.createTestFlowUnits(
columnName, Arrays.asList(index.index_1.toString(), "2", String.valueOf(0.25)));
heapAllocRate.createTestFlowUnits(
columnName, Arrays.asList(index.index_1.toString(), "2", String.valueOf(100000)));
hotShardRcaX.setClock(Clock.offset(constantClock, Duration.ofSeconds(3)));

hotShardRcaX.setClock(Clock.offset(constantClock, Duration.ofSeconds(4)));
flowUnit = hotShardRcaX.operate();
HotNodeSummary summary2 = (HotNodeSummary) flowUnit.getSummary();
List<GenericSummary> hotShardSummaryList2 = summary2.getNestedSummaryList();
Expand All @@ -169,10 +159,11 @@ public void testOperate() {
HotShardSummary hotShardSummary2 = (HotShardSummary) hotShardSummaryList2.get(0);
HotShardSummary hotShardSummary3 = (HotShardSummary) hotShardSummaryList2.get(1);
Assert.assertEquals(index.index_1.toString(), hotShardSummary2.getIndexName());
Assert.assertEquals("1", hotShardSummary2.getShardId());
Assert.assertEquals("2", hotShardSummary2.getShardId());
Assert.assertEquals("node1", hotShardSummary2.getNodeId());
Assert.assertEquals("2", hotShardSummary3.getShardId());

Assert.assertEquals(index.index_1.toString(), hotShardSummary3.getIndexName());
Assert.assertEquals("3", hotShardSummary3.getShardId());
Assert.assertEquals("node1", hotShardSummary3.getNodeId());
}

Expand Down
6 changes: 3 additions & 3 deletions src/test/resources/rca/rca_elected_cluster_manager.conf
Original file line number Diff line number Diff line change
Expand Up @@ -65,13 +65,13 @@
//hot shard rca
"hot-shard-rca": {
"cpu-utilization" : 0.01,
"heap-alloc-rate-in-bytes" : 250000.0,
"heap-alloc-rate-in-bytes" : 25000.0,
"top-k-consumers" : 50
},
//hot shard cluster rca
"hot-shard-cluster-rca": {
"cpu-utilization-cluster-percentage" : 0.3,
"heap-alloc-rate-cluster-percentage" : 0.3
"cpu-utilization-cluster-percentage" : 0.1,
"heap-alloc-rate-cluster-percentage" : 0.1
},
"field-data-cache-rca-config": {
"field-data-cache-size-threshold" : 0.8,
Expand Down

0 comments on commit a7d296e

Please sign in to comment.