diff --git a/marklogic-client-api/src/test/java/com/marklogic/client/test/rows/UnnestTest.java b/marklogic-client-api/src/test/java/com/marklogic/client/test/rows/UnnestTest.java
new file mode 100644
index 000000000..a3c287e59
--- /dev/null
+++ b/marklogic-client-api/src/test/java/com/marklogic/client/test/rows/UnnestTest.java
@@ -0,0 +1,124 @@
+package com.marklogic.client.test.rows;
+
+import com.fasterxml.jackson.databind.node.ArrayNode;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import com.marklogic.client.expression.PlanBuilder;
+import com.marklogic.client.row.RowRecord;
+import org.junit.Before;
+import org.junit.Test;
+import org.springframework.util.StringUtils;
+
+import java.util.List;
+import java.util.stream.Stream;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+/**
+ * Test demonstrates the primary use case for unnest, which is, for a given row, to create N rows based on a column in
+ * that row containing an array with N values.
+ */
+public class UnnestTest extends AbstractOpticUpdateTest {
+
+    /** Name of the output column that receives one team member name per unnested row. */
+    private static final String SINGLE_NAME_COLUMN = "teamMemberName";
+
+    /**
+     * Inserts a test document for testing with the unnestSchema/unnestView view. The document holds three
+     * departments: one with two team members, one with a single team member, and one with a null value.
+     */
+    @Before
+    public void insertTestDocument() {
+        ObjectNode doc = mapper.createObjectNode();
+        ArrayNode office = doc.putArray("office");
+        // "Marketing: " keeps a blank after the colon on purpose: String.split drops trailing empty tokens, so
+        // without it tokens[1] would not exist. The blank value is then stored as a JSON null.
+        Stream.of("Engineering:Cindy,Alice", "Sales:Bob", "Marketing: ").forEach(value -> {
+            String[] tokens = value.split(":");
+            ObjectNode obj = office.addObject();
+            obj.put("department", tokens[0]);
+            obj.put("teamMembers", StringUtils.hasText(tokens[1]) ? tokens[1] : null);
+        });
+
+        resultRows(op.fromDocDescriptors(op.docDescriptor(newWriteOp("/acme/office1.json", doc))).write());
+    }
+
+    /**
+     * Expects an inner unnest to return one row per team member name and no row for the null value.
+     */
+    @Test
+    public void unnestInner() {
+        PlanBuilder.ModifyPlan plan = op.fromView("unnestSchema", "unnestView")
+            .bind(op.as("teamMemberNameArray", op.fn.tokenize(op.col("teamMembers"), op.xs.string(","))))
+            .unnestInner("teamMemberNameArray", SINGLE_NAME_COLUMN)
+            .orderBy(op.col(SINGLE_NAME_COLUMN));
+
+        List<RowRecord> rows = resultRows(plan);
+        assertEquals(3, rows.size());
+        assertEquals("Alice", rows.get(0).getString(SINGLE_NAME_COLUMN));
+        assertEquals("Bob", rows.get(1).getString(SINGLE_NAME_COLUMN));
+        assertEquals("Cindy", rows.get(2).getString(SINGLE_NAME_COLUMN));
+    }
+
+    /**
+     * Expects the same rows as an inner unnest, plus an "index" column holding each name's position.
+     */
+    @Test
+    public void unnestInnerWithOrdinality() {
+        PlanBuilder.ModifyPlan plan = op.fromView("unnestSchema", "unnestView")
+            .bind(op.as("teamMemberNameArray", op.fn.tokenize(op.col("teamMembers"), op.xs.string(","))))
+            .unnestInner("teamMemberNameArray", SINGLE_NAME_COLUMN, "index")
+            .orderBy(op.col(SINGLE_NAME_COLUMN));
+
+        List<RowRecord> rows = resultRows(plan);
+        assertEquals(3, rows.size());
+        assertEquals("Alice", rows.get(0).getString(SINGLE_NAME_COLUMN));
+        assertEquals(
+            "The ordinality column is expected to capture the index of the value in the array that it came from, " +
+                "where the index is 1-based, not 0-based", 2, rows.get(0).getInt("index"));
+        assertEquals("Bob", rows.get(1).getString(SINGLE_NAME_COLUMN));
+        assertEquals(1, rows.get(1).getInt("index"));
+        assertEquals("Cindy", rows.get(2).getString(SINGLE_NAME_COLUMN));
+        assertEquals(1, rows.get(2).getInt("index"));
+    }
+
+    /**
+     * Expects a left outer unnest to also keep the row whose value is null, with a null name column.
+     */
+    @Test
+    public void unnestLeftOuter() {
+        PlanBuilder.ModifyPlan plan = op.fromView("unnestSchema", "unnestView")
+            .bind(op.as("teamMemberNameArray", op.fn.tokenize(op.col("teamMembers"), op.xs.string(","))))
+            .unnestLeftOuter("teamMemberNameArray", SINGLE_NAME_COLUMN)
+            .orderBy(op.col(SINGLE_NAME_COLUMN));
+
+        List<RowRecord> rows = resultRows(plan);
+        assertEquals(4, rows.size());
+        assertEquals("Alice", rows.get(0).getString(SINGLE_NAME_COLUMN));
+        assertEquals("Bob", rows.get(1).getString(SINGLE_NAME_COLUMN));
+        assertEquals("Cindy", rows.get(2).getString(SINGLE_NAME_COLUMN));
+        assertNull(rows.get(3).get(SINGLE_NAME_COLUMN));
+    }
+
+    /**
+     * Expects the retained null row of a left outer unnest to have a null ordinality column as well.
+     */
+    @Test
+    public void unnestLeftOuterWithOrdinality() {
+        PlanBuilder.ModifyPlan plan = op.fromView("unnestSchema", "unnestView")
+            .bind(op.as("teamMemberNameArray", op.fn.tokenize(op.col("teamMembers"), op.xs.string(","))))
+            .unnestLeftOuter("teamMemberNameArray", SINGLE_NAME_COLUMN, "myIndex")
+            .orderBy(op.col(SINGLE_NAME_COLUMN));
+
+        List<RowRecord> rows = resultRows(plan);
+        assertEquals(4, rows.size());
+        assertEquals("Alice", rows.get(0).getString(SINGLE_NAME_COLUMN));
+        assertEquals(2, rows.get(0).getInt("myIndex"));
+        assertEquals("Bob", rows.get(1).getString(SINGLE_NAME_COLUMN));
+        assertEquals(1, rows.get(1).getInt("myIndex"));
+        assertEquals("Cindy", rows.get(2).getString(SINGLE_NAME_COLUMN));
+        assertEquals(1, rows.get(2).getInt("myIndex"));
+        assertNull(rows.get(3).get(SINGLE_NAME_COLUMN));
+        assertNull(rows.get(3).get("myIndex"));
+    }
+}
diff --git a/test-app/src/main/ml-schemas/tde/unnestView.json b/test-app/src/main/ml-schemas/tde/unnestView.json
new file mode 100644
index 000000000..4d45bfc8a
--- /dev/null
+++ b/test-app/src/main/ml-schemas/tde/unnestView.json
@@ -0,0 +1,27 @@
+{
+  "template": {
+    "context": "office",
+    "rows": [
+      {
+        "schemaName": "unnestSchema",
+        "viewName": "unnestView",
+        "columns": [
+          {
+            "name": "department",
+            "scalarType": "string",
+            "val": "department",
+            "nullable": true,
+            "invalidValues": "ignore"
+          },
+          {
+            "name": "teamMembers",
+            "scalarType": "string",
+            "val": "teamMembers",
+            "nullable": true,
+            "invalidValues": "ignore"
+          }
+        ]
+      }
+    ]
+  }
+}
\ No newline at end of file