Skip to content

Commit

Permalink
Core: Handle optional fields (apache#8050) (apache#8064)
Browse files Browse the repository at this point in the history
* Core: Handle allow optional fields

We expect:

- current-snapshot-id
- properties
- snapshots

to be there, but they are actually optional.

* Use AssertJ
  • Loading branch information
Fokko authored and nastra committed Jul 18, 2023
1 parent aa64a16 commit 9742b71
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 11 deletions.
38 changes: 27 additions & 11 deletions core/src/main/java/org/apache/iceberg/TableMetadataParser.java
Expand Up @@ -431,15 +431,26 @@ static TableMetadata fromJson(String metadataLocation, JsonNode node) {
defaultSortOrderId = defaultSortOrder.orderId();
}

// parse properties map
Map<String, String> properties = JsonUtil.getStringMap(PROPERTIES, node);
long currentSnapshotId = JsonUtil.getLong(CURRENT_SNAPSHOT_ID, node);
Map<String, String> properties;
if (node.has(PROPERTIES)) {
// parse properties map
properties = JsonUtil.getStringMap(PROPERTIES, node);
} else {
properties = ImmutableMap.of();
}

Long currentSnapshotId = JsonUtil.getLongOrNull(CURRENT_SNAPSHOT_ID, node);
if (currentSnapshotId == null) {
// This field is optional, but internally we set this to -1 when not set
currentSnapshotId = -1L;
}

long lastUpdatedMillis = JsonUtil.getLong(LAST_UPDATED_MILLIS, node);

Map<String, SnapshotRef> refs;
if (node.has(REFS)) {
refs = refsFromJson(node.get(REFS));
} else if (currentSnapshotId != -1) {
} else if (currentSnapshotId != -1L) {
// initialize the main branch if there are no refs
refs =
ImmutableMap.of(
Expand All @@ -448,14 +459,19 @@ static TableMetadata fromJson(String metadataLocation, JsonNode node) {
refs = ImmutableMap.of();
}

JsonNode snapshotArray = JsonUtil.get(SNAPSHOTS, node);
Preconditions.checkArgument(
snapshotArray.isArray(), "Cannot parse snapshots from non-array: %s", snapshotArray);
List<Snapshot> snapshots;
if (node.has(SNAPSHOTS)) {
JsonNode snapshotArray = JsonUtil.get(SNAPSHOTS, node);
Preconditions.checkArgument(
snapshotArray.isArray(), "Cannot parse snapshots from non-array: %s", snapshotArray);

List<Snapshot> snapshots = Lists.newArrayListWithExpectedSize(snapshotArray.size());
Iterator<JsonNode> iterator = snapshotArray.elements();
while (iterator.hasNext()) {
snapshots.add(SnapshotParser.fromJson(iterator.next()));
snapshots = Lists.newArrayListWithExpectedSize(snapshotArray.size());
Iterator<JsonNode> iterator = snapshotArray.elements();
while (iterator.hasNext()) {
snapshots.add(SnapshotParser.fromJson(iterator.next()));
}
} else {
snapshots = ImmutableList.of();
}

List<StatisticsFile> statisticsFiles;
Expand Down
10 changes: 10 additions & 0 deletions core/src/test/java/org/apache/iceberg/TestTableMetadata.java
Expand Up @@ -1343,6 +1343,16 @@ public void testParseSchemaIdentifierFields() throws Exception {
Assert.assertEquals(Sets.newHashSet(1, 2), parsed.schemasById().get(1).identifierFieldIds());
}

@Test
public void testParseMinimal() throws Exception {
String data = readTableMetadataInputFile("TableMetadataV2ValidMinimal.json");
TableMetadata parsed = TableMetadataParser.fromJson(data);
Assertions.assertThat(parsed.snapshots()).isEmpty();
Assertions.assertThat(parsed.snapshotLog()).isEmpty();
Assertions.assertThat(parsed.properties()).isEmpty();
Assertions.assertThat(parsed.previousFiles()).isEmpty();
}

@Test
public void testUpdateSchemaIdentifierFields() {
Schema schema = new Schema(Types.NestedField.required(10, "x", Types.StringType.get()));
Expand Down
71 changes: 71 additions & 0 deletions core/src/test/resources/TableMetadataV2ValidMinimal.json
@@ -0,0 +1,71 @@
{
"format-version": 2,
"table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
"location": "s3://bucket/test/location",
"last-sequence-number": 34,
"last-updated-ms": 1602638573590,
"last-column-id": 3,
"current-schema-id": 0,
"schemas": [
{
"type": "struct",
"schema-id": 0,
"fields": [
{
"id": 1,
"name": "x",
"required": true,
"type": "long"
},
{
"id": 2,
"name": "y",
"required": true,
"type": "long",
"doc": "comment"
},
{
"id": 3,
"name": "z",
"required": true,
"type": "long"
}
]
}
],
"default-spec-id": 0,
"partition-specs": [
{
"spec-id": 0,
"fields": [
{
"name": "x",
"transform": "identity",
"source-id": 1,
"field-id": 1000
}
]
}
],
"last-partition-id": 1000,
"default-sort-order-id": 3,
"sort-orders": [
{
"order-id": 3,
"fields": [
{
"transform": "identity",
"source-id": 2,
"direction": "asc",
"null-order": "nulls-first"
},
{
"transform": "bucket[4]",
"source-id": 3,
"direction": "desc",
"null-order": "nulls-last"
}
]
}
]
}

0 comments on commit 9742b71

Please sign in to comment.