Skip to content

Commit

Permalink
Core: Expired Snapshot files in a transaction should be deleted. (apa…
Browse files Browse the repository at this point in the history
…che#9183)

When a snapshot is expired as part of a transaction, the manifest list
file(s) should be deleted when the transaction commits. A recent change
(apache#6634) ensured that these files are not deleted when they have also
been committed as part of a transaction, but this breaks the simple
case where no new files are committed. Fix this by not skipping
deletion when the list of committed files is empty.

TESTING:

Extended a unit test to ensure that manifest list files are deleted.
Ran the test without the fix on a branch where apache#6634 was reverted
to show that this is a regression.
  • Loading branch information
bartash authored and lisirrx committed Jan 4, 2024
1 parent b82219b commit 02d2250
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 15 deletions.
24 changes: 10 additions & 14 deletions core/src/main/java/org/apache/iceberg/BaseTransaction.java
Expand Up @@ -446,20 +446,16 @@ private void commitSimpleTransaction() {
}

Set<String> committedFiles = committedFiles(ops, newSnapshots);
if (committedFiles != null) {
// delete all of the files that were deleted in the most recent set of operation commits
Tasks.foreach(deletedFiles)
.suppressFailureWhenFinished()
.onFailure((file, exc) -> LOG.warn("Failed to delete uncommitted file: {}", file, exc))
.run(
path -> {
if (!committedFiles.contains(path)) {
ops.io().deleteFile(path);
}
});
} else {
LOG.warn("Failed to load metadata for a committed snapshot, skipping clean-up");
}
// delete all of the files that were deleted in the most recent set of operation commits
Tasks.foreach(deletedFiles)
.suppressFailureWhenFinished()
.onFailure((file, exc) -> LOG.warn("Failed to delete uncommitted file: {}", file, exc))
.run(
path -> {
if (committedFiles == null || !committedFiles.contains(path)) {
ops.io().deleteFile(path);
}
});

} catch (RuntimeException e) {
LOG.warn("Failed to load committed metadata, skipping clean-up", e);
Expand Down
9 changes: 9 additions & 0 deletions core/src/test/java/org/apache/iceberg/TableTestBase.java
Expand Up @@ -212,6 +212,15 @@ List<File> listManifestFiles(File tableDirToList) {
&& Files.getFileExtension(name).equalsIgnoreCase("avro")));
}

List<File> listManifestLists(String tableDirToList) {
return Lists.newArrayList(
new File(tableDirToList, "metadata")
.listFiles(
(dir, name) ->
name.startsWith("snap")
&& Files.getFileExtension(name).equalsIgnoreCase("avro")));
}

public static long countAllMetadataFiles(File tableDir) {
return Arrays.stream(new File(tableDir, "metadata").listFiles())
.filter(f -> f.isFile())
Expand Down
Expand Up @@ -440,7 +440,10 @@ public void testRetainNAvailableSnapshotsWithTransaction() {
t3 = System.currentTimeMillis();
}

// Retain last 2 snapshots
Assert.assertEquals(
"Should be 3 manifest lists", 3, listManifestLists(table.location()).size());

// Retain last 2 snapshots, which means 1 is deleted.
Transaction tx = table.newTransaction();
removeSnapshots(tx.table()).expireOlderThan(t3).retainLast(2).commit();
tx.commitTransaction();
Expand All @@ -449,6 +452,8 @@ public void testRetainNAvailableSnapshotsWithTransaction() {
"Should have two snapshots.", 2, Lists.newArrayList(table.snapshots()).size());
Assert.assertEquals(
"First snapshot should not present.", null, table.snapshot(firstSnapshotId));
Assert.assertEquals(
"Should be 2 manifest lists", 2, listManifestLists(table.location()).size());
}

@Test
Expand Down
Expand Up @@ -309,6 +309,8 @@ public void testExpirationInTransaction() {
V2Assert.assertEquals("Snapshot sequence number should be 1", 1, snap1.sequenceNumber());
V2Assert.assertEquals(
"Last sequence number should be 1", 1, readMetadata().lastSequenceNumber());
V2Assert.assertEquals(
"Should be 1 manifest list", 1, listManifestLists(table.location()).size());

table.newAppend().appendFile(FILE_B).commit();
Snapshot snap2 = table.currentSnapshot();
Expand All @@ -319,12 +321,18 @@ public void testExpirationInTransaction() {
V2Assert.assertEquals("Snapshot sequence number should be 2", 2, snap2.sequenceNumber());
V2Assert.assertEquals(
"Last sequence number should be 2", 2, readMetadata().lastSequenceNumber());
V2Assert.assertEquals(
"Should be 2 manifest lists", 2, listManifestLists(table.location()).size());

Transaction txn = table.newTransaction();
txn.expireSnapshots().expireSnapshotId(commitId1).commit();
txn.commitTransaction();
V2Assert.assertEquals(
"Last sequence number should be 2", 2, readMetadata().lastSequenceNumber());
V2Assert.assertEquals(
"Should be 1 manifest list as 1 was deleted",
1,
listManifestLists(table.location()).size());
}

@Test
Expand Down

0 comments on commit 02d2250

Please sign in to comment.