Skip to content

Commit

Permalink
Fix deletes which trigger file rewrites in partitions with special ch…
Browse files Browse the repository at this point in the history
…aracters
  • Loading branch information
jkylling authored and ebyhr committed Oct 3, 2023
1 parent b02a15b commit 0cdb524
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 5 deletions.
Expand Up @@ -1978,7 +1978,7 @@ public void finishMerge(ConnectorSession session, ConnectorMergeTableHandle merg
}

for (String file : oldFiles) {
transactionLogWriter.appendRemoveFileEntry(new RemoveFileEntry(file, writeTimestamp, true));
transactionLogWriter.appendRemoveFileEntry(new RemoveFileEntry(toUriFormat(file), writeTimestamp, true));
}

appendAddFileEntries(transactionLogWriter, newFiles, partitionColumns, getExactColumnNames(handle.getMetadataEntry()), true);
Expand Down Expand Up @@ -2169,7 +2169,7 @@ private void finishOptimize(ConnectorSession session, DeltaLakeTableExecuteHandl

for (String scannedPath : scannedPaths) {
String relativePath = relativePath(tableLocation, scannedPath);
transactionLogWriter.appendRemoveFileEntry(new RemoveFileEntry(relativePath, writeTimestamp, false));
transactionLogWriter.appendRemoveFileEntry(new RemoveFileEntry(toUriFormat(relativePath), writeTimestamp, false));
}

// Note: during writes we want to preserve original case of partition columns
Expand Down Expand Up @@ -2632,7 +2632,7 @@ static String relativePath(String basePath, String path)
String basePathDirectory = basePath.endsWith("/") ? basePath : basePath + "/";
checkArgument(path.startsWith(basePathDirectory) && (path.length() > basePathDirectory.length()),
"path [%s] must be a subdirectory of basePath [%s]", path, basePath);
return toUriFormat(path.substring(basePathDirectory.length()));
return path.substring(basePathDirectory.length());
}

public void rollback()
Expand Down
Expand Up @@ -67,9 +67,13 @@ public void testTargetedDeleteWhenTableIsPartitionedWithColumnContainingSpecialC
"AS VALUES " +
"(1, 'with-hyphen'), " +
"(2, 'with:colon'), " +
"(3, 'with?question')", 3);
"(3, 'with:colon'), " + // create two rows in a single file to trigger parquet file rewrite on delete
"(4, 'with?question')", 4);
assertQuery("SELECT count(*), count(DISTINCT \"$path\"), col_name FROM " + tableName + " GROUP BY 3", "VALUES (1, 1, 'with-hyphen'), (2, 1, 'with:colon'), (1, 1, 'with?question')");
assertUpdate("DELETE FROM " + tableName + " WHERE id = 2", 1);
assertQuery("SELECT * FROM " + tableName, "VALUES(1, 'with-hyphen'), (3, 'with?question')");
assertQuery("SELECT * FROM " + tableName, "VALUES (1, 'with-hyphen'), (3, 'with:colon'), (4, 'with?question')");
assertUpdate("DELETE FROM " + tableName, 3);
assertQueryReturnsEmptyResult("SELECT * FROM " + tableName);
}

@Test
Expand Down

0 comments on commit 0cdb524

Please sign in to comment.