From 75b0b62968443bd2a5e64fae6948204d67cc8e56 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Mon, 30 Mar 2026 10:26:11 +0200 Subject: [PATCH 1/6] trino: fix wrong deletes in delta lake --- ...eting-incorect-records-in-Delta-Lake.patch | 90 +++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 trino/trino/stackable/patches/477/0004-Fix-deleting-incorect-records-in-Delta-Lake.patch diff --git a/trino/trino/stackable/patches/477/0004-Fix-deleting-incorect-records-in-Delta-Lake.patch b/trino/trino/stackable/patches/477/0004-Fix-deleting-incorect-records-in-Delta-Lake.patch new file mode 100644 index 000000000..603900222 --- /dev/null +++ b/trino/trino/stackable/patches/477/0004-Fix-deleting-incorect-records-in-Delta-Lake.patch @@ -0,0 +1,90 @@ +From 22be03f0d6787e8c7fe292a9fe3d412ad67722e4 Mon Sep 17 00:00:00 2001 +From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> +Date: Mon, 30 Mar 2026 10:20:17 +0200 +Subject: Fix deleting incorect records in Delta Lake + +This is a manual backport of https://github.com/trinodb/trino/pull/28907 +which is only available starting with Trino 481. +--- + .../plugin/deltalake/DeltaLakeMetadata.java | 2 +- + .../DeltaLakePageSourceProvider.java | 2 +- + .../deltalake/DeltaLakeTableHandle.java | 26 +++++++++++++++++++ + 3 files changed, 28 insertions(+), 2 deletions(-) + +diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java +index 1f336bd1257..98892ff6110 100644 +--- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java ++++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java +@@ -2591,7 +2591,7 @@ public class DeltaLakeMetadata + DeltaLakeInsertTableHandle insertHandle = createInsertHandle(retryMode, handle, inputColumns); + + Map deletionVectors = loadDeletionVectors(session, handle); +- return new DeltaLakeMergeTableHandle(handle, insertHandle, deletionVectors, findShallowCloneSourceTableLocation(session, handle)); ++ return new DeltaLakeMergeTableHandle(handle.forMerge(), insertHandle, deletionVectors, findShallowCloneSourceTableLocation(session, handle)); + } + + private Optional findShallowCloneSourceTableLocation(ConnectorSession session, DeltaLakeTableHandle handle) +diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakePageSourceProvider.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakePageSourceProvider.java +index 9e0f75ec116..906efe496f0 100644 +--- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakePageSourceProvider.java ++++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakePageSourceProvider.java +@@ -226,7 +226,7 @@ public class DeltaLakePageSourceProvider + .withMaxReadBlockSize(getParquetMaxReadBlockSize(session)) + .withMaxReadBlockRowCount(getParquetMaxReadBlockRowCount(session)) + .withSmallFileThreshold(getParquetSmallFileThreshold(session)) +- .withUseColumnIndex(split.getDeletionVector().isEmpty() && isParquetUseColumnIndex(session)) ++ .withUseColumnIndex(table.getWriteType().isEmpty() && split.getDeletionVector().isEmpty() && isParquetUseColumnIndex(session)) + .withIgnoreStatistics(isParquetIgnoreStatistics(session)) + .withVectorizedDecodingEnabled(isParquetVectorizedDecodingEnabled(session)) + .build(); +diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeTableHandle.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeTableHandle.java +index 205cfcec48e..6b78c0e21bf 100644 +--- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeTableHandle.java ++++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeTableHandle.java +@@ -31,6 +31,7 @@ import java.util.Optional; + import java.util.Set; + + import static com.google.common.base.Preconditions.checkArgument; ++import static io.trino.plugin.deltalake.DeltaLakeTableHandle.WriteType.MERGE; + import static io.trino.plugin.deltalake.DeltaLakeTableHandle.WriteType.UPDATE; + import static java.util.Objects.requireNonNull; + +@@ -40,6 +41,7 @@ public class DeltaLakeTableHandle + // Insert is not included here because it uses a separate TableHandle type + public enum WriteType + { ++ MERGE, + UPDATE, + DELETE + } +@@ -212,6 +214,30 @@ public class DeltaLakeTableHandle + vendedCredentials); + } + ++ public DeltaLakeTableHandle forMerge() ++ { ++ return new DeltaLakeTableHandle( ++ schemaName, ++ tableName, ++ managed, ++ location, ++ metadataEntry, ++ protocolEntry, ++ enforcedPartitionConstraint, ++ nonPartitionConstraint, ++ constraintColumns, ++ Optional.of(MERGE), ++ projectedColumns, ++ updatedColumns, ++ updateRowIdColumns, ++ analyzeHandle, ++ recordScannedFiles, ++ isOptimize, ++ maxScannedFileSize, ++ readVersion, ++ timeTravel); ++ } ++ + @Override + public SchemaTableName schemaTableName() + { From 545331ce306b25a62f53408314052df81b7d99f5 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Mon, 30 Mar 2026 10:41:26 +0200 Subject: [PATCH 2/6] patch Trino 479 --- ...eting-incorect-records-in-Delta-Lake.patch | 186 ++++++++++++++++++ 1 file changed, 186 insertions(+) create mode 100644 trino/trino/stackable/patches/479/0003-Fix-deleting-incorect-records-in-Delta-Lake.patch diff --git a/trino/trino/stackable/patches/479/0003-Fix-deleting-incorect-records-in-Delta-Lake.patch b/trino/trino/stackable/patches/479/0003-Fix-deleting-incorect-records-in-Delta-Lake.patch new file mode 100644 index 000000000..c9bc62837 --- /dev/null +++ b/trino/trino/stackable/patches/479/0003-Fix-deleting-incorect-records-in-Delta-Lake.patch @@ -0,0 +1,186 @@ +From 9a33a76a2172f035e71a25d5eb2f3ead844115c2 Mon Sep 17 00:00:00 2001 +From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> +Date: Mon, 30 Mar 2026 10:40:11 +0200 +Subject: Fix deleting incorect records in Delta Lake + +This is a manual backport of https://github.com/trinodb/trino/pull/28907 +which is only available starting with Trino 481. +--- + .../plugin/deltalake/DeltaLakeMetadata.java | 2 +- + .../DeltaLakePageSourceProvider.java | 2 +- + .../deltalake/DeltaLakeTableHandle.java | 26 ++++++++++++++++++ + .../plugin/deltalake/TestDeltaLakeBasic.java | 21 ++++++++++++++ + .../deltalake/large_parquet_file/README.md | 12 ++++++++ + .../_delta_log/00000000000000000000.json | 3 ++ + .../_delta_log/00000000000000000001.json | 2 ++ + ...4d6b-b4e5-eb56bab9ca7f-c000.snappy.parquet | Bin 0 -> 728 bytes + 8 files changed, 66 insertions(+), 2 deletions(-) + create mode 100644 plugin/trino-delta-lake/src/test/resources/deltalake/large_parquet_file/README.md + create mode 100644 plugin/trino-delta-lake/src/test/resources/deltalake/large_parquet_file/_delta_log/00000000000000000000.json + create mode 100644 plugin/trino-delta-lake/src/test/resources/deltalake/large_parquet_file/_delta_log/00000000000000000001.json + create mode 100644 plugin/trino-delta-lake/src/test/resources/deltalake/large_parquet_file/part-00000-2e1c15db-7523-4d6b-b4e5-eb56bab9ca7f-c000.snappy.parquet + +diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java +index 94eee5793d5..1f25a19ca7e 100644 +--- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java ++++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java +@@ -2579,7 +2579,7 @@ public class DeltaLakeMetadata + DeltaLakeInsertTableHandle insertHandle = createInsertHandle(retryMode, handle, inputColumns); + + Map deletionVectors = loadDeletionVectors(session, handle); +- return new DeltaLakeMergeTableHandle(handle, insertHandle, deletionVectors, findShallowCloneSourceTableLocation(session, handle)); ++ return new DeltaLakeMergeTableHandle(handle.forMerge(), insertHandle, deletionVectors, findShallowCloneSourceTableLocation(session, handle)); + } + + private Optional findShallowCloneSourceTableLocation(ConnectorSession session, DeltaLakeTableHandle handle) +diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakePageSourceProvider.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakePageSourceProvider.java +index f4bff5ccec4..256dd356c95 100644 +--- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakePageSourceProvider.java ++++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakePageSourceProvider.java +@@ -226,7 +226,7 @@ public class DeltaLakePageSourceProvider + .withMaxReadBlockSize(getParquetMaxReadBlockSize(session)) + .withMaxReadBlockRowCount(getParquetMaxReadBlockRowCount(session)) + .withSmallFileThreshold(getParquetSmallFileThreshold(session)) +- .withUseColumnIndex(split.getDeletionVector().isEmpty() && isParquetUseColumnIndex(session)) ++ .withUseColumnIndex(table.getWriteType().isEmpty() && split.getDeletionVector().isEmpty() && isParquetUseColumnIndex(session)) + .withIgnoreStatistics(isParquetIgnoreStatistics(session)) + .withVectorizedDecodingEnabled(isParquetVectorizedDecodingEnabled(session)) + .build(); +diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeTableHandle.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeTableHandle.java +index 893113de0b8..6e3caa69c29 100644 +--- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeTableHandle.java ++++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeTableHandle.java +@@ -30,6 +30,7 @@ import java.util.Optional; + import java.util.Set; + + import static com.google.common.base.Preconditions.checkArgument; ++import static io.trino.plugin.deltalake.DeltaLakeTableHandle.WriteType.MERGE; + import static io.trino.plugin.deltalake.DeltaLakeTableHandle.WriteType.UPDATE; + import static java.util.Objects.requireNonNull; + +@@ -39,6 +40,7 @@ public class DeltaLakeTableHandle + // Insert is not included here because it uses a separate TableHandle type + public enum WriteType + { ++ MERGE, + UPDATE, + DELETE + } +@@ -203,6 +205,30 @@ public class DeltaLakeTableHandle + timeTravel); + } + ++ public DeltaLakeTableHandle forMerge() ++ { ++ return new DeltaLakeTableHandle( ++ schemaName, ++ tableName, ++ managed, ++ location, ++ metadataEntry, ++ protocolEntry, ++ enforcedPartitionConstraint, ++ nonPartitionConstraint, ++ constraintColumns, ++ Optional.of(MERGE), ++ projectedColumns, ++ updatedColumns, ++ updateRowIdColumns, ++ analyzeHandle, ++ recordScannedFiles, ++ isOptimize, ++ maxScannedFileSize, ++ readVersion, ++ timeTravel); ++ } ++ + @Override + public SchemaTableName schemaTableName() + { +diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java +index 6282ba948ba..e7f816ff2c9 100644 +--- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java ++++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java +@@ -1548,6 +1548,27 @@ public class TestDeltaLakeBasic + assertUpdate("DROP TABLE " + tableName); + } + ++ @Test // regression test for https://github.com/trinodb/trino/issues/28885 ++ public void testDeleteFromLargeParquetFile() ++ throws Exception ++ { ++ String tableName = "delete_from_large_parquet_file_" + randomNameSuffix(); ++ ++ Path tableLocation = catalogDir.resolve(tableName); ++ copyDirectoryContents(new File(Resources.getResource("deltalake/large_parquet_file").toURI()).toPath(), tableLocation); ++ assertUpdate("CALL system.register_table('%s', '%s', '%s')".formatted(getSession().getSchema().orElseThrow(), tableName, tableLocation.toUri())); ++ ++ assertThat(query("SELECT count(*) FROM " + tableName + " WHERE data = 5")) ++ .matches("VALUES BIGINT '5000'"); ++ ++ assertUpdate("DELETE FROM " + tableName + " WHERE data = 5", 5000); ++ ++ assertThat(query("SELECT count(*) FROM " + tableName + " WHERE data = 5")) ++ .matches("VALUES BIGINT '0'"); ++ ++ assertUpdate("DROP TABLE " + tableName); ++ } ++ + /** + * @see deltalake.liquid_clustering + */ +diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/large_parquet_file/README.md b/plugin/trino-delta-lake/src/test/resources/deltalake/large_parquet_file/README.md +new file mode 100644 +index 00000000000..c06589d44ad +--- /dev/null ++++ b/plugin/trino-delta-lake/src/test/resources/deltalake/large_parquet_file/README.md +@@ -0,0 +1,12 @@ ++Data generated using Delta Lake 4.0.0: ++ ++```sql ++CREATE TABLE test_large_parquet ++(data INT) ++USING delta ++LOCATION 's3://test-bucket/test_large_parquet'; ++ ++INSERT INTO test_large_parquet ++SELECT id / 5000 FROM RANGE(0, 50000) ++DISTRIBUTE BY 1; ++``` +diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/large_parquet_file/_delta_log/00000000000000000000.json b/plugin/trino-delta-lake/src/test/resources/deltalake/large_parquet_file/_delta_log/00000000000000000000.json +new file mode 100644 +index 00000000000..5f057f64adc +--- /dev/null ++++ b/plugin/trino-delta-lake/src/test/resources/deltalake/large_parquet_file/_delta_log/00000000000000000000.json +@@ -0,0 +1,3 @@ ++{"commitInfo":{"timestamp":1774686576505,"operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]","clusterBy":"[]","description":null,"isManaged":"false","properties":"{}"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Apache-Spark/4.0.0 Delta-Lake/4.0.0","txnId":"1cd74e1c-d2e0-4b5a-a1ec-cb160b52c0c9"}} ++{"metaData":{"id":"26c72ddc-b89c-424c-8099-44e8da080d57","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"data\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1774686576349}} ++{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/large_parquet_file/_delta_log/00000000000000000001.json b/plugin/trino-delta-lake/src/test/resources/deltalake/large_parquet_file/_delta_log/00000000000000000001.json +new file mode 100644 +index 00000000000..db2d77c6173 +--- /dev/null ++++ b/plugin/trino-delta-lake/src/test/resources/deltalake/large_parquet_file/_delta_log/00000000000000000001.json +@@ -0,0 +1,2 @@ ++{"commitInfo":{"timestamp":1774686583256,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"50000","numOutputBytes":"728"},"engineInfo":"Apache-Spark/4.0.0 Delta-Lake/4.0.0","txnId":"f231fd6c-67d7-4d6c-b0af-5f9801b16769"}} ++{"add":{"path":"part-00000-2e1c15db-7523-4d6b-b4e5-eb56bab9ca7f-c000.snappy.parquet","partitionValues":{},"size":728,"modificationTime":1774686583000,"dataChange":true,"stats":"{\"numRecords\":50000,\"minValues\":{\"data\":0},\"maxValues\":{\"data\":9},\"nullCount\":{\"data\":0}}"}} +diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/large_parquet_file/part-00000-2e1c15db-7523-4d6b-b4e5-eb56bab9ca7f-c000.snappy.parquet b/plugin/trino-delta-lake/src/test/resources/deltalake/large_parquet_file/part-00000-2e1c15db-7523-4d6b-b4e5-eb56bab9ca7f-c000.snappy.parquet +new file mode 100644 +index 0000000000000000000000000000000000000000..a93217a5925798e1f1ccb31a67c40bd10bf7dd20 +GIT binary patch +literal 728 +zcmZ{f!D|yi6vn@uOtxDPz4R?Rut*p%v=WzWn@y8Ygxs>|k@(BP +zWsHez3a&J+7)6a|_==flz)V!tyt`7yR_RQG*fP&FdSC+PXZ0%0e9vgt +zeDZ?>#6;=NmC-PWj_!}zhhaY;24Xju#rK0afSFEpVGFwmo0#wnor^yTF(sM5n0cNn +zo@qtHwlW|~vBJdaUmZk=IB}H>buhNpvfLulsScR0Io&p2~-Bu{(&vgD{dMIT-hPL8}+aMjVa9lDtfn +zjnm2G-UQa&i$8DmqrwGb!F62ESud=%#@*h2q2|@YpzT(@R=L)xuWxn&x8l{y?Tw9A +a+v`*|tCh0XX;o`$!nOgt)PUypj{6$`)ub^1 + +literal 0 +HcmV?d00001 + From 2ecd8ba19fc94e00e0e52431e6a6b10abcd7d313 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Mon, 30 Mar 2026 10:43:59 +0200 Subject: [PATCH 3/6] update readme --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c9fb2641f..2dd16db93 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file. ## [Unreleased] +### Fixed + +- trino: Backport fix for wrong deletes in Delta Lake ([#1453]). + +[#1453]: https://github.com/stackabletech/docker-images/pull/1453 + ## [26.3.0] - 2026-03-16 ## [26.3.0-rc1] - 2026-03-16 From 0706458a1abcde2f7cffcdd2551388fcf861943c Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Mon, 30 Mar 2026 11:00:50 +0200 Subject: [PATCH 4/6] add full patch to 477 and fix typo --- ...eting-incorect-records-in-Delta-Lake.patch | 90 --------- ...ting-incorrect-records-in-Delta-Lake.patch | 186 ++++++++++++++++++ ...ing-incorrect-records-in-Delta-Lake.patch} | 2 +- 3 files changed, 187 insertions(+), 91 deletions(-) delete mode 100644 trino/trino/stackable/patches/477/0004-Fix-deleting-incorect-records-in-Delta-Lake.patch create mode 100644 trino/trino/stackable/patches/477/0004-Fix-deleting-incorrect-records-in-Delta-Lake.patch rename trino/trino/stackable/patches/479/{0003-Fix-deleting-incorect-records-in-Delta-Lake.patch => 0003-Fix-deleting-incorrect-records-in-Delta-Lake.patch} (99%) diff --git a/trino/trino/stackable/patches/477/0004-Fix-deleting-incorect-records-in-Delta-Lake.patch b/trino/trino/stackable/patches/477/0004-Fix-deleting-incorect-records-in-Delta-Lake.patch deleted file mode 100644 index 603900222..000000000 --- a/trino/trino/stackable/patches/477/0004-Fix-deleting-incorect-records-in-Delta-Lake.patch +++ /dev/null @@ -1,90 +0,0 @@ -From 22be03f0d6787e8c7fe292a9fe3d412ad67722e4 Mon Sep 17 00:00:00 2001 -From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> -Date: Mon, 30 Mar 2026 10:20:17 +0200 -Subject: Fix deleting incorect records in Delta Lake - -This is a manual backport of https://github.com/trinodb/trino/pull/28907 -which is only available starting with Trino 481. ---- - .../plugin/deltalake/DeltaLakeMetadata.java | 2 +- - .../DeltaLakePageSourceProvider.java | 2 +- - .../deltalake/DeltaLakeTableHandle.java | 26 +++++++++++++++++++ - 3 files changed, 28 insertions(+), 2 deletions(-) - -diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java -index 1f336bd1257..98892ff6110 100644 ---- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java -+++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java -@@ -2591,7 +2591,7 @@ public class DeltaLakeMetadata - DeltaLakeInsertTableHandle insertHandle = createInsertHandle(retryMode, handle, inputColumns); - - Map deletionVectors = loadDeletionVectors(session, handle); -- return new DeltaLakeMergeTableHandle(handle, insertHandle, deletionVectors, findShallowCloneSourceTableLocation(session, handle)); -+ return new DeltaLakeMergeTableHandle(handle.forMerge(), insertHandle, deletionVectors, findShallowCloneSourceTableLocation(session, handle)); - } - - private Optional findShallowCloneSourceTableLocation(ConnectorSession session, DeltaLakeTableHandle handle) -diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakePageSourceProvider.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakePageSourceProvider.java -index 9e0f75ec116..906efe496f0 100644 ---- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakePageSourceProvider.java -+++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakePageSourceProvider.java -@@ -226,7 +226,7 @@ public class DeltaLakePageSourceProvider - .withMaxReadBlockSize(getParquetMaxReadBlockSize(session)) - .withMaxReadBlockRowCount(getParquetMaxReadBlockRowCount(session)) - .withSmallFileThreshold(getParquetSmallFileThreshold(session)) -- .withUseColumnIndex(split.getDeletionVector().isEmpty() && isParquetUseColumnIndex(session)) -+ .withUseColumnIndex(table.getWriteType().isEmpty() && split.getDeletionVector().isEmpty() && isParquetUseColumnIndex(session)) - .withIgnoreStatistics(isParquetIgnoreStatistics(session)) - .withVectorizedDecodingEnabled(isParquetVectorizedDecodingEnabled(session)) - .build(); -diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeTableHandle.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeTableHandle.java -index 205cfcec48e..6b78c0e21bf 100644 ---- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeTableHandle.java -+++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeTableHandle.java -@@ -31,6 +31,7 @@ import java.util.Optional; - import java.util.Set; - - import static com.google.common.base.Preconditions.checkArgument; -+import static io.trino.plugin.deltalake.DeltaLakeTableHandle.WriteType.MERGE; - import static io.trino.plugin.deltalake.DeltaLakeTableHandle.WriteType.UPDATE; - import static java.util.Objects.requireNonNull; - -@@ -40,6 +41,7 @@ public class DeltaLakeTableHandle - // Insert is not included here because it uses a separate TableHandle type - public enum WriteType - { -+ MERGE, - UPDATE, - DELETE - } -@@ -212,6 +214,30 @@ public class DeltaLakeTableHandle - vendedCredentials); - } - -+ public DeltaLakeTableHandle forMerge() -+ { -+ return new DeltaLakeTableHandle( -+ schemaName, -+ tableName, -+ managed, -+ location, -+ metadataEntry, -+ protocolEntry, -+ enforcedPartitionConstraint, -+ nonPartitionConstraint, -+ constraintColumns, -+ Optional.of(MERGE), -+ projectedColumns, -+ updatedColumns, -+ updateRowIdColumns, -+ analyzeHandle, -+ recordScannedFiles, -+ isOptimize, -+ maxScannedFileSize, -+ readVersion, -+ timeTravel); -+ } -+ - @Override - public SchemaTableName schemaTableName() - { diff --git a/trino/trino/stackable/patches/477/0004-Fix-deleting-incorrect-records-in-Delta-Lake.patch b/trino/trino/stackable/patches/477/0004-Fix-deleting-incorrect-records-in-Delta-Lake.patch new file mode 100644 index 000000000..c08b0e9a1 --- /dev/null +++ b/trino/trino/stackable/patches/477/0004-Fix-deleting-incorrect-records-in-Delta-Lake.patch @@ -0,0 +1,186 @@ +From 772115dbe7c2e39069cb7fd527f0ce042ede9763 Mon Sep 17 00:00:00 2001 +From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> +Date: Mon, 30 Mar 2026 10:56:06 +0200 +Subject: Fix deleting incorrect records in Delta Lake + +This is backport of https://github.com/trinodb/trino/pull/28907 which is +only available starting with Trino 481. +--- + .../plugin/deltalake/DeltaLakeMetadata.java | 2 +- + .../DeltaLakePageSourceProvider.java | 2 +- + .../deltalake/DeltaLakeTableHandle.java | 26 ++++++++++++++++++ + .../plugin/deltalake/TestDeltaLakeBasic.java | 21 ++++++++++++++ + .../deltalake/large_parquet_file/README.md | 12 ++++++++ + .../_delta_log/00000000000000000000.json | 3 ++ + .../_delta_log/00000000000000000001.json | 2 ++ + ...4d6b-b4e5-eb56bab9ca7f-c000.snappy.parquet | Bin 0 -> 728 bytes + 8 files changed, 66 insertions(+), 2 deletions(-) + create mode 100644 plugin/trino-delta-lake/src/test/resources/deltalake/large_parquet_file/README.md + create mode 100644 plugin/trino-delta-lake/src/test/resources/deltalake/large_parquet_file/_delta_log/00000000000000000000.json + create mode 100644 plugin/trino-delta-lake/src/test/resources/deltalake/large_parquet_file/_delta_log/00000000000000000001.json + create mode 100644 plugin/trino-delta-lake/src/test/resources/deltalake/large_parquet_file/part-00000-2e1c15db-7523-4d6b-b4e5-eb56bab9ca7f-c000.snappy.parquet + +diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java +index 1f336bd1257..98892ff6110 100644 +--- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java ++++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java +@@ -2591,7 +2591,7 @@ public class DeltaLakeMetadata + DeltaLakeInsertTableHandle insertHandle = createInsertHandle(retryMode, handle, inputColumns); + + Map deletionVectors = loadDeletionVectors(session, handle); +- return new DeltaLakeMergeTableHandle(handle, insertHandle, deletionVectors, findShallowCloneSourceTableLocation(session, handle)); ++ return new DeltaLakeMergeTableHandle(handle.forMerge(), insertHandle, deletionVectors, findShallowCloneSourceTableLocation(session, handle)); + } + + private Optional findShallowCloneSourceTableLocation(ConnectorSession session, DeltaLakeTableHandle handle) +diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakePageSourceProvider.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakePageSourceProvider.java +index 9e0f75ec116..906efe496f0 100644 +--- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakePageSourceProvider.java ++++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakePageSourceProvider.java +@@ -226,7 +226,7 @@ public class DeltaLakePageSourceProvider + .withMaxReadBlockSize(getParquetMaxReadBlockSize(session)) + .withMaxReadBlockRowCount(getParquetMaxReadBlockRowCount(session)) + .withSmallFileThreshold(getParquetSmallFileThreshold(session)) +- .withUseColumnIndex(split.getDeletionVector().isEmpty() && isParquetUseColumnIndex(session)) ++ .withUseColumnIndex(table.getWriteType().isEmpty() && split.getDeletionVector().isEmpty() && isParquetUseColumnIndex(session)) + .withIgnoreStatistics(isParquetIgnoreStatistics(session)) + .withVectorizedDecodingEnabled(isParquetVectorizedDecodingEnabled(session)) + .build(); +diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeTableHandle.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeTableHandle.java +index 205cfcec48e..6b78c0e21bf 100644 +--- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeTableHandle.java ++++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeTableHandle.java +@@ -31,6 +31,7 @@ import java.util.Optional; + import java.util.Set; + + import static com.google.common.base.Preconditions.checkArgument; ++import static io.trino.plugin.deltalake.DeltaLakeTableHandle.WriteType.MERGE; + import static io.trino.plugin.deltalake.DeltaLakeTableHandle.WriteType.UPDATE; + import static java.util.Objects.requireNonNull; + +@@ -40,6 +41,7 @@ public class DeltaLakeTableHandle + // Insert is not included here because it uses a separate TableHandle type + public enum WriteType + { ++ MERGE, + UPDATE, + DELETE + } +@@ -212,6 +214,30 @@ public class DeltaLakeTableHandle + vendedCredentials); + } + ++ public DeltaLakeTableHandle forMerge() ++ { ++ return new DeltaLakeTableHandle( ++ schemaName, ++ tableName, ++ managed, ++ location, ++ metadataEntry, ++ protocolEntry, ++ enforcedPartitionConstraint, ++ nonPartitionConstraint, ++ constraintColumns, ++ Optional.of(MERGE), ++ projectedColumns, ++ updatedColumns, ++ updateRowIdColumns, ++ analyzeHandle, ++ recordScannedFiles, ++ isOptimize, ++ maxScannedFileSize, ++ readVersion, ++ timeTravel); ++ } ++ + @Override + public SchemaTableName schemaTableName() + { +diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java +index 27db0930313..cd01bfe1de2 100644 +--- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java ++++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java +@@ -1537,6 +1537,27 @@ public class TestDeltaLakeBasic + assertUpdate("DROP TABLE " + tableName); + } + ++ @Test // regression test for https://github.com/trinodb/trino/issues/28885 ++ public void testDeleteFromLargeParquetFile() ++ throws Exception ++ { ++ String tableName = "delete_from_large_parquet_file_" + randomNameSuffix(); ++ ++ Path tableLocation = catalogDir.resolve(tableName); ++ copyDirectoryContents(new File(Resources.getResource("deltalake/large_parquet_file").toURI()).toPath(), tableLocation); ++ assertUpdate("CALL system.register_table('%s', '%s', '%s')".formatted(getSession().getSchema().orElseThrow(), tableName, tableLocation.toUri())); ++ ++ assertThat(query("SELECT count(*) FROM " + tableName + " WHERE data = 5")) ++ .matches("VALUES BIGINT '5000'"); ++ ++ assertUpdate("DELETE FROM " + tableName + " WHERE data = 5", 5000); ++ ++ assertThat(query("SELECT count(*) FROM " + tableName + " WHERE data = 5")) ++ .matches("VALUES BIGINT '0'"); ++ ++ assertUpdate("DROP TABLE " + tableName); ++ } ++ + /** + * @see deltalake.liquid_clustering + */ +diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/large_parquet_file/README.md b/plugin/trino-delta-lake/src/test/resources/deltalake/large_parquet_file/README.md +new file mode 100644 +index 00000000000..c06589d44ad +--- /dev/null ++++ b/plugin/trino-delta-lake/src/test/resources/deltalake/large_parquet_file/README.md +@@ -0,0 +1,12 @@ ++Data generated using Delta Lake 4.0.0: ++ ++```sql ++CREATE TABLE test_large_parquet ++(data INT) ++USING delta ++LOCATION 's3://test-bucket/test_large_parquet'; ++ ++INSERT INTO test_large_parquet ++SELECT id / 5000 FROM RANGE(0, 50000) ++DISTRIBUTE BY 1; ++``` +diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/large_parquet_file/_delta_log/00000000000000000000.json b/plugin/trino-delta-lake/src/test/resources/deltalake/large_parquet_file/_delta_log/00000000000000000000.json +new file mode 100644 +index 00000000000..5f057f64adc +--- /dev/null ++++ b/plugin/trino-delta-lake/src/test/resources/deltalake/large_parquet_file/_delta_log/00000000000000000000.json +@@ -0,0 +1,3 @@ ++{"commitInfo":{"timestamp":1774686576505,"operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]","clusterBy":"[]","description":null,"isManaged":"false","properties":"{}"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Apache-Spark/4.0.0 Delta-Lake/4.0.0","txnId":"1cd74e1c-d2e0-4b5a-a1ec-cb160b52c0c9"}} ++{"metaData":{"id":"26c72ddc-b89c-424c-8099-44e8da080d57","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"data\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1774686576349}} ++{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/large_parquet_file/_delta_log/00000000000000000001.json b/plugin/trino-delta-lake/src/test/resources/deltalake/large_parquet_file/_delta_log/00000000000000000001.json +new file mode 100644 +index 00000000000..db2d77c6173 +--- /dev/null ++++ b/plugin/trino-delta-lake/src/test/resources/deltalake/large_parquet_file/_delta_log/00000000000000000001.json +@@ -0,0 +1,2 @@ ++{"commitInfo":{"timestamp":1774686583256,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"50000","numOutputBytes":"728"},"engineInfo":"Apache-Spark/4.0.0 Delta-Lake/4.0.0","txnId":"f231fd6c-67d7-4d6c-b0af-5f9801b16769"}} ++{"add":{"path":"part-00000-2e1c15db-7523-4d6b-b4e5-eb56bab9ca7f-c000.snappy.parquet","partitionValues":{},"size":728,"modificationTime":1774686583000,"dataChange":true,"stats":"{\"numRecords\":50000,\"minValues\":{\"data\":0},\"maxValues\":{\"data\":9},\"nullCount\":{\"data\":0}}"}} +diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/large_parquet_file/part-00000-2e1c15db-7523-4d6b-b4e5-eb56bab9ca7f-c000.snappy.parquet b/plugin/trino-delta-lake/src/test/resources/deltalake/large_parquet_file/part-00000-2e1c15db-7523-4d6b-b4e5-eb56bab9ca7f-c000.snappy.parquet +new file mode 100644 +index 0000000000000000000000000000000000000000..a93217a5925798e1f1ccb31a67c40bd10bf7dd20 +GIT binary patch +literal 728 +zcmZ{f!D|yi6vn@uOtxDPz4R?Rut*p%v=WzWn@y8Ygxs>|k@(BP +zWsHez3a&J+7)6a|_==flz)V!tyt`7yR_RQG*fP&FdSC+PXZ0%0e9vgt +zeDZ?>#6;=NmC-PWj_!}zhhaY;24Xju#rK0afSFEpVGFwmo0#wnor^yTF(sM5n0cNn +zo@qtHwlW|~vBJdaUmZk=IB}H>buhNpvfLulsScR0Io&p2~-Bu{(&vgD{dMIT-hPL8}+aMjVa9lDtfn +zjnm2G-UQa&i$8DmqrwGb!F62ESud=%#@*h2q2|@YpzT(@R=L)xuWxn&x8l{y?Tw9A +a+v`*|tCh0XX;o`$!nOgt)PUypj{6$`)ub^1 + +literal 0 +HcmV?d00001 + diff --git a/trino/trino/stackable/patches/479/0003-Fix-deleting-incorect-records-in-Delta-Lake.patch b/trino/trino/stackable/patches/479/0003-Fix-deleting-incorrect-records-in-Delta-Lake.patch similarity index 99% rename from trino/trino/stackable/patches/479/0003-Fix-deleting-incorect-records-in-Delta-Lake.patch rename to trino/trino/stackable/patches/479/0003-Fix-deleting-incorrect-records-in-Delta-Lake.patch index c9bc62837..62ae97e5a 100644 --- a/trino/trino/stackable/patches/479/0003-Fix-deleting-incorect-records-in-Delta-Lake.patch +++ b/trino/trino/stackable/patches/479/0003-Fix-deleting-incorrect-records-in-Delta-Lake.patch @@ -1,7 +1,7 @@ From 9a33a76a2172f035e71a25d5eb2f3ead844115c2 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Mon, 30 Mar 2026 10:40:11 +0200 -Subject: Fix deleting incorect records in Delta Lake +Subject: Fix deleting incorrect records in Delta Lake This is a manual backport of https://github.com/trinodb/trino/pull/28907 which is only available starting with Trino 481. From 23617bda675cd43a9c6092028a54316fc3af23af Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Mon, 30 Mar 2026 11:38:25 +0200 Subject: [PATCH 5/6] update constructor arguments --- ...x-deleting-incorrect-records-in-Delta-Lake.patch | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/trino/trino/stackable/patches/477/0004-Fix-deleting-incorrect-records-in-Delta-Lake.patch b/trino/trino/stackable/patches/477/0004-Fix-deleting-incorrect-records-in-Delta-Lake.patch index c08b0e9a1..c7a7aa4a3 100644 --- a/trino/trino/stackable/patches/477/0004-Fix-deleting-incorrect-records-in-Delta-Lake.patch +++ b/trino/trino/stackable/patches/477/0004-Fix-deleting-incorrect-records-in-Delta-Lake.patch @@ -1,4 +1,4 @@ -From 772115dbe7c2e39069cb7fd527f0ce042ede9763 Mon Sep 17 00:00:00 2001 +From c3c9abdf3acad91028da5aa87470bfac4e2525a3 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Mon, 30 Mar 2026 10:56:06 +0200 Subject: Fix deleting incorrect records in Delta Lake @@ -8,13 +8,13 @@ only available starting with Trino 481. --- .../plugin/deltalake/DeltaLakeMetadata.java | 2 +- .../DeltaLakePageSourceProvider.java | 2 +- - .../deltalake/DeltaLakeTableHandle.java | 26 ++++++++++++++++++ + .../deltalake/DeltaLakeTableHandle.java | 27 ++++++++++++++++++ .../plugin/deltalake/TestDeltaLakeBasic.java | 21 ++++++++++++++ .../deltalake/large_parquet_file/README.md | 12 ++++++++ .../_delta_log/00000000000000000000.json | 3 ++ .../_delta_log/00000000000000000001.json | 2 ++ ...4d6b-b4e5-eb56bab9ca7f-c000.snappy.parquet | Bin 0 -> 728 bytes - 8 files changed, 66 insertions(+), 2 deletions(-) + 8 files changed, 67 insertions(+), 2 deletions(-) create mode 100644 plugin/trino-delta-lake/src/test/resources/deltalake/large_parquet_file/README.md create mode 100644 plugin/trino-delta-lake/src/test/resources/deltalake/large_parquet_file/_delta_log/00000000000000000000.json create mode 100644 plugin/trino-delta-lake/src/test/resources/deltalake/large_parquet_file/_delta_log/00000000000000000001.json @@ -47,7 +47,7 @@ index 9e0f75ec116..906efe496f0 100644 .withVectorizedDecodingEnabled(isParquetVectorizedDecodingEnabled(session)) .build(); diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeTableHandle.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeTableHandle.java -index 205cfcec48e..6b78c0e21bf 100644 +index 205cfcec48e..6583623858a 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeTableHandle.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeTableHandle.java @@ -31,6 +31,7 @@ import java.util.Optional; @@ -66,7 +66,7 @@ index 205cfcec48e..6b78c0e21bf 100644 UPDATE, DELETE } -@@ -212,6 +214,30 @@ public class DeltaLakeTableHandle +@@ -212,6 +214,31 @@ public class DeltaLakeTableHandle vendedCredentials); } @@ -91,7 +91,8 @@ index 205cfcec48e..6b78c0e21bf 100644 + isOptimize, + maxScannedFileSize, + readVersion, -+ timeTravel); ++ timeTravel, ++ vendedCredentials); + } + @Override From b3df76a4e602917397d55ac3ca5ac5a86c5e9800 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Mon, 30 Mar 2026 14:03:55 +0200 Subject: [PATCH 6/6] typos --- .../477/0004-Fix-deleting-incorrect-records-in-Delta-Lake.patch | 2 +- .../479/0003-Fix-deleting-incorrect-records-in-Delta-Lake.patch | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/trino/trino/stackable/patches/477/0004-Fix-deleting-incorrect-records-in-Delta-Lake.patch b/trino/trino/stackable/patches/477/0004-Fix-deleting-incorrect-records-in-Delta-Lake.patch index c7a7aa4a3..bff6c1873 100644 --- a/trino/trino/stackable/patches/477/0004-Fix-deleting-incorrect-records-in-Delta-Lake.patch +++ b/trino/trino/stackable/patches/477/0004-Fix-deleting-incorrect-records-in-Delta-Lake.patch @@ -3,7 +3,7 @@ From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Mon, 30 Mar 2026 10:56:06 +0200 Subject: Fix deleting incorrect records in Delta Lake -This is backport of https://github.com/trinodb/trino/pull/28907 which is +This is a backport of https://github.com/trinodb/trino/pull/28907 which is only available starting with Trino 481. --- .../plugin/deltalake/DeltaLakeMetadata.java | 2 +- diff --git a/trino/trino/stackable/patches/479/0003-Fix-deleting-incorrect-records-in-Delta-Lake.patch b/trino/trino/stackable/patches/479/0003-Fix-deleting-incorrect-records-in-Delta-Lake.patch index 62ae97e5a..e0a552c82 100644 --- a/trino/trino/stackable/patches/479/0003-Fix-deleting-incorrect-records-in-Delta-Lake.patch +++ b/trino/trino/stackable/patches/479/0003-Fix-deleting-incorrect-records-in-Delta-Lake.patch @@ -3,7 +3,7 @@ From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Mon, 30 Mar 2026 10:40:11 +0200 Subject: Fix deleting incorrect records in Delta Lake -This is a manual backport of https://github.com/trinodb/trino/pull/28907 +This is a backport of https://github.com/trinodb/trino/pull/28907 which is only available starting with Trino 481. --- .../plugin/deltalake/DeltaLakeMetadata.java | 2 +-