-
Notifications
You must be signed in to change notification settings - Fork 78
Integrate batched orphan files deletion with the existing schedule workflow #604
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Draft
abhisheknath2011
wants to merge
4
commits into
linkedin:main
Choose a base branch
from
abhisheknath2011:batched-tables
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Draft
Changes from all commits
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
200508e
Optimizer: Batched orphan file deletion using bin packing
abhisheknath2011 ff6c881
Addressed review comments
abhisheknath2011 ec76920
Count orphan files using Iterables to reduce driver memory usage
abhisheknath2011 ae1fdff
Integrate batched orphan files deletion with the existing schedule workflow
abhisheknath2011 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
79 changes: 79 additions & 0 deletions
79
...java/com/linkedin/openhouse/jobs/scheduler/tasks/BatchedTableOrphanFilesDeletionTask.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,79 @@ | ||
| package com.linkedin.openhouse.jobs.scheduler.tasks; | ||
|
|
||
| import com.linkedin.openhouse.jobs.client.JobsClient; | ||
| import com.linkedin.openhouse.jobs.client.TablesClient; | ||
| import com.linkedin.openhouse.jobs.client.model.JobConf; | ||
| import com.linkedin.openhouse.jobs.util.TableMetadata; | ||
| import com.linkedin.openhouse.jobs.util.TableMetadataBatch; | ||
| import java.util.Arrays; | ||
| import java.util.Collections; | ||
| import java.util.List; | ||
| import java.util.Map; | ||
| import java.util.stream.Collectors; | ||
| import lombok.Getter; | ||
| import lombok.extern.slf4j.Slf4j; | ||
|
|
||
| /** | ||
| * A task to remove orphan files from a batch of tables in a single Spark job. Pairs with {@code | ||
| * com.linkedin.openhouse.jobs.spark.BatchedOrphanFilesDeletionSparkApp} via the {@link | ||
| * JobConf.JobTypeEnum#ORPHAN_FILES_DELETION_BATCH} JobType. | ||
| * | ||
| * <p>The legacy {@link com.linkedin.openhouse.jobs.scheduler.JobsScheduler} pre-dates the optimizer | ||
| * service, so this task omits the optimizer-only CLI flags ({@code --resultsEndpoint}, {@code | ||
| * --operationIds}, {@code --tableUuids}). The Spark app treats them as optional and falls back to | ||
| * HTS-only lifecycle tracking when they are absent. | ||
| * | ||
| * @see <a href="https://iceberg.apache.org/docs/latest/maintenance/#delete-orphan-files">Delete | ||
| * orphan files</a> | ||
| */ | ||
| @Slf4j | ||
| @Getter | ||
| public class BatchedTableOrphanFilesDeletionTask extends OperationTask<TableMetadataBatch> { | ||
| public static final JobConf.JobTypeEnum OPERATION_TYPE = | ||
| JobConf.JobTypeEnum.ORPHAN_FILES_DELETION_BATCH; | ||
|
|
||
| public BatchedTableOrphanFilesDeletionTask( | ||
| JobsClient jobsClient, | ||
| TablesClient tablesClient, | ||
| TableMetadataBatch metadata, | ||
| long pollIntervalMs, | ||
| long queuedTimeoutMs, | ||
| long taskTimeoutMs) { | ||
| super(jobsClient, tablesClient, metadata, pollIntervalMs, queuedTimeoutMs, taskTimeoutMs); | ||
| } | ||
|
|
||
| public BatchedTableOrphanFilesDeletionTask( | ||
| JobsClient jobsClient, TablesClient tablesClient, TableMetadataBatch metadata) { | ||
| super(jobsClient, tablesClient, metadata); | ||
| } | ||
|
|
||
| @Override | ||
| public JobConf.JobTypeEnum getType() { | ||
| return OPERATION_TYPE; | ||
| } | ||
|
|
||
| @Override | ||
| protected List<String> getArgs() { | ||
| String tableNames = | ||
| metadata.getTables().stream().map(TableMetadata::fqtn).collect(Collectors.joining(",")); | ||
| return Arrays.asList("--tableNames", tableNames); | ||
| } | ||
|
|
||
| @Override | ||
| protected boolean shouldRun() { | ||
| return !metadata.getTables().isEmpty(); | ||
| } | ||
|
|
||
| @Override | ||
| protected boolean launchJob() { | ||
| String jobName = | ||
| String.format("%s_%s_%d", getType(), metadata.getDbName(), metadata.getTables().size()); | ||
| Map<String, String> executionProperties = Collections.emptyMap(); | ||
| String proxyUser = metadata.getTables().get(0).getCreator(); | ||
| jobId = | ||
| jobsClient | ||
| .launch(jobName, getType(), proxyUser, executionProperties, getArgs()) | ||
| .orElse(null); | ||
| return jobId != null; | ||
| } | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,17 +9,23 @@ | |
| import com.linkedin.openhouse.jobs.client.TablesClient; | ||
| import com.linkedin.openhouse.jobs.client.model.JobConf; | ||
| import com.linkedin.openhouse.jobs.scheduler.JobsScheduler; | ||
| import com.linkedin.openhouse.jobs.spark.BatchedOrphanFilesDeletionSparkApp; | ||
| import com.linkedin.openhouse.jobs.util.AppConstants; | ||
| import com.linkedin.openhouse.jobs.util.DataLayoutUtil; | ||
| import com.linkedin.openhouse.jobs.util.DatabaseMetadata; | ||
| import com.linkedin.openhouse.jobs.util.DirectoryMetadata; | ||
| import com.linkedin.openhouse.jobs.util.Metadata; | ||
| import com.linkedin.openhouse.jobs.util.TableDataLayoutMetadata; | ||
| import com.linkedin.openhouse.jobs.util.TableMetadata; | ||
| import com.linkedin.openhouse.jobs.util.TableMetadataBatch; | ||
| import com.linkedin.openhouse.jobs.util.binpack.Bin; | ||
| import com.linkedin.openhouse.jobs.util.binpack.BinItem; | ||
| import com.linkedin.openhouse.jobs.util.binpack.FirstFitDecreasingBinPacker; | ||
| import io.opentelemetry.api.common.AttributeKey; | ||
| import io.opentelemetry.api.common.Attributes; | ||
| import java.util.ArrayList; | ||
| import java.util.List; | ||
| import java.util.Map; | ||
| import java.util.Optional; | ||
| import java.util.Properties; | ||
| import java.util.stream.Collectors; | ||
|
|
@@ -40,10 +46,12 @@ | |
| public class OperationTasksBuilder { | ||
| public static final String MAX_COST_BUDGET_GB_HRS = "maxCostBudgetGbHrs"; | ||
| public static final String MAX_STRATEGIES_COUNT = "maxStrategiesCount"; | ||
| public static final String BATCH_MAX_ITEMS = "batchMaxItems"; | ||
| private static final double COMPUTE_COST_WEIGHT_DEFAULT = 0.3; | ||
| private static final double COMPACTION_GAIN_WEIGHT_DEFAULT = 0.7; | ||
| private static final double MAX_COST_BUDGET_GB_HRS_DEFAULT = 1000.0; | ||
| private static final int MAX_STRATEGIES_COUNT_DEFAULT = 10; | ||
| private static final int BATCH_MAX_ITEMS_DEFAULT = 25; | ||
| private static final String METRICS_SCOPE = JobsScheduler.class.getName(); | ||
|
|
||
| private final OperationTaskFactory<? extends OperationTask<?>> taskFactory; | ||
|
|
@@ -65,6 +73,80 @@ private List<OperationTask<?>> prepareTableOperationTaskList( | |
| return processMetadataList(tableMetadataList, jobType, operationMode, otelEmitter); | ||
| } | ||
|
|
||
| /** | ||
| * Builds one {@link BatchedTableOrphanFilesDeletionTask} per database-scoped bin. Groups eligible | ||
| * tables by database (batches never cross databases), then applies the first-fit-decreasing bin | ||
| * packer with a per-bin item cap from {@code properties} (defaults to {@value | ||
| * #BATCH_MAX_ITEMS_DEFAULT}). Tables with the maintenance op disabled are filtered out before | ||
| * grouping. | ||
| */ | ||
| private List<OperationTask<?>> prepareBatchedOrphanFilesDeletionTaskList( | ||
| JobConf.JobTypeEnum jobType, | ||
| Properties properties, | ||
| OperationMode operationMode, | ||
| OtelEmitter otelEmitter) { | ||
| int maxItemsPerBin = | ||
| NumberUtils.toInt(properties.getProperty(BATCH_MAX_ITEMS), BATCH_MAX_ITEMS_DEFAULT); | ||
| if (maxItemsPerBin > BatchedOrphanFilesDeletionSparkApp.MAX_BATCH_SIZE) { | ||
| throw new IllegalArgumentException( | ||
| String.format( | ||
| "--%s=%d exceeds Spark-app ceiling MAX_BATCH_SIZE=%d", | ||
| BATCH_MAX_ITEMS, maxItemsPerBin, BatchedOrphanFilesDeletionSparkApp.MAX_BATCH_SIZE)); | ||
| } | ||
| List<TableMetadata> eligible = | ||
| tablesClient.getTableMetadataList().stream() | ||
| .filter(t -> !t.isMaintenanceJobDisabled(jobType)) | ||
| .collect(Collectors.toList()); | ||
| log.info( | ||
| "Fetched metadata for {} batched-OFD-eligible tables; binMaxItems={}", | ||
| eligible.size(), | ||
| maxItemsPerBin); | ||
|
|
||
| FirstFitDecreasingBinPacker packer = | ||
| FirstFitDecreasingBinPacker.builder() | ||
| .maxItemsPerBin(maxItemsPerBin) | ||
| // Item-count cap only; weight/size dimensions disabled until table_stats is wired in. | ||
| .maxWeightPerBin(0) | ||
| .maxSizeBytesPerBin(0) | ||
| .build(); | ||
|
|
||
| Map<String, List<TableMetadata>> byDb = | ||
| eligible.stream().collect(Collectors.groupingBy(TableMetadata::getDbName)); | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Group the tables by DBs. |
||
|
|
||
| List<TableMetadataBatch> batches = new ArrayList<>(); | ||
| for (Map.Entry<String, List<TableMetadata>> dbGroup : byDb.entrySet()) { | ||
| String dbName = dbGroup.getKey(); | ||
| List<BinItem> items = | ||
| dbGroup.getValue().stream() | ||
| .map( | ||
| t -> | ||
| BinItem.builder() | ||
| .fqtn(t.fqtn()) | ||
| .operationId("") | ||
| .tableUuid("") | ||
| .databaseName(t.getDbName()) | ||
| .tableName(t.getTableName()) | ||
| .weight(1L) | ||
| .sizeBytes(0L) | ||
| .build()) | ||
| .collect(Collectors.toList()); | ||
| for (Bin bin : packer.pack(items)) { | ||
| List<TableMetadata> tablesForBin = | ||
| bin.items().stream() | ||
| .map( | ||
| item -> | ||
| dbGroup.getValue().stream() | ||
| .filter(t -> t.fqtn().equals(item.getFqtn())) | ||
| .findFirst() | ||
| .orElseThrow(() -> new IllegalStateException("missing table for bin"))) | ||
| .collect(Collectors.toList()); | ||
| batches.add(TableMetadataBatch.builder().dbName(dbName).tables(tablesForBin).build()); | ||
| } | ||
| } | ||
| log.info("Packed {} eligible tables into {} batches", eligible.size(), batches.size()); | ||
| return processMetadataList(batches, jobType, operationMode, otelEmitter); | ||
| } | ||
|
|
||
| private List<OperationTask<?>> prepareReplicationOperationTaskList( | ||
| JobConf.JobTypeEnum jobType, OperationMode operationMode, OtelEmitter otelEmitter) { | ||
| List<TableMetadata> replicationSetupTableMetadataList = tablesClient.getTableMetadataList(); | ||
|
|
@@ -272,6 +354,9 @@ public List<OperationTask<?>> buildOperationTaskList( | |
| case DATA_LAYOUT_STRATEGY_GENERATION: | ||
| case SORT_STATS_COLLECTION: | ||
| return prepareTableOperationTaskList(jobType, operationMode, otelEmitter); | ||
| case ORPHAN_FILES_DELETION_BATCH: | ||
| return prepareBatchedOrphanFilesDeletionTaskList( | ||
| jobType, properties, operationMode, otelEmitter); | ||
| case REPLICATION: | ||
| return prepareReplicationOperationTaskList(jobType, operationMode, otelEmitter); | ||
| case DATA_LAYOUT_STRATEGY_EXECUTION: | ||
|
|
@@ -300,6 +385,22 @@ public void buildOperationTaskListInParallel( | |
| buildDataLayoutOperationTaskListInParallel(jobType, properties, operationMode, otelEmitter); | ||
| } else if (jobType == JobConf.JobTypeEnum.TABLE_DIRECTORY_DELETION) { | ||
| buildDatabaseLevelOperationTasksInParallel(jobType, operationMode, otelEmitter); | ||
| } else if (jobType == JobConf.JobTypeEnum.ORPHAN_FILES_DELETION_BATCH) { | ||
| // Batched OFD needs the full table set in hand before it can group-by-db and bin-pack, | ||
| // so we use the synchronous fetch path then enqueue the tasks in bulk. | ||
| List<OperationTask<?>> tasks = | ||
| prepareBatchedOrphanFilesDeletionTaskList( | ||
| jobType, properties, operationMode, otelEmitter); | ||
| for (OperationTask<?> task : tasks) { | ||
| try { | ||
| operationTaskManager.addData(task); | ||
| } catch (InterruptedException e) { | ||
| Thread.currentThread().interrupt(); | ||
| log.warn("Interrupted while enqueueing batched OFD task", e); | ||
| } | ||
| } | ||
| operationTaskManager.updateDataGenerationCompletion(); | ||
| log.info("Enqueued {} batched OFD tasks for job type: {}", tasks.size(), jobType); | ||
| } else { | ||
| buildOperationTaskListInParallelInternal(jobType, operationMode, otelEmitter); | ||
| } | ||
|
|
||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Task implementation for batch job submission.