Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package com.scalar.db.dataloader.core.dataimport.datachunk;

import java.util.List;
import lombok.Builder;
import lombok.Data;

/**
 * One data chunk of an import: a chunk identifier plus the rows that belong to the chunk.
 *
 * <p>Lombok's {@code @Data} generates getters, setters (the fields are not {@code final}),
 * {@code equals}, {@code hashCode} and {@code toString}; {@code @Builder} generates
 * {@code ImportDataChunk.builder()}.
 *
 * <p>NOTE(review): the fields carry no access modifier, so they are package-private and can be
 * read or written directly from the same package, bypassing the generated accessors. Confirm
 * whether {@code private} was intended.
 */
@Data
@Builder
public class ImportDataChunk {

/** Identifier of this data chunk. */
int dataChunkId;
/** Rows contained in this data chunk; each entry pairs a row number with its JSON payload. */
List<ImportRow> sourceData;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package com.scalar.db.dataloader.core.dataimport.datachunk;

import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import java.time.Instant;
import lombok.Builder;
import lombok.Data;

/**
 * A DTO that stores status details of a single import data chunk.
 *
 * <p>All fields are {@code private final}, so Lombok's {@code @Data} generates getters but no
 * setters; instances are created through the Lombok-generated builder. The
 * {@code @JsonDeserialize} annotation tells Jackson to build instances through that same builder,
 * and each field is bound to a JSON property whose name equals the field name.
 *
 * <p>NOTE(review): the exact moment each counter or timestamp is captured is decided by code
 * outside this file; the per-field descriptions below are derived from the field names only.
 */
@Data
@Builder
@JsonDeserialize(builder = ImportDataChunkStatus.ImportDataChunkStatusBuilder.class)
public class ImportDataChunkStatus {

/** Identifier of the data chunk this status refers to. */
@JsonProperty("dataChunkId")
private final int dataChunkId;

/** Start timestamp (presumably when processing of the chunk began; confirm against producer). */
@JsonProperty("startTime")
private final Instant startTime;

/** End timestamp (presumably when processing of the chunk finished; confirm against producer). */
@JsonProperty("endTime")
private final Instant endTime;

/** Total number of records reported for the chunk. */
@JsonProperty("totalRecords")
private final int totalRecords;

/** Number of records counted as successful. */
@JsonProperty("successCount")
private final int successCount;

/** Number of records counted as failed. */
@JsonProperty("failureCount")
private final int failureCount;

/** Number of batches reported for the chunk. */
@JsonProperty("batchCount")
private final int batchCount;

/**
 * Total duration; the name indicates milliseconds. NOTE(review): stored as {@code int}, which
 * caps the representable duration at {@code Integer.MAX_VALUE} milliseconds.
 */
@JsonProperty("totalDurationInMilliSeconds")
private final int totalDurationInMilliSeconds;

/** Current state of the chunk within the import process. */
@JsonProperty("status")
private final ImportDataChunkStatusState status;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package com.scalar.db.dataloader.core.dataimport.datachunk;

/**
 * Status of an import data chunk during the import process.
 *
 * <p>Three states are defined, declared in the order {@code START}, {@code IN_PROGRESS},
 * {@code COMPLETE}. NOTE(review): the transitions between states are driven by code outside this
 * file; the declaration order suggests a start-to-finish lifecycle but does not enforce one.
 */
public enum ImportDataChunkStatusState {
START,
IN_PROGRESS,
COMPLETE
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package com.scalar.db.dataloader.core.dataimport.datachunk;

import com.fasterxml.jackson.databind.JsonNode;
import lombok.Value;

/**
 * Stores the data of a single row of an import file.
 *
 * <p>Lombok's {@code @Value} makes the class immutable: the fields become {@code private final},
 * getters and an all-arguments constructor (in field declaration order) are generated, and no
 * setters exist.
 */
@Value
public class ImportRow {
/** Number of the row. NOTE(review): whether numbering is 0- or 1-based is not visible here. */
int rowNumber;
/** The row's content as a Jackson JSON tree. */
JsonNode sourceData;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package com.scalar.db.dataloader.core.dataimport.task;

/**
 * Actions that an import task can perform.
 *
 * <p>Exactly two actions exist, declared in the order {@link #INSERT} then {@link #UPDATE}.
 */
public enum ImportTaskAction {
  /** The insert action. */
  INSERT,

  /** The update action. */
  UPDATE
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package com.scalar.db.dataloader.core.dataimport.task.result;

/**
 * Outcome categories for an import result.
 *
 * <p>Besides plain {@code SUCCESS} and {@code FAILURE}, the enum distinguishes a partial success,
 * three stage-specific failures ({@code VALIDATION_FAILED}, {@code RETRIEVAL_FAILED},
 * {@code MAPPING_FAILED}) and two interruption outcomes ({@code TIMEOUT}, {@code CANCELLED}).
 *
 * <p>NOTE(review): the conditions under which each value is assigned are decided by code outside
 * this file; the grouping above is inferred from the constant names only.
 */
public enum ImportResultStatus {
SUCCESS,
PARTIAL_SUCCESS,
FAILURE,
VALIDATION_FAILED,
RETRIEVAL_FAILED,
MAPPING_FAILED,
TIMEOUT,
CANCELLED
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package com.scalar.db.dataloader.core.dataimport.task.result;

import com.fasterxml.jackson.databind.JsonNode;
import com.scalar.db.dataloader.core.dataimport.task.ImportTaskAction;
import java.util.List;
import lombok.Builder;
import lombok.Value;

/**
 * Result of importing one record into one target table.
 *
 * <p>Immutable value object: Lombok's {@code @Value} makes every field {@code private final} and
 * generates getters, and {@code @Builder} generates the builder used to create instances.
 *
 * <p>NOTE(review): the field descriptions below are derived from the field names and types; the
 * code that populates them is not part of this file.
 */
@Builder
@Value
public class ImportTargetResult {
/** Namespace of the target table. */
String namespace;
/** Name of the target table. */
String tableName;
/** Action recorded for this target (insert or update). */
ImportTaskAction importAction;
/** Error messages recorded for this target. */
List<String> errors;
/** Flag named "dataMapped"; presumably whether source data was mapped to the target - confirm. */
boolean dataMapped;
/** The record as a Jackson JSON tree; presumably the data that was imported - confirm. */
JsonNode importedRecord;
/** Outcome recorded for this target. */
ImportTargetResultStatus status;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package com.scalar.db.dataloader.core.dataimport.task.result;

/**
 * Outcome of importing a record into a single target; this is the type of
 * {@code ImportTargetResult}'s {@code status} field.
 *
 * <p>NOTE(review): the conditions that produce each value are decided by code outside this file.
 * Judging by the names alone, {@code SAVED} is the success outcome and the remaining values name
 * distinct reasons a record was not saved - confirm against the code that assigns them.
 */
public enum ImportTargetResultStatus {
VALIDATION_FAILED,
RETRIEVAL_FAILED,
MISSING_COLUMNS,
DATA_ALREADY_EXISTS,
DATA_NOT_FOUND,
SAVE_FAILED,
SAVED,
ABORTED
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package com.scalar.db.dataloader.core.dataimport.task.result;

import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import java.util.List;
import lombok.Builder;
import lombok.Value;

/**
 * Result of a single import task.
 *
 * <p>Terminology, as described by the author in review: the import file is split into data
 * chunks; in transaction mode each data chunk is further split into transaction batches; within
 * a batch each individual row is imported by a transaction, and the process of importing one row
 * is called a "task".
 *
 * <p>Immutable value object ({@code @Value}) created through the Lombok-generated builder, which
 * Jackson also uses for deserialization ({@code @JsonDeserialize}). Fields, each bound to a JSON
 * property of the same name:
 *
 * <ul>
 *   <li>{@code rowNumber} - number of the row this result refers to
 *   <li>{@code targets} - one {@code ImportTargetResult} per target recorded for the row
 *   <li>{@code rawRecord} - the record as a Jackson JSON tree (presumably the unmodified source
 *       row; confirm against the producer)
 *   <li>{@code dataChunkId} - identifier of the data chunk the row belongs to
 * </ul>
 */
@Builder
@Value
@JsonDeserialize(builder = ImportTaskResult.ImportTaskResultBuilder.class)
public class ImportTaskResult {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Honestly speaking, it's hard to me to get the relationship of these classes and how to use them from this PR. It would be great if there is a simple diagram or something to show the relationship.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, my previous comment might be unclear. I just read the design document again. The definition of the following terms are still unclear to me, so adding comments about the definitions of xxxxx would be enough:

  • batch (I guess ImportTransactionBatch is related)
  • task (I guess ImportTaskAction and ImportTaskResult are related)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@komamitsu san,
Sorry for the late update, I will provide the general idea for batch amd task.

The import file is split into smaller data chunks for import initially. Each data chunk when the import is in transaction mode is further split into transaction batches. This is indicated as a batch .
In each transaction batch, each individual row of data is imported by a transaction. The process of importing each individual row via a transaction is termed as a task.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@inv-jishnu Thanks for the explanation! I think the information will be very helpful for future code reviewers and maintainers. Could you add the description to the design doc or as source code comments?

Copy link
Contributor Author

@inv-jishnu inv-jishnu Jan 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@komamitsu san,
I will check with @ypeckstadt and add these in the design doc.
Thank you.

@JsonProperty("rowNumber")
int rowNumber;

@JsonProperty("targets")
List<ImportTargetResult> targets;

@JsonProperty("rawRecord")
JsonNode rawRecord;

@JsonProperty("dataChunkId")
int dataChunkId;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package com.scalar.db.dataloader.core.dataimport.transactionbatch;

import com.scalar.db.dataloader.core.dataimport.datachunk.ImportRow;
import java.util.List;
import lombok.Builder;
import lombok.Value;

/**
 * A transaction batch: a group of rows identified by a batch id.
 *
 * <p>As described by the author in review, when the import runs in transaction mode each data
 * chunk is further split into transaction batches; this class holds the rows of one such batch.
 *
 * <p>Immutable value object ({@code @Value}) created through the Lombok-generated builder.
 */
@Builder
@Value
public class ImportTransactionBatch {
/** Identifier of this transaction batch. */
int transactionBatchId;
/** Rows contained in this transaction batch. */
List<ImportRow> sourceData;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package com.scalar.db.dataloader.core.dataimport.transactionbatch;

import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import com.scalar.db.dataloader.core.dataimport.task.result.ImportTaskResult;
import java.util.List;
import lombok.Builder;
import lombok.Value;

/**
 * Result of processing one transaction batch.
 *
 * <p>Immutable value object ({@code @Value}) created through the Lombok-generated builder, which
 * Jackson also uses for deserialization ({@code @JsonDeserialize}). Each field is bound to a JSON
 * property whose name equals the field name.
 */
@Builder
@Value
@JsonDeserialize(builder = ImportTransactionBatchResult.ImportTransactionBatchResultBuilder.class)
public class ImportTransactionBatchResult {
/** Identifier of the data chunk the batch belongs to. */
@JsonProperty("dataChunkId")
int dataChunkId;

/** Identifier of the transaction batch. */
@JsonProperty("transactionBatchId")
int transactionBatchId;

/** Identifier of the transaction associated with the batch. */
@JsonProperty("transactionId")
String transactionId;

/** Per-row task results recorded for the batch. */
@JsonProperty("records")
List<ImportTaskResult> records;

/** Error messages recorded for the batch. */
@JsonProperty("errors")
List<String> errors;

/** Success flag for the batch. NOTE(review): the exact success criterion is set by the producer. */
@JsonProperty("success")
boolean success;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package com.scalar.db.dataloader.core.dataimport.transactionbatch;

import com.scalar.db.dataloader.core.dataimport.task.result.ImportTaskResult;
import java.util.List;
import lombok.Builder;
import lombok.Value;

/**
 * Status details of one transaction batch.
 *
 * <p>Immutable value object ({@code @Value}) created through the Lombok-generated builder. Unlike
 * {@code ImportTransactionBatchResult}, this class carries no Jackson annotations.
 *
 * <p>NOTE(review): the field list is identical in names, types and order to
 * {@code ImportTransactionBatchResult}; confirm whether both types are needed or whether one
 * could be derived from the other.
 */
@Builder
@Value
public class ImportTransactionBatchStatus {
/** Identifier of the data chunk the batch belongs to. */
int dataChunkId;
/** Identifier of the transaction batch. */
int transactionBatchId;
/** Identifier of the transaction associated with the batch. */
String transactionId;
/** Per-row task results recorded for the batch. */
List<ImportTaskResult> records;
/** Error messages recorded for the batch. */
List<String> errors;
/** Success flag for the batch. NOTE(review): the exact success criterion is set by the producer. */
boolean success;
}