From 753618b5f2173ba05b05d1e5218eefa78e564762 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Wed, 4 Dec 2024 15:59:38 +0530 Subject: [PATCH 01/87] Util classes for data loader --- data-loader/build.gradle | 11 + .../db/dataloader/core/ErrorMessage.java | 62 +++++ .../core/exception/Base64Exception.java | 14 ++ .../dataloader/core/util/CollectionUtil.java | 19 ++ .../db/dataloader/core/util/CsvUtil.java | 17 ++ .../db/dataloader/core/util/DebugUtil.java | 30 +++ .../db/dataloader/core/util/DecimalUtil.java | 40 ++++ .../db/dataloader/core/util/PathUtil.java | 24 ++ .../db/dataloader/core/util/RuntimeUtil.java | 21 ++ .../db/dataloader/core/UnitTestUtils.java | 226 ++++++++++++++++++ .../core/util/CollectionUtilTest.java | 28 +++ .../db/dataloader/core/util/CsvUtilTest.java | 25 ++ .../dataloader/core/util/DecimalUtilTest.java | 20 ++ .../db/dataloader/core/util/PathUtilTest.java | 45 ++++ .../dataloader/core/util/RuntimeUtilTest.java | 24 ++ 15 files changed, 606 insertions(+) create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/ErrorMessage.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/exception/Base64Exception.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/CollectionUtil.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/CsvUtil.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/DebugUtil.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/DecimalUtil.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/PathUtil.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/RuntimeUtil.java create mode 100644 data-loader/core/src/test/java/com/scalar/db/dataloader/core/UnitTestUtils.java create mode 100644 
data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/CollectionUtilTest.java create mode 100644 data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/CsvUtilTest.java create mode 100644 data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/DecimalUtilTest.java create mode 100644 data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/PathUtilTest.java create mode 100644 data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/RuntimeUtilTest.java diff --git a/data-loader/build.gradle b/data-loader/build.gradle index f633095150..3be6a49e81 100644 --- a/data-loader/build.gradle +++ b/data-loader/build.gradle @@ -1,4 +1,8 @@ subprojects { + + ext { + jacksonVersion = '2.17.0' + } group = "scalardb.dataloader" dependencies { // AssertJ @@ -13,6 +17,7 @@ subprojects { // Apache Commons implementation("org.apache.commons:commons-lang3:${commonsLangVersion}") implementation("commons-io:commons-io:${commonsIoVersion}") + implementation("org.slf4j:slf4j-simple:${slf4jVersion}") // Mockito testImplementation "org.mockito:mockito-core:${mockitoVersion}" @@ -24,5 +29,11 @@ subprojects { annotationProcessor "org.projectlombok:lombok:${lombokVersion}" testCompileOnly "org.projectlombok:lombok:${lombokVersion}" testAnnotationProcessor "org.projectlombok:lombok:${lombokVersion}" + + // Jackson + implementation("com.fasterxml.jackson.core:jackson-core:${jacksonVersion}") + implementation("com.fasterxml.jackson.core:jackson-databind:${jacksonVersion}") + implementation("com.fasterxml.jackson.datatype:jackson-datatype-jsr310:${jacksonVersion}") + } } diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/ErrorMessage.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/ErrorMessage.java new file mode 100644 index 0000000000..395385467e --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/ErrorMessage.java @@ -0,0 +1,62 @@ +package com.scalar.db.dataloader.core; + 
+public class ErrorMessage { + public static final String ERROR_MISSING_NAMESPACE_OR_TABLE = + "the provided namespace '%s' and/or table name '%s' is incorrect and could not be found"; + public static final String ERROR_MISSING_COLUMN = "missing field or column mapping for %s"; + public static final String ERROR_MISSING_PARTITION_KEY_COLUMN = + "missing required field or column mapping for partition key %s"; + public static final String ERROR_MISSING_CLUSTERING_KEY_COLUMN = + "missing required field or column mapping for clustering key %s"; + public static final String ERROR_CRUD_EXCEPTION = + "something went wrong while trying to save the data"; + public static final String ERROR_DATA_ALREADY_EXISTS = "record already exists"; + public static final String ERROR_DATA_NOT_FOUND = "record was not found"; + public static final String ERROR_CONTROL_FILE_MISSING_DATA_MAPPINGS = + "the control file is missing data mappings"; + public static final String ERROR_TARGET_COLUMN_NOT_FOUND = + "The target column '%s' for source field '%s' could not be found in table '%s'"; + public static final String ERROR_MISSING_PARTITION_KEY = + "The required partition key '%s' is missing in the control file mapping for table '%s'"; + public static final String ERROR_MISSING_CLUSTERING_KEY = + "The required clustering key '%s' is missing in the control file mapping for table '%s'"; + public static final String ERROR_MISSING_SOURCE_FIELD = + "the data mapping source field '%s' for table '%s' is missing in the json data record"; + public static final String ERROR_DUPLICATE_DATA_MAPPINGS = + "Duplicate data mappings found for table '%s' in the control file"; + public static final String ERROR_MISSING_COLUMN_MAPPING = + "No mapping found for column '%s' in table '%s' in the control file. \nControl file validation set at 'FULL'. 
All columns need to be mapped."; + public static final String ERROR_MULTIPLE_MAPPINGS_FOR_COLUMN_FOUND = + "Multiple data mappings found for column '%s' in table '%s'"; + public static final String ERROR_METHOD_NULL_ARGUMENT = "Method null argument not allowed"; + public static final String ERROR_COULD_NOT_FIND_PARTITION_KEY = + "could not find the partition key"; + public static final String ERROR_METADATA_OR_DATA_TYPES_NOT_FOUND = + "no table meta data or a data type map was found for %s.%s"; + public static final String ERROR_EMPTY_SOURCE_ROW = + "The source record data was undefined or empty"; + public static final String ERROR_UPSERT_INSERT_MISSING_COLUMNS = + "The source record needs to contain all fields if the UPSERT turns into an INSERT"; + public static final String ERROR_SCAN_FAILED = "Could not complete the scan"; + public static final String ERROR_UNKNOWN_TRANSACTION_STATUS = + "Error : the transaction to retrieve the account is in an unknown state"; + public static final String ERROR_INVALID_PROJECTION = "The column '%s' was not found"; + public static final String ERROR_SCAN = + "Something went wrong while scanning. Are you sure you are running in the correct transaction mode?"; + public static final String ERROR_CLUSTERING_KEY_NOT_FOUND = + "The provided clustering key %s was not found"; + public static final String ERROR_KEY_NOT_FOUND = "The key '%s' could not be found"; + public static final String ERROR_KEY_FORMATTING = + "They provided key '%s is not formatted correctly. Expected format is field=value."; + public static final String ERROR_SORT_FORMATTING = + "They provided sort '%s is not formatted correctly. Expected format is field=asc|desc."; + public static final String ERROR_VALUE_TO_STRING_CONVERSION_FAILED = + "Something went wrong while converting the ScalarDB values to strings. 
The table metadata and Value datatype probably do not match."; + + public static final String ERROR_BASE64_ENCODING = + "Invalid base64 encoding for blob value for column %s"; + public static final String ERROR_NUMBER_FORMAT_EXCEPTION = + "Invalid number specified for column %s"; + public static final String ERROR_NULL_POINTER_EXCEPTION = + "The %s column does not support a null value"; +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/exception/Base64Exception.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/exception/Base64Exception.java new file mode 100644 index 0000000000..9cf94854c0 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/exception/Base64Exception.java @@ -0,0 +1,14 @@ +package com.scalar.db.dataloader.core.exception; + +/** Exception thrown when an error occurs while trying to encode or decode base64 values. */ +public class Base64Exception extends Exception { + + /** + * Class constructor + * + * @param message Exception message + */ + public Base64Exception(String message) { + super(message); + } +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/CollectionUtil.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/CollectionUtil.java new file mode 100644 index 0000000000..184e29b7f3 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/CollectionUtil.java @@ -0,0 +1,19 @@ +package com.scalar.db.dataloader.core.util; + +import java.util.Collection; + +/** Utils for collection classes */ +public class CollectionUtil { + + /** + * Check if lists are of same length + * + * @param collections List of collections + * @return collections are same length or not + */ + public static boolean areSameLength(Collection... 
collections) { + int N = collections[0].size(); + for (Collection a : collections) if (a.size() != N) return false; + return true; + } +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/CsvUtil.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/CsvUtil.java new file mode 100644 index 0000000000..9979ce58ca --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/CsvUtil.java @@ -0,0 +1,17 @@ +package com.scalar.db.dataloader.core.util; + +/** Utils for csv data manipulation */ +public class CsvUtil { + + /** + * Remove the last character in the string builder if it's a delimiter + * + * @param stringBuilder String builder instance + * @param delimiter Delimiter character used in the CSV content + */ + public static void removeTrailingDelimiter(StringBuilder stringBuilder, String delimiter) { + if (stringBuilder.substring(stringBuilder.length() - 1).equals(delimiter)) { + stringBuilder.setLength(stringBuilder.length() - 1); + } + } +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/DebugUtil.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/DebugUtil.java new file mode 100644 index 0000000000..cde28ca33e --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/DebugUtil.java @@ -0,0 +1,30 @@ +package com.scalar.db.dataloader.core.util; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class DebugUtil { + + private static final Logger LOGGER = LoggerFactory.getLogger(DebugUtil.class); + + /** + * log memory usage + * + * @param stage stage of process + */ + public static void logMemoryUsage(String stage) { + Runtime runtime = Runtime.getRuntime(); + long usedMemory = runtime.totalMemory() - runtime.freeMemory(); + long maxMemory = runtime.maxMemory(); + + LOGGER.info( + "Memory usage at {}: Used Memory = {} MB, Max Memory = {} MB", + stage, + formatMemorySize(usedMemory), + 
formatMemorySize(maxMemory)); + } + + private static String formatMemorySize(long size) { + return String.format("%.2f", size / (1024.0 * 1024.0)); + } +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/DecimalUtil.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/DecimalUtil.java new file mode 100644 index 0000000000..8372dc8aac --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/DecimalUtil.java @@ -0,0 +1,40 @@ +package com.scalar.db.dataloader.core.util; + +import java.text.DecimalFormat; +import java.text.DecimalFormatSymbols; +import java.util.Locale; + +/** Utils for decimal handling */ +public class DecimalUtil { + + /** + * Convert a Double to a non-scientific formatted string + * + * @param doubleValue Double value + * @return formatted double as a string + */ + public static String convertToNonScientific(Double doubleValue) { + return createFormatter().format(doubleValue); + } + + /** + * Convert a Float to a non-scientific formatted string + * + * @param floatValue Float value + * @return formatted float as a string + */ + public static String convertToNonScientific(Float floatValue) { + return createFormatter().format(floatValue); + } + + /** + * Create a Decimal formatter + * + * @return decimal formatter instance + */ + private static DecimalFormat createFormatter() { + DecimalFormat df = new DecimalFormat("0", DecimalFormatSymbols.getInstance(Locale.ENGLISH)); + df.setMaximumFractionDigits(340); // 340 = DecimalFormat.DOUBLE_FRACTION_DIGITS + return df; + } +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/PathUtil.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/PathUtil.java new file mode 100644 index 0000000000..c0bd226e45 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/PathUtil.java @@ -0,0 +1,24 @@ +package com.scalar.db.dataloader.core.util; + +public class 
PathUtil { + + /** + * Ensures the specified path has a trailing slash. + * + *

java.nio.file.Path is not used because this is also used for virtual paths. + * + * @param path the path + * @return the path with a trailing slash + */ + public static String ensureTrailingSlash(String path) { + if (path == null || path.isEmpty()) { + return ""; + } + + if (!path.endsWith("/")) { + return path + "/"; + } + + return path; + } +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/RuntimeUtil.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/RuntimeUtil.java new file mode 100644 index 0000000000..a5de36d34c --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/RuntimeUtil.java @@ -0,0 +1,21 @@ +package com.scalar.db.dataloader.core.util; + +import static com.scalar.db.dataloader.core.ErrorMessage.ERROR_METHOD_NULL_ARGUMENT; + +/** Utils for runtime checks */ +public class RuntimeUtil { + + /** + * Argument null check + * + * @param values List of arguments + * @throws NullPointerException when one of the arguments is null + */ + public static void checkNotNull(Object... 
values) { + for (Object value : values) { + if (value == null) { + throw new NullPointerException(ERROR_METHOD_NULL_ARGUMENT); + } + } + } +} diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/UnitTestUtils.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/UnitTestUtils.java new file mode 100644 index 0000000000..b915b64af8 --- /dev/null +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/UnitTestUtils.java @@ -0,0 +1,226 @@ +package com.scalar.db.dataloader.core; + +import static com.scalar.db.io.DataType.BIGINT; +import static com.scalar.db.io.DataType.BLOB; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.scalar.db.api.TableMetadata; +import com.scalar.db.dataloader.core.util.DecimalUtil; +import com.scalar.db.io.BigIntColumn; +import com.scalar.db.io.BlobColumn; +import com.scalar.db.io.BooleanColumn; +import com.scalar.db.io.Column; +import com.scalar.db.io.DataType; +import com.scalar.db.io.DoubleColumn; +import com.scalar.db.io.FloatColumn; +import com.scalar.db.io.IntColumn; +import com.scalar.db.io.TextColumn; +import com.scalar.db.transaction.consensuscommit.Attribute; +import java.util.*; + +/** Utils for the service unit tests */ +public class UnitTestUtils { + public static final String TEST_NAMESPACE = "namespace"; + public static final String TEST_TABLE_NAME = "table"; + public static final String TEST_COLUMN_1_PK = "col1"; + public static final String TEST_COLUMN_2_CK = "col2"; + public static final String TEST_COLUMN_3_CK = "col3"; + public static final String TEST_COLUMN_4 = "col4"; + public static final String TEST_COLUMN_5 = "col5"; + public static final String TEST_COLUMN_6 = "col6"; + public static final String TEST_COLUMN_7 = "col7"; + + public static final String TEST_VALUE_TEXT = "test value"; + + public static final String TEST_VALUE_BLOB_STRING = "blob test value"; + public static final byte[] TEST_VALUE_BLOB = 
TEST_VALUE_BLOB_STRING.getBytes(); + public static final String TEST_VALUE_BLOB_BASE64 = + new String(Base64.getEncoder().encode(TEST_VALUE_BLOB)); + public static final String TEST_VALUE_TX_ID = "txt value 464654654"; + public static final Float TEST_VALUE_FLOAT = Float.MIN_VALUE; + public static final int TEST_VALUE_INT = Integer.MAX_VALUE; + public static final Long TEST_VALUE_LONG = BigIntColumn.MAX_VALUE; + public static final boolean TEST_VALUE_BOOLEAN = true; + public static final double TEST_VALUE_DOUBLE = Double.MIN_VALUE; + public static final String TEST_CSV_DELIMITER = ";"; + + public static TableMetadata createTestTableMetadata() { + return TableMetadata.newBuilder() + .addColumn(TEST_COLUMN_1_PK, BIGINT) + .addColumn(TEST_COLUMN_2_CK, DataType.INT) + .addColumn(TEST_COLUMN_3_CK, DataType.BOOLEAN) + .addColumn(TEST_COLUMN_4, DataType.FLOAT) + .addColumn(TEST_COLUMN_5, DataType.DOUBLE) + .addColumn(TEST_COLUMN_6, DataType.TEXT) + .addColumn(TEST_COLUMN_7, BLOB) + .addColumn(Attribute.BEFORE_PREFIX + TEST_COLUMN_4, DataType.FLOAT) + .addColumn(Attribute.BEFORE_PREFIX + TEST_COLUMN_5, DataType.DOUBLE) + .addColumn(Attribute.BEFORE_PREFIX + TEST_COLUMN_6, DataType.TEXT) + .addColumn(Attribute.BEFORE_PREFIX + TEST_COLUMN_7, BLOB) + .addColumn(Attribute.ID, DataType.TEXT) + .addColumn(Attribute.STATE, DataType.INT) + .addColumn(Attribute.VERSION, DataType.INT) + .addColumn(Attribute.PREPARED_AT, BIGINT) + .addColumn(Attribute.COMMITTED_AT, BIGINT) + .addColumn(Attribute.BEFORE_ID, DataType.TEXT) + .addColumn(Attribute.BEFORE_STATE, DataType.INT) + .addColumn(Attribute.BEFORE_VERSION, DataType.INT) + .addColumn(Attribute.BEFORE_PREPARED_AT, BIGINT) + .addColumn(Attribute.BEFORE_COMMITTED_AT, BIGINT) + .addPartitionKey(TEST_COLUMN_1_PK) + .addClusteringKey(TEST_COLUMN_2_CK) + .addClusteringKey(TEST_COLUMN_3_CK) + .build(); + } + + public static ObjectNode getOutputDataWithMetadata() { + ObjectMapper mapper = new ObjectMapper(); + ObjectNode rootNode = 
mapper.createObjectNode(); + rootNode.put(TEST_COLUMN_1_PK, TEST_VALUE_LONG); + rootNode.put(TEST_COLUMN_2_CK, TEST_VALUE_INT); + rootNode.put(TEST_COLUMN_3_CK, TEST_VALUE_BOOLEAN); + rootNode.put(TEST_COLUMN_4, TEST_VALUE_FLOAT); + rootNode.put(TEST_COLUMN_5, TEST_VALUE_DOUBLE); + rootNode.put(TEST_COLUMN_6, TEST_VALUE_TEXT); + rootNode.put(TEST_COLUMN_7, TEST_VALUE_BLOB); + rootNode.put(Attribute.BEFORE_PREFIX + TEST_COLUMN_4, TEST_VALUE_FLOAT); + rootNode.put(Attribute.BEFORE_PREFIX + TEST_COLUMN_5, TEST_VALUE_DOUBLE); + rootNode.put(Attribute.BEFORE_PREFIX + TEST_COLUMN_6, TEST_VALUE_TEXT); + rootNode.put(Attribute.BEFORE_PREFIX + TEST_COLUMN_7, TEST_VALUE_BLOB); + rootNode.put(Attribute.ID, TEST_VALUE_TX_ID); + rootNode.put(Attribute.STATE, TEST_VALUE_INT); + rootNode.put(Attribute.VERSION, TEST_VALUE_INT); + rootNode.put(Attribute.PREPARED_AT, TEST_VALUE_LONG); + rootNode.put(Attribute.COMMITTED_AT, TEST_VALUE_LONG); + rootNode.put(Attribute.BEFORE_ID, TEST_VALUE_TEXT); + rootNode.put(Attribute.BEFORE_STATE, TEST_VALUE_INT); + rootNode.put(Attribute.BEFORE_VERSION, TEST_VALUE_INT); + rootNode.put(Attribute.BEFORE_PREPARED_AT, TEST_VALUE_LONG); + rootNode.put(Attribute.BEFORE_COMMITTED_AT, TEST_VALUE_LONG); + return rootNode; + } + + public static ObjectNode getOutputDataWithoutMetadata() { + ObjectMapper mapper = new ObjectMapper(); + ObjectNode rootNode = mapper.createObjectNode(); + rootNode.put(TEST_COLUMN_1_PK, TEST_VALUE_LONG); + rootNode.put(TEST_COLUMN_2_CK, TEST_VALUE_INT); + rootNode.put(TEST_COLUMN_3_CK, TEST_VALUE_BOOLEAN); + rootNode.put(TEST_COLUMN_4, TEST_VALUE_FLOAT); + rootNode.put(TEST_COLUMN_5, TEST_VALUE_DOUBLE); + rootNode.put(TEST_COLUMN_6, TEST_VALUE_TEXT); + rootNode.put(TEST_COLUMN_7, TEST_VALUE_BLOB); + return rootNode; + } + + public static List getColumnsListOfMetadata() { + List projectedColumns = new ArrayList<>(); + projectedColumns.add(TEST_COLUMN_1_PK); + projectedColumns.add(TEST_COLUMN_2_CK); + 
projectedColumns.add(TEST_COLUMN_3_CK); + projectedColumns.add(TEST_COLUMN_4); + projectedColumns.add(TEST_COLUMN_5); + projectedColumns.add(TEST_COLUMN_6); + projectedColumns.add(TEST_COLUMN_7); + projectedColumns.add(Attribute.BEFORE_PREFIX + TEST_COLUMN_4); + projectedColumns.add(Attribute.BEFORE_PREFIX + TEST_COLUMN_5); + projectedColumns.add(Attribute.BEFORE_PREFIX + TEST_COLUMN_6); + projectedColumns.add(Attribute.BEFORE_PREFIX + TEST_COLUMN_7); + projectedColumns.add(Attribute.ID); + projectedColumns.add(Attribute.STATE); + projectedColumns.add(Attribute.VERSION); + projectedColumns.add(Attribute.PREPARED_AT); + projectedColumns.add(Attribute.COMMITTED_AT); + projectedColumns.add(Attribute.BEFORE_ID); + projectedColumns.add(Attribute.BEFORE_STATE); + projectedColumns.add(Attribute.BEFORE_VERSION); + projectedColumns.add(Attribute.BEFORE_PREPARED_AT); + projectedColumns.add(Attribute.BEFORE_COMMITTED_AT); + return projectedColumns; + } + + public static Map getColumnData() { + Map columnData = new HashMap<>(); + columnData.put(TEST_COLUMN_1_PK, BIGINT); + columnData.put(TEST_COLUMN_2_CK, DataType.INT); + columnData.put(TEST_COLUMN_3_CK, DataType.BOOLEAN); + columnData.put(TEST_COLUMN_4, DataType.FLOAT); + columnData.put(TEST_COLUMN_5, DataType.DOUBLE); + columnData.put(TEST_COLUMN_6, DataType.TEXT); + columnData.put(TEST_COLUMN_7, BLOB); + columnData.put(Attribute.BEFORE_PREFIX + TEST_COLUMN_4, DataType.FLOAT); + columnData.put(Attribute.BEFORE_PREFIX + TEST_COLUMN_5, DataType.DOUBLE); + columnData.put(Attribute.BEFORE_PREFIX + TEST_COLUMN_6, DataType.TEXT); + columnData.put(Attribute.BEFORE_PREFIX + TEST_COLUMN_7, BLOB); + columnData.put(Attribute.ID, DataType.TEXT); + columnData.put(Attribute.STATE, DataType.INT); + columnData.put(Attribute.VERSION, DataType.INT); + columnData.put(Attribute.PREPARED_AT, BIGINT); + columnData.put(Attribute.COMMITTED_AT, BIGINT); + columnData.put(Attribute.BEFORE_ID, DataType.TEXT); + columnData.put(Attribute.BEFORE_STATE, 
DataType.INT); + columnData.put(Attribute.BEFORE_VERSION, DataType.INT); + columnData.put(Attribute.BEFORE_PREPARED_AT, BIGINT); + columnData.put(Attribute.BEFORE_COMMITTED_AT, BIGINT); + return columnData; + } + + public static Map> createTestValues() { + Map> values = new HashMap<>(); + values.put(TEST_COLUMN_1_PK, BigIntColumn.of(TEST_COLUMN_1_PK, TEST_VALUE_LONG)); + values.put(TEST_COLUMN_2_CK, IntColumn.of(TEST_COLUMN_2_CK, TEST_VALUE_INT)); + values.put(TEST_COLUMN_3_CK, BooleanColumn.of(TEST_COLUMN_3_CK, TEST_VALUE_BOOLEAN)); + values.put(TEST_COLUMN_4, FloatColumn.of(TEST_COLUMN_4, TEST_VALUE_FLOAT)); + values.put(TEST_COLUMN_5, DoubleColumn.of(TEST_COLUMN_5, TEST_VALUE_DOUBLE)); + values.put(TEST_COLUMN_6, TextColumn.of(TEST_COLUMN_6, TEST_VALUE_TEXT)); + values.put(TEST_COLUMN_7, BlobColumn.of(TEST_COLUMN_7, TEST_VALUE_BLOB)); + values.put( + Attribute.BEFORE_PREFIX + TEST_COLUMN_4, + FloatColumn.of(Attribute.BEFORE_PREFIX + TEST_COLUMN_4, TEST_VALUE_FLOAT)); + values.put( + Attribute.BEFORE_PREFIX + TEST_COLUMN_5, + DoubleColumn.of(Attribute.BEFORE_PREFIX + TEST_COLUMN_5, TEST_VALUE_DOUBLE)); + values.put( + Attribute.BEFORE_PREFIX + TEST_COLUMN_6, + TextColumn.of(Attribute.BEFORE_PREFIX + TEST_COLUMN_6, TEST_VALUE_TEXT)); + values.put( + Attribute.BEFORE_PREFIX + TEST_COLUMN_7, + BlobColumn.of(Attribute.BEFORE_PREFIX + TEST_COLUMN_7, TEST_VALUE_BLOB)); + values.put(Attribute.ID, TextColumn.of(Attribute.ID, TEST_VALUE_TX_ID)); + values.put(Attribute.STATE, IntColumn.of(Attribute.STATE, TEST_VALUE_INT)); + values.put(Attribute.VERSION, IntColumn.of(Attribute.VERSION, TEST_VALUE_INT)); + values.put(Attribute.PREPARED_AT, BigIntColumn.of(Attribute.PREPARED_AT, TEST_VALUE_LONG)); + values.put(Attribute.COMMITTED_AT, BigIntColumn.of(Attribute.COMMITTED_AT, TEST_VALUE_LONG)); + values.put(Attribute.BEFORE_ID, TextColumn.of(Attribute.BEFORE_ID, TEST_VALUE_TEXT)); + values.put(Attribute.BEFORE_STATE, IntColumn.of(Attribute.BEFORE_STATE, TEST_VALUE_INT)); + 
values.put(Attribute.BEFORE_VERSION, IntColumn.of(Attribute.BEFORE_VERSION, TEST_VALUE_INT)); + values.put( + Attribute.BEFORE_PREPARED_AT, + BigIntColumn.of(Attribute.BEFORE_PREPARED_AT, TEST_VALUE_LONG)); + values.put( + Attribute.BEFORE_COMMITTED_AT, + BigIntColumn.of(Attribute.BEFORE_COMMITTED_AT, TEST_VALUE_LONG)); + return values; + } + + public static String getSourceTestValue(DataType dataType) { + switch (dataType) { + case INT: + return Integer.toString(TEST_VALUE_INT); + case BIGINT: + return Long.toString(TEST_VALUE_LONG); + case FLOAT: + return DecimalUtil.convertToNonScientific(TEST_VALUE_FLOAT); + case DOUBLE: + return DecimalUtil.convertToNonScientific(TEST_VALUE_DOUBLE); + case BLOB: + return TEST_VALUE_BLOB_BASE64; + case BOOLEAN: + return Boolean.toString(TEST_VALUE_BOOLEAN); + case TEXT: + default: + return TEST_VALUE_TEXT; + } + } +} diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/CollectionUtilTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/CollectionUtilTest.java new file mode 100644 index 0000000000..b054a55cef --- /dev/null +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/CollectionUtilTest.java @@ -0,0 +1,28 @@ +package com.scalar.db.dataloader.core.util; + +import static org.assertj.core.api.Assertions.assertThat; + +import java.util.ArrayList; +import java.util.List; +import org.junit.jupiter.api.Test; + +/** Unit tests for CollectionUtils */ +class CollectionUtilTest { + + @Test + void areSameLength_CollectionsAllSameLength_ShouldReturnTrue() { + List listOne = new ArrayList<>(); + List listTwo = new ArrayList<>(); + boolean actual = CollectionUtil.areSameLength(listOne, listTwo); + assertThat(actual).isTrue(); + } + + @Test + void areSameLength_CollectionsDifferentLength_ShouldReturnFalse() { + List listOne = new ArrayList<>(); + List listTwo = new ArrayList<>(); + listTwo.add(5); + boolean actual = CollectionUtil.areSameLength(listOne, listTwo); + 
assertThat(actual).isFalse(); + } +} diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/CsvUtilTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/CsvUtilTest.java new file mode 100644 index 0000000000..2afcfbcbe8 --- /dev/null +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/CsvUtilTest.java @@ -0,0 +1,25 @@ +package com.scalar.db.dataloader.core.util; + +import static org.assertj.core.api.Assertions.assertThat; + +import org.junit.jupiter.api.Test; + +/** Unit tests for CsvUtils */ +class CsvUtilTest { + + @Test + void removeTrailingDelimiter_HasTrailingDelimiter_ShouldRemoveDelimiter() { + StringBuilder stringBuilder = new StringBuilder(); + stringBuilder.append("testing;"); + CsvUtil.removeTrailingDelimiter(stringBuilder, ";"); + assertThat(stringBuilder.toString()).isEqualTo("testing"); + } + + @Test + void removeTrailingDelimiter_DoesNotHaveTrailingDelimiter_ShouldNotRemoveAnything() { + StringBuilder stringBuilder = new StringBuilder(); + stringBuilder.append("testing"); + CsvUtil.removeTrailingDelimiter(stringBuilder, ";"); + assertThat(stringBuilder.toString()).isEqualTo("testing"); + } +} diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/DecimalUtilTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/DecimalUtilTest.java new file mode 100644 index 0000000000..c99ad0866d --- /dev/null +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/DecimalUtilTest.java @@ -0,0 +1,20 @@ +package com.scalar.db.dataloader.core.util; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +class DecimalUtilTest { + @Test + void convertToNonScientific_withValidDoubleValue_shouldReturnProperStringValue() { + String expectedValue = "340.55"; + Double value = 340.55; + Assertions.assertEquals(expectedValue, DecimalUtil.convertToNonScientific(value)); + } + + @Test + void 
convertToNonScientific_withValidFloatValue_shouldReturnProperStringValue() { + String expectedValue = "356"; + Float value = 356F; + Assertions.assertEquals(expectedValue, DecimalUtil.convertToNonScientific(value)); + } +} diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/PathUtilTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/PathUtilTest.java new file mode 100644 index 0000000000..0db7c4edff --- /dev/null +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/PathUtilTest.java @@ -0,0 +1,45 @@ +package com.scalar.db.dataloader.core.util; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +class PathUtilTest { + + @Test + void ensureTrailingSlash_nullPath_returnsEmptyString() { + String path = null; + String result = PathUtil.ensureTrailingSlash(path); + Assertions.assertEquals("", result); + } + + @Test + void ensureTrailingSlash_emptyPath_returnsEmptyString() { + String path = ""; + String result = PathUtil.ensureTrailingSlash(path); + Assertions.assertEquals("", result); + } + + @Test + void ensureTrailingSlash_pathWithoutTrailingSlash_addsTrailingSlash() { + String path = "/path/to/directory"; + String expectedResult = "/path/to/directory/"; + String result = PathUtil.ensureTrailingSlash(path); + Assertions.assertEquals(expectedResult, result); + } + + @Test + void ensureTrailingSlash_pathWithTrailingSlash_returnsOriginalPath() { + String path = "/path/to/directory/"; + String expectedResult = "/path/to/directory/"; + String result = PathUtil.ensureTrailingSlash(path); + Assertions.assertEquals(expectedResult, result); + } + + @Test + void ensureTrailingSlash_virtualPath_addsTrailingSlash() { + String path = "s3://bucket/path"; + String expectedResult = "s3://bucket/path/"; + String result = PathUtil.ensureTrailingSlash(path); + Assertions.assertEquals(expectedResult, result); + } +} diff --git 
a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/RuntimeUtilTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/RuntimeUtilTest.java new file mode 100644 index 0000000000..32659fe1cf --- /dev/null +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/RuntimeUtilTest.java @@ -0,0 +1,24 @@ +package com.scalar.db.dataloader.core.util; + +import static com.scalar.db.dataloader.core.ErrorMessage.ERROR_METHOD_NULL_ARGUMENT; +import static org.assertj.core.api.AssertionsForClassTypes.assertThatThrownBy; + +import org.junit.jupiter.api.Test; + +/** RuntimeUtils unit tests */ +public class RuntimeUtilTest { + + @Test + public void checkNotNull_HasNullValues_ShouldThrowException() { + assertThatThrownBy(() -> RuntimeUtil.checkNotNull(null, null)) + .isExactlyInstanceOf(NullPointerException.class) + .hasMessage(ERROR_METHOD_NULL_ARGUMENT); + } + + @Test + public void checkNotNull_HasNoNullValues_ShouldNotThrowException() { + String string = "1"; + Object object = new Object(); + RuntimeUtil.checkNotNull(string, object); + } +} From 8d39d02677c53fe747492f6f3ddcd343bcbb05a6 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Wed, 4 Dec 2024 17:55:22 +0530 Subject: [PATCH 02/87] Fix spotbug issue --- .../java/com/scalar/db/dataloader/core/UnitTestUtils.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/UnitTestUtils.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/UnitTestUtils.java index b915b64af8..bf4b4414af 100644 --- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/UnitTestUtils.java +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/UnitTestUtils.java @@ -17,6 +17,7 @@ import com.scalar.db.io.IntColumn; import com.scalar.db.io.TextColumn; import com.scalar.db.transaction.consensuscommit.Attribute; +import java.nio.charset.StandardCharsets; import java.util.*; /** Utils for the service 
unit tests */ @@ -34,9 +35,9 @@ public class UnitTestUtils { public static final String TEST_VALUE_TEXT = "test value"; public static final String TEST_VALUE_BLOB_STRING = "blob test value"; - public static final byte[] TEST_VALUE_BLOB = TEST_VALUE_BLOB_STRING.getBytes(); + static final byte[] TEST_VALUE_BLOB = TEST_VALUE_BLOB_STRING.getBytes(StandardCharsets.UTF_8); public static final String TEST_VALUE_BLOB_BASE64 = - new String(Base64.getEncoder().encode(TEST_VALUE_BLOB)); + new String(Base64.getEncoder().encode(TEST_VALUE_BLOB), StandardCharsets.UTF_8); public static final String TEST_VALUE_TX_ID = "txt value 464654654"; public static final Float TEST_VALUE_FLOAT = Float.MIN_VALUE; public static final int TEST_VALUE_INT = Integer.MAX_VALUE; From bf94c495a8f5b4834d31aca6f15aa42dabcf9df1 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Fri, 6 Dec 2024 16:00:01 +0530 Subject: [PATCH 03/87] Removed error message and added core error --- .../com/scalar/db/common/error/CoreError.java | 6 ++ .../db/dataloader/core/ErrorMessage.java | 62 ------------------- .../db/dataloader/core/util/RuntimeUtil.java | 4 +- 3 files changed, 8 insertions(+), 64 deletions(-) delete mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/ErrorMessage.java diff --git a/core/src/main/java/com/scalar/db/common/error/CoreError.java b/core/src/main/java/com/scalar/db/common/error/CoreError.java index 4325ef0090..bb54ba6a9f 100644 --- a/core/src/main/java/com/scalar/db/common/error/CoreError.java +++ b/core/src/main/java/com/scalar/db/common/error/CoreError.java @@ -688,6 +688,12 @@ public enum CoreError implements ScalarDbError { "Invalid number specified for column %s in table %s in namespace %s", "", ""), + DATA_LOADER_ERROR_METHOD_NULL_ARGUMENT( + Category.USER_ERROR, + "0151", + "Method null argument not allowed", + "", + ""), // // Errors for the concurrency error category diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/ErrorMessage.java 
b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/ErrorMessage.java deleted file mode 100644 index 395385467e..0000000000 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/ErrorMessage.java +++ /dev/null @@ -1,62 +0,0 @@ -package com.scalar.db.dataloader.core; - -public class ErrorMessage { - public static final String ERROR_MISSING_NAMESPACE_OR_TABLE = - "the provided namespace '%s' and/or table name '%s' is incorrect and could not be found"; - public static final String ERROR_MISSING_COLUMN = "missing field or column mapping for %s"; - public static final String ERROR_MISSING_PARTITION_KEY_COLUMN = - "missing required field or column mapping for partition key %s"; - public static final String ERROR_MISSING_CLUSTERING_KEY_COLUMN = - "missing required field or column mapping for clustering key %s"; - public static final String ERROR_CRUD_EXCEPTION = - "something went wrong while trying to save the data"; - public static final String ERROR_DATA_ALREADY_EXISTS = "record already exists"; - public static final String ERROR_DATA_NOT_FOUND = "record was not found"; - public static final String ERROR_CONTROL_FILE_MISSING_DATA_MAPPINGS = - "the control file is missing data mappings"; - public static final String ERROR_TARGET_COLUMN_NOT_FOUND = - "The target column '%s' for source field '%s' could not be found in table '%s'"; - public static final String ERROR_MISSING_PARTITION_KEY = - "The required partition key '%s' is missing in the control file mapping for table '%s'"; - public static final String ERROR_MISSING_CLUSTERING_KEY = - "The required clustering key '%s' is missing in the control file mapping for table '%s'"; - public static final String ERROR_MISSING_SOURCE_FIELD = - "the data mapping source field '%s' for table '%s' is missing in the json data record"; - public static final String ERROR_DUPLICATE_DATA_MAPPINGS = - "Duplicate data mappings found for table '%s' in the control file"; - public static final String 
ERROR_MISSING_COLUMN_MAPPING = - "No mapping found for column '%s' in table '%s' in the control file. \nControl file validation set at 'FULL'. All columns need to be mapped."; - public static final String ERROR_MULTIPLE_MAPPINGS_FOR_COLUMN_FOUND = - "Multiple data mappings found for column '%s' in table '%s'"; - public static final String ERROR_METHOD_NULL_ARGUMENT = "Method null argument not allowed"; - public static final String ERROR_COULD_NOT_FIND_PARTITION_KEY = - "could not find the partition key"; - public static final String ERROR_METADATA_OR_DATA_TYPES_NOT_FOUND = - "no table meta data or a data type map was found for %s.%s"; - public static final String ERROR_EMPTY_SOURCE_ROW = - "The source record data was undefined or empty"; - public static final String ERROR_UPSERT_INSERT_MISSING_COLUMNS = - "The source record needs to contain all fields if the UPSERT turns into an INSERT"; - public static final String ERROR_SCAN_FAILED = "Could not complete the scan"; - public static final String ERROR_UNKNOWN_TRANSACTION_STATUS = - "Error : the transaction to retrieve the account is in an unknown state"; - public static final String ERROR_INVALID_PROJECTION = "The column '%s' was not found"; - public static final String ERROR_SCAN = - "Something went wrong while scanning. Are you sure you are running in the correct transaction mode?"; - public static final String ERROR_CLUSTERING_KEY_NOT_FOUND = - "The provided clustering key %s was not found"; - public static final String ERROR_KEY_NOT_FOUND = "The key '%s' could not be found"; - public static final String ERROR_KEY_FORMATTING = - "They provided key '%s is not formatted correctly. Expected format is field=value."; - public static final String ERROR_SORT_FORMATTING = - "They provided sort '%s is not formatted correctly. Expected format is field=asc|desc."; - public static final String ERROR_VALUE_TO_STRING_CONVERSION_FAILED = - "Something went wrong while converting the ScalarDB values to strings. 
The table metadata and Value datatype probably do not match."; - - public static final String ERROR_BASE64_ENCODING = - "Invalid base64 encoding for blob value for column %s"; - public static final String ERROR_NUMBER_FORMAT_EXCEPTION = - "Invalid number specified for column %s"; - public static final String ERROR_NULL_POINTER_EXCEPTION = - "The %s column does not support a null value"; -} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/RuntimeUtil.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/RuntimeUtil.java index a5de36d34c..0d967cf719 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/RuntimeUtil.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/RuntimeUtil.java @@ -1,6 +1,6 @@ package com.scalar.db.dataloader.core.util; -import static com.scalar.db.dataloader.core.ErrorMessage.ERROR_METHOD_NULL_ARGUMENT; +import static com.scalar.db.common.error.CoreError.DATA_LOADER_ERROR_METHOD_NULL_ARGUMENT; /** Utils for runtime checks */ public class RuntimeUtil { @@ -14,7 +14,7 @@ public class RuntimeUtil { public static void checkNotNull(Object... 
values) { for (Object value : values) { if (value == null) { - throw new NullPointerException(ERROR_METHOD_NULL_ARGUMENT); + throw new NullPointerException(DATA_LOADER_ERROR_METHOD_NULL_ARGUMENT.getMessage()); } } } From 47be388a02af373d08bc373f35d4743836f8e709 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Fri, 6 Dec 2024 16:09:57 +0530 Subject: [PATCH 04/87] Applied spotless --- .../src/main/java/com/scalar/db/common/error/CoreError.java | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/core/src/main/java/com/scalar/db/common/error/CoreError.java b/core/src/main/java/com/scalar/db/common/error/CoreError.java index bb54ba6a9f..b02b3c45a6 100644 --- a/core/src/main/java/com/scalar/db/common/error/CoreError.java +++ b/core/src/main/java/com/scalar/db/common/error/CoreError.java @@ -689,11 +689,7 @@ public enum CoreError implements ScalarDbError { "", ""), DATA_LOADER_ERROR_METHOD_NULL_ARGUMENT( - Category.USER_ERROR, - "0151", - "Method null argument not allowed", - "", - ""), + Category.USER_ERROR, "0151", "Method null argument not allowed", "", ""), // // Errors for the concurrency error category From 913eb1c069e452ff5da752fb35c8a6b50e49d758 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Fri, 6 Dec 2024 16:19:21 +0530 Subject: [PATCH 05/87] Fixed unit test failures --- .../com/scalar/db/dataloader/core/util/RuntimeUtilTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/RuntimeUtilTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/RuntimeUtilTest.java index 32659fe1cf..fc8d281cb9 100644 --- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/RuntimeUtilTest.java +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/RuntimeUtilTest.java @@ -1,6 +1,6 @@ package com.scalar.db.dataloader.core.util; -import static com.scalar.db.dataloader.core.ErrorMessage.ERROR_METHOD_NULL_ARGUMENT; +import static 
com.scalar.db.common.error.CoreError.DATA_LOADER_ERROR_METHOD_NULL_ARGUMENT; import static org.assertj.core.api.AssertionsForClassTypes.assertThatThrownBy; import org.junit.jupiter.api.Test; @@ -12,7 +12,7 @@ public class RuntimeUtilTest { public void checkNotNull_HasNullValues_ShouldThrowException() { assertThatThrownBy(() -> RuntimeUtil.checkNotNull(null, null)) .isExactlyInstanceOf(NullPointerException.class) - .hasMessage(ERROR_METHOD_NULL_ARGUMENT); + .hasMessage(DATA_LOADER_ERROR_METHOD_NULL_ARGUMENT.getMessage()); } @Test From 6cfa83aa2f9d83d1afcb2f34ea102c4e10212b0f Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Wed, 11 Dec 2024 16:26:17 +0530 Subject: [PATCH 06/87] Basic data import enum and exception --- .../db/dataloader/core/dataimport/ImportMode.java | 8 ++++++++ .../ControlFileValidationException.java | 14 ++++++++++++++ .../controlfile/ControlFileValidationLevel.java | 11 +++++++++++ 3 files changed, 33 insertions(+) create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportMode.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidationException.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidationLevel.java diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportMode.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportMode.java new file mode 100644 index 0000000000..7f2a805e75 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportMode.java @@ -0,0 +1,8 @@ +package com.scalar.db.dataloader.core.dataimport; + +/** Represents the way to be imported data is handled */ +public enum ImportMode { + INSERT, + UPDATE, + UPSERT +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidationException.java 
b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidationException.java new file mode 100644 index 0000000000..e4e032a4c8 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidationException.java @@ -0,0 +1,14 @@ +package com.scalar.db.dataloader.core.dataimport.controlfile; + +/** Represents the control file */ +public class ControlFileValidationException extends Exception { + + /** + * Class constructor + * + * @param message error message + */ + public ControlFileValidationException(String message) { + super(message); + } +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidationLevel.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidationLevel.java new file mode 100644 index 0000000000..3753d0ba65 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidationLevel.java @@ -0,0 +1,11 @@ +package com.scalar.db.dataloader.core.dataimport.controlfile; + +/** Control file validation level */ +public enum ControlFileValidationLevel { + /* All columns need to be mapped */ + FULL, + /* All partition key and clustering key columns need to be mapped */ + KEYS, + /* Only validate the columns that are mapped */ + MAPPED +} From d381b2b6ae77657f42e7972625cf9355a9c2518c Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Wed, 11 Dec 2024 17:17:21 +0530 Subject: [PATCH 07/87] Removed exception class for now --- .../ControlFileValidationException.java | 14 -------------- 1 file changed, 14 deletions(-) delete mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidationException.java diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidationException.java 
b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidationException.java deleted file mode 100644 index e4e032a4c8..0000000000 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidationException.java +++ /dev/null @@ -1,14 +0,0 @@ -package com.scalar.db.dataloader.core.dataimport.controlfile; - -/** Represents the control file */ -public class ControlFileValidationException extends Exception { - - /** - * Class constructor - * - * @param message error message - */ - public ControlFileValidationException(String message) { - super(message); - } -} From 67f24744048d9650f3ce461fff873b08414d4631 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Thu, 12 Dec 2024 11:30:59 +0530 Subject: [PATCH 08/87] Added DECIMAL_FORMAT --- .../com/scalar/db/dataloader/core/util/DecimalUtil.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/DecimalUtil.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/DecimalUtil.java index 8372dc8aac..b1c23e50de 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/DecimalUtil.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/DecimalUtil.java @@ -7,6 +7,8 @@ /** Utils for decimal handling */ public class DecimalUtil { + private static final DecimalFormat DECIMAL_FORMAT = createFormatter(); + /** * Convert a Double to a non-scientific formatted string * @@ -14,7 +16,7 @@ public class DecimalUtil { * @return formatted double as a string */ public static String convertToNonScientific(Double doubleValue) { - return createFormatter().format(doubleValue); + return DECIMAL_FORMAT.format(doubleValue); } /** @@ -24,7 +26,7 @@ public static String convertToNonScientific(Double doubleValue) { * @return formatted float as a string */ public static String convertToNonScientific(Float floatValue) { - return 
createFormatter().format(floatValue); + return DECIMAL_FORMAT.format(floatValue); } /** From 14e359379469f1b22f27ae3b12683d071c585f62 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Thu, 12 Dec 2024 14:35:31 +0530 Subject: [PATCH 09/87] Path util class updated --- .../db/dataloader/core/util/PathUtil.java | 14 +++++----- .../db/dataloader/core/util/PathUtilTest.java | 26 ++++++------------- 2 files changed, 15 insertions(+), 25 deletions(-) diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/PathUtil.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/PathUtil.java index c0bd226e45..c307ea961f 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/PathUtil.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/PathUtil.java @@ -1,22 +1,22 @@ package com.scalar.db.dataloader.core.util; +import java.io.File; + public class PathUtil { /** - * Ensures the specified path has a trailing slash. - * - *

java.nio.file.Path is not used because this is also used for virtual paths. + * Ensures the specified path has a trailing path separator. * * @param path the path - * @return the path with a trailing slash + * @return the path with a trailing path separator. */ - public static String ensureTrailingSlash(String path) { + public static String ensureTrailingSeparator(String path) { if (path == null || path.isEmpty()) { return ""; } - if (!path.endsWith("/")) { - return path + "/"; + if (!path.endsWith(File.separator)) { + return path + File.separator; } return path; diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/PathUtilTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/PathUtilTest.java index 0db7c4edff..85d3ed1ce7 100644 --- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/PathUtilTest.java +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/PathUtilTest.java @@ -6,40 +6,30 @@ class PathUtilTest { @Test - void ensureTrailingSlash_nullPath_returnsEmptyString() { - String path = null; - String result = PathUtil.ensureTrailingSlash(path); + void ensureTrailingSeparator_nullPath_returnsEmptyString() { + String result = PathUtil.ensureTrailingSeparator(null); Assertions.assertEquals("", result); } @Test - void ensureTrailingSlash_emptyPath_returnsEmptyString() { - String path = ""; - String result = PathUtil.ensureTrailingSlash(path); + void ensureTrailingSeparator_emptyPath_returnsEmptyString() { + String result = PathUtil.ensureTrailingSeparator(""); Assertions.assertEquals("", result); } @Test - void ensureTrailingSlash_pathWithoutTrailingSlash_addsTrailingSlash() { + void ensureTrailingSlash_pathWithoutTrailingSlash_addsTrailingSeparator() { String path = "/path/to/directory"; String expectedResult = "/path/to/directory/"; - String result = PathUtil.ensureTrailingSlash(path); + String result = PathUtil.ensureTrailingSeparator(path); 
Assertions.assertEquals(expectedResult, result); } @Test - void ensureTrailingSlash_pathWithTrailingSlash_returnsOriginalPath() { + void ensureTrailingSlash_pathWithTrailingSeparator_returnsOriginalPath() { String path = "/path/to/directory/"; String expectedResult = "/path/to/directory/"; - String result = PathUtil.ensureTrailingSlash(path); - Assertions.assertEquals(expectedResult, result); - } - - @Test - void ensureTrailingSlash_virtualPath_addsTrailingSlash() { - String path = "s3://bucket/path"; - String expectedResult = "s3://bucket/path/"; - String result = PathUtil.ensureTrailingSlash(path); + String result = PathUtil.ensureTrailingSeparator(path); Assertions.assertEquals(expectedResult, result); } } From a096d51e4bd29c478e5f75709901e380ff855efe Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Fri, 13 Dec 2024 11:10:38 +0530 Subject: [PATCH 10/87] Feedback changes --- .../scalar/db/dataloader/core/util/CollectionUtil.java | 8 ++++++-- .../com/scalar/db/dataloader/core/util/DebugUtil.java | 4 ++-- .../com/scalar/db/dataloader/core/util/DecimalUtil.java | 6 ++---- .../com/scalar/db/dataloader/core/util/RuntimeUtil.java | 2 +- .../scalar/db/dataloader/core/util/RuntimeUtilTest.java | 2 +- 5 files changed, 12 insertions(+), 10 deletions(-) diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/CollectionUtil.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/CollectionUtil.java index 184e29b7f3..e98f4beef7 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/CollectionUtil.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/CollectionUtil.java @@ -12,8 +12,12 @@ public class CollectionUtil { * @return collections are same length or not */ public static boolean areSameLength(Collection... 
collections) { - int N = collections[0].size(); - for (Collection a : collections) if (a.size() != N) return false; + int n = collections[0].size(); + for (Collection c : collections) { + if (c.size() != n) { + return false; + } + } return true; } } diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/DebugUtil.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/DebugUtil.java index cde28ca33e..a16e2fae02 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/DebugUtil.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/DebugUtil.java @@ -5,7 +5,7 @@ public class DebugUtil { - private static final Logger LOGGER = LoggerFactory.getLogger(DebugUtil.class); + private static final Logger logger = LoggerFactory.getLogger(DebugUtil.class); /** * log memory usage @@ -17,7 +17,7 @@ public static void logMemoryUsage(String stage) { long usedMemory = runtime.totalMemory() - runtime.freeMemory(); long maxMemory = runtime.maxMemory(); - LOGGER.info( + logger.info( "Memory usage at {}: Used Memory = {} MB, Max Memory = {} MB", stage, formatMemorySize(usedMemory), diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/DecimalUtil.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/DecimalUtil.java index b1c23e50de..8372dc8aac 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/DecimalUtil.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/DecimalUtil.java @@ -7,8 +7,6 @@ /** Utils for decimal handling */ public class DecimalUtil { - private static final DecimalFormat DECIMAL_FORMAT = createFormatter(); - /** * Convert a Double to a non-scientific formatted string * @@ -16,7 +14,7 @@ public class DecimalUtil { * @return formatted double as a string */ public static String convertToNonScientific(Double doubleValue) { - return DECIMAL_FORMAT.format(doubleValue); + return 
createFormatter().format(doubleValue); } /** @@ -26,7 +24,7 @@ public static String convertToNonScientific(Double doubleValue) { * @return formatted float as a string */ public static String convertToNonScientific(Float floatValue) { - return DECIMAL_FORMAT.format(floatValue); + return createFormatter().format(floatValue); } /** diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/RuntimeUtil.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/RuntimeUtil.java index 0d967cf719..870e70285a 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/RuntimeUtil.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/RuntimeUtil.java @@ -14,7 +14,7 @@ public class RuntimeUtil { public static void checkNotNull(Object... values) { for (Object value : values) { if (value == null) { - throw new NullPointerException(DATA_LOADER_ERROR_METHOD_NULL_ARGUMENT.getMessage()); + throw new NullPointerException(DATA_LOADER_ERROR_METHOD_NULL_ARGUMENT.buildMessage()); } } } diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/RuntimeUtilTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/RuntimeUtilTest.java index fc8d281cb9..6a46c6c716 100644 --- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/RuntimeUtilTest.java +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/RuntimeUtilTest.java @@ -12,7 +12,7 @@ public class RuntimeUtilTest { public void checkNotNull_HasNullValues_ShouldThrowException() { assertThatThrownBy(() -> RuntimeUtil.checkNotNull(null, null)) .isExactlyInstanceOf(NullPointerException.class) - .hasMessage(DATA_LOADER_ERROR_METHOD_NULL_ARGUMENT.getMessage()); + .hasMessage(DATA_LOADER_ERROR_METHOD_NULL_ARGUMENT.buildMessage()); } @Test From 52890c8d8992f3a2f7c437b2e6df6456ffe646b8 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Mon, 16 Dec 2024 11:07:50 +0530 Subject: [PATCH 11/87] Changes --- 
data-loader/build.gradle | 1 - .../com/scalar/db/dataloader/core/util/RuntimeUtilTest.java | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/data-loader/build.gradle b/data-loader/build.gradle index 3be6a49e81..87a057933b 100644 --- a/data-loader/build.gradle +++ b/data-loader/build.gradle @@ -1,5 +1,4 @@ subprojects { - ext { jacksonVersion = '2.17.0' } diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/RuntimeUtilTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/RuntimeUtilTest.java index 6a46c6c716..8b03c0c0ab 100644 --- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/RuntimeUtilTest.java +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/RuntimeUtilTest.java @@ -6,17 +6,17 @@ import org.junit.jupiter.api.Test; /** RuntimeUtils unit tests */ -public class RuntimeUtilTest { +class RuntimeUtilTest { @Test - public void checkNotNull_HasNullValues_ShouldThrowException() { + void checkNotNull_HasNullValues_ShouldThrowException() { assertThatThrownBy(() -> RuntimeUtil.checkNotNull(null, null)) .isExactlyInstanceOf(NullPointerException.class) .hasMessage(DATA_LOADER_ERROR_METHOD_NULL_ARGUMENT.buildMessage()); } @Test - public void checkNotNull_HasNoNullValues_ShouldNotThrowException() { + void checkNotNull_HasNoNullValues_ShouldNotThrowException() { String string = "1"; Object object = new Object(); RuntimeUtil.checkNotNull(string, object); From 1997eb87fcae398495ced54ad724d41daceacb4b Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Tue, 17 Dec 2024 12:26:58 +0530 Subject: [PATCH 12/87] Added ScalarDB Dao --- .../com/scalar/db/common/error/CoreError.java | 6 + .../core/dataimport/dao/ScalarDBDao.java | 426 ++++++++++++++++++ .../dataimport/dao/ScalarDBDaoException.java | 15 + .../core/dataimport/dao/ScalarDBManager.java | 68 +++ .../core/dataimport/dao/ScalarDBDaoTest.java | 225 +++++++++ 5 files changed, 740 insertions(+) create mode 100644 
data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDao.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDaoException.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBManager.java create mode 100644 data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDaoTest.java diff --git a/core/src/main/java/com/scalar/db/common/error/CoreError.java b/core/src/main/java/com/scalar/db/common/error/CoreError.java index b02b3c45a6..474b8f7b80 100644 --- a/core/src/main/java/com/scalar/db/common/error/CoreError.java +++ b/core/src/main/java/com/scalar/db/common/error/CoreError.java @@ -941,6 +941,12 @@ public enum CoreError implements ScalarDbError { "Handling the before-preparation snapshot hook failed. Details: %s", "", ""), + DATA_LOADER_ERROR_CRUD_EXCEPTION( + Category.INTERNAL_ERROR, "0047", "something went wrong while trying to save the data", "", "" + ), + DATA_LOADER_ERROR_SCAN( + Category.INTERNAL_ERROR, "0048", "Something went wrong while scanning. 
Are you sure you are running in the correct transaction mode?", "", "" + ), // // Errors for the unknown transaction status error category diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDao.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDao.java new file mode 100644 index 0000000000..9016d38d8c --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDao.java @@ -0,0 +1,426 @@ +package com.scalar.db.dataloader.core.dataimport.dao; + +import com.scalar.db.api.DistributedStorage; +import com.scalar.db.api.DistributedTransaction; +import com.scalar.db.api.Get; +import com.scalar.db.api.Put; +import com.scalar.db.api.PutBuilder.Buildable; +import com.scalar.db.api.Result; +import com.scalar.db.api.Scan; +import com.scalar.db.api.ScanBuilder; +import com.scalar.db.api.Scanner; +import com.scalar.db.common.error.CoreError; +import com.scalar.db.dataloader.core.ScanRange; +import com.scalar.db.exception.storage.ExecutionException; +import com.scalar.db.exception.transaction.CrudException; +import com.scalar.db.io.Column; +import com.scalar.db.io.Key; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.Optional; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** The generic DAO that is used to scan ScalarDB data */ +public class ScalarDBDao { + + /* Class logger */ + private static final Logger LOGGER = LoggerFactory.getLogger(ScalarDBDao.class); + + /** + * Retrieve record from ScalarDB instance in storage mode + * + * @param namespace Namespace name + * @param tableName Table name + * @param partitionKey Partition key + * @param clusteringKey Optional clustering key for get + * @param storage Distributed storage for ScalarDB connection that is running in storage mode. 
+ * @return Optional get result + * @throws ScalarDBDaoException if something goes wrong while reading the data + */ + public Optional get( + String namespace, + String tableName, + Key partitionKey, + Key clusteringKey, + DistributedStorage storage) + throws ScalarDBDaoException { + + String printKey = keysToString(partitionKey, clusteringKey); + + try { + Get get = createGetWith(namespace, tableName, partitionKey, clusteringKey); + Optional result = storage.get(get); + LOGGER.info("GET completed for " + printKey); + return result; + } catch (ExecutionException e) { + throw new ScalarDBDaoException("error GET " + printKey, e); + } + } + + /** + * Retrieve record from ScalarDB instance in transaction mode + * + * @param namespace Namespace name + * @param tableName Table name + * @param partitionKey Partition key + * @param clusteringKey Optional clustering key for get + * @param transaction ScalarDB transaction instance + * @return Optional get result + * @throws ScalarDBDaoException if something goes wrong while reading the data + */ + public Optional get( + String namespace, + String tableName, + Key partitionKey, + Key clusteringKey, + DistributedTransaction transaction) + throws ScalarDBDaoException { + + Get get = createGetWith(namespace, tableName, partitionKey, clusteringKey); + String printKey = keysToString(partitionKey, clusteringKey); + try { + Optional result = transaction.get(get); + LOGGER.info("GET completed for " + printKey); + return result; + } catch (CrudException e) { + throw new ScalarDBDaoException("error GET " + printKey, e.getCause()); + } + } + + /** + * Save record in ScalarDB instance + * + * @param namespace Namespace name + * @param tableName Table name + * @param partitionKey Partition key + * @param clusteringKey Optional clustering key + * @param columns List of column values to be inserted or updated + * @param transaction ScalarDB transaction instance + * @throws ScalarDBDaoException if something goes wrong while executing the 
transaction + */ + public void put( + String namespace, + String tableName, + Key partitionKey, + Key clusteringKey, + List> columns, + DistributedTransaction transaction) + throws ScalarDBDaoException { + + Put put = createPutWith(namespace, tableName, partitionKey, clusteringKey, columns); + try { + transaction.put(put); + } catch (CrudException e) { + throw new ScalarDBDaoException(CoreError.DATA_LOADER_ERROR_CRUD_EXCEPTION.buildMessage(), e); + } + LOGGER.info("PUT completed for " + keysToString(partitionKey, clusteringKey)); + } + + /** + * Save record in ScalarDB instance + * + * @param namespace Namespace name + * @param tableName Table name + * @param partitionKey Partition key + * @param clusteringKey Optional clustering key + * @param columns List of column values to be inserted or updated + * @param storage Distributed storage for ScalarDB connection that is running in storage mode + * @throws ScalarDBDaoException if something goes wrong while executing the transaction + */ + public void put( + String namespace, + String tableName, + Key partitionKey, + Key clusteringKey, + List> columns, + DistributedStorage storage) + throws ScalarDBDaoException { + Put put = createPutWith(namespace, tableName, partitionKey, clusteringKey, columns); + try { + storage.put(put); + } catch (ExecutionException e) { + throw new ScalarDBDaoException(CoreError.DATA_LOADER_ERROR_CRUD_EXCEPTION.buildMessage(), e); + } + LOGGER.info("PUT completed for " + keysToString(partitionKey, clusteringKey)); + } + + /** + * Scan a ScalarDB table + * + * @param namespace ScalarDB namespace + * @param tableName ScalarDB table name + * @param partitionKey Partition key used in ScalarDB scan + * @param range Optional range to set ScalarDB scan start and end values + * @param sorts Optional scan clustering key sorting values + * @param projections List of column projection to use during scan + * @param limit Scan limit value + * @param storage Distributed storage for ScalarDB connection that 
is running in storage mode + * @return List of ScalarDB scan results + * @throws ScalarDBDaoException if scan fails + */ + public List scan( + String namespace, + String tableName, + Key partitionKey, + ScanRange range, + List sorts, + List projections, + int limit, + DistributedStorage storage) + throws ScalarDBDaoException { + // Create scan + Scan scan = createScan(namespace, tableName, partitionKey, range, sorts, projections, limit); + + // scan data + try { + LOGGER.info("SCAN started..."); + Scanner scanner = storage.scan(scan); + List allResults = scanner.all(); + scanner.close(); + LOGGER.info("SCAN completed"); + return allResults; + } catch (ExecutionException | IOException e) { + throw new ScalarDBDaoException(CoreError.DATA_LOADER_ERROR_SCAN.buildMessage(), e); + } + } + + /** + * Scan a ScalarDB table + * + * @param namespace ScalarDB namespace + * @param tableName ScalarDB table name + * @param partitionKey Partition key used in ScalarDB scan + * @param range Optional range to set ScalarDB scan start and end values + * @param sorts Optional scan clustering key sorting values + * @param projections List of column projection to use during scan + * @param limit Scan limit value + * @param transaction Distributed Transaction manager for ScalarDB connection that is running in + * transaction mode + * @return List of ScalarDB scan results + * @throws ScalarDBDaoException if scan fails + */ + public List scan( + String namespace, + String tableName, + Key partitionKey, + ScanRange range, + List sorts, + List projections, + int limit, + DistributedTransaction transaction) + throws ScalarDBDaoException { + + // Create scan + Scan scan = createScan(namespace, tableName, partitionKey, range, sorts, projections, limit); + + // scan data + try { + LOGGER.info("SCAN started..."); + List results = transaction.scan(scan); + LOGGER.info("SCAN completed"); + return results; + } catch (CrudException | NoSuchElementException e) { + // No such element Exception is
thrown when the scan is done in transaction mode but + // ScalarDB is running in storage mode + throw new ScalarDBDaoException(CoreError.DATA_LOADER_ERROR_SCAN.buildMessage(), e); + } + } + + /** + * Create a ScalarDB scanner instance + * + * @param namespace ScalarDB namespace + * @param tableName ScalarDB table name + * @param projectionColumns List of column projection to use during scan + * @param limit Scan limit value + * @param storage Distributed storage for ScalarDB connection that is running in storage mode + * @return ScalarDB Scanner object + * @throws ScalarDBDaoException if scan fails + */ + public Scanner createScanner( + String namespace, + String tableName, + List projectionColumns, + int limit, + DistributedStorage storage) + throws ScalarDBDaoException { + Scan scan = + createScan(namespace, tableName, null, null, new ArrayList<>(), projectionColumns, limit); + try { + return storage.scan(scan); + } catch (ExecutionException e) { + throw new ScalarDBDaoException(CoreError.DATA_LOADER_ERROR_SCAN.buildMessage(), e); + } + } + + /** + * Create a ScalarDB scanner instance + * + * @param namespace ScalarDB namespace + * @param tableName ScalarDB table name + * @param partitionKey Partition key used in ScalarDB scan + * @param scanRange Optional range to set ScalarDB scan start and end values + * @param sortOrders Optional scan clustering key sorting values + * @param projectionColumns List of column projection to use during scan + * @param limit Scan limit value + * @param storage Distributed storage for ScalarDB connection that is running in storage mode + * @return ScalarDB Scanner object + * @throws ScalarDBDaoException if scan fails + */ + public Scanner createScanner( + String namespace, + String tableName, + Key partitionKey, + ScanRange scanRange, + List sortOrders, + List projectionColumns, + int limit, + DistributedStorage storage) + throws ScalarDBDaoException { + Scan scan = + createScan( + namespace, tableName, partitionKey, scanRange, 
sortOrders, projectionColumns, limit); + try { + return storage.scan(scan); + } catch (ExecutionException e) { + throw new ScalarDBDaoException(CoreError.DATA_LOADER_ERROR_SCAN.buildMessage(), e); + } + } + + /** + * Create ScalarDB scan instance + * + * @param namespace ScalarDB namespace + * @param tableName ScalarDB table name + * @param partitionKey Partition key used in ScalarDB scan + * @param scanRange Optional range to set ScalarDB scan start and end values + * @param sortOrders Optional scan clustering key sorting values + * @param projectionColumns List of column projection to use during scan + * @param limit Scan limit value + * @return ScalarDB scan instance + */ + Scan createScan( + String namespace, + String tableName, + Key partitionKey, + ScanRange scanRange, + List sortOrders, + List projectionColumns, + int limit) { + // If no partition key is provided a scan all is created + if (partitionKey == null) { + ScanBuilder.BuildableScanAll buildableScanAll = + Scan.newBuilder().namespace(namespace).table(tableName).all(); + + // projection columns + if (projectionColumns != null && !projectionColumns.isEmpty()) { + buildableScanAll.projections(projectionColumns); + } + + // limit + if (limit > 0) { + buildableScanAll.limit(limit); + } + return buildableScanAll.build(); + } + + // Create a scan with partition key (not a scan all) + + ScanBuilder.BuildableScan buildableScan = + Scan.newBuilder().namespace(namespace).table(tableName).partitionKey(partitionKey); + + // Set the scan boundary + if (scanRange != null) { + // Set boundary start + if (scanRange.getScanStartKey() != null) { + buildableScan.start(scanRange.getScanStartKey(), scanRange.isStartInclusive()); + } + + // with end + if (scanRange.getScanEndKey() != null) { + buildableScan.end(scanRange.getScanEndKey(), scanRange.isEndInclusive()); + } + } + + // clustering order + for (Scan.Ordering sort : sortOrders) { + buildableScan.ordering(sort); + } + + // projections + if (projectionColumns != 
null && !projectionColumns.isEmpty()) { + buildableScan.projections(projectionColumns); + } + + // limit + if (limit > 0) { + buildableScan.limit(limit); + } + return buildableScan.build(); + } + + /** + * Return a ScalarDB get based on provided parameters + * + * @param namespace Namespace name + * @param tableName Table name + * @param partitionKey Partition key + * @param clusteringKey Optional clustering key for get + * @return ScalarDB Get instance + */ + private Get createGetWith( + String namespace, String tableName, Key partitionKey, Key clusteringKey) { + if (clusteringKey != null) { + return Get.newBuilder() + .namespace(namespace) + .table(tableName) + .partitionKey(partitionKey) + .clusteringKey(clusteringKey) + .build(); + } + return Get.newBuilder() + .namespace(namespace) + .table(tableName) + .partitionKey(partitionKey) + .build(); + } + + /** + * Return a ScalarDB put based on provided parameters + * + * @param namespace Namespace name + * @param tableName Table name + * @param partitionKey Partition key + * @param clusteringKey Optional clustering key + * @param columns List of column values + * @return ScalarDB Put Instance + */ + private Put createPutWith( + String namespace, + String tableName, + Key partitionKey, + Key clusteringKey, + List> columns) { + Buildable buildable = + Put.newBuilder().namespace(namespace).table(tableName).partitionKey(partitionKey); + if (clusteringKey != null) { + buildable.clusteringKey(clusteringKey); + } + + for (Column column : columns) { + buildable.value(column); + } + return buildable.build(); + } + + private String keysToString(Key partitionKey, Key clusteringKey) { + if (clusteringKey != null) { + return partitionKey.toString() + "," + clusteringKey; + } else { + return partitionKey.toString(); + } + } +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDaoException.java 
b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDaoException.java new file mode 100644 index 0000000000..1e50affb07 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDaoException.java @@ -0,0 +1,15 @@ +package com.scalar.db.dataloader.core.dataimport.dao; + +/** A custom DAO exception that encapsulates errors thrown by ScalarDB operations */ +public class ScalarDBDaoException extends Exception { + + /** + * Class constructor + * + * @param message error message + * @param cause reason for exception + */ + public ScalarDBDaoException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBManager.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBManager.java new file mode 100644 index 0000000000..6ab2a4f4e6 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBManager.java @@ -0,0 +1,68 @@ +package com.scalar.db.dataloader.core.dataimport.dao; + +import com.scalar.db.api.DistributedStorage; +import com.scalar.db.api.DistributedStorageAdmin; +import com.scalar.db.api.DistributedTransactionAdmin; +import com.scalar.db.api.DistributedTransactionManager; +import com.scalar.db.service.StorageFactory; +import com.scalar.db.service.TransactionFactory; +import java.io.IOException; + +/** + * A manager to retrieve the various ScalarDB managers based on the running mode + * + * @author Yves Peckstadt + */ +public class ScalarDBManager { + + /* Distributed storage for ScalarDB connection that is running in storage mode. 
*/ + private final DistributedStorage storage; + /* Distributed Transaction manager for ScalarDB connection that is running in transaction mode */ + private final DistributedTransactionManager transactionManager; + /* Distributed storage admin for ScalarDB admin operations */ + private final DistributedStorageAdmin storageAdmin; + private final DistributedTransactionAdmin transactionAdmin; + + /** + * Class constructor + * + * @param storageFactory Factory to create all the necessary ScalarDB data managers + */ + public ScalarDBManager(StorageFactory storageFactory) throws IOException { + storage = storageFactory.getStorage(); + storageAdmin = storageFactory.getStorageAdmin(); + transactionManager = null; + transactionAdmin = null; + } + + /** + * Class constructor + * + * @param transactionFactory Factory to create all the necessary ScalarDB data managers + */ + public ScalarDBManager(TransactionFactory transactionFactory) throws IOException { + + transactionManager = transactionFactory.getTransactionManager(); + transactionAdmin = transactionFactory.getTransactionAdmin(); + storageAdmin = null; + storage = null; + } + + /** @return storage for ScalarDB connection that is running in storage mode */ + public DistributedStorage getDistributedStorage() { + return storage; + } + + /** + * @return Distributed Transaction manager for ScalarDB connection that is running in transaction + * mode + */ + public DistributedTransactionManager getDistributedTransactionManager() { + return transactionManager; + } + + /** @return Distributed storage admin for ScalarDB admin operations */ + public DistributedStorageAdmin getDistributedStorageAdmin() { + return storageAdmin; + } +} diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDaoTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDaoTest.java new file mode 100644 index 0000000000..ab9de219b3 --- /dev/null +++ 
b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDaoTest.java @@ -0,0 +1,225 @@ +package com.scalar.db.dataloader.core.dataimport.dao; + +import static com.scalar.db.dataloader.core.UnitTestUtils.*; +import static org.assertj.core.api.Assertions.assertThat; + +import com.scalar.db.api.Scan; +import com.scalar.db.api.ScanBuilder; +import com.scalar.db.dataloader.core.ScanRange; +import com.scalar.db.io.Key; +import java.util.*; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class ScalarDBDaoTest { + + private static final int TEST_VALUE_INT_MIN = 1; + private ScalarDBDao dao; + + @BeforeEach + public void setUp() { + this.dao = new ScalarDBDao(); + } + + @Test + void createScan_scanWithPartitionKey_shouldCreateScanObjectWithPartitionKey() { + + // Create Scan Object + Scan scan = + this.dao.createScan( + TEST_NAMESPACE, + TEST_TABLE_NAME, + Key.newBuilder().addBigInt(TEST_COLUMN_1_PK, TEST_VALUE_LONG).build(), + new ScanRange(null, null, false, false), + new ArrayList<>(), + new ArrayList<>(), + 0); + + // Create expected result + Scan expectedResult = + generateScanResult( + Key.newBuilder().addBigInt(TEST_COLUMN_1_PK, TEST_VALUE_LONG).build(), + new ScanRange(null, null, false, false), + new ArrayList<>(), + new ArrayList<>(), + 0); + + // Compare Scan object + assertThat(scan.toString()).isEqualTo(expectedResult.toString()); + } + + @Test + void createScan_scanWithLimitAndProjection_shouldCreateScanObjectWithLimitAndProjection() { + + // Create Scan Object + Scan scan = + this.dao.createScan( + TEST_NAMESPACE, + TEST_TABLE_NAME, + Key.newBuilder().addBigInt(TEST_COLUMN_1_PK, TEST_VALUE_LONG).build(), + new ScanRange(null, null, false, false), + new ArrayList<>(), + Arrays.asList(TEST_COLUMN_4, TEST_COLUMN_5, TEST_COLUMN_6), + 5); + + // Create expected result + Scan expectedResult = + generateScanResult( + Key.newBuilder().addBigInt(TEST_COLUMN_1_PK, TEST_VALUE_LONG).build(), + new 
ScanRange(null, null, false, false), + new ArrayList<>(), + Arrays.asList(TEST_COLUMN_4, TEST_COLUMN_5, TEST_COLUMN_6), + 5); + + // Compare Scan object + assertThat(scan.toString()).isEqualTo(expectedResult.toString()); + } + + @Test + void createScan_scanWithScanRangeAndOrder_shouldCreateScanObjectWithSortAndRange() { + + // Create Scan Object + Scan scan = + this.dao.createScan( + TEST_NAMESPACE, + TEST_TABLE_NAME, + Key.newBuilder().addBigInt(TEST_COLUMN_1_PK, TEST_VALUE_LONG).build(), + new ScanRange( + Key.newBuilder().addInt(TEST_COLUMN_2_CK, TEST_VALUE_INT_MIN).build(), + Key.newBuilder().addInt(TEST_COLUMN_2_CK, TEST_VALUE_INT).build(), + true, + false), + List.of(Scan.Ordering.asc(TEST_COLUMN_2_CK)), + new ArrayList<>(), + 0); + // Create expected result + Scan expectedResult = + generateScanResult( + Key.newBuilder().addBigInt(TEST_COLUMN_1_PK, TEST_VALUE_LONG).build(), + new ScanRange( + Key.newBuilder().addInt(TEST_COLUMN_2_CK, TEST_VALUE_INT_MIN).build(), + Key.newBuilder().addInt(TEST_COLUMN_2_CK, TEST_VALUE_INT).build(), + true, + false), + List.of(Scan.Ordering.asc(TEST_COLUMN_2_CK)), + new ArrayList<>(), + 0); + // Compare Scan object + assertThat(scan.toString()).isEqualTo(expectedResult.toString()); + } + + @Test + void createScan_scanWithoutPartitionKey_shouldCreateScanAllObject() { + + // Create Scan Object + Scan scan = + this.dao.createScan( + TEST_NAMESPACE, + TEST_TABLE_NAME, + null, + new ScanRange(null, null, false, false), + new ArrayList<>(), + new ArrayList<>(), + 0); + + // Create expected result + Scan expectedResult = generateScanAllResult(new ArrayList<>(), 0); + + // Compare ScanAll object + assertThat(scan.toString()).isEqualTo(expectedResult.toString()); + } + + @Test + void createScan_scanAllWithLimitAndProjection_shouldCreateScanAllObjectWithLimitAndProjection() { + + // Create Scan Object + Scan scan = + this.dao.createScan( + TEST_NAMESPACE, + TEST_TABLE_NAME, + null, + new ScanRange(null, null, false, false), + new 
ArrayList<>(), + Arrays.asList(TEST_COLUMN_4, TEST_COLUMN_5, TEST_COLUMN_6), + 5); + + // Create expected result + Scan expectedResult = + generateScanAllResult(Arrays.asList(TEST_COLUMN_4, TEST_COLUMN_5, TEST_COLUMN_6), 5); + + // Compare ScanAll object + assertThat(scan.toString()).isEqualTo(expectedResult.toString()); + } + + /** + * Create Scan Object + * + * @param partitionKey Partition key used in ScalarDB scan + * @param range Optional range to set ScalarDB scan start and end values + * @param sorts Optional scan clustering key sorting values + * @param projections List of column projection to use during scan + * @param limit Scan limit value + * @return ScalarDB scan instance + */ + private Scan generateScanResult( + Key partitionKey, + ScanRange range, + List sorts, + List projections, + int limit) { + ScanBuilder.BuildableScan scan = + Scan.newBuilder() + .namespace(TEST_NAMESPACE) + .table(TEST_TABLE_NAME) + .partitionKey(partitionKey); + + // Set boundary start + if (range.getScanStartKey() != null) { + scan.start(range.getScanStartKey(), range.isStartInclusive()); + } + + // with end + if (range.getScanEndKey() != null) { + scan.end(range.getScanEndKey(), range.isEndInclusive()); + } + + // clustering order + for (Scan.Ordering sort : sorts) { + scan.ordering(sort); + } + + // projections + if (projections != null && !projections.isEmpty()) { + scan.projections(projections); + } + + // limit + if (limit > 0) { + scan.limit(limit); + } + return scan.build(); + } + + /** + * Create ScanAll Object + * + * @param projections List of column projection to use during scan + * @param limit Scan limit value + * @return ScalarDB scan instance + */ + private Scan generateScanAllResult(List projections, int limit) { + ScanBuilder.BuildableScanAll scan = + Scan.newBuilder().namespace(TEST_NAMESPACE).table(TEST_TABLE_NAME).all(); + + // projections + if (projections != null && !projections.isEmpty()) { + scan.projections(projections); + } + + // limit + if (limit > 
0) { + scan.limit(limit); + } + return scan.build(); + } +} From 8a7338be9fbf3d698dd0b864b467cb95ec42f403 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Tue, 17 Dec 2024 17:15:16 +0530 Subject: [PATCH 13/87] Remove unnecessary files --- .../db/dataloader/core/dataimport/ImportMode.java | 8 -------- .../controlfile/ControlFileValidationLevel.java | 11 ----------- 2 files changed, 19 deletions(-) delete mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportMode.java delete mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidationLevel.java diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportMode.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportMode.java deleted file mode 100644 index 7f2a805e75..0000000000 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportMode.java +++ /dev/null @@ -1,8 +0,0 @@ -package com.scalar.db.dataloader.core.dataimport; - -/** Represents the way to be imported data is handled */ -public enum ImportMode { - INSERT, - UPDATE, - UPSERT -} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidationLevel.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidationLevel.java deleted file mode 100644 index 3753d0ba65..0000000000 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidationLevel.java +++ /dev/null @@ -1,11 +0,0 @@ -package com.scalar.db.dataloader.core.dataimport.controlfile; - -/** Control file validation level */ -public enum ControlFileValidationLevel { - /* All columns need to be mapped */ - FULL, - /* All partition key and clustering key columns need to be mapped */ - KEYS, - /* Only validate the columns that are mapped */ - MAPPED -} From 
2b52eeb727a1476a6a5ab95029a1076c02cecf52 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Tue, 17 Dec 2024 18:05:04 +0530 Subject: [PATCH 14/87] Initial commit [skip ci] --- .../controlfile/ControlFileTable.java | 40 +++++ .../tablemetadata/TableMetadataException.java | 24 +++ .../tablemetadata/TableMetadataRequest.java | 27 +++ .../tablemetadata/TableMetadataService.java | 63 +++++++ .../core/util/TableMetadataUtil.java | 156 ++++++++++++++++++ 5 files changed, 310 insertions(+) create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileTable.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/tablemetadata/TableMetadataException.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/tablemetadata/TableMetadataRequest.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/tablemetadata/TableMetadataService.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/TableMetadataUtil.java diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileTable.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileTable.java new file mode 100644 index 0000000000..d9308794fc --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileTable.java @@ -0,0 +1,40 @@ +package com.scalar.db.dataloader.core.dataimport.controlfile; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import java.util.ArrayList; +import java.util.List; +import lombok.Getter; +import lombok.Setter; + +/** Represents the mapping for one table in the control file */ +@Getter +@Setter +public class ControlFileTable { + + @JsonProperty("namespace") + public String namespace; + + @JsonProperty("table_name") + public String 
tableName; + + @JsonProperty("mappings") + public List mappings; + + /** Class constructor */ + public ControlFileTable(String namespace, String tableName) { + this.tableName = tableName; + this.namespace = namespace; + this.mappings = new ArrayList<>(); + } + + @JsonCreator + public ControlFileTable( + @JsonProperty("namespace") String namespace, + @JsonProperty("table_name") String tableName, + @JsonProperty("mappings") List mappings) { + this.namespace = namespace; + this.tableName = tableName; + this.mappings = mappings; + } +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/tablemetadata/TableMetadataException.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/tablemetadata/TableMetadataException.java new file mode 100644 index 0000000000..31773a9b64 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/tablemetadata/TableMetadataException.java @@ -0,0 +1,24 @@ +package com.scalar.db.dataloader.core.tablemetadata; + +/** A custom exception that encapsulates errors thrown by the TableMetaDataService */ +public class TableMetadataException extends Exception { + + /** + * Class constructor + * + * @param message error message + * @param cause reason for exception + */ + public TableMetadataException(String message, Throwable cause) { + super(message, cause); + } + + /** + * Class constructor + * + * @param message error message + */ + public TableMetadataException(String message) { + super(message); + } +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/tablemetadata/TableMetadataRequest.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/tablemetadata/TableMetadataRequest.java new file mode 100644 index 0000000000..cb2c0fe7e5 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/tablemetadata/TableMetadataRequest.java @@ -0,0 +1,27 @@ +package com.scalar.db.dataloader.core.tablemetadata; + +/** Represents the request for 
metadata for a single ScalarDB table */ +public class TableMetadataRequest { + + private final String namespace; + private final String tableName; + + /** + * Class constructor + * + * @param namespace ScalarDB namespace + * @param tableName ScalarDB table name + */ + public TableMetadataRequest(String namespace, String tableName) { + this.namespace = namespace; + this.tableName = tableName; + } + + public String getNamespace() { + return namespace; + } + + public String getTableName() { + return tableName; + } +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/tablemetadata/TableMetadataService.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/tablemetadata/TableMetadataService.java new file mode 100644 index 0000000000..88d60f778e --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/tablemetadata/TableMetadataService.java @@ -0,0 +1,63 @@ +package com.scalar.db.dataloader.core.tablemetadata; + +import com.scalar.db.api.DistributedStorageAdmin; +import com.scalar.db.api.TableMetadata; +import com.scalar.db.dataloader.core.util.TableMetadataUtil; +import com.scalar.db.exception.storage.ExecutionException; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; +import lombok.RequiredArgsConstructor; + +@RequiredArgsConstructor +public class TableMetadataService { + private static final String ERROR_MISSING_NAMESPACE_OR_TABLE = + "Missing namespace or table: %s, %s"; + + private final DistributedStorageAdmin storageAdmin; + + /** + * Returns the TableMetadata for the given namespace and table name. 
+ * + * @param namespace ScalarDb namespace + * @param tableName ScalarDb table name + * @return TableMetadata + * @throws TableMetadataException if the namespace or table is missing + */ + public TableMetadata getTableMetadata(String namespace, String tableName) + throws TableMetadataException { + try { + TableMetadata tableMetadata = storageAdmin.getTableMetadata(namespace, tableName); + if (tableMetadata == null) { + throw new TableMetadataException( + String.format(ERROR_MISSING_NAMESPACE_OR_TABLE, namespace, tableName)); + } + return tableMetadata; + } catch (ExecutionException e) { + throw new TableMetadataException( + String.format(ERROR_MISSING_NAMESPACE_OR_TABLE, namespace, tableName), e.getCause()); + } + } + + /** + * Returns the TableMetadata for the given list of TableMetadataRequest. + * + * @param requests List of TableMetadataRequest + * @return Map of TableMetadata + * @throws TableMetadataException if the namespace or table is missing + */ + public Map getTableMetadata(Collection requests) + throws TableMetadataException { + Map metadataMap = new HashMap<>(); + + for (TableMetadataRequest request : requests) { + String namespace = request.getNamespace(); + String tableName = request.getTableName(); + TableMetadata tableMetadata = getTableMetadata(namespace, tableName); + String key = TableMetadataUtil.getTableLookupKey(namespace, tableName); + metadataMap.put(key, tableMetadata); + } + + return metadataMap; + } +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/TableMetadataUtil.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/TableMetadataUtil.java new file mode 100644 index 0000000000..7ce43a3f71 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/TableMetadataUtil.java @@ -0,0 +1,156 @@ +package com.scalar.db.dataloader.core.util; + +import com.scalar.db.api.TableMetadata; +import com.scalar.db.dataloader.core.Constants; +import 
com.scalar.db.dataloader.core.dataimport.controlfile.ControlFileTable; +import com.scalar.db.io.DataType; +import com.scalar.db.transaction.consensuscommit.Attribute; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** Utils for ScalarDB table metadata */ +public class TableMetadataUtil { + + /** + * Check if the field is a metadata column or not + * + * @param columnName Table column name + * @param metadataColumns Fixed list of metadata columns + * @param columnNames List of all column names in a table + * @return The field is metadata or not + */ + public static boolean isMetadataColumn( + String columnName, Set metadataColumns, Set columnNames) { + // Skip field if it can be ignored + if (metadataColumns.contains(columnName)) { + return true; + } + + // Skip if the field is a "before_" field + return columnName.startsWith(Attribute.BEFORE_PREFIX) + && !columnNames.contains(Attribute.BEFORE_PREFIX + columnName); + } + + /** + * Check if the field is a metadata column or not + * + * @param columnName ScalarDB table column name + * @param tableMetadata Metadata for a single ScalarDB table + * @return is the field a metadata column or not + */ + public static boolean isMetadataColumn(String columnName, TableMetadata tableMetadata) { + Set metadataColumns = getMetadataColumns(); + LinkedHashSet columnNames = tableMetadata.getColumnNames(); + + // Skip field if it can be ignored + if (metadataColumns.contains(columnName)) { + return true; + } + + // Skip if the field is a "before_" field + return columnName.startsWith(Attribute.BEFORE_PREFIX) + && !columnNames.contains(Attribute.BEFORE_PREFIX + columnName); + } + + /** + * Return a list of fixed metadata columns + * + * @return Set of columns + */ + public static Set getMetadataColumns() { + return Stream.of( 
Attribute.ID, + Attribute.STATE, + Attribute.VERSION, + Attribute.PREPARED_AT, + Attribute.COMMITTED_AT, + Attribute.BEFORE_ID, + Attribute.BEFORE_STATE, + Attribute.BEFORE_VERSION, + Attribute.BEFORE_PREPARED_AT, + Attribute.BEFORE_COMMITTED_AT) + .collect(Collectors.toCollection(HashSet::new)); + } + + /** + * Return a map with the data types for all columns in a ScalarDB table + * + * @param tableMetadata Metadata for a single ScalarDB table + * @return data types map + */ + public static Map extractColumnDataTypes(TableMetadata tableMetadata) { + Map definitions = new HashMap<>(); + for (String columnName : tableMetadata.getColumnNames()) { + definitions.put(columnName, tableMetadata.getColumnDataType(columnName)); + } + return definitions; + } + + /** + * Return lookup key for a table in a namespace + * + * @param namespace Namespace + * @param tableName Table name + * @return Table metadata lookup key + */ + public static String getTableLookupKey(String namespace, String tableName) { + return String.format(Constants.TABLE_LOOKUP_KEY_FORMAT, namespace, tableName); + } + + /** + * Return lookup key for a table in a namespace + * + * @param controlFileTable Control file data mapping + * @return Table metadata lookup key + */ + public static String getTableLookupKey(ControlFileTable controlFileTable) { + return String.format( + Constants.TABLE_LOOKUP_KEY_FORMAT, controlFileTable.namespace, controlFileTable.tableName); + } + + /** + * Populate the projection columns with metadata columns + * + * @param tableMetadata Metadata for a single ScalarDB table + * @param projections List of projection columns + * @return List of projection columns with metadata columns + */ + public static List populateProjectionsWithMetadata( + TableMetadata tableMetadata, List projections) { + List projectionMetadata = new ArrayList<>(); + + // Add projection columns along with metadata columns + projections.forEach( + projection -> { + projectionMetadata.add(projection); + if 
(!isKeyColumn(projection, tableMetadata)) { + // Add metadata column before the projection if it's not a key column + projectionMetadata.add(Attribute.BEFORE_PREFIX + projection); + } + }); + + // Add fixed metadata columns + projectionMetadata.addAll(getMetadataColumns()); + + return projectionMetadata; + } + + /** + * Checks if a column is a key column (partition key or clustering key) in the table. + * + * @param column The column name to check. + * @param tableMetadata The metadata of the ScalarDB table. + * @return True if the column is a key column, false otherwise. + */ + private static boolean isKeyColumn(String column, TableMetadata tableMetadata) { + return tableMetadata.getPartitionKeyNames().contains(column) + || tableMetadata.getClusteringKeyNames().contains(column); + } +} From e20607392270c21cfe64e2c0023e6dba96a47990 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Tue, 17 Dec 2024 18:15:18 +0530 Subject: [PATCH 15/87] Changes --- .../core/dataimport/dao/ScalarDBDao.java | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDao.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDao.java index 9016d38d8c..9a96401d55 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDao.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDao.java @@ -27,7 +27,11 @@ public class ScalarDBDao { /* Class logger */ - private static final Logger LOGGER = LoggerFactory.getLogger(ScalarDBDao.class); + private static final Logger logger = LoggerFactory.getLogger(ScalarDBDao.class); + private static final String GET_COMPLETED_MSG = "GET completed for %s"; + private static final String PUT_COMPLETED_MSG = "PUT completed for %s"; + private static final String SCAN_START_MSG = "SCAN started..."; + private static final String SCAN_END_MSG = 
"SCAN completed"; /** * Retrieve record from ScalarDB instance in storage mode @@ -53,7 +57,7 @@ public Optional get( try { Get get = createGetWith(namespace, tableName, partitionKey, clusteringKey); Optional result = storage.get(get); - LOGGER.info("GET completed for " + printKey); + logger.info(String.format(GET_COMPLETED_MSG, printKey)); return result; } catch (ExecutionException e) { throw new ScalarDBDaoException("error GET " + printKey, e); @@ -83,7 +87,7 @@ public Optional get( String printKey = keysToString(partitionKey, clusteringKey); try { Optional result = transaction.get(get); - LOGGER.info("GET completed for " + printKey); + logger.info(String.format(GET_COMPLETED_MSG, printKey)); return result; } catch (CrudException e) { throw new ScalarDBDaoException("error GET " + printKey, e.getCause()); @@ -116,7 +120,7 @@ public void put( } catch (CrudException e) { throw new ScalarDBDaoException(CoreError.DATA_LOADER_ERROR_CRUD_EXCEPTION.buildMessage(), e); } - LOGGER.info("PUT completed for " + keysToString(partitionKey, clusteringKey)); + logger.info(String.format(PUT_COMPLETED_MSG, keysToString(partitionKey, clusteringKey))); } /** @@ -144,7 +148,7 @@ public void put( } catch (ExecutionException e) { throw new ScalarDBDaoException(CoreError.DATA_LOADER_ERROR_CRUD_EXCEPTION.buildMessage(), e); } - LOGGER.info("PUT completed for " + keysToString(partitionKey, clusteringKey)); + logger.info(String.format(PUT_COMPLETED_MSG, keysToString(partitionKey, clusteringKey))); } /** @@ -176,11 +180,11 @@ public List scan( // scan data try { - LOGGER.info("SCAN started..."); + logger.info(SCAN_START_MSG); Scanner scanner = storage.scan(scan); List allResults = scanner.all(); scanner.close(); - LOGGER.info("SCAN completed"); + logger.info(SCAN_END_MSG); return allResults; } catch (ExecutionException | IOException e) { throw new ScalarDBDaoException(CoreError.DATA_LOADER_ERROR_SCAN.buildMessage(), e); @@ -218,9 +222,9 @@ public List scan( // scan data try { - 
LOGGER.info("SCAN started..."); + logger.info(SCAN_START_MSG); List results = transaction.scan(scan); - LOGGER.info("SCAN completed"); + logger.info(SCAN_END_MSG); return results; } catch (CrudException | NoSuchElementException e) { // No such element Exception is thrown when the scan is done in transaction mode but From 26d3144e1f77788e002a64b245971a15d575be00 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Wed, 18 Dec 2024 12:12:25 +0530 Subject: [PATCH 16/87] Changes --- .../com/scalar/db/common/error/CoreError.java | 14 ++++++++++---- .../core/dataimport/dao/ScalarDBDao.java | 15 ++++++++------- .../core/dataimport/dao/ScalarDBManager.java | 1 - 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/core/src/main/java/com/scalar/db/common/error/CoreError.java b/core/src/main/java/com/scalar/db/common/error/CoreError.java index 474b8f7b80..637a549dad 100644 --- a/core/src/main/java/com/scalar/db/common/error/CoreError.java +++ b/core/src/main/java/com/scalar/db/common/error/CoreError.java @@ -942,11 +942,17 @@ public enum CoreError implements ScalarDbError { "", ""), DATA_LOADER_ERROR_CRUD_EXCEPTION( - Category.INTERNAL_ERROR, "0047", "something went wrong while trying to save the data", "", "" - ), + Category.INTERNAL_ERROR, + "0047", + "something went wrong while trying to save the data", + "", + ""), DATA_LOADER_ERROR_SCAN( - Category.INTERNAL_ERROR, "0048", "Something went wrong while scanning. Are you sure you are running in the correct transaction mode?", "", "" - ), + Category.INTERNAL_ERROR, + "0048", + "Something went wrong while scanning. 
Are you sure you are running in the correct transaction mode?", + "", + ""), // // Errors for the unknown transaction status error category diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDao.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDao.java index 9a96401d55..8f3556818b 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDao.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDao.java @@ -52,15 +52,16 @@ public Optional get( DistributedStorage storage) throws ScalarDBDaoException { - String printKey = keysToString(partitionKey, clusteringKey); + // Retrieving the key data for logging + String loggingKey = keysToString(partitionKey, clusteringKey); try { Get get = createGetWith(namespace, tableName, partitionKey, clusteringKey); Optional result = storage.get(get); - logger.info(String.format(GET_COMPLETED_MSG, printKey)); + logger.info(String.format(GET_COMPLETED_MSG, loggingKey)); return result; } catch (ExecutionException e) { - throw new ScalarDBDaoException("error GET " + printKey, e); + throw new ScalarDBDaoException("error GET " + loggingKey, e); } } @@ -84,13 +85,14 @@ public Optional get( throws ScalarDBDaoException { Get get = createGetWith(namespace, tableName, partitionKey, clusteringKey); - String printKey = keysToString(partitionKey, clusteringKey); + // Retrieving the key data for logging + String loggingKey = keysToString(partitionKey, clusteringKey); try { Optional result = transaction.get(get); - logger.info(String.format(GET_COMPLETED_MSG, printKey)); + logger.info(String.format(GET_COMPLETED_MSG, loggingKey)); return result; } catch (CrudException e) { - throw new ScalarDBDaoException("error GET " + printKey, e.getCause()); + throw new ScalarDBDaoException("error GET " + loggingKey, e.getCause()); } } @@ -332,7 +334,6 @@ Scan createScan( } // Create a scan with 
partition key (not a scan all) - ScanBuilder.BuildableScan buildableScan = Scan.newBuilder().namespace(namespace).table(tableName).partitionKey(partitionKey); diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBManager.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBManager.java index 6ab2a4f4e6..ac246d8354 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBManager.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBManager.java @@ -41,7 +41,6 @@ public ScalarDBManager(StorageFactory storageFactory) throws IOException { * @param transactionFactory Factory to create all the necessary ScalarDB data managers */ public ScalarDBManager(TransactionFactory transactionFactory) throws IOException { - transactionManager = transactionFactory.getTransactionManager(); transactionAdmin = transactionFactory.getTransactionAdmin(); storageAdmin = null; From b86487d68fd7690e0becb336b4ef6c5d48a113ab Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Wed, 18 Dec 2024 12:27:36 +0530 Subject: [PATCH 17/87] spotbugs exclude --- gradle/spotbugs-exclude.xml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/gradle/spotbugs-exclude.xml b/gradle/spotbugs-exclude.xml index 05571f3fdb..1724740470 100644 --- a/gradle/spotbugs-exclude.xml +++ b/gradle/spotbugs-exclude.xml @@ -34,4 +34,9 @@ + + + + + From 818a2b40660214e2a715a0cde34afca117a37813 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Wed, 18 Dec 2024 15:12:21 +0530 Subject: [PATCH 18/87] spotbugs exclude -2 --- gradle/spotbugs-exclude.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gradle/spotbugs-exclude.xml b/gradle/spotbugs-exclude.xml index 1724740470..0479b8fa24 100644 --- a/gradle/spotbugs-exclude.xml +++ b/gradle/spotbugs-exclude.xml @@ -36,7 +36,7 @@ - + From 90c41051e93e5df9928f13cef86611d61076f441 Mon Sep 17 00:00:00 2001 From: Jishnu 
J Date: Wed, 18 Dec 2024 16:33:02 +0530 Subject: [PATCH 19/87] Added a file [skip ci] --- .../ControlFileTableFieldMapping.java | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileTableFieldMapping.java diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileTableFieldMapping.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileTableFieldMapping.java new file mode 100644 index 0000000000..78c16ed726 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileTableFieldMapping.java @@ -0,0 +1,32 @@ +package com.scalar.db.dataloader.core.dataimport.controlfile; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import lombok.Getter; +import lombok.Setter; + +/** Represents the one field mapping for a table mapping in the control file */ +@Getter +@Setter +public class ControlFileTableFieldMapping { + + @JsonProperty("source_field") + public String sourceField; + + @JsonProperty("target_column") + public String targetColumn; + + /** + * Class constructor + * + * @param sourceField The data field in the provided json field + * @param targetColumn The column in the ScalarDB table + */ + @JsonCreator + public ControlFileTableFieldMapping( + @JsonProperty("source_field") String sourceField, + @JsonProperty("target_column") String targetColumn) { + this.sourceField = sourceField; + this.targetColumn = targetColumn; + } +} From 3d5d3e09f5031c5559c48c7d6f5800ff1099dd15 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Wed, 18 Dec 2024 16:40:14 +0530 Subject: [PATCH 20/87] Added unit test files [skip ci] --- .../TableMetadataServiceTest.java | 53 +++++++++++ .../core/util/TableMetadataUtilTest.java | 87 +++++++++++++++++++ 2 files changed, 140 
insertions(+) create mode 100644 data-loader/core/src/test/java/com/scalar/db/dataloader/core/tablemetadata/TableMetadataServiceTest.java create mode 100644 data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/TableMetadataUtilTest.java diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/tablemetadata/TableMetadataServiceTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/tablemetadata/TableMetadataServiceTest.java new file mode 100644 index 0000000000..2724ca5d31 --- /dev/null +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/tablemetadata/TableMetadataServiceTest.java @@ -0,0 +1,53 @@ +package com.scalar.db.dataloader.core.tablemetadata; + +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import com.scalar.db.api.DistributedStorageAdmin; +import com.scalar.db.api.TableMetadata; +import com.scalar.db.dataloader.core.UnitTestUtils; +import com.scalar.db.exception.storage.ExecutionException; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.Mockito; + +public class TableMetadataServiceTest { + + DistributedStorageAdmin storageAdmin; + TableMetadataService tableMetadataService; + + @BeforeEach + public void setup() throws ExecutionException { + storageAdmin = Mockito.mock(DistributedStorageAdmin.class); + Mockito.when(storageAdmin.getTableMetadata("namespace", "table")) + .thenReturn(UnitTestUtils.createTestTableMetadata()); + // Mockito.when(storageAdmin.getTableMetadata("namespace1","table1")).thenReturn(null); + tableMetadataService = new TableMetadataService(storageAdmin); + } + + @Test + void getTableMetadata_withValidNamespaceAndTable_shouldReturnTableMetadataMap() + throws TableMetadataException { + + Map expected = new HashMap<>(); + expected.put("namespace.table", 
UnitTestUtils.createTestTableMetadata()); + TableMetadataRequest tableMetadataRequest = new TableMetadataRequest("namespace", "table"); + Map output = + tableMetadataService.getTableMetadata(Collections.singleton(tableMetadataRequest)); + Assertions.assertEquals(expected.get("namespace.table"), output.get("namespace.table")); + } + + @Test + void getTableMetadata_withInvalidNamespaceAndTable_shouldThrowException() + throws TableMetadataException { + TableMetadataRequest tableMetadataRequest = new TableMetadataRequest("namespace2", "table2"); + assertThatThrownBy( + () -> + tableMetadataService.getTableMetadata(Collections.singleton(tableMetadataRequest))) + .isInstanceOf(TableMetadataException.class) + .hasMessage("Missing namespace or table: namespace2, table2"); + } +} diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/TableMetadataUtilTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/TableMetadataUtilTest.java new file mode 100644 index 0000000000..b1a9452ddd --- /dev/null +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/TableMetadataUtilTest.java @@ -0,0 +1,87 @@ +package com.scalar.db.dataloader.core.util; + +import static com.scalar.db.dataloader.core.Constants.TABLE_LOOKUP_KEY_FORMAT; +import static org.assertj.core.api.Assertions.assertThat; + +import com.scalar.db.dataloader.core.dataimport.controlfile.ControlFileTable; +import com.scalar.db.transaction.consensuscommit.Attribute; +import java.util.HashSet; +import java.util.Set; +import org.junit.jupiter.api.Test; + +/** Unit tests for TableMetadataUtils */ +class TableMetadataUtilTest { + + private static final String NAMESPACE = "ns"; + private static final String TABLE_NAME = "table"; + + @Test + void isMetadataColumn_IsMetaDataColumn_ShouldReturnTrue() { + boolean isMetadataColumn = + TableMetadataUtil.isMetadataColumn( + Attribute.ID, TableMetadataUtil.getMetadataColumns(), new HashSet<>()); + 
assertThat(isMetadataColumn).isTrue(); + } + + @Test + void isMetadataColumn_IsNotMetadataColumn_ShouldReturnFalse() { + boolean isMetadataColumn = + TableMetadataUtil.isMetadataColumn( + "columnName", TableMetadataUtil.getMetadataColumns(), new HashSet<>()); + assertThat(isMetadataColumn).isFalse(); + } + + @Test + void isMetadataColumn_IsBeforePrefixColumn_ShouldReturnTrue() { + boolean isMetadataColumn = + TableMetadataUtil.isMetadataColumn( + Attribute.BEFORE_PREFIX + "columnName", + TableMetadataUtil.getMetadataColumns(), + new HashSet<>()); + assertThat(isMetadataColumn).isTrue(); + } + + @Test + void isMetadataColumn_IsNotBeforePrefixColumn_ShouldReturnFalse() { + Set columnNames = new HashSet<>(); + columnNames.add("before_before_testing"); + boolean isMetadataColumn = + TableMetadataUtil.isMetadataColumn( + "before_testing", TableMetadataUtil.getMetadataColumns(), columnNames); + assertThat(isMetadataColumn).isFalse(); + } + + @Test + void getMetadataColumns_NoArgs_ShouldReturnSet() { + + Set columns = new HashSet<>(); + columns.add(Attribute.ID); + columns.add(Attribute.STATE); + columns.add(Attribute.VERSION); + columns.add(Attribute.PREPARED_AT); + columns.add(Attribute.COMMITTED_AT); + columns.add(Attribute.BEFORE_ID); + columns.add(Attribute.BEFORE_STATE); + columns.add(Attribute.BEFORE_VERSION); + columns.add(Attribute.BEFORE_PREPARED_AT); + columns.add(Attribute.BEFORE_COMMITTED_AT); + + Set metadataColumns = TableMetadataUtil.getMetadataColumns(); + assertThat(metadataColumns).containsExactlyInAnyOrder(columns.toArray(new String[0])); + } + + @Test + void getTableLookupKey_ValidStringArgs_ShouldReturnLookupKey() { + String actual = TableMetadataUtil.getTableLookupKey(NAMESPACE, TABLE_NAME); + String expected = String.format(TABLE_LOOKUP_KEY_FORMAT, NAMESPACE, TABLE_NAME); + assertThat(actual).isEqualTo(expected); + } + + @Test + void getTableLookupKey_ValidControlFileArg_ShouldReturnLookupKey() { + ControlFileTable controlFileTable = new 
ControlFileTable(NAMESPACE, TABLE_NAME); + String actual = TableMetadataUtil.getTableLookupKey(controlFileTable); + String expected = String.format(TABLE_LOOKUP_KEY_FORMAT, NAMESPACE, TABLE_NAME); + assertThat(actual).isEqualTo(expected); + } +} From 64952025b7b860c91450dce95d0d1e684d87cd9b Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Thu, 19 Dec 2024 11:23:48 +0530 Subject: [PATCH 21/87] Spotbug fixes --- .../dataimport/controlfile/ControlFileTable.java | 12 +++++++++--- .../controlfile/ControlFileTableFieldMapping.java | 4 ++-- .../core/tablemetadata/TableMetadataRequest.java | 10 +++------- .../db/dataloader/core/util/TableMetadataUtil.java | 2 +- .../core/tablemetadata/TableMetadataServiceTest.java | 8 +++----- gradle/spotbugs-exclude.xml | 5 +++++ 6 files changed, 23 insertions(+), 18 deletions(-) diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileTable.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileTable.java index d9308794fc..3216193bbf 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileTable.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileTable.java @@ -13,13 +13,13 @@ public class ControlFileTable { @JsonProperty("namespace") - public String namespace; + private String namespace; @JsonProperty("table_name") - public String tableName; + private String tableName; @JsonProperty("mappings") - public List mappings; + private final List mappings; /** Class constructor */ public ControlFileTable(String namespace, String tableName) { @@ -28,6 +28,12 @@ public ControlFileTable(String namespace, String tableName) { this.mappings = new ArrayList<>(); } + /** + * Added for mapping data to control file table object from API request + * @param namespace namespace + * @param tableName table name + * @param mappings column name mapping from control 
file + */ @JsonCreator public ControlFileTable( @JsonProperty("namespace") String namespace, diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileTableFieldMapping.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileTableFieldMapping.java index 78c16ed726..064d5c430b 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileTableFieldMapping.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileTableFieldMapping.java @@ -11,10 +11,10 @@ public class ControlFileTableFieldMapping { @JsonProperty("source_field") - public String sourceField; + private String sourceField; @JsonProperty("target_column") - public String targetColumn; + private String targetColumn; /** * Class constructor diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/tablemetadata/TableMetadataRequest.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/tablemetadata/TableMetadataRequest.java index cb2c0fe7e5..0ddd9ab686 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/tablemetadata/TableMetadataRequest.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/tablemetadata/TableMetadataRequest.java @@ -1,6 +1,9 @@ package com.scalar.db.dataloader.core.tablemetadata; +import lombok.Getter; + /** Represents the request for metadata for a single ScalarDB table */ +@Getter public class TableMetadataRequest { private final String namespace; @@ -17,11 +20,4 @@ public TableMetadataRequest(String namespace, String tableName) { this.tableName = tableName; } - public String getNamespace() { - return namespace; - } - - public String getTableName() { - return tableName; - } } diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/TableMetadataUtil.java 
b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/TableMetadataUtil.java index 7ce43a3f71..cd3c8d94d5 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/TableMetadataUtil.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/TableMetadataUtil.java @@ -112,7 +112,7 @@ public static String getTableLookupKey(String namespace, String tableName) { */ public static String getTableLookupKey(ControlFileTable controlFileTable) { return String.format( - Constants.TABLE_LOOKUP_KEY_FORMAT, controlFileTable.namespace, controlFileTable.tableName); + Constants.TABLE_LOOKUP_KEY_FORMAT, controlFileTable.getNamespace(), controlFileTable.getTableName()); } /** diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/tablemetadata/TableMetadataServiceTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/tablemetadata/TableMetadataServiceTest.java index 2724ca5d31..52269a98e5 100644 --- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/tablemetadata/TableMetadataServiceTest.java +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/tablemetadata/TableMetadataServiceTest.java @@ -14,17 +14,16 @@ import org.junit.jupiter.api.Test; import org.mockito.Mockito; -public class TableMetadataServiceTest { +class TableMetadataServiceTest { DistributedStorageAdmin storageAdmin; TableMetadataService tableMetadataService; @BeforeEach - public void setup() throws ExecutionException { + void setup() throws ExecutionException { storageAdmin = Mockito.mock(DistributedStorageAdmin.class); Mockito.when(storageAdmin.getTableMetadata("namespace", "table")) .thenReturn(UnitTestUtils.createTestTableMetadata()); - // Mockito.when(storageAdmin.getTableMetadata("namespace1","table1")).thenReturn(null); tableMetadataService = new TableMetadataService(storageAdmin); } @@ -41,8 +40,7 @@ void getTableMetadata_withValidNamespaceAndTable_shouldReturnTableMetadataMap() } @Test - void 
getTableMetadata_withInvalidNamespaceAndTable_shouldThrowException() - throws TableMetadataException { + void getTableMetadata_withInvalidNamespaceAndTable_shouldThrowException() { TableMetadataRequest tableMetadataRequest = new TableMetadataRequest("namespace2", "table2"); assertThatThrownBy( () -> diff --git a/gradle/spotbugs-exclude.xml b/gradle/spotbugs-exclude.xml index 05571f3fdb..1724740470 100644 --- a/gradle/spotbugs-exclude.xml +++ b/gradle/spotbugs-exclude.xml @@ -34,4 +34,9 @@ + + + + + From 90abd9ede55b94e2f8a44a84ceac57b478a3cf3c Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Thu, 19 Dec 2024 13:46:45 +0530 Subject: [PATCH 22/87] Removed use of List.of to fix CI error --- .../db/dataloader/core/dataimport/dao/ScalarDBDaoTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDaoTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDaoTest.java index ab9de219b3..d0571ddddf 100644 --- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDaoTest.java +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDaoTest.java @@ -89,7 +89,7 @@ void createScan_scanWithScanRangeAndOrder_shouldCreateScanObjectWithSortAndRange Key.newBuilder().addInt(TEST_COLUMN_2_CK, TEST_VALUE_INT).build(), true, false), - List.of(Scan.Ordering.asc(TEST_COLUMN_2_CK)), + Arrays.asList(Scan.Ordering.asc(TEST_COLUMN_2_CK)), new ArrayList<>(), 0); // Create expected result @@ -101,7 +101,7 @@ void createScan_scanWithScanRangeAndOrder_shouldCreateScanObjectWithSortAndRange Key.newBuilder().addInt(TEST_COLUMN_2_CK, TEST_VALUE_INT).build(), true, false), - List.of(Scan.Ordering.asc(TEST_COLUMN_2_CK)), + Arrays.asList(Scan.Ordering.asc(TEST_COLUMN_2_CK)), new ArrayList<>(), 0); // Compare Scan object From 30db9882df9f5cc94f08a71a365b142722e672e3 Mon Sep 17 00:00:00 2001 From: 
Jishnu J Date: Thu, 19 Dec 2024 15:45:44 +0530 Subject: [PATCH 23/87] Applied spotless --- .../core/dataimport/controlfile/ControlFileTable.java | 1 + .../dataloader/core/tablemetadata/TableMetadataRequest.java | 1 - .../com/scalar/db/dataloader/core/util/TableMetadataUtil.java | 4 +++- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileTable.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileTable.java index 3216193bbf..c9b0626e8a 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileTable.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileTable.java @@ -30,6 +30,7 @@ public ControlFileTable(String namespace, String tableName) { /** * Added for mapping data to control file table object from API request + * * @param namespace namespace * @param tableName table name * @param mappings column name mapping from control file diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/tablemetadata/TableMetadataRequest.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/tablemetadata/TableMetadataRequest.java index 0ddd9ab686..c0e62f1c52 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/tablemetadata/TableMetadataRequest.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/tablemetadata/TableMetadataRequest.java @@ -19,5 +19,4 @@ public TableMetadataRequest(String namespace, String tableName) { this.namespace = namespace; this.tableName = tableName; } - } diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/TableMetadataUtil.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/TableMetadataUtil.java index cd3c8d94d5..9f10b9b3ea 100644 --- 
a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/TableMetadataUtil.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/TableMetadataUtil.java @@ -112,7 +112,9 @@ public static String getTableLookupKey(String namespace, String tableName) { */ public static String getTableLookupKey(ControlFileTable controlFileTable) { return String.format( - Constants.TABLE_LOOKUP_KEY_FORMAT, controlFileTable.getNamespace(), controlFileTable.getTableName()); + Constants.TABLE_LOOKUP_KEY_FORMAT, + controlFileTable.getNamespace(), + controlFileTable.getTableName()); } /** From e9bb00481a4b37bbd70acbf84118d31e88a4b3ce Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Thu, 19 Dec 2024 17:21:13 +0530 Subject: [PATCH 24/87] Added export options validator --- .../com/scalar/db/common/error/CoreError.java | 7 + .../ExportOptionsValidationException.java | 14 ++ .../validation/ExportOptionsValidator.java | 107 +++++++++++++ .../ExportOptionsValidatorTest.java | 149 ++++++++++++++++++ 4 files changed, 277 insertions(+) create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/validation/ExportOptionsValidationException.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/validation/ExportOptionsValidator.java create mode 100644 data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/validation/ExportOptionsValidatorTest.java diff --git a/core/src/main/java/com/scalar/db/common/error/CoreError.java b/core/src/main/java/com/scalar/db/common/error/CoreError.java index b02b3c45a6..b4d43a73c0 100644 --- a/core/src/main/java/com/scalar/db/common/error/CoreError.java +++ b/core/src/main/java/com/scalar/db/common/error/CoreError.java @@ -690,6 +690,13 @@ public enum CoreError implements ScalarDbError { ""), DATA_LOADER_ERROR_METHOD_NULL_ARGUMENT( Category.USER_ERROR, "0151", "Method null argument not allowed", "", ""), + DATA_LOADER_CLUSTERING_KEY_NOT_FOUND( + 
Category.USER_ERROR, "0152", "The provided clustering key %s was not found", "", "" + ), + DATA_LOADER_INVALID_PROJECTION( + Category.USER_ERROR, "0153", "The column '%s' was not found", "", "" + ), + // // Errors for the concurrency error category diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/validation/ExportOptionsValidationException.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/validation/ExportOptionsValidationException.java new file mode 100644 index 0000000000..42e342dec7 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/validation/ExportOptionsValidationException.java @@ -0,0 +1,14 @@ +package com.scalar.db.dataloader.core.dataexport.validation; + +/** A custom exception for export options validation errors */ +public class ExportOptionsValidationException extends Exception { + + /** + * Class constructor + * + * @param message error message + */ + public ExportOptionsValidationException(String message) { + super(message); + } +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/validation/ExportOptionsValidator.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/validation/ExportOptionsValidator.java new file mode 100644 index 0000000000..3aefe1ce31 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/validation/ExportOptionsValidator.java @@ -0,0 +1,107 @@ +package com.scalar.db.dataloader.core.dataexport.validation; + +import com.scalar.db.api.Scan; +import com.scalar.db.api.TableMetadata; +import com.scalar.db.common.error.CoreError; +import com.scalar.db.dataloader.core.ScanRange; +import com.scalar.db.dataloader.core.dataexport.ExportOptions; +import com.scalar.db.io.Key; +import java.util.LinkedHashSet; +import java.util.List; + +/** Validation for Export options */ +public class ExportOptionsValidator { + + /** + * Validate export request 
+ * + * @param exportOptions Export options + * @param tableMetadata Metadata for a single ScalarDB table + * @throws ExportOptionsValidationException when the options are invalid + */ + public static void validate(ExportOptions exportOptions, TableMetadata tableMetadata) + throws ExportOptionsValidationException { + LinkedHashSet clusteringKeyNames = tableMetadata.getClusteringKeyNames(); + ScanRange scanRange = exportOptions.getScanRange(); + + // validate projections + validateProjectionColumns(tableMetadata.getColumnNames(), exportOptions.getProjectionColumns()); + + // validate sorts + if (!exportOptions.getSortOrders().isEmpty()) { + for (Scan.Ordering sort : exportOptions.getSortOrders()) { + validateClusteringKey(clusteringKeyNames, sort.getColumnName()); + } + } + + // Validate scan start key + if (scanRange.getScanStartKey() != null) { + validateClusteringKey(clusteringKeyNames, scanRange.getScanStartKey()); + } + + // Validate scan end key + if (scanRange.getScanEndKey() != null) { + validateClusteringKey(clusteringKeyNames, scanRange.getScanEndKey()); + } + } + + /** + * Check if the provided clustering key is available in the ScalarDB table + * + * @param clusteringKeyNames List of clustering key names available in a + * @param key To be validated ScalarDB key + * @throws ExportOptionsValidationException if the key could not be found or is not a clustering + * key + */ + private static void validateClusteringKey(LinkedHashSet clusteringKeyNames, Key key) + throws ExportOptionsValidationException { + if (clusteringKeyNames == null) { + return; + } + String columnName = key.getColumnName(0); + validateClusteringKey(clusteringKeyNames, columnName); + } + + /** + * Check if the provided clustering key is available in the ScalarDB table + * + * @param clusteringKeyNames List of clustering key names available in a + * @param columnName Column name of the to be validated clustering key + * @throws ExportOptionsValidationException if the key could not be found 
or is not a clustering + * key + */ + private static void validateClusteringKey( + LinkedHashSet clusteringKeyNames, String columnName) + throws ExportOptionsValidationException { + if (clusteringKeyNames == null) { + return; + } + + if (!clusteringKeyNames.contains(columnName)) { + throw new ExportOptionsValidationException( + CoreError.DATA_LOADER_CLUSTERING_KEY_NOT_FOUND.buildMessage(columnName)); + } + } + + /** + * Check if the provided projection column names are available in the ScalarDB table + * + * @param columnNames List of ScalarDB table column names + * @param columns List of to be validated column names + * @throws ExportOptionsValidationException if the column name was not found in the table + */ + private static void validateProjectionColumns( + LinkedHashSet columnNames, List columns) + throws ExportOptionsValidationException { + if (columns == null || columns.isEmpty()) { + return; + } + for (String column : columns) { + // O(n) but list is always going to be very small, so it's ok + if (!columnNames.contains(column)) { + throw new ExportOptionsValidationException( + CoreError.DATA_LOADER_INVALID_PROJECTION.buildMessage(column)); + } + } + } +} diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/validation/ExportOptionsValidatorTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/validation/ExportOptionsValidatorTest.java new file mode 100644 index 0000000000..b34daaba2d --- /dev/null +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/validation/ExportOptionsValidatorTest.java @@ -0,0 +1,149 @@ +package com.scalar.db.dataloader.core.dataexport.validation; + +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import com.scalar.db.api.Scan; +import com.scalar.db.api.TableMetadata; +import com.scalar.db.common.error.CoreError; +import com.scalar.db.dataloader.core.FileFormat; +import com.scalar.db.dataloader.core.ScanRange; +import 
com.scalar.db.dataloader.core.dataexport.ExportOptions; +import com.scalar.db.io.DataType; +import com.scalar.db.io.IntColumn; +import com.scalar.db.io.Key; +import com.scalar.db.io.TextColumn; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class ExportOptionsValidatorTest { + + TableMetadata mockMetadata; + List projectedColumns; + + @BeforeEach + void setup() { + mockMetadata = + TableMetadata.newBuilder() + .addColumn("id", DataType.INT) + .addColumn("name", DataType.TEXT) + .addColumn("email", DataType.TEXT) + .addColumn("department", DataType.TEXT) + .addPartitionKey("id") + .addClusteringKey("department") + .build(); + projectedColumns = new ArrayList<>(); + projectedColumns.add("id"); + projectedColumns.add("name"); + projectedColumns.add("email"); + projectedColumns.add("department"); + } + + @Test + void validate_withValidExportOptions_ShouldNotThrowException() + throws ExportOptionsValidationException { + ExportOptions exportOptions = + ExportOptions.builder( + "test", + "sample", + Key.newBuilder().add(IntColumn.of("id", 1)).build(), + FileFormat.JSON) + .sortOrders(Collections.emptyList()) + .scanRange(new ScanRange(null, null, false, false)) + .projectionColumns(projectedColumns) + .build(); + ExportOptionsValidator.validate(exportOptions, mockMetadata); + } + + @Test + void validate_withInValidColumnInProjectionColumnList_ShouldThrowException() { + ExportOptions exportOptions = + ExportOptions.builder( + "test", + "sample", + Key.newBuilder().add(IntColumn.of("id", 1)).build(), + FileFormat.JSON) + .sortOrders(Collections.emptyList()) + .scanRange(new ScanRange(null, null, false, false)) + .projectionColumns(Collections.singletonList("sample")) + .build(); + assertThatThrownBy(() -> ExportOptionsValidator.validate(exportOptions, mockMetadata)) + .isInstanceOf(ExportOptionsValidationException.class) + 
.hasMessage(CoreError.DATA_LOADER_INVALID_PROJECTION.buildMessage("sample")); + } + + @Test + void validate_withInValidSortOrderWithMultipleClusteringKeys_ShouldThrowException() { + mockMetadata = + TableMetadata.newBuilder() + .addColumn("id", DataType.INT) + .addColumn("name", DataType.TEXT) + .addColumn("email", DataType.TEXT) + .addColumn("department", DataType.TEXT) + .addColumn("building", DataType.TEXT) + .addPartitionKey("id") + .addClusteringKey("department") + .addClusteringKey("building") + .build(); + ExportOptions exportOptions = + ExportOptions.builder( + "test", + "sample", + Key.newBuilder().add(IntColumn.of("id", 1)).build(), + FileFormat.JSON) + .sortOrders( + Collections.singletonList(new Scan.Ordering("name", Scan.Ordering.Order.ASC))) + .scanRange(new ScanRange(null, null, false, false)) + .projectionColumns(Collections.singletonList("id")) + .build(); + assertThatThrownBy(() -> ExportOptionsValidator.validate(exportOptions, mockMetadata)) + .isInstanceOf(ExportOptionsValidationException.class) + .hasMessage(CoreError.DATA_LOADER_CLUSTERING_KEY_NOT_FOUND.buildMessage("name")); + } + + @Test + void validate_withInValidKeyInSortRange_ShouldThrowException() { + ExportOptions exportOptions = + ExportOptions.builder( + "test", + "sample", + Key.newBuilder().add(IntColumn.of("id", 1)).build(), + FileFormat.JSON) + .sortOrders(Collections.emptyList()) + .scanRange( + new ScanRange( + Key.newBuilder().add(IntColumn.of("id", 1)).build(), + Key.newBuilder().add(IntColumn.of("id", 100)).build(), + false, + false)) + .projectionColumns(Collections.singletonList("id")) + .build(); + assertThatThrownBy(() -> ExportOptionsValidator.validate(exportOptions, mockMetadata)) + .isInstanceOf(ExportOptionsValidationException.class) + .hasMessage(CoreError.DATA_LOADER_CLUSTERING_KEY_NOT_FOUND.buildMessage("id")); + } + + @Test + void validate_withInValidEndKeyInSortRange_ShouldThrowException() { + ExportOptions exportOptions = + ExportOptions.builder( + "test", + 
"sample", + Key.newBuilder().add(IntColumn.of("id", 1)).build(), + FileFormat.JSON) + .sortOrders(Collections.emptyList()) + .scanRange( + new ScanRange( + Key.newBuilder().add(TextColumn.of("department", "sample")).build(), + Key.newBuilder().add(IntColumn.of("name", 100)).build(), + false, + false)) + .projectionColumns(projectedColumns) + .build(); + assertThatThrownBy(() -> ExportOptionsValidator.validate(exportOptions, mockMetadata)) + .isInstanceOf(ExportOptionsValidationException.class) + .hasMessage(CoreError.DATA_LOADER_CLUSTERING_KEY_NOT_FOUND.buildMessage("name")); + } +} From 03324e1837ca2b80880571e2fe592f1e0a7fcd18 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Thu, 19 Dec 2024 17:22:56 +0530 Subject: [PATCH 25/87] Minor change in test --- .../db/dataloader/core/dataimport/dao/ScalarDBDaoTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDaoTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDaoTest.java index d0571ddddf..c46843156f 100644 --- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDaoTest.java +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDaoTest.java @@ -17,7 +17,7 @@ class ScalarDBDaoTest { private ScalarDBDao dao; @BeforeEach - public void setUp() { + void setUp() { this.dao = new ScalarDBDao(); } From d6aaf85c89cd88e4345dc57287fc997de51fd227 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Thu, 19 Dec 2024 17:52:04 +0530 Subject: [PATCH 26/87] Applied spotless on CoreError --- .../main/java/com/scalar/db/common/error/CoreError.java | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/core/src/main/java/com/scalar/db/common/error/CoreError.java b/core/src/main/java/com/scalar/db/common/error/CoreError.java index b4d43a73c0..3a41f7b20b 100644 --- 
a/core/src/main/java/com/scalar/db/common/error/CoreError.java +++ b/core/src/main/java/com/scalar/db/common/error/CoreError.java @@ -691,12 +691,9 @@ public enum CoreError implements ScalarDbError { DATA_LOADER_ERROR_METHOD_NULL_ARGUMENT( Category.USER_ERROR, "0151", "Method null argument not allowed", "", ""), DATA_LOADER_CLUSTERING_KEY_NOT_FOUND( - Category.USER_ERROR, "0152", "The provided clustering key %s was not found", "", "" - ), + Category.USER_ERROR, "0152", "The provided clustering key %s was not found", "", ""), DATA_LOADER_INVALID_PROJECTION( - Category.USER_ERROR, "0153", "The column '%s' was not found", "", "" - ), - + Category.USER_ERROR, "0153", "The column '%s' was not found", "", ""), // // Errors for the concurrency error category From 4439dea7097f592de5f419ef52798c623f49a047 Mon Sep 17 00:00:00 2001 From: Peckstadt Yves Date: Fri, 20 Dec 2024 08:55:18 +0900 Subject: [PATCH 27/87] Make constructor private and improve javadocs --- .../validation/ExportOptionsValidator.java | 48 ++++++++++--------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/validation/ExportOptionsValidator.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/validation/ExportOptionsValidator.java index 3aefe1ce31..143fabfd33 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/validation/ExportOptionsValidator.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/validation/ExportOptionsValidator.java @@ -8,26 +8,32 @@ import com.scalar.db.io.Key; import java.util.LinkedHashSet; import java.util.List; +import lombok.AccessLevel; +import lombok.NoArgsConstructor; -/** Validation for Export options */ +/** + * A validator for ensuring that export options are consistent with the ScalarDB table metadata and + * follow the defined constraints. 
+ */ +@NoArgsConstructor(access = AccessLevel.PRIVATE) public class ExportOptionsValidator { /** - * Validate export request + * Validates the export request. * - * @param exportOptions Export options - * @param tableMetadata Metadata for a single ScalarDB table - * @throws ExportOptionsValidationException when the options are invalid + * @param exportOptions The export options provided by the user. + * @param tableMetadata The metadata of the ScalarDB table to validate against. + * @throws ExportOptionsValidationException If the export options are invalid. */ public static void validate(ExportOptions exportOptions, TableMetadata tableMetadata) throws ExportOptionsValidationException { LinkedHashSet clusteringKeyNames = tableMetadata.getClusteringKeyNames(); ScanRange scanRange = exportOptions.getScanRange(); - // validate projections + // Validate projection columns validateProjectionColumns(tableMetadata.getColumnNames(), exportOptions.getProjectionColumns()); - // validate sorts + // Validate sort orders if (!exportOptions.getSortOrders().isEmpty()) { for (Scan.Ordering sort : exportOptions.getSortOrders()) { validateClusteringKey(clusteringKeyNames, sort.getColumnName()); @@ -46,12 +52,11 @@ public static void validate(ExportOptions exportOptions, TableMetadata tableMeta } /** - * Check if the provided clustering key is available in the ScalarDB table + * Checks if the provided clustering key is valid for the ScalarDB table. * - * @param clusteringKeyNames List of clustering key names available in a - * @param key To be validated ScalarDB key - * @throws ExportOptionsValidationException if the key could not be found or is not a clustering - * key + * @param clusteringKeyNames The set of valid clustering key names for the table. + * @param key The ScalarDB key to validate. + * @throws ExportOptionsValidationException If the key is invalid or not a clustering key. 
*/ private static void validateClusteringKey(LinkedHashSet clusteringKeyNames, Key key) throws ExportOptionsValidationException { @@ -63,12 +68,11 @@ private static void validateClusteringKey(LinkedHashSet clusteringKeyNam } /** - * Check if the provided clustering key is available in the ScalarDB table + * Checks if the provided clustering key column name is valid for the ScalarDB table. * - * @param clusteringKeyNames List of clustering key names available in a - * @param columnName Column name of the to be validated clustering key - * @throws ExportOptionsValidationException if the key could not be found or is not a clustering - * key + * @param clusteringKeyNames The set of valid clustering key names for the table. + * @param columnName The column name of the clustering key to validate. + * @throws ExportOptionsValidationException If the column name is not a valid clustering key. */ private static void validateClusteringKey( LinkedHashSet clusteringKeyNames, String columnName) @@ -84,11 +88,11 @@ private static void validateClusteringKey( } /** - * Check if the provided projection column names are available in the ScalarDB table + * Checks if the provided projection column names are valid for the ScalarDB table. * - * @param columnNames List of ScalarDB table column names - * @param columns List of to be validated column names - * @throws ExportOptionsValidationException if the column name was not found in the table + * @param columnNames The set of valid column names for the table. + * @param columns The list of projection column names to validate. + * @throws ExportOptionsValidationException If any of the column names are invalid. 
*/ private static void validateProjectionColumns( LinkedHashSet columnNames, List columns) @@ -97,7 +101,7 @@ private static void validateProjectionColumns( return; } for (String column : columns) { - // O(n) but list is always going to be very small, so it's ok + // O(n) lookup, but acceptable given the typically small list size if (!columnNames.contains(column)) { throw new ExportOptionsValidationException( CoreError.DATA_LOADER_INVALID_PROJECTION.buildMessage(column)); From ccb1ace5e8aa61ae82c5d27cf2a0a162bdea398a Mon Sep 17 00:00:00 2001 From: Peckstadt Yves Date: Fri, 20 Dec 2024 09:02:09 +0900 Subject: [PATCH 28/87] Improve javadocs --- .../controlfile/ControlFileTable.java | 31 ++++++-- .../ControlFileTableFieldMapping.java | 15 +++- .../tablemetadata/TableMetadataService.java | 30 +++++--- .../core/util/TableMetadataUtil.java | 77 ++++++++----------- 4 files changed, 86 insertions(+), 67 deletions(-) diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileTable.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileTable.java index c9b0626e8a..e1d7c6a9d0 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileTable.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileTable.java @@ -7,33 +7,50 @@ import lombok.Getter; import lombok.Setter; -/** Represents the mapping for one table in the control file */ +/** + * Represents the configuration for a single table in the control file, including its namespace, + * table name, and field mappings. This class is used to define how data from a control file maps to + * a specific table in ScalarDB. + */ @Getter @Setter public class ControlFileTable { + /** The namespace of the table in ScalarDB. */ @JsonProperty("namespace") private String namespace; + /** The name of the table in ScalarDB. 
*/ @JsonProperty("table_name") private String tableName; + /** + * A list of mappings defining the correspondence between control file fields and table columns. + */ @JsonProperty("mappings") private final List mappings; - /** Class constructor */ + /** + * Creates a new {@code ControlFileTable} instance with the specified namespace and table name. + * The mappings list is initialized as an empty list. + * + * @param namespace The namespace of the table in ScalarDB. + * @param tableName The name of the table in ScalarDB. + */ public ControlFileTable(String namespace, String tableName) { - this.tableName = tableName; this.namespace = namespace; + this.tableName = tableName; this.mappings = new ArrayList<>(); } /** - * Added for mapping data to control file table object from API request + * Constructs a {@code ControlFileTable} instance using data from a serialized JSON object. This + * constructor is used for deserialization of API requests or control files. * - * @param namespace namespace - * @param tableName table name - * @param mappings column name mapping from control file + * @param namespace The namespace of the table in ScalarDB. + * @param tableName The name of the table in ScalarDB. + * @param mappings A list of mappings that define the relationship between control file fields and + * table columns. 
*/ @JsonCreator public ControlFileTable( diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileTableFieldMapping.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileTableFieldMapping.java index 064d5c430b..1068573304 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileTableFieldMapping.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileTableFieldMapping.java @@ -5,22 +5,29 @@ import lombok.Getter; import lombok.Setter; -/** Represents the one field mapping for a table mapping in the control file */ +/** + * Represents the mapping of a single field in the control file to a column in a ScalarDB table. + * This class defines how data from a specific field in the input source should be mapped to the + * corresponding column in the database. + */ @Getter @Setter public class ControlFileTableFieldMapping { + /** The name of the field in the input source (e.g., JSON or CSV). */ @JsonProperty("source_field") private String sourceField; + /** The name of the column in the ScalarDB table that the field maps to. */ @JsonProperty("target_column") private String targetColumn; /** - * Class constructor + * Constructs a {@code ControlFileTableFieldMapping} instance using data from a serialized JSON + * object. This constructor is primarily used for deserialization of control file mappings. * - * @param sourceField The data field in the provided json field - * @param targetColumn The column in the ScalarDB table + * @param sourceField The name of the field in the input source (e.g., JSON or CSV). + * @param targetColumn The name of the corresponding column in the ScalarDB table. 
*/ @JsonCreator public ControlFileTableFieldMapping( diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/tablemetadata/TableMetadataService.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/tablemetadata/TableMetadataService.java index 88d60f778e..4eea38a95d 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/tablemetadata/TableMetadataService.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/tablemetadata/TableMetadataService.java @@ -9,20 +9,26 @@ import java.util.Map; import lombok.RequiredArgsConstructor; +/** + * Service for retrieving {@link TableMetadata} from ScalarDB. Provides methods to fetch metadata + * for individual tables or a collection of tables. + */ @RequiredArgsConstructor public class TableMetadataService { + private static final String ERROR_MISSING_NAMESPACE_OR_TABLE = "Missing namespace or table: %s, %s"; private final DistributedStorageAdmin storageAdmin; /** - * Returns the TableMetadata for the given namespace and table name. + * Retrieves the {@link TableMetadata} for a specific namespace and table name. * - * @param namespace ScalarDb namespace - * @param tableName ScalarDb table name - * @return TableMetadata - * @throws TableMetadataException if the namespace or table is missing + * @param namespace The ScalarDB namespace. + * @param tableName The name of the table within the specified namespace. + * @return The {@link TableMetadata} object containing schema details of the specified table. + * @throws TableMetadataException If the table or namespace does not exist, or if an error occurs + * while fetching the metadata. */ public TableMetadata getTableMetadata(String namespace, String tableName) throws TableMetadataException { @@ -40,11 +46,17 @@ public TableMetadata getTableMetadata(String namespace, String tableName) } /** - * Returns the TableMetadata for the given list of TableMetadataRequest. 
+ * Retrieves the {@link TableMetadata} for a collection of table metadata requests. + * + *

Each request specifies a namespace and table name. The method consolidates the metadata into + * a map keyed by a unique lookup key generated for each table. * - * @param requests List of TableMetadataRequest - * @return Map of TableMetadata - * @throws TableMetadataException if the namespace or table is missing + * @param requests A collection of {@link TableMetadataRequest} objects specifying the tables to + * retrieve metadata for. + * @return A map where the keys are unique lookup keys (namespace + table name) and the values are + * the corresponding {@link TableMetadata} objects. + * @throws TableMetadataException If any of the requested tables or namespaces are missing, or if + * an error occurs while fetching the metadata. */ public Map getTableMetadata(Collection requests) throws TableMetadataException { diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/TableMetadataUtil.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/TableMetadataUtil.java index 9f10b9b3ea..0b165494d0 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/TableMetadataUtil.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/TableMetadataUtil.java @@ -15,54 +15,43 @@ import java.util.stream.Collectors; import java.util.stream.Stream; -/** Utils for ScalarDB table metadata */ +/** Utility class for handling ScalarDB table metadata operations. */ public class TableMetadataUtil { /** - * Check if the field is a metadata column or not + * Determines whether a given column is a metadata column based on predefined criteria. * - * @param columnName Table column name - * @param metadataColumns Fixed list of metadata columns - * @param columnNames List of all column names in a table - * @return The field is metadata or not + * @param columnName The name of the table column to check. + * @param metadataColumns A set of predefined metadata columns. 
+ * @param columnNames A set of all column names in the table. + * @return {@code true} if the column is a metadata column; {@code false} otherwise. */ public static boolean isMetadataColumn( String columnName, Set metadataColumns, Set columnNames) { - // Skip field if it can be ignored if (metadataColumns.contains(columnName)) { return true; } - - // Skip if the field is a "before_" field return columnName.startsWith(Attribute.BEFORE_PREFIX) && !columnNames.contains(Attribute.BEFORE_PREFIX + columnName); } /** - * Check if the field is a metadata column or not + * Determines whether a given column is a metadata column using table metadata. * - * @param columnName ScalarDB table column name5 - * @param tableMetadata Metadata for a single ScalarDB - * @return is the field a metadata column or not + * @param columnName The name of the ScalarDB table column to check. + * @param tableMetadata The metadata of the table. + * @return {@code true} if the column is a metadata column; {@code false} otherwise. */ public static boolean isMetadataColumn(String columnName, TableMetadata tableMetadata) { Set metadataColumns = getMetadataColumns(); LinkedHashSet columnNames = tableMetadata.getColumnNames(); - - // Skip field if it can be ignored - if (metadataColumns.contains(columnName)) { - return true; - } - - // Skip if the field is a "before_" field - return columnName.startsWith(Attribute.BEFORE_PREFIX) - && !columnNames.contains(Attribute.BEFORE_PREFIX + columnName); + return isMetadataColumn(columnName, metadataColumns, columnNames); } /** - * Return a list of fixed metadata columns + * Retrieves a set of fixed metadata column names used in ScalarDB. * - * @return Set of columns + * @return A set of predefined metadata column names. 
*/ public static Set getMetadataColumns() { return Stream.of( @@ -80,10 +69,10 @@ public static Set getMetadataColumns() { } /** - * Return a map with the data types for all columns in a ScalarDB table + * Extracts a mapping of column names to their data types from the table metadata. * - * @param tableMetadata Metadata for a single ScalarDB table - * @return data types map + * @param tableMetadata The metadata of the ScalarDB table. + * @return A map where keys are column names and values are their corresponding {@link DataType}. */ public static Map extractColumnDataTypes(TableMetadata tableMetadata) { Map definitions = new HashMap<>(); @@ -94,21 +83,21 @@ public static Map extractColumnDataTypes(TableMetadata tableMe } /** - * Return lookup key for a table in a namespace + * Generates a unique lookup key for a table within a namespace. * - * @param namespace Namespace - * @param tableName Table name - * @return Table metadata lookup key + * @param namespace The namespace of the table. + * @param tableName The name of the table. + * @return A formatted string representing the table lookup key. */ public static String getTableLookupKey(String namespace, String tableName) { return String.format(Constants.TABLE_LOOKUP_KEY_FORMAT, namespace, tableName); } /** - * Return lookup key for a table in a namespace + * Generates a unique lookup key for a table using control file table data. * - * @param controlFileTable Control file data mapping - * @return Table metadata lookup key + * @param controlFileTable The control file table object containing namespace and table name. + * @return A formatted string representing the table lookup key. 
*/ public static String getTableLookupKey(ControlFileTable controlFileTable) { return String.format( @@ -118,38 +107,32 @@ public static String getTableLookupKey(ControlFileTable controlFileTable) { } /** - * Populate the projection columns with metadata columns + * Adds metadata columns to a list of projection columns for a ScalarDB table. * - * @param tableMetadata Metadata for a single ScalarDB table - * @param projections List of projection columns - * @return List of projection columns with metadata columns + * @param tableMetadata The metadata of the ScalarDB table. + * @param projections A list of projection column names. + * @return A new list containing projection columns along with metadata columns. */ public static List populateProjectionsWithMetadata( TableMetadata tableMetadata, List projections) { List projectionMetadata = new ArrayList<>(); - - // Add projection columns along with metadata columns projections.forEach( projection -> { projectionMetadata.add(projection); if (!isKeyColumn(projection, tableMetadata)) { - // Add metadata column before the projection if it's not a key column projectionMetadata.add(Attribute.BEFORE_PREFIX + projection); } }); - - // Add fixed metadata columns projectionMetadata.addAll(getMetadataColumns()); - return projectionMetadata; } /** - * Checks if a column is a key column (partition key or clustering key) in the table. + * Checks whether a column is a key column (partition key or clustering key) in the table. * - * @param column The column name to check. + * @param column The name of the column to check. * @param tableMetadata The metadata of the ScalarDB table. - * @return True if the column is a key column, false otherwise. + * @return {@code true} if the column is a key column; {@code false} otherwise. 
*/ private static boolean isKeyColumn(String column, TableMetadata tableMetadata) { return tableMetadata.getPartitionKeyNames().contains(column) From a374f1a57d95d0cc10e845e5a96c9fa9d32bce9e Mon Sep 17 00:00:00 2001 From: Peckstadt Yves Date: Fri, 20 Dec 2024 09:02:35 +0900 Subject: [PATCH 29/87] Add private constructor to TableMetadataUtil --- .../com/scalar/db/dataloader/core/util/TableMetadataUtil.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/TableMetadataUtil.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/TableMetadataUtil.java index 0b165494d0..0c5daedd52 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/TableMetadataUtil.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/TableMetadataUtil.java @@ -5,6 +5,9 @@ import com.scalar.db.dataloader.core.dataimport.controlfile.ControlFileTable; import com.scalar.db.io.DataType; import com.scalar.db.transaction.consensuscommit.Attribute; +import lombok.AccessLevel; +import lombok.NoArgsConstructor; + import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; @@ -16,6 +19,7 @@ import java.util.stream.Stream; /** Utility class for handling ScalarDB table metadata operations. 
*/ +@NoArgsConstructor(access = AccessLevel.PRIVATE) public class TableMetadataUtil { /** From a65c9b5ba91d8d523198eb57e63e265ef8f6bfd3 Mon Sep 17 00:00:00 2001 From: Peckstadt Yves Date: Fri, 20 Dec 2024 09:24:08 +0900 Subject: [PATCH 30/87] Apply spotless fix --- .../scalar/db/dataloader/core/util/TableMetadataUtil.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/TableMetadataUtil.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/TableMetadataUtil.java index 0c5daedd52..acfd509d0f 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/TableMetadataUtil.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/TableMetadataUtil.java @@ -5,9 +5,6 @@ import com.scalar.db.dataloader.core.dataimport.controlfile.ControlFileTable; import com.scalar.db.io.DataType; import com.scalar.db.transaction.consensuscommit.Attribute; -import lombok.AccessLevel; -import lombok.NoArgsConstructor; - import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; @@ -17,6 +14,8 @@ import java.util.Set; import java.util.stream.Collectors; import java.util.stream.Stream; +import lombok.AccessLevel; +import lombok.NoArgsConstructor; /** Utility class for handling ScalarDB table metadata operations. 
*/ @NoArgsConstructor(access = AccessLevel.PRIVATE) From b3279baa7b08e7753e97a6028b3ece6fa0282dad Mon Sep 17 00:00:00 2001 From: Peckstadt Yves Date: Mon, 23 Dec 2024 17:22:49 +0900 Subject: [PATCH 31/87] Fix the validation for partition and clustering keys --- .../com/scalar/db/common/error/CoreError.java | 14 ++ .../validation/ExportOptionsValidator.java | 115 +++++++--- .../ExportOptionsValidatorTest.java | 206 ++++++++++-------- 3 files changed, 214 insertions(+), 121 deletions(-) diff --git a/core/src/main/java/com/scalar/db/common/error/CoreError.java b/core/src/main/java/com/scalar/db/common/error/CoreError.java index 3a41f7b20b..7535c3553d 100644 --- a/core/src/main/java/com/scalar/db/common/error/CoreError.java +++ b/core/src/main/java/com/scalar/db/common/error/CoreError.java @@ -694,6 +694,20 @@ public enum CoreError implements ScalarDbError { Category.USER_ERROR, "0152", "The provided clustering key %s was not found", "", ""), DATA_LOADER_INVALID_PROJECTION( Category.USER_ERROR, "0153", "The column '%s' was not found", "", ""), + DATA_LOADER_INCOMPLETE_PARTITION_KEY( + Category.USER_ERROR, "0154", "The provided partition key is incomplete. Required key: %s", "", ""), + DATA_LOADER_CLUSTERING_KEY_ORDER_MISMATCH( + Category.USER_ERROR, + "0155", + "The provided clustering key order does not match the table schema. Required order: %s", + "", + ""), + DATA_LOADER_PARTITION_KEY_ORDER_MISMATCH( + Category.USER_ERROR, + "0156", + "The provided partition key order does not match the table schema. 
Required order: %s", + "", + ""), // // Errors for the concurrency error category diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/validation/ExportOptionsValidator.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/validation/ExportOptionsValidator.java index 143fabfd33..7bf7645b0e 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/validation/ExportOptionsValidator.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/validation/ExportOptionsValidator.java @@ -5,7 +5,9 @@ import com.scalar.db.common.error.CoreError; import com.scalar.db.dataloader.core.ScanRange; import com.scalar.db.dataloader.core.dataexport.ExportOptions; +import com.scalar.db.io.Column; import com.scalar.db.io.Key; +import java.util.Iterator; import java.util.LinkedHashSet; import java.util.List; import lombok.AccessLevel; @@ -27,57 +29,107 @@ public class ExportOptionsValidator { */ public static void validate(ExportOptions exportOptions, TableMetadata tableMetadata) throws ExportOptionsValidationException { + LinkedHashSet partitionKeyNames = tableMetadata.getPartitionKeyNames(); LinkedHashSet clusteringKeyNames = tableMetadata.getClusteringKeyNames(); ScanRange scanRange = exportOptions.getScanRange(); - // Validate projection columns + validatePartitionKey(partitionKeyNames, exportOptions.getScanPartitionKey()); validateProjectionColumns(tableMetadata.getColumnNames(), exportOptions.getProjectionColumns()); + validateSortOrders(clusteringKeyNames, exportOptions.getSortOrders()); - // Validate sort orders - if (!exportOptions.getSortOrders().isEmpty()) { - for (Scan.Ordering sort : exportOptions.getSortOrders()) { - validateClusteringKey(clusteringKeyNames, sort.getColumnName()); - } - } - - // Validate scan start key if (scanRange.getScanStartKey() != null) { validateClusteringKey(clusteringKeyNames, scanRange.getScanStartKey()); } - - // Validate scan end key if 
(scanRange.getScanEndKey() != null) { validateClusteringKey(clusteringKeyNames, scanRange.getScanEndKey()); } } - /** - * Checks if the provided clustering key is valid for the ScalarDB table. - * - * @param clusteringKeyNames The set of valid clustering key names for the table. - * @param key The ScalarDB key to validate. - * @throws ExportOptionsValidationException If the key is invalid or not a clustering key. + /** + * Checks if the provided partition key is valid for the ScalarDB table. + * @param partitionKeyNames The set of partition key names defined for the ScalarDB table. + * @param key The ScalarDB key to validate. + * @throws ExportOptionsValidationException If the key is incomplete or is not a valid partition key. */ - private static void validateClusteringKey(LinkedHashSet clusteringKeyNames, Key key) + private static void validatePartitionKey(LinkedHashSet partitionKeyNames, Key key) + throws ExportOptionsValidationException { + if (partitionKeyNames == null || key == null) { + return; + } + + // Make sure that all partition key columns are provided + if (partitionKeyNames.size() != key.getColumns().size()) { + throw new ExportOptionsValidationException( + CoreError.DATA_LOADER_INCOMPLETE_PARTITION_KEY.buildMessage(partitionKeyNames)); + } + + // Check if the order of columns in key.getColumns() matches the order in partitionKeyNames + Iterator partitionKeyIterator = partitionKeyNames.iterator(); + for (Column column : key.getColumns()) { + // Check if the column names match in order + if (!partitionKeyIterator.hasNext() + || !partitionKeyIterator.next().equals(column.getName())) { + throw new ExportOptionsValidationException( + CoreError.DATA_LOADER_PARTITION_KEY_ORDER_MISMATCH.buildMessage(partitionKeyNames)); + } + } + } + + private static void validateSortOrders( + LinkedHashSet clusteringKeyNames, List sortOrders) throws ExportOptionsValidationException { - if (clusteringKeyNames == null) { + if (sortOrders == null || sortOrders.isEmpty()) { return; } - String columnName
= key.getColumnName(0); - validateClusteringKey(clusteringKeyNames, columnName); + + for (Scan.Ordering sortOrder : sortOrders) { + checkIfColumnExistsAsClusteringKey(clusteringKeyNames, sortOrder.getColumnName()); + } } /** - * Checks if the provided clustering key column name is valid for the ScalarDB table. + * Validates that the clustering key columns in the given Key object match the expected order + * defined in the clusteringKeyNames. The Key can be a prefix of the clusteringKeyNames, but the + * order must remain consistent. * - * @param clusteringKeyNames The set of valid clustering key names for the table. - * @param columnName The column name of the clustering key to validate. - * @throws ExportOptionsValidationException If the column name is not a valid clustering key. + * @param clusteringKeyNames the expected ordered set of clustering key names + * @param key the Key object containing the actual clustering key columns + * @throws ExportOptionsValidationException if the order or names of clustering keys do not match */ - private static void validateClusteringKey( + private static void validateClusteringKey(LinkedHashSet clusteringKeyNames, Key key) + throws ExportOptionsValidationException { + // If either clusteringKeyNames or key is null, no validation is needed + if (clusteringKeyNames == null || key == null) { + return; + } + + // Create an iterator to traverse the clusteringKeyNames in order + Iterator clusteringKeyIterator = clusteringKeyNames.iterator(); + + // Iterate through the columns in the given Key + for (Column column : key.getColumns()) { + // If clusteringKeyNames have been exhausted but columns still exist in the Key, + // it indicates a mismatch + if (!clusteringKeyIterator.hasNext()) { + throw new ExportOptionsValidationException( + CoreError.DATA_LOADER_CLUSTERING_KEY_ORDER_MISMATCH.buildMessage(clusteringKeyNames)); + } + + // Get the next expected clustering key name + String expectedKey = clusteringKeyIterator.next(); + + // 
Check if the current column name matches the expected clustering key name + if (!column.getName().equals(expectedKey)) { + throw new ExportOptionsValidationException( + CoreError.DATA_LOADER_CLUSTERING_KEY_ORDER_MISMATCH.buildMessage(clusteringKeyNames)); + } + } + } + + private static void checkIfColumnExistsAsClusteringKey( LinkedHashSet clusteringKeyNames, String columnName) throws ExportOptionsValidationException { - if (clusteringKeyNames == null) { + if (clusteringKeyNames == null || columnName == null) { return; } @@ -87,21 +139,14 @@ private static void validateClusteringKey( } } - /** - * Checks if the provided projection column names are valid for the ScalarDB table. - * - * @param columnNames The set of valid column names for the table. - * @param columns The list of projection column names to validate. - * @throws ExportOptionsValidationException If any of the column names are invalid. - */ private static void validateProjectionColumns( LinkedHashSet columnNames, List columns) throws ExportOptionsValidationException { if (columns == null || columns.isEmpty()) { return; } + for (String column : columns) { - // O(n) lookup, but acceptable given the typically small list size if (!columnNames.contains(column)) { throw new ExportOptionsValidationException( CoreError.DATA_LOADER_INVALID_PROJECTION.buildMessage(column)); diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/validation/ExportOptionsValidatorTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/validation/ExportOptionsValidatorTest.java index b34daaba2d..b36522a0fc 100644 --- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/validation/ExportOptionsValidatorTest.java +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/validation/ExportOptionsValidatorTest.java @@ -2,7 +2,6 @@ import static org.assertj.core.api.Assertions.assertThatThrownBy; -import com.scalar.db.api.Scan; import 
com.scalar.db.api.TableMetadata; import com.scalar.db.common.error.CoreError; import com.scalar.db.dataloader.core.FileFormat; @@ -14,136 +13,171 @@ import com.scalar.db.io.TextColumn; import java.util.ArrayList; import java.util.Collections; +import java.util.LinkedHashSet; import java.util.List; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; class ExportOptionsValidatorTest { - TableMetadata mockMetadata; - List projectedColumns; + private TableMetadata singlePkCkMetadata; + private TableMetadata multiplePkCkMetadata; + private List projectedColumns; @BeforeEach void setup() { - mockMetadata = - TableMetadata.newBuilder() - .addColumn("id", DataType.INT) - .addColumn("name", DataType.TEXT) - .addColumn("email", DataType.TEXT) - .addColumn("department", DataType.TEXT) - .addPartitionKey("id") - .addClusteringKey("department") - .build(); - projectedColumns = new ArrayList<>(); - projectedColumns.add("id"); - projectedColumns.add("name"); - projectedColumns.add("email"); - projectedColumns.add("department"); + singlePkCkMetadata = createMockMetadata(1, 1); + multiplePkCkMetadata = createMockMetadata(2, 2); + projectedColumns = createProjectedColumns(); + } + + private TableMetadata createMockMetadata(int pkCount, int ckCount) { + TableMetadata.Builder builder = TableMetadata.newBuilder(); + + // Add partition keys + for (int i = 1; i <= pkCount; i++) { + builder.addColumn("pk" + i, DataType.INT); + builder.addPartitionKey("pk" + i); + } + + // Add clustering keys + for (int i = 1; i <= ckCount; i++) { + builder.addColumn("ck" + i, DataType.TEXT); + builder.addClusteringKey("ck" + i); + } + + return builder.build(); + } + + private List createProjectedColumns() { + List columns = new ArrayList<>(); + columns.add("pk1"); + columns.add("ck1"); + return columns; } @Test - void validate_withValidExportOptions_ShouldNotThrowException() + void validate_withValidExportOptionsForSinglePkCk_ShouldNotThrowException() throws 
ExportOptionsValidationException { + + Key partitionKey = Key.newBuilder().add(IntColumn.of("pk1", 1)).build(); + ExportOptions exportOptions = - ExportOptions.builder( - "test", - "sample", - Key.newBuilder().add(IntColumn.of("id", 1)).build(), - FileFormat.JSON) - .sortOrders(Collections.emptyList()) - .scanRange(new ScanRange(null, null, false, false)) + ExportOptions.builder("test", "sample", partitionKey, FileFormat.JSON) .projectionColumns(projectedColumns) + .scanRange(new ScanRange(null, null, false, false)) .build(); - ExportOptionsValidator.validate(exportOptions, mockMetadata); + + ExportOptionsValidator.validate(exportOptions, singlePkCkMetadata); } @Test - void validate_withInValidColumnInProjectionColumnList_ShouldThrowException() { + void validate_withValidExportOptionsForMultiplePkCk_ShouldNotThrowException() + throws ExportOptionsValidationException { + + Key partitionKey = + Key.newBuilder().add(IntColumn.of("pk1", 1)).add(IntColumn.of("pk2", 2)).build(); + ExportOptions exportOptions = - ExportOptions.builder( - "test", - "sample", - Key.newBuilder().add(IntColumn.of("id", 1)).build(), - FileFormat.JSON) - .sortOrders(Collections.emptyList()) + ExportOptions.builder("test", "sample", partitionKey, FileFormat.JSON) + .projectionColumns(projectedColumns) .scanRange(new ScanRange(null, null, false, false)) - .projectionColumns(Collections.singletonList("sample")) .build(); - assertThatThrownBy(() -> ExportOptionsValidator.validate(exportOptions, mockMetadata)) + + ExportOptionsValidator.validate(exportOptions, multiplePkCkMetadata); + } + + @Test + void validate_withIncompletePartitionKeyForSinglePk_ShouldThrowException() { + Key incompletePartitionKey = Key.newBuilder().build(); + + ExportOptions exportOptions = + ExportOptions.builder("test", "sample", incompletePartitionKey, FileFormat.JSON).build(); + + assertThatThrownBy(() -> ExportOptionsValidator.validate(exportOptions, singlePkCkMetadata)) 
.isInstanceOf(ExportOptionsValidationException.class) - .hasMessage(CoreError.DATA_LOADER_INVALID_PROJECTION.buildMessage("sample")); + .hasMessage( + CoreError.DATA_LOADER_INCOMPLETE_PARTITION_KEY.buildMessage( + singlePkCkMetadata.getPartitionKeyNames())); } @Test - void validate_withInValidSortOrderWithMultipleClusteringKeys_ShouldThrowException() { - mockMetadata = - TableMetadata.newBuilder() - .addColumn("id", DataType.INT) - .addColumn("name", DataType.TEXT) - .addColumn("email", DataType.TEXT) - .addColumn("department", DataType.TEXT) - .addColumn("building", DataType.TEXT) - .addPartitionKey("id") - .addClusteringKey("department") - .addClusteringKey("building") - .build(); + void validate_withIncompletePartitionKeyForMultiplePks_ShouldThrowException() { + Key incompletePartitionKey = Key.newBuilder().add(IntColumn.of("pk1", 1)).build(); + + ExportOptions exportOptions = + ExportOptions.builder("test", "sample", incompletePartitionKey, FileFormat.JSON).build(); + + assertThatThrownBy(() -> ExportOptionsValidator.validate(exportOptions, multiplePkCkMetadata)) + .isInstanceOf(ExportOptionsValidationException.class) + .hasMessage( + CoreError.DATA_LOADER_INCOMPLETE_PARTITION_KEY.buildMessage( + multiplePkCkMetadata.getPartitionKeyNames())); + } + + @Test + void validate_withInvalidProjectionColumn_ShouldThrowException() { ExportOptions exportOptions = ExportOptions.builder( "test", "sample", - Key.newBuilder().add(IntColumn.of("id", 1)).build(), + Key.newBuilder().add(IntColumn.of("pk1", 1)).build(), FileFormat.JSON) - .sortOrders( - Collections.singletonList(new Scan.Ordering("name", Scan.Ordering.Order.ASC))) - .scanRange(new ScanRange(null, null, false, false)) - .projectionColumns(Collections.singletonList("id")) + .projectionColumns(Collections.singletonList("invalid_column")) .build(); - assertThatThrownBy(() -> ExportOptionsValidator.validate(exportOptions, mockMetadata)) + + assertThatThrownBy(() -> ExportOptionsValidator.validate(exportOptions, 
singlePkCkMetadata)) .isInstanceOf(ExportOptionsValidationException.class) - .hasMessage(CoreError.DATA_LOADER_CLUSTERING_KEY_NOT_FOUND.buildMessage("name")); + .hasMessage(CoreError.DATA_LOADER_INVALID_PROJECTION.buildMessage("invalid_column")); } @Test - void validate_withInValidKeyInSortRange_ShouldThrowException() { + void validate_withInvalidClusteringKeyInScanRange_ShouldThrowException() { + ScanRange scanRange = + new ScanRange( + Key.newBuilder().add(TextColumn.of("invalid_ck", "value")).build(), + Key.newBuilder().add(TextColumn.of("ck1", "value")).build(), + false, + false); + ExportOptions exportOptions = - ExportOptions.builder( - "test", - "sample", - Key.newBuilder().add(IntColumn.of("id", 1)).build(), - FileFormat.JSON) - .sortOrders(Collections.emptyList()) - .scanRange( - new ScanRange( - Key.newBuilder().add(IntColumn.of("id", 1)).build(), - Key.newBuilder().add(IntColumn.of("id", 100)).build(), - false, - false)) - .projectionColumns(Collections.singletonList("id")) + ExportOptions.builder("test", "sample", createValidPartitionKey(), FileFormat.JSON) + .scanRange(scanRange) .build(); - assertThatThrownBy(() -> ExportOptionsValidator.validate(exportOptions, mockMetadata)) + + assertThatThrownBy(() -> ExportOptionsValidator.validate(exportOptions, singlePkCkMetadata)) .isInstanceOf(ExportOptionsValidationException.class) - .hasMessage(CoreError.DATA_LOADER_CLUSTERING_KEY_NOT_FOUND.buildMessage("id")); + .hasMessage(CoreError.DATA_LOADER_CLUSTERING_KEY_ORDER_MISMATCH.buildMessage("[ck1]")); } @Test - void validate_withInValidEndKeyInSortRange_ShouldThrowException() { + void validate_withInvalidPartitionKeyOrder_ShouldThrowException() { + // Partition key names are expected to be "pk1", "pk2" + LinkedHashSet partitionKeyNames = new LinkedHashSet<>(); + partitionKeyNames.add("pk1"); + partitionKeyNames.add("pk2"); + + // Create a partition key with reversed order, expecting an error + Key invalidPartitionKey = + Key.newBuilder() + 
.add(IntColumn.of("pk2", 2)) // Incorrect order + .add(IntColumn.of("pk1", 1)) // Incorrect order + .build(); + ExportOptions exportOptions = - ExportOptions.builder( - "test", - "sample", - Key.newBuilder().add(IntColumn.of("id", 1)).build(), - FileFormat.JSON) - .sortOrders(Collections.emptyList()) - .scanRange( - new ScanRange( - Key.newBuilder().add(TextColumn.of("department", "sample")).build(), - Key.newBuilder().add(IntColumn.of("name", 100)).build(), - false, - false)) + ExportOptions.builder("test", "sample", invalidPartitionKey, FileFormat.JSON) .projectionColumns(projectedColumns) + .scanRange(new ScanRange(null, null, false, false)) .build(); - assertThatThrownBy(() -> ExportOptionsValidator.validate(exportOptions, mockMetadata)) + + // Verify that the validator throws the correct exception + assertThatThrownBy(() -> ExportOptionsValidator.validate(exportOptions, multiplePkCkMetadata)) .isInstanceOf(ExportOptionsValidationException.class) - .hasMessage(CoreError.DATA_LOADER_CLUSTERING_KEY_NOT_FOUND.buildMessage("name")); + .hasMessage( + CoreError.DATA_LOADER_PARTITION_KEY_ORDER_MISMATCH.buildMessage(partitionKeyNames)); + } + + private Key createValidPartitionKey() { + return Key.newBuilder().add(IntColumn.of("pk1", 1)).build(); } } From 78a817067a710673d6671cbab4af4f1050a4e8bf Mon Sep 17 00:00:00 2001 From: Peckstadt Yves Date: Mon, 23 Dec 2024 17:34:37 +0900 Subject: [PATCH 32/87] Fix spotless format --- .../com/scalar/db/common/error/CoreError.java | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/core/src/main/java/com/scalar/db/common/error/CoreError.java b/core/src/main/java/com/scalar/db/common/error/CoreError.java index 7535c3553d..7a8d518320 100644 --- a/core/src/main/java/com/scalar/db/common/error/CoreError.java +++ b/core/src/main/java/com/scalar/db/common/error/CoreError.java @@ -695,7 +695,11 @@ public enum CoreError implements ScalarDbError { DATA_LOADER_INVALID_PROJECTION( Category.USER_ERROR, "0153", 
"The column '%s' was not found", "", ""), DATA_LOADER_INCOMPLETE_PARTITION_KEY( - Category.USER_ERROR, "0154", "The provided partition key is incomplete. Required key: %s", "", ""), + Category.USER_ERROR, + "0154", + "The provided partition key is incomplete. Required key: %s", + "", + ""), DATA_LOADER_CLUSTERING_KEY_ORDER_MISMATCH( Category.USER_ERROR, "0155", @@ -703,11 +707,11 @@ public enum CoreError implements ScalarDbError { "", ""), DATA_LOADER_PARTITION_KEY_ORDER_MISMATCH( - Category.USER_ERROR, - "0156", - "The provided partition key order does not match the table schema. Required order: %s", - "", - ""), + Category.USER_ERROR, + "0156", + "The provided partition key order does not match the table schema. Required order: %s", + "", + ""), // // Errors for the concurrency error category From acedabe6f596e27a14d480cab73e10ee456e32bf Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Tue, 24 Dec 2024 13:47:27 +0530 Subject: [PATCH 33/87] Partial feedback changes --- .../com/scalar/db/common/error/CoreError.java | 4 +- .../core/dataimport/dao/ScalarDBDao.java | 110 ++++++++---------- .../core/dataimport/dao/ScalarDBManager.java | 3 +- 3 files changed, 54 insertions(+), 63 deletions(-) diff --git a/core/src/main/java/com/scalar/db/common/error/CoreError.java b/core/src/main/java/com/scalar/db/common/error/CoreError.java index 637a549dad..d6a4492264 100644 --- a/core/src/main/java/com/scalar/db/common/error/CoreError.java +++ b/core/src/main/java/com/scalar/db/common/error/CoreError.java @@ -944,13 +944,13 @@ public enum CoreError implements ScalarDbError { DATA_LOADER_ERROR_CRUD_EXCEPTION( Category.INTERNAL_ERROR, "0047", - "something went wrong while trying to save the data", + "Something went wrong while trying to save the data. Details %s", "", ""), DATA_LOADER_ERROR_SCAN( Category.INTERNAL_ERROR, "0048", - "Something went wrong while scanning. Are you sure you are running in the correct transaction mode?", + "Something went wrong while scanning. 
Are you sure you are running in the correct transaction mode? Details %s", "", ""), diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDao.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDao.java index 8f3556818b..e7270de8eb 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDao.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDao.java @@ -1,14 +1,7 @@ package com.scalar.db.dataloader.core.dataimport.dao; -import com.scalar.db.api.DistributedStorage; -import com.scalar.db.api.DistributedTransaction; -import com.scalar.db.api.Get; -import com.scalar.db.api.Put; +import com.scalar.db.api.*; import com.scalar.db.api.PutBuilder.Buildable; -import com.scalar.db.api.Result; -import com.scalar.db.api.Scan; -import com.scalar.db.api.ScanBuilder; -import com.scalar.db.api.Scanner; import com.scalar.db.common.error.CoreError; import com.scalar.db.dataloader.core.ScanRange; import com.scalar.db.exception.storage.ExecutionException; @@ -37,7 +30,7 @@ public class ScalarDBDao { * Retrieve record from ScalarDB instance in storage mode * * @param namespace Namespace name - * @param tableName Table name + * @param table Table name * @param partitionKey Partition key * @param clusteringKey Optional clustering key for get * @param storage Distributed storage for ScalarDB connection that is running in storage mode. 
@@ -46,7 +39,7 @@ public class ScalarDBDao { */ public Optional get( String namespace, - String tableName, + String table, Key partitionKey, Key clusteringKey, DistributedStorage storage) @@ -56,7 +49,7 @@ public Optional get( String loggingKey = keysToString(partitionKey, clusteringKey); try { - Get get = createGetWith(namespace, tableName, partitionKey, clusteringKey); + Get get = createGetWith(namespace, table, partitionKey, clusteringKey); Optional result = storage.get(get); logger.info(String.format(GET_COMPLETED_MSG, loggingKey)); return result; @@ -69,7 +62,7 @@ public Optional get( * Retrieve record from ScalarDB instance in transaction mode * * @param namespace Namespace name - * @param tableName Table name + * @param table Table name * @param partitionKey Partition key * @param clusteringKey Optional clustering key for get * @param transaction ScalarDB transaction instance @@ -78,13 +71,13 @@ public Optional get( */ public Optional get( String namespace, - String tableName, + String table, Key partitionKey, Key clusteringKey, DistributedTransaction transaction) throws ScalarDBDaoException { - Get get = createGetWith(namespace, tableName, partitionKey, clusteringKey); + Get get = createGetWith(namespace, table, partitionKey, clusteringKey); // Retrieving the key data for logging String loggingKey = keysToString(partitionKey, clusteringKey); try { @@ -100,7 +93,7 @@ public Optional get( * Save record in ScalarDB instance * * @param namespace Namespace name - * @param tableName Table name + * @param table Table name * @param partitionKey Partition key * @param clusteringKey Optional clustering key * @param columns List of column values to be inserted or updated @@ -109,18 +102,19 @@ public Optional get( */ public void put( String namespace, - String tableName, + String table, Key partitionKey, Key clusteringKey, List> columns, DistributedTransaction transaction) throws ScalarDBDaoException { - Put put = createPutWith(namespace, tableName, partitionKey, 
clusteringKey, columns); + Put put = createPutWith(namespace, table, partitionKey, clusteringKey, columns); try { transaction.put(put); } catch (CrudException e) { - throw new ScalarDBDaoException(CoreError.DATA_LOADER_ERROR_CRUD_EXCEPTION.buildMessage(), e); + throw new ScalarDBDaoException( + CoreError.DATA_LOADER_ERROR_CRUD_EXCEPTION.buildMessage(e.getMessage()), e); } logger.info(String.format(PUT_COMPLETED_MSG, keysToString(partitionKey, clusteringKey))); } @@ -129,7 +123,7 @@ public void put( * Save record in ScalarDB instance * * @param namespace Namespace name - * @param tableName Table name + * @param table Table name * @param partitionKey Partition key * @param clusteringKey Optional clustering key * @param columns List of column values to be inserted or updated @@ -138,17 +132,18 @@ public void put( */ public void put( String namespace, - String tableName, + String table, Key partitionKey, Key clusteringKey, List> columns, DistributedStorage storage) throws ScalarDBDaoException { - Put put = createPutWith(namespace, tableName, partitionKey, clusteringKey, columns); + Put put = createPutWith(namespace, table, partitionKey, clusteringKey, columns); try { storage.put(put); } catch (ExecutionException e) { - throw new ScalarDBDaoException(CoreError.DATA_LOADER_ERROR_CRUD_EXCEPTION.buildMessage(), e); + throw new ScalarDBDaoException( + CoreError.DATA_LOADER_ERROR_CRUD_EXCEPTION.buildMessage(e.getMessage()), e); } logger.info(String.format(PUT_COMPLETED_MSG, keysToString(partitionKey, clusteringKey))); } @@ -157,7 +152,7 @@ public void put( * Scan a ScalarDB table * * @param namespace ScalarDB namespace - * @param tableName ScalarDB table name + * @param table ScalarDB table name * @param partitionKey Partition key used in ScalarDB scan * @param range Optional range to set ScalarDB scan start and end values * @param sorts Optional scan clustering key sorting values @@ -169,7 +164,7 @@ public void put( */ public List scan( String namespace, - String tableName, 
+ String table, Key partitionKey, ScanRange range, List sorts, @@ -178,7 +173,7 @@ public List scan( DistributedStorage storage) throws ScalarDBDaoException { // Create scan - Scan scan = createScan(namespace, tableName, partitionKey, range, sorts, projections, limit); + Scan scan = createScan(namespace, table, partitionKey, range, sorts, projections, limit); // scan data try { @@ -189,7 +184,8 @@ public List scan( logger.info(SCAN_END_MSG); return allResults; } catch (ExecutionException | IOException e) { - throw new ScalarDBDaoException(CoreError.DATA_LOADER_ERROR_SCAN.buildMessage(), e); + throw new ScalarDBDaoException( + CoreError.DATA_LOADER_ERROR_SCAN.buildMessage(e.getMessage()), e); } } @@ -197,7 +193,7 @@ public List scan( * Scan a ScalarDB table * * @param namespace ScalarDB namespace - * @param tableName ScalarDB table name + * @param table ScalarDB table name * @param partitionKey Partition key used in ScalarDB scan * @param range Optional range to set ScalarDB scan start and end values * @param sorts Optional scan clustering key sorting values @@ -210,7 +206,7 @@ public List scan( */ public List scan( String namespace, - String tableName, + String table, Key partitionKey, ScanRange range, List sorts, @@ -220,7 +216,7 @@ public List scan( throws ScalarDBDaoException { // Create scan - Scan scan = createScan(namespace, tableName, partitionKey, range, sorts, projections, limit); + Scan scan = createScan(namespace, table, partitionKey, range, sorts, projections, limit); // scan data try { @@ -231,7 +227,8 @@ public List scan( } catch (CrudException | NoSuchElementException e) { // No such element Exception is thrown when the scan is done in transaction mode but // ScalarDB is running in storage mode - throw new ScalarDBDaoException(CoreError.DATA_LOADER_ERROR_SCAN.buildMessage(), e); + throw new ScalarDBDaoException( + CoreError.DATA_LOADER_ERROR_SCAN.buildMessage(e.getMessage()), e); } } @@ -239,7 +236,7 @@ public List scan( * Create a ScalarDB scanner 
instance * * @param namespace ScalarDB namespace - * @param tableName ScalarDB table name + * @param table ScalarDB table name * @param projectionColumns List of column projection to use during scan * @param limit Scan limit value * @param storage Distributed storage for ScalarDB connection that is running in storage mode @@ -248,17 +245,18 @@ public List scan( */ public Scanner createScanner( String namespace, - String tableName, + String table, List projectionColumns, int limit, DistributedStorage storage) throws ScalarDBDaoException { Scan scan = - createScan(namespace, tableName, null, null, new ArrayList<>(), projectionColumns, limit); + createScan(namespace, table, null, null, new ArrayList<>(), projectionColumns, limit); try { return storage.scan(scan); } catch (ExecutionException e) { - throw new ScalarDBDaoException(CoreError.DATA_LOADER_ERROR_SCAN.buildMessage(), e); + throw new ScalarDBDaoException( + CoreError.DATA_LOADER_ERROR_SCAN.buildMessage(e.getMessage()), e); } } @@ -266,7 +264,7 @@ public Scanner createScanner( * Create a ScalarDB scanner instance * * @param namespace ScalarDB namespace - * @param tableName ScalarDB table name + * @param table ScalarDB table name * @param partitionKey Partition key used in ScalarDB scan * @param scanRange Optional range to set ScalarDB scan start and end values * @param sortOrders Optional scan clustering key sorting values @@ -278,7 +276,7 @@ public Scanner createScanner( */ public Scanner createScanner( String namespace, - String tableName, + String table, Key partitionKey, ScanRange scanRange, List sortOrders, @@ -287,12 +285,12 @@ public Scanner createScanner( DistributedStorage storage) throws ScalarDBDaoException { Scan scan = - createScan( - namespace, tableName, partitionKey, scanRange, sortOrders, projectionColumns, limit); + createScan(namespace, table, partitionKey, scanRange, sortOrders, projectionColumns, limit); try { return storage.scan(scan); } catch (ExecutionException e) { - throw new 
ScalarDBDaoException(CoreError.DATA_LOADER_ERROR_SCAN.buildMessage(), e); + throw new ScalarDBDaoException( + CoreError.DATA_LOADER_ERROR_SCAN.buildMessage(e.getMessage()), e); } } @@ -300,7 +298,7 @@ public Scanner createScanner( * Create ScalarDB scan instance * * @param namespace ScalarDB namespace - * @param tableName ScalarDB table name + * @param table ScalarDB table name * @param partitionKey Partition key used in ScalarDB scan * @param scanRange Optional range to set ScalarDB scan start and end values * @param sortOrders Optional scan clustering key sorting values @@ -310,7 +308,7 @@ public Scanner createScanner( */ Scan createScan( String namespace, - String tableName, + String table, Key partitionKey, ScanRange scanRange, List sortOrders, @@ -319,7 +317,7 @@ Scan createScan( // If no partition key is provided a scan all is created if (partitionKey == null) { ScanBuilder.BuildableScanAll buildableScanAll = - Scan.newBuilder().namespace(namespace).table(tableName).all(); + Scan.newBuilder().namespace(namespace).table(table).all(); // projection columns if (projectionColumns != null && !projectionColumns.isEmpty()) { @@ -335,7 +333,7 @@ Scan createScan( // Create a scan with partition key (not a scan all) ScanBuilder.BuildableScan buildableScan = - Scan.newBuilder().namespace(namespace).table(tableName).partitionKey(partitionKey); + Scan.newBuilder().namespace(namespace).table(table).partitionKey(partitionKey); // Set the scan boundary if (scanRange != null) { @@ -371,33 +369,25 @@ Scan createScan( * Return a ScalarDB get based on provided parameters * * @param namespace Namespace name - * @param tableName Table name + * @param table Table name * @param partitionKey Partition key * @param clusteringKey Optional clustering key for get * @return ScalarDB Get instance */ - private Get createGetWith( - String namespace, String tableName, Key partitionKey, Key clusteringKey) { + private Get createGetWith(String namespace, String table, Key partitionKey, Key 
clusteringKey) { + GetBuilder.BuildableGetWithPartitionKey buildable = + Get.newBuilder().namespace(namespace).table(table).partitionKey(partitionKey); if (clusteringKey != null) { - return Get.newBuilder() - .namespace(namespace) - .table(tableName) - .partitionKey(partitionKey) - .clusteringKey(clusteringKey) - .build(); + buildable.clusteringKey(clusteringKey); } - return Get.newBuilder() - .namespace(namespace) - .table(tableName) - .partitionKey(partitionKey) - .build(); + return buildable.build(); } /** * Return a ScalarDB put based on provided parameters * * @param namespace Namespace name - * @param tableName Table name + * @param table Table name * @param partitionKey Partition key * @param clusteringKey Optional clustering key * @param columns List of column values @@ -405,12 +395,12 @@ private Get createGetWith( */ private Put createPutWith( String namespace, - String tableName, + String table, Key partitionKey, Key clusteringKey, List> columns) { Buildable buildable = - Put.newBuilder().namespace(namespace).table(tableName).partitionKey(partitionKey); + Put.newBuilder().namespace(namespace).table(table).partitionKey(partitionKey); if (clusteringKey != null) { buildable.clusteringKey(clusteringKey); } diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBManager.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBManager.java index ac246d8354..1016eaaba4 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBManager.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBManager.java @@ -7,6 +7,7 @@ import com.scalar.db.service.StorageFactory; import com.scalar.db.service.TransactionFactory; import java.io.IOException; +import javax.annotation.Nullable; /** * A manager to retrieve the various ScalarDB managers based on the running mode @@ -16,7 +17,7 @@ public class ScalarDBManager { /* 
Distributed storage for ScalarDB connection that is running in storage mode. */ - private final DistributedStorage storage; + @Nullable private final DistributedStorage storage; /* Distributed Transaction manager for ScalarDB connection that is running in transaction mode */ private final DistributedTransactionManager transactionManager; /* Distributed storage admin for ScalarDB admin operations */ From bf31a0122c602e2207778dd524193beccfa332d1 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Tue, 24 Dec 2024 16:24:41 +0530 Subject: [PATCH 34/87] Data chunk and task result enums and dtos --- .../dataimport/datachunk/ImportDataChunk.java | 14 +++++++ .../datachunk/ImportDataChunkStatus.java | 41 +++++++++++++++++++ .../datachunk/ImportDataChunkStatusState.java | 8 ++++ .../core/dataimport/datachunk/ImportRow.java | 11 +++++ .../dataimport/task/ImportTaskAction.java | 9 ++++ .../task/result/ImportResultStatus.java | 12 ++++++ .../task/result/ImportTargetResult.java | 19 +++++++++ .../task/result/ImportTargetResultStatus.java | 12 ++++++ .../task/result/ImportTaskResult.java | 27 ++++++++++++ 9 files changed, 153 insertions(+) create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/datachunk/ImportDataChunk.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/datachunk/ImportDataChunkStatus.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/datachunk/ImportDataChunkStatusState.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/datachunk/ImportRow.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTaskAction.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/result/ImportResultStatus.java create mode 100644 
data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/result/ImportTargetResult.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/result/ImportTargetResultStatus.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/result/ImportTaskResult.java diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/datachunk/ImportDataChunk.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/datachunk/ImportDataChunk.java new file mode 100644 index 0000000000..69ed97421a --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/datachunk/ImportDataChunk.java @@ -0,0 +1,14 @@ +package com.scalar.db.dataloader.core.dataimport.datachunk; + +import java.util.List; +import lombok.Builder; +import lombok.Data; + +/** * Import data chunk data */ +@Data +@Builder +public class ImportDataChunk { + + int dataChunkId; + List sourceData; +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/datachunk/ImportDataChunkStatus.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/datachunk/ImportDataChunkStatus.java new file mode 100644 index 0000000000..d6db3e1e7f --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/datachunk/ImportDataChunkStatus.java @@ -0,0 +1,41 @@ +package com.scalar.db.dataloader.core.dataimport.datachunk; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import java.time.Instant; +import lombok.Builder; +import lombok.Data; + +/** * A DTO to store import data chunk details */ +@Data +@Builder +@JsonDeserialize(builder = ImportDataChunkStatus.ImportDataChunkStatusBuilder.class) +public class ImportDataChunkStatus { + + @JsonProperty("dataChunkId") + private final int dataChunkId; + + 
@JsonProperty("startTime") + private final Instant startTime; + + @JsonProperty("endTime") + private final Instant endTime; + + @JsonProperty("totalRecords") + private final int totalRecords; + + @JsonProperty("successCount") + private final int successCount; + + @JsonProperty("failureCount") + private final int failureCount; + + @JsonProperty("batchCount") + private final int batchCount; + + @JsonProperty("totalDurationInMilliSeconds") + private final int totalDurationInMilliSeconds; + + @JsonProperty("status") + private final ImportDataChunkStatusState status; +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/datachunk/ImportDataChunkStatusState.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/datachunk/ImportDataChunkStatusState.java new file mode 100644 index 0000000000..013c640dcf --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/datachunk/ImportDataChunkStatusState.java @@ -0,0 +1,8 @@ +package com.scalar.db.dataloader.core.dataimport.datachunk; + +/** * Status of the import data chunk which during the import process */ +public enum ImportDataChunkStatusState { + START, + IN_PROGRESS, + COMPLETE +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/datachunk/ImportRow.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/datachunk/ImportRow.java new file mode 100644 index 0000000000..824ca4ffa4 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/datachunk/ImportRow.java @@ -0,0 +1,11 @@ +package com.scalar.db.dataloader.core.dataimport.datachunk; + +import com.fasterxml.jackson.databind.JsonNode; +import lombok.Value; + +/** Stores data related to a single row on import file */ +@Value +public class ImportRow { + int rowNumber; + JsonNode sourceData; +} diff --git 
a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTaskAction.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTaskAction.java new file mode 100644 index 0000000000..b1850cb84d --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTaskAction.java @@ -0,0 +1,9 @@ +package com.scalar.db.dataloader.core.dataimport.task; + +/*** + * Import task actions + */ +public enum ImportTaskAction { + INSERT, + UPDATE, +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/result/ImportResultStatus.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/result/ImportResultStatus.java new file mode 100644 index 0000000000..70c03b9c37 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/result/ImportResultStatus.java @@ -0,0 +1,12 @@ +package com.scalar.db.dataloader.core.dataimport.task.result; + +public enum ImportResultStatus { + SUCCESS, + PARTIAL_SUCCESS, + FAILURE, + VALIDATION_FAILED, + RETRIEVAL_FAILED, + MAPPING_FAILED, + TIMEOUT, + CANCELLED +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/result/ImportTargetResult.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/result/ImportTargetResult.java new file mode 100644 index 0000000000..0fe4e0379d --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/result/ImportTargetResult.java @@ -0,0 +1,19 @@ +package com.scalar.db.dataloader.core.dataimport.task.result; + +import com.fasterxml.jackson.databind.JsonNode; +import com.scalar.db.dataloader.core.dataimport.task.ImportTaskAction; +import java.util.List; +import lombok.Builder; +import lombok.Value; + +@Builder +@Value +public class ImportTargetResult { + String namespace; + String tableName; + ImportTaskAction importAction; + 
List errors; + boolean dataMapped; + JsonNode importedRecord; + ImportTargetResultStatus status; +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/result/ImportTargetResultStatus.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/result/ImportTargetResultStatus.java new file mode 100644 index 0000000000..d774f8f823 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/result/ImportTargetResultStatus.java @@ -0,0 +1,12 @@ +package com.scalar.db.dataloader.core.dataimport.task.result; + +public enum ImportTargetResultStatus { + VALIDATION_FAILED, + RETRIEVAL_FAILED, + MISSING_COLUMNS, + DATA_ALREADY_EXISTS, + DATA_NOT_FOUND, + SAVE_FAILED, + SAVED, + ABORTED +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/result/ImportTaskResult.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/result/ImportTaskResult.java new file mode 100644 index 0000000000..bebffdbe4d --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/result/ImportTaskResult.java @@ -0,0 +1,27 @@ +package com.scalar.db.dataloader.core.dataimport.task.result; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import java.util.List; +import lombok.Builder; +import lombok.Setter; +import lombok.Value; + +@Builder +@Value +@JsonDeserialize(builder = ImportTaskResult.ImportTaskResultBuilder.class) +public class ImportTaskResult { + @JsonProperty("rowNumber") + int rowNumber; + + @JsonProperty("targets") + List targets; + + @JsonProperty("rawRecord") + JsonNode rawRecord; + + @Setter + @JsonProperty("dataChunkId") + int dataChunkId; +} From 57cd3304a09e2434ea1e6e553b7241ecd2705720 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Tue, 24 Dec 2024 16:43:35 +0530 
Subject: [PATCH 35/87] Spotless applied --- .../db/dataloader/core/dataimport/task/ImportTaskAction.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTaskAction.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTaskAction.java index b1850cb84d..6b8c95d2a8 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTaskAction.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTaskAction.java @@ -1,8 +1,6 @@ package com.scalar.db.dataloader.core.dataimport.task; -/*** - * Import task actions - */ +/** * Import task actions */ public enum ImportTaskAction { INSERT, UPDATE, From 7a39564e310a2072cf9af155da564513e7d6af61 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Thu, 26 Dec 2024 11:44:40 +0530 Subject: [PATCH 36/87] Changes --- .../core/dataimport/task/result/ImportTaskResult.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/result/ImportTaskResult.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/result/ImportTaskResult.java index bebffdbe4d..3e08cc709b 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/result/ImportTaskResult.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/result/ImportTaskResult.java @@ -5,7 +5,6 @@ import com.fasterxml.jackson.databind.annotation.JsonDeserialize; import java.util.List; import lombok.Builder; -import lombok.Setter; import lombok.Value; @Builder @@ -21,7 +20,6 @@ public class ImportTaskResult { @JsonProperty("rawRecord") JsonNode rawRecord; - @Setter @JsonProperty("dataChunkId") int dataChunkId; } From fd1c186e085aeb4f285f059b72add5c4beea4d4a Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Tue, 31 Dec 2024 17:48:31 
+0530 Subject: [PATCH 37/87] Control file files --- .../com/scalar/db/common/error/CoreError.java | 44 ++ .../dataimport/controlfile/ControlFile.java | 27 + .../ControlFileValidationException.java | 14 + .../controlfile/ControlFileValidator.java | 226 +++++++ .../controlfile/ControlFileValidatorTest.java | 564 ++++++++++++++++++ 5 files changed, 875 insertions(+) create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFile.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidationException.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidator.java create mode 100644 data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidatorTest.java diff --git a/core/src/main/java/com/scalar/db/common/error/CoreError.java b/core/src/main/java/com/scalar/db/common/error/CoreError.java index d6a4492264..7786cedf50 100644 --- a/core/src/main/java/com/scalar/db/common/error/CoreError.java +++ b/core/src/main/java/com/scalar/db/common/error/CoreError.java @@ -691,6 +691,50 @@ public enum CoreError implements ScalarDbError { DATA_LOADER_ERROR_METHOD_NULL_ARGUMENT( Category.USER_ERROR, "0151", "Method null argument not allowed", "", ""), + DATA_LOADER_DUPLICATE_DATA_MAPPINGS( + Category.USER_ERROR, + "0158", + "Duplicate data mappings found for table '%s' in the control file", + "", + ""), + DATA_LOADER_MISSING_COLUMN_MAPPING( + Category.USER_ERROR, + "0159", + "No mapping found for column '%s' in table '%s' in the control file. \\nControl file validation set at 'FULL'. 
All columns need to be mapped.", + "", + ""), + DATA_LOADER_CONTROL_FILE_MISSING_DATA_MAPPINGS( + Category.USER_ERROR, "0160", "The control file is missing data mappings", "", ""), + DATA_LOADER__MISSING_NAMESPACE_OR_TABLE( + Category.USER_ERROR, + "0161", + "The provided namespace '%s' and/or table name '%s' is incorrect and could not be found", + "", + ""), + DATA_LOADER_TARGET_COLUMN_NOT_FOUND( + Category.USER_ERROR, + "0162", + "The target column '%s' for source field '%s' could not be found in table '%s'", + "", + ""), + DATA_LOADER_MISSING_PARTITION_KEY( + Category.USER_ERROR, + "0163", + "The required partition key '%s' is missing in the control file mapping for table '%s'", + "", + ""), + DATA_LOADER_MISSING_CLUSTERING_KEY( + Category.USER_ERROR, + "0164", + "The required clustering key '%s' is missing in the control file mapping for table '%s'", + "", + ""), + DATA_LOADER_MULTIPLE_MAPPINGS_FOR_COLUMN_FOUND( + Category.USER_ERROR, + "0165", + "Multiple data mappings found for column '%s' in table '%s'", + "", + ""), // // Errors for the concurrency error category // diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFile.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFile.java new file mode 100644 index 0000000000..bfa66b345d --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFile.java @@ -0,0 +1,27 @@ +package com.scalar.db.dataloader.core.dataimport.controlfile; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import java.util.ArrayList; +import java.util.List; +import lombok.Getter; +import lombok.Setter; + +/** Represents the control file */ +@Getter +@Setter +public class ControlFile { + + @JsonProperty("tables") + private final List tables; + + /** Class constructor */ + public ControlFile() { + this.tables = new ArrayList<>(); 
+ } + + @JsonCreator + public ControlFile(@JsonProperty("tables") List tables) { + this.tables = tables; + } +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidationException.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidationException.java new file mode 100644 index 0000000000..e4e032a4c8 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidationException.java @@ -0,0 +1,14 @@ +package com.scalar.db.dataloader.core.dataimport.controlfile; + +/** Represents the control file */ +public class ControlFileValidationException extends Exception { + + /** + * Class constructor + * + * @param message error message + */ + public ControlFileValidationException(String message) { + super(message); + } +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidator.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidator.java new file mode 100644 index 0000000000..a9b71d066a --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidator.java @@ -0,0 +1,226 @@ +package com.scalar.db.dataloader.core.dataimport.controlfile; + +import com.scalar.db.api.TableMetadata; +import com.scalar.db.common.error.CoreError; +import com.scalar.db.dataloader.core.util.RuntimeUtil; +import com.scalar.db.dataloader.core.util.TableMetadataUtil; +import java.util.HashSet; +import java.util.LinkedHashSet; +import java.util.Map; +import java.util.Set; + +/** Class to validate a control file */ +public class ControlFileValidator { + + /** + * Validate a control file + * + * @param controlFile Control file instance + * @param controlFileValidationMode Defines the strictness of the control file validation + * @param tableMetadataMap Metadata 
for one or more ScalarDB tables + * @throws ControlFileValidationException when the control file is invalid + */ + public static void validate( + ControlFile controlFile, + ControlFileValidationLevel controlFileValidationMode, + Map tableMetadataMap) + throws ControlFileValidationException { + + // Method argument null check + RuntimeUtil.checkNotNull(controlFile, controlFileValidationMode, tableMetadataMap); + + // Make sure the control file is not empty + checkEmptyMappings(controlFile); + + // Table metadata existence and target column validation + Set uniqueTables = new HashSet<>(); + for (ControlFileTable controlFileTable : controlFile.getTables()) { + String lookupKey = TableMetadataUtil.getTableLookupKey(controlFileTable); + + // Make sure that multiple table mappings for one table do not exist + if (uniqueTables.contains(lookupKey)) { + throw new ControlFileValidationException( + CoreError.DATA_LOADER_DUPLICATE_DATA_MAPPINGS.buildMessage(lookupKey)); + } + uniqueTables.add(lookupKey); + + // Make sure no column is mapped multiple times + Set mappedTargetColumns = checkDuplicateColumnMappings(controlFileTable); + + // Make sure table metadata is provided for each table mentioned in the data mappings + checkMultiTableMetadata(tableMetadataMap, controlFileTable); + + // Make sure the specified target columns in the mappings actually exist + checkIfTargetColumnExist(tableMetadataMap, controlFileTable); + TableMetadata tableMetadata = tableMetadataMap.get(lookupKey); + + // Make sure all table columns are mapped + if (controlFileValidationMode == ControlFileValidationLevel.FULL) { + checkIfAllColumnsAreMapped(tableMetadata, mappedTargetColumns, controlFileTable); + continue; + } + + // Make sure all keys (partition keys and clustering keys) are mapped + if (controlFileValidationMode == ControlFileValidationLevel.KEYS) { + checkPartitionKeys(tableMetadata, mappedTargetColumns, controlFileTable); + checkClusteringKeys(tableMetadata, mappedTargetColumns, 
controlFileTable); + } + } + } + + /** + * Check that all table columns are mapped in the control file. Ran only when the control file + * validation mode is set to 'FULL' + * + * @param tableMetadata Metadata for one ScalarDB table + * @param mappedTargetColumns All target columns that are mapped in the control file + * @param controlFileTable Control file entry for one ScalarDB table + * @throws ControlFileValidationException when there is a column that is not mapped in the control + * file + */ + private static void checkIfAllColumnsAreMapped( + TableMetadata tableMetadata, + Set mappedTargetColumns, + ControlFileTable controlFileTable) + throws ControlFileValidationException { + LinkedHashSet columnNames = tableMetadata.getColumnNames(); + for (String columnName : columnNames) { + if (!mappedTargetColumns.contains(columnName)) { + throw new ControlFileValidationException( + CoreError.DATA_LOADER_MISSING_COLUMN_MAPPING.buildMessage( + columnName, TableMetadataUtil.getTableLookupKey(controlFileTable))); + } + } + } + + /** + * Check that the control file has mappings for at least one table + * + * @param controlFile Control file instance + * @throws ControlFileValidationException when the control file has no mappings for any table + */ + private static void checkEmptyMappings(ControlFile controlFile) + throws ControlFileValidationException { + // Make sure data mapping for at least one table is provided + if (controlFile.getTables() == null || controlFile.getTables().isEmpty()) { + throw new ControlFileValidationException( + CoreError.DATA_LOADER_CONTROL_FILE_MISSING_DATA_MAPPINGS.buildMessage()); + } + } + + /** + * Check that metadata is provided for each table that is mapped in the control file. If the table + * metadata is missing this probably means the namespace and table combination does not exist. 
+ * + * @param tableMetadataMap Metadata for one or more ScalarDB tables + * @param controlFileTable Control file entry for one ScalarDB table + * @throws ControlFileValidationException when metadata for a mapped table is missing + */ + private static void checkMultiTableMetadata( + Map tableMetadataMap, ControlFileTable controlFileTable) + throws ControlFileValidationException { + // Make sure table metadata is available for each table data mapping + String lookupKey = TableMetadataUtil.getTableLookupKey(controlFileTable); + if (!tableMetadataMap.containsKey(lookupKey)) { + throw new ControlFileValidationException( + CoreError.DATA_LOADER__MISSING_NAMESPACE_OR_TABLE.buildMessage( + controlFileTable.getNamespace(), controlFileTable.getTableName())); + } + } + + /** + * Check that the mapped target column exists in the provided table metadata. + * + * @param tableMetadataMap Metadata for one or more ScalarDB tables + * @param controlFileTable Control file entry for one ScalarDB table + * @throws ControlFileValidationException when the target column does not exist + */ + private static void checkIfTargetColumnExist( + Map tableMetadataMap, ControlFileTable controlFileTable) + throws ControlFileValidationException { + + String lookupKey = TableMetadataUtil.getTableLookupKey(controlFileTable); + TableMetadata tableMetadata = + tableMetadataMap.get(TableMetadataUtil.getTableLookupKey(controlFileTable)); + LinkedHashSet columnNames = tableMetadata.getColumnNames(); + + for (ControlFileTableFieldMapping mapping : controlFileTable.getMappings()) { + // Make sure the target fields are found in the table metadata + if (!columnNames.contains(mapping.getTargetColumn())) { + throw new ControlFileValidationException( + CoreError.DATA_LOADER_TARGET_COLUMN_NOT_FOUND.buildMessage( + mapping.getTargetColumn(), mapping.getSourceField(), lookupKey)); + } + } + } + + /** + * Check that the required partition keys are mapped in the control file. 
Ran only for control + * file validation mode KEYS and FULL. + * + * @param tableMetadata Metadata for one ScalarDB table + * @param mappedTargetColumns Set of target columns that are mapped in the control file + * @param controlFileTable Control file entry for one ScalarDB table + * @throws ControlFileValidationException when a partition key is not mapped + */ + private static void checkPartitionKeys( + TableMetadata tableMetadata, + Set mappedTargetColumns, + ControlFileTable controlFileTable) + throws ControlFileValidationException { + LinkedHashSet partitionKeyNames = tableMetadata.getPartitionKeyNames(); + for (String partitionKeyName : partitionKeyNames) { + if (!mappedTargetColumns.contains(partitionKeyName)) { + throw new ControlFileValidationException( + CoreError.DATA_LOADER_MISSING_PARTITION_KEY.buildMessage( + partitionKeyName, TableMetadataUtil.getTableLookupKey(controlFileTable))); + } + } + } + + /** + * Check that the required clustering keys are mapped in the control file. Ran only for control + * file validation mode KEYS and FULL. 
+ * + * @param tableMetadata Metadata for one ScalarDB table + * @param mappedTargetColumns Set of target columns that are mapped in the control file + * @param controlFileTable Control file entry for one ScalarDB table + * @throws ControlFileValidationException when a clustering key is not mapped + */ + private static void checkClusteringKeys( + TableMetadata tableMetadata, + Set mappedTargetColumns, + ControlFileTable controlFileTable) + throws ControlFileValidationException { + LinkedHashSet clusteringKeyNames = tableMetadata.getClusteringKeyNames(); + for (String clusteringKeyName : clusteringKeyNames) { + if (!mappedTargetColumns.contains(clusteringKeyName)) { + throw new ControlFileValidationException( + CoreError.DATA_LOADER_MISSING_CLUSTERING_KEY.buildMessage( + clusteringKeyName, TableMetadataUtil.getTableLookupKey(controlFileTable))); + } + } + } + + /** + * Check that a control file table mapping does not contain duplicate mappings for the same target + * column + * + * @param controlFileTable Control file entry for one ScalarDB table + * @return Set of uniquely mapped target columns + * @throws ControlFileValidationException when a duplicate mapping is found + */ + private static Set checkDuplicateColumnMappings(ControlFileTable controlFileTable) + throws ControlFileValidationException { + Set mappedTargetColumns = new HashSet<>(); + for (ControlFileTableFieldMapping mapping : controlFileTable.getMappings()) { + if (mappedTargetColumns.contains(mapping.getTargetColumn())) { + throw new ControlFileValidationException( + CoreError.DATA_LOADER_MULTIPLE_MAPPINGS_FOR_COLUMN_FOUND.buildMessage( + mapping.getTargetColumn(), TableMetadataUtil.getTableLookupKey(controlFileTable))); + } + mappedTargetColumns.add(mapping.getTargetColumn()); + } + return mappedTargetColumns; + } +} diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidatorTest.java 
b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidatorTest.java new file mode 100644 index 0000000000..67cb8ea9f5 --- /dev/null +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidatorTest.java @@ -0,0 +1,564 @@ +package com.scalar.db.dataloader.core.dataimport.controlfile; + +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import com.scalar.db.api.TableMetadata; +import com.scalar.db.common.error.CoreError; +import com.scalar.db.dataloader.core.util.TableMetadataUtil; +import com.scalar.db.io.DataType; +import java.util.HashMap; +import java.util.Map; +import org.junit.jupiter.api.Test; + +class ControlFileValidatorTest { + + private static final String TABLE_NAME = "table"; + + private static final String TABLE_NAME_TWO = "table_two"; + private static final String NAMESPACE = "ns"; + private static final String COLUMN_PARTITION_KEY = "pk"; + private static final String COLUMN_CLUSTERING_KEY = "ck"; + private static final String COLUMN_ONE = "c1"; + + @Test + void validate_nullValuesGiven_shouldThrowNullPointerException() { + assertThatThrownBy(() -> ControlFileValidator.validate(null, null, null)) + .isExactlyInstanceOf(NullPointerException.class) + .hasMessage(CoreError.DATA_LOADER_ERROR_METHOD_NULL_ARGUMENT.buildMessage()); + } + + @Test + void validate_noTableMappingsGiven_shouldThrowControlFileValidationException() { + ControlFile controlFile = new ControlFile(); + Map tableMetadataMap = new HashMap<>(); + assertThatThrownBy( + () -> + ControlFileValidator.validate( + controlFile, ControlFileValidationLevel.FULL, tableMetadataMap)) + .isExactlyInstanceOf(ControlFileValidationException.class) + .hasMessage(CoreError.DATA_LOADER_CONTROL_FILE_MISSING_DATA_MAPPINGS.buildMessage()); + } + + @Test + void validate_duplicateTableMappingsGiven_shouldThrowControlFileValidationException() { + ControlFile controlFile = new ControlFile(); + 
ControlFileTable controlFileTable = new ControlFileTable(NAMESPACE, TABLE_NAME); + controlFile.getTables().add(controlFileTable); + controlFile.getTables().add(controlFileTable); + + String lookupKey = TableMetadataUtil.getTableLookupKey(controlFileTable); + + TableMetadata tableMetadata = + TableMetadata.newBuilder() + .addColumn(COLUMN_PARTITION_KEY, DataType.TEXT) + .addPartitionKey(COLUMN_PARTITION_KEY) + .build(); + + Map tableMetadataMap = new HashMap<>(); + tableMetadataMap.put(lookupKey, tableMetadata); + + assertThatThrownBy( + () -> + ControlFileValidator.validate( + controlFile, ControlFileValidationLevel.MAPPED, tableMetadataMap)) + .isExactlyInstanceOf(ControlFileValidationException.class) + .hasMessage(CoreError.DATA_LOADER_DUPLICATE_DATA_MAPPINGS.buildMessage(lookupKey)); + } + + @Test + void validate_duplicateTableColumnMappingsGiven_shouldThrowControlFileValidationException() { + ControlFile controlFile = new ControlFile(); + ControlFileTable controlFileTable = new ControlFileTable(NAMESPACE, TABLE_NAME); + controlFileTable.getMappings().add(new ControlFileTableFieldMapping(COLUMN_ONE, COLUMN_ONE)); + controlFileTable.getMappings().add(new ControlFileTableFieldMapping(COLUMN_ONE, COLUMN_ONE)); + controlFile.getTables().add(controlFileTable); + + String lookupKey = TableMetadataUtil.getTableLookupKey(controlFileTable); + + TableMetadata tableMetadata = + TableMetadata.newBuilder() + .addColumn(COLUMN_PARTITION_KEY, DataType.TEXT) + .addPartitionKey(COLUMN_PARTITION_KEY) + .build(); + + Map tableMetadataMap = new HashMap<>(); + tableMetadataMap.put(lookupKey, tableMetadata); + + assertThatThrownBy( + () -> + ControlFileValidator.validate( + controlFile, ControlFileValidationLevel.MAPPED, tableMetadataMap)) + .isExactlyInstanceOf(ControlFileValidationException.class) + .hasMessage( + CoreError.DATA_LOADER_MULTIPLE_MAPPINGS_FOR_COLUMN_FOUND.buildMessage( + COLUMN_ONE, lookupKey)); + } + + @Test + void 
validate_missingTableMetadataGiven_shouldThrowControlFileValidationException() { + ControlFile controlFile = new ControlFile(); + ControlFileTable controlFileTable = new ControlFileTable(NAMESPACE, TABLE_NAME); + controlFileTable.getMappings().add(new ControlFileTableFieldMapping(COLUMN_ONE, COLUMN_ONE)); + controlFile.getTables().add(controlFileTable); + + Map tableMetadataMap = new HashMap<>(); + + assertThatThrownBy( + () -> + ControlFileValidator.validate( + controlFile, ControlFileValidationLevel.MAPPED, tableMetadataMap)) + .isExactlyInstanceOf(ControlFileValidationException.class) + .hasMessage( + CoreError.DATA_LOADER__MISSING_NAMESPACE_OR_TABLE.buildMessage( + controlFileTable.getNamespace(), controlFileTable.getTableName())); + } + + @Test + void validate_nonExistingTargetColumnGiven_shouldThrowControlFileValidationException() { + ControlFile controlFile = new ControlFile(); + ControlFileTable controlFileTable = new ControlFileTable(NAMESPACE, TABLE_NAME); + controlFileTable.getMappings().add(new ControlFileTableFieldMapping(COLUMN_ONE, COLUMN_ONE)); + controlFile.getTables().add(controlFileTable); + + String lookupKey = TableMetadataUtil.getTableLookupKey(controlFileTable); + + TableMetadata tableMetadata = + TableMetadata.newBuilder() + .addColumn(COLUMN_PARTITION_KEY, DataType.TEXT) + .addPartitionKey(COLUMN_PARTITION_KEY) + .build(); + Map tableMetadataMap = new HashMap<>(); + tableMetadataMap.put(lookupKey, tableMetadata); + + assertThatThrownBy( + () -> + ControlFileValidator.validate( + controlFile, ControlFileValidationLevel.MAPPED, tableMetadataMap)) + .isExactlyInstanceOf(ControlFileValidationException.class) + .hasMessage( + CoreError.DATA_LOADER_TARGET_COLUMN_NOT_FOUND.buildMessage( + COLUMN_ONE, COLUMN_ONE, lookupKey)); + } + + @Test + void + validate_fullValidationAndHasMissingMappedColumnsGiven_shouldThrowControlFileValidationException() { + ControlFile controlFile = new ControlFile(); + ControlFileTable controlFileTable = new 
ControlFileTable(NAMESPACE, TABLE_NAME); + controlFileTable.getMappings().add(new ControlFileTableFieldMapping(COLUMN_ONE, COLUMN_ONE)); + controlFile.getTables().add(controlFileTable); + + String lookupKey = TableMetadataUtil.getTableLookupKey(controlFileTable); + + TableMetadata tableMetadata = + TableMetadata.newBuilder() + .addColumn(COLUMN_PARTITION_KEY, DataType.TEXT) + .addColumn(COLUMN_ONE, DataType.TEXT) + .addPartitionKey(COLUMN_PARTITION_KEY) + .build(); + Map tableMetadataMap = new HashMap<>(); + tableMetadataMap.put(lookupKey, tableMetadata); + + assertThatThrownBy( + () -> + ControlFileValidator.validate( + controlFile, ControlFileValidationLevel.FULL, tableMetadataMap)) + .isExactlyInstanceOf(ControlFileValidationException.class) + .hasMessage( + CoreError.DATA_LOADER_MISSING_COLUMN_MAPPING.buildMessage( + COLUMN_PARTITION_KEY, lookupKey)); + } + + @Test + void + validate_keysValidationAndHasMissingMappedPartitionKeysGiven_shouldThrowControlFileValidationException() { + ControlFile controlFile = new ControlFile(); + ControlFileTable controlFileTable = new ControlFileTable(NAMESPACE, TABLE_NAME); + controlFileTable.getMappings().add(new ControlFileTableFieldMapping(COLUMN_ONE, COLUMN_ONE)); + controlFile.getTables().add(controlFileTable); + + String lookupKey = TableMetadataUtil.getTableLookupKey(controlFileTable); + + TableMetadata tableMetadata = + TableMetadata.newBuilder() + .addColumn(COLUMN_PARTITION_KEY, DataType.TEXT) + .addColumn(COLUMN_ONE, DataType.TEXT) + .addPartitionKey(COLUMN_PARTITION_KEY) + .build(); + Map tableMetadataMap = new HashMap<>(); + tableMetadataMap.put(lookupKey, tableMetadata); + + assertThatThrownBy( + () -> + ControlFileValidator.validate( + controlFile, ControlFileValidationLevel.KEYS, tableMetadataMap)) + .isExactlyInstanceOf(ControlFileValidationException.class) + .hasMessage( + CoreError.DATA_LOADER_MISSING_PARTITION_KEY.buildMessage( + COLUMN_PARTITION_KEY, lookupKey)); + } + + @Test + void + 
validate_keysValidationAndHasMissingMappedClusteringKeysGiven_shouldThrowControlFileValidationException() { + ControlFile controlFile = new ControlFile(); + ControlFileTable controlFileTable = new ControlFileTable(NAMESPACE, TABLE_NAME); + controlFileTable + .getMappings() + .add(new ControlFileTableFieldMapping(COLUMN_PARTITION_KEY, COLUMN_PARTITION_KEY)); + controlFileTable.getMappings().add(new ControlFileTableFieldMapping(COLUMN_ONE, COLUMN_ONE)); + controlFile.getTables().add(controlFileTable); + + String lookupKey = TableMetadataUtil.getTableLookupKey(controlFileTable); + + TableMetadata tableMetadata = + TableMetadata.newBuilder() + .addColumn(COLUMN_PARTITION_KEY, DataType.TEXT) + .addColumn(COLUMN_CLUSTERING_KEY, DataType.TEXT) + .addColumn(COLUMN_ONE, DataType.TEXT) + .addPartitionKey(COLUMN_PARTITION_KEY) + .addClusteringKey(COLUMN_CLUSTERING_KEY) + .build(); + Map tableMetadataMap = new HashMap<>(); + tableMetadataMap.put(lookupKey, tableMetadata); + + assertThatThrownBy( + () -> + ControlFileValidator.validate( + controlFile, ControlFileValidationLevel.KEYS, tableMetadataMap)) + .isExactlyInstanceOf(ControlFileValidationException.class) + .hasMessage( + CoreError.DATA_LOADER_MISSING_CLUSTERING_KEY.buildMessage( + COLUMN_CLUSTERING_KEY, lookupKey)); + } + + @Test + void validate_mappedValidationAndValidArgumentsGiven_shouldNotThrowException() + throws ControlFileValidationException { + ControlFile controlFile = new ControlFile(); + ControlFileTable controlFileTable = new ControlFileTable(NAMESPACE, TABLE_NAME); + controlFileTable.getMappings().add(new ControlFileTableFieldMapping(COLUMN_ONE, COLUMN_ONE)); + controlFile.getTables().add(controlFileTable); + + String lookupKey = TableMetadataUtil.getTableLookupKey(controlFileTable); + + TableMetadata tableMetadata = + TableMetadata.newBuilder() + .addColumn(COLUMN_PARTITION_KEY, DataType.TEXT) + .addColumn(COLUMN_CLUSTERING_KEY, DataType.TEXT) + .addColumn(COLUMN_ONE, DataType.TEXT) + 
.addPartitionKey(COLUMN_PARTITION_KEY) + .addClusteringKey(COLUMN_CLUSTERING_KEY) + .build(); + Map tableMetadataMap = new HashMap<>(); + tableMetadataMap.put(lookupKey, tableMetadata); + + ControlFileValidator.validate(controlFile, ControlFileValidationLevel.MAPPED, tableMetadataMap); + } + + @Test + void validate_keysValidationAndValidArgumentsGiven_shouldNotThrowException() + throws ControlFileValidationException { + ControlFile controlFile = new ControlFile(); + ControlFileTable controlFileTable = new ControlFileTable(NAMESPACE, TABLE_NAME); + controlFileTable + .getMappings() + .add(new ControlFileTableFieldMapping(COLUMN_PARTITION_KEY, COLUMN_PARTITION_KEY)); + controlFileTable + .getMappings() + .add(new ControlFileTableFieldMapping(COLUMN_CLUSTERING_KEY, COLUMN_CLUSTERING_KEY)); + controlFile.getTables().add(controlFileTable); + + String lookupKey = TableMetadataUtil.getTableLookupKey(controlFileTable); + + TableMetadata tableMetadata = + TableMetadata.newBuilder() + .addColumn(COLUMN_PARTITION_KEY, DataType.TEXT) + .addColumn(COLUMN_CLUSTERING_KEY, DataType.TEXT) + .addColumn(COLUMN_ONE, DataType.TEXT) + .addPartitionKey(COLUMN_PARTITION_KEY) + .addClusteringKey(COLUMN_CLUSTERING_KEY) + .build(); + Map tableMetadataMap = new HashMap<>(); + tableMetadataMap.put(lookupKey, tableMetadata); + + ControlFileValidator.validate(controlFile, ControlFileValidationLevel.MAPPED, tableMetadataMap); + } + + @Test + void validate_fullValidationAndValidArgumentsGiven_shouldNotThrowException() + throws ControlFileValidationException { + ControlFile controlFile = new ControlFile(); + ControlFileTable controlFileTable = new ControlFileTable(NAMESPACE, TABLE_NAME); + controlFileTable + .getMappings() + .add(new ControlFileTableFieldMapping(COLUMN_PARTITION_KEY, COLUMN_PARTITION_KEY)); + controlFileTable + .getMappings() + .add(new ControlFileTableFieldMapping(COLUMN_CLUSTERING_KEY, COLUMN_CLUSTERING_KEY)); + controlFileTable.getMappings().add(new 
ControlFileTableFieldMapping(COLUMN_ONE, COLUMN_ONE)); + controlFile.getTables().add(controlFileTable); + + String lookupKey = TableMetadataUtil.getTableLookupKey(controlFileTable); + + TableMetadata tableMetadata = + TableMetadata.newBuilder() + .addColumn(COLUMN_PARTITION_KEY, DataType.TEXT) + .addColumn(COLUMN_CLUSTERING_KEY, DataType.TEXT) + .addColumn(COLUMN_ONE, DataType.TEXT) + .addPartitionKey(COLUMN_PARTITION_KEY) + .addClusteringKey(COLUMN_CLUSTERING_KEY) + .build(); + Map tableMetadataMap = new HashMap<>(); + tableMetadataMap.put(lookupKey, tableMetadata); + + ControlFileValidator.validate(controlFile, ControlFileValidationLevel.FULL, tableMetadataMap); + } + + @Test + void + validate_twoControlFileTablesAndFullValidationAndHasMissingMappedColumnsGiven_shouldThrowControlFileValidationException() { + ControlFile controlFile = new ControlFile(); + ControlFileTable controlFileTable = new ControlFileTable(NAMESPACE, TABLE_NAME); + controlFileTable + .getMappings() + .add(new ControlFileTableFieldMapping(COLUMN_PARTITION_KEY, COLUMN_PARTITION_KEY)); + controlFileTable + .getMappings() + .add(new ControlFileTableFieldMapping(COLUMN_CLUSTERING_KEY, COLUMN_CLUSTERING_KEY)); + controlFileTable.getMappings().add(new ControlFileTableFieldMapping(COLUMN_ONE, COLUMN_ONE)); + controlFile.getTables().add(controlFileTable); + ControlFileTable controlFileTable2 = new ControlFileTable(NAMESPACE, TABLE_NAME_TWO); + controlFileTable2 + .getMappings() + .add(new ControlFileTableFieldMapping(COLUMN_PARTITION_KEY, COLUMN_PARTITION_KEY)); + controlFileTable2 + .getMappings() + .add(new ControlFileTableFieldMapping(COLUMN_CLUSTERING_KEY, COLUMN_CLUSTERING_KEY)); + controlFile.getTables().add(controlFileTable2); + + String lookupKey = TableMetadataUtil.getTableLookupKey(controlFileTable); + String lookupKeyTwo = TableMetadataUtil.getTableLookupKey(controlFileTable2); + + TableMetadata tableMetadata = + TableMetadata.newBuilder() + .addColumn(COLUMN_PARTITION_KEY, DataType.TEXT) + 
.addColumn(COLUMN_CLUSTERING_KEY, DataType.TEXT) + .addColumn(COLUMN_ONE, DataType.TEXT) + .addPartitionKey(COLUMN_PARTITION_KEY) + .addClusteringKey(COLUMN_CLUSTERING_KEY) + .build(); + Map tableMetadataMap = new HashMap<>(); + tableMetadataMap.put(lookupKey, tableMetadata); + tableMetadataMap.put(lookupKeyTwo, tableMetadata); + + assertThatThrownBy( + () -> + ControlFileValidator.validate( + controlFile, ControlFileValidationLevel.FULL, tableMetadataMap)) + .isExactlyInstanceOf(ControlFileValidationException.class) + .hasMessage( + CoreError.DATA_LOADER_MISSING_COLUMN_MAPPING.buildMessage(COLUMN_ONE, lookupKeyTwo)); + } + + @Test + void + validate_twoControlFileTablesAndKeysValidationAndHasMissingMappedColumnsGiven_shouldThrowControlFileValidationException() { + ControlFile controlFile = new ControlFile(); + ControlFileTable controlFileTable = new ControlFileTable(NAMESPACE, TABLE_NAME); + controlFileTable + .getMappings() + .add(new ControlFileTableFieldMapping(COLUMN_PARTITION_KEY, COLUMN_PARTITION_KEY)); + controlFileTable + .getMappings() + .add(new ControlFileTableFieldMapping(COLUMN_CLUSTERING_KEY, COLUMN_CLUSTERING_KEY)); + controlFile.getTables().add(controlFileTable); + ControlFileTable controlFileTable2 = new ControlFileTable(NAMESPACE, TABLE_NAME_TWO); + controlFileTable2 + .getMappings() + .add(new ControlFileTableFieldMapping(COLUMN_PARTITION_KEY, COLUMN_PARTITION_KEY)); + controlFile.getTables().add(controlFileTable2); + + String lookupKey = TableMetadataUtil.getTableLookupKey(controlFileTable); + String lookupKeyTwo = TableMetadataUtil.getTableLookupKey(controlFileTable2); + + TableMetadata tableMetadata = + TableMetadata.newBuilder() + .addColumn(COLUMN_PARTITION_KEY, DataType.TEXT) + .addColumn(COLUMN_CLUSTERING_KEY, DataType.TEXT) + .addColumn(COLUMN_ONE, DataType.TEXT) + .addPartitionKey(COLUMN_PARTITION_KEY) + .addClusteringKey(COLUMN_CLUSTERING_KEY) + .build(); + Map tableMetadataMap = new HashMap<>(); + tableMetadataMap.put(lookupKey, 
tableMetadata); + tableMetadataMap.put(lookupKeyTwo, tableMetadata); + + assertThatThrownBy( + () -> + ControlFileValidator.validate( + controlFile, ControlFileValidationLevel.KEYS, tableMetadataMap)) + .isExactlyInstanceOf(ControlFileValidationException.class) + .hasMessage( + CoreError.DATA_LOADER_MISSING_CLUSTERING_KEY.buildMessage( + COLUMN_CLUSTERING_KEY, lookupKeyTwo)); + } + + @Test + void + validate_twoControlFileTablesAndMappedValidationAndHasMissingMappedColumnsInOneTableGiven_shouldThrowControlFileValidationException() { + ControlFile controlFile = new ControlFile(); + ControlFileTable controlFileTable = new ControlFileTable(NAMESPACE, TABLE_NAME); + controlFileTable + .getMappings() + .add(new ControlFileTableFieldMapping(COLUMN_PARTITION_KEY, COLUMN_PARTITION_KEY)); + controlFileTable + .getMappings() + .add(new ControlFileTableFieldMapping(COLUMN_CLUSTERING_KEY, COLUMN_CLUSTERING_KEY)); + controlFile.getTables().add(controlFileTable); + ControlFileTable controlFileTable2 = new ControlFileTable(NAMESPACE, TABLE_NAME_TWO); + controlFile.getTables().add(controlFileTable2); + + String lookupKey = TableMetadataUtil.getTableLookupKey(controlFileTable); + String lookupKeyTwo = TableMetadataUtil.getTableLookupKey(controlFileTable2); + + TableMetadata tableMetadata = + TableMetadata.newBuilder() + .addColumn(COLUMN_PARTITION_KEY, DataType.TEXT) + .addColumn(COLUMN_CLUSTERING_KEY, DataType.TEXT) + .addColumn(COLUMN_ONE, DataType.TEXT) + .addPartitionKey(COLUMN_PARTITION_KEY) + .addClusteringKey(COLUMN_CLUSTERING_KEY) + .build(); + Map tableMetadataMap = new HashMap<>(); + tableMetadataMap.put(lookupKey, tableMetadata); + tableMetadataMap.put(lookupKeyTwo, tableMetadata); + + assertThatThrownBy( + () -> + ControlFileValidator.validate( + controlFile, ControlFileValidationLevel.KEYS, tableMetadataMap)) + .isExactlyInstanceOf(ControlFileValidationException.class) + .hasMessage( + CoreError.DATA_LOADER_MISSING_PARTITION_KEY.buildMessage( + COLUMN_PARTITION_KEY, 
lookupKeyTwo)); + } + + @Test + void + validate_twoControlFileTablesAndFullValidationAndHasValidArgumentsGiven_shouldNotThrowException() + throws ControlFileValidationException { + ControlFile controlFile = new ControlFile(); + ControlFileTable controlFileTable = new ControlFileTable(NAMESPACE, TABLE_NAME); + controlFileTable + .getMappings() + .add(new ControlFileTableFieldMapping(COLUMN_PARTITION_KEY, COLUMN_PARTITION_KEY)); + controlFileTable + .getMappings() + .add(new ControlFileTableFieldMapping(COLUMN_CLUSTERING_KEY, COLUMN_CLUSTERING_KEY)); + controlFileTable.getMappings().add(new ControlFileTableFieldMapping(COLUMN_ONE, COLUMN_ONE)); + controlFile.getTables().add(controlFileTable); + ControlFileTable controlFileTable2 = new ControlFileTable(NAMESPACE, TABLE_NAME_TWO); + controlFileTable2 + .getMappings() + .add(new ControlFileTableFieldMapping(COLUMN_PARTITION_KEY, COLUMN_PARTITION_KEY)); + controlFileTable2 + .getMappings() + .add(new ControlFileTableFieldMapping(COLUMN_CLUSTERING_KEY, COLUMN_CLUSTERING_KEY)); + controlFileTable2.getMappings().add(new ControlFileTableFieldMapping(COLUMN_ONE, COLUMN_ONE)); + controlFile.getTables().add(controlFileTable2); + + String lookupKey = TableMetadataUtil.getTableLookupKey(controlFileTable); + String lookupKeyTwo = TableMetadataUtil.getTableLookupKey(controlFileTable2); + + TableMetadata tableMetadata = + TableMetadata.newBuilder() + .addColumn(COLUMN_PARTITION_KEY, DataType.TEXT) + .addColumn(COLUMN_CLUSTERING_KEY, DataType.TEXT) + .addColumn(COLUMN_ONE, DataType.TEXT) + .addPartitionKey(COLUMN_PARTITION_KEY) + .addClusteringKey(COLUMN_CLUSTERING_KEY) + .build(); + Map tableMetadataMap = new HashMap<>(); + tableMetadataMap.put(lookupKey, tableMetadata); + tableMetadataMap.put(lookupKeyTwo, tableMetadata); + + ControlFileValidator.validate(controlFile, ControlFileValidationLevel.FULL, tableMetadataMap); + } + + @Test + void + 
validate_twoControlFileTablesAndKeysValidationAndHasValidArgumentsGiven_shouldNotThrowException() + throws ControlFileValidationException { + ControlFile controlFile = new ControlFile(); + ControlFileTable controlFileTable = new ControlFileTable(NAMESPACE, TABLE_NAME); + controlFileTable + .getMappings() + .add(new ControlFileTableFieldMapping(COLUMN_PARTITION_KEY, COLUMN_PARTITION_KEY)); + controlFileTable + .getMappings() + .add(new ControlFileTableFieldMapping(COLUMN_CLUSTERING_KEY, COLUMN_CLUSTERING_KEY)); + controlFile.getTables().add(controlFileTable); + ControlFileTable controlFileTable2 = new ControlFileTable(NAMESPACE, TABLE_NAME_TWO); + controlFileTable2 + .getMappings() + .add(new ControlFileTableFieldMapping(COLUMN_PARTITION_KEY, COLUMN_PARTITION_KEY)); + controlFile.getTables().add(controlFileTable2); + controlFileTable2 + .getMappings() + .add(new ControlFileTableFieldMapping(COLUMN_CLUSTERING_KEY, COLUMN_CLUSTERING_KEY)); + + String lookupKey = TableMetadataUtil.getTableLookupKey(controlFileTable); + String lookupKeyTwo = TableMetadataUtil.getTableLookupKey(controlFileTable2); + + TableMetadata tableMetadata = + TableMetadata.newBuilder() + .addColumn(COLUMN_PARTITION_KEY, DataType.TEXT) + .addColumn(COLUMN_CLUSTERING_KEY, DataType.TEXT) + .addColumn(COLUMN_ONE, DataType.TEXT) + .addPartitionKey(COLUMN_PARTITION_KEY) + .addClusteringKey(COLUMN_CLUSTERING_KEY) + .build(); + Map tableMetadataMap = new HashMap<>(); + tableMetadataMap.put(lookupKey, tableMetadata); + tableMetadataMap.put(lookupKeyTwo, tableMetadata); + + ControlFileValidator.validate(controlFile, ControlFileValidationLevel.KEYS, tableMetadataMap); + } + + @Test + void + validate_twoControlFileTablesAndMappedValidationAndHasValidArgumentsGiven_shouldNotThrowException() + throws ControlFileValidationException { + ControlFile controlFile = new ControlFile(); + ControlFileTable controlFileTable = new ControlFileTable(NAMESPACE, TABLE_NAME); + controlFileTable + .getMappings() + .add(new 
ControlFileTableFieldMapping(COLUMN_PARTITION_KEY, COLUMN_PARTITION_KEY)); + controlFileTable.getMappings().add(new ControlFileTableFieldMapping(COLUMN_ONE, COLUMN_ONE)); + controlFile.getTables().add(controlFileTable); + ControlFileTable controlFileTable2 = new ControlFileTable(NAMESPACE, TABLE_NAME_TWO); + controlFileTable2 + .getMappings() + .add(new ControlFileTableFieldMapping(COLUMN_PARTITION_KEY, COLUMN_PARTITION_KEY)); + controlFileTable2.getMappings().add(new ControlFileTableFieldMapping(COLUMN_ONE, COLUMN_ONE)); + controlFile.getTables().add(controlFileTable2); + + String lookupKey = TableMetadataUtil.getTableLookupKey(controlFileTable); + String lookupKeyTwo = TableMetadataUtil.getTableLookupKey(controlFileTable2); + + TableMetadata tableMetadata = + TableMetadata.newBuilder() + .addColumn(COLUMN_PARTITION_KEY, DataType.TEXT) + .addColumn(COLUMN_CLUSTERING_KEY, DataType.TEXT) + .addColumn(COLUMN_ONE, DataType.TEXT) + .addPartitionKey(COLUMN_PARTITION_KEY) + .addClusteringKey(COLUMN_CLUSTERING_KEY) + .build(); + Map tableMetadataMap = new HashMap<>(); + tableMetadataMap.put(lookupKey, tableMetadata); + tableMetadataMap.put(lookupKeyTwo, tableMetadata); + + ControlFileValidator.validate(controlFile, ControlFileValidationLevel.MAPPED, tableMetadataMap); + } +} From e2cc6ac4f65186f7a8885df0f1f9a3366000bf68 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Thu, 2 Jan 2025 14:39:45 +0530 Subject: [PATCH 38/87] Added task files and dtos --- .../com/scalar/db/common/error/CoreError.java | 14 +++ .../core/dataimport/ImportOptions.java | 38 ++++++ .../core/dataimport/log/LogMode.java | 6 + .../dataimport/task/ImportTaskConstants.java | 17 +++ .../task/mapping/ImportDataMapping.java | 28 +++++ .../ImportSourceRecordValidationResult.java | 48 ++++++++ .../ImportSourceRecordValidator.java | 112 ++++++++++++++++++ .../task/mapping/ImportDataMappingTest.java | 49 ++++++++ .../ImportSourceRecordValidatorTest.java | 86 ++++++++++++++ 9 files changed, 398 insertions(+) create 
mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportOptions.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/log/LogMode.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTaskConstants.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/mapping/ImportDataMapping.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/validation/ImportSourceRecordValidationResult.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/validation/ImportSourceRecordValidator.java create mode 100644 data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/task/mapping/ImportDataMappingTest.java create mode 100644 data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/task/validation/ImportSourceRecordValidatorTest.java diff --git a/core/src/main/java/com/scalar/db/common/error/CoreError.java b/core/src/main/java/com/scalar/db/common/error/CoreError.java index 7786cedf50..c05eed9cd9 100644 --- a/core/src/main/java/com/scalar/db/common/error/CoreError.java +++ b/core/src/main/java/com/scalar/db/common/error/CoreError.java @@ -735,6 +735,20 @@ public enum CoreError implements ScalarDbError { "Multiple data mappings found for column '%s' in table '%s'", "", ""), + DATA_LOADER_MISSING_CLUSTERING_KEY_COLUMN( + Category.USER_ERROR, + "0166", + "Missing required field or column mapping for clustering key %s", + "", + ""), + DATA_LOADER_MISSING_PARTITION_KEY_COLUMN( + Category.USER_ERROR, + "0167", + "Missing required field or column mapping for partition key %s", + "", + ""), + DATA_LOADER_MISSING_COLUMN( + Category.USER_ERROR, "0168", "Missing field or column mapping for %s", "", ""), // // Errors for the concurrency error category // diff --git 
a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportOptions.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportOptions.java new file mode 100644 index 0000000000..9cb6225d30 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportOptions.java @@ -0,0 +1,38 @@ +package com.scalar.db.dataloader.core.dataimport; + +import com.scalar.db.dataloader.core.FileFormat; +import com.scalar.db.dataloader.core.dataimport.controlfile.ControlFile; +import com.scalar.db.dataloader.core.dataimport.controlfile.ControlFileValidationLevel; +import com.scalar.db.dataloader.core.dataimport.log.LogMode; +import lombok.Builder; +import lombok.Data; + +/** Import options to import data into one or more ScalarDB tables */ +@Builder +@Data +public class ImportOptions { + + @Builder.Default private final ImportMode importMode = ImportMode.UPSERT; + @Builder.Default private final boolean requireAllColumns = false; + @Builder.Default private final FileFormat fileFormat = FileFormat.JSON; + @Builder.Default private final boolean prettyPrint = false; + @Builder.Default private final boolean ignoreNullValues = false; + @Builder.Default private final LogMode logMode = LogMode.SPLIT_BY_DATA_CHUNK; + + @Builder.Default + private final ControlFileValidationLevel controlFileValidationLevel = + ControlFileValidationLevel.MAPPED; + + @Builder.Default private final char delimiter = ','; + + @Builder.Default private final boolean logSuccessRecords = false; + @Builder.Default private final boolean logRawRecord = false; + + private final int dataChunkSize; + private final int transactionBatchSize; + private final ControlFile controlFile; + private final String namespace; + private final String tableName; + private final int maxThreads; + private final String customHeaderRow; +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/log/LogMode.java 
b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/log/LogMode.java new file mode 100644 index 0000000000..48eac32e61 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/log/LogMode.java @@ -0,0 +1,6 @@ +package com.scalar.db.dataloader.core.dataimport.log; + +public enum LogMode { + SINGLE_FILE, + SPLIT_BY_DATA_CHUNK +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTaskConstants.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTaskConstants.java new file mode 100644 index 0000000000..eb30211a0d --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTaskConstants.java @@ -0,0 +1,17 @@ +package com.scalar.db.dataloader.core.dataimport.task; + +import lombok.AccessLevel; +import lombok.NoArgsConstructor; + +@NoArgsConstructor(access = AccessLevel.PRIVATE) +public class ImportTaskConstants { + public static final String ERROR_COULD_NOT_FIND_PARTITION_KEY = + "could not find the partition key"; + public static final String ERROR_UPSERT_INSERT_MISSING_COLUMNS = + "the source record needs to contain all fields if the UPSERT turns into an INSERT"; + public static final String ERROR_DATA_ALREADY_EXISTS = "record already exists"; + public static final String ERROR_DATA_NOT_FOUND = "record was not found"; + public static final String ERROR_COULD_NOT_FIND_CLUSTERING_KEY = + "could not find the clustering key"; + public static final String ERROR_TABLE_METADATA_MISSING = "No table metadata found"; +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/mapping/ImportDataMapping.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/mapping/ImportDataMapping.java new file mode 100644 index 0000000000..7f7524d263 --- /dev/null +++ 
b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/mapping/ImportDataMapping.java @@ -0,0 +1,28 @@ +package com.scalar.db.dataloader.core.dataimport.task.mapping; + +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.scalar.db.dataloader.core.dataimport.controlfile.ControlFileTable; +import com.scalar.db.dataloader.core.dataimport.controlfile.ControlFileTableFieldMapping; + +public class ImportDataMapping { + + /** + * * Update the source data replace the source column name with the target column name according + * to control file table data + * + * @param source source data + * @param controlFileTable control file table to map source data + */ + public static void apply(ObjectNode source, ControlFileTable controlFileTable) { + // Copy the source field data to the target column if missing + for (ControlFileTableFieldMapping mapping : controlFileTable.getMappings()) { + String sourceField = mapping.getSourceField(); + String targetColumn = mapping.getTargetColumn(); + + if (source.has(sourceField) && !source.has(targetColumn)) { + source.set(targetColumn, source.get(sourceField)); + source.remove(sourceField); + } + } + } +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/validation/ImportSourceRecordValidationResult.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/validation/ImportSourceRecordValidationResult.java new file mode 100644 index 0000000000..30b878b9e6 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/validation/ImportSourceRecordValidationResult.java @@ -0,0 +1,48 @@ +package com.scalar.db.dataloader.core.dataimport.task.validation; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import javax.annotation.concurrent.Immutable; + +/** The validation result for a data source record */ +@Immutable +public 
final class ImportSourceRecordValidationResult { + + private final List errorMessages; + private final Set columnsWithErrors; + + /** Constructor */ + public ImportSourceRecordValidationResult() { + this.errorMessages = new ArrayList<>(); + this.columnsWithErrors = new HashSet<>(); + } + + /** + * Add a validation error message for a column. Also marking the column as containing an error. + * + * @param columnName column name + * @param errorMessage error message + */ + public void addErrorMessage(String columnName, String errorMessage) { + this.columnsWithErrors.add(columnName); + this.errorMessages.add(errorMessage); + } + + /** @return Immutable list of validation error messages */ + public List getErrorMessages() { + return Collections.unmodifiableList(this.errorMessages); + } + + /** @return Immutable set of columns that had errors */ + public Set getColumnsWithErrors() { + return Collections.unmodifiableSet(this.columnsWithErrors); + } + + /** @return Validation is valid or not */ + public boolean isValid() { + return this.errorMessages.isEmpty(); + } +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/validation/ImportSourceRecordValidator.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/validation/ImportSourceRecordValidator.java new file mode 100644 index 0000000000..38938ea8bf --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/validation/ImportSourceRecordValidator.java @@ -0,0 +1,112 @@ +package com.scalar.db.dataloader.core.dataimport.task.validation; + +import com.fasterxml.jackson.databind.JsonNode; +import com.scalar.db.common.error.CoreError; +import com.scalar.db.dataloader.core.DatabaseKeyType; +import com.scalar.db.dataloader.core.util.TableMetadataUtil; +import java.util.Set; +import lombok.AccessLevel; +import lombok.NoArgsConstructor; + +@NoArgsConstructor(access = AccessLevel.PRIVATE) +public class 
ImportSourceRecordValidator { + + /** + * Create list for validation error messages. Validate everything and not return when one single + * error is found. Avoiding trial and error imports where every time a new error appears + * + * @param partitionKeyNames List of partition keys in table + * @param clusteringKeyNames List of clustering keys in table + * @param columnNames List of all column names in table + * @param sourceRecord source data + * @param allColumnsRequired If true treat missing columns as an error + * @return Source record validation result + */ + public static ImportSourceRecordValidationResult validateSourceRecord( + Set partitionKeyNames, + Set clusteringKeyNames, + Set columnNames, + JsonNode sourceRecord, + boolean allColumnsRequired) { + ImportSourceRecordValidationResult validationResult = new ImportSourceRecordValidationResult(); + + // check if partition keys are found + checkMissingKeys(DatabaseKeyType.PARTITION, partitionKeyNames, sourceRecord, validationResult); + + // check if clustering keys are found + checkMissingKeys( + DatabaseKeyType.CLUSTERING, clusteringKeyNames, sourceRecord, validationResult); + + // Check if the record is missing any columns + if (allColumnsRequired) { + checkMissingColumns( + sourceRecord, columnNames, validationResult, validationResult.getColumnsWithErrors()); + } + + return validationResult; + } + + /** + * Check if the required keys are found in the data file. + * + * @param keyType Type of key to validate + * @param keyColumnNames List of required column names + * @param sourceRecord source data + * @param validationResult Source record validation result + */ + public static void checkMissingKeys( + DatabaseKeyType keyType, + Set keyColumnNames, + JsonNode sourceRecord, + ImportSourceRecordValidationResult validationResult) { + for (String columnName : keyColumnNames) { + if (!sourceRecord.has(columnName)) { + String errorMessageFormat = + keyType == DatabaseKeyType.PARTITION + ? 
CoreError.DATA_LOADER_MISSING_PARTITION_KEY_COLUMN.buildMessage(columnName) + : CoreError.DATA_LOADER_MISSING_CLUSTERING_KEY_COLUMN.buildMessage(columnName); + validationResult.addErrorMessage(columnName, errorMessageFormat); + } + } + } + + /** + * Make sure the json object is not missing any columns. Error added to validation errors lists + * + * @param sourceRecord Source json object + * @param columnNames List of column names for a table + * @param validationResult Source record validation result + * @param ignoreColumns Columns that can be ignored in the check + */ + public static void checkMissingColumns( + JsonNode sourceRecord, + Set columnNames, + ImportSourceRecordValidationResult validationResult, + Set ignoreColumns) { + Set metadataColumns = TableMetadataUtil.getMetadataColumns(); + for (String columnName : columnNames) { + // If the field is not a metadata column and is missing and should not be ignored + if ((ignoreColumns == null || !ignoreColumns.contains(columnName)) + && !TableMetadataUtil.isMetadataColumn(columnName, metadataColumns, columnNames) + && !sourceRecord.has(columnName)) { + validationResult.addErrorMessage( + columnName, CoreError.DATA_LOADER_MISSING_COLUMN.buildMessage(columnName)); + } + } + } + + /** + * Make sure the json object is not missing any columns. 
Error added to validation errors lists + * + * @param sourceRecord Source json object + * @param columnNames List of column names for a table + * @param validationResult Source record validation result + */ + public static void checkMissingColumns( + JsonNode sourceRecord, + Set columnNames, + ImportSourceRecordValidationResult validationResult) { + ImportSourceRecordValidator.checkMissingColumns( + sourceRecord, columnNames, validationResult, null); + } +} diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/task/mapping/ImportDataMappingTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/task/mapping/ImportDataMappingTest.java new file mode 100644 index 0000000000..2589dacf82 --- /dev/null +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/task/mapping/ImportDataMappingTest.java @@ -0,0 +1,49 @@ +package com.scalar.db.dataloader.core.dataimport.task.mapping; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.scalar.db.api.TableMetadata; +import com.scalar.db.dataloader.core.UnitTestUtils; +import com.scalar.db.dataloader.core.dataimport.controlfile.ControlFileTable; +import com.scalar.db.dataloader.core.dataimport.controlfile.ControlFileTableFieldMapping; +import java.util.ArrayList; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +public class ImportDataMappingTest { + + TableMetadata mockMetadata; + ControlFileTable controlFilTable; + + @BeforeEach + void setup() { + mockMetadata = UnitTestUtils.createTestTableMetadata(); + controlFilTable = new ControlFileTable("namespace", "table"); + ControlFileTableFieldMapping m1 = new ControlFileTableFieldMapping("source_id", "target_id"); + ControlFileTableFieldMapping m2 = + new ControlFileTableFieldMapping("source_name", 
"target_name"); + ControlFileTableFieldMapping m3 = + new ControlFileTableFieldMapping("source_email", "target_email"); + ArrayList mappingArrayList = new ArrayList<>(); + mappingArrayList.add(m1); + mappingArrayList.add(m2); + mappingArrayList.add(m3); + controlFilTable.getMappings().addAll(mappingArrayList); + } + + @Test + void apply_withValidData_shouldUpdateSourceData() throws JsonProcessingException { + ObjectMapper objectMapper = new ObjectMapper(); + ObjectNode source = objectMapper.createObjectNode(); + source.put("source_id", "111"); + source.put("source_name", "abc"); + source.put("source_email", "sam@dsd.com"); + ImportDataMapping.apply(source, controlFilTable); + // Assert changes + Assertions.assertEquals("111", source.get("target_id").asText()); + Assertions.assertEquals("abc", source.get("target_name").asText()); + Assertions.assertEquals("sam@dsd.com", source.get("target_email").asText()); + } +} diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/task/validation/ImportSourceRecordValidatorTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/task/validation/ImportSourceRecordValidatorTest.java new file mode 100644 index 0000000000..f065bcb69e --- /dev/null +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/task/validation/ImportSourceRecordValidatorTest.java @@ -0,0 +1,86 @@ +package com.scalar.db.dataloader.core.dataimport.task.validation; + +import com.fasterxml.jackson.databind.JsonNode; +import com.scalar.db.api.TableMetadata; +import com.scalar.db.dataloader.core.UnitTestUtils; +import java.util.HashSet; +import java.util.Set; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +class ImportSourceRecordValidatorTest { + + TableMetadata mockMetadata = UnitTestUtils.createTestTableMetadata(); + + @Test + void + validateSourceRecord_withValidData_shouldReturnValidImportSourceRecordValidationResultWithoutErrors() { + Set 
partitionKeyNames = mockMetadata.getPartitionKeyNames(); + Set clusteringKeyNames = mockMetadata.getClusteringKeyNames(); + Set columnNames = mockMetadata.getColumnNames(); + JsonNode sourceRecord = UnitTestUtils.getOutputDataWithoutMetadata(); + ImportSourceRecordValidationResult result = + ImportSourceRecordValidator.validateSourceRecord( + partitionKeyNames, clusteringKeyNames, columnNames, sourceRecord, false); + Assertions.assertTrue(result.getColumnsWithErrors().isEmpty()); + } + + @Test + void + validateSourceRecord_withValidDataWithAllColumnsRequired_shouldReturnValidImportSourceRecordValidationResultWithoutErrors() { + Set partitionKeyNames = mockMetadata.getPartitionKeyNames(); + Set clusteringKeyNames = mockMetadata.getClusteringKeyNames(); + Set columnNames = mockMetadata.getColumnNames(); + JsonNode sourceRecord = UnitTestUtils.getOutputDataWithoutMetadata(); + ImportSourceRecordValidationResult result = + ImportSourceRecordValidator.validateSourceRecord( + partitionKeyNames, clusteringKeyNames, columnNames, sourceRecord, true); + Assertions.assertTrue(result.getColumnsWithErrors().isEmpty()); + } + + @Test + void + validateSourceRecord_withInValidPartitionKey_shouldReturnValidImportSourceRecordValidationResultWithErrors() { + Set partitionKeyNames = new HashSet<>(); + partitionKeyNames.add("id1"); + Set clusteringKeyNames = mockMetadata.getClusteringKeyNames(); + Set columnNames = mockMetadata.getColumnNames(); + JsonNode sourceRecord = UnitTestUtils.getOutputDataWithoutMetadata(); + ImportSourceRecordValidationResult result = + ImportSourceRecordValidator.validateSourceRecord( + partitionKeyNames, clusteringKeyNames, columnNames, sourceRecord, false); + Assertions.assertFalse(result.getColumnsWithErrors().isEmpty()); + } + + @Test + void + validateSourceRecord_withInValidPartitionKeyWithAllColumnsRequired_shouldReturnValidImportSourceRecordValidationResultWithErrors() { + Set partitionKeyNames = new HashSet<>(); + partitionKeyNames.add("id1"); + Set 
clusteringKeyNames = mockMetadata.getClusteringKeyNames(); + Set columnNames = mockMetadata.getColumnNames(); + JsonNode sourceRecord = UnitTestUtils.getOutputDataWithoutMetadata(); + ImportSourceRecordValidationResult result = + ImportSourceRecordValidator.validateSourceRecord( + partitionKeyNames, clusteringKeyNames, columnNames, sourceRecord, true); + Assertions.assertFalse(result.getColumnsWithErrors().isEmpty()); + Assertions.assertEquals(1, result.getErrorMessages().size()); + } + + @Test + void + validateSourceRecord_withInValidClusteringKey_shouldReturnValidImportSourceRecordValidationResultWithErrors() { + Set partitionKeyNames = mockMetadata.getPartitionKeyNames(); + Set clusteringKeyNames = new HashSet<>(); + clusteringKeyNames.add("id1"); + Set columnNames = mockMetadata.getColumnNames(); + JsonNode sourceRecord = UnitTestUtils.getOutputDataWithoutMetadata(); + ImportSourceRecordValidationResult result = + ImportSourceRecordValidator.validateSourceRecord( + partitionKeyNames, clusteringKeyNames, columnNames, sourceRecord, false); + Assertions.assertFalse(result.getColumnsWithErrors().isEmpty()); + Assertions.assertEquals( + "missing required field or column mapping for clustering key id1", + result.getErrorMessages().get(0)); + } +} From 8c75b791a24a1dfe65f02a7de33be26d108278ae Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Thu, 2 Jan 2025 14:58:08 +0530 Subject: [PATCH 39/87] Fix unit test failure --- .../task/validation/ImportSourceRecordValidatorTest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/task/validation/ImportSourceRecordValidatorTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/task/validation/ImportSourceRecordValidatorTest.java index f065bcb69e..5d33b2622b 100644 --- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/task/validation/ImportSourceRecordValidatorTest.java +++ 
b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/task/validation/ImportSourceRecordValidatorTest.java @@ -2,6 +2,7 @@ import com.fasterxml.jackson.databind.JsonNode; import com.scalar.db.api.TableMetadata; +import com.scalar.db.common.error.CoreError; import com.scalar.db.dataloader.core.UnitTestUtils; import java.util.HashSet; import java.util.Set; @@ -80,7 +81,7 @@ class ImportSourceRecordValidatorTest { partitionKeyNames, clusteringKeyNames, columnNames, sourceRecord, false); Assertions.assertFalse(result.getColumnsWithErrors().isEmpty()); Assertions.assertEquals( - "missing required field or column mapping for clustering key id1", + CoreError.DATA_LOADER_MISSING_CLUSTERING_KEY_COLUMN.buildMessage("id1"), result.getErrorMessages().get(0)); } } From 98618aa63b5bef6e4d44645681cac45d8eefe903 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Thu, 2 Jan 2025 15:16:38 +0530 Subject: [PATCH 40/87] Fix spot bugs failure --- .../core/dataimport/task/mapping/ImportDataMappingTest.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/task/mapping/ImportDataMappingTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/task/mapping/ImportDataMappingTest.java index 2589dacf82..e2b9364ff3 100644 --- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/task/mapping/ImportDataMappingTest.java +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/task/mapping/ImportDataMappingTest.java @@ -3,8 +3,6 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.ObjectNode; -import com.scalar.db.api.TableMetadata; -import com.scalar.db.dataloader.core.UnitTestUtils; import com.scalar.db.dataloader.core.dataimport.controlfile.ControlFileTable; import 
com.scalar.db.dataloader.core.dataimport.controlfile.ControlFileTableFieldMapping; import java.util.ArrayList; @@ -14,12 +12,10 @@ public class ImportDataMappingTest { - TableMetadata mockMetadata; ControlFileTable controlFilTable; @BeforeEach void setup() { - mockMetadata = UnitTestUtils.createTestTableMetadata(); controlFilTable = new ControlFileTable("namespace", "table"); ControlFileTableFieldMapping m1 = new ControlFileTableFieldMapping("source_id", "target_id"); ControlFileTableFieldMapping m2 = From 0d3f79ea3c79ec767e5000b0d9b30e7a55dc22fa Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Thu, 2 Jan 2025 15:56:37 +0530 Subject: [PATCH 41/87] Export tasks added --- .../core/dataexport/ExportManager.java | 308 ++++++++++++++++++ .../dataexport/producer/CsvProducerTask.java | 137 ++++++++ .../producer/JsonLineProducerTask.java | 126 +++++++ .../dataexport/producer/JsonProducerTask.java | 137 ++++++++ .../dataexport/producer/ProducerResult.java | 13 + .../dataexport/producer/ProducerTask.java | 39 +++ .../producer/ProducerTaskFactory.java | 46 +++ 7 files changed, 806 insertions(+) create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/ExportManager.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/CsvProducerTask.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/JsonLineProducerTask.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/JsonProducerTask.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerResult.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerTask.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerTaskFactory.java diff --git 
a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/ExportManager.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/ExportManager.java new file mode 100644 index 0000000000..07d59415cc --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/ExportManager.java @@ -0,0 +1,308 @@ +package com.scalar.db.dataloader.core.dataexport; + +import com.scalar.db.api.DistributedStorage; +import com.scalar.db.api.Result; +import com.scalar.db.api.Scanner; +import com.scalar.db.api.TableMetadata; +import com.scalar.db.dataloader.core.FileFormat; +import com.scalar.db.dataloader.core.dataexport.producer.ProducerTask; +import com.scalar.db.dataloader.core.dataexport.producer.ProducerTaskFactory; +import com.scalar.db.dataloader.core.dataexport.validation.ExportOptionsValidationException; +import com.scalar.db.dataloader.core.dataexport.validation.ExportOptionsValidator; +import com.scalar.db.dataloader.core.dataimport.dao.ScalarDBDao; +import com.scalar.db.dataloader.core.dataimport.dao.ScalarDBDaoException; +import com.scalar.db.dataloader.core.util.CsvUtil; +import com.scalar.db.dataloader.core.util.TableMetadataUtil; +import com.scalar.db.io.DataType; +import java.io.BufferedWriter; +import java.io.IOException; +import java.io.Writer; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import lombok.RequiredArgsConstructor; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@RequiredArgsConstructor +public class ExportManager { + private static final Logger logger = LoggerFactory.getLogger(ExportManager.class); + + private final DistributedStorage storage; + private final ScalarDBDao dao; + private final ProducerTaskFactory 
producerTaskFactory; + private final Object lock = new Object(); + + /** + * Starts the export process + * + * @param exportOptions Export options + * @param tableMetadata Metadata for a single ScalarDB table + * @param writer Writer to write the exported data + */ + public ExportReport startExport( + ExportOptions exportOptions, TableMetadata tableMetadata, Writer writer) { + ExportReport exportReport = new ExportReport(); + try { + validateExportOptions(exportOptions, tableMetadata); + Map dataTypeByColumnName = + TableMetadataUtil.extractColumnDataTypes(tableMetadata); + handleTransactionMetadata(exportOptions, tableMetadata); + + if (exportOptions.getOutputFileFormat() == FileFormat.CSV + && !exportOptions.isExcludeHeaderRow()) { + writeCsvHeaderRow(exportOptions, tableMetadata, dataTypeByColumnName, writer); + } + + int maxThreadCount = + exportOptions.getMaxThreadCount() == 0 + ? Runtime.getRuntime().availableProcessors() + : exportOptions.getMaxThreadCount(); + ExecutorService executorService = Executors.newFixedThreadPool(maxThreadCount); + + BufferedWriter bufferedWriter = new BufferedWriter(writer); + boolean isJson = exportOptions.getOutputFileFormat() == FileFormat.JSON; + + try (Scanner scanner = createScanner(exportOptions, dao, storage)) { + if (isJson) { + bufferedWriter.write("["); + } + + Iterator iterator = scanner.iterator(); + AtomicBoolean isFirstBatch = new AtomicBoolean(true); + + while (iterator.hasNext()) { + List dataChunk = fetchDataChunk(iterator, exportOptions.getDataChunkSize()); + executorService.submit( + () -> + processDataChunk( + exportOptions, + tableMetadata, + dataTypeByColumnName, + dataChunk, + bufferedWriter, + isJson, + isFirstBatch, + exportReport)); + } + executorService.shutdown(); + if (executorService.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS)) { + logger.info("All tasks completed"); + } else { + logger.error("Timeout occurred while waiting for tasks to complete"); + // TODO: handle this + } + if (isJson) 
{ + bufferedWriter.write("]"); + } + bufferedWriter.flush(); + } catch (InterruptedException | IOException e) { + logger.error("Error during export: {}", e.getMessage()); + } + } catch (ExportOptionsValidationException | IOException | ScalarDBDaoException e) { + logger.error("Error during export: {}", e.getMessage()); + } + return exportReport; + } + + /*** + * To process result data chunk + * + * @param exportOptions export options + * @param tableMetadata metadata of the table + * @param dataTypeByColumnName map of columns and their data types + * @param dataChunk a list with result data + * @param bufferedWriter writer object + * @param isJson if data format is json or not + * @param isFirstBatch is the data going to be process is the first batch or not + * @param exportReport export report which will be updated once the data chunk is processed + */ + private void processDataChunk( + ExportOptions exportOptions, + TableMetadata tableMetadata, + Map dataTypeByColumnName, + List dataChunk, + BufferedWriter bufferedWriter, + boolean isJson, + AtomicBoolean isFirstBatch, + ExportReport exportReport) { + ProducerTask producerTask = + producerTaskFactory.createProducerTask( + exportOptions.getOutputFileFormat(), + exportOptions.getProjectionColumns(), + tableMetadata, + dataTypeByColumnName); + String dataChunkContent = producerTask.process(dataChunk, exportReport); + + try { + synchronized (lock) { + if (isJson && !isFirstBatch.getAndSet(false)) { + bufferedWriter.write(","); + } + bufferedWriter.write(dataChunkContent); + } + } catch (IOException e) { + logger.error("Error while writing data chunk: {}", e.getMessage()); + } + } + + /*** + * To split result into batches + * @param iterator iterator which parse results + * @param batchSize size of batch + * @return a list of results split to batches + */ + private List fetchDataChunk(Iterator iterator, int batchSize) { + List batch = new ArrayList<>(); + int count = 0; + while (iterator.hasNext() && count < batchSize) 
{ + batch.add(iterator.next()); + count++; + } + return batch; + } + + /*** + * To validate export options + * @param exportOptions export options + * @param tableMetadata metadata of the table + * @throws ExportOptionsValidationException thrown if any of the export option validation fails + */ + private void validateExportOptions(ExportOptions exportOptions, TableMetadata tableMetadata) + throws ExportOptionsValidationException { + ExportOptionsValidator.validate(exportOptions, tableMetadata); + } + + /*** + * To update projection columns of export options if include metadata options is enabled + * @param exportOptions export options + * @param tableMetadata metadata of the table + */ + private void handleTransactionMetadata(ExportOptions exportOptions, TableMetadata tableMetadata) { + if (exportOptions.isIncludeTransactionMetadata() + && !exportOptions.getProjectionColumns().isEmpty()) { + List projectionMetadata = + TableMetadataUtil.populateProjectionsWithMetadata( + tableMetadata, exportOptions.getProjectionColumns()); + exportOptions.setProjectionColumns(projectionMetadata); + } + } + + /*** + * To create and write the header row to the CSV export file + * @param exportOptions export options + * @param tableMetadata metadata of the table + * @param dataTypeByColumnName map of columns and their data types + * @param writer writer object + * @throws IOException throws if any exception occur in file operations + */ + private void writeCsvHeaderRow( + ExportOptions exportOptions, + TableMetadata tableMetadata, + Map dataTypeByColumnName, + Writer writer) + throws IOException { + String header = + createCsvHeaderRow( + exportOptions, + tableMetadata, + dataTypeByColumnName, + TableMetadataUtil.getMetadataColumns()); + writer.append(header); + writer.flush(); + } + + /*** + * To create a scanner object + * @param exportOptions export options + * @param dao scalardb dao object + * @param storage distributed storage object + * @return created scanner + * @throws 
ScalarDBDaoException throws if any issue occurs in creating scanner object + */ + private Scanner createScanner( + ExportOptions exportOptions, ScalarDBDao dao, DistributedStorage storage) + throws ScalarDBDaoException { + boolean isScanAll = exportOptions.getScanPartitionKey() == null; + if (isScanAll) { + return dao.createScanner( + exportOptions.getNamespace(), + exportOptions.getTableName(), + exportOptions.getProjectionColumns(), + exportOptions.getLimit(), + storage); + } else { + return dao.createScanner( + exportOptions.getNamespace(), + exportOptions.getTableName(), + exportOptions.getScanPartitionKey(), + exportOptions.getScanRange(), + exportOptions.getSortOrders(), + exportOptions.getProjectionColumns(), + exportOptions.getLimit(), + storage); + } + } + + /*** + * To generate the header row of CSV export file + * @param exportOptions export options + * @param tableMetadata metadata of the table + * @param dataTypeByColumnName map of columns and their data types + * @param columnsToIgnore set of columns to ignore + * @return generated CSV header row + */ + private String createCsvHeaderRow( + ExportOptions exportOptions, + TableMetadata tableMetadata, + Map dataTypeByColumnName, + Set columnsToIgnore) { + StringBuilder headerRow = new StringBuilder(); + List projections = exportOptions.getProjectionColumns(); + Iterator iterator = tableMetadata.getColumnNames().iterator(); + while (iterator.hasNext()) { + String columnName = iterator.next(); + if (shouldIgnoreColumn( + exportOptions.isIncludeTransactionMetadata(), + columnName, + columnsToIgnore, + dataTypeByColumnName.keySet(), + projections)) { + continue; + } + headerRow.append(columnName); + if (iterator.hasNext()) { + headerRow.append(exportOptions.getDelimiter()); + } + } + CsvUtil.removeTrailingDelimiter(headerRow, exportOptions.getDelimiter()); + headerRow.append("\n"); + return headerRow.toString(); + } + + /*** + * To ignore a column or not based on conditions such as if it is a metadata column 
or if it is not include in selected projections + * @param isIncludeTransactionMetadata to include transaction metadata or not + * @param columnName column name + * @param columnsToIgnore set of columns to ignore + * @param dataTypeColumnNames data types of columns + * @param projections selected columns for projection + * @return ignore the column or not + */ + private boolean shouldIgnoreColumn( + boolean isIncludeTransactionMetadata, + String columnName, + Set columnsToIgnore, + Set dataTypeColumnNames, + List projections) { + return (!isIncludeTransactionMetadata + && TableMetadataUtil.isMetadataColumn(columnName, columnsToIgnore, dataTypeColumnNames)) + || (!projections.isEmpty() && !projections.contains(columnName)); + } +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/CsvProducerTask.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/CsvProducerTask.java new file mode 100644 index 0000000000..3d93a72c72 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/CsvProducerTask.java @@ -0,0 +1,137 @@ +package com.scalar.db.dataloader.core.dataexport.producer; + +import com.scalar.db.api.Result; +import com.scalar.db.api.TableMetadata; +import com.scalar.db.common.error.CoreError; +import com.scalar.db.dataloader.core.dataexport.ExportReport; +import com.scalar.db.dataloader.core.util.CsvUtil; +import com.scalar.db.dataloader.core.util.DecimalUtil; +import com.scalar.db.dataloader.core.util.TableMetadataUtil; +import com.scalar.db.io.DataType; +import java.util.Base64; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Producer that converts ScalarDB scan results to csv content. 
The output is sent to a queue to be + * processed by a consumer + */ +public class CsvProducerTask extends ProducerTask { + + private static final Logger LOGGER = LoggerFactory.getLogger(CsvProducerTask.class); + + private final String delimiter; + + /** + * Class constructor + * + * @param includeMetadata Include metadata in the exported data + * @param tableMetadata Metadata for a single ScalarDB table + * @param columnDataTypes Map of data types for the all columns in a ScalarDB table + * @param delimiter Delimiter used in csv content + */ + public CsvProducerTask( + boolean includeMetadata, + List projectColumns, + TableMetadata tableMetadata, + Map columnDataTypes, + String delimiter) { + super(includeMetadata, projectColumns, tableMetadata, columnDataTypes); + this.delimiter = delimiter; + } + + /*** + * Process scalardb scan result data and returns CSV data + * @param dataChunk list of results + * @param exportReport export report + * @return result converted to string + */ + @Override + public String process(List dataChunk, ExportReport exportReport) { + StringBuilder csvContent = new StringBuilder(); + for (Result result : dataChunk) { + String csvRow = convertResultToCsv(result); + csvContent.append(csvRow); + exportReport.increaseExportedRowCount(); + } + return csvContent.toString(); + } + + /** + * Convert a ScalarDB scan result to CSV + * + * @param result ScalarDB scan result + * @return CSV string + */ + private String convertResultToCsv(Result result) { + // Initialization + StringBuilder stringBuilder = new StringBuilder(); + Set columnsToIgnore = TableMetadataUtil.getMetadataColumns(); + LinkedHashSet tableColumnNames = tableMetadata.getColumnNames(); + Iterator iterator = tableColumnNames.iterator(); + + try { + // Loop over the result data list + while (iterator.hasNext()) { + String columnName = iterator.next(); + + // Skip the field if it can be ignored based on check + boolean columnNotProjected = !projectedColumnsSet.contains(columnName); + 
boolean isMetadataColumn = + TableMetadataUtil.isMetadataColumn(columnName, columnsToIgnore, tableColumnNames); + if (columnNotProjected || (!includeMetadata && isMetadataColumn)) { + continue; + } + + // Convert each value to a string value and add to the StringBuilder + stringBuilder.append( + convertToString(result, columnName, dataTypeByColumnName.get(columnName))); + + if (iterator.hasNext()) { + stringBuilder.append(delimiter); + } + } + + // Double check and remove the character if it's a delimiter. This can occur when the last + // added column was not the last iterator field and did get a delimiter + CsvUtil.removeTrailingDelimiter(stringBuilder, delimiter); + + stringBuilder.append(System.lineSeparator()); + + return stringBuilder.toString(); + } catch (UnsupportedOperationException e) { + LOGGER.error(CoreError.DATA_LOADER_VALUE_TO_STRING_CONVERSION_FAILED.buildMessage()); + } + return ""; + } + + /*** + * Convert result column value to string + * @param result scalardb result + * @param columnName column name + * @param dataType datatype of the column + * @return value of result converted to string + */ + private String convertToString(Result result, String columnName, DataType dataType) { + if (result.isNull(columnName)) { + return null; + } + return switch (dataType) { + case INT -> Integer.toString(result.getInt(columnName)); + case BIGINT -> Long.toString(result.getBigInt(columnName)); + case FLOAT -> DecimalUtil.convertToNonScientific(result.getFloat(columnName)); + case DOUBLE -> DecimalUtil.convertToNonScientific(result.getDouble(columnName)); + case BLOB -> { + byte[] encoded = Base64.getEncoder().encode(result.getBlobAsBytes(columnName)); + yield new String(encoded); + } + case BOOLEAN -> Boolean.toString(result.getBoolean(columnName)); + case TEXT -> result.getText(columnName); + }; + } +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/JsonLineProducerTask.java 
b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/JsonLineProducerTask.java new file mode 100644 index 0000000000..06e8a1c796 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/JsonLineProducerTask.java @@ -0,0 +1,126 @@ +package com.scalar.db.dataloader.core.dataexport.producer; + +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.scalar.db.api.Result; +import com.scalar.db.api.TableMetadata; +import com.scalar.db.dataloader.core.DataLoaderObjectMapper; +import com.scalar.db.dataloader.core.dataexport.ExportReport; +import com.scalar.db.dataloader.core.util.TableMetadataUtil; +import com.scalar.db.io.DataType; +import java.util.Base64; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +public class JsonLineProducerTask extends ProducerTask { + + private final DataLoaderObjectMapper objectMapper = new DataLoaderObjectMapper(); + + /** + * Class constructor + * + * @param includeMetadata Include metadata in the exported data + * @param tableMetadata Metadata for a single ScalarDB table + * @param columnDataTypes Map of data types for the all columns in a ScalarDB table + */ + public JsonLineProducerTask( + boolean includeMetadata, + List projectionColumns, + TableMetadata tableMetadata, + Map columnDataTypes) { + super(includeMetadata, projectionColumns, tableMetadata, columnDataTypes); + } + + /*** + * Process scalardb scan result data and returns CSV data + * @param dataChunk list of results + * @param exportReport export report + * @return result converted to string + */ + @Override + public String process(List dataChunk, ExportReport exportReport) { + StringBuilder jsonLines = new StringBuilder(); + + for (Result result : dataChunk) { + ObjectNode objectNode = generateJsonForResult(result); + jsonLines.append(objectNode.toString()); + jsonLines.append(System.lineSeparator()); + 
exportReport.increaseExportedRowCount(); + } + return jsonLines.toString(); + } + + /** + * Generate a Json Object based on a ScalarDB Result + * + * @param result ScalarDB Result object instance + * @return JsonObject containing the ScalarDB result data + */ + private ObjectNode generateJsonForResult(Result result) { + LinkedHashSet tableColumns = tableMetadata.getColumnNames(); + + ObjectNode objectNode = objectMapper.createObjectNode(); + + // Columns to ignore in the export + Set columnsToIgnore = TableMetadataUtil.getMetadataColumns(); + + // Loop through all the columns and to the json object + for (String columnName : tableColumns) { + // Skip the field if it can be ignored based on check + boolean columnNotProjected = !projectedColumnsSet.contains(columnName); + boolean isMetadataColumn = + TableMetadataUtil.isMetadataColumn(columnName, columnsToIgnore, tableColumns); + if (columnNotProjected || (!includeMetadata && isMetadataColumn)) { + continue; + } + + DataType dataType = dataTypeByColumnName.get(columnName); + addToObjectNode(objectNode, result, columnName, dataType); + } + return objectNode; + } + + /*** + * Add result column name and value to json object node + * @param result scalardb result + * @param columnName column name + * @param dataType datatype of the column + * + */ + private void addToObjectNode( + ObjectNode objectNode, Result result, String columnName, DataType dataType) { + + // Using add and not addProperty to be able to add a null value. addProperty does not + // support null values. 
+ if (result.isNull(columnName)) { + return; + } + + switch (dataType) { + case BOOLEAN: + objectNode.put(columnName, result.getBoolean(columnName)); + break; + case INT: + objectNode.put(columnName, result.getInt(columnName)); + break; + case BIGINT: + objectNode.put(columnName, result.getBigInt(columnName)); + break; + case FLOAT: + objectNode.put(columnName, result.getFloat(columnName)); + break; + case DOUBLE: + objectNode.put(columnName, result.getDouble(columnName)); + break; + case TEXT: + objectNode.put(columnName, result.getText(columnName)); + break; + case BLOB: + // convert to base64 string + byte[] encoded = Base64.getEncoder().encode(result.getBlobAsBytes(columnName)); + objectNode.put(columnName, new String(encoded)); + break; + } + } +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/JsonProducerTask.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/JsonProducerTask.java new file mode 100644 index 0000000000..47fbd4c205 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/JsonProducerTask.java @@ -0,0 +1,137 @@ +package com.scalar.db.dataloader.core.dataexport.producer; + +import com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.scalar.db.api.Result; +import com.scalar.db.api.TableMetadata; +import com.scalar.db.dataloader.core.DataLoaderObjectMapper; +import com.scalar.db.dataloader.core.dataexport.ExportReport; +import com.scalar.db.dataloader.core.util.TableMetadataUtil; +import com.scalar.db.io.DataType; +import java.util.Base64; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +public class JsonProducerTask extends ProducerTask { + + private final DataLoaderObjectMapper objectMapper = new DataLoaderObjectMapper(); + private final boolean prettyPrintJson; + + /** + * Class constructor + * + * 
@param includeMetadata Include metadata in the exported data + * @param tableMetadata Metadata for a single ScalarDB table + * @param columnDataTypes Map of data types for the all columns in a ScalarDB table + */ + public JsonProducerTask( + boolean includeMetadata, + List projectionColumns, + TableMetadata tableMetadata, + Map columnDataTypes, + boolean prettyPrintJson) { + super(includeMetadata, projectionColumns, tableMetadata, columnDataTypes); + this.prettyPrintJson = prettyPrintJson; + } + + /*** + * Process scalardb scan result data and returns CSV data + * @param dataChunk list of results + * @param exportReport export report + * @return result converted to string + */ + @Override + public String process(List dataChunk, ExportReport exportReport) { + ArrayNode arrayNode = objectMapper.createArrayNode(); + + for (Result result : dataChunk) { + ObjectNode objectNode = generateJsonForResult(result); + arrayNode.add(objectNode); + exportReport.increaseExportedRowCount(); + } + + if (prettyPrintJson) { + String json = arrayNode.toPrettyString(); + return json.substring(1, json.length() - 1); + } + + String json = arrayNode.toString(); + // Remove the [] from the json string + return json.substring(1, json.length() - 1); + } + + /** + * Generate a Json Object based on a ScalarDB Result + * + * @param result ScalarDB Result object instance + * @return JsonObject containing the ScalarDB result data + */ + private ObjectNode generateJsonForResult(Result result) { + LinkedHashSet tableColumns = tableMetadata.getColumnNames(); + + ObjectNode objectNode = objectMapper.createObjectNode(); + + // Columns to ignore in the export + Set columnsToIgnore = TableMetadataUtil.getMetadataColumns(); + + // Loop through all the columns and to the json object + for (String columnName : tableColumns) { + // Skip the field if it can be ignored based on check + boolean columnNotProjected = !projectedColumnsSet.contains(columnName); + boolean isMetadataColumn = + 
TableMetadataUtil.isMetadataColumn(columnName, columnsToIgnore, tableColumns); + if (columnNotProjected || (!includeMetadata && isMetadataColumn)) { + continue; + } + + DataType dataType = dataTypeByColumnName.get(columnName); + addToObjectNode(objectNode, result, columnName, dataType); + } + return objectNode; + } + + /*** + * Add result column name and value to json object node + * @param result scalardb result + * @param columnName column name + * @param dataType datatype of the column + * + */ + private void addToObjectNode( + ObjectNode objectNode, Result result, String columnName, DataType dataType) { + + // Using add and not addProperty to be able to add a null value. addProperty does not + // support null values. + if (result.isNull(columnName)) { + return; + } + + switch (dataType) { + case BOOLEAN: + objectNode.put(columnName, result.getBoolean(columnName)); + break; + case INT: + objectNode.put(columnName, result.getInt(columnName)); + break; + case BIGINT: + objectNode.put(columnName, result.getBigInt(columnName)); + break; + case FLOAT: + objectNode.put(columnName, result.getFloat(columnName)); + break; + case DOUBLE: + objectNode.put(columnName, result.getDouble(columnName)); + break; + case TEXT: + objectNode.put(columnName, result.getText(columnName)); + break; + case BLOB: + // convert to base64 string + byte[] encoded = Base64.getEncoder().encode(result.getBlobAsBytes(columnName)); + objectNode.put(columnName, new String(encoded)); + break; + } + } +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerResult.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerResult.java new file mode 100644 index 0000000000..9506fcd722 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerResult.java @@ -0,0 +1,13 @@ +package com.scalar.db.dataloader.core.dataexport.producer; + +import 
com.fasterxml.jackson.databind.JsonNode; +import lombok.Builder; +import lombok.Value; + +@Builder +@Value +public class ProducerResult { + JsonNode jsonNode; + String csvSource; + boolean poisonPill; +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerTask.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerTask.java new file mode 100644 index 0000000000..a3ecb9ed87 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerTask.java @@ -0,0 +1,39 @@ +package com.scalar.db.dataloader.core.dataexport.producer; + +import com.scalar.db.api.Result; +import com.scalar.db.api.TableMetadata; +import com.scalar.db.dataloader.core.dataexport.ExportReport; +import com.scalar.db.io.DataType; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +public abstract class ProducerTask { + + protected final TableMetadata tableMetadata; + protected final Map dataTypeByColumnName; + protected final boolean includeMetadata; + protected final Set projectedColumnsSet; + + /** + * Class constructor + * + * @param includeMetadata Include metadata in the exported data + * @param projectionColumns List of column name for projection + * @param tableMetadata Metadata of the ScalarDB table + * @param columnDataTypes Map of data types for the all columns in a ScalarDB table + */ + protected ProducerTask( + boolean includeMetadata, + List projectionColumns, + TableMetadata tableMetadata, + Map columnDataTypes) { + this.includeMetadata = includeMetadata; + this.projectedColumnsSet = new HashSet<>(projectionColumns); + this.tableMetadata = tableMetadata; + this.dataTypeByColumnName = columnDataTypes; + } + + public abstract String process(List dataChunk, ExportReport exportReport); +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerTaskFactory.java 
b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerTaskFactory.java new file mode 100644 index 0000000000..ed9883ade8 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerTaskFactory.java @@ -0,0 +1,46 @@ +package com.scalar.db.dataloader.core.dataexport.producer; + +import com.scalar.db.api.TableMetadata; +import com.scalar.db.dataloader.core.FileFormat; +import com.scalar.db.io.DataType; +import java.util.List; +import java.util.Map; +import lombok.RequiredArgsConstructor; + +@RequiredArgsConstructor +public class ProducerTaskFactory { + + private final String delimiter; + private final boolean includeMetadata; + private final boolean prettyPrintJson; + + /*** + * + * @param fileFormat file format + * @param projectionColumns columns names that are selected + * @param tableMetadata metadata of the table + * @param dataTypeByColumnName map of columns with data types + * @return producer task object of provided file format + */ + public ProducerTask createProducerTask( + FileFormat fileFormat, + List projectionColumns, + TableMetadata tableMetadata, + Map dataTypeByColumnName) { + return switch (fileFormat) { + case JSON -> + new JsonProducerTask( + includeMetadata, + projectionColumns, + tableMetadata, + dataTypeByColumnName, + prettyPrintJson); + case JSONL -> + new JsonLineProducerTask( + includeMetadata, projectionColumns, tableMetadata, dataTypeByColumnName); + case CSV -> + new CsvProducerTask( + includeMetadata, projectionColumns, tableMetadata, dataTypeByColumnName, delimiter); + }; + } +} From 95022a982b0514231a4979e458c2f6b91e6535a5 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Thu, 2 Jan 2025 16:29:04 +0530 Subject: [PATCH 42/87] Initial commit [skip ci] --- .../com/scalar/db/common/error/CoreError.java | 4 +- .../core/DataLoaderObjectMapper.java | 14 ++ .../producer/ProducerTaskFactory.java | 19 +-- .../core/dataimport/dao/ScalarDBManager.java | 8 +- 
.../core/dataexport/ExportManagerTest.java | 157 ++++++++++++++++++ .../producer/CsvProducerTaskTest.java | 64 +++++++ .../producer/JsonLineProducerTaskTest.java | 64 +++++++ .../producer/JsonProducerTaskTest.java | 63 +++++++ .../producer/ProducerTaskFactoryTest.java | 55 ++++++ 9 files changed, 432 insertions(+), 16 deletions(-) create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/DataLoaderObjectMapper.java create mode 100644 data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/ExportManagerTest.java create mode 100644 data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/producer/CsvProducerTaskTest.java create mode 100644 data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/producer/JsonLineProducerTaskTest.java create mode 100644 data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/producer/JsonProducerTaskTest.java create mode 100644 data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerTaskFactoryTest.java diff --git a/core/src/main/java/com/scalar/db/common/error/CoreError.java b/core/src/main/java/com/scalar/db/common/error/CoreError.java index 6c0a9701b3..42ba06593a 100644 --- a/core/src/main/java/com/scalar/db/common/error/CoreError.java +++ b/core/src/main/java/com/scalar/db/common/error/CoreError.java @@ -718,7 +718,9 @@ public enum CoreError implements ScalarDbError { "The provided partition key order does not match the table schema. Required order: %s", "", ""), - + DATA_LOADER_VALUE_TO_STRING_CONVERSION_FAILED( + Category.USER_ERROR, "0168","Something went wrong while converting the ScalarDB values to strings. 
The table metadata and Value datatype probably do not match.", "", "" + ), // // Errors for the concurrency error category // diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/DataLoaderObjectMapper.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/DataLoaderObjectMapper.java new file mode 100644 index 0000000000..d90fd49b65 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/DataLoaderObjectMapper.java @@ -0,0 +1,14 @@ +package com.scalar.db.dataloader.core; + +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; + +public class DataLoaderObjectMapper extends ObjectMapper { + + public DataLoaderObjectMapper() { + super(); + this.setSerializationInclusion(JsonInclude.Include.NON_NULL); + this.registerModule(new JavaTimeModule()); + } +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerTaskFactory.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerTaskFactory.java index ed9883ade8..467d407d50 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerTaskFactory.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerTaskFactory.java @@ -28,19 +28,12 @@ public ProducerTask createProducerTask( TableMetadata tableMetadata, Map dataTypeByColumnName) { return switch (fileFormat) { - case JSON -> - new JsonProducerTask( - includeMetadata, - projectionColumns, - tableMetadata, - dataTypeByColumnName, - prettyPrintJson); - case JSONL -> - new JsonLineProducerTask( - includeMetadata, projectionColumns, tableMetadata, dataTypeByColumnName); - case CSV -> - new CsvProducerTask( - includeMetadata, projectionColumns, tableMetadata, dataTypeByColumnName, delimiter); + case JSON -> new JsonProducerTask( + 
includeMetadata, projectionColumns, tableMetadata, dataTypeByColumnName, prettyPrintJson); + case JSONL -> new JsonLineProducerTask( + includeMetadata, projectionColumns, tableMetadata, dataTypeByColumnName); + case CSV -> new CsvProducerTask( + includeMetadata, projectionColumns, tableMetadata, dataTypeByColumnName, delimiter); }; } } diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBManager.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBManager.java index 1016eaaba4..7e3b3182a2 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBManager.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBManager.java @@ -48,7 +48,9 @@ public ScalarDBManager(TransactionFactory transactionFactory) throws IOException storage = null; } - /** @return storage for ScalarDB connection that is running in storage mode */ + /** + * @return storage for ScalarDB connection that is running in storage mode + */ public DistributedStorage getDistributedStorage() { return storage; } @@ -61,7 +63,9 @@ public DistributedTransactionManager getDistributedTransactionManager() { return transactionManager; } - /** @return Distributed storage admin for ScalarDB admin operations */ + /** + * @return Distributed storage admin for ScalarDB admin operations + */ public DistributedStorageAdmin getDistributedStorageAdmin() { return storageAdmin; } diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/ExportManagerTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/ExportManagerTest.java new file mode 100644 index 0000000000..c41580e7d6 --- /dev/null +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/ExportManagerTest.java @@ -0,0 +1,157 @@ +package com.scalar.db.dataloader.core.dataexport; + +import com.scalar.db.api.DistributedStorage; +import 
com.scalar.db.api.Result; +import com.scalar.db.api.Scanner; +import com.scalar.db.api.TableMetadata; +import com.scalar.db.common.ResultImpl; +import com.scalar.db.dataloader.core.FileFormat; +import com.scalar.db.dataloader.core.ScanRange; +import com.scalar.db.dataloader.core.UnitTestUtils; +import com.scalar.db.dataloader.core.dataexport.producer.ProducerTaskFactory; +import com.scalar.db.dataloader.core.dataimport.dao.ScalarDBDao; +import com.scalar.db.dataloader.core.dataimport.dao.ScalarDBDaoException; +import com.scalar.db.io.Column; +import com.scalar.db.io.IntColumn; +import com.scalar.db.io.Key; +import java.io.*; +import java.nio.file.Paths; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.Mockito; +import org.mockito.Spy; + +class ExportManagerTest { + + TableMetadata mockData; + DistributedStorage storage; + @Spy ScalarDBDao dao; + ProducerTaskFactory producerTaskFactory; + ExportManager exportManager; + + @BeforeEach + void setup() { + storage = Mockito.mock(DistributedStorage.class); + mockData = UnitTestUtils.createTestTableMetadata(); + dao = Mockito.mock(ScalarDBDao.class); + producerTaskFactory = new ProducerTaskFactory(null, false, true); + } + + @Test + void startExport_givenValidDataWithoutPartitionKey_shouldGenerateOutputFile() + throws IOException, ScalarDBDaoException { + exportManager = new ExportManager(storage, dao, producerTaskFactory); + Scanner scanner = Mockito.mock(Scanner.class); + String filePath = Paths.get("").toAbsolutePath() + "/output.json"; + Map> values = UnitTestUtils.createTestValues(); + Result result = new ResultImpl(values, mockData); + List results = Collections.singletonList(result); + + ExportOptions exportOptions = + ExportOptions.builder("namespace", "table", null, FileFormat.JSON) + .sortOrders(Collections.emptyList()) + .scanRange(new 
ScanRange(null, null, false, false)) + .build(); + + Mockito.when( + dao.createScanner( + exportOptions.getNamespace(), + exportOptions.getTableName(), + exportOptions.getProjectionColumns(), + exportOptions.getLimit(), + storage)) + .thenReturn(scanner); + Mockito.when(scanner.iterator()).thenReturn(results.iterator()); + try (BufferedWriter writer = new BufferedWriter(new FileWriter(filePath, true))) { + exportManager.startExport(exportOptions, mockData, writer); + } + File file = new File(filePath); + Assertions.assertTrue(file.exists()); + Assertions.assertTrue(file.delete()); + } + + @Test + void startExport_givenPartitionKey_shouldGenerateOutputFile() + throws IOException, ScalarDBDaoException { + exportManager = new ExportManager(storage, dao, producerTaskFactory); + Scanner scanner = Mockito.mock(Scanner.class); + String filePath = Paths.get("").toAbsolutePath() + "/output.json"; + Map> values = UnitTestUtils.createTestValues(); + Result result = new ResultImpl(values, mockData); + List results = Collections.singletonList(result); + + ExportOptions exportOptions = + ExportOptions.builder( + "namespace", + "table", + Key.newBuilder().add(IntColumn.of("col1", 1)).build(), + FileFormat.JSON) + .sortOrders(Collections.emptyList()) + .scanRange(new ScanRange(null, null, false, false)) + .build(); + + Mockito.when( + dao.createScanner( + exportOptions.getNamespace(), + exportOptions.getTableName(), + exportOptions.getScanPartitionKey(), + exportOptions.getScanRange(), + exportOptions.getSortOrders(), + exportOptions.getProjectionColumns(), + exportOptions.getLimit(), + storage)) + .thenReturn(scanner); + Mockito.when(scanner.iterator()).thenReturn(results.iterator()); + try (BufferedWriter writer = new BufferedWriter(new FileWriter(filePath, true))) { + exportManager.startExport(exportOptions, mockData, writer); + } + File file = new File(filePath); + Assertions.assertTrue(file.exists()); + Assertions.assertTrue(file.delete()); + } + + @Test + void 
startExport_givenPartitionKeyAndFileFormatCsv_shouldGenerateOutputFile() + throws IOException, ScalarDBDaoException { + producerTaskFactory = new ProducerTaskFactory(",", false, false); + exportManager = new ExportManager(storage, dao, producerTaskFactory); + Scanner scanner = Mockito.mock(Scanner.class); + String filePath = Paths.get("").toAbsolutePath() + "/output.csv"; + Map> values = UnitTestUtils.createTestValues(); + Result result = new ResultImpl(values, mockData); + List results = Collections.singletonList(result); + + ExportOptions exportOptions = + ExportOptions.builder( + "namespace", + "table", + Key.newBuilder().add(IntColumn.of("col1", 1)).build(), + FileFormat.CSV) + .sortOrders(Collections.emptyList()) + .scanRange(new ScanRange(null, null, false, false)) + .build(); + + Mockito.when( + dao.createScanner( + exportOptions.getNamespace(), + exportOptions.getTableName(), + exportOptions.getScanPartitionKey(), + exportOptions.getScanRange(), + exportOptions.getSortOrders(), + exportOptions.getProjectionColumns(), + exportOptions.getLimit(), + storage)) + .thenReturn(scanner); + Mockito.when(scanner.iterator()).thenReturn(results.iterator()); + try (BufferedWriter writer = new BufferedWriter(new FileWriter(filePath, true))) { + exportManager.startExport(exportOptions, mockData, writer); + } + File file = new File(filePath); + Assertions.assertTrue(file.exists()); + Assertions.assertTrue(file.delete()); + } +} diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/producer/CsvProducerTaskTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/producer/CsvProducerTaskTest.java new file mode 100644 index 0000000000..a084fb6930 --- /dev/null +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/producer/CsvProducerTaskTest.java @@ -0,0 +1,64 @@ +package com.scalar.db.dataloader.core.dataexport.producer; + +import com.scalar.db.api.Result; +import 
com.scalar.db.api.TableMetadata; +import com.scalar.db.common.ResultImpl; +import com.scalar.db.dataloader.core.UnitTestUtils; +import com.scalar.db.dataloader.core.dataexport.ExportReport; +import com.scalar.db.io.Column; +import com.scalar.db.io.DataType; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class CsvProducerTaskTest { + + TableMetadata mockMetadata; + List projectedColumns; + Map columnData; + CsvProducerTask csvProducerTask; + + @BeforeEach + void setup() { + mockMetadata = UnitTestUtils.createTestTableMetadata(); + projectedColumns = UnitTestUtils.getColumnsListOfMetadata(); + columnData = UnitTestUtils.getColumnData(); + csvProducerTask = new CsvProducerTask(false, projectedColumns, mockMetadata, columnData, ","); + } + + @Test + void process_withEmptyResultList_shouldReturnEmptyString() { + List results = Collections.emptyList(); + String output = csvProducerTask.process(results, new ExportReport()); + Assertions.assertEquals("", output); + } + + @Test + void process_withValidResultList_shouldReturnValidCsvString() { + String expectedOutput = + "9007199254740992,2147483647,true,0.000000000000000000000000000000000000000000001401298464324817,0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000049,test value,YmxvYiB0ZXN0IHZhbHVl"; + Map> values = UnitTestUtils.createTestValues(); + Result result = new ResultImpl(values, mockMetadata); + List resultList = new ArrayList<>(); + resultList.add(result); + String output = csvProducerTask.process(resultList, new ExportReport()); + 
Assertions.assertEquals(expectedOutput, output.trim()); + } + + @Test + void process_withValidResultListWithMetadata_shouldReturnValidCsvString() { + csvProducerTask = new CsvProducerTask(true, projectedColumns, mockMetadata, columnData, ","); + String expectedOutput = + "9007199254740992,2147483647,true,0.000000000000000000000000000000000000000000001401298464324817,0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000049,test value,YmxvYiB0ZXN0IHZhbHVl,0.000000000000000000000000000000000000000000001401298464324817,0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000049,test value,YmxvYiB0ZXN0IHZhbHVl,txt value 464654654,2147483647,2147483647,9007199254740992,9007199254740992,test value,2147483647,2147483647,9007199254740992,9007199254740992"; + Map> values = UnitTestUtils.createTestValues(); + Result result = new ResultImpl(values, mockMetadata); + List resultList = new ArrayList<>(); + resultList.add(result); + String output = csvProducerTask.process(resultList, new ExportReport()); + Assertions.assertEquals(expectedOutput, output.trim()); + } +} diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/producer/JsonLineProducerTaskTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/producer/JsonLineProducerTaskTest.java new file mode 100644 index 0000000000..c10f674ab3 --- /dev/null +++ 
b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/producer/JsonLineProducerTaskTest.java @@ -0,0 +1,64 @@ +package com.scalar.db.dataloader.core.dataexport.producer; + +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.scalar.db.api.Result; +import com.scalar.db.api.TableMetadata; +import com.scalar.db.common.ResultImpl; +import com.scalar.db.dataloader.core.UnitTestUtils; +import com.scalar.db.dataloader.core.dataexport.ExportReport; +import com.scalar.db.io.Column; +import com.scalar.db.io.DataType; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class JsonLineProducerTaskTest { + TableMetadata mockMetadata; + List projectedColumns; + Map columnData; + JsonLineProducerTask jsonLineProducerTask; + + @BeforeEach + void setup() { + mockMetadata = UnitTestUtils.createTestTableMetadata(); + projectedColumns = UnitTestUtils.getColumnsListOfMetadata(); + columnData = UnitTestUtils.getColumnData(); + jsonLineProducerTask = + new JsonLineProducerTask(false, projectedColumns, mockMetadata, columnData); + } + + @Test + void process_withEmptyResultList_shouldReturnEmptyString() { + List results = Collections.emptyList(); + String output = jsonLineProducerTask.process(results, new ExportReport()); + Assertions.assertEquals("", output); + } + + @Test + void process_withValidResultList_shouldReturnValidJsonLineString() { + ObjectNode rootNode = UnitTestUtils.getOutputDataWithoutMetadata(); + Map> values = UnitTestUtils.createTestValues(); + Result result = new ResultImpl(values, mockMetadata); + List resultList = new ArrayList<>(); + resultList.add(result); + String output = jsonLineProducerTask.process(resultList, new ExportReport()); + Assertions.assertEquals(rootNode.toString(), output.trim()); + } + + @Test + void 
process_withValidResultListWithMetadata_shouldReturnValidJsonLineString() { + jsonLineProducerTask = + new JsonLineProducerTask(true, projectedColumns, mockMetadata, columnData); + ObjectNode rootNode = UnitTestUtils.getOutputDataWithMetadata(); + Map> values = UnitTestUtils.createTestValues(); + Result result = new ResultImpl(values, mockMetadata); + List resultList = new ArrayList<>(); + resultList.add(result); + String output = jsonLineProducerTask.process(resultList, new ExportReport()); + Assertions.assertEquals(rootNode.toString(), output.trim()); + } +} diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/producer/JsonProducerTaskTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/producer/JsonProducerTaskTest.java new file mode 100644 index 0000000000..3100b231cd --- /dev/null +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/producer/JsonProducerTaskTest.java @@ -0,0 +1,63 @@ +package com.scalar.db.dataloader.core.dataexport.producer; + +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.scalar.db.api.Result; +import com.scalar.db.api.TableMetadata; +import com.scalar.db.common.ResultImpl; +import com.scalar.db.dataloader.core.UnitTestUtils; +import com.scalar.db.dataloader.core.dataexport.ExportReport; +import com.scalar.db.io.Column; +import com.scalar.db.io.DataType; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class JsonProducerTaskTest { + TableMetadata mockMetadata; + List projectedColumns; + Map columnData; + JsonProducerTask jsonProducerTask; + + @BeforeEach + void setup() { + mockMetadata = UnitTestUtils.createTestTableMetadata(); + projectedColumns = UnitTestUtils.getColumnsListOfMetadata(); + columnData = UnitTestUtils.getColumnData(); + jsonProducerTask = + 
new JsonProducerTask(false, projectedColumns, mockMetadata, columnData, true); + } + + @Test + void process_withEmptyResultList_shouldReturnEmptyString() { + List results = Collections.emptyList(); + String output = jsonProducerTask.process(results, new ExportReport()); + Assertions.assertEquals(" ", output); + } + + @Test + void process_withValidResultList_shouldReturnValidJsonString() { + ObjectNode rootNode = UnitTestUtils.getOutputDataWithoutMetadata(); + Map> values = UnitTestUtils.createTestValues(); + Result result = new ResultImpl(values, mockMetadata); + List resultList = new ArrayList<>(); + resultList.add(result); + String output = jsonProducerTask.process(resultList, new ExportReport()); + Assertions.assertEquals(rootNode.toPrettyString(), output.trim()); + } + + @Test + void process_withValidResultListWithMetadata_shouldReturnValidJsonString() { + jsonProducerTask = new JsonProducerTask(true, projectedColumns, mockMetadata, columnData, true); + ObjectNode rootNode = UnitTestUtils.getOutputDataWithMetadata(); + Map> values = UnitTestUtils.createTestValues(); + Result result = new ResultImpl(values, mockMetadata); + List resultList = new ArrayList<>(); + resultList.add(result); + String output = jsonProducerTask.process(resultList, new ExportReport()); + Assertions.assertEquals(rootNode.toPrettyString(), output.trim()); + } +} diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerTaskFactoryTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerTaskFactoryTest.java new file mode 100644 index 0000000000..f97e80a1d1 --- /dev/null +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerTaskFactoryTest.java @@ -0,0 +1,55 @@ +package com.scalar.db.dataloader.core.dataexport.producer; + +import com.scalar.db.api.TableMetadata; +import com.scalar.db.dataloader.core.FileFormat; +import com.scalar.db.dataloader.core.UnitTestUtils; 
+import com.scalar.db.io.DataType; +import java.util.List; +import java.util.Map; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class ProducerTaskFactoryTest { + + TableMetadata mockMetadata; + List projectedColumns; + Map columnData; + + @BeforeEach + void setup() { + mockMetadata = UnitTestUtils.createTestTableMetadata(); + projectedColumns = UnitTestUtils.getColumnsListOfMetadata(); + columnData = UnitTestUtils.getColumnData(); + } + + @Test + void createProducerTask_withJsonFileFormat_shouldReturnJsonProducerTask() { + ProducerTaskFactory producerTaskFactory = new ProducerTaskFactory(null, false, true); + Assertions.assertEquals( + JsonProducerTask.class, + producerTaskFactory + .createProducerTask(FileFormat.JSON, projectedColumns, mockMetadata, columnData) + .getClass()); + } + + @Test + void createProducerTask_withJsonLinesFileFormat_shouldReturnJsonLineProducerTask() { + ProducerTaskFactory producerTaskFactory = new ProducerTaskFactory(null, false, false); + Assertions.assertEquals( + JsonLineProducerTask.class, + producerTaskFactory + .createProducerTask(FileFormat.JSONL, projectedColumns, mockMetadata, columnData) + .getClass()); + } + + @Test + void createProducerTask_withCsvFileFormat_shouldReturnCsvProducerTask() { + ProducerTaskFactory producerTaskFactory = new ProducerTaskFactory(",", false, false); + Assertions.assertEquals( + CsvProducerTask.class, + producerTaskFactory + .createProducerTask(FileFormat.CSV, projectedColumns, mockMetadata, columnData) + .getClass()); + } +} From be4583c8a2c35e0d8bae97546f2d113a84a5b036 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Fri, 3 Jan 2025 17:17:19 +0530 Subject: [PATCH 43/87] Added transaction batch dtos --- .../ImportTransactionBatch.java | 14 ++++++++ .../ImportTransactionBatchResult.java | 32 +++++++++++++++++++ .../ImportTransactionBatchStatus.java | 18 +++++++++++ 3 files changed, 64 insertions(+) create mode 100644 
data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/transactionbatch/ImportTransactionBatch.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/transactionbatch/ImportTransactionBatchResult.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/transactionbatch/ImportTransactionBatchStatus.java diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/transactionbatch/ImportTransactionBatch.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/transactionbatch/ImportTransactionBatch.java new file mode 100644 index 0000000000..a922fd8afa --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/transactionbatch/ImportTransactionBatch.java @@ -0,0 +1,14 @@ +package com.scalar.db.dataloader.core.dataimport.transactionbatch; + +import com.scalar.db.dataloader.core.dataimport.datachunk.ImportRow; +import java.util.List; +import lombok.Builder; +import lombok.Value; + +/** Transaction batch details */ +@Builder +@Value +public class ImportTransactionBatch { + int transactionBatchId; + List sourceData; +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/transactionbatch/ImportTransactionBatchResult.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/transactionbatch/ImportTransactionBatchResult.java new file mode 100644 index 0000000000..0e44b66953 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/transactionbatch/ImportTransactionBatchResult.java @@ -0,0 +1,32 @@ +package com.scalar.db.dataloader.core.dataimport.transactionbatch; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.scalar.db.dataloader.core.dataimport.task.result.ImportTaskResult; +import java.util.List; +import lombok.Builder; 
+import lombok.Value; + +/** Transaction batch result */ +@Builder +@Value +@JsonDeserialize(builder = ImportTransactionBatchResult.ImportTransactionBatchResultBuilder.class) +public class ImportTransactionBatchResult { + @JsonProperty("dataChunkId") + int dataChunkId; + + @JsonProperty("transactionBatchId") + int transactionBatchId; + + @JsonProperty("transactionId") + String transactionId; + + @JsonProperty("records") + List records; + + @JsonProperty("errors") + List errors; + + @JsonProperty("success") + boolean success; +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/transactionbatch/ImportTransactionBatchStatus.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/transactionbatch/ImportTransactionBatchStatus.java new file mode 100644 index 0000000000..1b7bae34c6 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/transactionbatch/ImportTransactionBatchStatus.java @@ -0,0 +1,18 @@ +package com.scalar.db.dataloader.core.dataimport.transactionbatch; + +import com.scalar.db.dataloader.core.dataimport.task.result.ImportTaskResult; +import java.util.List; +import lombok.Builder; +import lombok.Value; + +/** Batch status details */ +@Builder +@Value +public class ImportTransactionBatchStatus { + int dataChunkId; + int transactionBatchId; + String transactionId; + List records; + List errors; + boolean success; +} From 89fea78d57faa44983ced2eb1a230892080ba3ed Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Mon, 6 Jan 2025 14:43:55 +0530 Subject: [PATCH 44/87] Added changes --- .../core/dataexport/ExportManager.java | 44 ++++++++++------- .../dataexport/producer/CsvProducerTask.java | 47 +++++++++++++------ .../producer/JsonLineProducerTask.java | 14 +++--- .../dataexport/producer/JsonProducerTask.java | 14 +++--- .../producer/ProducerTaskFactory.java | 36 ++++++++++---- .../core/dataimport/dao/ScalarDBManager.java | 8 +--- 6 files changed, 103 insertions(+), 60 
deletions(-) diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/ExportManager.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/ExportManager.java index 07d59415cc..888d78686a 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/ExportManager.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/ExportManager.java @@ -111,8 +111,8 @@ public ExportReport startExport( return exportReport; } - /*** - * To process result data chunk + /** + * * To process result data chunk * * @param exportOptions export options * @param tableMetadata metadata of the table @@ -152,11 +152,12 @@ private void processDataChunk( } } - /*** - * To split result into batches + /** + * * To split result into batches + * * @param iterator iterator which parse results * @param batchSize size of batch - * @return a list of results split to batches + * @return a list of results split to batches */ private List fetchDataChunk(Iterator iterator, int batchSize) { List batch = new ArrayList<>(); @@ -168,8 +169,9 @@ private List fetchDataChunk(Iterator iterator, int batchSize) { return batch; } - /*** - * To validate export options + /** + * * To validate export options + * * @param exportOptions export options * @param tableMetadata metadata of the table * @throws ExportOptionsValidationException thrown if any of the export option validation fails @@ -179,8 +181,9 @@ private void validateExportOptions(ExportOptions exportOptions, TableMetadata ta ExportOptionsValidator.validate(exportOptions, tableMetadata); } - /*** - * To update projection columns of export options if include metadata options is enabled + /** + * * To update projection columns of export options if include metadata options is enabled + * * @param exportOptions export options * @param tableMetadata metadata of the table */ @@ -194,8 +197,9 @@ private void handleTransactionMetadata(ExportOptions exportOptions, 
TableMetadat } } - /*** - * To create and write the header row to the CSV export file + /** + * * To create and write the header row to the CSV export file + * * @param exportOptions export options * @param tableMetadata metadata of the table * @param dataTypeByColumnName map of columns and their data types @@ -218,8 +222,9 @@ private void writeCsvHeaderRow( writer.flush(); } - /*** - * To create a scanner object + /** + * * To create a scanner object + * * @param exportOptions export options * @param dao scalardb dao object * @param storage distributed storage object @@ -250,8 +255,9 @@ private Scanner createScanner( } } - /*** - * To generate the header row of CSV export file + /** + * * To generate the header row of CSV export file + * * @param exportOptions export options * @param tableMetadata metadata of the table * @param dataTypeByColumnName map of columns and their data types @@ -286,11 +292,13 @@ private String createCsvHeaderRow( return headerRow.toString(); } - /*** - * To ignore a column or not based on conditions such as if it is a metadata column or if it is not include in selected projections + /** + * * To ignore a column or not based on conditions such as if it is a metadata column or if it is + * not included in selected projections + * * @param isIncludeTransactionMetadata to include transaction metadata or not * @param columnName column name - * @param columnsToIgnore set of columns to ignore + * @param columnsToIgnore set of columns to ignore * @param dataTypeColumnNames data types of columns * @param projections selected columns for projection * @return ignore the column or not diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/CsvProducerTask.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/CsvProducerTask.java index 3d93a72c72..217c8ddf70 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/CsvProducerTask.java +++ 
b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/CsvProducerTask.java @@ -8,6 +8,7 @@ import com.scalar.db.dataloader.core.util.DecimalUtil; import com.scalar.db.dataloader.core.util.TableMetadataUtil; import com.scalar.db.io.DataType; +import java.nio.charset.Charset; import java.util.Base64; import java.util.Iterator; import java.util.LinkedHashSet; @@ -45,8 +46,9 @@ public CsvProducerTask( this.delimiter = delimiter; } - /*** - * Process scalardb scan result data and returns CSV data + /** + * * Process scalardb scan result data and returns CSV data + * * @param dataChunk list of results * @param exportReport export report * @return result converted to string @@ -110,8 +112,9 @@ private String convertResultToCsv(Result result) { return ""; } - /*** - * Convert result column value to string + /** + * * Convert result column value to string + * * @param result scalardb result * @param columnName column name * @param dataType datatype of the column @@ -121,17 +124,31 @@ private String convertToString(Result result, String columnName, DataType dataTy if (result.isNull(columnName)) { return null; } - return switch (dataType) { - case INT -> Integer.toString(result.getInt(columnName)); - case BIGINT -> Long.toString(result.getBigInt(columnName)); - case FLOAT -> DecimalUtil.convertToNonScientific(result.getFloat(columnName)); - case DOUBLE -> DecimalUtil.convertToNonScientific(result.getDouble(columnName)); - case BLOB -> { + String value = ""; + switch (dataType) { + case INT: + value = Integer.toString(result.getInt(columnName)); + break; + case BIGINT: + value = Long.toString(result.getBigInt(columnName)); + break; + case FLOAT: + value = DecimalUtil.convertToNonScientific(result.getFloat(columnName)); + break; + case DOUBLE: + value = DecimalUtil.convertToNonScientific(result.getDouble(columnName)); + break; + case BLOB: byte[] encoded = Base64.getEncoder().encode(result.getBlobAsBytes(columnName)); - yield new String(encoded); - 
} - case BOOLEAN -> Boolean.toString(result.getBoolean(columnName)); - case TEXT -> result.getText(columnName); - }; + value = new String(encoded, Charset.defaultCharset()); + break; + case BOOLEAN: + value = Boolean.toString(result.getBoolean(columnName)); + break; + case TEXT: + value = result.getText(columnName); + break; + } + return value; + } } diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/JsonLineProducerTask.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/JsonLineProducerTask.java index 06e8a1c796..9207219491 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/JsonLineProducerTask.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/JsonLineProducerTask.java @@ -7,6 +7,7 @@ import com.scalar.db.dataloader.core.dataexport.ExportReport; import com.scalar.db.dataloader.core.util.TableMetadataUtil; import com.scalar.db.io.DataType; +import java.nio.charset.Charset; import java.util.Base64; import java.util.LinkedHashSet; import java.util.List; @@ -32,8 +33,9 @@ public JsonLineProducerTask( super(includeMetadata, projectionColumns, tableMetadata, columnDataTypes); } - /*** - * Process scalardb scan result data and returns CSV data + /** + * * Process scalardb scan result data and returns JSON Lines data + * * @param dataChunk list of results * @param exportReport export report * @return result converted to string @@ -81,12 +83,12 @@ private ObjectNode generateJsonForResult(Result result) { return objectNode; } - /*** - * Add result column name and value to json object node + /** + * * Add result column name and value to json object node + * * @param result scalardb result * @param columnName column name * @param dataType datatype of the column - * */ private void addToObjectNode( ObjectNode objectNode, Result result, String columnName, DataType dataType) { @@ -119,7 +121,7 @@ private void addToObjectNode( 
case BLOB: // convert to base64 string byte[] encoded = Base64.getEncoder().encode(result.getBlobAsBytes(columnName)); - objectNode.put(columnName, new String(encoded)); + objectNode.put(columnName, new String(encoded, Charset.defaultCharset())); break; } } diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/JsonProducerTask.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/JsonProducerTask.java index 47fbd4c205..cfaa4cb336 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/JsonProducerTask.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/JsonProducerTask.java @@ -8,6 +8,7 @@ import com.scalar.db.dataloader.core.dataexport.ExportReport; import com.scalar.db.dataloader.core.util.TableMetadataUtil; import com.scalar.db.io.DataType; +import java.nio.charset.Charset; import java.util.Base64; import java.util.LinkedHashSet; import java.util.List; @@ -36,8 +37,9 @@ public JsonProducerTask( this.prettyPrintJson = prettyPrintJson; } - /*** - * Process scalardb scan result data and returns CSV data + /** + * * Process scalardb scan result data and returns JSON data + * * @param dataChunk list of results * @param exportReport export report * @return result converted to string @@ -92,12 +94,12 @@ private ObjectNode generateJsonForResult(Result result) { return objectNode; } - /*** - * Add result column name and value to json object node + /** + * * Add result column name and value to json object node + * * @param result scalardb result * @param columnName column name * @param dataType datatype of the column - * */ private void addToObjectNode( ObjectNode objectNode, Result result, String columnName, DataType dataType) { @@ -130,7 +132,7 @@ private void addToObjectNode( case BLOB: // convert to base64 string byte[] encoded = Base64.getEncoder().encode(result.getBlobAsBytes(columnName)); - objectNode.put(columnName, 
new String(encoded)); + objectNode.put(columnName, new String(encoded, Charset.defaultCharset())); break; } } diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerTaskFactory.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerTaskFactory.java index 467d407d50..4821a93b70 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerTaskFactory.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerTaskFactory.java @@ -14,7 +14,8 @@ public class ProducerTaskFactory { private final boolean includeMetadata; private final boolean prettyPrintJson; - /*** + /** + * * * * @param fileFormat file format * @param projectionColumns columns names that are selected @@ -27,13 +28,30 @@ public ProducerTask createProducerTask( List projectionColumns, TableMetadata tableMetadata, Map dataTypeByColumnName) { - return switch (fileFormat) { - case JSON -> new JsonProducerTask( - includeMetadata, projectionColumns, tableMetadata, dataTypeByColumnName, prettyPrintJson); - case JSONL -> new JsonLineProducerTask( - includeMetadata, projectionColumns, tableMetadata, dataTypeByColumnName); - case CSV -> new CsvProducerTask( - includeMetadata, projectionColumns, tableMetadata, dataTypeByColumnName, delimiter); - }; + ProducerTask producerTask; + switch (fileFormat) { + case JSON: + producerTask = + new JsonProducerTask( + includeMetadata, + projectionColumns, + tableMetadata, + dataTypeByColumnName, + prettyPrintJson); + break; + case JSONL: + producerTask = + new JsonLineProducerTask( + includeMetadata, projectionColumns, tableMetadata, dataTypeByColumnName); + break; + case CSV: + producerTask = + new CsvProducerTask( + includeMetadata, projectionColumns, tableMetadata, dataTypeByColumnName, delimiter); + break; + default: + producerTask = null; + } + return producerTask; } } diff --git 
a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBManager.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBManager.java index 7e3b3182a2..1016eaaba4 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBManager.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBManager.java @@ -48,9 +48,7 @@ public ScalarDBManager(TransactionFactory transactionFactory) throws IOException storage = null; } - /** - * @return storage for ScalarDB connection that is running in storage mode - */ + /** @return storage for ScalarDB connection that is running in storage mode */ public DistributedStorage getDistributedStorage() { return storage; } @@ -63,9 +61,7 @@ public DistributedTransactionManager getDistributedTransactionManager() { return transactionManager; } - /** - * @return Distributed storage admin for ScalarDB admin operations - */ + /** @return Distributed storage admin for ScalarDB admin operations */ public DistributedStorageAdmin getDistributedStorageAdmin() { return storageAdmin; } From 29a8c25a0eeb2a47e90354cbef29708f05a0edd3 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Mon, 6 Jan 2025 15:06:12 +0530 Subject: [PATCH 45/87] Fix spot less issue --- .../main/java/com/scalar/db/common/error/CoreError.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/com/scalar/db/common/error/CoreError.java b/core/src/main/java/com/scalar/db/common/error/CoreError.java index 42ba06593a..14f970f969 100644 --- a/core/src/main/java/com/scalar/db/common/error/CoreError.java +++ b/core/src/main/java/com/scalar/db/common/error/CoreError.java @@ -719,8 +719,11 @@ public enum CoreError implements ScalarDbError { "", ""), DATA_LOADER_VALUE_TO_STRING_CONVERSION_FAILED( - Category.USER_ERROR, "0168","Something went wrong while converting the ScalarDB values to strings. 
The table metadata and Value datatype probably do not match.", "", "" - ), + Category.USER_ERROR, + "0168", + "Something went wrong while converting the ScalarDB values to strings. The table metadata and Value datatype probably do not match.", + "", + ""), // // Errors for the concurrency error category // From 2b58dcbf17f4321961f884cd203a80032a7734a2 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Wed, 8 Jan 2025 18:05:16 +0530 Subject: [PATCH 46/87] Initial commit --- .../com/scalar/db/common/error/CoreError.java | 7 + .../core/DataLoaderObjectMapper.java | 14 + .../core/dataimport/ImportEventListener.java | 23 + .../processor/CsvImportProcessor.java | 141 ++++++ .../DefaultImportProcessorFactory.java | 23 + .../dataimport/processor/ImportProcessor.java | 414 ++++++++++++++++++ .../processor/ImportProcessorFactory.java | 5 + .../processor/ImportProcessorParams.java | 23 + .../processor/JsonImportProcessor.java | 136 ++++++ .../processor/JsonLinesImportProcessor.java | 126 ++++++ .../processor/TableColumnDataTypes.java | 31 ++ .../dataimport/task/ImportStorageTask.java | 36 ++ .../core/dataimport/task/ImportTask.java | 360 +++++++++++++++ .../dataimport/task/ImportTaskParams.java | 24 + .../task/ImportTransactionalTask.java | 54 +++ .../db/dataloader/core/util/ColumnUtils.java | 143 +++++- .../db/dataloader/core/util/KeyUtils.java | 67 +++ .../DefaultImportProcessorFactoryTest.java | 60 +++ .../processor/TableColumnDataTypesTest.java | 33 ++ .../dataloader/core/util/ColumnUtilsTest.java | 29 ++ .../db/dataloader/core/util/KeyUtilsTest.java | 45 +- gradle/spotbugs-exclude.xml | 2 +- 22 files changed, 1793 insertions(+), 3 deletions(-) create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/DataLoaderObjectMapper.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportEventListener.java create mode 100644 
data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/DefaultImportProcessorFactory.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessor.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessorFactory.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessorParams.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/TableColumnDataTypes.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportStorageTask.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTask.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTaskParams.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTransactionalTask.java create mode 100644 data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/DefaultImportProcessorFactoryTest.java create mode 100644 data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/TableColumnDataTypesTest.java diff --git a/core/src/main/java/com/scalar/db/common/error/CoreError.java b/core/src/main/java/com/scalar/db/common/error/CoreError.java index c05eed9cd9..84046688fc 100644 --- a/core/src/main/java/com/scalar/db/common/error/CoreError.java +++ 
b/core/src/main/java/com/scalar/db/common/error/CoreError.java @@ -749,6 +749,13 @@ public enum CoreError implements ScalarDbError { ""), DATA_LOADER_MISSING_COLUMN( Category.USER_ERROR, "0168", "Missing field or column mapping for %s", "", ""), + DATA_LOADER_MISSING_SOURCE_FIELD( + Category.USER_ERROR, + "0169", + "The data mapping source field '%s' for table '%s' is missing in the json data record", + "", + ""), + // // Errors for the concurrency error category // diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/DataLoaderObjectMapper.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/DataLoaderObjectMapper.java new file mode 100644 index 0000000000..d90fd49b65 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/DataLoaderObjectMapper.java @@ -0,0 +1,14 @@ +package com.scalar.db.dataloader.core; + +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; + +public class DataLoaderObjectMapper extends ObjectMapper { + + public DataLoaderObjectMapper() { + super(); + this.setSerializationInclusion(JsonInclude.Include.NON_NULL); + this.registerModule(new JavaTimeModule()); + } +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportEventListener.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportEventListener.java new file mode 100644 index 0000000000..10157569b4 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportEventListener.java @@ -0,0 +1,23 @@ +package com.scalar.db.dataloader.core.dataimport; + +import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunkStatus; +import com.scalar.db.dataloader.core.dataimport.task.result.ImportTaskResult; +import com.scalar.db.dataloader.core.dataimport.transactionbatch.ImportTransactionBatchResult; +import 
com.scalar.db.dataloader.core.dataimport.transactionbatch.ImportTransactionBatchStatus; + +public interface ImportEventListener { + + void onDataChunkStarted(ImportDataChunkStatus status); + + void addOrUpdateDataChunkStatus(ImportDataChunkStatus status); + + void onDataChunkCompleted(ImportDataChunkStatus status); + + void onAllDataChunksCompleted(); + + void onTransactionBatchStarted(ImportTransactionBatchStatus batchStatus); + + void onTransactionBatchCompleted(ImportTransactionBatchResult batchResult); + + void onTaskComplete(ImportTaskResult taskResult); +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java new file mode 100644 index 0000000000..d5182e3012 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java @@ -0,0 +1,141 @@ +package com.scalar.db.dataloader.core.dataimport.processor; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.scalar.db.dataloader.core.DataLoaderObjectMapper; +import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunk; +import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunkStatus; +import com.scalar.db.dataloader.core.dataimport.datachunk.ImportRow; +import java.io.BufferedReader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; +import java.util.Queue; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.atomic.AtomicInteger; + +public class CsvImportProcessor extends ImportProcessor { + private static final DataLoaderObjectMapper OBJECT_MAPPER = new DataLoaderObjectMapper(); + private static final AtomicInteger dataChunkIdCounter 
= new AtomicInteger(0); + + public CsvImportProcessor(ImportProcessorParams params) { + super(params); + } + + /** + * Process the data from the import file + * + * @param dataChunkSize size of data chunk + * @param transactionBatchSize size of transaction batch + * @param reader reader which reads the source file + * @return process data chunk status list + */ + @Override + public List process( + int dataChunkSize, int transactionBatchSize, BufferedReader reader) { + int numCores = Runtime.getRuntime().availableProcessors(); + ExecutorService dataChunkExecutor = Executors.newFixedThreadPool(numCores); + // Create a queue to hold data batches + Queue dataChunkQueue = new LinkedList<>(); + Thread readerThread = + new Thread( + () -> { + try { + String header = params.getImportOptions().getCustomHeaderRow(); + String delimiter = Character.toString(params.getImportOptions().getDelimiter()); + if (delimiter.isBlank()) { + delimiter = ","; + } + if (header == null) { + header = reader.readLine(); + } + String[] headerArray = header.split(delimiter); + String line; + int rowNumber = 1; + List currentDataChunk = new ArrayList<>(); + while ((line = reader.readLine()) != null) { + String[] dataArray = line.split(delimiter); + if (headerArray.length != dataArray.length) { + // Throw a custom exception for related issue + throw new RuntimeException(); + } + JsonNode jsonNode = combineHeaderAndData(headerArray, dataArray); + if (jsonNode == null || jsonNode.isEmpty()) { + continue; + } + + ImportRow importRow = new ImportRow(rowNumber, jsonNode); + currentDataChunk.add(importRow); + // If the data chunk is full, add it to the queue + if (currentDataChunk.size() == dataChunkSize) { + int dataChunkId = dataChunkIdCounter.getAndIncrement(); + ImportDataChunk importDataChunk = + ImportDataChunk.builder() + .dataChunkId(dataChunkId) + .sourceData(currentDataChunk) + .build(); + dataChunkQueue.offer(importDataChunk); + currentDataChunk = new ArrayList<>(); + } + rowNumber++; + } + + 
// Add the last data chunk to the queue + if (!currentDataChunk.isEmpty()) { + int dataChunkId = dataChunkIdCounter.getAndIncrement(); + ImportDataChunk importDataChunk = + ImportDataChunk.builder() + .dataChunkId(dataChunkId) + .sourceData(currentDataChunk) + .build(); + dataChunkQueue.offer(importDataChunk); + } + + } catch (IOException e) { + throw new RuntimeException(); + } + }); + + readerThread.start(); + try { + // Wait for readerThread to finish + readerThread.join(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + // Process data chunks in parallel + List> dataChunkFutures = new ArrayList<>(); + while (!dataChunkQueue.isEmpty()) { + ImportDataChunk dataChunk = dataChunkQueue.poll(); + Future dataChunkFuture = + dataChunkExecutor.submit( + () -> processDataChunk(dataChunk, transactionBatchSize, numCores)); + dataChunkFutures.add(dataChunkFuture); + } + + List importDataChunkStatusList = new ArrayList<>(); + // Wait for all data chunk threads to complete + for (Future dataChunkFuture : dataChunkFutures) { + try { + importDataChunkStatusList.add((ImportDataChunkStatus) dataChunkFuture.get()); + } catch (Exception e) { + // TODO: handle the exception + e.printStackTrace(); + } + } + dataChunkExecutor.shutdown(); + notifyAllDataChunksCompleted(); + return importDataChunkStatusList; + } + + private JsonNode combineHeaderAndData(String[] header, String[] data) { + ObjectNode objectNode = OBJECT_MAPPER.createObjectNode(); + for (int i = 0; i < header.length; i++) { + objectNode.put(header[i], data[i]); + } + return objectNode; + } +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/DefaultImportProcessorFactory.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/DefaultImportProcessorFactory.java new file mode 100644 index 0000000000..e0fa8aa405 --- /dev/null +++ 
b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/DefaultImportProcessorFactory.java @@ -0,0 +1,23 @@ +package com.scalar.db.dataloader.core.dataimport.processor; + +public class DefaultImportProcessorFactory implements ImportProcessorFactory { + + @Override + public ImportProcessor createImportProcessor(ImportProcessorParams params) { + ImportProcessor importProcessor; + switch (params.getImportOptions().getFileFormat()) { + case JSONL: + importProcessor = new JsonLinesImportProcessor(params); + break; + case JSON: + importProcessor = new JsonImportProcessor(params); + break; + case CSV: + importProcessor = new CsvImportProcessor(params); + break; + default: + importProcessor = null; + } + return importProcessor; + } +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessor.java new file mode 100644 index 0000000000..2d16b9d189 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessor.java @@ -0,0 +1,414 @@ +package com.scalar.db.dataloader.core.dataimport.processor; + +import com.scalar.db.api.DistributedTransaction; +import com.scalar.db.dataloader.core.ScalarDBMode; +import com.scalar.db.dataloader.core.dataimport.ImportEventListener; +import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunk; +import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunkStatus; +import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunkStatusState; +import com.scalar.db.dataloader.core.dataimport.datachunk.ImportRow; +import com.scalar.db.dataloader.core.dataimport.task.ImportStorageTask; +import com.scalar.db.dataloader.core.dataimport.task.ImportTaskParams; +import com.scalar.db.dataloader.core.dataimport.task.ImportTransactionalTask; +import 
com.scalar.db.dataloader.core.dataimport.task.result.ImportTargetResultStatus; +import com.scalar.db.dataloader.core.dataimport.task.result.ImportTaskResult; +import com.scalar.db.dataloader.core.dataimport.transactionbatch.ImportTransactionBatch; +import com.scalar.db.dataloader.core.dataimport.transactionbatch.ImportTransactionBatchResult; +import com.scalar.db.dataloader.core.dataimport.transactionbatch.ImportTransactionBatchStatus; +import com.scalar.db.exception.transaction.TransactionException; +import java.io.BufferedReader; +import java.time.Duration; +import java.time.Instant; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.atomic.AtomicInteger; +import lombok.RequiredArgsConstructor; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@RequiredArgsConstructor +public abstract class ImportProcessor { + + final ImportProcessorParams params; + private static final Logger LOGGER = LoggerFactory.getLogger(ImportProcessor.class); + private final List listeners = new ArrayList<>(); + + /** + * * Process the source data from import file + * + * @param dataChunkSize size of data chunk + * @param transactionBatchSize size of transaction batch + * @param reader reader which reads the source file + * @return list of import data chunk status objects + */ + public List process( + int dataChunkSize, int transactionBatchSize, BufferedReader reader) { + return Collections.emptyList(); + } + + /** + * Add import event listener to listener list + * + * @param listener import event listener + */ + public void addListener(ImportEventListener listener) { + listeners.add(listener); + } + + /** + * Remove import event listener from listener list + * + * @param listener import event listener + */ + public void 
removeListener(ImportEventListener listener) { + listeners.remove(listener); + } + + /** + * Notify once the task is completed + * + * @param result task result object + */ + protected void notifyStorageRecordCompleted(ImportTaskResult result) { + // Add data to summary, success logs with/without raw data + for (ImportEventListener listener : listeners) { + listener.onTaskComplete(result); + } + } + + /** + * Notify once the data chunk process is started + * + * @param status data chunk status object + */ + protected void notifyDataChunkStarted(ImportDataChunkStatus status) { + for (ImportEventListener listener : listeners) { + listener.onDataChunkStarted(status); + listener.addOrUpdateDataChunkStatus(status); + } + } + + /** + * Notify once the data chunk process is completed + * + * @param status data chunk status object + */ + protected void notifyDataChunkCompleted(ImportDataChunkStatus status) { + for (ImportEventListener listener : listeners) { + listener.onDataChunkCompleted(status); + listener.addOrUpdateDataChunkStatus(status); + } + } + + /** + * Notify once the import transaction batch is started + * + * @param batchStatus import transaction batch status object + */ + protected void notifyTransactionBatchStarted(ImportTransactionBatchStatus batchStatus) { + for (ImportEventListener listener : listeners) { + listener.onTransactionBatchStarted(batchStatus); + } + } + + /** + * Notify once the import transaction batch is completed + * + * @param batchResult import transaction batch result object + */ + protected void notifyTransactionBatchCompleted(ImportTransactionBatchResult batchResult) { + for (ImportEventListener listener : listeners) { + listener.onTransactionBatchCompleted(batchResult); + } + } + + /** Notify when all data chunks processes are completed */ + protected void notifyAllDataChunksCompleted() { + for (ImportEventListener listener : listeners) { + listener.onAllDataChunksCompleted(); + } + } + + /** + * Split the data chunk into transaction 
batches + * + * @param dataChunk data chunk object + * @param batchSize batch size + * @return created list of transaction batches + */ + private List splitIntoTransactionBatches( + ImportDataChunk dataChunk, int batchSize) { + List transactionBatches = new ArrayList<>(); + AtomicInteger transactionBatchIdCounter = new AtomicInteger(0); + + List importRows = dataChunk.getSourceData(); + for (int i = 0; i < importRows.size(); i += batchSize) { + int endIndex = Math.min(i + batchSize, importRows.size()); + List transactionBatchData = importRows.subList(i, endIndex); + int transactionBatchId = transactionBatchIdCounter.getAndIncrement(); + ImportTransactionBatch transactionBatch = + ImportTransactionBatch.builder() + .transactionBatchId(transactionBatchId) + .sourceData(transactionBatchData) + .build(); + transactionBatches.add(transactionBatch); + } + return transactionBatches; + } + + /** + * To process a transaction batch and return the result + * + * @param dataChunk data chunk object + * @param transactionBatch transaction batch object + * @return processed transaction batch result + */ + private ImportTransactionBatchResult processTransactionBatch( + ImportDataChunk dataChunk, ImportTransactionBatch transactionBatch) { + ImportTransactionBatchStatus status = + ImportTransactionBatchStatus.builder() + .dataChunkId(dataChunk.getDataChunkId()) + .transactionBatchId(transactionBatch.getTransactionBatchId()) + .build(); + notifyTransactionBatchStarted(status); + List importRecordResult = new ArrayList<>(); + boolean isSuccess; + String error = ""; + try { + // Create the ScalarDB transaction + DistributedTransaction transaction = params.getDistributedTransactionManager().start(); + + // Loop over the transaction batch and process each record + for (ImportRow importRow : transactionBatch.getSourceData()) { + ImportTaskParams taskParams = + ImportTaskParams.builder() + .sourceRecord(importRow.getSourceData()) + .dataChunkId(dataChunk.getDataChunkId()) + 
.rowNumber(importRow.getRowNumber()) + .importOptions(params.getImportOptions()) + .tableColumnDataTypes(params.getTableColumnDataTypes()) + .tableMetadataByTableName(params.getTableMetadataByTableName()) + .dao(params.getDao()) + .build(); + importRecordResult.add(new ImportTransactionalTask(taskParams, transaction).execute()); + } + isSuccess = + importRecordResult.stream() + .allMatch( + importTaskResult -> + importTaskResult.getTargets().stream() + .allMatch( + targetResult -> + targetResult.getStatus().equals(ImportTargetResultStatus.SAVED))); + + // Check and Commit the transaction + if (isSuccess) { + transaction.commit(); + } else { + transaction.abort(); + error = "All transactions are aborted"; + } + + } catch (TransactionException e) { + isSuccess = false; + LOGGER.error(e.getMessage()); + } + ImportTransactionBatchResult importTransactionBatchResult = + ImportTransactionBatchResult.builder() + .transactionBatchId(transactionBatch.getTransactionBatchId()) + .success(isSuccess) + .dataChunkId(dataChunk.getDataChunkId()) + .records(importRecordResult) + .errors(Collections.singletonList(error)) + .build(); + notifyTransactionBatchCompleted(importTransactionBatchResult); + return importTransactionBatchResult; + } + + /** + * @param dataChunk data chunk object + * @param importRow data row object + * @return thr task result after processing the row data + */ + private ImportTaskResult processStorageRecord(ImportDataChunk dataChunk, ImportRow importRow) { + ImportTaskParams taskParams = + ImportTaskParams.builder() + .sourceRecord(importRow.getSourceData()) + .dataChunkId(dataChunk.getDataChunkId()) + .rowNumber(importRow.getRowNumber()) + .importOptions(params.getImportOptions()) + .tableColumnDataTypes(params.getTableColumnDataTypes()) + .tableMetadataByTableName(params.getTableMetadataByTableName()) + .dao(params.getDao()) + .build(); + ImportTaskResult importRecordResult = + new ImportStorageTask(taskParams, params.getDistributedStorage()).execute(); + + 
ImportTaskResult modifiedTaskResult = + ImportTaskResult.builder() + .rowNumber(importRecordResult.getRowNumber()) + .rawRecord(importRecordResult.getRawRecord()) + .targets(importRecordResult.getTargets()) + .dataChunkId(dataChunk.getDataChunkId()) + .build(); + notifyStorageRecordCompleted(modifiedTaskResult); + return modifiedTaskResult; + } + + /** + * Process data chunk data + * + * @param dataChunk data chunk object + * @param transactionBatchSize transaction batch size + * @param numCores num of cpu cores + * @return import data chunk status object after processing the data chunk + */ + protected ImportDataChunkStatus processDataChunk( + ImportDataChunk dataChunk, int transactionBatchSize, int numCores) { + ImportDataChunkStatus status = + ImportDataChunkStatus.builder() + .dataChunkId(dataChunk.getDataChunkId()) + .startTime(Instant.now()) + .status(ImportDataChunkStatusState.IN_PROGRESS) + .build(); + notifyDataChunkStarted(status); + ImportDataChunkStatus importDataChunkStatus; + if (params.getScalarDBMode() == ScalarDBMode.TRANSACTION) { + importDataChunkStatus = + processDataChunkWithTransactions(dataChunk, transactionBatchSize, numCores); + } else { + importDataChunkStatus = processDataChunkWithoutTransactions(dataChunk, numCores); + } + notifyDataChunkCompleted(importDataChunkStatus); + return importDataChunkStatus; + } + + /** + * Process data chunk data with transactions + * + * @param dataChunk data chunk object + * @param transactionBatchSize transaction batch size + * @param numCores num of cpu cores + * @return import data chunk status object after processing the data chunk + */ + private ImportDataChunkStatus processDataChunkWithTransactions( + ImportDataChunk dataChunk, int transactionBatchSize, int numCores) { + Instant startTime = Instant.now(); + List transactionBatches = + splitIntoTransactionBatches(dataChunk, transactionBatchSize); + ExecutorService transactionBatchExecutor = + 
Executors.newFixedThreadPool(Math.min(transactionBatches.size(), numCores)); + List> transactionBatchFutures = new ArrayList<>(); + AtomicInteger successCount = new AtomicInteger(0); + AtomicInteger failureCount = new AtomicInteger(0); + for (ImportTransactionBatch transactionBatch : transactionBatches) { + Future transactionBatchFuture = + transactionBatchExecutor.submit( + () -> processTransactionBatch(dataChunk, transactionBatch)); + transactionBatchFutures.add(transactionBatchFuture); + } + + waitForFuturesToComplete(transactionBatchFutures); + transactionBatchExecutor.shutdown(); + transactionBatchFutures.forEach( + batchResult -> { + try { + ImportTransactionBatchResult importTransactionBatchResult = + (ImportTransactionBatchResult) batchResult.get(); + importTransactionBatchResult + .getRecords() + .forEach( + batchRecords -> { + if (batchRecords.getTargets().stream() + .allMatch( + targetResult -> + targetResult + .getStatus() + .equals(ImportTargetResultStatus.SAVED))) { + successCount.incrementAndGet(); + } else { + failureCount.incrementAndGet(); + } + }); + } catch (InterruptedException | ExecutionException e) { + throw new RuntimeException(e); + } + }); + Instant endTime = Instant.now(); + int totalDuration = (int) Duration.between(startTime, endTime).toMillis(); + return ImportDataChunkStatus.builder() + .dataChunkId(dataChunk.getDataChunkId()) + .failureCount(failureCount.get()) + .successCount(successCount.get()) + .totalRecords(dataChunk.getSourceData().size()) + .batchCount(transactionBatches.size()) + .status(ImportDataChunkStatusState.COMPLETE) + .startTime(startTime) + .endTime(endTime) + .totalDurationInMilliSeconds(totalDuration) + .build(); + } + + /** + * Process data chunk data without transactions + * + * @param dataChunk data chunk object + * @param numCores num of cpu cores + * @return import data chunk status object after processing the data chunk + */ + private ImportDataChunkStatus processDataChunkWithoutTransactions( + 
ImportDataChunk dataChunk, int numCores) { + Instant startTime = Instant.now(); + AtomicInteger successCount = new AtomicInteger(0); + AtomicInteger failureCount = new AtomicInteger(0); + ExecutorService recordExecutor = Executors.newFixedThreadPool(numCores); + List> recordFutures = new ArrayList<>(); + for (ImportRow importRow : dataChunk.getSourceData()) { + Future recordFuture = + recordExecutor.submit(() -> processStorageRecord(dataChunk, importRow)); + recordFutures.add(recordFuture); + } + waitForFuturesToComplete(recordFutures); + recordExecutor.shutdown(); + recordFutures.forEach( + r -> { + try { + ImportTaskResult result = (ImportTaskResult) r.get(); + boolean allSaved = + result.getTargets().stream() + .allMatch(t -> t.getStatus().equals(ImportTargetResultStatus.SAVED)); + if (allSaved) successCount.incrementAndGet(); + else failureCount.incrementAndGet(); + } catch (InterruptedException | ExecutionException e) { + throw new RuntimeException(e); + } + }); + Instant endTime = Instant.now(); + int totalDuration = (int) Duration.between(startTime, endTime).toMillis(); + return ImportDataChunkStatus.builder() + .dataChunkId(dataChunk.getDataChunkId()) + .totalRecords(dataChunk.getSourceData().size()) + .successCount(successCount.get()) + .failureCount(failureCount.get()) + .startTime(startTime) + .endTime(endTime) + .totalDurationInMilliSeconds(totalDuration) + .status(ImportDataChunkStatusState.COMPLETE) + .build(); + } + + private void waitForFuturesToComplete(List> futures) { + for (Future future : futures) { + try { + future.get(); + } catch (Exception e) { + LOGGER.error(e.getMessage()); + } + } + } +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessorFactory.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessorFactory.java new file mode 100644 index 0000000000..e953b12228 --- /dev/null +++ 
b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessorFactory.java @@ -0,0 +1,5 @@ +package com.scalar.db.dataloader.core.dataimport.processor; + +public interface ImportProcessorFactory { + ImportProcessor createImportProcessor(ImportProcessorParams params); +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessorParams.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessorParams.java new file mode 100644 index 0000000000..632b1dc245 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessorParams.java @@ -0,0 +1,23 @@ +package com.scalar.db.dataloader.core.dataimport.processor; + +import com.scalar.db.api.DistributedStorage; +import com.scalar.db.api.DistributedTransactionManager; +import com.scalar.db.api.TableMetadata; +import com.scalar.db.dataloader.core.ScalarDBMode; +import com.scalar.db.dataloader.core.dataimport.ImportOptions; +import com.scalar.db.dataloader.core.dataimport.dao.ScalarDBDao; +import java.util.Map; +import lombok.Builder; +import lombok.Value; + +@Builder +@Value +public class ImportProcessorParams { + ScalarDBMode scalarDBMode; + ImportOptions importOptions; + Map tableMetadataByTableName; + TableColumnDataTypes tableColumnDataTypes; + ScalarDBDao dao; + DistributedStorage distributedStorage; + DistributedTransactionManager distributedTransactionManager; +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java new file mode 100644 index 0000000000..c02fa625b0 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java @@ -0,0 +1,136 @@ +package 
package com.scalar.db.dataloader.core.dataimport.processor;

import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.databind.JsonNode;
import com.scalar.db.dataloader.core.DataLoaderObjectMapper;
import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunk;
import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunkStatus;
import com.scalar.db.dataloader.core.dataimport.datachunk.ImportRow;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicInteger;

/** Import processor for a JSON source file containing a single top-level array. */
public class JsonImportProcessor extends ImportProcessor {

  private static final DataLoaderObjectMapper OBJECT_MAPPER = new DataLoaderObjectMapper();
  // NOTE(review): static, so data-chunk ids keep increasing across processor instances
  // in the same JVM — confirm that is intended.
  private static final AtomicInteger dataChunkIdCounter = new AtomicInteger(0);

  public JsonImportProcessor(ImportProcessorParams params) {
    super(params);
  }

  /**
   * Processes the data from the JSON import file: array elements are grouped into
   * data chunks, which are then processed in parallel.
   *
   * @param dataChunkSize number of records per data chunk
   * @param transactionBatchSize number of records per transaction batch
   * @param reader reader positioned at the start of the JSON source
   * @return status objects for every processed data chunk
   * @throws RuntimeException if the source cannot be read or is not a JSON array
   */
  @Override
  public List<ImportDataChunkStatus> process(
      int dataChunkSize, int transactionBatchSize, BufferedReader reader) {
    // Parallelism is bounded by the available CPU cores
    int numCores = Runtime.getRuntime().availableProcessors();
    ExecutorService dataChunkExecutor = Executors.newFixedThreadPool(numCores);
    Queue<ImportDataChunk> dataChunkQueue = new LinkedList<>();
    // Failure holder: rethrow reader-thread errors on the caller thread instead of
    // letting them silently kill only the reader thread and import a truncated set.
    RuntimeException[] readerFailure = new RuntimeException[1];
    Thread readerThread =
        new Thread(
            () -> {
              try (JsonParser jsonParser = new JsonFactory().createParser(reader)) {
                if (jsonParser.nextToken() != JsonToken.START_ARRAY) {
                  throw new IOException("Expected content to be an array");
                }
                List<ImportRow> currentDataChunk = new ArrayList<>();
                int rowNumber = 1;
                while (jsonParser.nextToken() != JsonToken.END_ARRAY) {
                  JsonNode jsonNode = OBJECT_MAPPER.readTree(jsonParser);
                  // Empty elements are skipped but still counted so row numbers keep
                  // matching positions in the source array.
                  if (jsonNode != null && !jsonNode.isEmpty()) {
                    currentDataChunk.add(new ImportRow(rowNumber, jsonNode));
                    // If the data chunk is full, add it to the queue
                    if (currentDataChunk.size() == dataChunkSize) {
                      enqueueDataChunk(dataChunkQueue, currentDataChunk);
                      currentDataChunk = new ArrayList<>();
                    }
                  }
                  rowNumber++;
                }
                // Add the last, partially filled data chunk to the queue
                if (!currentDataChunk.isEmpty()) {
                  enqueueDataChunk(dataChunkQueue, currentDataChunk);
                }
              } catch (IOException e) {
                readerFailure[0] = new RuntimeException("Failed to read JSON source", e);
              }
            });
    readerThread.start();

    try {
      // Wait for the reader thread to finish before processing the chunks
      readerThread.join();
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
    }
    if (readerFailure[0] != null) {
      dataChunkExecutor.shutdown();
      throw readerFailure[0];
    }

    // Process data chunks in parallel
    List<Future<ImportDataChunkStatus>> dataChunkFutures = new ArrayList<>();
    while (!dataChunkQueue.isEmpty()) {
      ImportDataChunk dataChunk = dataChunkQueue.poll();
      dataChunkFutures.add(
          dataChunkExecutor.submit(
              () -> processDataChunk(dataChunk, transactionBatchSize, numCores)));
    }
    List<ImportDataChunkStatus> importDataChunkStatusList = new ArrayList<>();
    // Wait for all data chunk threads to complete
    for (Future<ImportDataChunkStatus> dataChunkFuture : dataChunkFutures) {
      try {
        importDataChunkStatusList.add(dataChunkFuture.get());
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
      } catch (Exception e) {
        // TODO: propagate data-chunk failures instead of only printing them
        e.printStackTrace();
      }
    }

    dataChunkExecutor.shutdown();
    notifyAllDataChunksCompleted();
    return importDataChunkStatusList;
  }

  /** Wraps the given rows into a data chunk and appends it to the queue. */
  private static void enqueueDataChunk(Queue<ImportDataChunk> queue, List<ImportRow> rows) {
    queue.offer(
        ImportDataChunk.builder()
            .dataChunkId(dataChunkIdCounter.getAndIncrement())
            .sourceData(rows)
            .build());
  }
}
from the import file + * + * @param dataChunkSize size of data chunk + * @param transactionBatchSize size of transaction batch + * @param reader reader which reads the source file + * @return process data chunk status list + */ + @Override + public List process( + int dataChunkSize, int transactionBatchSize, BufferedReader reader) { + int numCores = Runtime.getRuntime().availableProcessors(); + + // Create a thread pool for processing data batches + ExecutorService dataChunkExecutor = Executors.newFixedThreadPool(numCores); + + // Create a queue to hold data batches + Queue dataChunkQueue = new LinkedList<>(); + + // Create a thread to read JSON lines and populate data batches + Thread readerThread = + new Thread( + () -> { + try { + List currentDataChunk = new ArrayList<>(); + int rowNumber = 1; + String line; + while ((line = reader.readLine()) != null) { + JsonNode jsonNode = OBJECT_MAPPER.readTree(line); + // TODO: do something with the null jsonNode + if (jsonNode == null || jsonNode.isEmpty()) { + continue; + } + + ImportRow importRow = new ImportRow(rowNumber, jsonNode); + currentDataChunk.add(importRow); + + // If the data chunk is full, add it to the queue + if (currentDataChunk.size() == dataChunkSize) { + int dataChunkId = dataChunkIdCounter.getAndIncrement(); + ImportDataChunk importDataChunk = + ImportDataChunk.builder() + .dataChunkId(dataChunkId) + .sourceData(currentDataChunk) + .build(); + dataChunkQueue.offer(importDataChunk); + currentDataChunk = new ArrayList<>(); + } + rowNumber++; + } + + // Add the last data chunk to the queue + if (!currentDataChunk.isEmpty()) { + int dataChunkId = dataChunkIdCounter.getAndIncrement(); + ImportDataChunk importDataChunk = + ImportDataChunk.builder() + .dataChunkId(dataChunkId) + .sourceData(currentDataChunk) + .build(); + dataChunkQueue.offer(importDataChunk); + } + } catch (IOException e) { + // TODO: handle this exception + throw new RuntimeException(e); + } + }); + readerThread.start(); + try { + // Wait 
for readerThread to finish + readerThread.join(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + System.err.println("Main thread was interrupted."); + } + // Process data chunks in parallel + List> dataChunkFutures = new ArrayList<>(); + while (!dataChunkQueue.isEmpty()) { + ImportDataChunk dataChunk = dataChunkQueue.poll(); + Future dataChunkFuture = + dataChunkExecutor.submit( + () -> processDataChunk(dataChunk, transactionBatchSize, numCores)); + dataChunkFutures.add(dataChunkFuture); + } + + List importDataChunkStatusList = new ArrayList<>(); + // Wait for all data chunk threads to complete + for (Future dataChunkFuture : dataChunkFutures) { + try { + importDataChunkStatusList.add((ImportDataChunkStatus) dataChunkFuture.get()); + } catch (Exception e) { + // TODO: handle the exception + e.printStackTrace(); + } + } + dataChunkExecutor.shutdown(); + notifyAllDataChunksCompleted(); + return importDataChunkStatusList; + } +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/TableColumnDataTypes.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/TableColumnDataTypes.java new file mode 100644 index 0000000000..54268b2ccf --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/TableColumnDataTypes.java @@ -0,0 +1,31 @@ +package com.scalar.db.dataloader.core.dataimport.processor; + +import com.scalar.db.io.DataType; +import java.util.HashMap; +import java.util.Map; + +public class TableColumnDataTypes { + private final Map> dataTypesByColumnsByTable; + + public TableColumnDataTypes() { + this.dataTypesByColumnsByTable = new HashMap<>(); + } + + public void addColumnDataType(String tableName, String columnName, DataType dataType) { + dataTypesByColumnsByTable + .computeIfAbsent(tableName, key -> new HashMap<>()) + .put(columnName, dataType); + } + + public DataType getDataType(String tableName, String columnName) 
{ + Map columnDataTypes = dataTypesByColumnsByTable.get(tableName); + if (columnDataTypes != null) { + return columnDataTypes.get(columnName); + } + return null; + } + + public Map getColumnDataTypes(String tableName) { + return dataTypesByColumnsByTable.get(tableName); + } +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportStorageTask.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportStorageTask.java new file mode 100644 index 0000000000..2211f054bf --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportStorageTask.java @@ -0,0 +1,36 @@ +package com.scalar.db.dataloader.core.dataimport.task; + +import com.scalar.db.api.DistributedStorage; +import com.scalar.db.api.Result; +import com.scalar.db.dataloader.core.dataimport.dao.ScalarDBDaoException; +import com.scalar.db.io.Column; +import com.scalar.db.io.Key; +import java.util.*; + +public class ImportStorageTask extends ImportTask { + + private final DistributedStorage storage; + + public ImportStorageTask(ImportTaskParams params, DistributedStorage storage) { + super(params); + this.storage = storage; + } + + @Override + protected Optional getDataRecord( + String namespace, String tableName, Key partitionKey, Key clusteringKey) + throws ScalarDBDaoException { + return params.getDao().get(namespace, tableName, partitionKey, clusteringKey, this.storage); + } + + @Override + protected void saveRecord( + String namespace, + String tableName, + Key partitionKey, + Key clusteringKey, + List> columns) + throws ScalarDBDaoException { + params.getDao().put(namespace, tableName, partitionKey, clusteringKey, columns, this.storage); + } +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTask.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTask.java new file mode 100644 index 0000000000..437e851ba1 --- 
/dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTask.java @@ -0,0 +1,360 @@ +package com.scalar.db.dataloader.core.dataimport.task; + +import static com.scalar.db.dataloader.core.dataimport.task.ImportTaskConstants.*; + +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.scalar.db.api.Result; +import com.scalar.db.api.TableMetadata; +import com.scalar.db.common.error.CoreError; +import com.scalar.db.dataloader.core.dataimport.ImportMode; +import com.scalar.db.dataloader.core.dataimport.ImportOptions; +import com.scalar.db.dataloader.core.dataimport.controlfile.ControlFile; +import com.scalar.db.dataloader.core.dataimport.controlfile.ControlFileTable; +import com.scalar.db.dataloader.core.dataimport.controlfile.ControlFileTableFieldMapping; +import com.scalar.db.dataloader.core.dataimport.dao.ScalarDBDaoException; +import com.scalar.db.dataloader.core.dataimport.processor.TableColumnDataTypes; +import com.scalar.db.dataloader.core.dataimport.task.mapping.ImportDataMapping; +import com.scalar.db.dataloader.core.dataimport.task.result.ImportTargetResult; +import com.scalar.db.dataloader.core.dataimport.task.result.ImportTargetResultStatus; +import com.scalar.db.dataloader.core.dataimport.task.result.ImportTaskResult; +import com.scalar.db.dataloader.core.dataimport.task.validation.ImportSourceRecordValidationResult; +import com.scalar.db.dataloader.core.dataimport.task.validation.ImportSourceRecordValidator; +import com.scalar.db.dataloader.core.exception.Base64Exception; +import com.scalar.db.dataloader.core.exception.ColumnParsingException; +import com.scalar.db.dataloader.core.util.ColumnUtils; +import com.scalar.db.dataloader.core.util.KeyUtils; +import com.scalar.db.dataloader.core.util.TableMetadataUtil; +import com.scalar.db.io.Column; +import com.scalar.db.io.DataType; +import com.scalar.db.io.Key; +import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedHashSet; 
+import java.util.List; +import java.util.Map; +import java.util.Optional; +import lombok.RequiredArgsConstructor; + +@RequiredArgsConstructor +public abstract class ImportTask { + + protected final ImportTaskParams params; + + public ImportTaskResult execute() { + + ObjectNode mutableSourceRecord = params.getSourceRecord().deepCopy(); + ImportOptions importOptions = params.getImportOptions(); + + // Single table import + if (importOptions.getControlFile() == null) { + String tableLookupKey = + TableMetadataUtil.getTableLookupKey( + importOptions.getNamespace(), importOptions.getTableName()); + ImportTargetResult singleTargetResult = + importIntoSingleTable( + importOptions.getNamespace(), + importOptions.getTableName(), + params.getTableMetadataByTableName().get(tableLookupKey), + params.getTableColumnDataTypes().getColumnDataTypes(tableLookupKey), + null, + mutableSourceRecord); + // Add the single target result to the list of targets and return the result + return ImportTaskResult.builder() + .rawRecord(params.getSourceRecord()) + .rowNumber(params.getRowNumber()) + .targets(Collections.singletonList(singleTargetResult)) + .build(); + } + + // Multi-table import + List multiTargetResults = + startMultiTableImportProcess( + importOptions.getControlFile(), + params.getTableMetadataByTableName(), + params.getTableColumnDataTypes(), + mutableSourceRecord); + + return ImportTaskResult.builder() + .targets(multiTargetResults) + .rawRecord(params.getSourceRecord()) + .rowNumber(params.getRowNumber()) + .build(); + } + + private List startMultiTableImportProcess( + ControlFile controlFile, + Map tableMetadataByTableName, + TableColumnDataTypes tableColumnDataTypes, + ObjectNode mutableSourceRecord) { + + List targetResults = new ArrayList<>(); + + // Import for every table mapping specified in the control file + for (ControlFileTable controlFileTable : controlFile.getTables()) { + for (ControlFileTableFieldMapping mapping : controlFileTable.getMappings()) { + if 
(!mutableSourceRecord.has(mapping.getSourceField()) + && !mutableSourceRecord.has(mapping.getTargetColumn())) { + String errorMessage = + CoreError.DATA_LOADER_MISSING_SOURCE_FIELD.buildMessage( + mapping.getSourceField(), controlFileTable.getTableName()); + + ImportTargetResult targetResult = + ImportTargetResult.builder() + .namespace(controlFileTable.getNamespace()) + .tableName(controlFileTable.getTableName()) + .errors(Collections.singletonList(errorMessage)) + .status(ImportTargetResultStatus.VALIDATION_FAILED) + .build(); + return Collections.singletonList(targetResult); + } + } + + // Import into a single table + String tableLookupKey = TableMetadataUtil.getTableLookupKey(controlFileTable); + TableMetadata tableMetadata = tableMetadataByTableName.get(tableLookupKey); + Map dataTypesByColumns = + tableColumnDataTypes.getColumnDataTypes(tableLookupKey); + // Copied data to an object node data was overwritten by following operations and data check + // fails when same object is referenced again in logic before + ObjectNode copyNode = mutableSourceRecord.deepCopy(); + ImportTargetResult result = + importIntoSingleTable( + controlFileTable.getNamespace(), + controlFileTable.getTableName(), + tableMetadata, + dataTypesByColumns, + controlFileTable, + copyNode); + targetResults.add(result); + } + return targetResults; + } + + private ImportTargetResult importIntoSingleTable( + String namespace, + String tableName, + TableMetadata tableMetadata, + Map dataTypeByColumnName, + ControlFileTable controlFileTable, + ObjectNode mutableSourceRecord) { + + ImportOptions importOptions = params.getImportOptions(); + + if (dataTypeByColumnName == null || tableMetadata == null) { + return ImportTargetResult.builder() + .namespace(namespace) + .tableName(tableName) + .status(ImportTargetResultStatus.VALIDATION_FAILED) + .errors(Collections.singletonList(ERROR_TABLE_METADATA_MISSING)) + .build(); + } + + LinkedHashSet partitionKeyNames = tableMetadata.getPartitionKeyNames(); + 
LinkedHashSet clusteringKeyNames = tableMetadata.getClusteringKeyNames(); + LinkedHashSet columnNames = tableMetadata.getColumnNames(); + + applyDataMapping(controlFileTable, mutableSourceRecord); + + boolean checkForMissingColumns = shouldCheckForMissingColumns(importOptions); + + ImportSourceRecordValidationResult validationResult = + validateSourceRecord( + partitionKeyNames, + clusteringKeyNames, + columnNames, + mutableSourceRecord, + checkForMissingColumns); + + if (!validationResult.isValid()) { + return ImportTargetResult.builder() + .namespace(namespace) + .tableName(tableName) + .status(ImportTargetResultStatus.VALIDATION_FAILED) + .errors(validationResult.getErrorMessages()) + .build(); + } + + Optional optionalPartitionKey = + KeyUtils.createPartitionKeyFromSource( + partitionKeyNames, dataTypeByColumnName, mutableSourceRecord); + if (optionalPartitionKey.isEmpty()) { + return ImportTargetResult.builder() + .namespace(namespace) + .tableName(tableName) + .status(ImportTargetResultStatus.VALIDATION_FAILED) + .errors(Collections.singletonList(ERROR_COULD_NOT_FIND_PARTITION_KEY)) + .build(); + } + Optional optionalClusteringKey = Optional.empty(); + if (!clusteringKeyNames.isEmpty()) { + optionalClusteringKey = + KeyUtils.createClusteringKeyFromSource( + clusteringKeyNames, dataTypeByColumnName, mutableSourceRecord); + if (optionalClusteringKey.isEmpty()) { + return ImportTargetResult.builder() + .namespace(namespace) + .tableName(tableName) + .status(ImportTargetResultStatus.VALIDATION_FAILED) + .errors(Collections.singletonList(ERROR_COULD_NOT_FIND_CLUSTERING_KEY)) + .build(); + } + } + + Optional optionalScalarDBResult; + + try { + optionalScalarDBResult = + getDataRecord( + namespace, tableName, optionalPartitionKey.get(), optionalClusteringKey.orElse(null)); + } catch (ScalarDBDaoException e) { + return ImportTargetResult.builder() + .namespace(namespace) + .tableName(tableName) + .status(ImportTargetResultStatus.RETRIEVAL_FAILED) + 
.errors(Collections.singletonList(e.getMessage())) + .build(); + } + ImportTaskAction importAction = + optionalScalarDBResult.isPresent() ? ImportTaskAction.UPDATE : ImportTaskAction.INSERT; + + if (importAction == ImportTaskAction.INSERT + && shouldRevalidateMissingColumns(importOptions, checkForMissingColumns)) { + ImportSourceRecordValidationResult validationResultForMissingColumns = + new ImportSourceRecordValidationResult(); + ImportSourceRecordValidator.checkMissingColumns( + mutableSourceRecord, columnNames, validationResultForMissingColumns); + if (!validationResultForMissingColumns.isValid()) { + return ImportTargetResult.builder() + .namespace(namespace) + .tableName(tableName) + .status(ImportTargetResultStatus.MISSING_COLUMNS) + .errors(Collections.singletonList(ERROR_UPSERT_INSERT_MISSING_COLUMNS)) + .build(); + } + } + + if (shouldFailForExistingData(importAction, importOptions)) { + return ImportTargetResult.builder() + .namespace(namespace) + .tableName(tableName) + .importedRecord(mutableSourceRecord) + .importAction(importAction) + .status(ImportTargetResultStatus.DATA_ALREADY_EXISTS) + .errors(Collections.singletonList(ERROR_DATA_ALREADY_EXISTS)) + .build(); + } + + if (shouldFailForMissingData(importAction, importOptions)) { + return ImportTargetResult.builder() + .namespace(namespace) + .tableName(tableName) + .importedRecord(mutableSourceRecord) + .importAction(importAction) + .status(ImportTargetResultStatus.DATA_NOT_FOUND) + .errors(Collections.singletonList(ERROR_DATA_NOT_FOUND)) + .build(); + } + + List> columns; + + try { + columns = + ColumnUtils.getColumnsFromResult( + optionalScalarDBResult.orElse(null), + mutableSourceRecord, + importOptions.isIgnoreNullValues(), + partitionKeyNames, + clusteringKeyNames, + columnNames, + dataTypeByColumnName); + } catch (Base64Exception | ColumnParsingException e) { + return ImportTargetResult.builder() + .namespace(namespace) + .tableName(tableName) + 
.status(ImportTargetResultStatus.VALIDATION_FAILED) + .errors(Collections.singletonList(e.getMessage())) + .build(); + } + + // Time to save the record + try { + saveRecord( + namespace, + tableName, + optionalPartitionKey.get(), + optionalClusteringKey.orElse(null), + columns); + + return ImportTargetResult.builder() + .namespace(namespace) + .tableName(tableName) + .importAction(importAction) + .importedRecord(mutableSourceRecord) + .status(ImportTargetResultStatus.SAVED) + .build(); + + } catch (ScalarDBDaoException e) { + return ImportTargetResult.builder() + .namespace(namespace) + .tableName(tableName) + .importAction(importAction) + .status(ImportTargetResultStatus.SAVE_FAILED) + .errors(Collections.singletonList(e.getMessage())) + .build(); + } + } + + private void applyDataMapping(ControlFileTable controlFileTable, ObjectNode mutableSourceRecord) { + if (controlFileTable != null) { + ImportDataMapping.apply(mutableSourceRecord, controlFileTable); + } + } + + private boolean shouldCheckForMissingColumns(ImportOptions importOptions) { + return importOptions.getImportMode() == ImportMode.INSERT + || importOptions.isRequireAllColumns(); + } + + private ImportSourceRecordValidationResult validateSourceRecord( + LinkedHashSet partitionKeyNames, + LinkedHashSet clusteringKeyNames, + LinkedHashSet columnNames, + ObjectNode mutableSourceRecord, + boolean checkForMissingColumns) { + return ImportSourceRecordValidator.validateSourceRecord( + partitionKeyNames, + clusteringKeyNames, + columnNames, + mutableSourceRecord, + checkForMissingColumns); + } + + private boolean shouldRevalidateMissingColumns( + ImportOptions importOptions, boolean checkForMissingColumns) { + return !checkForMissingColumns && importOptions.getImportMode() == ImportMode.UPSERT; + } + + private boolean shouldFailForExistingData( + ImportTaskAction importAction, ImportOptions importOptions) { + return importAction == ImportTaskAction.UPDATE + && importOptions.getImportMode() == ImportMode.INSERT; 
+ } + + private boolean shouldFailForMissingData( + ImportTaskAction importAction, ImportOptions importOptions) { + return importAction == ImportTaskAction.INSERT + && importOptions.getImportMode() == ImportMode.UPDATE; + } + + protected abstract Optional getDataRecord( + String namespace, String tableName, Key partitionKey, Key clusteringKey) + throws ScalarDBDaoException; + + protected abstract void saveRecord( + String namespace, + String tableName, + Key partitionKey, + Key clusteringKey, + List> columns) + throws ScalarDBDaoException; +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTaskParams.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTaskParams.java new file mode 100644 index 0000000000..f85671140d --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTaskParams.java @@ -0,0 +1,24 @@ +package com.scalar.db.dataloader.core.dataimport.task; + +import com.fasterxml.jackson.databind.JsonNode; +import com.scalar.db.api.TableMetadata; +import com.scalar.db.dataloader.core.dataimport.ImportOptions; +import com.scalar.db.dataloader.core.dataimport.dao.ScalarDBDao; +import com.scalar.db.dataloader.core.dataimport.processor.TableColumnDataTypes; +import java.util.Map; +import lombok.Builder; +import lombok.NonNull; +import lombok.Value; + +@Builder +@Value +public class ImportTaskParams { + + @NonNull JsonNode sourceRecord; + int dataChunkId; + int rowNumber; + @NonNull ImportOptions importOptions; + @NonNull Map tableMetadataByTableName; + @NonNull TableColumnDataTypes tableColumnDataTypes; + @NonNull ScalarDBDao dao; +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTransactionalTask.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTransactionalTask.java new file mode 100644 index 0000000000..71e0d3ae23 --- /dev/null +++ 
b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTransactionalTask.java @@ -0,0 +1,54 @@ +package com.scalar.db.dataloader.core.dataimport.task; + +import com.scalar.db.api.DistributedTransaction; +import com.scalar.db.api.Result; +import com.scalar.db.dataloader.core.dataimport.dao.ScalarDBDaoException; +import com.scalar.db.exception.transaction.AbortException; +import com.scalar.db.exception.transaction.TransactionException; +import com.scalar.db.io.Column; +import com.scalar.db.io.Key; +import java.util.List; +import java.util.Optional; + +public class ImportTransactionalTask extends ImportTask { + + private final DistributedTransaction transaction; + + public ImportTransactionalTask(ImportTaskParams params, DistributedTransaction transaction) { + super(params); + this.transaction = transaction; + } + + @Override + protected Optional getDataRecord( + String namespace, String tableName, Key partitionKey, Key clusteringKey) + throws ScalarDBDaoException { + return params.getDao().get(namespace, tableName, partitionKey, clusteringKey, transaction); + } + + @Override + protected void saveRecord( + String namespace, + String tableName, + Key partitionKey, + Key clusteringKey, + List> columns) + throws ScalarDBDaoException { + params.getDao().put(namespace, tableName, partitionKey, clusteringKey, columns, transaction); + } + + /** + * Abort the active ScalarDB transaction + * + * @throws TransactionException if something goes wrong during the aborting process + */ + private void abortActiveTransaction(DistributedTransaction tx) throws TransactionException { + if (tx != null) { + try { + tx.abort(); + } catch (AbortException e) { + throw new TransactionException(e.getMessage(), tx.getId()); + } + } + } +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ColumnUtils.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ColumnUtils.java index 58f10d0f84..91008df3d9 100644 --- 
a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ColumnUtils.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ColumnUtils.java @@ -1,7 +1,12 @@ package com.scalar.db.dataloader.core.util; +import static com.scalar.db.dataloader.core.util.TableMetadataUtil.isMetadataColumn; + +import com.fasterxml.jackson.databind.JsonNode; +import com.scalar.db.api.Result; import com.scalar.db.common.error.CoreError; import com.scalar.db.dataloader.core.ColumnInfo; +import com.scalar.db.dataloader.core.exception.Base64Exception; import com.scalar.db.dataloader.core.exception.ColumnParsingException; import com.scalar.db.io.BigIntColumn; import com.scalar.db.io.BlobColumn; @@ -12,7 +17,7 @@ import com.scalar.db.io.FloatColumn; import com.scalar.db.io.IntColumn; import com.scalar.db.io.TextColumn; -import java.util.Base64; +import java.util.*; import javax.annotation.Nullable; /** @@ -88,4 +93,140 @@ public static Column createColumnFromValue( e); } } + + /** + * Get columns from result data + * + * @param scalarDBResult result record + * @param sourceRecord source data + * @param ignoreNullValues ignore null values or not + * @param partitionKeyNames partition key names + * @param clusteringKeyNames clustering key names + * @param columnNames column names + * @param dataTypesByColumns data types of columns + * @return list of columns + * @throws Base64Exception if an error occurs while base64 decoding + */ + public static List> getColumnsFromResult( + Result scalarDBResult, + JsonNode sourceRecord, + boolean ignoreNullValues, + Set partitionKeyNames, + Set clusteringKeyNames, + Set columnNames, + Map dataTypesByColumns) + throws Base64Exception, ColumnParsingException { + + List> columns = new ArrayList<>(); + Set columnsToIgnore = getColumnsToIgnore(partitionKeyNames, clusteringKeyNames); + + for (String columnName : columnNames) { + if (isMetadataColumn(columnName, columnsToIgnore, columnNames)) { + continue; + } + + Column column = + 
getColumn(scalarDBResult, sourceRecord, columnName, ignoreNullValues, dataTypesByColumns); + + if (column != null) { + columns.add(column); + } + } + + return columns; + } + + /** + * Create a set of columns to ignore + * + * @param partitionKeyNames a set of partition key names + * @param clusteringKeyNames a set of clustering key names + * @return a set of columns to ignore + */ + private static Set getColumnsToIgnore( + Set partitionKeyNames, Set clusteringKeyNames) { + Set columnsToIgnore = new HashSet<>(TableMetadataUtil.getMetadataColumns()); + columnsToIgnore.addAll(partitionKeyNames); + columnsToIgnore.addAll(clusteringKeyNames); + return columnsToIgnore; + } + + /** + * Checks if a column is a metadata column + * + * @param columnName column name + * @param columnsToIgnore set of columns to ignore + * @param columnNames set of column names + * @return if column is a metadata column or not + */ + private static boolean isMetadataColumn( + String columnName, Set columnsToIgnore, Set columnNames) { + return TableMetadataUtil.isMetadataColumn(columnName, columnsToIgnore, columnNames); + } + + /** + * Get columns from result data + * + * @param scalarDBResult result record + * @param sourceRecord source data + * @param columnName column name + * @param ignoreNullValues ignore null values or not + * @param dataTypesByColumns data types of columns + * @return column data + * @throws Base64Exception if an error occurs while base64 decoding + */ + private static Column getColumn( + Result scalarDBResult, + JsonNode sourceRecord, + String columnName, + boolean ignoreNullValues, + Map dataTypesByColumns) + throws Base64Exception, ColumnParsingException { + if (scalarDBResult != null && !sourceRecord.has(columnName)) { + return getColumnFromResult(scalarDBResult, columnName); + } else { + return getColumnFromSourceRecord( + sourceRecord, columnName, ignoreNullValues, dataTypesByColumns); + } + } + + /** + * Get column from result + * + * @param scalarDBResult result 
record + * @param columnName column name + * @return column data + */ + private static Column getColumnFromResult(Result scalarDBResult, String columnName) { + Map> columnValues = scalarDBResult.getColumns(); + return columnValues.get(columnName); + } + + /** + * Get column from result + * + * @param sourceRecord source data + * @param columnName column name + * @param ignoreNullValues ignore null values or not + * @param dataTypesByColumns data types of columns + * @return column data + * @throws Base64Exception if an error occurs while base64 decoding + */ + private static Column getColumnFromSourceRecord( + JsonNode sourceRecord, + String columnName, + boolean ignoreNullValues, + Map dataTypesByColumns) + throws Base64Exception, ColumnParsingException { + DataType dataType = dataTypesByColumns.get(columnName); + String columnValue = + sourceRecord.has(columnName) && !sourceRecord.get(columnName).isNull() + ? sourceRecord.get(columnName).asText() + : null; + if (!ignoreNullValues || columnValue != null) { + ColumnInfo columnInfo = ColumnInfo.builder().columnName(columnName).build(); + return createColumnFromValue(dataType, columnInfo, columnValue); + } + return null; + } } diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/KeyUtils.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/KeyUtils.java index c2491df0f4..e46311545d 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/KeyUtils.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/KeyUtils.java @@ -1,14 +1,18 @@ package com.scalar.db.dataloader.core.util; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; import com.scalar.db.api.TableMetadata; import com.scalar.db.common.error.CoreError; import com.scalar.db.dataloader.core.ColumnInfo; import com.scalar.db.dataloader.core.ColumnKeyValue; +import com.scalar.db.dataloader.core.exception.Base64Exception; 
import com.scalar.db.dataloader.core.exception.ColumnParsingException; import com.scalar.db.dataloader.core.exception.KeyParsingException; import com.scalar.db.io.Column; import com.scalar.db.io.DataType; import com.scalar.db.io.Key; +import java.util.*; import javax.annotation.Nullable; /** @@ -22,6 +26,22 @@ public final class KeyUtils { /** Restrict instantiation via private constructor */ private KeyUtils() {} + public static Optional createClusteringKeyFromSource( + Set clusteringKeyNames, + Map dataTypeByColumnName, + ObjectNode sourceRecord) { + return clusteringKeyNames.isEmpty() + ? Optional.empty() + : createKeyFromSource(clusteringKeyNames, dataTypeByColumnName, sourceRecord); + } + + public static Optional createPartitionKeyFromSource( + Set partitionKeyNames, + Map dataTypeByColumnName, + ObjectNode sourceRecord) { + return createKeyFromSource(partitionKeyNames, dataTypeByColumnName, sourceRecord); + } + /** * Converts a key-value pair, in the format of =, into a ScalarDB Key instance for a * specific ScalarDB table. @@ -85,4 +105,51 @@ public static Key createKey(DataType dataType, ColumnInfo columnInfo, String val throw new KeyParsingException(e.getMessage(), e); } } + + /** + * Create a new composite ScalarDB key. 
+ * + * @param dataTypes List of data types for the columns + * @param columnNames List of column names + * @param values List of key values + * @return ScalarDB Key instance, or empty if the provided arrays are not of the same length + * @throws Base64Exception if there is an error creating the key values + */ + public static Optional createCompositeKey( + List dataTypes, List columnNames, List values) + throws Base64Exception, ColumnParsingException { + if (!CollectionUtil.areSameLength(dataTypes, columnNames, values)) { + return Optional.empty(); + } + Key.Builder builder = Key.newBuilder(); + for (int i = 0; i < dataTypes.size(); i++) { + ColumnInfo columnInfo = ColumnInfo.builder().columnName(columnNames.get(i)).build(); + Column keyValue = + ColumnUtils.createColumnFromValue(dataTypes.get(i), columnInfo, values.get(i)); + builder.add(keyValue); + } + return Optional.of(builder.build()); + } + + private static Optional createKeyFromSource( + Set keyNames, Map columnDataTypes, JsonNode sourceRecord) { + List dataTypes = new ArrayList<>(); + List columnNames = new ArrayList<>(); + List values = new ArrayList<>(); + + for (String keyName : keyNames) { + if (!columnDataTypes.containsKey(keyName) || !sourceRecord.has(keyName)) { + return Optional.empty(); + } + dataTypes.add(columnDataTypes.get(keyName)); + columnNames.add(keyName); + values.add(sourceRecord.get(keyName).asText()); + } + + try { + return createCompositeKey(dataTypes, columnNames, values); + } catch (Base64Exception | ColumnParsingException e) { + return Optional.empty(); + } + } } diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/DefaultImportProcessorFactoryTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/DefaultImportProcessorFactoryTest.java new file mode 100644 index 0000000000..e78b019dd1 --- /dev/null +++ 
b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/DefaultImportProcessorFactoryTest.java @@ -0,0 +1,60 @@ +package com.scalar.db.dataloader.core.dataimport.processor; + +import static org.junit.jupiter.api.Assertions.assertInstanceOf; + +import com.scalar.db.dataloader.core.FileFormat; +import com.scalar.db.dataloader.core.dataimport.ImportOptions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class DefaultImportProcessorFactoryTest { + + private DefaultImportProcessorFactory factory; + + @BeforeEach + void setUp() { + factory = new DefaultImportProcessorFactory(); + } + + @Test + void createImportProcessor_givenFileFormatIsJsonl_shouldReturnJsonLinesImportProcessor() { + // Arrange + ImportOptions importOptions = ImportOptions.builder().fileFormat(FileFormat.JSONL).build(); + ImportProcessorParams params = + ImportProcessorParams.builder().importOptions(importOptions).build(); + + // Act + ImportProcessor result = factory.createImportProcessor(params); + + // Assert + assertInstanceOf(JsonLinesImportProcessor.class, result); + } + + @Test + void createImportProcessor_givenFileFormatIsJson_shouldReturnJsonImportProcessor() { + // Given + ImportOptions importOptions = ImportOptions.builder().fileFormat(FileFormat.JSON).build(); + ImportProcessorParams params = + ImportProcessorParams.builder().importOptions(importOptions).build(); + + // When + ImportProcessor result = factory.createImportProcessor(params); + + // Then + assertInstanceOf(JsonImportProcessor.class, result); + } + + @Test + void createImportProcessor_givenFileFormatIsCsv_shouldReturnCsvImportProcessor() { + // Given + ImportOptions importOptions = ImportOptions.builder().fileFormat(FileFormat.CSV).build(); + ImportProcessorParams params = + ImportProcessorParams.builder().importOptions(importOptions).build(); + + // When + ImportProcessor result = factory.createImportProcessor(params); + + // Then + 
assertInstanceOf(CsvImportProcessor.class, result); + } +} diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/TableColumnDataTypesTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/TableColumnDataTypesTest.java new file mode 100644 index 0000000000..2d72827f4f --- /dev/null +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/TableColumnDataTypesTest.java @@ -0,0 +1,33 @@ +package com.scalar.db.dataloader.core.dataimport.processor; + +import com.scalar.db.io.DataType; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +class TableColumnDataTypesTest { + + TableColumnDataTypes tableColumnDataTypes; + + @Test + void addColumnDataType_withValidData_shouldAddColumnDataType() { + tableColumnDataTypes = new TableColumnDataTypes(); + tableColumnDataTypes.addColumnDataType("table", "id", DataType.BIGINT); + tableColumnDataTypes.addColumnDataType("table", "name", DataType.TEXT); + Assertions.assertEquals( + DataType.BIGINT, tableColumnDataTypes.getColumnDataTypes("table").get("id")); + } + + @Test + void getDataType_withValidTableAndColumnName_shouldReturnCorrectDataType() { + tableColumnDataTypes = new TableColumnDataTypes(); + tableColumnDataTypes.addColumnDataType("table", "id", DataType.BIGINT); + tableColumnDataTypes.addColumnDataType("table", "name", DataType.TEXT); + Assertions.assertEquals(DataType.TEXT, tableColumnDataTypes.getDataType("table", "name")); + } + + @Test + void getDataType_withInvalidTableAndColumnName_shouldReturnCorrectDataType() { + tableColumnDataTypes = new TableColumnDataTypes(); + Assertions.assertNull(tableColumnDataTypes.getDataType("table", "name")); + } +} diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ColumnUtilsTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ColumnUtilsTest.java index cd47243b16..cefb0dcb66 100644 --- 
a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ColumnUtilsTest.java +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ColumnUtilsTest.java @@ -2,8 +2,14 @@ import static org.junit.jupiter.api.Assertions.*; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.scalar.db.api.Result; +import com.scalar.db.api.TableMetadata; +import com.scalar.db.common.ResultImpl; import com.scalar.db.common.error.CoreError; import com.scalar.db.dataloader.core.ColumnInfo; +import com.scalar.db.dataloader.core.UnitTestUtils; +import com.scalar.db.dataloader.core.exception.Base64Exception; import com.scalar.db.dataloader.core.exception.ColumnParsingException; import com.scalar.db.io.BigIntColumn; import com.scalar.db.io.BlobColumn; @@ -16,7 +22,10 @@ import com.scalar.db.io.TextColumn; import java.nio.charset.StandardCharsets; import java.util.Base64; +import java.util.List; +import java.util.Map; import java.util.stream.Stream; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; @@ -25,6 +34,11 @@ class ColumnUtilsTest { private static final float FLOAT_VALUE = 2.78f; + private static final TableMetadata mockMetadata = UnitTestUtils.createTestTableMetadata(); + private static final ObjectNode sourceRecord = UnitTestUtils.getOutputDataWithMetadata(); + private static final Map dataTypesByColumns = UnitTestUtils.getColumnData(); + private static final Map> values = UnitTestUtils.createTestValues(); + private static final Result scalarDBResult = new ResultImpl(values, mockMetadata); private static Stream provideColumnsForCreateColumnFromValue() { return Stream.of( @@ -105,4 +119,19 @@ void createColumnFromValue_invalidBase64_throwsBase64Exception() { columnName, "table", "ns"), exception.getMessage()); } + + @Test + void getColumnsFromResult_withValidData_shouldReturnColumns() + throws Base64Exception, 
ColumnParsingException { + List> columns = + ColumnUtils.getColumnsFromResult( + scalarDBResult, + sourceRecord, + false, + mockMetadata.getPartitionKeyNames(), + mockMetadata.getClusteringKeyNames(), + mockMetadata.getColumnNames(), + dataTypesByColumns); + Assertions.assertEquals(4, columns.size()); + } } diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/KeyUtilsTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/KeyUtilsTest.java index f2fe680490..5c1a04cc22 100644 --- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/KeyUtilsTest.java +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/KeyUtilsTest.java @@ -3,10 +3,12 @@ import static org.junit.jupiter.api.Assertions.*; import static org.mockito.Mockito.*; +import com.fasterxml.jackson.databind.node.ObjectNode; import com.scalar.db.api.TableMetadata; import com.scalar.db.common.error.CoreError; import com.scalar.db.dataloader.core.ColumnInfo; import com.scalar.db.dataloader.core.ColumnKeyValue; +import com.scalar.db.dataloader.core.UnitTestUtils; import com.scalar.db.dataloader.core.exception.KeyParsingException; import com.scalar.db.io.BigIntColumn; import com.scalar.db.io.BlobColumn; @@ -18,7 +20,8 @@ import com.scalar.db.io.Key; import com.scalar.db.io.TextColumn; import java.nio.charset.StandardCharsets; -import java.util.Base64; +import java.util.*; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; @@ -28,6 +31,8 @@ class KeyUtilsTest { @Mock private TableMetadata tableMetadata; + private static final Map dataTypeByColumnName = UnitTestUtils.getColumnData(); + private static final ObjectNode sourceRecord = UnitTestUtils.getOutputDataWithMetadata(); @Test void parseKeyValue_nullKeyValue_returnsNull() throws KeyParsingException { @@ -146,4 +151,42 @@ void createKey_invalidBase64_throwsBase64Exception() { 
assertThrows( KeyParsingException.class, () -> KeyUtils.createKey(DataType.BLOB, columnInfo, value)); } + + @Test + void createClusteringKeyFromSource_withEmptyClusteringKeySet_shouldReturnEmpty() { + Optional key = KeyUtils.createClusteringKeyFromSource(Collections.EMPTY_SET, null, null); + Assertions.assertEquals(Optional.empty(), key); + } + + @Test + void createClusteringKeyFromSource_withValidClusteringKeySet_shouldReturnValidKey() { + Set clusterKeySet = new HashSet<>(); + clusterKeySet.add(UnitTestUtils.TEST_COLUMN_2_CK); + clusterKeySet.add(UnitTestUtils.TEST_COLUMN_3_CK); + Optional key = + KeyUtils.createClusteringKeyFromSource(clusterKeySet, dataTypeByColumnName, sourceRecord); + Assertions.assertEquals( + "Optional[Key{IntColumn{name=col2, value=2147483647, hasNullValue=false}, BooleanColumn{name=col3, value=true, hasNullValue=false}}]", + key.toString()); + } + + @Test + void createPartitionKeyFromSource_withInvalidData_shouldReturnEmpty() { + Set partitionKeySet = new HashSet<>(); + partitionKeySet.add("id1"); + Optional key = + KeyUtils.createPartitionKeyFromSource(partitionKeySet, dataTypeByColumnName, sourceRecord); + Assertions.assertEquals(Optional.empty(), key); + } + + @Test + void createPartitionKeyFromSource_withValidData_shouldReturnValidKey() { + Set partitionKeySet = new HashSet<>(); + partitionKeySet.add(UnitTestUtils.TEST_COLUMN_1_PK); + Optional key = + KeyUtils.createPartitionKeyFromSource(partitionKeySet, dataTypeByColumnName, sourceRecord); + Assertions.assertEquals( + "Optional[Key{BigIntColumn{name=col1, value=9007199254740992, hasNullValue=false}}]", + key.toString()); + } } diff --git a/gradle/spotbugs-exclude.xml b/gradle/spotbugs-exclude.xml index 23254eb3ab..bab1669d82 100644 --- a/gradle/spotbugs-exclude.xml +++ b/gradle/spotbugs-exclude.xml @@ -37,7 +37,7 @@ - + From 8ecb39cd54f9fd8030171f6478178f25fc0201d0 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Thu, 9 Jan 2025 17:32:10 +0530 Subject: [PATCH 47/87] Changes -1 --- 
.../java/com/scalar/db/common/error/CoreError.java | 10 +++++----- .../dataimport/controlfile/ControlFileTable.java | 14 +++++++------- .../controlfile/ControlFileValidator.java | 4 ++-- .../db/dataloader/core/util/TableMetadataUtil.java | 2 +- .../controlfile/ControlFileValidatorTest.java | 4 ++-- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/core/src/main/java/com/scalar/db/common/error/CoreError.java b/core/src/main/java/com/scalar/db/common/error/CoreError.java index 622b4b009e..00544a9aec 100644 --- a/core/src/main/java/com/scalar/db/common/error/CoreError.java +++ b/core/src/main/java/com/scalar/db/common/error/CoreError.java @@ -728,12 +728,12 @@ public enum CoreError implements ScalarDbError { DATA_LOADER_MISSING_COLUMN_MAPPING( Category.USER_ERROR, "0159", - "No mapping found for column '%s' in table '%s' in the control file. \\nControl file validation set at 'FULL'. All columns need to be mapped.", + "No mapping found for column '%s' in table '%s' in the control file. Control file validation set at 'FULL'. All columns need to be mapped.", "", ""), DATA_LOADER_CONTROL_FILE_MISSING_DATA_MAPPINGS( Category.USER_ERROR, "0160", "The control file is missing data mappings", "", ""), - DATA_LOADER__MISSING_NAMESPACE_OR_TABLE( + DATA_LOADER_MISSING_NAMESPACE_OR_TABLE( Category.USER_ERROR, "0161", "The provided namespace '%s' and/or table name '%s' is incorrect and could not be found", @@ -760,7 +760,7 @@ public enum CoreError implements ScalarDbError { DATA_LOADER_MULTIPLE_MAPPINGS_FOR_COLUMN_FOUND( Category.USER_ERROR, "0165", - "Multiple data mappings found for column '%s' in table '%s'", + "Duplicated data mappings found for column '%s' in table '%s'", "", ""), // @@ -1016,13 +1016,13 @@ public enum CoreError implements ScalarDbError { DATA_LOADER_ERROR_CRUD_EXCEPTION( Category.INTERNAL_ERROR, "0047", - "Something went wrong while trying to save the data. Details %s", + "Something went wrong while trying to save the data. 
Details: %s", "", ""), DATA_LOADER_ERROR_SCAN( Category.INTERNAL_ERROR, "0048", - "Something went wrong while scanning. Are you sure you are running in the correct transaction mode? Details %s", + "Something went wrong while scanning. Are you sure you are running in the correct transaction mode? Details: %s", "", ""), diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileTable.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileTable.java index e1d7c6a9d0..96e096d8ee 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileTable.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileTable.java @@ -22,7 +22,7 @@ public class ControlFileTable { /** The name of the table in ScalarDB. */ @JsonProperty("table_name") - private String tableName; + private String table; /** * A list of mappings defining the correspondence between control file fields and table columns. @@ -35,11 +35,11 @@ public class ControlFileTable { * The mappings list is initialized as an empty list. * * @param namespace The namespace of the table in ScalarDB. - * @param tableName The name of the table in ScalarDB. + * @param table The name of the table in ScalarDB. */ - public ControlFileTable(String namespace, String tableName) { + public ControlFileTable(String namespace, String table) { this.namespace = namespace; - this.tableName = tableName; + this.table = table; this.mappings = new ArrayList<>(); } @@ -48,17 +48,17 @@ public ControlFileTable(String namespace, String tableName) { * constructor is used for deserialization of API requests or control files. * * @param namespace The namespace of the table in ScalarDB. - * @param tableName The name of the table in ScalarDB. + * @param table The name of the table in ScalarDB. 
* @param mappings A list of mappings that define the relationship between control file fields and * table columns. */ @JsonCreator public ControlFileTable( @JsonProperty("namespace") String namespace, - @JsonProperty("table_name") String tableName, + @JsonProperty("table_name") String table, @JsonProperty("mappings") List mappings) { this.namespace = namespace; - this.tableName = tableName; + this.table = table; this.mappings = mappings; } } diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidator.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidator.java index a9b71d066a..4a5013ab54 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidator.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidator.java @@ -123,8 +123,8 @@ private static void checkMultiTableMetadata( String lookupKey = TableMetadataUtil.getTableLookupKey(controlFileTable); if (!tableMetadataMap.containsKey(lookupKey)) { throw new ControlFileValidationException( - CoreError.DATA_LOADER__MISSING_NAMESPACE_OR_TABLE.buildMessage( - controlFileTable.getNamespace(), controlFileTable.getTableName())); + CoreError.DATA_LOADER_MISSING_NAMESPACE_OR_TABLE.buildMessage( + controlFileTable.getNamespace(), controlFileTable.getTable())); } } diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/TableMetadataUtil.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/TableMetadataUtil.java index acfd509d0f..cbd74dbe36 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/TableMetadataUtil.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/TableMetadataUtil.java @@ -106,7 +106,7 @@ public static String getTableLookupKey(ControlFileTable controlFileTable) { return String.format( 
Constants.TABLE_LOOKUP_KEY_FORMAT, controlFileTable.getNamespace(), - controlFileTable.getTableName()); + controlFileTable.getTable()); } /** diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidatorTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidatorTest.java index 67cb8ea9f5..d5dbd654cf 100644 --- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidatorTest.java +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidatorTest.java @@ -109,8 +109,8 @@ void validate_missingTableMetadataGiven_shouldThrowControlFileValidationExceptio controlFile, ControlFileValidationLevel.MAPPED, tableMetadataMap)) .isExactlyInstanceOf(ControlFileValidationException.class) .hasMessage( - CoreError.DATA_LOADER__MISSING_NAMESPACE_OR_TABLE.buildMessage( - controlFileTable.getNamespace(), controlFileTable.getTableName())); + CoreError.DATA_LOADER_MISSING_NAMESPACE_OR_TABLE.buildMessage( + controlFileTable.getNamespace(), controlFileTable.getTable())); } @Test From c7ba6c83d14545db29a15389fe70085652486371 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Fri, 10 Jan 2025 16:21:07 +0530 Subject: [PATCH 48/87] Description added --- .../dataimport/processor/DefaultImportProcessorFactory.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/DefaultImportProcessorFactory.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/DefaultImportProcessorFactory.java index e0fa8aa405..30c1c26085 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/DefaultImportProcessorFactory.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/DefaultImportProcessorFactory.java @@ -2,6 +2,12 @@ 
public class DefaultImportProcessorFactory implements ImportProcessorFactory { + /** + * Create import processor object based in file format in import params + * + * @param params import processor params objects + * @return generated import processor object + */ @Override public ImportProcessor createImportProcessor(ImportProcessorParams params) { ImportProcessor importProcessor; From a566ef24374ce92df3db622e55e89a96c36c65fd Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Mon, 13 Jan 2025 14:50:23 +0530 Subject: [PATCH 49/87] Code updated to support java 8 --- .../core/dataimport/processor/CsvImportProcessor.java | 2 +- .../scalar/db/dataloader/core/dataimport/task/ImportTask.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java index d5182e3012..01f1dbcf10 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java @@ -46,7 +46,7 @@ public List process( try { String header = params.getImportOptions().getCustomHeaderRow(); String delimiter = Character.toString(params.getImportOptions().getDelimiter()); - if (delimiter.isBlank()) { + if (delimiter.trim().isEmpty()) { delimiter = ","; } if (header == null) { diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTask.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTask.java index 437e851ba1..ed54e742fd 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTask.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTask.java @@ -177,7 +177,7 @@ private 
ImportTargetResult importIntoSingleTable( Optional optionalPartitionKey = KeyUtils.createPartitionKeyFromSource( partitionKeyNames, dataTypeByColumnName, mutableSourceRecord); - if (optionalPartitionKey.isEmpty()) { + if (!optionalPartitionKey.isPresent()) { return ImportTargetResult.builder() .namespace(namespace) .tableName(tableName) @@ -190,7 +190,7 @@ private ImportTargetResult importIntoSingleTable( optionalClusteringKey = KeyUtils.createClusteringKeyFromSource( clusteringKeyNames, dataTypeByColumnName, mutableSourceRecord); - if (optionalClusteringKey.isEmpty()) { + if (!optionalClusteringKey.isPresent()) { return ImportTargetResult.builder() .namespace(namespace) .tableName(tableName) From f6c54ec8aaf40d9977c3e932a295479b13ec539a Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Mon, 13 Jan 2025 14:56:40 +0530 Subject: [PATCH 50/87] Updated test code to remove warning --- .../core/dataexport/ExportManagerTest.java | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/ExportManagerTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/ExportManagerTest.java index c41580e7d6..8907f6df4a 100644 --- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/ExportManagerTest.java +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/ExportManagerTest.java @@ -15,7 +15,10 @@ import com.scalar.db.io.IntColumn; import com.scalar.db.io.Key; import java.io.*; +import java.nio.charset.Charset; +import java.nio.file.Files; import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; import java.util.Collections; import java.util.List; import java.util.Map; @@ -66,7 +69,11 @@ void startExport_givenValidDataWithoutPartitionKey_shouldGenerateOutputFile() storage)) .thenReturn(scanner); Mockito.when(scanner.iterator()).thenReturn(results.iterator()); - try (BufferedWriter writer = new 
BufferedWriter(new FileWriter(filePath, true))) { + try (BufferedWriter writer = new BufferedWriter(Files.newBufferedWriter( + Paths.get(filePath), + Charset.defaultCharset(), // Explicitly use the default charset + StandardOpenOption.CREATE, + StandardOpenOption.APPEND))) { exportManager.startExport(exportOptions, mockData, writer); } File file = new File(filePath); @@ -106,7 +113,11 @@ void startExport_givenPartitionKey_shouldGenerateOutputFile() storage)) .thenReturn(scanner); Mockito.when(scanner.iterator()).thenReturn(results.iterator()); - try (BufferedWriter writer = new BufferedWriter(new FileWriter(filePath, true))) { + try (BufferedWriter writer = new BufferedWriter(Files.newBufferedWriter( + Paths.get(filePath), + Charset.defaultCharset(), // Explicitly use the default charset + StandardOpenOption.CREATE, + StandardOpenOption.APPEND))) { exportManager.startExport(exportOptions, mockData, writer); } File file = new File(filePath); @@ -147,7 +158,11 @@ void startExport_givenPartitionKeyAndFileFormatCsv_shouldGenerateOutputFile() storage)) .thenReturn(scanner); Mockito.when(scanner.iterator()).thenReturn(results.iterator()); - try (BufferedWriter writer = new BufferedWriter(new FileWriter(filePath, true))) { + try (BufferedWriter writer = new BufferedWriter(Files.newBufferedWriter( + Paths.get(filePath), + Charset.defaultCharset(), // Explicitly use the default charset + StandardOpenOption.CREATE, + StandardOpenOption.APPEND))) { exportManager.startExport(exportOptions, mockData, writer); } File file = new File(filePath); From ee252d249aecf12c914f664d23647006087ccf11 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Tue, 14 Jan 2025 15:02:40 +0530 Subject: [PATCH 51/87] Added import manager --- .../core/dataimport/ImportManager.java | 139 ++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportManager.java diff --git 
a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportManager.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportManager.java new file mode 100644 index 0000000000..1815c9bf15 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportManager.java @@ -0,0 +1,139 @@ +package com.scalar.db.dataloader.core.dataimport; + +import com.scalar.db.api.*; +import com.scalar.db.dataloader.core.ScalarDBMode; +import com.scalar.db.dataloader.core.dataimport.dao.ScalarDBDao; +import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunkStatus; +import com.scalar.db.dataloader.core.dataimport.processor.ImportProcessor; +import com.scalar.db.dataloader.core.dataimport.processor.ImportProcessorFactory; +import com.scalar.db.dataloader.core.dataimport.processor.ImportProcessorParams; +import com.scalar.db.dataloader.core.dataimport.processor.TableColumnDataTypes; +import com.scalar.db.dataloader.core.dataimport.task.result.ImportTaskResult; +import com.scalar.db.dataloader.core.dataimport.transactionbatch.ImportTransactionBatchResult; +import com.scalar.db.dataloader.core.dataimport.transactionbatch.ImportTransactionBatchStatus; +import java.io.BufferedReader; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import lombok.AllArgsConstructor; +import lombok.NonNull; + +@AllArgsConstructor +public class ImportManager implements ImportEventListener { + + @NonNull private final Map tableMetadata; + @NonNull private final BufferedReader importFileReader; + @NonNull private final ImportOptions importOptions; + private final ImportProcessorFactory importProcessorFactory; + private final List listeners = new ArrayList<>(); + private final ScalarDBMode scalarDBMode; + private final DistributedStorage distributedStorage; + private final DistributedTransactionManager distributedTransactionManager; + private final List importDataChunkStatusList = new 
ArrayList<>(); + + /** + * * Start the import process + * + * @return list of import data chunk status objects + */ + public List startImport() { + ImportProcessorParams params = + ImportProcessorParams.builder() + .scalarDBMode(scalarDBMode) + .importOptions(importOptions) + .tableMetadataByTableName(tableMetadata) + .dao(new ScalarDBDao()) + .distributedTransactionManager(distributedTransactionManager) + .distributedStorage(distributedStorage) + .tableColumnDataTypes(getTableColumnDataTypes()) + .build(); + ImportProcessor processor = importProcessorFactory.createImportProcessor(params); + processor.addListener(this); + // If the data chunk size is 0, then process the entire file in a single data chunk + int dataChunkSize = + importOptions.getDataChunkSize() == 0 + ? Integer.MAX_VALUE + : importOptions.getDataChunkSize(); + return processor.process( + dataChunkSize, importOptions.getTransactionBatchSize(), importFileReader); + } + + public void addListener(ImportEventListener listener) { + listeners.add(listener); + } + + public void removeListener(ImportEventListener listener) { + listeners.remove(listener); + } + + @Override + public void onDataChunkStarted(ImportDataChunkStatus status) { + for (ImportEventListener listener : listeners) { + listener.onDataChunkStarted(status); + } + } + + @Override + public void addOrUpdateDataChunkStatus(ImportDataChunkStatus status) { + synchronized (importDataChunkStatusList) { + for (int i = 0; i < importDataChunkStatusList.size(); i++) { + if (importDataChunkStatusList.get(i).getDataChunkId() == status.getDataChunkId()) { + // Object found, replace it with the new one + importDataChunkStatusList.set(i, status); + return; + } + } + // If object is not found, add it to the list + importDataChunkStatusList.add(status); + } + } + + @Override + public void onDataChunkCompleted(ImportDataChunkStatus status) { + for (ImportEventListener listener : listeners) { + listener.onDataChunkCompleted(status); + } + } + + @Override + 
public void onTransactionBatchStarted(ImportTransactionBatchStatus status) { + for (ImportEventListener listener : listeners) { + listener.onTransactionBatchStarted(status); + } + } + + @Override + public void onTransactionBatchCompleted(ImportTransactionBatchResult batchResult) { + for (ImportEventListener listener : listeners) { + listener.onTransactionBatchCompleted(batchResult); + } + } + + @Override + public void onTaskComplete(ImportTaskResult taskResult) { + for (ImportEventListener listener : listeners) { + listener.onTaskComplete(taskResult); + } + } + + @Override + public void onAllDataChunksCompleted() { + for (ImportEventListener listener : listeners) { + listener.onAllDataChunksCompleted(); + } + } + + public List getImportDataChunkStatusList() { + return importDataChunkStatusList; + } + + public TableColumnDataTypes getTableColumnDataTypes() { + TableColumnDataTypes tableColumnDataTypes = new TableColumnDataTypes(); + tableMetadata.forEach( + (name, metadata) -> + metadata + .getColumnDataTypes() + .forEach((k, v) -> tableColumnDataTypes.addColumnDataType(name, k, v))); + return tableColumnDataTypes; + } +} From 90c48308aa91e568033a76ffd9bcea5b50e652a5 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Thu, 16 Jan 2025 11:25:52 +0530 Subject: [PATCH 52/87] Changes added --- .../com/scalar/db/common/error/CoreError.java | 4 +-- .../core/dataimport/dao/ScalarDBDao.java | 14 +++++--- .../dataimport/dao/ScalarDbStorageManger.java | 33 +++++++++++++++++++ .../dao/ScalarDbTransactionManger.java | 22 +++++++++++++ 4 files changed, 66 insertions(+), 7 deletions(-) create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDbStorageManger.java create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDbTransactionManger.java diff --git a/core/src/main/java/com/scalar/db/common/error/CoreError.java b/core/src/main/java/com/scalar/db/common/error/CoreError.java index 
8250df0293..980fecd04f 100644 --- a/core/src/main/java/com/scalar/db/common/error/CoreError.java +++ b/core/src/main/java/com/scalar/db/common/error/CoreError.java @@ -972,13 +972,13 @@ public enum CoreError implements ScalarDbError { DATA_LOADER_ERROR_CRUD_EXCEPTION( Category.INTERNAL_ERROR, "0047", - "Something went wrong while trying to save the data. Details %s", + "Something went wrong while trying to save the data. Details: %s", "", ""), DATA_LOADER_ERROR_SCAN( Category.INTERNAL_ERROR, "0048", - "Something went wrong while scanning. Are you sure you are running in the correct transaction mode? Details %s", + "Something went wrong while scanning. Are you sure you are running in the correct transaction mode? Details: %s", "", ""), diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDao.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDao.java index e7270de8eb..159523318e 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDao.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDao.java @@ -178,11 +178,11 @@ public List scan( // scan data try { logger.info(SCAN_START_MSG); - Scanner scanner = storage.scan(scan); - List allResults = scanner.all(); - scanner.close(); - logger.info(SCAN_END_MSG); - return allResults; + try (Scanner scanner = storage.scan(scan)) { + List allResults = scanner.all(); + logger.info(SCAN_END_MSG); + return allResults; + } } catch (ExecutionException | IOException e) { throw new ScalarDBDaoException( CoreError.DATA_LOADER_ERROR_SCAN.buildMessage(e.getMessage()), e); @@ -323,6 +323,10 @@ Scan createScan( if (projectionColumns != null && !projectionColumns.isEmpty()) { buildableScanAll.projections(projectionColumns); } + // Can ordering be added? 
+ for (Scan.Ordering sort : sortOrders) { + buildableScanAll.ordering(sort); + } // limit if (limit > 0) { diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDbStorageManger.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDbStorageManger.java new file mode 100644 index 0000000000..15ada477ce --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDbStorageManger.java @@ -0,0 +1,33 @@ +package com.scalar.db.dataloader.core.dataimport.dao; + +import com.scalar.db.api.DistributedStorage; +import com.scalar.db.api.DistributedStorageAdmin; +import com.scalar.db.service.StorageFactory; +import java.io.IOException; +import javax.annotation.Nullable; + +public class ScalarDbStorageManger { + + @Nullable private final DistributedStorage storage; + private final DistributedStorageAdmin storageAdmin; + + /** + * Class constructor + * + * @param storageFactory Factory to create all the necessary ScalarDB data managers + */ + public ScalarDbStorageManger(StorageFactory storageFactory) throws IOException { + storage = storageFactory.getStorage(); + storageAdmin = storageFactory.getStorageAdmin(); + } + + /** @return storage for ScalarDB connection that is running in storage mode */ + public DistributedStorage getDistributedStorage() { + return storage; + } + + /** @return Distributed storage admin for ScalarDB admin operations */ + public DistributedStorageAdmin getDistributedStorageAdmin() { + return storageAdmin; + } +} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDbTransactionManger.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDbTransactionManger.java new file mode 100644 index 0000000000..8993c4fab9 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDbTransactionManger.java @@ -0,0 +1,22 @@ +package 
com.scalar.db.dataloader.core.dataimport.dao; + +import com.scalar.db.api.DistributedTransactionManager; +import com.scalar.db.service.TransactionFactory; +import java.io.IOException; + +public class ScalarDbTransactionManger { + + private final DistributedTransactionManager transactionManager; + + public ScalarDbTransactionManger(TransactionFactory transactionFactory) throws IOException { + transactionManager = transactionFactory.getTransactionManager(); + } + + /** + * @return Distributed Transaction manager for ScalarDB connection that is running in transaction + * mode + */ + public DistributedTransactionManager getDistributedTransactionManager() { + return transactionManager; + } +} From 39c43decbdcc51769a2fe44f3acd795754944ab9 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Thu, 16 Jan 2025 12:04:09 +0530 Subject: [PATCH 53/87] Removed scalardb manager file --- .../core/dataimport/dao/ScalarDBManager.java | 68 ------------------- 1 file changed, 68 deletions(-) delete mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBManager.java diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBManager.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBManager.java deleted file mode 100644 index 1016eaaba4..0000000000 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBManager.java +++ /dev/null @@ -1,68 +0,0 @@ -package com.scalar.db.dataloader.core.dataimport.dao; - -import com.scalar.db.api.DistributedStorage; -import com.scalar.db.api.DistributedStorageAdmin; -import com.scalar.db.api.DistributedTransactionAdmin; -import com.scalar.db.api.DistributedTransactionManager; -import com.scalar.db.service.StorageFactory; -import com.scalar.db.service.TransactionFactory; -import java.io.IOException; -import javax.annotation.Nullable; - -/** - * A manager to retrieve the various ScalarDB managers based on the 
running mode - * - * @author Yves Peckstadt - */ -public class ScalarDBManager { - - /* Distributed storage for ScalarDB connection that is running in storage mode. */ - @Nullable private final DistributedStorage storage; - /* Distributed Transaction manager for ScalarDB connection that is running in transaction mode */ - private final DistributedTransactionManager transactionManager; - /* Distributed storage admin for ScalarDB admin operations */ - private final DistributedStorageAdmin storageAdmin; - private final DistributedTransactionAdmin transactionAdmin; - - /** - * Class constructor - * - * @param storageFactory Factory to create all the necessary ScalarDB data managers - */ - public ScalarDBManager(StorageFactory storageFactory) throws IOException { - storage = storageFactory.getStorage(); - storageAdmin = storageFactory.getStorageAdmin(); - transactionManager = null; - transactionAdmin = null; - } - - /** - * Class constructor - * - * @param transactionFactory Factory to create all the necessary ScalarDB data managers - */ - public ScalarDBManager(TransactionFactory transactionFactory) throws IOException { - transactionManager = transactionFactory.getTransactionManager(); - transactionAdmin = transactionFactory.getTransactionAdmin(); - storageAdmin = null; - storage = null; - } - - /** @return storage for ScalarDB connection that is running in storage mode */ - public DistributedStorage getDistributedStorage() { - return storage; - } - - /** - * @return Distributed Transaction manager for ScalarDB connection that is running in transaction - * mode - */ - public DistributedTransactionManager getDistributedTransactionManager() { - return transactionManager; - } - - /** @return Distributed storage admin for ScalarDB admin operations */ - public DistributedStorageAdmin getDistributedStorageAdmin() { - return storageAdmin; - } -} From 4df4acdc48006a7275b45d680402f04c2c4f74d3 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Mon, 20 Jan 2025 10:41:30 +0530 Subject: 
[PATCH 54/87] Removed wildcard import --- .../db/dataloader/core/dataimport/dao/ScalarDBDao.java | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDao.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDao.java index 159523318e..c2a25c0de6 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDao.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDao.java @@ -1,7 +1,15 @@ package com.scalar.db.dataloader.core.dataimport.dao; -import com.scalar.db.api.*; +import com.scalar.db.api.DistributedStorage; +import com.scalar.db.api.DistributedTransaction; +import com.scalar.db.api.Get; +import com.scalar.db.api.GetBuilder; +import com.scalar.db.api.Put; import com.scalar.db.api.PutBuilder.Buildable; +import com.scalar.db.api.Result; +import com.scalar.db.api.Scan; +import com.scalar.db.api.ScanBuilder; +import com.scalar.db.api.Scanner; import com.scalar.db.common.error.CoreError; import com.scalar.db.dataloader.core.ScanRange; import com.scalar.db.exception.storage.ExecutionException; From f4f253e67fbc3dc7ef69a353f5e28a6fac272a5b Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Tue, 28 Jan 2025 14:32:35 +0530 Subject: [PATCH 55/87] Changes --- .../core/dataimport/dao/ScalarDBDao.java | 36 +++++++++++-------- ...anger.java => ScalarDbStorageManager.java} | 8 ++--- ...r.java => ScalarDbTransactionManager.java} | 8 ++--- 3 files changed, 29 insertions(+), 23 deletions(-) rename data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/{ScalarDbStorageManger.java => ScalarDbStorageManager.java} (73%) rename data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/{ScalarDbTransactionManger.java => ScalarDbTransactionManager.java} (69%) diff --git 
a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDao.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDao.java index c2a25c0de6..a4497cb09e 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDao.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDao.java @@ -21,6 +21,7 @@ import java.util.List; import java.util.NoSuchElementException; import java.util.Optional; +import javax.annotation.Nullable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -285,10 +286,10 @@ public Scanner createScanner( public Scanner createScanner( String namespace, String table, - Key partitionKey, - ScanRange scanRange, - List sortOrders, - List projectionColumns, + @Nullable Key partitionKey, + @Nullable ScanRange scanRange, + @Nullable List sortOrders, + @Nullable List projectionColumns, int limit, DistributedStorage storage) throws ScalarDBDaoException { @@ -317,10 +318,10 @@ public Scanner createScanner( Scan createScan( String namespace, String table, - Key partitionKey, - ScanRange scanRange, - List sortOrders, - List projectionColumns, + @Nullable Key partitionKey, + @Nullable ScanRange scanRange, + @Nullable List sortOrders, + @Nullable List projectionColumns, int limit) { // If no partition key is provided a scan all is created if (partitionKey == null) { @@ -331,9 +332,11 @@ Scan createScan( if (projectionColumns != null && !projectionColumns.isEmpty()) { buildableScanAll.projections(projectionColumns); } - // Can ordering be added? 
- for (Scan.Ordering sort : sortOrders) { - buildableScanAll.ordering(sort); + + if (sortOrders != null && !sortOrders.isEmpty()) { + for (Scan.Ordering sort : sortOrders) { + buildableScanAll.ordering(sort); + } } // limit @@ -361,8 +364,10 @@ Scan createScan( } // clustering order - for (Scan.Ordering sort : sortOrders) { - buildableScan.ordering(sort); + if (sortOrders != null && !sortOrders.isEmpty()) { + for (Scan.Ordering sort : sortOrders) { + buildableScan.ordering(sort); + } } // projections @@ -386,7 +391,8 @@ Scan createScan( * @param clusteringKey Optional clustering key for get * @return ScalarDB Get instance */ - private Get createGetWith(String namespace, String table, Key partitionKey, Key clusteringKey) { + private Get createGetWith( + String namespace, String table, Key partitionKey, @Nullable Key clusteringKey) { GetBuilder.BuildableGetWithPartitionKey buildable = Get.newBuilder().namespace(namespace).table(table).partitionKey(partitionKey); if (clusteringKey != null) { @@ -409,7 +415,7 @@ private Put createPutWith( String namespace, String table, Key partitionKey, - Key clusteringKey, + @Nullable Key clusteringKey, List> columns) { Buildable buildable = Put.newBuilder().namespace(namespace).table(table).partitionKey(partitionKey); diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDbStorageManger.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDbStorageManager.java similarity index 73% rename from data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDbStorageManger.java rename to data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDbStorageManager.java index 15ada477ce..0ed47cc647 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDbStorageManger.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDbStorageManager.java 
@@ -6,7 +6,7 @@ import java.io.IOException; import javax.annotation.Nullable; -public class ScalarDbStorageManger { +public class ScalarDbStorageManager { @Nullable private final DistributedStorage storage; private final DistributedStorageAdmin storageAdmin; @@ -16,17 +16,17 @@ public class ScalarDbStorageManger { * * @param storageFactory Factory to create all the necessary ScalarDB data managers */ - public ScalarDbStorageManger(StorageFactory storageFactory) throws IOException { + public ScalarDbStorageManager(StorageFactory storageFactory) throws IOException { storage = storageFactory.getStorage(); storageAdmin = storageFactory.getStorageAdmin(); } - /** @return storage for ScalarDB connection that is running in storage mode */ + /** Returns distributed storage for ScalarDB connection that is running in storage mode */ public DistributedStorage getDistributedStorage() { return storage; } - /** @return Distributed storage admin for ScalarDB admin operations */ + /** Returns distributed storage admin for ScalarDB admin operations */ public DistributedStorageAdmin getDistributedStorageAdmin() { return storageAdmin; } diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDbTransactionManger.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDbTransactionManager.java similarity index 69% rename from data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDbTransactionManger.java rename to data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDbTransactionManager.java index 8993c4fab9..7999c01032 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDbTransactionManger.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDbTransactionManager.java @@ -4,17 +4,17 @@ import com.scalar.db.service.TransactionFactory; import java.io.IOException; -public class 
ScalarDbTransactionManger { +public class ScalarDbTransactionManager { private final DistributedTransactionManager transactionManager; - public ScalarDbTransactionManger(TransactionFactory transactionFactory) throws IOException { + public ScalarDbTransactionManager(TransactionFactory transactionFactory) throws IOException { transactionManager = transactionFactory.getTransactionManager(); } /** - * @return Distributed Transaction manager for ScalarDB connection that is running in transaction - * mode + * Returns distributed Transaction manager for ScalarDB connection that is running in transaction + * mode */ public DistributedTransactionManager getDistributedTransactionManager() { return transactionManager; From c9d01cb9fb313ff2ebd1b34e3b34adc537782616 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Tue, 28 Jan 2025 16:47:46 +0530 Subject: [PATCH 56/87] Added default case in switch to resolve sportbugs warning --- .../db/dataloader/core/dataexport/producer/CsvProducerTask.java | 2 ++ .../core/dataexport/producer/JsonLineProducerTask.java | 2 ++ .../dataloader/core/dataexport/producer/JsonProducerTask.java | 2 ++ 3 files changed, 6 insertions(+) diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/CsvProducerTask.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/CsvProducerTask.java index 64b6936574..1d69f2ffa2 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/CsvProducerTask.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/CsvProducerTask.java @@ -149,6 +149,8 @@ private String convertToString(Result result, String columnName, DataType dataTy case TEXT: value = result.getText(columnName); break; + default: + break; } return value; } diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/JsonLineProducerTask.java 
b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/JsonLineProducerTask.java index 9207219491..a8e1de0550 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/JsonLineProducerTask.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/JsonLineProducerTask.java @@ -123,6 +123,8 @@ private void addToObjectNode( byte[] encoded = Base64.getEncoder().encode(result.getBlobAsBytes(columnName)); objectNode.put(columnName, new String(encoded, Charset.defaultCharset())); break; + default: + break; } } } diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/JsonProducerTask.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/JsonProducerTask.java index cfaa4cb336..adde3db3cd 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/JsonProducerTask.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/JsonProducerTask.java @@ -134,6 +134,8 @@ private void addToObjectNode( byte[] encoded = Base64.getEncoder().encode(result.getBlobAsBytes(columnName)); objectNode.put(columnName, new String(encoded, Charset.defaultCharset())); break; + default: + break; } } } From d453e6cfaaa6c1d46f12f0204d917e58a7475639 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Thu, 30 Jan 2025 14:26:59 +0530 Subject: [PATCH 57/87] Change wildcard imports --- .../db/dataloader/core/dataimport/ImportManager.java | 4 +++- .../dataloader/core/dataimport/task/ImportStorageTask.java | 3 ++- .../db/dataloader/core/dataimport/task/ImportTask.java | 7 ++++++- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportManager.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportManager.java index 1815c9bf15..cbc0fc02a2 100644 --- 
a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportManager.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportManager.java @@ -1,6 +1,8 @@ package com.scalar.db.dataloader.core.dataimport; -import com.scalar.db.api.*; +import com.scalar.db.api.DistributedStorage; +import com.scalar.db.api.DistributedTransactionManager; +import com.scalar.db.api.TableMetadata; import com.scalar.db.dataloader.core.ScalarDBMode; import com.scalar.db.dataloader.core.dataimport.dao.ScalarDBDao; import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunkStatus; diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportStorageTask.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportStorageTask.java index 2211f054bf..07d3458072 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportStorageTask.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportStorageTask.java @@ -5,7 +5,8 @@ import com.scalar.db.dataloader.core.dataimport.dao.ScalarDBDaoException; import com.scalar.db.io.Column; import com.scalar.db.io.Key; -import java.util.*; +import java.util.List; +import java.util.Optional; public class ImportStorageTask extends ImportTask { diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTask.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTask.java index ed54e742fd..435ffbd359 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTask.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTask.java @@ -1,6 +1,11 @@ package com.scalar.db.dataloader.core.dataimport.task; -import static com.scalar.db.dataloader.core.dataimport.task.ImportTaskConstants.*; +import static 
com.scalar.db.dataloader.core.dataimport.task.ImportTaskConstants.ERROR_COULD_NOT_FIND_CLUSTERING_KEY; +import static com.scalar.db.dataloader.core.dataimport.task.ImportTaskConstants.ERROR_COULD_NOT_FIND_PARTITION_KEY; +import static com.scalar.db.dataloader.core.dataimport.task.ImportTaskConstants.ERROR_DATA_ALREADY_EXISTS; +import static com.scalar.db.dataloader.core.dataimport.task.ImportTaskConstants.ERROR_DATA_NOT_FOUND; +import static com.scalar.db.dataloader.core.dataimport.task.ImportTaskConstants.ERROR_TABLE_METADATA_MISSING; +import static com.scalar.db.dataloader.core.dataimport.task.ImportTaskConstants.ERROR_UPSERT_INSERT_MISSING_COLUMNS; import com.fasterxml.jackson.databind.node.ObjectNode; import com.scalar.db.api.Result; From 6998b6869e00449a9587cdff19305b2f50115be0 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Mon, 3 Feb 2025 17:44:23 +0530 Subject: [PATCH 58/87] Changes --- .../controlfile/ControlFileValidator.java | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidator.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidator.java index 4a5013ab54..6c31a851bd 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidator.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidator.java @@ -50,10 +50,11 @@ public static void validate( // Make sure table metadata is provided for each table mentioned in the data mappings checkMultiTableMetadata(tableMetadataMap, controlFileTable); - // Make sure the specified target columns in the mappings actually exist - checkIfTargetColumnExist(tableMetadataMap, controlFileTable); TableMetadata tableMetadata = tableMetadataMap.get(lookupKey); + // Make sure the specified target columns in the mappings actually exist + 
checkIfTargetColumnExist(tableMetadata, controlFileTable); + // Make sure all table columns are mapped if (controlFileValidationMode == ControlFileValidationLevel.FULL) { checkIfAllColumnsAreMapped(tableMetadata, mappedTargetColumns, controlFileTable); @@ -102,7 +103,7 @@ private static void checkIfAllColumnsAreMapped( private static void checkEmptyMappings(ControlFile controlFile) throws ControlFileValidationException { // Make sure data mapping for at least one table is provided - if (controlFile.getTables() == null || controlFile.getTables().isEmpty()) { + if (controlFile.getTables().isEmpty()) { throw new ControlFileValidationException( CoreError.DATA_LOADER_CONTROL_FILE_MISSING_DATA_MAPPINGS.buildMessage()); } @@ -131,17 +132,15 @@ private static void checkMultiTableMetadata( /** * Check that the mapped target column exists in the provided table metadata. * - * @param tableMetadataMap Metadata for one or more ScalarDB tables + * @param tableMetadata Metadata for the table * @param controlFileTable Control file entry for one ScalarDB table * @throws ControlFileValidationException when the target column does not exist */ private static void checkIfTargetColumnExist( - Map tableMetadataMap, ControlFileTable controlFileTable) + TableMetadata tableMetadata, ControlFileTable controlFileTable) throws ControlFileValidationException { String lookupKey = TableMetadataUtil.getTableLookupKey(controlFileTable); - TableMetadata tableMetadata = - tableMetadataMap.get(TableMetadataUtil.getTableLookupKey(controlFileTable)); LinkedHashSet columnNames = tableMetadata.getColumnNames(); for (ControlFileTableFieldMapping mapping : controlFileTable.getMappings()) { From 1996865af5146855d39fa309059b3a37f5b27c3b Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Tue, 4 Feb 2025 11:34:22 +0530 Subject: [PATCH 59/87] Reverted new line removal --- .../scalar/db/dataloader/core/dataimport/dao/ScalarDBDao.java | 1 + 1 file changed, 1 insertion(+) diff --git 
a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDao.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDao.java index 21e3910f7c..a4497cb09e 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDao.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDao.java @@ -338,6 +338,7 @@ Scan createScan( buildableScanAll.ordering(sort); } } + // limit if (limit > 0) { buildableScanAll.limit(limit); From 318784787fb0bb46a439de3dd98a3684893ba90c Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Tue, 4 Feb 2025 15:47:32 +0530 Subject: [PATCH 60/87] Changes to util function calls --- .../core/dataimport/task/ImportTask.java | 22 +++++----- .../db/dataloader/core/util/ColumnUtils.java | 42 ++++++------------- .../dataloader/core/util/ColumnUtilsTest.java | 12 +----- 3 files changed, 26 insertions(+), 50 deletions(-) diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTask.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTask.java index 435ffbd359..8038e70ecf 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTask.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTask.java @@ -101,12 +101,12 @@ private List startMultiTableImportProcess( && !mutableSourceRecord.has(mapping.getTargetColumn())) { String errorMessage = CoreError.DATA_LOADER_MISSING_SOURCE_FIELD.buildMessage( - mapping.getSourceField(), controlFileTable.getTableName()); + mapping.getSourceField(), controlFileTable.getTable()); ImportTargetResult targetResult = ImportTargetResult.builder() .namespace(controlFileTable.getNamespace()) - .tableName(controlFileTable.getTableName()) + .tableName(controlFileTable.getTable()) .errors(Collections.singletonList(errorMessage)) 
.status(ImportTargetResultStatus.VALIDATION_FAILED) .build(); @@ -125,7 +125,7 @@ private List startMultiTableImportProcess( ImportTargetResult result = importIntoSingleTable( controlFileTable.getNamespace(), - controlFileTable.getTableName(), + controlFileTable.getTable(), tableMetadata, dataTypesByColumns, controlFileTable, @@ -168,7 +168,8 @@ private ImportTargetResult importIntoSingleTable( clusteringKeyNames, columnNames, mutableSourceRecord, - checkForMissingColumns); + checkForMissingColumns, + tableMetadata); if (!validationResult.isValid()) { return ImportTargetResult.builder() @@ -227,7 +228,7 @@ && shouldRevalidateMissingColumns(importOptions, checkForMissingColumns)) { ImportSourceRecordValidationResult validationResultForMissingColumns = new ImportSourceRecordValidationResult(); ImportSourceRecordValidator.checkMissingColumns( - mutableSourceRecord, columnNames, validationResultForMissingColumns); + mutableSourceRecord, columnNames, validationResultForMissingColumns, tableMetadata); if (!validationResultForMissingColumns.isValid()) { return ImportTargetResult.builder() .namespace(namespace) @@ -268,10 +269,7 @@ && shouldRevalidateMissingColumns(importOptions, checkForMissingColumns)) { optionalScalarDBResult.orElse(null), mutableSourceRecord, importOptions.isIgnoreNullValues(), - partitionKeyNames, - clusteringKeyNames, - columnNames, - dataTypeByColumnName); + tableMetadata); } catch (Base64Exception | ColumnParsingException e) { return ImportTargetResult.builder() .namespace(namespace) @@ -325,13 +323,15 @@ private ImportSourceRecordValidationResult validateSourceRecord( LinkedHashSet clusteringKeyNames, LinkedHashSet columnNames, ObjectNode mutableSourceRecord, - boolean checkForMissingColumns) { + boolean checkForMissingColumns, + TableMetadata tableMetadata) { return ImportSourceRecordValidator.validateSourceRecord( partitionKeyNames, clusteringKeyNames, columnNames, mutableSourceRecord, - checkForMissingColumns); + checkForMissingColumns, + 
tableMetadata); } private boolean shouldRevalidateMissingColumns( diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ColumnUtils.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ColumnUtils.java index 91008df3d9..8574336d2f 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ColumnUtils.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ColumnUtils.java @@ -1,9 +1,8 @@ package com.scalar.db.dataloader.core.util; -import static com.scalar.db.dataloader.core.util.TableMetadataUtil.isMetadataColumn; - import com.fasterxml.jackson.databind.JsonNode; import com.scalar.db.api.Result; +import com.scalar.db.api.TableMetadata; import com.scalar.db.common.error.CoreError; import com.scalar.db.dataloader.core.ColumnInfo; import com.scalar.db.dataloader.core.exception.Base64Exception; @@ -17,6 +16,7 @@ import com.scalar.db.io.FloatColumn; import com.scalar.db.io.IntColumn; import com.scalar.db.io.TextColumn; +import com.scalar.db.transaction.consensuscommit.ConsensusCommitUtils; import java.util.*; import javax.annotation.Nullable; @@ -100,10 +100,6 @@ public static Column createColumnFromValue( * @param scalarDBResult result record * @param sourceRecord source data * @param ignoreNullValues ignore null values or not - * @param partitionKeyNames partition key names - * @param clusteringKeyNames clustering key names - * @param columnNames column names - * @param dataTypesByColumns data types of columns * @return list of columns * @throws Base64Exception if an error occurs while base64 decoding */ @@ -111,22 +107,22 @@ public static List> getColumnsFromResult( Result scalarDBResult, JsonNode sourceRecord, boolean ignoreNullValues, - Set partitionKeyNames, - Set clusteringKeyNames, - Set columnNames, - Map dataTypesByColumns) + TableMetadata tableMetadata) throws Base64Exception, ColumnParsingException { List> columns = new ArrayList<>(); - Set columnsToIgnore = 
getColumnsToIgnore(partitionKeyNames, clusteringKeyNames); - - for (String columnName : columnNames) { - if (isMetadataColumn(columnName, columnsToIgnore, columnNames)) { + for (String columnName : tableMetadata.getColumnNames()) { + if (ConsensusCommitUtils.isTransactionMetaColumn(columnName, tableMetadata)) { continue; } Column column = - getColumn(scalarDBResult, sourceRecord, columnName, ignoreNullValues, dataTypesByColumns); + getColumn( + scalarDBResult, + sourceRecord, + columnName, + ignoreNullValues, + tableMetadata.getColumnDataTypes()); if (column != null) { columns.add(column); @@ -145,25 +141,13 @@ public static List> getColumnsFromResult( */ private static Set getColumnsToIgnore( Set partitionKeyNames, Set clusteringKeyNames) { - Set columnsToIgnore = new HashSet<>(TableMetadataUtil.getMetadataColumns()); + Set columnsToIgnore = + new HashSet<>(ConsensusCommitUtils.getTransactionMetaColumns().keySet()); columnsToIgnore.addAll(partitionKeyNames); columnsToIgnore.addAll(clusteringKeyNames); return columnsToIgnore; } - /** - * Checks if a column is a metadata column - * - * @param columnName column name - * @param columnsToIgnore set of columns to ignore - * @param columnNames set of column names - * @return if column is a metadata column or not - */ - private static boolean isMetadataColumn( - String columnName, Set columnsToIgnore, Set columnNames) { - return TableMetadataUtil.isMetadataColumn(columnName, columnsToIgnore, columnNames); - } - /** * Get columns from result data * diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ColumnUtilsTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ColumnUtilsTest.java index cefb0dcb66..f38ca7fd72 100644 --- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ColumnUtilsTest.java +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ColumnUtilsTest.java @@ -36,7 +36,6 @@ class ColumnUtilsTest { private static final float 
FLOAT_VALUE = 2.78f; private static final TableMetadata mockMetadata = UnitTestUtils.createTestTableMetadata(); private static final ObjectNode sourceRecord = UnitTestUtils.getOutputDataWithMetadata(); - private static final Map dataTypesByColumns = UnitTestUtils.getColumnData(); private static final Map> values = UnitTestUtils.createTestValues(); private static final Result scalarDBResult = new ResultImpl(values, mockMetadata); @@ -124,14 +123,7 @@ void createColumnFromValue_invalidBase64_throwsBase64Exception() { void getColumnsFromResult_withValidData_shouldReturnColumns() throws Base64Exception, ColumnParsingException { List> columns = - ColumnUtils.getColumnsFromResult( - scalarDBResult, - sourceRecord, - false, - mockMetadata.getPartitionKeyNames(), - mockMetadata.getClusteringKeyNames(), - mockMetadata.getColumnNames(), - dataTypesByColumns); - Assertions.assertEquals(4, columns.size()); + ColumnUtils.getColumnsFromResult(scalarDBResult, sourceRecord, false, mockMetadata); + Assertions.assertEquals(7, columns.size()); } } From 7d7ec91f9e31e14fbdb91ffeadf361574b9f0d8a Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Tue, 4 Feb 2025 17:42:07 +0530 Subject: [PATCH 61/87] Revert "Merge export tasks branch after resolving conflicts" This reverts commit da2e241a4a20be4b9af8c42cd199a3195f22ac46, reversing changes made to 1996865af5146855d39fa309059b3a37f5b27c3b. 
--- .../com/scalar/db/common/error/CoreError.java | 6 - .../core/DataLoaderObjectMapper.java | 14 - .../core/dataexport/ExportManager.java | 310 ------------------ .../dataexport/producer/CsvProducerTask.java | 155 --------- .../producer/JsonLineProducerTask.java | 126 ------- .../dataexport/producer/JsonProducerTask.java | 137 -------- .../dataexport/producer/ProducerResult.java | 13 - .../dataexport/producer/ProducerTask.java | 39 --- .../producer/ProducerTaskFactory.java | 57 ---- .../core/dataimport/dao/ScalarDBDao.java | 2 +- .../core/dataexport/ExportManagerTest.java | 178 ---------- .../producer/CsvProducerTaskTest.java | 64 ---- .../producer/JsonLineProducerTaskTest.java | 64 ---- .../producer/JsonProducerTaskTest.java | 63 ---- .../producer/ProducerTaskFactoryTest.java | 55 ---- 15 files changed, 1 insertion(+), 1282 deletions(-) delete mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/DataLoaderObjectMapper.java delete mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/ExportManager.java delete mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/CsvProducerTask.java delete mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/JsonLineProducerTask.java delete mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/JsonProducerTask.java delete mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerResult.java delete mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerTask.java delete mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerTaskFactory.java delete mode 100644 data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/ExportManagerTest.java delete mode 100644 
data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/producer/CsvProducerTaskTest.java delete mode 100644 data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/producer/JsonLineProducerTaskTest.java delete mode 100644 data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/producer/JsonProducerTaskTest.java delete mode 100644 data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerTaskFactoryTest.java diff --git a/core/src/main/java/com/scalar/db/common/error/CoreError.java b/core/src/main/java/com/scalar/db/common/error/CoreError.java index a11f8f3eaf..b49cb6d7b9 100644 --- a/core/src/main/java/com/scalar/db/common/error/CoreError.java +++ b/core/src/main/java/com/scalar/db/common/error/CoreError.java @@ -760,12 +760,6 @@ public enum CoreError implements ScalarDbError { "The underlying-storage data type %s is not supported as the ScalarDB %s data type: %s", "", ""), - DATA_LOADER_VALUE_TO_STRING_CONVERSION_FAILED( - Category.USER_ERROR, - "0168", - "Something went wrong while converting the ScalarDB values to strings. The table metadata and Value datatype probably do not match. 
Details: %s", - "", - ""), DATA_LOADER_MISSING_NAMESPACE_OR_TABLE( Category.USER_ERROR, "0165", "Missing namespace or table: %s, %s", "", ""), DATA_LOADER_TABLE_METADATA_RETRIEVAL_FAILED( diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/DataLoaderObjectMapper.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/DataLoaderObjectMapper.java deleted file mode 100644 index d90fd49b65..0000000000 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/DataLoaderObjectMapper.java +++ /dev/null @@ -1,14 +0,0 @@ -package com.scalar.db.dataloader.core; - -import com.fasterxml.jackson.annotation.JsonInclude; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; - -public class DataLoaderObjectMapper extends ObjectMapper { - - public DataLoaderObjectMapper() { - super(); - this.setSerializationInclusion(JsonInclude.Include.NON_NULL); - this.registerModule(new JavaTimeModule()); - } -} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/ExportManager.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/ExportManager.java deleted file mode 100644 index a58f67c66a..0000000000 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/ExportManager.java +++ /dev/null @@ -1,310 +0,0 @@ -package com.scalar.db.dataloader.core.dataexport; - -import com.scalar.db.api.DistributedStorage; -import com.scalar.db.api.Result; -import com.scalar.db.api.Scanner; -import com.scalar.db.api.TableMetadata; -import com.scalar.db.dataloader.core.FileFormat; -import com.scalar.db.dataloader.core.dataexport.producer.ProducerTask; -import com.scalar.db.dataloader.core.dataexport.producer.ProducerTaskFactory; -import com.scalar.db.dataloader.core.dataexport.validation.ExportOptionsValidationException; -import com.scalar.db.dataloader.core.dataexport.validation.ExportOptionsValidator; -import 
com.scalar.db.dataloader.core.dataimport.dao.ScalarDBDao; -import com.scalar.db.dataloader.core.dataimport.dao.ScalarDBDaoException; -import com.scalar.db.dataloader.core.util.CsvUtil; -import com.scalar.db.dataloader.core.util.TableMetadataUtil; -import com.scalar.db.io.DataType; -import com.scalar.db.transaction.consensuscommit.ConsensusCommitUtils; -import java.io.BufferedWriter; -import java.io.IOException; -import java.io.Writer; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; -import lombok.RequiredArgsConstructor; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@RequiredArgsConstructor -public class ExportManager { - private static final Logger logger = LoggerFactory.getLogger(ExportManager.class); - - private final DistributedStorage storage; - private final ScalarDBDao dao; - private final ProducerTaskFactory producerTaskFactory; - private final Object lock = new Object(); - - /** - * Starts the export process - * - * @param exportOptions Export options - * @param tableMetadata Metadata for a single ScalarDB table - * @param writer Writer to write the exported data - */ - public ExportReport startExport( - ExportOptions exportOptions, TableMetadata tableMetadata, Writer writer) { - ExportReport exportReport = new ExportReport(); - try { - validateExportOptions(exportOptions, tableMetadata); - Map dataTypeByColumnName = tableMetadata.getColumnDataTypes(); - handleTransactionMetadata(exportOptions, tableMetadata); - - if (exportOptions.getOutputFileFormat() == FileFormat.CSV - && !exportOptions.isExcludeHeaderRow()) { - writeCsvHeaderRow(exportOptions, tableMetadata, dataTypeByColumnName, writer); - } - - int maxThreadCount = - exportOptions.getMaxThreadCount() == 0 - ? 
Runtime.getRuntime().availableProcessors() - : exportOptions.getMaxThreadCount(); - ExecutorService executorService = Executors.newFixedThreadPool(maxThreadCount); - - BufferedWriter bufferedWriter = new BufferedWriter(writer); - boolean isJson = exportOptions.getOutputFileFormat() == FileFormat.JSON; - - try (Scanner scanner = createScanner(exportOptions, dao, storage)) { - if (isJson) { - bufferedWriter.write("["); - } - - Iterator iterator = scanner.iterator(); - AtomicBoolean isFirstBatch = new AtomicBoolean(true); - - while (iterator.hasNext()) { - List dataChunk = fetchDataChunk(iterator, exportOptions.getDataChunkSize()); - executorService.submit( - () -> - processDataChunk( - exportOptions, - tableMetadata, - dataTypeByColumnName, - dataChunk, - bufferedWriter, - isJson, - isFirstBatch, - exportReport)); - } - executorService.shutdown(); - if (executorService.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS)) { - logger.info("All tasks completed"); - } else { - logger.error("Timeout occurred while waiting for tasks to complete"); - // TODO: handle this - } - if (isJson) { - bufferedWriter.write("]"); - } - bufferedWriter.flush(); - } catch (InterruptedException | IOException e) { - logger.error("Error during export: {}", e.getMessage()); - } - } catch (ExportOptionsValidationException | IOException | ScalarDBDaoException e) { - logger.error("Error during export: {}", e.getMessage()); - } - return exportReport; - } - - /** - * * To process result data chunk - * - * @param exportOptions export options - * @param tableMetadata metadata of the table - * @param dataTypeByColumnName map of columns and their data types - * @param dataChunk a list with result data - * @param bufferedWriter writer object - * @param isJson if data format is json or not - * @param isFirstBatch is the data going to be process is the first batch or not - * @param exportReport export report which will be updated once the data chunk is processed - */ - private void processDataChunk( 
- ExportOptions exportOptions, - TableMetadata tableMetadata, - Map dataTypeByColumnName, - List dataChunk, - BufferedWriter bufferedWriter, - boolean isJson, - AtomicBoolean isFirstBatch, - ExportReport exportReport) { - ProducerTask producerTask = - producerTaskFactory.createProducerTask( - exportOptions.getOutputFileFormat(), - exportOptions.getProjectionColumns(), - tableMetadata, - dataTypeByColumnName); - String dataChunkContent = producerTask.process(dataChunk, exportReport); - - try { - synchronized (lock) { - if (isJson && !isFirstBatch.getAndSet(false)) { - bufferedWriter.write(","); - } - bufferedWriter.write(dataChunkContent); - } - } catch (IOException e) { - logger.error("Error while writing data chunk: {}", e.getMessage()); - } - } - - /** - * * To split result into batches - * - * @param iterator iterator which parse results - * @param batchSize size of batch - * @return a list of results split to batches - */ - private List fetchDataChunk(Iterator iterator, int batchSize) { - List batch = new ArrayList<>(); - int count = 0; - while (iterator.hasNext() && count < batchSize) { - batch.add(iterator.next()); - count++; - } - return batch; - } - - /** - * * To validate export options - * - * @param exportOptions export options - * @param tableMetadata metadata of the table - * @throws ExportOptionsValidationException thrown if any of the export option validation fails - */ - private void validateExportOptions(ExportOptions exportOptions, TableMetadata tableMetadata) - throws ExportOptionsValidationException { - ExportOptionsValidator.validate(exportOptions, tableMetadata); - } - - /** - * * To update projection columns of export options if include metadata options is enabled - * - * @param exportOptions export options - * @param tableMetadata metadata of the table - */ - private void handleTransactionMetadata(ExportOptions exportOptions, TableMetadata tableMetadata) { - if (exportOptions.isIncludeTransactionMetadata() - && 
!exportOptions.getProjectionColumns().isEmpty()) { - List projectionMetadata = - TableMetadataUtil.populateProjectionsWithMetadata( - tableMetadata, exportOptions.getProjectionColumns()); - exportOptions.setProjectionColumns(projectionMetadata); - } - } - - /** - * * To create and write the header row to the CSV export file - * - * @param exportOptions export options - * @param tableMetadata metadata of the table - * @param dataTypeByColumnName map of columns and their data types - * @param writer writer object - * @throws IOException throws if any exception occur in file operations - */ - private void writeCsvHeaderRow( - ExportOptions exportOptions, - TableMetadata tableMetadata, - Map dataTypeByColumnName, - Writer writer) - throws IOException { - String header = - createCsvHeaderRow( - exportOptions, - tableMetadata, - dataTypeByColumnName, - ConsensusCommitUtils.getTransactionMetaColumns().keySet()); - writer.append(header); - writer.flush(); - } - - /** - * * To create a scanner object - * - * @param exportOptions export options - * @param dao scalardb dao object - * @param storage distributed storage object - * @return created scanner - * @throws ScalarDBDaoException throws if any issue occurs in creating scanner object - */ - private Scanner createScanner( - ExportOptions exportOptions, ScalarDBDao dao, DistributedStorage storage) - throws ScalarDBDaoException { - boolean isScanAll = exportOptions.getScanPartitionKey() == null; - if (isScanAll) { - return dao.createScanner( - exportOptions.getNamespace(), - exportOptions.getTableName(), - exportOptions.getProjectionColumns(), - exportOptions.getLimit(), - storage); - } else { - return dao.createScanner( - exportOptions.getNamespace(), - exportOptions.getTableName(), - exportOptions.getScanPartitionKey(), - exportOptions.getScanRange(), - exportOptions.getSortOrders(), - exportOptions.getProjectionColumns(), - exportOptions.getLimit(), - storage); - } - } - - /** - * * To generate the header row of CSV 
export file - * - * @param exportOptions export options - * @param tableMetadata metadata of the table - * @param dataTypeByColumnName map of columns and their data types - * @param columnsToIgnore set of columns to ignore - * @return generated CSV header row - */ - private String createCsvHeaderRow( - ExportOptions exportOptions, - TableMetadata tableMetadata, - Map dataTypeByColumnName, - Set columnsToIgnore) { - StringBuilder headerRow = new StringBuilder(); - List projections = exportOptions.getProjectionColumns(); - Iterator iterator = tableMetadata.getColumnNames().iterator(); - while (iterator.hasNext()) { - String columnName = iterator.next(); - if (shouldIgnoreColumn( - exportOptions.isIncludeTransactionMetadata(), columnName, tableMetadata, projections)) { - continue; - } - headerRow.append(columnName); - if (iterator.hasNext()) { - headerRow.append(exportOptions.getDelimiter()); - } - } - CsvUtil.removeTrailingDelimiter(headerRow, exportOptions.getDelimiter()); - headerRow.append("\n"); - return headerRow.toString(); - } - - /** - * * To ignore a column or not based on conditions such as if it is a metadata column or if it is - * not include in selected projections - * - * @param isIncludeTransactionMetadata to include transaction metadata or not - * @param columnName column name - * @param tableMetadata table metadata - * @param projections selected columns for projection - * @return ignore the column or not - */ - private boolean shouldIgnoreColumn( - boolean isIncludeTransactionMetadata, - String columnName, - TableMetadata tableMetadata, - List projections) { - return (!isIncludeTransactionMetadata - && ConsensusCommitUtils.isTransactionMetaColumn(columnName, tableMetadata)) - || (!projections.isEmpty() && !projections.contains(columnName)); - } -} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/CsvProducerTask.java 
b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/CsvProducerTask.java deleted file mode 100644 index 22a5fbad9b..0000000000 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/CsvProducerTask.java +++ /dev/null @@ -1,155 +0,0 @@ -package com.scalar.db.dataloader.core.dataexport.producer; - -import com.scalar.db.api.Result; -import com.scalar.db.api.TableMetadata; -import com.scalar.db.common.error.CoreError; -import com.scalar.db.dataloader.core.dataexport.ExportReport; -import com.scalar.db.dataloader.core.util.CsvUtil; -import com.scalar.db.dataloader.core.util.DecimalUtil; -import com.scalar.db.io.DataType; -import com.scalar.db.transaction.consensuscommit.ConsensusCommitUtils; -import java.nio.charset.Charset; -import java.util.Base64; -import java.util.Iterator; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Producer that converts ScalarDB scan results to csv content. 
The output is sent to a queue to be - * processed by a consumer - */ -public class CsvProducerTask extends ProducerTask { - - private static final Logger LOGGER = LoggerFactory.getLogger(CsvProducerTask.class); - - private final String delimiter; - - /** - * Class constructor - * - * @param includeMetadata Include metadata in the exported data - * @param tableMetadata Metadata for a single ScalarDB table - * @param columnDataTypes Map of data types for the all columns in a ScalarDB table - * @param delimiter Delimiter used in csv content - */ - public CsvProducerTask( - boolean includeMetadata, - List projectColumns, - TableMetadata tableMetadata, - Map columnDataTypes, - String delimiter) { - super(includeMetadata, projectColumns, tableMetadata, columnDataTypes); - this.delimiter = delimiter; - } - - /** - * * Process scalardb scan result data and returns CSV data - * - * @param dataChunk list of results - * @param exportReport export report - * @return result converted to string - */ - @Override - public String process(List dataChunk, ExportReport exportReport) { - StringBuilder csvContent = new StringBuilder(); - for (Result result : dataChunk) { - String csvRow = convertResultToCsv(result); - csvContent.append(csvRow); - exportReport.increaseExportedRowCount(); - } - return csvContent.toString(); - } - - /** - * Convert a ScalarDB scan result to CSV - * - * @param result ScalarDB scan result - * @return CSV string - */ - private String convertResultToCsv(Result result) { - // Initialization - StringBuilder stringBuilder = new StringBuilder(); - LinkedHashSet tableColumnNames = tableMetadata.getColumnNames(); - Iterator iterator = tableColumnNames.iterator(); - - try { - // Loop over the result data list - while (iterator.hasNext()) { - String columnName = iterator.next(); - - // Skip the field if it can be ignored based on check - boolean columnNotProjected = !projectedColumnsSet.contains(columnName); - boolean isMetadataColumn = - 
ConsensusCommitUtils.isTransactionMetaColumn(columnName, tableMetadata); - if (columnNotProjected || (!includeMetadata && isMetadataColumn)) { - continue; - } - - // Convert each value to a string value and add to the StringBuilder - stringBuilder.append( - convertToString(result, columnName, dataTypeByColumnName.get(columnName))); - - if (iterator.hasNext()) { - stringBuilder.append(delimiter); - } - } - - // Double check and remove the character if it's a delimiter. This can occur when the last - // added column was not the last iterator field and did get a delimiter - CsvUtil.removeTrailingDelimiter(stringBuilder, delimiter); - - stringBuilder.append(System.lineSeparator()); - - return stringBuilder.toString(); - } catch (UnsupportedOperationException e) { - LOGGER.error( - CoreError.DATA_LOADER_VALUE_TO_STRING_CONVERSION_FAILED.buildMessage(e.getMessage())); - } - return ""; - } - - /** - * * Convert result column value to string - * - * @param result scalardb result - * @param columnName column name - * @param dataType datatype of the column - * @return value of result converted to string - */ - private String convertToString(Result result, String columnName, DataType dataType) { - if (result.isNull(columnName)) { - return null; - } - String value = ""; - switch (dataType) { - case INT: - value = Integer.toString(result.getInt(columnName)); - break; - case BIGINT: - value = Long.toString(result.getBigInt(columnName)); - break; - case FLOAT: - value = DecimalUtil.convertToNonScientific(result.getFloat(columnName)); - break; - case DOUBLE: - value = DecimalUtil.convertToNonScientific(result.getDouble(columnName)); - break; - case BLOB: - byte[] encoded = Base64.getEncoder().encode(result.getBlobAsBytes(columnName)); - value = new String(encoded, Charset.defaultCharset()); - break; - case BOOLEAN: - value = Boolean.toString(result.getBoolean(columnName)); - break; - case TEXT: - value = result.getText(columnName); - break; - default: - break; - } - return value; 
- } -} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/JsonLineProducerTask.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/JsonLineProducerTask.java deleted file mode 100644 index 358ff2ca73..0000000000 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/JsonLineProducerTask.java +++ /dev/null @@ -1,126 +0,0 @@ -package com.scalar.db.dataloader.core.dataexport.producer; - -import com.fasterxml.jackson.databind.node.ObjectNode; -import com.scalar.db.api.Result; -import com.scalar.db.api.TableMetadata; -import com.scalar.db.dataloader.core.DataLoaderObjectMapper; -import com.scalar.db.dataloader.core.dataexport.ExportReport; -import com.scalar.db.io.DataType; -import com.scalar.db.transaction.consensuscommit.ConsensusCommitUtils; -import java.nio.charset.Charset; -import java.util.Base64; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; - -public class JsonLineProducerTask extends ProducerTask { - - private final DataLoaderObjectMapper objectMapper = new DataLoaderObjectMapper(); - - /** - * Class constructor - * - * @param includeMetadata Include metadata in the exported data - * @param tableMetadata Metadata for a single ScalarDB table - * @param columnDataTypes Map of data types for the all columns in a ScalarDB table - */ - public JsonLineProducerTask( - boolean includeMetadata, - List projectionColumns, - TableMetadata tableMetadata, - Map columnDataTypes) { - super(includeMetadata, projectionColumns, tableMetadata, columnDataTypes); - } - - /** - * * Process scalardb scan result data and returns CSV data - * - * @param dataChunk list of results - * @param exportReport export report - * @return result converted to string - */ - @Override - public String process(List dataChunk, ExportReport exportReport) { - StringBuilder jsonLines = new StringBuilder(); - - for (Result result : dataChunk) { - ObjectNode 
objectNode = generateJsonForResult(result); - jsonLines.append(objectNode.toString()); - jsonLines.append(System.lineSeparator()); - exportReport.increaseExportedRowCount(); - } - return jsonLines.toString(); - } - - /** - * Generate a Json Object based on a ScalarDB Result - * - * @param result ScalarDB Result object instance - * @return JsonObject containing the ScalarDB result data - */ - private ObjectNode generateJsonForResult(Result result) { - LinkedHashSet tableColumns = tableMetadata.getColumnNames(); - - ObjectNode objectNode = objectMapper.createObjectNode(); - - // Loop through all the columns and to the json object - for (String columnName : tableColumns) { - // Skip the field if it can be ignored based on check - boolean columnNotProjected = !projectedColumnsSet.contains(columnName); - boolean isMetadataColumn = - ConsensusCommitUtils.isTransactionMetaColumn(columnName, tableMetadata); - if (columnNotProjected || (!includeMetadata && isMetadataColumn)) { - continue; - } - - DataType dataType = dataTypeByColumnName.get(columnName); - addToObjectNode(objectNode, result, columnName, dataType); - } - return objectNode; - } - - /** - * * Add result column name and value to json object node - * - * @param result scalardb result - * @param columnName column name - * @param dataType datatype of the column - */ - private void addToObjectNode( - ObjectNode objectNode, Result result, String columnName, DataType dataType) { - - // Using add and not addProperty to be able to add a null value. addProperty does not - // support null values. 
- if (result.isNull(columnName)) { - return; - } - - switch (dataType) { - case BOOLEAN: - objectNode.put(columnName, result.getBoolean(columnName)); - break; - case INT: - objectNode.put(columnName, result.getInt(columnName)); - break; - case BIGINT: - objectNode.put(columnName, result.getBigInt(columnName)); - break; - case FLOAT: - objectNode.put(columnName, result.getFloat(columnName)); - break; - case DOUBLE: - objectNode.put(columnName, result.getDouble(columnName)); - break; - case TEXT: - objectNode.put(columnName, result.getText(columnName)); - break; - case BLOB: - // convert to base64 string - byte[] encoded = Base64.getEncoder().encode(result.getBlobAsBytes(columnName)); - objectNode.put(columnName, new String(encoded, Charset.defaultCharset())); - break; - default: - break; - } - } -} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/JsonProducerTask.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/JsonProducerTask.java deleted file mode 100644 index 5c735557d7..0000000000 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/JsonProducerTask.java +++ /dev/null @@ -1,137 +0,0 @@ -package com.scalar.db.dataloader.core.dataexport.producer; - -import com.fasterxml.jackson.databind.node.ArrayNode; -import com.fasterxml.jackson.databind.node.ObjectNode; -import com.scalar.db.api.Result; -import com.scalar.db.api.TableMetadata; -import com.scalar.db.dataloader.core.DataLoaderObjectMapper; -import com.scalar.db.dataloader.core.dataexport.ExportReport; -import com.scalar.db.io.DataType; -import com.scalar.db.transaction.consensuscommit.ConsensusCommitUtils; -import java.nio.charset.Charset; -import java.util.Base64; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; - -public class JsonProducerTask extends ProducerTask { - - private final DataLoaderObjectMapper objectMapper = new DataLoaderObjectMapper(); - private 
final boolean prettyPrintJson; - - /** - * Class constructor - * - * @param includeMetadata Include metadata in the exported data - * @param tableMetadata Metadata for a single ScalarDB table - * @param columnDataTypes Map of data types for the all columns in a ScalarDB table - */ - public JsonProducerTask( - boolean includeMetadata, - List projectionColumns, - TableMetadata tableMetadata, - Map columnDataTypes, - boolean prettyPrintJson) { - super(includeMetadata, projectionColumns, tableMetadata, columnDataTypes); - this.prettyPrintJson = prettyPrintJson; - } - - /** - * * Process scalardb scan result data and returns CSV data - * - * @param dataChunk list of results - * @param exportReport export report - * @return result converted to string - */ - @Override - public String process(List dataChunk, ExportReport exportReport) { - ArrayNode arrayNode = objectMapper.createArrayNode(); - - for (Result result : dataChunk) { - ObjectNode objectNode = generateJsonForResult(result); - arrayNode.add(objectNode); - exportReport.increaseExportedRowCount(); - } - - if (prettyPrintJson) { - String json = arrayNode.toPrettyString(); - return json.substring(1, json.length() - 1); - } - - String json = arrayNode.toString(); - // Remove the [] from the json string - return json.substring(1, json.length() - 1); - } - - /** - * Generate a Json Object based on a ScalarDB Result - * - * @param result ScalarDB Result object instance - * @return JsonObject containing the ScalarDB result data - */ - private ObjectNode generateJsonForResult(Result result) { - LinkedHashSet tableColumns = tableMetadata.getColumnNames(); - - ObjectNode objectNode = objectMapper.createObjectNode(); - - // Loop through all the columns and to the json object - for (String columnName : tableColumns) { - // Skip the field if it can be ignored based on check - boolean columnNotProjected = !projectedColumnsSet.contains(columnName); - boolean isMetadataColumn = - 
ConsensusCommitUtils.isTransactionMetaColumn(columnName, tableMetadata); - if (columnNotProjected || (!includeMetadata && isMetadataColumn)) { - continue; - } - - DataType dataType = dataTypeByColumnName.get(columnName); - addToObjectNode(objectNode, result, columnName, dataType); - } - return objectNode; - } - - /** - * * Add result column name and value to json object node - * - * @param result scalardb result - * @param columnName column name - * @param dataType datatype of the column - */ - private void addToObjectNode( - ObjectNode objectNode, Result result, String columnName, DataType dataType) { - - // Using add and not addProperty to be able to add a null value. addProperty does not - // support null values. - if (result.isNull(columnName)) { - return; - } - - switch (dataType) { - case BOOLEAN: - objectNode.put(columnName, result.getBoolean(columnName)); - break; - case INT: - objectNode.put(columnName, result.getInt(columnName)); - break; - case BIGINT: - objectNode.put(columnName, result.getBigInt(columnName)); - break; - case FLOAT: - objectNode.put(columnName, result.getFloat(columnName)); - break; - case DOUBLE: - objectNode.put(columnName, result.getDouble(columnName)); - break; - case TEXT: - objectNode.put(columnName, result.getText(columnName)); - break; - case BLOB: - // convert to base64 string - byte[] encoded = Base64.getEncoder().encode(result.getBlobAsBytes(columnName)); - objectNode.put(columnName, new String(encoded, Charset.defaultCharset())); - break; - default: - break; - } - } -} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerResult.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerResult.java deleted file mode 100644 index 9506fcd722..0000000000 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerResult.java +++ /dev/null @@ -1,13 +0,0 @@ -package 
com.scalar.db.dataloader.core.dataexport.producer; - -import com.fasterxml.jackson.databind.JsonNode; -import lombok.Builder; -import lombok.Value; - -@Builder -@Value -public class ProducerResult { - JsonNode jsonNode; - String csvSource; - boolean poisonPill; -} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerTask.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerTask.java deleted file mode 100644 index a3ecb9ed87..0000000000 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerTask.java +++ /dev/null @@ -1,39 +0,0 @@ -package com.scalar.db.dataloader.core.dataexport.producer; - -import com.scalar.db.api.Result; -import com.scalar.db.api.TableMetadata; -import com.scalar.db.dataloader.core.dataexport.ExportReport; -import com.scalar.db.io.DataType; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; - -public abstract class ProducerTask { - - protected final TableMetadata tableMetadata; - protected final Map dataTypeByColumnName; - protected final boolean includeMetadata; - protected final Set projectedColumnsSet; - - /** - * Class constructor - * - * @param includeMetadata Include metadata in the exported data - * @param projectionColumns List of column name for projection - * @param tableMetadata Metadata of the ScalarDB table - * @param columnDataTypes Map of data types for the all columns in a ScalarDB table - */ - protected ProducerTask( - boolean includeMetadata, - List projectionColumns, - TableMetadata tableMetadata, - Map columnDataTypes) { - this.includeMetadata = includeMetadata; - this.projectedColumnsSet = new HashSet<>(projectionColumns); - this.tableMetadata = tableMetadata; - this.dataTypeByColumnName = columnDataTypes; - } - - public abstract String process(List dataChunk, ExportReport exportReport); -} diff --git 
a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerTaskFactory.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerTaskFactory.java deleted file mode 100644 index 4821a93b70..0000000000 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerTaskFactory.java +++ /dev/null @@ -1,57 +0,0 @@ -package com.scalar.db.dataloader.core.dataexport.producer; - -import com.scalar.db.api.TableMetadata; -import com.scalar.db.dataloader.core.FileFormat; -import com.scalar.db.io.DataType; -import java.util.List; -import java.util.Map; -import lombok.RequiredArgsConstructor; - -@RequiredArgsConstructor -public class ProducerTaskFactory { - - private final String delimiter; - private final boolean includeMetadata; - private final boolean prettyPrintJson; - - /** - * * - * - * @param fileFormat file format - * @param projectionColumns columns names that are selected - * @param tableMetadata metadata of the table - * @param dataTypeByColumnName map of columns with data types - * @return producer task object of provided file format - */ - public ProducerTask createProducerTask( - FileFormat fileFormat, - List projectionColumns, - TableMetadata tableMetadata, - Map dataTypeByColumnName) { - ProducerTask producerTask; - switch (fileFormat) { - case JSON: - producerTask = - new JsonProducerTask( - includeMetadata, - projectionColumns, - tableMetadata, - dataTypeByColumnName, - prettyPrintJson); - break; - case JSONL: - producerTask = - new JsonLineProducerTask( - includeMetadata, projectionColumns, tableMetadata, dataTypeByColumnName); - break; - case CSV: - producerTask = - new CsvProducerTask( - includeMetadata, projectionColumns, tableMetadata, dataTypeByColumnName, delimiter); - break; - default: - producerTask = null; - } - return producerTask; - } -} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDao.java 
b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDao.java index f0ab14b238..a4497cb09e 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDao.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/dao/ScalarDBDao.java @@ -332,7 +332,7 @@ Scan createScan( if (projectionColumns != null && !projectionColumns.isEmpty()) { buildableScanAll.projections(projectionColumns); } - + if (sortOrders != null && !sortOrders.isEmpty()) { for (Scan.Ordering sort : sortOrders) { buildableScanAll.ordering(sort); diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/ExportManagerTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/ExportManagerTest.java deleted file mode 100644 index dcdef57ea8..0000000000 --- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/ExportManagerTest.java +++ /dev/null @@ -1,178 +0,0 @@ -package com.scalar.db.dataloader.core.dataexport; - -import com.scalar.db.api.DistributedStorage; -import com.scalar.db.api.Result; -import com.scalar.db.api.Scanner; -import com.scalar.db.api.TableMetadata; -import com.scalar.db.common.ResultImpl; -import com.scalar.db.dataloader.core.FileFormat; -import com.scalar.db.dataloader.core.ScanRange; -import com.scalar.db.dataloader.core.UnitTestUtils; -import com.scalar.db.dataloader.core.dataexport.producer.ProducerTaskFactory; -import com.scalar.db.dataloader.core.dataimport.dao.ScalarDBDao; -import com.scalar.db.dataloader.core.dataimport.dao.ScalarDBDaoException; -import com.scalar.db.io.Column; -import com.scalar.db.io.IntColumn; -import com.scalar.db.io.Key; -import java.io.*; -import java.nio.charset.Charset; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.nio.file.StandardOpenOption; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import org.junit.jupiter.api.Assertions; 
-import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.mockito.Mockito; -import org.mockito.Spy; - -class ExportManagerTest { - - TableMetadata mockData; - DistributedStorage storage; - @Spy ScalarDBDao dao; - ProducerTaskFactory producerTaskFactory; - ExportManager exportManager; - - @BeforeEach - void setup() { - storage = Mockito.mock(DistributedStorage.class); - mockData = UnitTestUtils.createTestTableMetadata(); - dao = Mockito.mock(ScalarDBDao.class); - producerTaskFactory = new ProducerTaskFactory(null, false, true); - } - - @Test - void startExport_givenValidDataWithoutPartitionKey_shouldGenerateOutputFile() - throws IOException, ScalarDBDaoException { - exportManager = new ExportManager(storage, dao, producerTaskFactory); - Scanner scanner = Mockito.mock(Scanner.class); - String filePath = Paths.get("").toAbsolutePath() + "/output.json"; - Map> values = UnitTestUtils.createTestValues(); - Result result = new ResultImpl(values, mockData); - List results = Collections.singletonList(result); - - ExportOptions exportOptions = - ExportOptions.builder("namespace", "table", null, FileFormat.JSON) - .sortOrders(Collections.emptyList()) - .scanRange(new ScanRange(null, null, false, false)) - .build(); - - Mockito.when( - dao.createScanner( - exportOptions.getNamespace(), - exportOptions.getTableName(), - exportOptions.getProjectionColumns(), - exportOptions.getLimit(), - storage)) - .thenReturn(scanner); - Mockito.when(scanner.iterator()).thenReturn(results.iterator()); - try (BufferedWriter writer = - new BufferedWriter( - Files.newBufferedWriter( - Paths.get(filePath), - Charset.defaultCharset(), // Explicitly use the default charset - StandardOpenOption.CREATE, - StandardOpenOption.APPEND))) { - exportManager.startExport(exportOptions, mockData, writer); - } - File file = new File(filePath); - Assertions.assertTrue(file.exists()); - Assertions.assertTrue(file.delete()); - } - - @Test - void 
startExport_givenPartitionKey_shouldGenerateOutputFile() - throws IOException, ScalarDBDaoException { - exportManager = new ExportManager(storage, dao, producerTaskFactory); - Scanner scanner = Mockito.mock(Scanner.class); - String filePath = Paths.get("").toAbsolutePath() + "/output.json"; - Map> values = UnitTestUtils.createTestValues(); - Result result = new ResultImpl(values, mockData); - List results = Collections.singletonList(result); - - ExportOptions exportOptions = - ExportOptions.builder( - "namespace", - "table", - Key.newBuilder().add(IntColumn.of("col1", 1)).build(), - FileFormat.JSON) - .sortOrders(Collections.emptyList()) - .scanRange(new ScanRange(null, null, false, false)) - .build(); - - Mockito.when( - dao.createScanner( - exportOptions.getNamespace(), - exportOptions.getTableName(), - exportOptions.getScanPartitionKey(), - exportOptions.getScanRange(), - exportOptions.getSortOrders(), - exportOptions.getProjectionColumns(), - exportOptions.getLimit(), - storage)) - .thenReturn(scanner); - Mockito.when(scanner.iterator()).thenReturn(results.iterator()); - try (BufferedWriter writer = - new BufferedWriter( - Files.newBufferedWriter( - Paths.get(filePath), - Charset.defaultCharset(), // Explicitly use the default charset - StandardOpenOption.CREATE, - StandardOpenOption.APPEND))) { - exportManager.startExport(exportOptions, mockData, writer); - } - File file = new File(filePath); - Assertions.assertTrue(file.exists()); - Assertions.assertTrue(file.delete()); - } - - @Test - void startExport_givenPartitionKeyAndFileFormatCsv_shouldGenerateOutputFile() - throws IOException, ScalarDBDaoException { - producerTaskFactory = new ProducerTaskFactory(",", false, false); - exportManager = new ExportManager(storage, dao, producerTaskFactory); - Scanner scanner = Mockito.mock(Scanner.class); - String filePath = Paths.get("").toAbsolutePath() + "/output.csv"; - Map> values = UnitTestUtils.createTestValues(); - Result result = new ResultImpl(values, mockData); 
- List results = Collections.singletonList(result); - - ExportOptions exportOptions = - ExportOptions.builder( - "namespace", - "table", - Key.newBuilder().add(IntColumn.of("col1", 1)).build(), - FileFormat.CSV) - .sortOrders(Collections.emptyList()) - .scanRange(new ScanRange(null, null, false, false)) - .build(); - - Mockito.when( - dao.createScanner( - exportOptions.getNamespace(), - exportOptions.getTableName(), - exportOptions.getScanPartitionKey(), - exportOptions.getScanRange(), - exportOptions.getSortOrders(), - exportOptions.getProjectionColumns(), - exportOptions.getLimit(), - storage)) - .thenReturn(scanner); - Mockito.when(scanner.iterator()).thenReturn(results.iterator()); - try (BufferedWriter writer = - new BufferedWriter( - Files.newBufferedWriter( - Paths.get(filePath), - Charset.defaultCharset(), // Explicitly use the default charset - StandardOpenOption.CREATE, - StandardOpenOption.APPEND))) { - exportManager.startExport(exportOptions, mockData, writer); - } - File file = new File(filePath); - Assertions.assertTrue(file.exists()); - Assertions.assertTrue(file.delete()); - } -} diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/producer/CsvProducerTaskTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/producer/CsvProducerTaskTest.java deleted file mode 100644 index a084fb6930..0000000000 --- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/producer/CsvProducerTaskTest.java +++ /dev/null @@ -1,64 +0,0 @@ -package com.scalar.db.dataloader.core.dataexport.producer; - -import com.scalar.db.api.Result; -import com.scalar.db.api.TableMetadata; -import com.scalar.db.common.ResultImpl; -import com.scalar.db.dataloader.core.UnitTestUtils; -import com.scalar.db.dataloader.core.dataexport.ExportReport; -import com.scalar.db.io.Column; -import com.scalar.db.io.DataType; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import 
java.util.Map; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -class CsvProducerTaskTest { - - TableMetadata mockMetadata; - List projectedColumns; - Map columnData; - CsvProducerTask csvProducerTask; - - @BeforeEach - void setup() { - mockMetadata = UnitTestUtils.createTestTableMetadata(); - projectedColumns = UnitTestUtils.getColumnsListOfMetadata(); - columnData = UnitTestUtils.getColumnData(); - csvProducerTask = new CsvProducerTask(false, projectedColumns, mockMetadata, columnData, ","); - } - - @Test - void process_withEmptyResultList_shouldReturnEmptyString() { - List results = Collections.emptyList(); - String output = csvProducerTask.process(results, new ExportReport()); - Assertions.assertEquals("", output); - } - - @Test - void process_withValidResultList_shouldReturnValidCsvString() { - String expectedOutput = - "9007199254740992,2147483647,true,0.000000000000000000000000000000000000000000001401298464324817,0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000049,test value,YmxvYiB0ZXN0IHZhbHVl"; - Map> values = UnitTestUtils.createTestValues(); - Result result = new ResultImpl(values, mockMetadata); - List resultList = new ArrayList<>(); - resultList.add(result); - String output = csvProducerTask.process(resultList, new ExportReport()); - Assertions.assertEquals(expectedOutput, output.trim()); - } - - @Test - void process_withValidResultListWithMetadata_shouldReturnValidCsvString() { - csvProducerTask = new CsvProducerTask(true, projectedColumns, mockMetadata, columnData, ","); - String expectedOutput = - 
"9007199254740992,2147483647,true,0.000000000000000000000000000000000000000000001401298464324817,0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000049,test value,YmxvYiB0ZXN0IHZhbHVl,0.000000000000000000000000000000000000000000001401298464324817,0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000049,test value,YmxvYiB0ZXN0IHZhbHVl,txt value 464654654,2147483647,2147483647,9007199254740992,9007199254740992,test value,2147483647,2147483647,9007199254740992,9007199254740992"; - Map> values = UnitTestUtils.createTestValues(); - Result result = new ResultImpl(values, mockMetadata); - List resultList = new ArrayList<>(); - resultList.add(result); - String output = csvProducerTask.process(resultList, new ExportReport()); - Assertions.assertEquals(expectedOutput, output.trim()); - } -} diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/producer/JsonLineProducerTaskTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/producer/JsonLineProducerTaskTest.java deleted file mode 100644 index c10f674ab3..0000000000 --- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/producer/JsonLineProducerTaskTest.java +++ /dev/null @@ -1,64 +0,0 @@ -package com.scalar.db.dataloader.core.dataexport.producer; - -import com.fasterxml.jackson.databind.node.ObjectNode; -import com.scalar.db.api.Result; -import com.scalar.db.api.TableMetadata; -import 
com.scalar.db.common.ResultImpl; -import com.scalar.db.dataloader.core.UnitTestUtils; -import com.scalar.db.dataloader.core.dataexport.ExportReport; -import com.scalar.db.io.Column; -import com.scalar.db.io.DataType; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -class JsonLineProducerTaskTest { - TableMetadata mockMetadata; - List projectedColumns; - Map columnData; - JsonLineProducerTask jsonLineProducerTask; - - @BeforeEach - void setup() { - mockMetadata = UnitTestUtils.createTestTableMetadata(); - projectedColumns = UnitTestUtils.getColumnsListOfMetadata(); - columnData = UnitTestUtils.getColumnData(); - jsonLineProducerTask = - new JsonLineProducerTask(false, projectedColumns, mockMetadata, columnData); - } - - @Test - void process_withEmptyResultList_shouldReturnEmptyString() { - List results = Collections.emptyList(); - String output = jsonLineProducerTask.process(results, new ExportReport()); - Assertions.assertEquals("", output); - } - - @Test - void process_withValidResultList_shouldReturnValidJsonLineString() { - ObjectNode rootNode = UnitTestUtils.getOutputDataWithoutMetadata(); - Map> values = UnitTestUtils.createTestValues(); - Result result = new ResultImpl(values, mockMetadata); - List resultList = new ArrayList<>(); - resultList.add(result); - String output = jsonLineProducerTask.process(resultList, new ExportReport()); - Assertions.assertEquals(rootNode.toString(), output.trim()); - } - - @Test - void process_withValidResultListWithMetadata_shouldReturnValidJsonLineString() { - jsonLineProducerTask = - new JsonLineProducerTask(true, projectedColumns, mockMetadata, columnData); - ObjectNode rootNode = UnitTestUtils.getOutputDataWithMetadata(); - Map> values = UnitTestUtils.createTestValues(); - Result result = new ResultImpl(values, mockMetadata); - List resultList = 
new ArrayList<>(); - resultList.add(result); - String output = jsonLineProducerTask.process(resultList, new ExportReport()); - Assertions.assertEquals(rootNode.toString(), output.trim()); - } -} diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/producer/JsonProducerTaskTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/producer/JsonProducerTaskTest.java deleted file mode 100644 index 3100b231cd..0000000000 --- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/producer/JsonProducerTaskTest.java +++ /dev/null @@ -1,63 +0,0 @@ -package com.scalar.db.dataloader.core.dataexport.producer; - -import com.fasterxml.jackson.databind.node.ObjectNode; -import com.scalar.db.api.Result; -import com.scalar.db.api.TableMetadata; -import com.scalar.db.common.ResultImpl; -import com.scalar.db.dataloader.core.UnitTestUtils; -import com.scalar.db.dataloader.core.dataexport.ExportReport; -import com.scalar.db.io.Column; -import com.scalar.db.io.DataType; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -class JsonProducerTaskTest { - TableMetadata mockMetadata; - List projectedColumns; - Map columnData; - JsonProducerTask jsonProducerTask; - - @BeforeEach - void setup() { - mockMetadata = UnitTestUtils.createTestTableMetadata(); - projectedColumns = UnitTestUtils.getColumnsListOfMetadata(); - columnData = UnitTestUtils.getColumnData(); - jsonProducerTask = - new JsonProducerTask(false, projectedColumns, mockMetadata, columnData, true); - } - - @Test - void process_withEmptyResultList_shouldReturnEmptyString() { - List results = Collections.emptyList(); - String output = jsonProducerTask.process(results, new ExportReport()); - Assertions.assertEquals(" ", output); - } - - @Test - void 
process_withValidResultList_shouldReturnValidJsonString() { - ObjectNode rootNode = UnitTestUtils.getOutputDataWithoutMetadata(); - Map> values = UnitTestUtils.createTestValues(); - Result result = new ResultImpl(values, mockMetadata); - List resultList = new ArrayList<>(); - resultList.add(result); - String output = jsonProducerTask.process(resultList, new ExportReport()); - Assertions.assertEquals(rootNode.toPrettyString(), output.trim()); - } - - @Test - void process_withValidResultListWithMetadata_shouldReturnValidJsonString() { - jsonProducerTask = new JsonProducerTask(true, projectedColumns, mockMetadata, columnData, true); - ObjectNode rootNode = UnitTestUtils.getOutputDataWithMetadata(); - Map> values = UnitTestUtils.createTestValues(); - Result result = new ResultImpl(values, mockMetadata); - List resultList = new ArrayList<>(); - resultList.add(result); - String output = jsonProducerTask.process(resultList, new ExportReport()); - Assertions.assertEquals(rootNode.toPrettyString(), output.trim()); - } -} diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerTaskFactoryTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerTaskFactoryTest.java deleted file mode 100644 index f97e80a1d1..0000000000 --- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataexport/producer/ProducerTaskFactoryTest.java +++ /dev/null @@ -1,55 +0,0 @@ -package com.scalar.db.dataloader.core.dataexport.producer; - -import com.scalar.db.api.TableMetadata; -import com.scalar.db.dataloader.core.FileFormat; -import com.scalar.db.dataloader.core.UnitTestUtils; -import com.scalar.db.io.DataType; -import java.util.List; -import java.util.Map; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -class ProducerTaskFactoryTest { - - TableMetadata mockMetadata; - List projectedColumns; - Map columnData; - - @BeforeEach - 
void setup() { - mockMetadata = UnitTestUtils.createTestTableMetadata(); - projectedColumns = UnitTestUtils.getColumnsListOfMetadata(); - columnData = UnitTestUtils.getColumnData(); - } - - @Test - void createProducerTask_withJsonFileFormat_shouldReturnJsonProducerTask() { - ProducerTaskFactory producerTaskFactory = new ProducerTaskFactory(null, false, true); - Assertions.assertEquals( - JsonProducerTask.class, - producerTaskFactory - .createProducerTask(FileFormat.JSON, projectedColumns, mockMetadata, columnData) - .getClass()); - } - - @Test - void createProducerTask_withJsonLinesFileFormat_shouldReturnJsonLineProducerTask() { - ProducerTaskFactory producerTaskFactory = new ProducerTaskFactory(null, false, false); - Assertions.assertEquals( - JsonLineProducerTask.class, - producerTaskFactory - .createProducerTask(FileFormat.JSONL, projectedColumns, mockMetadata, columnData) - .getClass()); - } - - @Test - void createProducerTask_withCsvFileFormat_shouldReturnCsvProducerTask() { - ProducerTaskFactory producerTaskFactory = new ProducerTaskFactory(",", false, false); - Assertions.assertEquals( - CsvProducerTask.class, - producerTaskFactory - .createProducerTask(FileFormat.CSV, projectedColumns, mockMetadata, columnData) - .getClass()); - } -} From aebcef6607fca403e7878518d5ae768f547aa754 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Fri, 7 Feb 2025 15:11:47 +0530 Subject: [PATCH 62/87] Removing unwanted changes [skip ci] --- core/src/main/java/com/scalar/db/common/error/CoreError.java | 1 + .../core/dataimport/controlfile/ControlFileValidator.java | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/main/java/com/scalar/db/common/error/CoreError.java b/core/src/main/java/com/scalar/db/common/error/CoreError.java index 8d3e782ad9..671c985725 100644 --- a/core/src/main/java/com/scalar/db/common/error/CoreError.java +++ b/core/src/main/java/com/scalar/db/common/error/CoreError.java @@ -816,6 +816,7 @@ public enum CoreError implements 
ScalarDbError { ""), DATA_LOADER_MISSING_COLUMN( Category.USER_ERROR, "0177", "Missing field or column mapping for %s", "", ""), + // // Errors for the concurrency error category // diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidator.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidator.java index 418de9957b..862ec3015d 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidator.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/controlfile/ControlFileValidator.java @@ -203,7 +203,7 @@ private static void checkClusteringKeys( /** * Check that a control file table mapping does not contain duplicate mappings for the same target - * <<<<<<< HEAD column ======= column and creates a set of unique mappings >>>>>>> master + * column and creates a set of unique mappings * * @param controlFileTable Control file entry for one ScalarDB table * @return Set of uniquely mapped target columns From b5134b13579933eb221c2f514b77ad78fca8a135 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Mon, 10 Feb 2025 09:45:54 +0530 Subject: [PATCH 63/87] Changes --- .../dataloader/core/dataimport/log/LogMode.java | 1 + .../dataimport/task/ImportTaskConstants.java | 17 ----------------- 2 files changed, 1 insertion(+), 17 deletions(-) delete mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTaskConstants.java diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/log/LogMode.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/log/LogMode.java index 48eac32e61..de49580403 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/log/LogMode.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/log/LogMode.java @@ -1,5 +1,6 @@ package 
com.scalar.db.dataloader.core.dataimport.log; +/** Log modes available for import logging * */ public enum LogMode { SINGLE_FILE, SPLIT_BY_DATA_CHUNK diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTaskConstants.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTaskConstants.java deleted file mode 100644 index eb30211a0d..0000000000 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTaskConstants.java +++ /dev/null @@ -1,17 +0,0 @@ -package com.scalar.db.dataloader.core.dataimport.task; - -import lombok.AccessLevel; -import lombok.NoArgsConstructor; - -@NoArgsConstructor(access = AccessLevel.PRIVATE) -public class ImportTaskConstants { - public static final String ERROR_COULD_NOT_FIND_PARTITION_KEY = - "could not find the partition key"; - public static final String ERROR_UPSERT_INSERT_MISSING_COLUMNS = - "the source record needs to contain all fields if the UPSERT turns into an INSERT"; - public static final String ERROR_DATA_ALREADY_EXISTS = "record already exists"; - public static final String ERROR_DATA_NOT_FOUND = "record was not found"; - public static final String ERROR_COULD_NOT_FIND_CLUSTERING_KEY = - "could not find the clustering key"; - public static final String ERROR_TABLE_METADATA_MISSING = "No table metadata found"; -} From 285f51d29695b1c3b22e864bc6c4b1aea39b1554 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Mon, 10 Feb 2025 09:47:21 +0530 Subject: [PATCH 64/87] Java doc minor change [skip ci] --- .../com/scalar/db/dataloader/core/dataimport/log/LogMode.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/log/LogMode.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/log/LogMode.java index de49580403..cf0349366c 100644 --- 
a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/log/LogMode.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/log/LogMode.java @@ -1,6 +1,6 @@ package com.scalar.db.dataloader.core.dataimport.log; -/** Log modes available for import logging * */ +/** Log modes available for import logging */ public enum LogMode { SINGLE_FILE, SPLIT_BY_DATA_CHUNK From 9cadea4582415b89df012ff1e18d5704aef99ca0 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Mon, 10 Feb 2025 14:26:49 +0530 Subject: [PATCH 65/87] Constant file added --- .../dataimport/task/ImportTaskConstants.java | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTaskConstants.java diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTaskConstants.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTaskConstants.java new file mode 100644 index 0000000000..eb30211a0d --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTaskConstants.java @@ -0,0 +1,17 @@ +package com.scalar.db.dataloader.core.dataimport.task; + +import lombok.AccessLevel; +import lombok.NoArgsConstructor; + +@NoArgsConstructor(access = AccessLevel.PRIVATE) +public class ImportTaskConstants { + public static final String ERROR_COULD_NOT_FIND_PARTITION_KEY = + "could not find the partition key"; + public static final String ERROR_UPSERT_INSERT_MISSING_COLUMNS = + "the source record needs to contain all fields if the UPSERT turns into an INSERT"; + public static final String ERROR_DATA_ALREADY_EXISTS = "record already exists"; + public static final String ERROR_DATA_NOT_FOUND = "record was not found"; + public static final String ERROR_COULD_NOT_FIND_CLUSTERING_KEY = + "could not find the clustering key"; + public static final String ERROR_TABLE_METADATA_MISSING = 
"No table metadata found"; +} From 0dd2956302c3206009807434942ff74934fbb6ff Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Thu, 13 Feb 2025 16:34:24 +0530 Subject: [PATCH 66/87] Error messages and adding java docs --- .../com/scalar/db/common/error/CoreError.java | 16 +++ .../core/DataLoaderObjectMapper.java | 10 ++ .../DefaultImportProcessorFactory.java | 6 +- .../processor/TableColumnDataTypes.java | 27 ++++ .../dataimport/task/ImportStorageTask.java | 33 +++++ .../core/dataimport/task/ImportTask.java | 122 ++++++++++++++---- .../dataimport/task/ImportTaskConstants.java | 17 --- .../task/ImportTransactionalTask.java | 38 +++++- .../db/dataloader/core/util/ColumnUtils.java | 11 +- .../db/dataloader/core/util/KeyUtils.java | 23 +++- .../dataloader/core/util/ColumnUtilsTest.java | 6 +- .../db/dataloader/core/util/KeyUtilsTest.java | 22 ++-- 12 files changed, 268 insertions(+), 63 deletions(-) delete mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTaskConstants.java diff --git a/core/src/main/java/com/scalar/db/common/error/CoreError.java b/core/src/main/java/com/scalar/db/common/error/CoreError.java index 0882a343f5..76e219d8e9 100644 --- a/core/src/main/java/com/scalar/db/common/error/CoreError.java +++ b/core/src/main/java/com/scalar/db/common/error/CoreError.java @@ -822,6 +822,22 @@ public enum CoreError implements ScalarDbError { "The data mapping source field '%s' for table '%s' is missing in the json data record", "", ""), + DATA_LOADER_FILE_FORMAT_NOT_SUPPORTED( + Category.USER_ERROR, "0178", "The provided file format is not supported : %s", "", ""), + DATA_LOADER_COULD_NOT_FIND_PARTITION_KEY( + Category.USER_ERROR, "0179", "Could not find the partition key", "", ""), + DATA_LOADER_UPSERT_INSERT_MISSING_COLUMNS( + Category.USER_ERROR, + "0180", + "The source record needs to contain all fields if the UPSERT turns into an INSERT", + "", + ""), + DATA_LOADER_DATA_ALREADY_EXISTS(Category.USER_ERROR, "0181", "Record 
already exists", "", ""), + DATA_LOADER_DATA_NOT_FOUND(Category.USER_ERROR, "0182", "Record was not found", "", ""), + DATA_LOADER_COULD_NOT_FIND_CLUSTERING_KEY( + Category.USER_ERROR, "0183", "Could not find the clustering key", "", ""), + DATA_LOADER_TABLE_METADATA_MISSING( + Category.USER_ERROR, "0184", "No table metadata found", "", ""), // // Errors for the concurrency error category diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/DataLoaderObjectMapper.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/DataLoaderObjectMapper.java index d90fd49b65..81e2760d5c 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/DataLoaderObjectMapper.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/DataLoaderObjectMapper.java @@ -4,6 +4,16 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; +/** + * A custom {@link ObjectMapper} for data loading purposes. + * + *

This class configures the Jackson {@link ObjectMapper} to: + * + *

+ */ public class DataLoaderObjectMapper extends ObjectMapper { public DataLoaderObjectMapper() { diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/DefaultImportProcessorFactory.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/DefaultImportProcessorFactory.java index 30c1c26085..1277ba6130 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/DefaultImportProcessorFactory.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/DefaultImportProcessorFactory.java @@ -1,5 +1,7 @@ package com.scalar.db.dataloader.core.dataimport.processor; +import com.scalar.db.common.error.CoreError; + public class DefaultImportProcessorFactory implements ImportProcessorFactory { /** @@ -22,7 +24,9 @@ public ImportProcessor createImportProcessor(ImportProcessorParams params) { importProcessor = new CsvImportProcessor(params); break; default: - importProcessor = null; + throw new IllegalArgumentException( + CoreError.DATA_LOADER_FILE_FORMAT_NOT_SUPPORTED.buildMessage( + params.getImportOptions().getFileFormat().toString())); } return importProcessor; } diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/TableColumnDataTypes.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/TableColumnDataTypes.java index 54268b2ccf..7ac9686539 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/TableColumnDataTypes.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/TableColumnDataTypes.java @@ -4,19 +4,39 @@ import java.util.HashMap; import java.util.Map; +/** + * A class that maintains a mapping of column data types for database tables. + * + *

This class allows storing and retrieving data types for specific columns in a given table. + */ public class TableColumnDataTypes { private final Map> dataTypesByColumnsByTable; + /** Constructs a new {@code TableColumnDataTypes} instance with an empty mapping. */ public TableColumnDataTypes() { this.dataTypesByColumnsByTable = new HashMap<>(); } + /** + * Adds a data type for a specific column in a given table. + * + * @param tableName the name of the table + * @param columnName the name of the column + * @param dataType the data type associated with the column + */ public void addColumnDataType(String tableName, String columnName, DataType dataType) { dataTypesByColumnsByTable .computeIfAbsent(tableName, key -> new HashMap<>()) .put(columnName, dataType); } + /** + * Retrieves the data type of specific column in a given table. + * + * @param tableName the name of the table + * @param columnName the name of the column + * @return the {@link DataType} of the column, or {@code null} if not found + */ public DataType getDataType(String tableName, String columnName) { Map columnDataTypes = dataTypesByColumnsByTable.get(tableName); if (columnDataTypes != null) { @@ -25,6 +45,13 @@ public DataType getDataType(String tableName, String columnName) { return null; } + /** + * Retrieves all column data types for a given table. 
+ * + * @param tableName the name of the table + * @return a {@link Map} of column names to their respective {@link DataType}s, or {@code null} if + * the table does not exist + */ public Map getColumnDataTypes(String tableName) { return dataTypesByColumnsByTable.get(tableName); } diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportStorageTask.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportStorageTask.java index 07d3458072..3f3ec0601b 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportStorageTask.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportStorageTask.java @@ -8,15 +8,38 @@ import java.util.List; import java.util.Optional; +/** + * An import task that interacts with a {@link DistributedStorage} for data retrieval and storage. + * + *

This class extends {@link ImportTask} and overrides methods to fetch and store records using + * the provided {@code DistributedStorage} instance. + */ public class ImportStorageTask extends ImportTask { private final DistributedStorage storage; + /** + * Constructs an {@code ImportStorageTask} with the specified parameters and storage. + * + * @param params the import task parameters + * @param storage the distributed storage to be used for data operations + */ public ImportStorageTask(ImportTaskParams params, DistributedStorage storage) { super(params); this.storage = storage; } + /** + * Retrieves a data record from the distributed storage. + * + * @param namespace the namespace of the table + * @param tableName the name of the table + * @param partitionKey the partition key of the record + * @param clusteringKey the clustering key of the record + * @return an {@link Optional} containing the {@link Result} if the record exists, otherwise an + * empty {@link Optional} + * @throws ScalarDBDaoException if an error occurs during retrieval + */ @Override protected Optional getDataRecord( String namespace, String tableName, Key partitionKey, Key clusteringKey) @@ -24,6 +47,16 @@ protected Optional getDataRecord( return params.getDao().get(namespace, tableName, partitionKey, clusteringKey, this.storage); } + /** + * Saves a record into the distributed storage. 
+ * + * @param namespace the namespace of the table + * @param tableName the name of the table + * @param partitionKey the partition key of the record + * @param clusteringKey the clustering key of the record + * @param columns the list of columns to be saved + * @throws ScalarDBDaoException if an error occurs during the save operation + */ @Override protected void saveRecord( String namespace, diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTask.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTask.java index 8038e70ecf..c1c7f261df 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTask.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTask.java @@ -1,12 +1,5 @@ package com.scalar.db.dataloader.core.dataimport.task; -import static com.scalar.db.dataloader.core.dataimport.task.ImportTaskConstants.ERROR_COULD_NOT_FIND_CLUSTERING_KEY; -import static com.scalar.db.dataloader.core.dataimport.task.ImportTaskConstants.ERROR_COULD_NOT_FIND_PARTITION_KEY; -import static com.scalar.db.dataloader.core.dataimport.task.ImportTaskConstants.ERROR_DATA_ALREADY_EXISTS; -import static com.scalar.db.dataloader.core.dataimport.task.ImportTaskConstants.ERROR_DATA_NOT_FOUND; -import static com.scalar.db.dataloader.core.dataimport.task.ImportTaskConstants.ERROR_TABLE_METADATA_MISSING; -import static com.scalar.db.dataloader.core.dataimport.task.ImportTaskConstants.ERROR_UPSERT_INSERT_MISSING_COLUMNS; - import com.fasterxml.jackson.databind.node.ObjectNode; import com.scalar.db.api.Result; import com.scalar.db.api.TableMetadata; @@ -45,6 +38,7 @@ public abstract class ImportTask { protected final ImportTaskParams params; + /** Executes the import task, ie import data to database tables */ public ImportTaskResult execute() { ObjectNode mutableSourceRecord = params.getSourceRecord().deepCopy(); @@ -86,6 +80,15 @@ 
public ImportTaskResult execute() { .build(); } + /** + * @param controlFile control file which is used to map source data columns to columns of tables + * to which data is imported + * @param tableMetadataByTableName a map of table metadata with table name as key + * @param tableColumnDataTypes a map with table name as key that contains a map of column names + * and their data types + * @param mutableSourceRecord mutable source record data + * @return result object of import + */ private List startMultiTableImportProcess( ControlFile controlFile, Map tableMetadataByTableName, @@ -135,9 +138,19 @@ private List startMultiTableImportProcess( return targetResults; } + /** + * @param namespace Namespace name + * @param table table name + * @param tableMetadata metadata of the table + * @param dataTypeByColumnName a map with table name as key that contains a map of column names + * and their data types + * @param controlFileTable the control file table containing column mappings + * @param mutableSourceRecord mutable source record + * @return result of the import + */ private ImportTargetResult importIntoSingleTable( String namespace, - String tableName, + String table, TableMetadata tableMetadata, Map dataTypeByColumnName, ControlFileTable controlFileTable, @@ -148,9 +161,11 @@ private ImportTargetResult importIntoSingleTable( if (dataTypeByColumnName == null || tableMetadata == null) { return ImportTargetResult.builder() .namespace(namespace) - .tableName(tableName) + .tableName(table) .status(ImportTargetResultStatus.VALIDATION_FAILED) - .errors(Collections.singletonList(ERROR_TABLE_METADATA_MISSING)) + .errors( + Collections.singletonList( + CoreError.DATA_LOADER_TABLE_METADATA_MISSING.buildMessage())) .build(); } @@ -174,7 +189,7 @@ private ImportTargetResult importIntoSingleTable( if (!validationResult.isValid()) { return ImportTargetResult.builder() .namespace(namespace) - .tableName(tableName) + .tableName(table) 
.status(ImportTargetResultStatus.VALIDATION_FAILED) .errors(validationResult.getErrorMessages()) .build(); @@ -186,9 +201,11 @@ private ImportTargetResult importIntoSingleTable( if (!optionalPartitionKey.isPresent()) { return ImportTargetResult.builder() .namespace(namespace) - .tableName(tableName) + .tableName(table) .status(ImportTargetResultStatus.VALIDATION_FAILED) - .errors(Collections.singletonList(ERROR_COULD_NOT_FIND_PARTITION_KEY)) + .errors( + Collections.singletonList( + CoreError.DATA_LOADER_COULD_NOT_FIND_PARTITION_KEY.buildMessage())) .build(); } Optional optionalClusteringKey = Optional.empty(); @@ -199,9 +216,11 @@ private ImportTargetResult importIntoSingleTable( if (!optionalClusteringKey.isPresent()) { return ImportTargetResult.builder() .namespace(namespace) - .tableName(tableName) + .tableName(table) .status(ImportTargetResultStatus.VALIDATION_FAILED) - .errors(Collections.singletonList(ERROR_COULD_NOT_FIND_CLUSTERING_KEY)) + .errors( + Collections.singletonList( + CoreError.DATA_LOADER_COULD_NOT_FIND_CLUSTERING_KEY.buildMessage())) .build(); } } @@ -211,11 +230,11 @@ private ImportTargetResult importIntoSingleTable( try { optionalScalarDBResult = getDataRecord( - namespace, tableName, optionalPartitionKey.get(), optionalClusteringKey.orElse(null)); + namespace, table, optionalPartitionKey.get(), optionalClusteringKey.orElse(null)); } catch (ScalarDBDaoException e) { return ImportTargetResult.builder() .namespace(namespace) - .tableName(tableName) + .tableName(table) .status(ImportTargetResultStatus.RETRIEVAL_FAILED) .errors(Collections.singletonList(e.getMessage())) .build(); @@ -232,9 +251,11 @@ && shouldRevalidateMissingColumns(importOptions, checkForMissingColumns)) { if (!validationResultForMissingColumns.isValid()) { return ImportTargetResult.builder() .namespace(namespace) - .tableName(tableName) + .tableName(table) .status(ImportTargetResultStatus.MISSING_COLUMNS) - 
.errors(Collections.singletonList(ERROR_UPSERT_INSERT_MISSING_COLUMNS)) + .errors( + Collections.singletonList( + CoreError.DATA_LOADER_UPSERT_INSERT_MISSING_COLUMNS.buildMessage())) .build(); } } @@ -242,22 +263,23 @@ && shouldRevalidateMissingColumns(importOptions, checkForMissingColumns)) { if (shouldFailForExistingData(importAction, importOptions)) { return ImportTargetResult.builder() .namespace(namespace) - .tableName(tableName) + .tableName(table) .importedRecord(mutableSourceRecord) .importAction(importAction) .status(ImportTargetResultStatus.DATA_ALREADY_EXISTS) - .errors(Collections.singletonList(ERROR_DATA_ALREADY_EXISTS)) + .errors( + Collections.singletonList(CoreError.DATA_LOADER_DATA_ALREADY_EXISTS.buildMessage())) .build(); } if (shouldFailForMissingData(importAction, importOptions)) { return ImportTargetResult.builder() .namespace(namespace) - .tableName(tableName) + .tableName(table) .importedRecord(mutableSourceRecord) .importAction(importAction) .status(ImportTargetResultStatus.DATA_NOT_FOUND) - .errors(Collections.singletonList(ERROR_DATA_NOT_FOUND)) + .errors(Collections.singletonList(CoreError.DATA_LOADER_DATA_NOT_FOUND.buildMessage())) .build(); } @@ -273,24 +295,24 @@ && shouldRevalidateMissingColumns(importOptions, checkForMissingColumns)) { } catch (Base64Exception | ColumnParsingException e) { return ImportTargetResult.builder() .namespace(namespace) - .tableName(tableName) + .tableName(table) .status(ImportTargetResultStatus.VALIDATION_FAILED) .errors(Collections.singletonList(e.getMessage())) .build(); } - // Time to save the record + // Save the record try { saveRecord( namespace, - tableName, + table, optionalPartitionKey.get(), optionalClusteringKey.orElse(null), columns); return ImportTargetResult.builder() .namespace(namespace) - .tableName(tableName) + .tableName(table) .importAction(importAction) .importedRecord(mutableSourceRecord) .status(ImportTargetResultStatus.SAVED) @@ -299,7 +321,7 @@ && 
shouldRevalidateMissingColumns(importOptions, checkForMissingColumns)) { } catch (ScalarDBDaoException e) { return ImportTargetResult.builder() .namespace(namespace) - .tableName(tableName) + .tableName(table) .importAction(importAction) .status(ImportTargetResultStatus.SAVE_FAILED) .errors(Collections.singletonList(e.getMessage())) @@ -307,17 +329,40 @@ && shouldRevalidateMissingColumns(importOptions, checkForMissingColumns)) { } } + /** + * Applies data mapping to the given source record based on the specified control file table. + * + * @param controlFileTable the control file table containing column mappings + * @param mutableSourceRecord the source record to be modified based on the mappings + */ private void applyDataMapping(ControlFileTable controlFileTable, ObjectNode mutableSourceRecord) { if (controlFileTable != null) { ImportDataMapping.apply(mutableSourceRecord, controlFileTable); } } + /** + * Determines whether missing columns should be checked based on import options. + * + * @param importOptions the import options to evaluate + * @return {@code true} if missing columns should be checked, otherwise {@code false} + */ private boolean shouldCheckForMissingColumns(ImportOptions importOptions) { return importOptions.getImportMode() == ImportMode.INSERT || importOptions.isRequireAllColumns(); } + /** + * Validates a source record against the given table metadata and constraints. 
+ * + * @param partitionKeyNames the set of partition key names + * @param clusteringKeyNames the set of clustering key names + * @param columnNames the set of expected column names + * @param mutableSourceRecord the source record to be validated + * @param checkForMissingColumns whether to check for missing columns + * @param tableMetadata the table metadata containing schema details + * @return the validation result containing any validation errors or success status + */ private ImportSourceRecordValidationResult validateSourceRecord( LinkedHashSet partitionKeyNames, LinkedHashSet clusteringKeyNames, @@ -334,17 +379,38 @@ private ImportSourceRecordValidationResult validateSourceRecord( tableMetadata); } + /** + * Determines whether missing columns should be revalidated when performing an upsert operation. + * + * @param importOptions the import options to evaluate + * @param checkForMissingColumns whether missing columns were initially checked + * @return {@code true} if missing columns should be revalidated, otherwise {@code false} + */ private boolean shouldRevalidateMissingColumns( ImportOptions importOptions, boolean checkForMissingColumns) { return !checkForMissingColumns && importOptions.getImportMode() == ImportMode.UPSERT; } + /** + * Determines whether the operation should fail if data already exists. + * + * @param importAction the action being performed (e.g., INSERT, UPDATE) + * @param importOptions the import options specifying behavior + * @return {@code true} if the operation should fail for existing data, otherwise {@code false} + */ private boolean shouldFailForExistingData( ImportTaskAction importAction, ImportOptions importOptions) { return importAction == ImportTaskAction.UPDATE && importOptions.getImportMode() == ImportMode.INSERT; } + /** + * Determines whether the operation should fail if the expected data is missing. 
+ * + * @param importAction the action being performed (e.g., INSERT, UPDATE) + * @param importOptions the import options specifying behavior + * @return {@code true} if the operation should fail for missing data, otherwise {@code false} + */ private boolean shouldFailForMissingData( ImportTaskAction importAction, ImportOptions importOptions) { return importAction == ImportTaskAction.INSERT diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTaskConstants.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTaskConstants.java deleted file mode 100644 index eb30211a0d..0000000000 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTaskConstants.java +++ /dev/null @@ -1,17 +0,0 @@ -package com.scalar.db.dataloader.core.dataimport.task; - -import lombok.AccessLevel; -import lombok.NoArgsConstructor; - -@NoArgsConstructor(access = AccessLevel.PRIVATE) -public class ImportTaskConstants { - public static final String ERROR_COULD_NOT_FIND_PARTITION_KEY = - "could not find the partition key"; - public static final String ERROR_UPSERT_INSERT_MISSING_COLUMNS = - "the source record needs to contain all fields if the UPSERT turns into an INSERT"; - public static final String ERROR_DATA_ALREADY_EXISTS = "record already exists"; - public static final String ERROR_DATA_NOT_FOUND = "record was not found"; - public static final String ERROR_COULD_NOT_FIND_CLUSTERING_KEY = - "could not find the clustering key"; - public static final String ERROR_TABLE_METADATA_MISSING = "No table metadata found"; -} diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTransactionalTask.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTransactionalTask.java index 71e0d3ae23..41c56c7312 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTransactionalTask.java 
+++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTransactionalTask.java @@ -10,15 +10,38 @@ import java.util.List; import java.util.Optional; +/** + * An import task that operates within a {@link DistributedTransaction}. + * + *

This class extends {@link ImportTask} and overrides methods to fetch and store records using a + * transactional context. + */ public class ImportTransactionalTask extends ImportTask { private final DistributedTransaction transaction; + /** + * Constructs an {@code ImportTransactionalTask} with the specified parameters and transaction. + * + * @param params the import task parameters + * @param transaction the distributed transaction to be used for data operations + */ public ImportTransactionalTask(ImportTaskParams params, DistributedTransaction transaction) { super(params); this.transaction = transaction; } + /** + * Retrieves a data record within the active transaction. + * + * @param namespace the namespace of the table + * @param tableName the name of the table + * @param partitionKey the partition key of the record + * @param clusteringKey the clustering key of the record + * @return an {@link Optional} containing the {@link Result} if the record exists, otherwise an + * empty {@link Optional} + * @throws ScalarDBDaoException if an error occurs during retrieval + */ @Override protected Optional getDataRecord( String namespace, String tableName, Key partitionKey, Key clusteringKey) @@ -26,6 +49,16 @@ protected Optional getDataRecord( return params.getDao().get(namespace, tableName, partitionKey, clusteringKey, transaction); } + /** + * Saves a record within the active transaction. + * + * @param namespace the namespace of the table + * @param tableName the name of the table + * @param partitionKey the partition key of the record + * @param clusteringKey the clustering key of the record + * @param columns the list of columns to be saved + * @throws ScalarDBDaoException if an error occurs during the save operation + */ @Override protected void saveRecord( String namespace, @@ -38,9 +71,10 @@ protected void saveRecord( } /** - * Abort the active ScalarDB transaction + * Aborts the active ScalarDB transaction if it has not been committed. 
* - * @throws TransactionException if something goes wrong during the aborting process + * @param tx the transaction to be aborted + * @throws TransactionException if an error occurs during the aborting process */ private void abortActiveTransaction(DistributedTransaction tx) throws TransactionException { if (tx != null) { diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ColumnUtils.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ColumnUtils.java index 8574336d2f..478f5ce391 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ColumnUtils.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ColumnUtils.java @@ -17,7 +17,12 @@ import com.scalar.db.io.IntColumn; import com.scalar.db.io.TextColumn; import com.scalar.db.transaction.consensuscommit.ConsensusCommitUtils; -import java.util.*; +import java.util.ArrayList; +import java.util.Base64; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; import javax.annotation.Nullable; /** @@ -194,14 +199,14 @@ private static Column getColumnFromResult(Result scalarDBResult, String colum * @param ignoreNullValues ignore null values or not * @param dataTypesByColumns data types of columns * @return column data - * @throws Base64Exception if an error occurs while base64 decoding + * @throws ColumnParsingException if an error occurs while parsing the column */ private static Column getColumnFromSourceRecord( JsonNode sourceRecord, String columnName, boolean ignoreNullValues, Map dataTypesByColumns) - throws Base64Exception, ColumnParsingException { + throws ColumnParsingException { DataType dataType = dataTypesByColumns.get(columnName); String columnValue = sourceRecord.has(columnName) && !sourceRecord.get(columnName).isNull() diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/KeyUtils.java 
b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/KeyUtils.java index e46311545d..3c13d3f0f5 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/KeyUtils.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/KeyUtils.java @@ -12,7 +12,11 @@ import com.scalar.db.io.Column; import com.scalar.db.io.DataType; import com.scalar.db.io.Key; -import java.util.*; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; import javax.annotation.Nullable; /** @@ -26,6 +30,15 @@ public final class KeyUtils { /** Restrict instantiation via private constructor */ private KeyUtils() {} + /** + * Creates an {@link Optional} clustering key from the given source record. + * + * @param clusteringKeyNames A set of column names that make up the clustering key. + * @param dataTypeByColumnName A map defining the data type for each column name. + * @param sourceRecord The source record containing the data. + * @return An {@link Optional} containing the clustering key if clustering keys exist, otherwise + * {@link Optional#empty()}. + */ public static Optional createClusteringKeyFromSource( Set clusteringKeyNames, Map dataTypeByColumnName, @@ -35,6 +48,14 @@ public static Optional createClusteringKeyFromSource( : createKeyFromSource(clusteringKeyNames, dataTypeByColumnName, sourceRecord); } + /** + * Creates an {@link Optional} partition key from the given source record. + * + * @param partitionKeyNames A set of column names that make up the partition key. + * @param dataTypeByColumnName A map defining the data type for each column name. + * @param sourceRecord The source record containing the data. + * @return An {@link Optional} containing the partition key. 
+ */ public static Optional createPartitionKeyFromSource( Set partitionKeyNames, Map dataTypeByColumnName, diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ColumnUtilsTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ColumnUtilsTest.java index f38ca7fd72..84e4a9f766 100644 --- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ColumnUtilsTest.java +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ColumnUtilsTest.java @@ -1,6 +1,7 @@ package com.scalar.db.dataloader.core.util; -import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; import com.fasterxml.jackson.databind.node.ObjectNode; import com.scalar.db.api.Result; @@ -25,7 +26,6 @@ import java.util.List; import java.util.Map; import java.util.stream.Stream; -import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; @@ -124,6 +124,6 @@ void getColumnsFromResult_withValidData_shouldReturnColumns() throws Base64Exception, ColumnParsingException { List> columns = ColumnUtils.getColumnsFromResult(scalarDBResult, sourceRecord, false, mockMetadata); - Assertions.assertEquals(7, columns.size()); + assertEquals(7, columns.size()); } } diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/KeyUtilsTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/KeyUtilsTest.java index 5c1a04cc22..07d2e1e1ad 100644 --- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/KeyUtilsTest.java +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/KeyUtilsTest.java @@ -1,7 +1,9 @@ package com.scalar.db.dataloader.core.util; -import static org.junit.jupiter.api.Assertions.*; -import static org.mockito.Mockito.*; +import static 
org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.Mockito.when; import com.fasterxml.jackson.databind.node.ObjectNode; import com.scalar.db.api.TableMetadata; @@ -20,8 +22,12 @@ import com.scalar.db.io.Key; import com.scalar.db.io.TextColumn; import java.nio.charset.StandardCharsets; -import java.util.*; -import org.junit.jupiter.api.Assertions; +import java.util.Base64; +import java.util.Collections; +import java.util.HashSet; +import java.util.Map; +import java.util.Optional; +import java.util.Set; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; @@ -155,7 +161,7 @@ void createKey_invalidBase64_throwsBase64Exception() { @Test void createClusteringKeyFromSource_withEmptyClusteringKeySet_shouldReturnEmpty() { Optional key = KeyUtils.createClusteringKeyFromSource(Collections.EMPTY_SET, null, null); - Assertions.assertEquals(Optional.empty(), key); + assertEquals(Optional.empty(), key); } @Test @@ -165,7 +171,7 @@ void createClusteringKeyFromSource_withValidClusteringKeySet_shouldReturnValidKe clusterKeySet.add(UnitTestUtils.TEST_COLUMN_3_CK); Optional key = KeyUtils.createClusteringKeyFromSource(clusterKeySet, dataTypeByColumnName, sourceRecord); - Assertions.assertEquals( + assertEquals( "Optional[Key{IntColumn{name=col2, value=2147483647, hasNullValue=false}, BooleanColumn{name=col3, value=true, hasNullValue=false}}]", key.toString()); } @@ -176,7 +182,7 @@ void createPartitionKeyFromSource_withInvalidData_shouldReturnEmpty() { partitionKeySet.add("id1"); Optional key = KeyUtils.createPartitionKeyFromSource(partitionKeySet, dataTypeByColumnName, sourceRecord); - Assertions.assertEquals(Optional.empty(), key); + assertEquals(Optional.empty(), key); } @Test @@ -185,7 +191,7 @@ void createPartitionKeyFromSource_withValidData_shouldReturnValidKey() { 
partitionKeySet.add(UnitTestUtils.TEST_COLUMN_1_PK); Optional key = KeyUtils.createPartitionKeyFromSource(partitionKeySet, dataTypeByColumnName, sourceRecord); - Assertions.assertEquals( + assertEquals( "Optional[Key{BigIntColumn{name=col1, value=9007199254740992, hasNullValue=false}}]", key.toString()); } From 868d9b52b1eec266391bc37f651eb87529dacdb4 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Wed, 26 Feb 2025 15:50:09 +0530 Subject: [PATCH 67/87] Column util correction --- .../com/scalar/db/dataloader/core/util/ColumnUtils.java | 6 +++++- .../com/scalar/db/dataloader/core/util/ColumnUtilsTest.java | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ColumnUtils.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ColumnUtils.java index 478f5ce391..ce6c033615 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ColumnUtils.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ColumnUtils.java @@ -116,8 +116,12 @@ public static List> getColumnsFromResult( throws Base64Exception, ColumnParsingException { List> columns = new ArrayList<>(); + Set columnsToIgnore = + getColumnsToIgnore( + tableMetadata.getPartitionKeyNames(), tableMetadata.getClusteringKeyNames()); for (String columnName : tableMetadata.getColumnNames()) { - if (ConsensusCommitUtils.isTransactionMetaColumn(columnName, tableMetadata)) { + if (ConsensusCommitUtils.isTransactionMetaColumn(columnName, tableMetadata) + || columnsToIgnore.contains(columnName)) { continue; } diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ColumnUtilsTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ColumnUtilsTest.java index 84e4a9f766..10d8fa8d4c 100644 --- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ColumnUtilsTest.java +++ 
b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ColumnUtilsTest.java @@ -124,6 +124,6 @@ void getColumnsFromResult_withValidData_shouldReturnColumns() throws Base64Exception, ColumnParsingException { List> columns = ColumnUtils.getColumnsFromResult(scalarDBResult, sourceRecord, false, mockMetadata); - assertEquals(7, columns.size()); + assertEquals(4, columns.size()); } } From e4cd7fef28fefd1e076fef477a7968177fdfc081 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Wed, 26 Feb 2025 16:02:50 +0530 Subject: [PATCH 68/87] Minor corrections --- .../com/scalar/db/dataloader/core/util/ColumnUtils.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ColumnUtils.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ColumnUtils.java index ce6c033615..ef03af84ed 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ColumnUtils.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ColumnUtils.java @@ -45,7 +45,7 @@ private ColumnUtils() {} * * @param dataType the data type of the specified column * @param columnInfo the ScalarDB table column information - * @param value the value for the ScalarDB column (may be {@code null}) + * @param value the value for the ScalarDB column (maybe {@code null}) * @return the ScalarDB column created from the specified data * @throws ColumnParsingException if an error occurs while creating the column or parsing the * value @@ -166,7 +166,7 @@ private static Set getColumnsToIgnore( * @param ignoreNullValues ignore null values or not * @param dataTypesByColumns data types of columns * @return column data - * @throws Base64Exception if an error occurs while base64 decoding + * @throws ColumnParsingException if an error occurs while base64 parsing the column */ private static Column getColumn( Result scalarDBResult, @@ -174,7 +174,7 @@ private static Column getColumn( String 
columnName, boolean ignoreNullValues, Map dataTypesByColumns) - throws Base64Exception, ColumnParsingException { + throws ColumnParsingException { if (scalarDBResult != null && !sourceRecord.has(columnName)) { return getColumnFromResult(scalarDBResult, columnName); } else { From d0a73a392e5205934dc5a3b7a401491d4f81dfc1 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Tue, 4 Mar 2025 17:39:17 +0530 Subject: [PATCH 69/87] Changes --- .../core/dataimport/ImportManager.java | 3 +- .../processor/CsvImportProcessor.java | 28 +++++++++++-------- .../dataimport/processor/ImportProcessor.java | 20 +++++++++---- .../processor/JsonImportProcessor.java | 27 ++++++++++-------- .../processor/JsonLinesImportProcessor.java | 28 +++++++++++-------- 5 files changed, 64 insertions(+), 42 deletions(-) diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportManager.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportManager.java index cbc0fc02a2..2dc50ec591 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportManager.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportManager.java @@ -17,6 +17,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.concurrent.ExecutionException; import lombok.AllArgsConstructor; import lombok.NonNull; @@ -38,7 +39,7 @@ public class ImportManager implements ImportEventListener { * * @return list of import data chunk status objects */ - public List startImport() { + public List startImport() throws ExecutionException, InterruptedException { ImportProcessorParams params = ImportProcessorParams.builder() .scalarDBMode(scalarDBMode) diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java index 
01f1dbcf10..d043889282 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java @@ -12,6 +12,7 @@ import java.util.LinkedList; import java.util.List; import java.util.Queue; +import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; @@ -26,16 +27,24 @@ public CsvImportProcessor(ImportProcessorParams params) { } /** - * Process the data from the import file + * Processes the source data from the given import file. + *

+ * This method reads data from the provided {@link BufferedReader}, processes it in chunks, + * and batches transactions according to the specified sizes. The method returns a list of + * {@link ImportDataChunkStatus} objects, each representing the status of a processed data chunk. + *

* - * @param dataChunkSize size of data chunk - * @param transactionBatchSize size of transaction batch - * @param reader reader which reads the source file - * @return process data chunk status list + * @param dataChunkSize the number of records to include in each data chunk + * @param transactionBatchSize the number of records to include in each transaction batch + * @param reader the {@link BufferedReader} used to read the source file + * @return a list of {@link ImportDataChunkStatus} objects indicating the processing status of each data chunk + * @throws ExecutionException if an error occurs during asynchronous processing + * @throws InterruptedException if the processing is interrupted */ @Override public List process( - int dataChunkSize, int transactionBatchSize, BufferedReader reader) { + int dataChunkSize, int transactionBatchSize, BufferedReader reader) + throws ExecutionException, InterruptedException { int numCores = Runtime.getRuntime().availableProcessors(); ExecutorService dataChunkExecutor = Executors.newFixedThreadPool(numCores); // Create a queue to hold data batches @@ -119,12 +128,7 @@ public List process( List importDataChunkStatusList = new ArrayList<>(); // Wait for all data chunk threads to complete for (Future dataChunkFuture : dataChunkFutures) { - try { - importDataChunkStatusList.add((ImportDataChunkStatus) dataChunkFuture.get()); - } catch (Exception e) { - // TODO: handle the exception - e.printStackTrace(); - } + importDataChunkStatusList.add((ImportDataChunkStatus) dataChunkFuture.get()); } dataChunkExecutor.shutdown(); notifyAllDataChunksCompleted(); diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessor.java index 2d16b9d189..771c8baf20 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessor.java +++ 
b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessor.java @@ -39,15 +39,23 @@ public abstract class ImportProcessor { private final List listeners = new ArrayList<>(); /** - * * Process the source data from import file + * Processes the source data from the given import file. + *

+ * This method reads data from the provided {@link BufferedReader}, processes it in chunks, + * and batches transactions according to the specified sizes. The method returns a list of + * {@link ImportDataChunkStatus} objects, each representing the status of a processed data chunk. + *

* - * @param dataChunkSize size of data chunk - * @param transactionBatchSize size of transaction batch - * @param reader reader which reads the source file - * @return list of import data chunk status objects + * @param dataChunkSize the number of records to include in each data chunk + * @param transactionBatchSize the number of records to include in each transaction batch + * @param reader the {@link BufferedReader} used to read the source file + * @return a list of {@link ImportDataChunkStatus} objects indicating the processing status of each data chunk + * @throws ExecutionException if an error occurs during asynchronous processing + * @throws InterruptedException if the processing is interrupted */ public List process( - int dataChunkSize, int transactionBatchSize, BufferedReader reader) { + int dataChunkSize, int transactionBatchSize, BufferedReader reader) + throws ExecutionException, InterruptedException { return Collections.emptyList(); } diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java index c02fa625b0..bb160c8601 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java @@ -14,6 +14,7 @@ import java.util.LinkedList; import java.util.List; import java.util.Queue; +import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; @@ -29,16 +30,24 @@ public JsonImportProcessor(ImportProcessorParams params) { } /** - * Process the data from the import file + * Processes the source data from the given import file. + *

+ * This method reads data from the provided {@link BufferedReader}, processes it in chunks, + * and batches transactions according to the specified sizes. The method returns a list of + * {@link ImportDataChunkStatus} objects, each representing the status of a processed data chunk. + *

* - * @param dataChunkSize size of data chunk - * @param transactionBatchSize size of transaction batch - * @param reader reader which reads the source file - * @return process data chunk status list + * @param dataChunkSize the number of records to include in each data chunk + * @param transactionBatchSize the number of records to include in each transaction batch + * @param reader the {@link BufferedReader} used to read the source file + * @return a list of {@link ImportDataChunkStatus} objects indicating the processing status of each data chunk + * @throws ExecutionException if an error occurs during asynchronous processing + * @throws InterruptedException if the processing is interrupted */ @Override public List process( - int dataChunkSize, int transactionBatchSize, BufferedReader reader) { + int dataChunkSize, int transactionBatchSize, BufferedReader reader) + throws ExecutionException, InterruptedException { // Set the number of threads based on the available CPU cores int numCores = Runtime.getRuntime().availableProcessors(); @@ -122,11 +131,7 @@ public List process( List importDataChunkStatusList = new ArrayList<>(); // Wait for all data chunk threads to complete for (Future dataChunkFuture : dataChunkFutures) { - try { - importDataChunkStatusList.add((ImportDataChunkStatus) dataChunkFuture.get()); - } catch (Exception e) { - e.printStackTrace(); - } + importDataChunkStatusList.add((ImportDataChunkStatus) dataChunkFuture.get()); } dataChunkExecutor.shutdown(); diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java index b63f897cbe..59f7be4513 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java +++ 
b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java @@ -11,6 +11,7 @@ import java.util.LinkedList; import java.util.List; import java.util.Queue; +import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; @@ -26,16 +27,24 @@ public JsonLinesImportProcessor(ImportProcessorParams params) { } /** - * Process the data from the import file + * Processes the source data from the given import file. + *

+ * This method reads data from the provided {@link BufferedReader}, processes it in chunks, + * and batches transactions according to the specified sizes. The method returns a list of + * {@link ImportDataChunkStatus} objects, each representing the status of a processed data chunk. + *

* - * @param dataChunkSize size of data chunk - * @param transactionBatchSize size of transaction batch - * @param reader reader which reads the source file - * @return process data chunk status list + * @param dataChunkSize the number of records to include in each data chunk + * @param transactionBatchSize the number of records to include in each transaction batch + * @param reader the {@link BufferedReader} used to read the source file + * @return a list of {@link ImportDataChunkStatus} objects indicating the processing status of each data chunk + * @throws ExecutionException if an error occurs during asynchronous processing + * @throws InterruptedException if the processing is interrupted */ @Override public List process( - int dataChunkSize, int transactionBatchSize, BufferedReader reader) { + int dataChunkSize, int transactionBatchSize, BufferedReader reader) + throws ExecutionException, InterruptedException { int numCores = Runtime.getRuntime().availableProcessors(); // Create a thread pool for processing data batches @@ -112,12 +121,7 @@ public List process( List importDataChunkStatusList = new ArrayList<>(); // Wait for all data chunk threads to complete for (Future dataChunkFuture : dataChunkFutures) { - try { - importDataChunkStatusList.add((ImportDataChunkStatus) dataChunkFuture.get()); - } catch (Exception e) { - // TODO: handle the exception - e.printStackTrace(); - } + importDataChunkStatusList.add((ImportDataChunkStatus) dataChunkFuture.get()); } dataChunkExecutor.shutdown(); notifyAllDataChunksCompleted(); From bffa85b152bab52c36314470058b68859530a717 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Tue, 4 Mar 2025 18:02:50 +0530 Subject: [PATCH 70/87] gradle change reverted --- gradle/spotbugs-exclude.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gradle/spotbugs-exclude.xml b/gradle/spotbugs-exclude.xml index bab1669d82..23254eb3ab 100644 --- a/gradle/spotbugs-exclude.xml +++ b/gradle/spotbugs-exclude.xml @@ -37,7 +37,7 @@ - + 
From adc7e56dce778c3f74c30d7b96cd9299b74288fe Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Tue, 4 Mar 2025 18:14:55 +0530 Subject: [PATCH 71/87] Spotless applied --- .../dataimport/processor/CsvImportProcessor.java | 12 ++++++------ .../core/dataimport/processor/ImportProcessor.java | 12 ++++++------ .../dataimport/processor/JsonImportProcessor.java | 12 ++++++------ .../processor/JsonLinesImportProcessor.java | 12 ++++++------ .../scalar/db/dataloader/core/util/ColumnUtils.java | 4 ++-- 5 files changed, 26 insertions(+), 26 deletions(-) diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java index d043889282..a8a02a13f7 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java @@ -28,16 +28,16 @@ public CsvImportProcessor(ImportProcessorParams params) { /** * Processes the source data from the given import file. - *

- * This method reads data from the provided {@link BufferedReader}, processes it in chunks, - * and batches transactions according to the specified sizes. The method returns a list of - * {@link ImportDataChunkStatus} objects, each representing the status of a processed data chunk. - *

+ * + *

This method reads data from the provided {@link BufferedReader}, processes it in chunks, and + * batches transactions according to the specified sizes. The method returns a list of {@link + * ImportDataChunkStatus} objects, each representing the status of a processed data chunk. * * @param dataChunkSize the number of records to include in each data chunk * @param transactionBatchSize the number of records to include in each transaction batch * @param reader the {@link BufferedReader} used to read the source file - * @return a list of {@link ImportDataChunkStatus} objects indicating the processing status of each data chunk + * @return a list of {@link ImportDataChunkStatus} objects indicating the processing status of + * each data chunk * @throws ExecutionException if an error occurs during asynchronous processing * @throws InterruptedException if the processing is interrupted */ diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessor.java index 771c8baf20..1ec8422226 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessor.java @@ -40,16 +40,16 @@ public abstract class ImportProcessor { /** * Processes the source data from the given import file. - *

- * This method reads data from the provided {@link BufferedReader}, processes it in chunks, - * and batches transactions according to the specified sizes. The method returns a list of - * {@link ImportDataChunkStatus} objects, each representing the status of a processed data chunk. - *

+ * + *

This method reads data from the provided {@link BufferedReader}, processes it in chunks, and + * batches transactions according to the specified sizes. The method returns a list of {@link + * ImportDataChunkStatus} objects, each representing the status of a processed data chunk. * * @param dataChunkSize the number of records to include in each data chunk * @param transactionBatchSize the number of records to include in each transaction batch * @param reader the {@link BufferedReader} used to read the source file - * @return a list of {@link ImportDataChunkStatus} objects indicating the processing status of each data chunk + * @return a list of {@link ImportDataChunkStatus} objects indicating the processing status of + * each data chunk * @throws ExecutionException if an error occurs during asynchronous processing * @throws InterruptedException if the processing is interrupted */ diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java index bb160c8601..7c9ce923d6 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java @@ -31,16 +31,16 @@ public JsonImportProcessor(ImportProcessorParams params) { /** * Processes the source data from the given import file. - *

- * This method reads data from the provided {@link BufferedReader}, processes it in chunks, - * and batches transactions according to the specified sizes. The method returns a list of - * {@link ImportDataChunkStatus} objects, each representing the status of a processed data chunk. - *

+ * + *

This method reads data from the provided {@link BufferedReader}, processes it in chunks, and + * batches transactions according to the specified sizes. The method returns a list of {@link + * ImportDataChunkStatus} objects, each representing the status of a processed data chunk. * * @param dataChunkSize the number of records to include in each data chunk * @param transactionBatchSize the number of records to include in each transaction batch * @param reader the {@link BufferedReader} used to read the source file - * @return a list of {@link ImportDataChunkStatus} objects indicating the processing status of each data chunk + * @return a list of {@link ImportDataChunkStatus} objects indicating the processing status of + * each data chunk * @throws ExecutionException if an error occurs during asynchronous processing * @throws InterruptedException if the processing is interrupted */ diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java index 59f7be4513..fb82ceaeec 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java @@ -28,16 +28,16 @@ public JsonLinesImportProcessor(ImportProcessorParams params) { /** * Processes the source data from the given import file. - *

- * This method reads data from the provided {@link BufferedReader}, processes it in chunks, - * and batches transactions according to the specified sizes. The method returns a list of - * {@link ImportDataChunkStatus} objects, each representing the status of a processed data chunk. - *

+ * + *

This method reads data from the provided {@link BufferedReader}, processes it in chunks, and + * batches transactions according to the specified sizes. The method returns a list of {@link + * ImportDataChunkStatus} objects, each representing the status of a processed data chunk. * * @param dataChunkSize the number of records to include in each data chunk * @param transactionBatchSize the number of records to include in each transaction batch * @param reader the {@link BufferedReader} used to read the source file - * @return a list of {@link ImportDataChunkStatus} objects indicating the processing status of each data chunk + * @return a list of {@link ImportDataChunkStatus} objects indicating the processing status of + * each data chunk * @throws ExecutionException if an error occurs during asynchronous processing * @throws InterruptedException if the processing is interrupted */ diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ColumnUtils.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ColumnUtils.java index 81f79fc446..a36dc56135 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ColumnUtils.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ColumnUtils.java @@ -17,15 +17,15 @@ import com.scalar.db.io.FloatColumn; import com.scalar.db.io.IntColumn; import com.scalar.db.io.TextColumn; -import com.scalar.db.transaction.consensuscommit.ConsensusCommitUtils; -import java.util.ArrayList; import com.scalar.db.io.TimeColumn; import com.scalar.db.io.TimestampColumn; import com.scalar.db.io.TimestampTZColumn; +import com.scalar.db.transaction.consensuscommit.ConsensusCommitUtils; import java.time.Instant; import java.time.LocalDate; import java.time.LocalDateTime; import java.time.LocalTime; +import java.util.ArrayList; import java.util.Base64; import java.util.HashSet; import java.util.List; From 5b618762d94d22d1646b6611a815400808a672cd Mon Sep 17 00:00:00 2001 
From: Jishnu J Date: Tue, 4 Mar 2025 18:26:52 +0530 Subject: [PATCH 72/87] Fixed unit test --- .../com/scalar/db/dataloader/core/util/ColumnUtilsTest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ColumnUtilsTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ColumnUtilsTest.java index 9054901895..dd1a981658 100644 --- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ColumnUtilsTest.java +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ColumnUtilsTest.java @@ -161,6 +161,7 @@ void getColumnsFromResult_withValidData_shouldReturnColumns() throws Base64Exception, ColumnParsingException { List> columns = ColumnUtils.getColumnsFromResult(scalarDBResult, sourceRecord, false, mockMetadata); - assertEquals(4, columns.size()); + System.out.println(columns); + assertEquals(8, columns.size()); } } From b9842befb963b748d35a62e3f74376b2d7f6aca1 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Wed, 5 Mar 2025 16:14:17 +0530 Subject: [PATCH 73/87] Reverted try-catch changes --- .../dataimport/processor/CsvImportProcessor.java | 13 +++++++------ .../dataimport/processor/ImportProcessor.java | 5 +---- .../dataimport/processor/JsonImportProcessor.java | 15 +++++++-------- .../processor/JsonLinesImportProcessor.java | 15 +++++++-------- 4 files changed, 22 insertions(+), 26 deletions(-) diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java index a8a02a13f7..e7904d494f 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java @@ -38,13 +38,10 @@ public 
CsvImportProcessor(ImportProcessorParams params) { * @param reader the {@link BufferedReader} used to read the source file * @return a list of {@link ImportDataChunkStatus} objects indicating the processing status of * each data chunk - * @throws ExecutionException if an error occurs during asynchronous processing - * @throws InterruptedException if the processing is interrupted */ @Override public List process( - int dataChunkSize, int transactionBatchSize, BufferedReader reader) - throws ExecutionException, InterruptedException { + int dataChunkSize, int transactionBatchSize, BufferedReader reader) { int numCores = Runtime.getRuntime().availableProcessors(); ExecutorService dataChunkExecutor = Executors.newFixedThreadPool(numCores); // Create a queue to hold data batches @@ -104,7 +101,7 @@ public List process( } } catch (IOException e) { - throw new RuntimeException(); + throw new RuntimeException("Failed to read import file", e); } }); @@ -128,7 +125,11 @@ public List process( List importDataChunkStatusList = new ArrayList<>(); // Wait for all data chunk threads to complete for (Future dataChunkFuture : dataChunkFutures) { - importDataChunkStatusList.add((ImportDataChunkStatus) dataChunkFuture.get()); + try { + importDataChunkStatusList.add((ImportDataChunkStatus) dataChunkFuture.get()); + } catch (InterruptedException | ExecutionException e) { + throw new RuntimeException("Data chunk processing failed", e); + } } dataChunkExecutor.shutdown(); notifyAllDataChunksCompleted(); diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessor.java index 1ec8422226..8f464693c5 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessor.java @@ -50,12 +50,9 @@ 
public abstract class ImportProcessor { * @param reader the {@link BufferedReader} used to read the source file * @return a list of {@link ImportDataChunkStatus} objects indicating the processing status of * each data chunk - * @throws ExecutionException if an error occurs during asynchronous processing - * @throws InterruptedException if the processing is interrupted */ public List process( - int dataChunkSize, int transactionBatchSize, BufferedReader reader) - throws ExecutionException, InterruptedException { + int dataChunkSize, int transactionBatchSize, BufferedReader reader) { return Collections.emptyList(); } diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java index 7c9ce923d6..bb1705b3af 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java @@ -41,13 +41,10 @@ public JsonImportProcessor(ImportProcessorParams params) { * @param reader the {@link BufferedReader} used to read the source file * @return a list of {@link ImportDataChunkStatus} objects indicating the processing status of * each data chunk - * @throws ExecutionException if an error occurs during asynchronous processing - * @throws InterruptedException if the processing is interrupted */ @Override public List process( - int dataChunkSize, int transactionBatchSize, BufferedReader reader) - throws ExecutionException, InterruptedException { + int dataChunkSize, int transactionBatchSize, BufferedReader reader) { // Set the number of threads based on the available CPU cores int numCores = Runtime.getRuntime().availableProcessors(); @@ -70,7 +67,6 @@ public List process( int rowNumber = 1; while (jsonParser.nextToken() != JsonToken.END_ARRAY) { JsonNode 
jsonNode = OBJECT_MAPPER.readTree(jsonParser); - // TODO: do something with the null jsonNode if (jsonNode == null || jsonNode.isEmpty()) { continue; } @@ -105,8 +101,7 @@ public List process( dataChunkQueue.offer(importDataChunk); } } catch (IOException e) { - // TODO: handle this exception - throw new RuntimeException(e); + throw new RuntimeException("Failed to read import file", e); } }); readerThread.start(); @@ -131,7 +126,11 @@ public List process( List importDataChunkStatusList = new ArrayList<>(); // Wait for all data chunk threads to complete for (Future dataChunkFuture : dataChunkFutures) { - importDataChunkStatusList.add((ImportDataChunkStatus) dataChunkFuture.get()); + try { + importDataChunkStatusList.add((ImportDataChunkStatus) dataChunkFuture.get()); + } catch (InterruptedException | ExecutionException e) { + throw new RuntimeException("Data chunk processing failed", e); + } } dataChunkExecutor.shutdown(); diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java index fb82ceaeec..03779e249f 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java @@ -38,13 +38,10 @@ public JsonLinesImportProcessor(ImportProcessorParams params) { * @param reader the {@link BufferedReader} used to read the source file * @return a list of {@link ImportDataChunkStatus} objects indicating the processing status of * each data chunk - * @throws ExecutionException if an error occurs during asynchronous processing - * @throws InterruptedException if the processing is interrupted */ @Override public List process( - int dataChunkSize, int transactionBatchSize, BufferedReader reader) - throws ExecutionException, 
InterruptedException { + int dataChunkSize, int transactionBatchSize, BufferedReader reader) { int numCores = Runtime.getRuntime().availableProcessors(); // Create a thread pool for processing data batches @@ -63,7 +60,6 @@ public List process( String line; while ((line = reader.readLine()) != null) { JsonNode jsonNode = OBJECT_MAPPER.readTree(line); - // TODO: do something with the null jsonNode if (jsonNode == null || jsonNode.isEmpty()) { continue; } @@ -96,8 +92,7 @@ public List process( dataChunkQueue.offer(importDataChunk); } } catch (IOException e) { - // TODO: handle this exception - throw new RuntimeException(e); + throw new RuntimeException("Failed to read import file", e); } }); readerThread.start(); @@ -121,7 +116,11 @@ public List process( List importDataChunkStatusList = new ArrayList<>(); // Wait for all data chunk threads to complete for (Future dataChunkFuture : dataChunkFutures) { - importDataChunkStatusList.add((ImportDataChunkStatus) dataChunkFuture.get()); + try { + importDataChunkStatusList.add((ImportDataChunkStatus) dataChunkFuture.get()); + } catch (InterruptedException | ExecutionException e) { + throw new RuntimeException("Data chunk processing failed", e.getCause()); + } } dataChunkExecutor.shutdown(); notifyAllDataChunksCompleted(); From 16ae46dcf8dbc02d77301513e83f4b4d44cb9fe7 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Mon, 10 Mar 2025 09:32:01 +0530 Subject: [PATCH 74/87] Optimizations --- .../processor/CsvImportProcessor.java | 162 ++++++++---------- .../processor/JsonImportProcessor.java | 135 ++++++--------- .../processor/JsonLinesImportProcessor.java | 157 +++++++++-------- 3 files changed, 210 insertions(+), 244 deletions(-) diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java index e7904d494f..1d16c2d014 100644 --- 
a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java @@ -8,15 +8,17 @@ import com.scalar.db.dataloader.core.dataimport.datachunk.ImportRow; import java.io.BufferedReader; import java.io.IOException; +import java.io.UncheckedIOException; import java.util.ArrayList; -import java.util.LinkedList; import java.util.List; -import java.util.Queue; -import java.util.concurrent.ExecutionException; +import java.util.Optional; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; -import java.util.concurrent.Future; +import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; public class CsvImportProcessor extends ImportProcessor { private static final DataLoaderObjectMapper OBJECT_MAPPER = new DataLoaderObjectMapper(); @@ -44,98 +46,86 @@ public List process( int dataChunkSize, int transactionBatchSize, BufferedReader reader) { int numCores = Runtime.getRuntime().availableProcessors(); ExecutorService dataChunkExecutor = Executors.newFixedThreadPool(numCores); - // Create a queue to hold data batches - Queue dataChunkQueue = new LinkedList<>(); - Thread readerThread = - new Thread( - () -> { - try { - String header = params.getImportOptions().getCustomHeaderRow(); - String delimiter = Character.toString(params.getImportOptions().getDelimiter()); - if (delimiter.trim().isEmpty()) { - delimiter = ","; - } - if (header == null) { - header = reader.readLine(); - } - String[] headerArray = header.split(delimiter); - String line; - int rowNumber = 1; - List currentDataChunk = new ArrayList<>(); - while ((line = reader.readLine()) != null) { - String[] dataArray = line.split(delimiter); - if (headerArray.length != 
dataArray.length) { - // Throw a custom exception for related issue - throw new RuntimeException(); - } - JsonNode jsonNode = combineHeaderAndData(headerArray, dataArray); - if (jsonNode == null || jsonNode.isEmpty()) { - continue; - } - - ImportRow importRow = new ImportRow(rowNumber, jsonNode); - currentDataChunk.add(importRow); - // If the data chunk is full, add it to the queue - if (currentDataChunk.size() == dataChunkSize) { - int dataChunkId = dataChunkIdCounter.getAndIncrement(); - ImportDataChunk importDataChunk = - ImportDataChunk.builder() - .dataChunkId(dataChunkId) - .sourceData(currentDataChunk) - .build(); - dataChunkQueue.offer(importDataChunk); - currentDataChunk = new ArrayList<>(); - } - rowNumber++; - } + BlockingQueue dataChunkQueue = new LinkedBlockingQueue<>(); - // Add the last data chunk to the queue - if (!currentDataChunk.isEmpty()) { - int dataChunkId = dataChunkIdCounter.getAndIncrement(); - ImportDataChunk importDataChunk = - ImportDataChunk.builder() - .dataChunkId(dataChunkId) - .sourceData(currentDataChunk) - .build(); - dataChunkQueue.offer(importDataChunk); - } + CompletableFuture readerFuture = + CompletableFuture.runAsync(() -> readDataChunks(reader, dataChunkSize, dataChunkQueue)); - } catch (IOException e) { - throw new RuntimeException("Failed to read import file", e); + List> dataChunkFutures = new ArrayList<>(); + readerFuture + .thenRun( + () -> { + ImportDataChunk dataChunk; + while ((dataChunk = dataChunkQueue.poll()) != null) { + ImportDataChunk finalDataChunk = dataChunk; + CompletableFuture future = + CompletableFuture.supplyAsync( + () -> processDataChunk(finalDataChunk, transactionBatchSize, numCores), + dataChunkExecutor); + dataChunkFutures.add(future); } - }); + }) + .join(); - readerThread.start(); - try { - // Wait for readerThread to finish - readerThread.join(); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - } - // Process data chunks in parallel - List> dataChunkFutures = new 
ArrayList<>(); - while (!dataChunkQueue.isEmpty()) { - ImportDataChunk dataChunk = dataChunkQueue.poll(); - Future dataChunkFuture = - dataChunkExecutor.submit( - () -> processDataChunk(dataChunk, transactionBatchSize, numCores)); - dataChunkFutures.add(dataChunkFuture); - } + List importDataChunkStatusList = + dataChunkFutures.stream().map(CompletableFuture::join).collect(Collectors.toList()); - List importDataChunkStatusList = new ArrayList<>(); - // Wait for all data chunk threads to complete - for (Future dataChunkFuture : dataChunkFutures) { - try { - importDataChunkStatusList.add((ImportDataChunkStatus) dataChunkFuture.get()); - } catch (InterruptedException | ExecutionException e) { - throw new RuntimeException("Data chunk processing failed", e); - } - } dataChunkExecutor.shutdown(); notifyAllDataChunksCompleted(); return importDataChunkStatusList; } + private void readDataChunks( + BufferedReader reader, int dataChunkSize, BlockingQueue dataChunkQueue) { + try { + String delimiter = + Optional.of(params.getImportOptions().getDelimiter()) + .map(c -> Character.toString(c).trim()) + .filter(s -> !s.isEmpty()) + .orElse(","); + + String header = + Optional.ofNullable(params.getImportOptions().getCustomHeaderRow()) + .orElseGet(() -> safeReadLine(reader)); + + String[] headerArray = header.split(delimiter); + List currentDataChunk = new ArrayList<>(); + String line; + int rowNumber = 1; + while ((line = reader.readLine()) != null) { + String[] dataArray = line.split(delimiter); + if (headerArray.length != dataArray.length) { + throw new IllegalArgumentException("CSV row does not match header length."); + } + JsonNode jsonNode = combineHeaderAndData(headerArray, dataArray); + if (jsonNode.isEmpty()) continue; + + currentDataChunk.add(new ImportRow(rowNumber++, jsonNode)); + if (currentDataChunk.size() == dataChunkSize) { + enqueueDataChunk(currentDataChunk, dataChunkQueue); + currentDataChunk = new ArrayList<>(); + } + } + if (!currentDataChunk.isEmpty()) 
enqueueDataChunk(currentDataChunk, dataChunkQueue); + } catch (IOException | InterruptedException e) { + throw new RuntimeException("Failed to read CSV file", e); + } + } + + private void enqueueDataChunk(List dataChunk, BlockingQueue queue) + throws InterruptedException { + int dataChunkId = dataChunkIdCounter.getAndIncrement(); + queue.put(ImportDataChunk.builder().dataChunkId(dataChunkId).sourceData(dataChunk).build()); + } + + private String safeReadLine(BufferedReader reader) { + try { + return reader.readLine(); + } catch (IOException e) { + throw new UncheckedIOException("Failed to read header line", e); + } + } + private JsonNode combineHeaderAndData(String[] header, String[] data) { ObjectNode objectNode = OBJECT_MAPPER.createObjectNode(); for (int i = 0; i < header.length; i++) { diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java index bb1705b3af..336c9eda72 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java @@ -11,14 +11,14 @@ import java.io.BufferedReader; import java.io.IOException; import java.util.ArrayList; -import java.util.LinkedList; import java.util.List; -import java.util.Queue; -import java.util.concurrent.ExecutionException; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; -import java.util.concurrent.Future; +import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; public class JsonImportProcessor extends ImportProcessor { @@ -45,96 +45,65 @@ public 
JsonImportProcessor(ImportProcessorParams params) { @Override public List process( int dataChunkSize, int transactionBatchSize, BufferedReader reader) { - // Set the number of threads based on the available CPU cores int numCores = Runtime.getRuntime().availableProcessors(); - - // Create a thread pool for processing data batches ExecutorService dataChunkExecutor = Executors.newFixedThreadPool(numCores); + BlockingQueue dataChunkQueue = new LinkedBlockingQueue<>(); - // Create a queue to hold data batches - Queue dataChunkQueue = new LinkedList<>(); + CompletableFuture readerFuture = + CompletableFuture.runAsync(() -> readDataChunks(reader, dataChunkSize, dataChunkQueue)); - // Create a thread to read JSON lines and populate data batches - Thread readerThread = - new Thread( + List> dataChunkFutures = new ArrayList<>(); + readerFuture + .thenRun( () -> { - try (JsonParser jsonParser = new JsonFactory().createParser(reader)) { - if (jsonParser.nextToken() != JsonToken.START_ARRAY) { - throw new IOException("Expected content to be an array"); - } - - List currentDataChunk = new ArrayList<>(); - int rowNumber = 1; - while (jsonParser.nextToken() != JsonToken.END_ARRAY) { - JsonNode jsonNode = OBJECT_MAPPER.readTree(jsonParser); - if (jsonNode == null || jsonNode.isEmpty()) { - continue; - } - - ImportRow importRow = new ImportRow(rowNumber, jsonNode); - - currentDataChunk.add(importRow); - - // If the data chunk is full, add it to the queue - if (currentDataChunk.size() == dataChunkSize) { - int dataChunkId = dataChunkIdCounter.getAndIncrement(); - ImportDataChunk importDataChunk = - ImportDataChunk.builder() - .dataChunkId(dataChunkId) - .sourceData(currentDataChunk) - .build(); - dataChunkQueue.offer(importDataChunk); - currentDataChunk = new ArrayList<>(); - } + ImportDataChunk dataChunk; + while ((dataChunk = dataChunkQueue.poll()) != null) { + ImportDataChunk finalDataChunk = dataChunk; + CompletableFuture future = + CompletableFuture.supplyAsync( + () -> 
processDataChunk(finalDataChunk, transactionBatchSize, numCores), + dataChunkExecutor); + dataChunkFutures.add(future); + } + }) + .join(); - rowNumber++; - } + List importDataChunkStatusList = + dataChunkFutures.stream().map(CompletableFuture::join).collect(Collectors.toList()); - // Add the last data chunk to the queue - if (!currentDataChunk.isEmpty()) { - int dataChunkId = dataChunkIdCounter.getAndIncrement(); - ImportDataChunk importDataChunk = - ImportDataChunk.builder() - .dataChunkId(dataChunkId) - .sourceData(currentDataChunk) - .build(); - dataChunkQueue.offer(importDataChunk); - } - } catch (IOException e) { - throw new RuntimeException("Failed to read import file", e); - } - }); - readerThread.start(); + dataChunkExecutor.shutdown(); + notifyAllDataChunksCompleted(); + return importDataChunkStatusList; + } - try { - // Wait for readerThread to finish - readerThread.join(); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - System.err.println("Main thread was interrupted."); - } + private void readDataChunks( + BufferedReader reader, int dataChunkSize, BlockingQueue dataChunkQueue) { + try (JsonParser jsonParser = new JsonFactory().createParser(reader)) { + if (jsonParser.nextToken() != JsonToken.START_ARRAY) { + throw new IOException("Expected content to be an array"); + } - // Process data chunks in parallel - List> dataChunkFutures = new ArrayList<>(); - while (!dataChunkQueue.isEmpty()) { - ImportDataChunk dataChunk = dataChunkQueue.poll(); - Future dataChunkFuture = - dataChunkExecutor.submit( - () -> processDataChunk(dataChunk, transactionBatchSize, numCores)); - dataChunkFutures.add(dataChunkFuture); - } - List importDataChunkStatusList = new ArrayList<>(); - // Wait for all data chunk threads to complete - for (Future dataChunkFuture : dataChunkFutures) { - try { - importDataChunkStatusList.add((ImportDataChunkStatus) dataChunkFuture.get()); - } catch (InterruptedException | ExecutionException e) { - throw new 
RuntimeException("Data chunk processing failed", e); + List currentDataChunk = new ArrayList<>(); + int rowNumber = 1; + while (jsonParser.nextToken() != JsonToken.END_ARRAY) { + JsonNode jsonNode = OBJECT_MAPPER.readTree(jsonParser); + if (jsonNode == null || jsonNode.isEmpty()) continue; + + currentDataChunk.add(new ImportRow(rowNumber++, jsonNode)); + if (currentDataChunk.size() == dataChunkSize) { + enqueueDataChunk(currentDataChunk, dataChunkQueue); + currentDataChunk = new ArrayList<>(); + } } + if (!currentDataChunk.isEmpty()) enqueueDataChunk(currentDataChunk, dataChunkQueue); + } catch (IOException | InterruptedException e) { + throw new RuntimeException("Failed to read import file", e); } + } - dataChunkExecutor.shutdown(); - notifyAllDataChunksCompleted(); - return importDataChunkStatusList; + private void enqueueDataChunk(List dataChunk, BlockingQueue queue) + throws InterruptedException { + int dataChunkId = dataChunkIdCounter.getAndIncrement(); + queue.put(ImportDataChunk.builder().dataChunkId(dataChunkId).sourceData(dataChunk).build()); } } diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java index 03779e249f..ca3b091cb0 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java @@ -8,14 +8,16 @@ import java.io.BufferedReader; import java.io.IOException; import java.util.ArrayList; -import java.util.LinkedList; import java.util.List; -import java.util.Queue; -import java.util.concurrent.ExecutionException; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CopyOnWriteArrayList; import 
java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; -import java.util.concurrent.Future; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; public class JsonLinesImportProcessor extends ImportProcessor { @@ -43,87 +45,92 @@ public JsonLinesImportProcessor(ImportProcessorParams params) { public List process( int dataChunkSize, int transactionBatchSize, BufferedReader reader) { int numCores = Runtime.getRuntime().availableProcessors(); - - // Create a thread pool for processing data batches ExecutorService dataChunkExecutor = Executors.newFixedThreadPool(numCores); + BlockingQueue dataChunkQueue = new LinkedBlockingQueue<>(); + List> dataChunkFutures = new CopyOnWriteArrayList<>(); - // Create a queue to hold data batches - Queue dataChunkQueue = new LinkedList<>(); + try { + CompletableFuture readerFuture = + CompletableFuture.runAsync( + () -> readDataChunks(reader, dataChunkSize, dataChunkQueue), dataChunkExecutor); - // Create a thread to read JSON lines and populate data batches - Thread readerThread = - new Thread( - () -> { - try { - List currentDataChunk = new ArrayList<>(); - int rowNumber = 1; - String line; - while ((line = reader.readLine()) != null) { - JsonNode jsonNode = OBJECT_MAPPER.readTree(line); - if (jsonNode == null || jsonNode.isEmpty()) { - continue; + CompletableFuture processingFuture = + readerFuture.thenRunAsync( + () -> { + while (!(dataChunkQueue.isEmpty() && readerFuture.isDone())) { + try { + ImportDataChunk dataChunk = dataChunkQueue.poll(100, TimeUnit.MILLISECONDS); + if (dataChunk != null) { + dataChunkFutures.add( + CompletableFuture.supplyAsync( + () -> processDataChunk(dataChunk, transactionBatchSize, numCores), + dataChunkExecutor)); + } + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RuntimeException("Data chunk processing was interrupted", 
e); } + } + }, + dataChunkExecutor); - ImportRow importRow = new ImportRow(rowNumber, jsonNode); - currentDataChunk.add(importRow); + processingFuture.join(); - // If the data chunk is full, add it to the queue - if (currentDataChunk.size() == dataChunkSize) { - int dataChunkId = dataChunkIdCounter.getAndIncrement(); - ImportDataChunk importDataChunk = - ImportDataChunk.builder() - .dataChunkId(dataChunkId) - .sourceData(currentDataChunk) - .build(); - dataChunkQueue.offer(importDataChunk); - currentDataChunk = new ArrayList<>(); - } - rowNumber++; - } + return CompletableFuture.allOf(dataChunkFutures.toArray(new CompletableFuture[0])) + .thenApply( + v -> + dataChunkFutures.stream() + .map( + f -> + f.exceptionally( + e -> { + System.err.println( + "Data chunk processing failed: " + e.getMessage()); + return null; + }) + .join()) + .collect(Collectors.toList())) + .join(); + } finally { + dataChunkExecutor.shutdown(); + try { + if (!dataChunkExecutor.awaitTermination(60, TimeUnit.SECONDS)) { + dataChunkExecutor.shutdownNow(); + } + } catch (InterruptedException e) { + dataChunkExecutor.shutdownNow(); + Thread.currentThread().interrupt(); + } + notifyAllDataChunksCompleted(); + } + } - // Add the last data chunk to the queue - if (!currentDataChunk.isEmpty()) { - int dataChunkId = dataChunkIdCounter.getAndIncrement(); - ImportDataChunk importDataChunk = - ImportDataChunk.builder() - .dataChunkId(dataChunkId) - .sourceData(currentDataChunk) - .build(); - dataChunkQueue.offer(importDataChunk); - } - } catch (IOException e) { - throw new RuntimeException("Failed to read import file", e); - } - }); - readerThread.start(); + private void readDataChunks( + BufferedReader reader, int dataChunkSize, BlockingQueue dataChunkQueue) { try { - // Wait for readerThread to finish - readerThread.join(); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - System.err.println("Main thread was interrupted."); - } - // Process data chunks in parallel - List> 
dataChunkFutures = new ArrayList<>(); - while (!dataChunkQueue.isEmpty()) { - ImportDataChunk dataChunk = dataChunkQueue.poll(); - Future dataChunkFuture = - dataChunkExecutor.submit( - () -> processDataChunk(dataChunk, transactionBatchSize, numCores)); - dataChunkFutures.add(dataChunkFuture); - } + List currentDataChunk = new ArrayList<>(); + int rowNumber = 1; + String line; + while ((line = reader.readLine()) != null) { + JsonNode jsonNode = OBJECT_MAPPER.readTree(line); + if (jsonNode == null || jsonNode.isEmpty()) continue; - List importDataChunkStatusList = new ArrayList<>(); - // Wait for all data chunk threads to complete - for (Future dataChunkFuture : dataChunkFutures) { - try { - importDataChunkStatusList.add((ImportDataChunkStatus) dataChunkFuture.get()); - } catch (InterruptedException | ExecutionException e) { - throw new RuntimeException("Data chunk processing failed", e.getCause()); + currentDataChunk.add(new ImportRow(rowNumber++, jsonNode)); + if (currentDataChunk.size() == dataChunkSize) { + enqueueDataChunk(currentDataChunk, dataChunkQueue); + currentDataChunk = new ArrayList<>(); + } } + if (!currentDataChunk.isEmpty()) enqueueDataChunk(currentDataChunk, dataChunkQueue); + } catch (IOException | InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RuntimeException("Failed to read import file", e); } - dataChunkExecutor.shutdown(); - notifyAllDataChunksCompleted(); - return importDataChunkStatusList; + } + + private void enqueueDataChunk(List dataChunk, BlockingQueue queue) + throws InterruptedException { + int dataChunkId = dataChunkIdCounter.getAndIncrement(); + queue.put(ImportDataChunk.builder().dataChunkId(dataChunkId).sourceData(dataChunk).build()); } } From 6b2536edf8483010025d15f434478321f0c40e42 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Mon, 10 Mar 2025 14:24:39 +0530 Subject: [PATCH 75/87] Error message changes and further optimizations --- .../com/scalar/db/common/error/CoreError.java | 18 ++++ 
.../processor/CsvImportProcessor.java | 89 +++++++++++++------ .../processor/JsonImportProcessor.java | 85 +++++++++++++----- .../processor/JsonLinesImportProcessor.java | 9 +- 4 files changed, 150 insertions(+), 51 deletions(-) diff --git a/core/src/main/java/com/scalar/db/common/error/CoreError.java b/core/src/main/java/com/scalar/db/common/error/CoreError.java index 3bd5d9f89c..23d66b3797 100644 --- a/core/src/main/java/com/scalar/db/common/error/CoreError.java +++ b/core/src/main/java/com/scalar/db/common/error/CoreError.java @@ -844,6 +844,10 @@ public enum CoreError implements ScalarDbError { "The data mapping source field '%s' for table '%s' is missing in the json data record", "", ""), + DATA_LOADER_CSV_DATA_MISMATCH( + Category.USER_ERROR, "0186", "The CSV row: %s does not match header: %s.", "", ""), + DATA_LOADER_JSON_CONTENT_START_ERROR( + Category.USER_ERROR, "0187", "Expected JSON file content to be an array", "", ""), // // Errors for the concurrency error category @@ -1107,6 +1111,20 @@ public enum CoreError implements ScalarDbError { "Something went wrong while scanning. Are you sure you are running in the correct transaction mode? Details: %s", "", ""), + DATA_LOADER_CSV_FILE_READ_FAILED( + Category.INTERNAL_ERROR, "0049", "Failed to read CSV file. Details: %s.", "", ""), + DATA_LOADER_CSV_FILE_HEADER_READ_FAILED( + Category.INTERNAL_ERROR, "0050", "Failed to read CSV header line. Details: %s.", "", ""), + DATA_LOADER_DATA_CHUNK_PROCESS_FAILED( + Category.INTERNAL_ERROR, + "0051", + "Data chunk processing was interrupted. Details: %s", + "", + ""), + DATA_LOADER_JSON_FILE_READ_FAILED( + Category.INTERNAL_ERROR, "0052", "Failed to read JSON file. Details: %s.", "", ""), + DATA_LOADER_JSONLINES_FILE_READ_FAILED( + Category.INTERNAL_ERROR, "0053", "Failed to read JSON Lines file. 
Details: %s.", "", ""), // // Errors for the unknown transaction status error category diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java index 1d16c2d014..158b3446a7 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java @@ -2,6 +2,7 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; +import com.scalar.db.common.error.CoreError; import com.scalar.db.dataloader.core.DataLoaderObjectMapper; import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunk; import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunkStatus; @@ -14,9 +15,11 @@ import java.util.Optional; import java.util.concurrent.BlockingQueue; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; @@ -47,32 +50,65 @@ public List process( int numCores = Runtime.getRuntime().availableProcessors(); ExecutorService dataChunkExecutor = Executors.newFixedThreadPool(numCores); BlockingQueue dataChunkQueue = new LinkedBlockingQueue<>(); + List> dataChunkFutures = new CopyOnWriteArrayList<>(); - CompletableFuture readerFuture = - CompletableFuture.runAsync(() -> readDataChunks(reader, dataChunkSize, dataChunkQueue)); + try { + CompletableFuture readerFuture = + CompletableFuture.runAsync( + () -> readDataChunks(reader, dataChunkSize, dataChunkQueue), dataChunkExecutor); - List> 
dataChunkFutures = new ArrayList<>(); - readerFuture - .thenRun( - () -> { - ImportDataChunk dataChunk; - while ((dataChunk = dataChunkQueue.poll()) != null) { - ImportDataChunk finalDataChunk = dataChunk; - CompletableFuture future = - CompletableFuture.supplyAsync( - () -> processDataChunk(finalDataChunk, transactionBatchSize, numCores), - dataChunkExecutor); - dataChunkFutures.add(future); - } - }) - .join(); + CompletableFuture processingFuture = + readerFuture.thenRunAsync( + () -> { + while (!(dataChunkQueue.isEmpty() && readerFuture.isDone())) { + try { + ImportDataChunk dataChunk = dataChunkQueue.poll(100, TimeUnit.MILLISECONDS); + if (dataChunk != null) { + dataChunkFutures.add( + CompletableFuture.supplyAsync( + () -> processDataChunk(dataChunk, transactionBatchSize, numCores), + dataChunkExecutor)); + } + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RuntimeException( + CoreError.DATA_LOADER_DATA_CHUNK_PROCESS_FAILED.buildMessage( + e.getMessage()), + e); + } + } + }, + dataChunkExecutor); - List importDataChunkStatusList = - dataChunkFutures.stream().map(CompletableFuture::join).collect(Collectors.toList()); + processingFuture.join(); - dataChunkExecutor.shutdown(); - notifyAllDataChunksCompleted(); - return importDataChunkStatusList; + return CompletableFuture.allOf(dataChunkFutures.toArray(new CompletableFuture[0])) + .thenApply( + v -> + dataChunkFutures.stream() + .map( + f -> + f.exceptionally( + e -> { + System.err.println( + "Data chunk processing failed: " + e.getMessage()); + return null; + }) + .join()) + .collect(Collectors.toList())) + .join(); + } finally { + dataChunkExecutor.shutdown(); + try { + if (!dataChunkExecutor.awaitTermination(60, TimeUnit.SECONDS)) { + dataChunkExecutor.shutdownNow(); + } + } catch (InterruptedException e) { + dataChunkExecutor.shutdownNow(); + Thread.currentThread().interrupt(); + } + notifyAllDataChunksCompleted(); + } } private void readDataChunks( @@ -95,7 +131,8 @@ 
private void readDataChunks( while ((line = reader.readLine()) != null) { String[] dataArray = line.split(delimiter); if (headerArray.length != dataArray.length) { - throw new IllegalArgumentException("CSV row does not match header length."); + throw new IllegalArgumentException( + CoreError.DATA_LOADER_CSV_DATA_MISMATCH.buildMessage(line, header)); } JsonNode jsonNode = combineHeaderAndData(headerArray, dataArray); if (jsonNode.isEmpty()) continue; @@ -108,7 +145,8 @@ private void readDataChunks( } if (!currentDataChunk.isEmpty()) enqueueDataChunk(currentDataChunk, dataChunkQueue); } catch (IOException | InterruptedException e) { - throw new RuntimeException("Failed to read CSV file", e); + throw new RuntimeException( + CoreError.DATA_LOADER_CSV_FILE_READ_FAILED.buildMessage(e.getMessage()), e); } } @@ -122,7 +160,8 @@ private String safeReadLine(BufferedReader reader) { try { return reader.readLine(); } catch (IOException e) { - throw new UncheckedIOException("Failed to read header line", e); + throw new UncheckedIOException( + CoreError.DATA_LOADER_CSV_FILE_HEADER_READ_FAILED.buildMessage(e.getMessage()), e); } } diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java index 336c9eda72..c100412aed 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java @@ -4,6 +4,7 @@ import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.core.JsonToken; import com.fasterxml.jackson.databind.JsonNode; +import com.scalar.db.common.error.CoreError; import com.scalar.db.dataloader.core.DataLoaderObjectMapper; import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunk; import 
com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunkStatus; @@ -14,9 +15,11 @@ import java.util.List; import java.util.concurrent.BlockingQueue; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; @@ -48,39 +51,72 @@ public List process( int numCores = Runtime.getRuntime().availableProcessors(); ExecutorService dataChunkExecutor = Executors.newFixedThreadPool(numCores); BlockingQueue dataChunkQueue = new LinkedBlockingQueue<>(); + List> dataChunkFutures = new CopyOnWriteArrayList<>(); - CompletableFuture readerFuture = - CompletableFuture.runAsync(() -> readDataChunks(reader, dataChunkSize, dataChunkQueue)); + try { + CompletableFuture readerFuture = + CompletableFuture.runAsync( + () -> readDataChunks(reader, dataChunkSize, dataChunkQueue), dataChunkExecutor); - List> dataChunkFutures = new ArrayList<>(); - readerFuture - .thenRun( - () -> { - ImportDataChunk dataChunk; - while ((dataChunk = dataChunkQueue.poll()) != null) { - ImportDataChunk finalDataChunk = dataChunk; - CompletableFuture future = - CompletableFuture.supplyAsync( - () -> processDataChunk(finalDataChunk, transactionBatchSize, numCores), - dataChunkExecutor); - dataChunkFutures.add(future); - } - }) - .join(); + CompletableFuture processingFuture = + readerFuture.thenRunAsync( + () -> { + while (!(dataChunkQueue.isEmpty() && readerFuture.isDone())) { + try { + ImportDataChunk dataChunk = dataChunkQueue.poll(100, TimeUnit.MILLISECONDS); + if (dataChunk != null) { + dataChunkFutures.add( + CompletableFuture.supplyAsync( + () -> processDataChunk(dataChunk, transactionBatchSize, numCores), + dataChunkExecutor)); + } + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); 
+ throw new RuntimeException( + CoreError.DATA_LOADER_DATA_CHUNK_PROCESS_FAILED.buildMessage( + e.getMessage()), + e); + } + } + }, + dataChunkExecutor); - List importDataChunkStatusList = - dataChunkFutures.stream().map(CompletableFuture::join).collect(Collectors.toList()); + processingFuture.join(); - dataChunkExecutor.shutdown(); - notifyAllDataChunksCompleted(); - return importDataChunkStatusList; + return CompletableFuture.allOf(dataChunkFutures.toArray(new CompletableFuture[0])) + .thenApply( + v -> + dataChunkFutures.stream() + .map( + f -> + f.exceptionally( + e -> { + System.err.println( + "Data chunk processing failed: " + e.getMessage()); + return null; + }) + .join()) + .collect(Collectors.toList())) + .join(); + } finally { + dataChunkExecutor.shutdown(); + try { + if (!dataChunkExecutor.awaitTermination(60, TimeUnit.SECONDS)) { + dataChunkExecutor.shutdownNow(); + } + } catch (InterruptedException e) { + dataChunkExecutor.shutdownNow(); + Thread.currentThread().interrupt(); + } + notifyAllDataChunksCompleted(); + } } private void readDataChunks( BufferedReader reader, int dataChunkSize, BlockingQueue dataChunkQueue) { try (JsonParser jsonParser = new JsonFactory().createParser(reader)) { if (jsonParser.nextToken() != JsonToken.START_ARRAY) { - throw new IOException("Expected content to be an array"); + throw new IOException(CoreError.DATA_LOADER_JSON_CONTENT_START_ERROR.buildMessage()); } List currentDataChunk = new ArrayList<>(); @@ -97,7 +133,8 @@ private void readDataChunks( } if (!currentDataChunk.isEmpty()) enqueueDataChunk(currentDataChunk, dataChunkQueue); } catch (IOException | InterruptedException e) { - throw new RuntimeException("Failed to read import file", e); + throw new RuntimeException( + CoreError.DATA_LOADER_JSON_FILE_READ_FAILED.buildMessage(e.getMessage()), e); } } diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java 
b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java index ca3b091cb0..8de0878a86 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java @@ -1,6 +1,7 @@ package com.scalar.db.dataloader.core.dataimport.processor; import com.fasterxml.jackson.databind.JsonNode; +import com.scalar.db.common.error.CoreError; import com.scalar.db.dataloader.core.DataLoaderObjectMapper; import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunk; import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunkStatus; @@ -68,7 +69,10 @@ public List process( } } catch (InterruptedException e) { Thread.currentThread().interrupt(); - throw new RuntimeException("Data chunk processing was interrupted", e); + throw new RuntimeException( + CoreError.DATA_LOADER_DATA_CHUNK_PROCESS_FAILED.buildMessage( + e.getMessage()), + e); } } }, @@ -124,7 +128,8 @@ private void readDataChunks( if (!currentDataChunk.isEmpty()) enqueueDataChunk(currentDataChunk, dataChunkQueue); } catch (IOException | InterruptedException e) { Thread.currentThread().interrupt(); - throw new RuntimeException("Failed to read import file", e); + throw new RuntimeException( + CoreError.DATA_LOADER_JSONLINES_FILE_READ_FAILED.buildMessage(e.getMessage()), e); } } From 6aea83c67f2b5d2e419be1e0e442660f22d951c9 Mon Sep 17 00:00:00 2001 From: Peckstadt Yves Date: Mon, 17 Mar 2025 11:44:19 +0900 Subject: [PATCH 76/87] Improve javadocs for the data loader import process --- .../core/dataimport/ImportEventListener.java | 38 ++++++ .../core/dataimport/ImportManager.java | 56 ++++++++- .../processor/CsvImportProcessor.java | 59 +++++++++ .../DefaultImportProcessorFactory.java | 20 +++- .../dataimport/processor/ImportProcessor.java | 85 ++++++++----- 
.../processor/ImportProcessorFactory.java | 12 ++ .../processor/ImportProcessorParams.java | 20 ++++ .../processor/JsonImportProcessor.java | 44 +++++++ .../processor/JsonLinesImportProcessor.java | 41 +++++++ .../processor/TableColumnDataTypes.java | 45 ++++++- .../dataimport/task/ImportStorageTask.java | 59 ++++++--- .../core/dataimport/task/ImportTask.java | 69 ++++++++--- .../dataimport/task/ImportTaskParams.java | 17 +++ .../task/ImportTransactionalTask.java | 58 +++++---- .../db/dataloader/core/util/ColumnUtils.java | 103 +++++++++++----- .../db/dataloader/core/util/KeyUtils.java | 112 ++++++++++++------ .../DefaultImportProcessorFactoryTest.java | 10 ++ .../processor/TableColumnDataTypesTest.java | 16 +++ .../dataloader/core/util/ColumnUtilsTest.java | 36 ++++++ .../db/dataloader/core/util/KeyUtilsTest.java | 34 ++++++ 20 files changed, 776 insertions(+), 158 deletions(-) diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportEventListener.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportEventListener.java index 10157569b4..8081931c50 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportEventListener.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportEventListener.java @@ -5,19 +5,57 @@ import com.scalar.db.dataloader.core.dataimport.transactionbatch.ImportTransactionBatchResult; import com.scalar.db.dataloader.core.dataimport.transactionbatch.ImportTransactionBatchStatus; +/** + * Listener interface for monitoring import events during the data loading process. Implementations + * can use this to track progress and handle various stages of the import process. + */ public interface ImportEventListener { + /** + * Called when processing of a data chunk begins. 
+ * + * @param status the current status of the data chunk being processed + */ void onDataChunkStarted(ImportDataChunkStatus status); + /** + * Updates or adds new status information for a data chunk. + * + * @param status the updated status information for the data chunk + */ void addOrUpdateDataChunkStatus(ImportDataChunkStatus status); + /** + * Called when processing of a data chunk is completed. + * + * @param status the final status of the completed data chunk + */ void onDataChunkCompleted(ImportDataChunkStatus status); + /** + * Called when all data chunks have been processed. This indicates that the entire chunked import + * process is complete. + */ void onAllDataChunksCompleted(); + /** + * Called when processing of a transaction batch begins. + * + * @param batchStatus the initial status of the transaction batch + */ void onTransactionBatchStarted(ImportTransactionBatchStatus batchStatus); + /** + * Called when processing of a transaction batch is completed. + * + * @param batchResult the result of the completed transaction batch + */ void onTransactionBatchCompleted(ImportTransactionBatchResult batchResult); + /** + * Called when an import task is completed. + * + * @param taskResult the result of the completed import task + */ void onTaskComplete(ImportTaskResult taskResult); } diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportManager.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportManager.java index 2dc50ec591..9deb18e8f4 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportManager.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportManager.java @@ -21,6 +21,20 @@ import lombok.AllArgsConstructor; import lombok.NonNull; +/** + * Manages the data import process and coordinates event handling between the import processor and + * listeners. 
This class implements {@link ImportEventListener} to receive events from the processor + * and relay them to registered listeners. + * + *

The import process involves: + * + *

    + *
  • Reading data from an input file + *
  • Processing the data in configurable chunk sizes + *
  • Managing database transactions in batches + *
  • Notifying listeners of various import events + *
+ */ @AllArgsConstructor public class ImportManager implements ImportEventListener { @@ -35,9 +49,16 @@ public class ImportManager implements ImportEventListener { private final List importDataChunkStatusList = new ArrayList<>(); /** - * * Start the import process + * Starts the import process using the configured parameters. * - * @return list of import data chunk status objects + *

If the data chunk size in {@link ImportOptions} is set to 0, the entire file will be + * processed as a single chunk. Otherwise, the file will be processed in chunks of the specified + * size. + * + * @return a list of {@link ImportDataChunkStatus} objects containing the status of each processed + * chunk + * @throws ExecutionException if there is an error during the execution of the import process + * @throws InterruptedException if the import process is interrupted */ public List startImport() throws ExecutionException, InterruptedException { ImportProcessorParams params = @@ -61,14 +82,26 @@ public List startImport() throws ExecutionException, Inte dataChunkSize, importOptions.getTransactionBatchSize(), importFileReader); } + /** + * Registers a new listener to receive import events. + * + * @param listener the listener to add + * @throws IllegalArgumentException if the listener is null + */ public void addListener(ImportEventListener listener) { listeners.add(listener); } + /** + * Removes a previously registered listener. + * + * @param listener the listener to remove + */ public void removeListener(ImportEventListener listener) { listeners.remove(listener); } + /** {@inheritDoc} Forwards the event to all registered listeners. */ @Override public void onDataChunkStarted(ImportDataChunkStatus status) { for (ImportEventListener listener : listeners) { @@ -76,6 +109,10 @@ public void onDataChunkStarted(ImportDataChunkStatus status) { } } + /** + * {@inheritDoc} Updates or adds the status of a data chunk in the status list. This method is + * thread-safe. + */ @Override public void addOrUpdateDataChunkStatus(ImportDataChunkStatus status) { synchronized (importDataChunkStatusList) { @@ -91,6 +128,7 @@ public void addOrUpdateDataChunkStatus(ImportDataChunkStatus status) { } } + /** {@inheritDoc} Forwards the event to all registered listeners. 
*/ @Override public void onDataChunkCompleted(ImportDataChunkStatus status) { for (ImportEventListener listener : listeners) { @@ -98,6 +136,7 @@ public void onDataChunkCompleted(ImportDataChunkStatus status) { } } + /** {@inheritDoc} Forwards the event to all registered listeners. */ @Override public void onTransactionBatchStarted(ImportTransactionBatchStatus status) { for (ImportEventListener listener : listeners) { @@ -105,6 +144,7 @@ public void onTransactionBatchStarted(ImportTransactionBatchStatus status) { } } + /** {@inheritDoc} Forwards the event to all registered listeners. */ @Override public void onTransactionBatchCompleted(ImportTransactionBatchResult batchResult) { for (ImportEventListener listener : listeners) { @@ -112,6 +152,7 @@ public void onTransactionBatchCompleted(ImportTransactionBatchResult batchResult } } + /** {@inheritDoc} Forwards the event to all registered listeners. */ @Override public void onTaskComplete(ImportTaskResult taskResult) { for (ImportEventListener listener : listeners) { @@ -119,6 +160,7 @@ public void onTaskComplete(ImportTaskResult taskResult) { } } + /** {@inheritDoc} Forwards the event to all registered listeners. */ @Override public void onAllDataChunksCompleted() { for (ImportEventListener listener : listeners) { @@ -126,10 +168,20 @@ public void onAllDataChunksCompleted() { } } + /** + * Returns the current list of import data chunk status objects. + * + * @return an unmodifiable list of {@link ImportDataChunkStatus} objects + */ public List getImportDataChunkStatusList() { return importDataChunkStatusList; } + /** + * Creates and returns a mapping of table column data types from the table metadata. 
+ * + * @return a {@link TableColumnDataTypes} object containing the column data types for all tables + */ public TableColumnDataTypes getTableColumnDataTypes() { TableColumnDataTypes tableColumnDataTypes = new TableColumnDataTypes(); tableMetadata.forEach( diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java index 158b3446a7..aed4b0b585 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java @@ -23,10 +23,30 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; +/** + * A processor for importing CSV data into the database. + * + *

This class handles the processing of CSV files by: + * + *

    + *
  • Reading and parsing CSV data with configurable delimiters + *
  • Processing data in configurable chunk sizes for efficient batch processing + *
  • Supporting parallel processing using multiple threads + *
  • Converting CSV rows into JSON format for database import + *
+ * + *

The processor supports custom headers and validates that each data row matches the header + * structure before processing. + */ public class CsvImportProcessor extends ImportProcessor { private static final DataLoaderObjectMapper OBJECT_MAPPER = new DataLoaderObjectMapper(); private static final AtomicInteger dataChunkIdCounter = new AtomicInteger(0); + /** + * Creates a new CsvImportProcessor with the specified parameters. + * + * @param params Configuration parameters for the import processor + */ public CsvImportProcessor(ImportProcessorParams params) { super(params); } @@ -111,6 +131,24 @@ public List process( } } + /** + * Reads and processes CSV data in chunks from the provided reader. + * + *

This method: + * + *

    + *
  • Reads the CSV header (custom or from file) + *
  • Validates each data row against the header + *
  • Converts rows to JSON format + *
  • Batches rows into data chunks + *
  • Enqueues chunks for processing + *
+ * + * @param reader the BufferedReader containing CSV data + * @param dataChunkSize the number of rows to include in each chunk + * @param dataChunkQueue the queue where data chunks are placed for processing + * @throws RuntimeException if there are errors reading the file or if interrupted + */ private void readDataChunks( BufferedReader reader, int dataChunkSize, BlockingQueue dataChunkQueue) { try { @@ -150,12 +188,26 @@ private void readDataChunks( } } + /** + * Adds a completed data chunk to the processing queue. + * + * @param dataChunk the list of ImportRows to be processed + * @param queue the queue where the chunk should be placed + * @throws InterruptedException if the thread is interrupted while waiting to add to the queue + */ private void enqueueDataChunk(List dataChunk, BlockingQueue queue) throws InterruptedException { int dataChunkId = dataChunkIdCounter.getAndIncrement(); queue.put(ImportDataChunk.builder().dataChunkId(dataChunkId).sourceData(dataChunk).build()); } + /** + * Safely reads a line from the BufferedReader, handling IOExceptions. + * + * @param reader the BufferedReader to read from + * @return the line read from the reader + * @throws UncheckedIOException if an IOException occurs while reading + */ private String safeReadLine(BufferedReader reader) { try { return reader.readLine(); @@ -165,6 +217,13 @@ private String safeReadLine(BufferedReader reader) { } } + /** + * Combines header fields with data values to create a JSON object. 
+ * + * @param header array of header field names + * @param data array of data values corresponding to the header fields + * @return a JsonNode containing the combined header-value pairs + */ private JsonNode combineHeaderAndData(String[] header, String[] data) { ObjectNode objectNode = OBJECT_MAPPER.createObjectNode(); for (int i = 0; i < header.length; i++) { diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/DefaultImportProcessorFactory.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/DefaultImportProcessorFactory.java index 1277ba6130..d40222d9a7 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/DefaultImportProcessorFactory.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/DefaultImportProcessorFactory.java @@ -2,13 +2,27 @@ import com.scalar.db.common.error.CoreError; +/** + * A factory class that creates appropriate ImportProcessor instances based on the input file + * format. This factory implements the ImportProcessorFactory interface and provides a default + * implementation for creating processors that handle different file formats (JSON, JSONL, CSV). + */ public class DefaultImportProcessorFactory implements ImportProcessorFactory { /** - * Create import processor object based in file format in import params + * Creates an appropriate ImportProcessor instance based on the file format specified in the + * import parameters. * - * @param params import processor params objects - * @return generated import processor object + * @param params ImportProcessorParams containing configuration and import options, including the + * file format + * @return An ImportProcessor instance configured for the specified file format + * @throws IllegalArgumentException if the specified file format is not supported + *

Supported file formats: + *

    + *
  • JSONL - Creates a JsonLinesImportProcessor for JSON Lines format + *
  • JSON - Creates a JsonImportProcessor for JSON format + *
  • CSV - Creates a CsvImportProcessor for CSV format + *
*/ @Override public ImportProcessor createImportProcessor(ImportProcessorParams params) { diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessor.java index 8f464693c5..1717eea658 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessor.java @@ -31,6 +31,11 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +/** + * An abstract class that handles the processing of data imports into ScalarDB. This processor + * supports both transactional and non-transactional (storage) modes and provides event notification + * capabilities for monitoring the import process. + */ @RequiredArgsConstructor public abstract class ImportProcessor { @@ -42,14 +47,16 @@ public abstract class ImportProcessor { * Processes the source data from the given import file. * *

This method reads data from the provided {@link BufferedReader}, processes it in chunks, and - * batches transactions according to the specified sizes. The method returns a list of {@link - * ImportDataChunkStatus} objects, each representing the status of a processed data chunk. + * batches transactions according to the specified sizes. The processing can be done in either + * transactional or storage mode, depending on the configured {@link ScalarDBMode}. * - * @param dataChunkSize the number of records to include in each data chunk - * @param transactionBatchSize the number of records to include in each transaction batch + * @param dataChunkSize the number of records to include in each data chunk for parallel + * processing + * @param transactionBatchSize the number of records to group together in a single transaction + * (only used in transaction mode) * @param reader the {@link BufferedReader} used to read the source file - * @return a list of {@link ImportDataChunkStatus} objects indicating the processing status of - * each data chunk + * @return a list of {@link ImportDataChunkStatus} objects indicating the processing status and + * results of each data chunk */ public List process( int dataChunkSize, int transactionBatchSize, BufferedReader reader) { @@ -140,11 +147,12 @@ protected void notifyAllDataChunksCompleted() { } /** - * Split the data chunk into transaction batches + * Splits a data chunk into smaller transaction batches for processing. This method is used in + * transaction mode to group records together for atomic processing. 
* - * @param dataChunk data chunk object - * @param batchSize batch size - * @return created list of transaction batches + * @param dataChunk the data chunk to split into batches + * @param batchSize the maximum number of records per transaction batch + * @return a list of {@link ImportTransactionBatch} objects representing the split batches */ private List splitIntoTransactionBatches( ImportDataChunk dataChunk, int batchSize) { @@ -167,11 +175,14 @@ private List splitIntoTransactionBatches( } /** - * To process a transaction batch and return the result + * Processes a single transaction batch within a data chunk. Creates a new transaction, processes + * all records in the batch, and commits or aborts the transaction based on the success of all + * operations. * - * @param dataChunk data chunk object - * @param transactionBatch transaction batch object - * @return processed transaction batch result + * @param dataChunk the parent data chunk containing this batch + * @param transactionBatch the batch of records to process in a single transaction + * @return an {@link ImportTransactionBatchResult} containing the processing results and any + * errors */ private ImportTransactionBatchResult processTransactionBatch( ImportDataChunk dataChunk, ImportTransactionBatch transactionBatch) { @@ -236,9 +247,12 @@ private ImportTransactionBatchResult processTransactionBatch( } /** - * @param dataChunk data chunk object - * @param importRow data row object - * @return thr task result after processing the row data + * Processes a single record in storage mode (non-transactional). Each record is processed + * independently without transaction guarantees. 
+ * + * @param dataChunk the parent data chunk containing this record + * @param importRow the record to process + * @return an {@link ImportTaskResult} containing the processing result for the record */ private ImportTaskResult processStorageRecord(ImportDataChunk dataChunk, ImportRow importRow) { ImportTaskParams taskParams = @@ -266,12 +280,13 @@ private ImportTaskResult processStorageRecord(ImportDataChunk dataChunk, ImportR } /** - * Process data chunk data + * Processes a complete data chunk using parallel execution. The processing mode (transactional or + * storage) is determined by the configured {@link ScalarDBMode}. * - * @param dataChunk data chunk object - * @param transactionBatchSize transaction batch size - * @param numCores num of cpu cores - * @return import data chunk status object after processing the data chunk + * @param dataChunk the data chunk to process + * @param transactionBatchSize the size of transaction batches (used only in transaction mode) + * @param numCores the number of CPU cores to use for parallel processing + * @return an {@link ImportDataChunkStatus} containing the complete processing results and metrics */ protected ImportDataChunkStatus processDataChunk( ImportDataChunk dataChunk, int transactionBatchSize, int numCores) { @@ -294,12 +309,13 @@ protected ImportDataChunkStatus processDataChunk( } /** - * Process data chunk data with transactions + * Processes a data chunk using transaction mode with parallel batch processing. Multiple + * transaction batches are processed concurrently using a thread pool. 
* - * @param dataChunk data chunk object - * @param transactionBatchSize transaction batch size - * @param numCores num of cpu cores - * @return import data chunk status object after processing the data chunk + * @param dataChunk the data chunk to process + * @param transactionBatchSize the number of records per transaction batch + * @param numCores the maximum number of concurrent transactions to process + * @return an {@link ImportDataChunkStatus} containing processing results and metrics */ private ImportDataChunkStatus processDataChunkWithTransactions( ImportDataChunk dataChunk, int transactionBatchSize, int numCores) { @@ -360,11 +376,12 @@ private ImportDataChunkStatus processDataChunkWithTransactions( } /** - * Process data chunk data without transactions + * Processes a data chunk using storage mode with parallel record processing. Individual records + * are processed concurrently without transaction guarantees. * - * @param dataChunk data chunk object - * @param numCores num of cpu cores - * @return import data chunk status object after processing the data chunk + * @param dataChunk the data chunk to process + * @param numCores the number of records to process concurrently + * @return an {@link ImportDataChunkStatus} containing processing results and metrics */ private ImportDataChunkStatus processDataChunkWithoutTransactions( ImportDataChunk dataChunk, int numCores) { @@ -407,6 +424,12 @@ private ImportDataChunkStatus processDataChunkWithoutTransactions( .build(); } + /** + * Waits for all futures in the provided list to complete. Any exceptions during execution are + * logged but not propagated. 
+ * + * @param futures the list of {@link Future} objects to wait for + */ private void waitForFuturesToComplete(List> futures) { for (Future future : futures) { try { diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessorFactory.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessorFactory.java index e953b12228..a84e13de57 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessorFactory.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessorFactory.java @@ -1,5 +1,17 @@ package com.scalar.db.dataloader.core.dataimport.processor; +/** + * A factory interface for creating {@link ImportProcessor} instances. This factory follows the + * Factory design pattern to encapsulate the creation of specific import processor implementations. + */ public interface ImportProcessorFactory { + + /** + * Creates a new instance of an {@link ImportProcessor}. 
+ * + * @param params The parameters required for configuring the import processor + * @return A new {@link ImportProcessor} instance configured with the provided parameters + * @throws IllegalArgumentException if the provided parameters are invalid + */ ImportProcessor createImportProcessor(ImportProcessorParams params); } diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessorParams.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessorParams.java index 632b1dc245..36b96f62d5 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessorParams.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessorParams.java @@ -10,14 +10,34 @@ import lombok.Builder; import lombok.Value; +/** + * Parameters class for the import processor containing all necessary components for data import + * operations. + * + *

This class is immutable and uses the Builder pattern for construction. It encapsulates all + * required parameters and dependencies for processing data imports in ScalarDB. + */ @Builder @Value public class ImportProcessorParams { + /** The operational mode of ScalarDB (transaction or storage mode). */ ScalarDBMode scalarDBMode; + + /** Configuration options for the import operation. */ ImportOptions importOptions; + + /** Mapping of table names to their corresponding metadata definitions. */ Map tableMetadataByTableName; + + /** Data type information for table columns. */ TableColumnDataTypes tableColumnDataTypes; + + /** Data Access Object for ScalarDB operations. */ ScalarDBDao dao; + + /** Storage interface for non-transactional operations. */ DistributedStorage distributedStorage; + + /** Transaction manager for handling transactional operations. */ DistributedTransactionManager distributedTransactionManager; } diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java index c100412aed..70b25c4484 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java @@ -23,6 +23,24 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; +/** + * A processor for importing JSON data into the database. + * + *

This processor handles JSON files that contain an array of JSON objects. Each object in the + * array represents a row to be imported into the database. The processor reads the JSON file, + * splits it into chunks of configurable size, and processes these chunks in parallel using multiple + * threads. + * + *

The processing is done in two main phases: + * + *

    + *
  • Reading phase: The JSON file is read and split into chunks + *
  • Processing phase: Each chunk is processed independently and imported into the database + *
+ * + *

The processor uses a producer-consumer pattern where one thread reads the JSON file and + * produces data chunks, while a pool of worker threads consumes and processes these chunks. + */ public class JsonImportProcessor extends ImportProcessor { private static final DataLoaderObjectMapper OBJECT_MAPPER = new DataLoaderObjectMapper(); @@ -112,6 +130,21 @@ public List process( } } + /** + * Reads data chunks from the JSON file and adds them to the processing queue. + * + *

This method reads the JSON file as an array of objects, creating data chunks of the + * specified size. Each chunk is then added to the queue for processing. The method expects the + * JSON file to start with an array token '[' and end with ']'. + * + *

Empty or null JSON nodes are skipped during processing. + * + * @param reader the BufferedReader containing the JSON data + * @param dataChunkSize the maximum number of records to include in each chunk + * @param dataChunkQueue the queue where data chunks are placed for processing + * @throws RuntimeException if there is an error reading the JSON file or if the thread is + * interrupted + */ private void readDataChunks( BufferedReader reader, int dataChunkSize, BlockingQueue dataChunkQueue) { try (JsonParser jsonParser = new JsonFactory().createParser(reader)) { @@ -138,6 +171,17 @@ private void readDataChunks( } } + /** + * Adds a data chunk to the processing queue. + * + *

This method creates a new ImportDataChunk with a unique ID and the provided data, then adds + * it to the processing queue. The ID is generated using an atomic counter to ensure thread + * safety. + * + * @param dataChunk the list of ImportRow objects to be processed + * @param queue the queue where the data chunk will be added + * @throws InterruptedException if the thread is interrupted while waiting to add to the queue + */ private void enqueueDataChunk(List dataChunk, BlockingQueue queue) throws InterruptedException { int dataChunkId = dataChunkIdCounter.getAndIncrement(); diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java index 8de0878a86..c556a00d18 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java @@ -20,11 +20,31 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; +/** + * A processor for importing data from JSON Lines (JSONL) formatted files. + * + *

This processor reads data from files where each line is a valid JSON object. It processes the + * input file in chunks, allowing for parallel processing and batched transactions for efficient + * data loading. + * + *

The processor uses a multi-threaded approach with: + * + *

    + *
  • A dedicated thread for reading data chunks from the input file + *
  • Multiple threads for processing data chunks in parallel + *
  • A queue-based system to manage data chunks between reader and processor threads + *
+ */ public class JsonLinesImportProcessor extends ImportProcessor { private static final DataLoaderObjectMapper OBJECT_MAPPER = new DataLoaderObjectMapper(); private static final AtomicInteger dataChunkIdCounter = new AtomicInteger(0); + /** + * Creates a new JsonLinesImportProcessor with the specified parameters. + * + * @param params configuration parameters for the import processor + */ public JsonLinesImportProcessor(ImportProcessorParams params) { super(params); } @@ -109,6 +129,18 @@ public List process( } } + /** + * Reads data from the input file and creates data chunks for processing. + * + *

This method reads the input file line by line, parsing each line as a JSON object. It + * accumulates rows until reaching the specified chunk size, then enqueues the chunk for + * processing. Empty lines or invalid JSON objects are skipped. + * + * @param reader the BufferedReader for reading the input file + * @param dataChunkSize the maximum number of rows to include in each data chunk + * @param dataChunkQueue the queue where data chunks are placed for processing + * @throws RuntimeException if there is an error reading the file or if the thread is interrupted + */ private void readDataChunks( BufferedReader reader, int dataChunkSize, BlockingQueue dataChunkQueue) { try { @@ -133,6 +165,15 @@ private void readDataChunks( } } + /** + * Enqueues a data chunk for processing. + * + *

Creates a new ImportDataChunk with a unique ID and adds it to the processing queue. + * + * @param dataChunk the list of ImportRows to be processed + * @param queue the queue where the data chunk should be placed + * @throws InterruptedException if the thread is interrupted while waiting to add to the queue + */ private void enqueueDataChunk(List dataChunk, BlockingQueue queue) throws InterruptedException { int dataChunkId = dataChunkIdCounter.getAndIncrement(); diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/TableColumnDataTypes.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/TableColumnDataTypes.java index 7ac9686539..b9684ee64a 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/TableColumnDataTypes.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/TableColumnDataTypes.java @@ -7,12 +7,34 @@ /** * A class that maintains a mapping of column data types for database tables. * - *

This class allows storing and retrieving data types for specific columns in a given table. + *

This class provides functionality to store and retrieve data type information for table + * columns in a database schema. It uses a nested map structure where the outer map keys are table + * names and the inner map keys are column names. + * + *

Example usage: + * + *

{@code
+ * TableColumnDataTypes types = new TableColumnDataTypes();
+ *
+ * // Add column data types for a table
+ * types.addColumnDataType("users", "id", DataType.INT);
+ * types.addColumnDataType("users", "name", DataType.TEXT);
+ *
+ * // Retrieve data type for a specific column
+ * DataType idType = types.getDataType("users", "id"); // Returns DataType.INT
+ *
+ * // Get all column data types for a table
+ * Map userColumns = types.getColumnDataTypes("users");
+ * }
*/ public class TableColumnDataTypes { private final Map> dataTypesByColumnsByTable; - /** Constructs a new {@code TableColumnDataTypes} instance with an empty mapping. */ + /** + * Constructs a new {@code TableColumnDataTypes} instance with an empty mapping. The internal + * structure is initialized as an empty HashMap that will store table names as keys and + * column-to-datatype mappings as values. + */ public TableColumnDataTypes() { this.dataTypesByColumnsByTable = new HashMap<>(); } @@ -20,9 +42,14 @@ public TableColumnDataTypes() { /** * Adds a data type for a specific column in a given table. * + *

If the table doesn't exist in the mapping, a new entry is created automatically. If the + * column already exists for the specified table, its data type will be updated with the new + * value. + * * @param tableName the name of the table * @param columnName the name of the column * @param dataType the data type associated with the column + * @throws NullPointerException if any of the parameters is null */ public void addColumnDataType(String tableName, String columnName, DataType dataType) { dataTypesByColumnsByTable @@ -33,9 +60,14 @@ public void addColumnDataType(String tableName, String columnName, DataType data /** * Retrieves the data type of specific column in a given table. * + *

This method performs a lookup in the internal mapping to find the data type associated with + * the specified table and column combination. + * * @param tableName the name of the table * @param columnName the name of the column - * @return the {@link DataType} of the column, or {@code null} if not found + * @return the {@link DataType} of the column, or {@code null} if either the table or the column + * is not found in the mapping + * @throws NullPointerException if any of the parameters is null */ public DataType getDataType(String tableName, String columnName) { Map columnDataTypes = dataTypesByColumnsByTable.get(tableName); @@ -48,9 +80,14 @@ public DataType getDataType(String tableName, String columnName) { /** * Retrieves all column data types for a given table. * + *

Returns a map containing all columns and their corresponding data types for the specified + * table. The returned map is a direct reference to the internal map, so modifications to it will + * affect the internal state. + * * @param tableName the name of the table * @return a {@link Map} of column names to their respective {@link DataType}s, or {@code null} if - * the table does not exist + * the table does not exist in the mapping + * @throws NullPointerException if tableName is null */ public Map getColumnDataTypes(String tableName) { return dataTypesByColumnsByTable.get(tableName); diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportStorageTask.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportStorageTask.java index 3f3ec0601b..98d982cac0 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportStorageTask.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportStorageTask.java @@ -9,10 +9,22 @@ import java.util.Optional; /** - * An import task that interacts with a {@link DistributedStorage} for data retrieval and storage. + * An import task that interacts with a {@link DistributedStorage} for data retrieval and storage + * operations. * - *

This class extends {@link ImportTask} and overrides methods to fetch and store records using - * the provided {@code DistributedStorage} instance. + *

This class extends {@link ImportTask} and provides concrete implementations for fetching and + * storing records using a {@link DistributedStorage} instance. It acts as a bridge between the + * import process and the underlying distributed storage system. + * + *

The task handles both read and write operations: + * + *

    + *
  • Reading existing records using partition and clustering keys + *
  • Storing new or updated records with their associated columns + *
+ * + *

All storage operations are performed through the provided {@link DistributedStorage} instance, + * which must be properly initialized before creating this task. */ public class ImportStorageTask extends ImportTask { @@ -21,8 +33,9 @@ public class ImportStorageTask extends ImportTask { /** * Constructs an {@code ImportStorageTask} with the specified parameters and storage. * - * @param params the import task parameters - * @param storage the distributed storage to be used for data operations + * @param params the import task parameters containing configuration and DAO objects + * @param storage the distributed storage instance to be used for data operations + * @throws NullPointerException if either params or storage is null */ public ImportStorageTask(ImportTaskParams params, DistributedStorage storage) { super(params); @@ -30,15 +43,20 @@ public ImportStorageTask(ImportTaskParams params, DistributedStorage storage) { } /** - * Retrieves a data record from the distributed storage. + * Retrieves a data record from the distributed storage using the specified keys. * - * @param namespace the namespace of the table - * @param tableName the name of the table - * @param partitionKey the partition key of the record - * @param clusteringKey the clustering key of the record + *

This method attempts to fetch a single record from the specified table using both partition + * and clustering keys. The operation is performed through the configured DAO using the associated + * storage instance. + * + * @param namespace the namespace of the table to query + * @param tableName the name of the table to query + * @param partitionKey the partition key identifying the record's partition + * @param clusteringKey the clustering key for further record identification within the partition * @return an {@link Optional} containing the {@link Result} if the record exists, otherwise an * empty {@link Optional} - * @throws ScalarDBDaoException if an error occurs during retrieval + * @throws ScalarDBDaoException if an error occurs during the retrieval operation, such as + * connection issues or invalid table/namespace */ @Override protected Optional getDataRecord( @@ -48,14 +66,19 @@ protected Optional getDataRecord( } /** - * Saves a record into the distributed storage. + * Saves a record into the distributed storage with the specified keys and columns. + * + *

This method writes or updates a record in the specified table using the provided keys and + * column values. The operation is performed through the configured DAO using the associated + * storage instance. * - * @param namespace the namespace of the table - * @param tableName the name of the table - * @param partitionKey the partition key of the record - * @param clusteringKey the clustering key of the record - * @param columns the list of columns to be saved - * @throws ScalarDBDaoException if an error occurs during the save operation + * @param namespace the namespace of the target table + * @param tableName the name of the target table + * @param partitionKey the partition key determining where the record will be stored + * @param clusteringKey the clustering key for organizing records within the partition + * @param columns the list of columns containing the record's data to be saved + * @throws ScalarDBDaoException if an error occurs during the save operation, such as connection + * issues, invalid data types, or constraint violations */ @Override protected void saveRecord( diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTask.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTask.java index c1c7f261df..3be177a00a 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTask.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTask.java @@ -33,12 +33,24 @@ import java.util.Optional; import lombok.RequiredArgsConstructor; +/** + * Abstract base class for handling data import tasks into ScalarDB tables. This class provides + * functionality to import data into single or multiple tables based on the provided import options + * and control file configurations. 
+ */ @RequiredArgsConstructor public abstract class ImportTask { protected final ImportTaskParams params; - /** Executes the import task, ie import data to database tables */ + /** + * Executes the import task by importing data into one or more database tables. If a control file + * is specified in the import options, performs a multi-table import. Otherwise, performs a single + * table import. + * + * @return ImportTaskResult containing the results of the import operation including + * success/failure status and any error messages for each target table + */ public ImportTaskResult execute() { ObjectNode mutableSourceRecord = params.getSourceRecord().deepCopy(); @@ -81,13 +93,14 @@ public ImportTaskResult execute() { } /** - * @param controlFile control file which is used to map source data columns to columns of tables - * to which data is imported - * @param tableMetadataByTableName a map of table metadata with table name as key - * @param tableColumnDataTypes a map with table name as key that contains a map of column names - * and their data types - * @param mutableSourceRecord mutable source record data - * @return result object of import + * Processes multi-table import based on the control file configuration. For each table specified + * in the control file, validates the source data and performs the import operation. 
+ * + * @param controlFile control file which maps source data columns to target table columns + * @param tableMetadataByTableName map of table metadata indexed by table name + * @param tableColumnDataTypes map of column data types indexed by table name + * @param mutableSourceRecord source record data that can be modified during import + * @return List of ImportTargetResult objects containing the results for each table import */ private List startMultiTableImportProcess( ControlFile controlFile, @@ -139,14 +152,18 @@ private List startMultiTableImportProcess( } /** - * @param namespace Namespace name - * @param table table name - * @param tableMetadata metadata of the table - * @param dataTypeByColumnName a map with table name as key that contains a map of column names - * and their data types - * @param controlFileTable the control file table containing column mappings - * @param mutableSourceRecord mutable source record - * @return result of the import + * Imports data into a single table with validation and error handling. The method performs the + * following steps: 1. Validates table metadata and source record 2. Creates partition and + * clustering keys 3. Determines whether to insert or update based on existing data 4. 
Applies the + * import operation according to specified import mode + * + * @param namespace database namespace name + * @param table target table name + * @param tableMetadata metadata describing the table structure + * @param dataTypeByColumnName map of column names to their data types + * @param controlFileTable optional control file table configuration for column mapping + * @param mutableSourceRecord source record to be imported + * @return ImportTargetResult containing the result of the import operation */ private ImportTargetResult importIntoSingleTable( String namespace, @@ -417,10 +434,30 @@ private boolean shouldFailForMissingData( && importOptions.getImportMode() == ImportMode.UPDATE; } + /** + * Retrieves an existing record from the database if it exists. + * + * @param namespace the database namespace + * @param tableName the target table name + * @param partitionKey the partition key for the record + * @param clusteringKey the clustering key for the record (can be null) + * @return Optional containing the Result if found, empty if not found + * @throws ScalarDBDaoException if there is an error accessing the database + */ protected abstract Optional getDataRecord( String namespace, String tableName, Key partitionKey, Key clusteringKey) throws ScalarDBDaoException; + /** + * Saves a record to the database, either as an insert or update operation. 
+ * + * @param namespace the database namespace + * @param tableName the target table name + * @param partitionKey the partition key for the record + * @param clusteringKey the clustering key for the record (can be null) + * @param columns the columns and their values to be saved + * @throws ScalarDBDaoException if there is an error saving to the database + */ protected abstract void saveRecord( String namespace, String tableName, diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTaskParams.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTaskParams.java index f85671140d..eafe3a42ae 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTaskParams.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTaskParams.java @@ -10,15 +10,32 @@ import lombok.NonNull; import lombok.Value; +/** + * Parameters required for executing an import task in the data loader. This class encapsulates all + * necessary information needed to process and import a single record into ScalarDB. 
+ */ @Builder @Value public class ImportTaskParams { + /** The source record to be imported, represented as a JSON node */ @NonNull JsonNode sourceRecord; + + /** Identifier for the current chunk of data being processed */ int dataChunkId; + + /** The row number of the current record in the source data */ int rowNumber; + + /** Configuration options for the import process */ @NonNull ImportOptions importOptions; + + /** Mapping of table names to their corresponding metadata */ @NonNull Map tableMetadataByTableName; + + /** Data type information for table columns */ @NonNull TableColumnDataTypes tableColumnDataTypes; + + /** Data Access Object for interacting with ScalarDB */ @NonNull ScalarDBDao dao; } diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTransactionalTask.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTransactionalTask.java index 41c56c7312..449270d929 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTransactionalTask.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/task/ImportTransactionalTask.java @@ -11,10 +11,15 @@ import java.util.Optional; /** - * An import task that operates within a {@link DistributedTransaction}. + * An import task that operates within a {@link DistributedTransaction} context. * - *

This class extends {@link ImportTask} and overrides methods to fetch and store records using a - * transactional context. + *

This class extends {@link ImportTask} and provides transactional semantics for data import + * operations. It ensures that all data operations (get and put) are executed within the same + * transaction context, maintaining ACID properties. + * + *

The task uses a single {@link DistributedTransaction} instance throughout its lifecycle, which + * is passed during construction. This transaction must be managed (committed or aborted) by the + * caller. */ public class ImportTransactionalTask extends ImportTask { @@ -23,8 +28,9 @@ public class ImportTransactionalTask extends ImportTask { /** * Constructs an {@code ImportTransactionalTask} with the specified parameters and transaction. * - * @param params the import task parameters - * @param transaction the distributed transaction to be used for data operations + * @param params the import task parameters containing configuration and DAO objects + * @param transaction the distributed transaction to be used for all data operations. This + * transaction should be properly managed (committed/aborted) by the caller */ public ImportTransactionalTask(ImportTaskParams params, DistributedTransaction transaction) { super(params); @@ -32,15 +38,19 @@ public ImportTransactionalTask(ImportTaskParams params, DistributedTransaction t } /** - * Retrieves a data record within the active transaction. + * Retrieves a data record within the active transaction context. + * + *

This method overrides the base implementation to ensure the get operation is executed within + * the transaction context provided during construction. * - * @param namespace the namespace of the table - * @param tableName the name of the table - * @param partitionKey the partition key of the record - * @param clusteringKey the clustering key of the record + * @param namespace the namespace of the table to query + * @param tableName the name of the table to query + * @param partitionKey the partition key identifying the record's partition + * @param clusteringKey the clustering key for further record identification within the partition * @return an {@link Optional} containing the {@link Result} if the record exists, otherwise an * empty {@link Optional} - * @throws ScalarDBDaoException if an error occurs during retrieval + * @throws ScalarDBDaoException if an error occurs during the database operation or if the + * transaction encounters any issues */ @Override protected Optional getDataRecord( @@ -50,14 +60,18 @@ protected Optional getDataRecord( } /** - * Saves a record within the active transaction. + * Saves a record within the active transaction context. * - * @param namespace the namespace of the table - * @param tableName the name of the table - * @param partitionKey the partition key of the record - * @param clusteringKey the clustering key of the record - * @param columns the list of columns to be saved - * @throws ScalarDBDaoException if an error occurs during the save operation + *

This method overrides the base implementation to ensure the put operation is executed within + * the transaction context provided during construction. + * + * @param namespace the namespace of the target table + * @param tableName the name of the target table + * @param partitionKey the partition key determining where the record will be stored + * @param clusteringKey the clustering key for ordering/organizing records within the partition + * @param columns the list of columns containing the actual data to be saved + * @throws ScalarDBDaoException if an error occurs during the database operation or if the + * transaction encounters any issues */ @Override protected void saveRecord( @@ -73,8 +87,12 @@ protected void saveRecord( /** * Aborts the active ScalarDB transaction if it has not been committed. * - * @param tx the transaction to be aborted - * @throws TransactionException if an error occurs during the aborting process + *

This method provides a safe way to abort an active transaction, handling any abort-related + * exceptions by wrapping them in a {@link TransactionException}. + * + * @param tx the transaction to be aborted. If null, this method does nothing + * @throws TransactionException if an error occurs during the abort operation or if the underlying + * abort operation fails */ private void abortActiveTransaction(DistributedTransaction tx) throws TransactionException { if (tx != null) { diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ColumnUtils.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ColumnUtils.java index a36dc56135..21c7bef658 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ColumnUtils.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ColumnUtils.java @@ -36,9 +36,22 @@ /** * Utility class for creating and managing ScalarDB columns. * - *

This class provides methods for creating ScalarDB columns based on the given data type, column - * information, and value. It includes handling for various data types and special cases like base64 - * encoding for BLOB data. + *

This class provides utility methods for: + * + *

    + *
  • Creating ScalarDB columns from various data types and values + *
  • Converting between ScalarDB Result objects and column data + *
  • Handling special data formats like base64 encoding for BLOB data + *
  • Managing transaction-related metadata columns + *
+ * + *

The class supports all ScalarDB data types including: + * + *

    + *
  • Basic types: BOOLEAN, INT, BIGINT, FLOAT, DOUBLE, TEXT + *
  • Binary data: BLOB (requires base64 encoding) + *
  • Temporal types: DATE, TIME, TIMESTAMP, TIMESTAMPTZ + *
*/ public final class ColumnUtils { @@ -48,15 +61,22 @@ private ColumnUtils() {} /** * Creates a ScalarDB column from the given data type, column information, and value. * - *

Blob source values need to be base64 encoded before passing them as a value. If the value is - * {@code null}, the corresponding column is created as a {@code null} column. + *

This method handles the creation of columns for all supported ScalarDB data types. For BLOB + * type columns, the input value must be base64 encoded before being passed to this method. * - * @param dataType the data type of the specified column - * @param columnInfo the ScalarDB table column information - * @param value the value for the ScalarDB column (maybe {@code null}) + *

If the provided value is {@code null}, a null column of the appropriate type is created. + * + * @param dataType the data type of the specified column (e.g., BOOLEAN, INT, TEXT, etc.) + * @param columnInfo the ScalarDB table column information containing column name and metadata + * @param value the string representation of the value for the ScalarDB column (maybe {@code + * null}) * @return the ScalarDB column created from the specified data - * @throws ColumnParsingException if an error occurs while creating the column or parsing the - * value + * @throws ColumnParsingException if an error occurs while creating the column, such as: + *

    + *
  • Invalid number format for numeric types + *
  • Invalid base64 encoding for BLOB type + *
  • Invalid date/time format for temporal types + *
*/ public static Column createColumnFromValue( DataType dataType, ColumnInfo columnInfo, @Nullable String value) @@ -124,13 +144,25 @@ public static Column createColumnFromValue( } /** - * Get columns from result data + * Retrieves columns from a ScalarDB Result object, comparing with source data and handling + * metadata. * - * @param scalarDBResult result record - * @param sourceRecord source data - * @param ignoreNullValues ignore null values or not - * @return list of columns - * @throws Base64Exception if an error occurs while base64 decoding + *

This method processes the result data while: + * + *

    + *
  • Excluding transaction metadata columns + *
  • Excluding partition and clustering key columns + *
  • Handling null values based on the ignoreNullValues parameter + *
  • Merging data from both ScalarDB Result and source record + *
+ * + * @param scalarDBResult the ScalarDB Result object containing the current data + * @param sourceRecord the source data in JSON format to compare against + * @param ignoreNullValues if true, null values will be excluded from the result + * @param tableMetadata metadata about the table structure and column types + * @return a List of Column objects representing the processed data + * @throws Base64Exception if there's an error processing base64 encoded BLOB data + * @throws ColumnParsingException if there's an error parsing column values */ public static List> getColumnsFromResult( Result scalarDBResult, @@ -166,11 +198,19 @@ public static List> getColumnsFromResult( } /** - * Create a set of columns to ignore + * Creates a set of column names that should be ignored during processing. + * + *

This method combines: * - * @param partitionKeyNames a set of partition key names - * @param clusteringKeyNames a set of clustering key names - * @return a set of columns to ignore + *

    + *
  • Transaction metadata columns + *
  • Partition key columns + *
  • Clustering key columns + *
+ * + * @param partitionKeyNames set of column names that are partition keys + * @param clusteringKeyNames set of column names that are clustering keys + * @return a Set of column names that should be ignored during processing */ private static Set getColumnsToIgnore( Set partitionKeyNames, Set clusteringKeyNames) { @@ -182,15 +222,22 @@ private static Set getColumnsToIgnore( } /** - * Get columns from result data + * Retrieves a column value by comparing ScalarDB Result data with source record data. * - * @param scalarDBResult result record - * @param sourceRecord source data - * @param columnName column name - * @param ignoreNullValues ignore null values or not - * @param dataTypesByColumns data types of columns - * @return column data - * @throws ColumnParsingException if an error occurs while base64 parsing the column + *

This method determines which data source to use for the column value: + * + *

    + *
  • If the column exists in ScalarDB Result but not in source record, uses Result data + *
  • Otherwise, uses the source record data + *
+ * + * @param scalarDBResult the ScalarDB Result object containing current data + * @param sourceRecord the source data in JSON format + * @param columnName the name of the column to retrieve + * @param ignoreNullValues whether to ignore null values in the result + * @param dataTypesByColumns mapping of column names to their data types + * @return the Column object containing the value, or null if ignored + * @throws ColumnParsingException if there's an error parsing the column value */ private static Column getColumn( Result scalarDBResult, diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/KeyUtils.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/KeyUtils.java index 3c13d3f0f5..e3433a31b5 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/KeyUtils.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/KeyUtils.java @@ -22,8 +22,17 @@ /** * Utility class for creating and managing ScalarDB keys. * - *

This class provides methods to parse key-value pairs and create ScalarDB key instances. It - * also includes utility methods for handling data types, columns, and potential parsing exceptions. + *

This class provides utility methods for: + * + *

    + *
  • Creating partition and clustering keys from source records + *
  • Parsing key-value pairs into ScalarDB Key instances + *
  • Creating composite keys from multiple columns + *
+ * + *

The class handles proper type conversion and validation of keys according to the table + * metadata and column data types. It also provides comprehensive error handling for various + * key-related operations. */ public final class KeyUtils { @@ -33,11 +42,15 @@ private KeyUtils() {} /** * Creates an {@link Optional} clustering key from the given source record. * - * @param clusteringKeyNames A set of column names that make up the clustering key. - * @param dataTypeByColumnName A map defining the data type for each column name. - * @param sourceRecord The source record containing the data. - * @return An {@link Optional} containing the clustering key if clustering keys exist, otherwise - * {@link Optional#empty()}. + *

This method constructs a clustering key by extracting values from the source record for each + * clustering key column. If any required clustering key column is missing from the source record + * or if there's an error in data conversion, an empty Optional is returned. + * + * @param clusteringKeyNames A set of column names that make up the clustering key + * @param dataTypeByColumnName A map defining the data type for each column name + * @param sourceRecord The source record containing the column values + * @return An {@link Optional} containing the clustering key if all required columns exist and are + * valid, otherwise {@link Optional#empty()} */ public static Optional createClusteringKeyFromSource( Set clusteringKeyNames, @@ -51,10 +64,15 @@ public static Optional createClusteringKeyFromSource( /** * Creates an {@link Optional} partition key from the given source record. * - * @param partitionKeyNames A set of column names that make up the partition key. - * @param dataTypeByColumnName A map defining the data type for each column name. - * @param sourceRecord The source record containing the data. - * @return An {@link Optional} containing the partition key. + *

This method constructs a partition key by extracting values from the source record for each + * partition key column. If any required partition key column is missing from the source record or + * if there's an error in data conversion, an empty Optional is returned. + * + * @param partitionKeyNames A set of column names that make up the partition key + * @param dataTypeByColumnName A map defining the data type for each column name + * @param sourceRecord The source record containing the column values + * @return An {@link Optional} containing the partition key if all required columns exist and are + * valid, otherwise {@link Optional#empty()} */ public static Optional createPartitionKeyFromSource( Set partitionKeyNames, @@ -64,20 +82,25 @@ public static Optional createPartitionKeyFromSource( } /** - * Converts a key-value pair, in the format of =, into a ScalarDB Key instance for a - * specific ScalarDB table. + * Converts a key-value pair into a ScalarDB Key instance for a specific ScalarDB table. * - *

This method uses the provided table metadata to determine the data type for the key and - * creates a corresponding ScalarDB Key. If the key does not match any column in the table - * metadata, a {@link KeyParsingException} is thrown. + *

This method performs the following steps: * - * @param columnKeyValue a key-value pair in the format of = - * @param namespace the name of the ScalarDB namespace - * @param tableName the name of the ScalarDB table - * @param tableMetadata metadata for the ScalarDB table - * @return a new ScalarDB Key instance formatted according to the data type - * @throws KeyParsingException if there is an error parsing the key value or if the column does - * not exist + *

    + *
  1. Validates that the column exists in the table metadata + *
  2. Determines the correct data type for the column + *
  3. Converts the value to the appropriate type + *
  4. Creates and returns a new ScalarDB Key instance + *
+ * + * @param columnKeyValue A key-value pair containing the column name and value + * @param namespace The name of the ScalarDB namespace + * @param tableName The name of the ScalarDB table + * @param tableMetadata Metadata for the ScalarDB table + * @return A new ScalarDB Key instance formatted according to the data type, or null if + * columnKeyValue is null + * @throws KeyParsingException If the column doesn't exist in the table or if there's an error + * parsing the value */ @Nullable public static Key parseKeyValue( @@ -108,14 +131,16 @@ public static Key parseKeyValue( /** * Creates a ScalarDB key based on the provided data type, column information, and value. * - *

This method creates a ScalarDB Key instance by converting the column value to the - * appropriate data type and constructing the key using that value. + *

This method handles the conversion of string values to their appropriate ScalarDB data types + * and constructs a single-column key. The method ensures type safety and proper formatting of the + * key value according to the specified data type. * - * @param dataType the data type of the specified column - * @param columnInfo the ScalarDB table column information - * @param value the value for the ScalarDB key - * @return a ScalarDB Key instance - * @throws KeyParsingException if there is an error while creating the ScalarDB key + * @param dataType The data type of the specified column + * @param columnInfo The ScalarDB table column information + * @param value The string value to be converted and used as the key + * @return A ScalarDB Key instance containing the converted value + * @throws KeyParsingException If there's an error converting the value to the specified data type + * or creating the key */ public static Key createKey(DataType dataType, ColumnInfo columnInfo, String value) throws KeyParsingException { @@ -128,13 +153,28 @@ public static Key createKey(DataType dataType, ColumnInfo columnInfo, String val } /** - * Create a new composite ScalarDB key. + * Creates a new composite ScalarDB key from multiple columns. + * + *

This method creates a composite key by combining multiple columns, each with its own data + * type and value. The method requires that all input lists (dataTypes, columnNames, and values) + * have the same length. If the lists are not of equal length, an empty Optional is returned. + * + *

The method performs the following for each column: + * + *

    + *
  1. Creates a ColumnInfo instance + *
  2. Converts the string value to the appropriate data type + *
  3. Adds the converted value to the composite key + *
* - * @param dataTypes List of data types for the columns - * @param columnNames List of column names - * @param values List of key values - * @return ScalarDB Key instance, or empty if the provided arrays are not of the same length - * @throws Base64Exception if there is an error creating the key values + * @param dataTypes List of data types for each column in the composite key + * @param columnNames List of column names corresponding to each data type + * @param values List of string values to be converted and used in the key + * @return An Optional containing the composite ScalarDB Key if successful, or empty if the input + * lists have different lengths + * @throws Base64Exception If there's an error processing Base64-encoded values + * @throws ColumnParsingException If there's an error converting any value to its specified data + * type */ public static Optional createCompositeKey( List dataTypes, List columnNames, List values) diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/DefaultImportProcessorFactoryTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/DefaultImportProcessorFactoryTest.java index e78b019dd1..3e6ed5bcc8 100644 --- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/DefaultImportProcessorFactoryTest.java +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/DefaultImportProcessorFactoryTest.java @@ -7,6 +7,10 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +/** + * Unit tests for {@link DefaultImportProcessorFactory} class. Tests the factory's ability to create + * appropriate import processors based on different file formats. 
+ */ class DefaultImportProcessorFactoryTest { private DefaultImportProcessorFactory factory; @@ -16,6 +20,10 @@ void setUp() { factory = new DefaultImportProcessorFactory(); } + /** + * Tests that the factory creates a {@link JsonLinesImportProcessor} when JSONL format is + * specified. + */ @Test void createImportProcessor_givenFileFormatIsJsonl_shouldReturnJsonLinesImportProcessor() { // Arrange @@ -30,6 +38,7 @@ void createImportProcessor_givenFileFormatIsJsonl_shouldReturnJsonLinesImportPro assertInstanceOf(JsonLinesImportProcessor.class, result); } + /** Tests that the factory creates a {@link JsonImportProcessor} when JSON format is specified. */ @Test void createImportProcessor_givenFileFormatIsJson_shouldReturnJsonImportProcessor() { // Given @@ -44,6 +53,7 @@ void createImportProcessor_givenFileFormatIsJson_shouldReturnJsonImportProcessor assertInstanceOf(JsonImportProcessor.class, result); } + /** Tests that the factory creates a {@link CsvImportProcessor} when CSV format is specified. */ @Test void createImportProcessor_givenFileFormatIsCsv_shouldReturnCsvImportProcessor() { // Given diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/TableColumnDataTypesTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/TableColumnDataTypesTest.java index 2d72827f4f..687397523f 100644 --- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/TableColumnDataTypesTest.java +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/TableColumnDataTypesTest.java @@ -4,10 +4,18 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +/** + * Unit tests for the TableColumnDataTypes class which manages data type mappings for table columns. + */ class TableColumnDataTypesTest { TableColumnDataTypes tableColumnDataTypes; + /** + * Tests that column data types can be successfully added and retrieved for a table. 
Verifies that + * the correct data type is returned for a specific column after adding multiple column + * definitions. + */ @Test void addColumnDataType_withValidData_shouldAddColumnDataType() { tableColumnDataTypes = new TableColumnDataTypes(); @@ -17,6 +25,10 @@ void addColumnDataType_withValidData_shouldAddColumnDataType() { DataType.BIGINT, tableColumnDataTypes.getColumnDataTypes("table").get("id")); } + /** + * Tests the retrieval of a data type for a specific table and column combination. Verifies that + * the correct data type is returned when the table and column exist in the mapping. + */ @Test void getDataType_withValidTableAndColumnName_shouldReturnCorrectDataType() { tableColumnDataTypes = new TableColumnDataTypes(); @@ -25,6 +37,10 @@ void getDataType_withValidTableAndColumnName_shouldReturnCorrectDataType() { Assertions.assertEquals(DataType.TEXT, tableColumnDataTypes.getDataType("table", "name")); } + /** + * Tests the behavior when attempting to retrieve a data type for a non-existent table and column + * combination. Verifies that null is returned when the requested mapping doesn't exist. + */ @Test void getDataType_withInvalidTableAndColumnName_shouldReturnCorrectDataType() { tableColumnDataTypes = new TableColumnDataTypes(); diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ColumnUtilsTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ColumnUtilsTest.java index dd1a981658..44d8a90c94 100644 --- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ColumnUtilsTest.java +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ColumnUtilsTest.java @@ -39,6 +39,10 @@ import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; +/** + * Unit tests for the ColumnUtils class which handles column creation and manipulation. Tests + * various data type conversions and error handling scenarios. 
+ */ class ColumnUtilsTest { private static final float FLOAT_VALUE = 2.78f; @@ -47,6 +51,13 @@ class ColumnUtilsTest { private static final Map> values = UnitTestUtils.createTestValues(); private static final Result scalarDBResult = new ResultImpl(values, mockMetadata); + /** + * Provides test cases for column creation with different data types and values. Each test case + * includes: - The target DataType - Column name - Input value (as string) - Expected Column + * object + * + * @return Stream of Arguments containing test parameters + */ private static Stream provideColumnsForCreateColumnFromValue() { return Stream.of( Arguments.of(DataType.BOOLEAN, "boolColumn", "true", BooleanColumn.of("boolColumn", true)), @@ -114,6 +125,16 @@ private static Stream provideColumnsForCreateColumnFromValue() { TimestampTZColumn.ofNull("timestampTZColumn"))); } + /** + * Tests column creation from string values for various data types. Verifies that the created + * column matches the expected column with correct type and value. + * + * @param dataType The target ScalarDB data type + * @param columnName Name of the column + * @param value String value to convert + * @param expectedColumn Expected Column object after conversion + * @throws ColumnParsingException if the value cannot be parsed into the target data type + */ @ParameterizedTest @MethodSource("provideColumnsForCreateColumnFromValue") void createColumnFromValue_validInput_returnsColumn( @@ -124,6 +145,10 @@ void createColumnFromValue_validInput_returnsColumn( assertEquals(expectedColumn, actualColumn); } + /** + * Tests that attempting to create a numeric column with an invalid number format throws a + * ColumnParsingException with appropriate error message. 
+ */ @Test void createColumnFromValue_invalidNumberFormat_throwsNumberFormatException() { String columnName = "intColumn"; @@ -140,6 +165,10 @@ void createColumnFromValue_invalidNumberFormat_throwsNumberFormatException() { exception.getMessage()); } + /** + * Tests that attempting to create a BLOB column with invalid Base64 encoding throws a + * ColumnParsingException with appropriate error message. + */ @Test void createColumnFromValue_invalidBase64_throwsBase64Exception() { String columnName = "blobColumn"; @@ -156,6 +185,13 @@ void createColumnFromValue_invalidBase64_throwsBase64Exception() { exception.getMessage()); } + /** + * Tests the extraction of columns from a ScalarDB Result object. Verifies that all columns are + * correctly extracted and converted from the source record. + * + * @throws Base64Exception if BLOB data contains invalid Base64 encoding + * @throws ColumnParsingException if any column value cannot be parsed into its target data type + */ @Test void getColumnsFromResult_withValidData_shouldReturnColumns() throws Base64Exception, ColumnParsingException { diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/KeyUtilsTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/KeyUtilsTest.java index 07d2e1e1ad..eb19b12c85 100644 --- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/KeyUtilsTest.java +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/KeyUtilsTest.java @@ -33,6 +33,10 @@ import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; +/** + * Unit tests for the KeyUtils class which handles parsing and creation of database keys. Tests + * cover various data types and key creation scenarios including partition and clustering keys. 
+ */ @ExtendWith(MockitoExtension.class) class KeyUtilsTest { @@ -40,11 +44,17 @@ class KeyUtilsTest { private static final Map dataTypeByColumnName = UnitTestUtils.getColumnData(); private static final ObjectNode sourceRecord = UnitTestUtils.getOutputDataWithMetadata(); + /** Tests that parsing a null key value returns null. */ @Test void parseKeyValue_nullKeyValue_returnsNull() throws KeyParsingException { assertNull(KeyUtils.parseKeyValue(null, null, null, tableMetadata)); } + /** + * Tests that attempting to parse a key value with an invalid column name throws + * KeyParsingException. The exception should contain appropriate error message with namespace and + * table details. + */ @Test void parseKeyValue_invalidColumnName_throwsKeyParsingException() { String columnName = "invalidColumn"; @@ -61,6 +71,7 @@ void parseKeyValue_invalidColumnName_throwsKeyParsingException() { exception.getMessage()); } + /** Tests successful parsing of a valid key value with TEXT data type. */ @Test void parseKeyValue_validKeyValue_returnsKey() throws KeyParsingException { String columnName = "columnName"; @@ -75,6 +86,7 @@ void parseKeyValue_validKeyValue_returnsKey() throws KeyParsingException { assertEquals(expected, actual); } + /** Tests creation of a key with BOOLEAN data type. */ @Test void createKey_boolean_returnsKey() throws KeyParsingException { String columnName = "booleanColumn"; @@ -85,6 +97,7 @@ void createKey_boolean_returnsKey() throws KeyParsingException { assertEquals(expected, actual); } + /** Tests creation of a key with INT data type. */ @Test void createKey_int_returnsKey() throws KeyParsingException { String columnName = "intColumn"; @@ -95,6 +108,7 @@ void createKey_int_returnsKey() throws KeyParsingException { assertEquals(expected, actual); } + /** Tests creation of a key with BIGINT data type. 
*/ @Test void createKey_bigint_returnsKey() throws KeyParsingException { String columnName = "bigintColumn"; @@ -105,6 +119,7 @@ void createKey_bigint_returnsKey() throws KeyParsingException { assertEquals(expected, actual); } + /** Tests creation of a key with FLOAT data type. */ @Test void createKey_float_returnsKey() throws KeyParsingException { String columnName = "floatColumn"; @@ -115,6 +130,7 @@ void createKey_float_returnsKey() throws KeyParsingException { assertEquals(expected, actual); } + /** Tests creation of a key with DOUBLE data type. */ @Test void createKey_double_returnsKey() throws KeyParsingException { String columnName = "doubleColumn"; @@ -125,6 +141,7 @@ void createKey_double_returnsKey() throws KeyParsingException { assertEquals(expected, actual); } + /** Tests creation of a key with TEXT data type. */ @Test void createKey_text_returnsKey() throws KeyParsingException { String columnName = "textColumn"; @@ -135,6 +152,7 @@ void createKey_text_returnsKey() throws KeyParsingException { assertEquals(expected, actual); } + /** Tests creation of a key with BLOB data type using Base64 encoded input. */ @Test void createKey_blob_returnsKey() throws KeyParsingException { String columnName = "blobColumn"; @@ -149,6 +167,10 @@ void createKey_blob_returnsKey() throws KeyParsingException { assertEquals(expected, actual); } + /** + * Tests that attempting to create a BLOB key with invalid Base64 input throws + * KeyParsingException. + */ @Test void createKey_invalidBase64_throwsBase64Exception() { String columnName = "blobColumn"; @@ -158,12 +180,17 @@ void createKey_invalidBase64_throwsBase64Exception() { KeyParsingException.class, () -> KeyUtils.createKey(DataType.BLOB, columnInfo, value)); } + /** Tests that creating a clustering key from an empty set returns an empty Optional. 
*/ @Test void createClusteringKeyFromSource_withEmptyClusteringKeySet_shouldReturnEmpty() { Optional key = KeyUtils.createClusteringKeyFromSource(Collections.EMPTY_SET, null, null); assertEquals(Optional.empty(), key); } + /** + * Tests creation of a clustering key from a valid set of clustering columns. Verifies that the + * resulting key contains the expected INT and BOOLEAN values. + */ @Test void createClusteringKeyFromSource_withValidClusteringKeySet_shouldReturnValidKey() { Set clusterKeySet = new HashSet<>(); @@ -176,6 +203,9 @@ void createClusteringKeyFromSource_withValidClusteringKeySet_shouldReturnValidKe key.toString()); } + /** + * Tests that attempting to create a partition key with invalid data returns an empty Optional. + */ @Test void createPartitionKeyFromSource_withInvalidData_shouldReturnEmpty() { Set partitionKeySet = new HashSet<>(); @@ -185,6 +215,10 @@ void createPartitionKeyFromSource_withInvalidData_shouldReturnEmpty() { assertEquals(Optional.empty(), key); } + /** + * Tests creation of a partition key from valid data. Verifies that the resulting key contains the + * expected BIGINT value. 
+ */ @Test void createPartitionKeyFromSource_withValidData_shouldReturnValidKey() { Set partitionKeySet = new HashSet<>(); From 851b6910a5c01e82405d936e3e958345e72e0f03 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Tue, 25 Mar 2025 14:27:29 +0530 Subject: [PATCH 77/87] Changes added --- .../core/dataimport/ImportManager.java | 31 ++-- .../processor/CsvImportProcessor.java | 67 +++---- .../dataimport/processor/ImportProcessor.java | 131 ++++++++------ .../processor/JsonImportProcessor.java | 66 +++---- .../processor/JsonLinesImportProcessor.java | 66 +++---- .../db/dataloader/core/util/ColumnUtils.java | 2 +- .../db/dataloader/core/UnitTestUtils.java | 171 ++++++++++++++++++ .../processor/CsvImportProcessorTest.java | 119 ++++++++++++ .../processor/JsonImportProcessorTest.java | 119 ++++++++++++ .../JsonLinesImportProcessorTest.java | 119 ++++++++++++ .../dataloader/core/util/ColumnUtilsTest.java | 1 - 11 files changed, 677 insertions(+), 215 deletions(-) create mode 100644 data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessorTest.java create mode 100644 data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessorTest.java create mode 100644 data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessorTest.java diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportManager.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportManager.java index 9deb18e8f4..51da88c66d 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportManager.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportManager.java @@ -17,6 +17,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutionException; import 
lombok.AllArgsConstructor; import lombok.NonNull; @@ -46,7 +47,8 @@ public class ImportManager implements ImportEventListener { private final ScalarDBMode scalarDBMode; private final DistributedStorage distributedStorage; private final DistributedTransactionManager distributedTransactionManager; - private final List importDataChunkStatusList = new ArrayList<>(); + private final ConcurrentHashMap importDataChunkStatusMap = + new ConcurrentHashMap<>(); /** * Starts the import process using the configured parameters. @@ -55,12 +57,13 @@ public class ImportManager implements ImportEventListener { * processed as a single chunk. Otherwise, the file will be processed in chunks of the specified * size. * - * @return a list of {@link ImportDataChunkStatus} objects containing the status of each processed + * @return a map of {@link ImportDataChunkStatus} objects containing the status of each processed * chunk * @throws ExecutionException if there is an error during the execution of the import process * @throws InterruptedException if the import process is interrupted */ - public List startImport() throws ExecutionException, InterruptedException { + public ConcurrentHashMap startImport() + throws ExecutionException, InterruptedException { ImportProcessorParams params = ImportProcessorParams.builder() .scalarDBMode(scalarDBMode) @@ -110,22 +113,12 @@ public void onDataChunkStarted(ImportDataChunkStatus status) { } /** - * {@inheritDoc} Updates or adds the status of a data chunk in the status list. This method is + * {@inheritDoc} Updates or adds the status of a data chunk in the status map. This method is * thread-safe. 
*/ @Override public void addOrUpdateDataChunkStatus(ImportDataChunkStatus status) { - synchronized (importDataChunkStatusList) { - for (int i = 0; i < importDataChunkStatusList.size(); i++) { - if (importDataChunkStatusList.get(i).getDataChunkId() == status.getDataChunkId()) { - // Object found, replace it with the new one - importDataChunkStatusList.set(i, status); - return; - } - } - // If object is not found, add it to the list - importDataChunkStatusList.add(status); - } + importDataChunkStatusMap.put(status.getDataChunkId(), status); } /** {@inheritDoc} Forwards the event to all registered listeners. */ @@ -169,12 +162,12 @@ public void onAllDataChunksCompleted() { } /** - * Returns the current list of import data chunk status objects. + * Returns the current map of import data chunk status objects. * - * @return an unmodifiable list of {@link ImportDataChunkStatus} objects + * @return a map of {@link ImportDataChunkStatus} objects */ - public List getImportDataChunkStatusList() { - return importDataChunkStatusList; + public ConcurrentHashMap getImportDataChunkStatus() { + return importDataChunkStatusMap; } /** diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java index aed4b0b585..ecb0919bf9 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java @@ -15,13 +15,12 @@ import java.util.Optional; import java.util.concurrent.BlockingQueue; import java.util.concurrent.CompletableFuture; -import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.LinkedBlockingQueue; import 
java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; -import java.util.stream.Collectors; /** * A processor for importing CSV data into the database. @@ -41,6 +40,7 @@ public class CsvImportProcessor extends ImportProcessor { private static final DataLoaderObjectMapper OBJECT_MAPPER = new DataLoaderObjectMapper(); private static final AtomicInteger dataChunkIdCounter = new AtomicInteger(0); + private static final int MAX_QUEUE_SIZE = 10; /** * Creates a new CsvImportProcessor with the specified parameters. @@ -61,62 +61,37 @@ public CsvImportProcessor(ImportProcessorParams params) { * @param dataChunkSize the number of records to include in each data chunk * @param transactionBatchSize the number of records to include in each transaction batch * @param reader the {@link BufferedReader} used to read the source file - * @return a list of {@link ImportDataChunkStatus} objects indicating the processing status of - * each data chunk + * @return a map of {@link ImportDataChunkStatus} objects indicating the processing status of each + * data chunk */ @Override - public List process( + public ConcurrentHashMap process( int dataChunkSize, int transactionBatchSize, BufferedReader reader) { int numCores = Runtime.getRuntime().availableProcessors(); ExecutorService dataChunkExecutor = Executors.newFixedThreadPool(numCores); - BlockingQueue dataChunkQueue = new LinkedBlockingQueue<>(); - List> dataChunkFutures = new CopyOnWriteArrayList<>(); + BlockingQueue dataChunkQueue = new LinkedBlockingQueue<>(MAX_QUEUE_SIZE); try { CompletableFuture readerFuture = CompletableFuture.runAsync( () -> readDataChunks(reader, dataChunkSize, dataChunkQueue), dataChunkExecutor); - CompletableFuture processingFuture = - readerFuture.thenRunAsync( - () -> { - while (!(dataChunkQueue.isEmpty() && readerFuture.isDone())) { - try { - ImportDataChunk dataChunk = dataChunkQueue.poll(100, TimeUnit.MILLISECONDS); - if (dataChunk != null) { - dataChunkFutures.add( - 
CompletableFuture.supplyAsync( - () -> processDataChunk(dataChunk, transactionBatchSize, numCores), - dataChunkExecutor)); - } - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new RuntimeException( - CoreError.DATA_LOADER_DATA_CHUNK_PROCESS_FAILED.buildMessage( - e.getMessage()), - e); - } - } - }, - dataChunkExecutor); + ConcurrentHashMap result = new ConcurrentHashMap<>(); - processingFuture.join(); - - return CompletableFuture.allOf(dataChunkFutures.toArray(new CompletableFuture[0])) - .thenApply( - v -> - dataChunkFutures.stream() - .map( - f -> - f.exceptionally( - e -> { - System.err.println( - "Data chunk processing failed: " + e.getMessage()); - return null; - }) - .join()) - .collect(Collectors.toList())) - .join(); + while (!(dataChunkQueue.isEmpty() && readerFuture.isDone())) { + ImportDataChunk dataChunk = dataChunkQueue.poll(100, TimeUnit.MILLISECONDS); + if (dataChunk != null) { + ImportDataChunkStatus status = + processDataChunk(dataChunk, transactionBatchSize, numCores); + result.put(status.getDataChunkId(), status); + } + } + readerFuture.join(); + return result; + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RuntimeException( + CoreError.DATA_LOADER_DATA_CHUNK_PROCESS_FAILED.buildMessage(e.getMessage()), e); } finally { dataChunkExecutor.shutdown(); try { diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessor.java index 1717eea658..594e6d99db 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessor.java @@ -22,6 +22,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.concurrent.ConcurrentHashMap; import 
java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -55,13 +56,11 @@ public abstract class ImportProcessor { * @param transactionBatchSize the number of records to group together in a single transaction * (only used in transaction mode) * @param reader the {@link BufferedReader} used to read the source file - * @return a list of {@link ImportDataChunkStatus} objects indicating the processing status and + * @return a map of {@link ImportDataChunkStatus} objects indicating the processing status and * results of each data chunk */ - public List process( - int dataChunkSize, int transactionBatchSize, BufferedReader reader) { - return Collections.emptyList(); - } + public abstract ConcurrentHashMap process( + int dataChunkSize, int transactionBatchSize, BufferedReader reader); /** * Add import event listener to listener list @@ -195,9 +194,10 @@ private ImportTransactionBatchResult processTransactionBatch( List importRecordResult = new ArrayList<>(); boolean isSuccess; String error = ""; + DistributedTransaction transaction = null; try { // Create the ScalarDB transaction - DistributedTransaction transaction = params.getDistributedTransactionManager().start(); + transaction = params.getDistributedTransactionManager().start(); // Loop over the transaction batch and process each record for (ImportRow importRow : transactionBatch.getSourceData()) { @@ -233,6 +233,15 @@ private ImportTransactionBatchResult processTransactionBatch( } catch (TransactionException e) { isSuccess = false; LOGGER.error(e.getMessage()); + try { + if (transaction != null) { + transaction.abort(); // Ensure transaction is aborted + } + } catch (TransactionException abortException) { + LOGGER.error( + "Failed to abort transaction: {}", abortException.getMessage(), abortException); + } + error = e.getMessage(); } ImportTransactionBatchResult importTransactionBatchResult = ImportTransactionBatchResult.builder() @@ -323,43 
+332,46 @@ private ImportDataChunkStatus processDataChunkWithTransactions( List transactionBatches = splitIntoTransactionBatches(dataChunk, transactionBatchSize); ExecutorService transactionBatchExecutor = - Executors.newFixedThreadPool(Math.min(transactionBatches.size(), numCores)); + Executors.newFixedThreadPool(Math.min(transactionBatches.size(), numCores * 2)); List> transactionBatchFutures = new ArrayList<>(); AtomicInteger successCount = new AtomicInteger(0); AtomicInteger failureCount = new AtomicInteger(0); - for (ImportTransactionBatch transactionBatch : transactionBatches) { - Future transactionBatchFuture = - transactionBatchExecutor.submit( - () -> processTransactionBatch(dataChunk, transactionBatch)); - transactionBatchFutures.add(transactionBatchFuture); - } + try { + for (ImportTransactionBatch transactionBatch : transactionBatches) { + Future transactionBatchFuture = + transactionBatchExecutor.submit( + () -> processTransactionBatch(dataChunk, transactionBatch)); + transactionBatchFutures.add(transactionBatchFuture); + } - waitForFuturesToComplete(transactionBatchFutures); - transactionBatchExecutor.shutdown(); - transactionBatchFutures.forEach( - batchResult -> { - try { - ImportTransactionBatchResult importTransactionBatchResult = - (ImportTransactionBatchResult) batchResult.get(); - importTransactionBatchResult - .getRecords() - .forEach( - batchRecords -> { - if (batchRecords.getTargets().stream() - .allMatch( - targetResult -> - targetResult - .getStatus() - .equals(ImportTargetResultStatus.SAVED))) { - successCount.incrementAndGet(); - } else { - failureCount.incrementAndGet(); - } - }); - } catch (InterruptedException | ExecutionException e) { - throw new RuntimeException(e); - } - }); + waitForFuturesToComplete(transactionBatchFutures); + transactionBatchFutures.forEach( + batchResult -> { + try { + ImportTransactionBatchResult importTransactionBatchResult = + (ImportTransactionBatchResult) batchResult.get(); + importTransactionBatchResult + 
.getRecords() + .forEach( + batchRecords -> { + if (batchRecords.getTargets().stream() + .allMatch( + targetResult -> + targetResult + .getStatus() + .equals(ImportTargetResultStatus.SAVED))) { + successCount.incrementAndGet(); + } else { + failureCount.incrementAndGet(); + } + }); + } catch (InterruptedException | ExecutionException e) { + throw new RuntimeException(e); + } + }); + } finally { + transactionBatchExecutor.shutdown(); + } Instant endTime = Instant.now(); int totalDuration = (int) Duration.between(startTime, endTime).toMillis(); return ImportDataChunkStatus.builder() @@ -390,26 +402,29 @@ private ImportDataChunkStatus processDataChunkWithoutTransactions( AtomicInteger failureCount = new AtomicInteger(0); ExecutorService recordExecutor = Executors.newFixedThreadPool(numCores); List> recordFutures = new ArrayList<>(); - for (ImportRow importRow : dataChunk.getSourceData()) { - Future recordFuture = - recordExecutor.submit(() -> processStorageRecord(dataChunk, importRow)); - recordFutures.add(recordFuture); + try { + for (ImportRow importRow : dataChunk.getSourceData()) { + Future recordFuture = + recordExecutor.submit(() -> processStorageRecord(dataChunk, importRow)); + recordFutures.add(recordFuture); + } + waitForFuturesToComplete(recordFutures); + recordFutures.forEach( + r -> { + try { + ImportTaskResult result = (ImportTaskResult) r.get(); + boolean allSaved = + result.getTargets().stream() + .allMatch(t -> t.getStatus().equals(ImportTargetResultStatus.SAVED)); + if (allSaved) successCount.incrementAndGet(); + else failureCount.incrementAndGet(); + } catch (InterruptedException | ExecutionException e) { + throw new RuntimeException(e); + } + }); + } finally { + recordExecutor.shutdown(); } - waitForFuturesToComplete(recordFutures); - recordExecutor.shutdown(); - recordFutures.forEach( - r -> { - try { - ImportTaskResult result = (ImportTaskResult) r.get(); - boolean allSaved = - result.getTargets().stream() - .allMatch(t -> 
t.getStatus().equals(ImportTargetResultStatus.SAVED)); - if (allSaved) successCount.incrementAndGet(); - else failureCount.incrementAndGet(); - } catch (InterruptedException | ExecutionException e) { - throw new RuntimeException(e); - } - }); Instant endTime = Instant.now(); int totalDuration = (int) Duration.between(startTime, endTime).toMillis(); return ImportDataChunkStatus.builder() diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java index 70b25c4484..738924268e 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java @@ -15,13 +15,12 @@ import java.util.List; import java.util.concurrent.BlockingQueue; import java.util.concurrent.CompletableFuture; -import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; -import java.util.stream.Collectors; /** * A processor for importing JSON data into the database. 
@@ -45,6 +44,7 @@ public class JsonImportProcessor extends ImportProcessor { private static final DataLoaderObjectMapper OBJECT_MAPPER = new DataLoaderObjectMapper(); private static final AtomicInteger dataChunkIdCounter = new AtomicInteger(0); + private static final int MAX_QUEUE_SIZE = 10; public JsonImportProcessor(ImportProcessorParams params) { super(params); @@ -60,62 +60,38 @@ public JsonImportProcessor(ImportProcessorParams params) { * @param dataChunkSize the number of records to include in each data chunk * @param transactionBatchSize the number of records to include in each transaction batch * @param reader the {@link BufferedReader} used to read the source file - * @return a list of {@link ImportDataChunkStatus} objects indicating the processing status of - * each data chunk + * @return a map of {@link ImportDataChunkStatus} objects indicating the processing status of each + * data chunk */ @Override - public List process( + public ConcurrentHashMap process( int dataChunkSize, int transactionBatchSize, BufferedReader reader) { int numCores = Runtime.getRuntime().availableProcessors(); ExecutorService dataChunkExecutor = Executors.newFixedThreadPool(numCores); - BlockingQueue dataChunkQueue = new LinkedBlockingQueue<>(); - List> dataChunkFutures = new CopyOnWriteArrayList<>(); + BlockingQueue dataChunkQueue = new LinkedBlockingQueue<>(MAX_QUEUE_SIZE); try { CompletableFuture readerFuture = CompletableFuture.runAsync( () -> readDataChunks(reader, dataChunkSize, dataChunkQueue), dataChunkExecutor); - CompletableFuture processingFuture = - readerFuture.thenRunAsync( - () -> { - while (!(dataChunkQueue.isEmpty() && readerFuture.isDone())) { - try { - ImportDataChunk dataChunk = dataChunkQueue.poll(100, TimeUnit.MILLISECONDS); - if (dataChunk != null) { - dataChunkFutures.add( - CompletableFuture.supplyAsync( - () -> processDataChunk(dataChunk, transactionBatchSize, numCores), - dataChunkExecutor)); - } - } catch (InterruptedException e) { - 
Thread.currentThread().interrupt(); - throw new RuntimeException( - CoreError.DATA_LOADER_DATA_CHUNK_PROCESS_FAILED.buildMessage( - e.getMessage()), - e); - } - } - }, - dataChunkExecutor); + ConcurrentHashMap result = new ConcurrentHashMap<>(); - processingFuture.join(); + while (!(dataChunkQueue.isEmpty() && readerFuture.isDone())) { + ImportDataChunk dataChunk = dataChunkQueue.poll(100, TimeUnit.MILLISECONDS); + if (dataChunk != null) { + ImportDataChunkStatus status = + processDataChunk(dataChunk, transactionBatchSize, numCores); + result.put(status.getDataChunkId(), status); + } + } - return CompletableFuture.allOf(dataChunkFutures.toArray(new CompletableFuture[0])) - .thenApply( - v -> - dataChunkFutures.stream() - .map( - f -> - f.exceptionally( - e -> { - System.err.println( - "Data chunk processing failed: " + e.getMessage()); - return null; - }) - .join()) - .collect(Collectors.toList())) - .join(); + readerFuture.join(); + return result; + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RuntimeException( + CoreError.DATA_LOADER_DATA_CHUNK_PROCESS_FAILED.buildMessage(e.getMessage()), e); } finally { dataChunkExecutor.shutdown(); try { diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java index c556a00d18..30cb88fb37 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java @@ -12,13 +12,12 @@ import java.util.List; import java.util.concurrent.BlockingQueue; import java.util.concurrent.CompletableFuture; -import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutorService; import 
java.util.concurrent.Executors; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; -import java.util.stream.Collectors; /** * A processor for importing data from JSON Lines (JSONL) formatted files. @@ -39,6 +38,7 @@ public class JsonLinesImportProcessor extends ImportProcessor { private static final DataLoaderObjectMapper OBJECT_MAPPER = new DataLoaderObjectMapper(); private static final AtomicInteger dataChunkIdCounter = new AtomicInteger(0); + private static final int MAX_QUEUE_SIZE = 10; /** * Creates a new JsonLinesImportProcessor with the specified parameters. @@ -59,62 +59,38 @@ public JsonLinesImportProcessor(ImportProcessorParams params) { * @param dataChunkSize the number of records to include in each data chunk * @param transactionBatchSize the number of records to include in each transaction batch * @param reader the {@link BufferedReader} used to read the source file - * @return a list of {@link ImportDataChunkStatus} objects indicating the processing status of - * each data chunk + * @return a map of {@link ImportDataChunkStatus} objects indicating the processing status of each + * data chunk */ @Override - public List process( + public ConcurrentHashMap process( int dataChunkSize, int transactionBatchSize, BufferedReader reader) { int numCores = Runtime.getRuntime().availableProcessors(); ExecutorService dataChunkExecutor = Executors.newFixedThreadPool(numCores); - BlockingQueue dataChunkQueue = new LinkedBlockingQueue<>(); - List> dataChunkFutures = new CopyOnWriteArrayList<>(); + BlockingQueue dataChunkQueue = new LinkedBlockingQueue<>(MAX_QUEUE_SIZE); try { CompletableFuture readerFuture = CompletableFuture.runAsync( () -> readDataChunks(reader, dataChunkSize, dataChunkQueue), dataChunkExecutor); - CompletableFuture processingFuture = - readerFuture.thenRunAsync( - () -> { - while (!(dataChunkQueue.isEmpty() && readerFuture.isDone())) { - try { - ImportDataChunk 
dataChunk = dataChunkQueue.poll(100, TimeUnit.MILLISECONDS); - if (dataChunk != null) { - dataChunkFutures.add( - CompletableFuture.supplyAsync( - () -> processDataChunk(dataChunk, transactionBatchSize, numCores), - dataChunkExecutor)); - } - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new RuntimeException( - CoreError.DATA_LOADER_DATA_CHUNK_PROCESS_FAILED.buildMessage( - e.getMessage()), - e); - } - } - }, - dataChunkExecutor); + ConcurrentHashMap result = new ConcurrentHashMap<>(); - processingFuture.join(); + while (!(dataChunkQueue.isEmpty() && readerFuture.isDone())) { + ImportDataChunk dataChunk = dataChunkQueue.poll(100, TimeUnit.MILLISECONDS); + if (dataChunk != null) { + ImportDataChunkStatus status = + processDataChunk(dataChunk, transactionBatchSize, numCores); + result.put(status.getDataChunkId(), status); + } + } - return CompletableFuture.allOf(dataChunkFutures.toArray(new CompletableFuture[0])) - .thenApply( - v -> - dataChunkFutures.stream() - .map( - f -> - f.exceptionally( - e -> { - System.err.println( - "Data chunk processing failed: " + e.getMessage()); - return null; - }) - .join()) - .collect(Collectors.toList())) - .join(); + readerFuture.join(); + return result; + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RuntimeException( + CoreError.DATA_LOADER_DATA_CHUNK_PROCESS_FAILED.buildMessage(e.getMessage()), e); } finally { dataChunkExecutor.shutdown(); try { diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ColumnUtils.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ColumnUtils.java index 21c7bef658..0907388978 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ColumnUtils.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ColumnUtils.java @@ -240,7 +240,7 @@ private static Set getColumnsToIgnore( * @throws ColumnParsingException if there's an error parsing the 
column value */ private static Column getColumn( - Result scalarDBResult, + @Nullable Result scalarDBResult, JsonNode sourceRecord, String columnName, boolean ignoreNullValues, diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/UnitTestUtils.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/UnitTestUtils.java index 52947e139d..8438d74660 100644 --- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/UnitTestUtils.java +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/UnitTestUtils.java @@ -5,7 +5,13 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.ObjectNode; +import com.scalar.db.api.Result; import com.scalar.db.api.TableMetadata; +import com.scalar.db.common.ResultImpl; +import com.scalar.db.dataloader.core.dataimport.controlfile.ControlFile; +import com.scalar.db.dataloader.core.dataimport.controlfile.ControlFileTable; +import com.scalar.db.dataloader.core.dataimport.controlfile.ControlFileTableFieldMapping; +import com.scalar.db.dataloader.core.dataimport.processor.TableColumnDataTypes; import com.scalar.db.dataloader.core.util.DecimalUtil; import com.scalar.db.io.BigIntColumn; import com.scalar.db.io.BlobColumn; @@ -16,11 +22,14 @@ import com.scalar.db.io.DoubleColumn; import com.scalar.db.io.FloatColumn; import com.scalar.db.io.IntColumn; +import com.scalar.db.io.Key; import com.scalar.db.io.TextColumn; import com.scalar.db.io.TimeColumn; import com.scalar.db.io.TimestampColumn; import com.scalar.db.io.TimestampTZColumn; import com.scalar.db.transaction.consensuscommit.Attribute; +import java.io.BufferedReader; +import java.io.StringReader; import java.nio.charset.StandardCharsets; import java.time.Instant; import java.time.LocalDate; @@ -273,4 +282,166 @@ public static String getSourceTestValue(DataType dataType) { return TEST_VALUE_TEXT; } } + + public static TableColumnDataTypes getTableColumnData() { + TableColumnDataTypes 
tableColumnDataTypes = new TableColumnDataTypes(); + Map tableMetadataMap = new HashMap<>(); + tableMetadataMap.put("namespace.table", createTestTableMetadata()); + tableMetadataMap.forEach( + (name, metadata) -> + metadata + .getColumnDataTypes() + .forEach((k, v) -> tableColumnDataTypes.addColumnDataType(name, k, v))); + return tableColumnDataTypes; + } + + public static ControlFile getControlFile() { + List controlFileTables = new ArrayList<>(); + List mappings = new ArrayList<>(); + mappings.add(new ControlFileTableFieldMapping("col1", "col1")); + mappings.add(new ControlFileTableFieldMapping("col2", "col2")); + mappings.add(new ControlFileTableFieldMapping("col3", "col3")); + mappings.add(new ControlFileTableFieldMapping("col4", "col4")); + mappings.add(new ControlFileTableFieldMapping("col5", "col5")); + mappings.add(new ControlFileTableFieldMapping("col6", "col6")); + mappings.add(new ControlFileTableFieldMapping("col7", "col7")); + mappings.add(new ControlFileTableFieldMapping("col8", "col8")); + mappings.add(new ControlFileTableFieldMapping("col9", "col9")); + mappings.add(new ControlFileTableFieldMapping("col10", "col10")); + mappings.add(new ControlFileTableFieldMapping("col11", "col11")); + mappings.add(new ControlFileTableFieldMapping("col11", "col11")); + controlFileTables.add(new ControlFileTable("namespace", "table", mappings)); + return new ControlFile(controlFileTables); + } + + public static BufferedReader getJsonReaderOrg() { + String jsonData = + "[{\"col1\":1,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}," + + 
"{\"col1\":2,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}," + + "{\"col1\":3,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}," + + "{\"col1\":4,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}," + + "{\"col1\":5,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}," + + "{\"col1\":6,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}," + + "{\"col1\":7,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}," + + "{\"col1\":8,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}," + + 
"{\"col1\":9,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}," + + "{\"col1\":10,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}," + + "{\"col1\":11,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}," + + "{\"col1\":2,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}," + + "{\"col1\":2,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}," + + "{\"col1\":2,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}]"; + return new BufferedReader(new StringReader(jsonData)); + } + + public static BufferedReader getJsonReader() { + String jsonData = + 
"[{\"col1\":1,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}]"; + return new BufferedReader(new StringReader(jsonData)); + } + + public static BufferedReader getJsonLinesReaderOrg() { + String jsonLinesData = + "{\"col1\":1,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}\n" + + "{\"col1\":2,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}\n" + + "{\"col1\":3,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}\n" + + "{\"col1\":4,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}\n" + + "{\"col1\":5,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}\n" + + 
"{\"col1\":6,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}\n" + + "{\"col1\":7,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}\n" + + "{\"col1\":8,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}\n" + + "{\"col1\":9,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}\n" + + "{\"col1\":10,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}\n" + + "{\"col1\":11,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}\n" + + "{\"col1\":2,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}\n" 
+ + "{\"col1\":2,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}\n" + + "{\"col1\":2,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}\n"; + return new BufferedReader(new StringReader(jsonLinesData)); + } + + public static BufferedReader getJsonLinesReader() { + String jsonLinesData = + "{\"col1\":1,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}\n"; + return new BufferedReader(new StringReader(jsonLinesData)); + } + + public static BufferedReader getCsvReaderOrg() { + String csvData = + "col1,col2,col3,col4,col5,col6,col7,col8,col9,col10,col11 \n" + + "1,1,1,1.4E-45,5e-324,VALUE!!s,0x626C6F6220746573742076616C7565,2000-01-01,01:01:01.000000,2000-01-01T01:01:00,1970-01-21T03:20:41.740Z \n" + + "2,1,1,1.4E-45,5e-324,VALUE!!s,0x626C6F6220746573742076616C7565,2000-01-01,01:01:01.000000,2000-01-01T01:01:00,1970-01-21T03:20:41.740Z \n" + + "3,1,1,1.4E-45,5e-324,VALUE!!s,0x626C6F6220746573742076616C7565,2000-01-01,01:01:01.000000,2000-01-01T01:01:00,1970-01-21T03:20:41.740Z \n" + + "4,1,1,1.4E-45,5e-324,VALUE!!s,0x626C6F6220746573742076616C7565,2000-01-01,01:01:01.000000,2000-01-01T01:01:00,1970-01-21T03:20:41.740Z \n" + + "5,1,1,1.4E-45,5e-324,VALUE!!s,0x626C6F6220746573742076616C7565,2000-01-01,01:01:01.000000,2000-01-01T01:01:00,1970-01-21T03:20:41.740Z \n" + + 
"6,1,1,1.4E-45,5e-324,VALUE!!s,0x626C6F6220746573742076616C7565,2000-01-01,01:01:01.000000,2000-01-01T01:01:00,1970-01-21T03:20:41.740Z \n" + + "7,1,1,1.4E-45,5e-324,VALUE!!s,0x626C6F6220746573742076616C7565,2000-01-01,01:01:01.000000,2000-01-01T01:01:00,1970-01-21T03:20:41.740Z \n" + + "8,1,1,1.4E-45,5e-324,VALUE!!s,0x626C6F6220746573742076616C7565,2000-01-01,01:01:01.000000,2000-01-01T01:01:00,1970-01-21T03:20:41.740Z \n" + + "9,1,1,1.4E-45,5e-324,VALUE!!s,0x626C6F6220746573742076616C7565,2000-01-01,01:01:01.000000,2000-01-01T01:01:00,1970-01-21T03:20:41.740Z \n" + + "10,1,1,1.4E-45,5e-324,VALUE!!s,0x626C6F6220746573742076616C7565,2000-01-01,01:01:01.000000,2000-01-01T01:01:00,1970-01-21T03:20:41.740Z \n"; + return new BufferedReader(new StringReader(csvData)); + } + + public static BufferedReader getCsvReader() { + + String csvData = + "col1,col2,col3,col4,col5,col6,col7,col8,col9,col10,col11 \n" + + "1,1,1,1.4E-45,5e-324,VALUE!!s,0x626C6F6220746573742076616C7565,2000-01-01,01:01:01.000000,2000-01-01T01:01:00,1970-01-21T03:20:41.740Z \n" + + "2,1,1,1.4E-45,5e-324,VALUE!!s,0x626C6F6220746573742076616C7565,2000-01-01,01:01:01.000000,2000-01-01T01:01:00,1970-01-21T03:20:41.740Z \n"; + return new BufferedReader(new StringReader(csvData)); + } + + public static Key getClusteringKey() { + return Key.newBuilder() + .add(IntColumn.of("col2", 1)) + .add(BooleanColumn.of("col3", true)) + .build(); + } + + public static Key getPartitionKey(int j) { + return Key.ofBigInt("col1", j); + } + + public static Optional getResult(long pk) { + Map> values = new HashMap<>(); + values.put(TEST_COLUMN_1_PK, BigIntColumn.of(TEST_COLUMN_1_PK, pk)); + values.put(TEST_COLUMN_2_CK, IntColumn.of(TEST_COLUMN_2_CK, 1)); + values.put(TEST_COLUMN_3_CK, BooleanColumn.of(TEST_COLUMN_3_CK, true)); + values.put(TEST_COLUMN_4, FloatColumn.of(TEST_COLUMN_4, TEST_VALUE_FLOAT)); + values.put(TEST_COLUMN_5, DoubleColumn.of(TEST_COLUMN_5, TEST_VALUE_DOUBLE)); + values.put(TEST_COLUMN_6, 
TextColumn.of(TEST_COLUMN_6, TEST_VALUE_TEXT)); + values.put(TEST_COLUMN_7, BlobColumn.of(TEST_COLUMN_7, TEST_VALUE_BLOB)); + values.put(TEST_COLUMN_8, DateColumn.of(TEST_COLUMN_8, TEST_VALUE_DATE)); + values.put(TEST_COLUMN_9, TimeColumn.of(TEST_COLUMN_9, TEST_VALUE_TIME)); + values.put(TEST_COLUMN_10, TimestampColumn.of(TEST_COLUMN_10, TEST_VALUE_DATE_TIME)); + values.put(TEST_COLUMN_11, TimestampTZColumn.of(TEST_COLUMN_11, TEST_VALUE_INSTANT)); + values.put( + Attribute.BEFORE_PREFIX + TEST_COLUMN_4, + FloatColumn.of(Attribute.BEFORE_PREFIX + TEST_COLUMN_4, TEST_VALUE_FLOAT)); + values.put( + Attribute.BEFORE_PREFIX + TEST_COLUMN_5, + DoubleColumn.of(Attribute.BEFORE_PREFIX + TEST_COLUMN_5, TEST_VALUE_DOUBLE)); + values.put( + Attribute.BEFORE_PREFIX + TEST_COLUMN_6, + TextColumn.of(Attribute.BEFORE_PREFIX + TEST_COLUMN_6, TEST_VALUE_TEXT)); + values.put( + Attribute.BEFORE_PREFIX + TEST_COLUMN_7, + BlobColumn.of(Attribute.BEFORE_PREFIX + TEST_COLUMN_7, TEST_VALUE_BLOB)); + values.put(Attribute.ID, TextColumn.of(Attribute.ID, TEST_VALUE_TX_ID)); + values.put(Attribute.STATE, IntColumn.of(Attribute.STATE, TEST_VALUE_INT)); + values.put(Attribute.VERSION, IntColumn.of(Attribute.VERSION, TEST_VALUE_INT)); + values.put(Attribute.PREPARED_AT, BigIntColumn.of(Attribute.PREPARED_AT, TEST_VALUE_LONG)); + values.put(Attribute.COMMITTED_AT, BigIntColumn.of(Attribute.COMMITTED_AT, TEST_VALUE_LONG)); + values.put(Attribute.BEFORE_ID, TextColumn.of(Attribute.BEFORE_ID, TEST_VALUE_TEXT)); + values.put(Attribute.BEFORE_STATE, IntColumn.of(Attribute.BEFORE_STATE, TEST_VALUE_INT)); + values.put(Attribute.BEFORE_VERSION, IntColumn.of(Attribute.BEFORE_VERSION, TEST_VALUE_INT)); + values.put( + Attribute.BEFORE_PREPARED_AT, + BigIntColumn.of(Attribute.BEFORE_PREPARED_AT, TEST_VALUE_LONG)); + values.put( + Attribute.BEFORE_COMMITTED_AT, + BigIntColumn.of(Attribute.BEFORE_COMMITTED_AT, TEST_VALUE_LONG)); + TableMetadata tableMetadata = createTestTableMetadata(); + Result data = new 
ResultImpl(createTestValues(), createTestTableMetadata()); + + return Optional.of(data); + } } diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessorTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessorTest.java new file mode 100644 index 0000000000..f8d08bc66d --- /dev/null +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessorTest.java @@ -0,0 +1,119 @@ +package com.scalar.db.dataloader.core.dataimport.processor; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import com.scalar.db.api.DistributedStorage; +import com.scalar.db.api.DistributedTransaction; +import com.scalar.db.api.DistributedTransactionManager; +import com.scalar.db.api.TableMetadata; +import com.scalar.db.dataloader.core.FileFormat; +import com.scalar.db.dataloader.core.ScalarDBMode; +import com.scalar.db.dataloader.core.UnitTestUtils; +import com.scalar.db.dataloader.core.dataimport.ImportMode; +import com.scalar.db.dataloader.core.dataimport.ImportOptions; +import com.scalar.db.dataloader.core.dataimport.controlfile.ControlFileValidationLevel; +import com.scalar.db.dataloader.core.dataimport.dao.ScalarDBDao; +import com.scalar.db.dataloader.core.dataimport.dao.ScalarDBDaoException; +import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunkStatus; +import com.scalar.db.dataloader.core.dataimport.log.LogMode; +import com.scalar.db.exception.transaction.TransactionException; +import java.util.HashMap; +import java.util.Map; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.Mock; +import org.mockito.Mockito; + +public class CsvImportProcessorTest { + @Mock ImportProcessorParams params; + @Mock ScalarDBMode scalarDBMode; + @Mock ImportOptions importOptions; + @Mock Map 
tableMetadataByTableName; + @Mock TableColumnDataTypes tableColumnDataTypes; + + ScalarDBDao dao; + @Mock DistributedStorage distributedStorage; + DistributedTransactionManager distributedTransactionManager; + CsvImportProcessor csvImportProcessor; + + @BeforeEach + void setup() throws ScalarDBDaoException, TransactionException { + dao = Mockito.mock(ScalarDBDao.class); + distributedTransactionManager = mock(DistributedTransactionManager.class); + DistributedTransaction distributedTransaction = mock(DistributedTransaction.class); + when(distributedTransactionManager.start()).thenReturn(distributedTransaction); + tableMetadataByTableName = new HashMap<>(); + tableMetadataByTableName.put("namespace.table", UnitTestUtils.createTestTableMetadata()); + tableColumnDataTypes = UnitTestUtils.getTableColumnData(); + importOptions = + ImportOptions.builder() + .importMode(ImportMode.UPSERT) + .fileFormat(FileFormat.CSV) + .controlFile(UnitTestUtils.getControlFile()) + .controlFileValidationLevel(ControlFileValidationLevel.MAPPED) + .namespace("namespace") + .transactionBatchSize(1) + .dataChunkSize(5) + .tableName("table") + .logMode(LogMode.SINGLE_FILE) + .build(); + Mockito.when( + dao.get( + "namespace", + "table", + UnitTestUtils.getPartitionKey(1), + UnitTestUtils.getClusteringKey(), + distributedStorage)) + .thenReturn(UnitTestUtils.getResult(1)); + Mockito.when( + dao.get( + "namespace", + "table", + UnitTestUtils.getPartitionKey(1), + UnitTestUtils.getClusteringKey(), + distributedTransaction)) + .thenReturn(UnitTestUtils.getResult(1)); + } + + @Test + void test_importProcessWithStorage() { + params = + ImportProcessorParams.builder() + .scalarDBMode(ScalarDBMode.STORAGE) + .importOptions(importOptions) + .dao(dao) + .distributedStorage(distributedStorage) + .distributedTransactionManager(distributedTransactionManager) + .scalarDBMode(scalarDBMode) + .tableColumnDataTypes(tableColumnDataTypes) + .tableMetadataByTableName(tableMetadataByTableName) + .build(); + 
csvImportProcessor = new CsvImportProcessor(params); + Map statusList = + csvImportProcessor.process(5, 1, UnitTestUtils.getCsvReader()); + assert statusList != null; + Assertions.assertEquals(1, statusList.size()); + } + + @Test + void test_importProcessWithTransaction() { + params = + ImportProcessorParams.builder() + .scalarDBMode(ScalarDBMode.TRANSACTION) + .importOptions(importOptions) + .dao(dao) + .distributedStorage(distributedStorage) + .distributedTransactionManager(distributedTransactionManager) + .scalarDBMode(scalarDBMode) + .tableColumnDataTypes(tableColumnDataTypes) + .tableMetadataByTableName(tableMetadataByTableName) + .build(); + csvImportProcessor = new CsvImportProcessor(params); + Map statusList = + csvImportProcessor.process(5, 1, UnitTestUtils.getCsvReader()); + assert statusList != null; + Assertions.assertEquals(1, statusList.size()); + } +} diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessorTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessorTest.java new file mode 100644 index 0000000000..0168e96b9e --- /dev/null +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessorTest.java @@ -0,0 +1,119 @@ +package com.scalar.db.dataloader.core.dataimport.processor; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import com.scalar.db.api.DistributedStorage; +import com.scalar.db.api.DistributedTransaction; +import com.scalar.db.api.DistributedTransactionManager; +import com.scalar.db.api.TableMetadata; +import com.scalar.db.dataloader.core.FileFormat; +import com.scalar.db.dataloader.core.ScalarDBMode; +import com.scalar.db.dataloader.core.UnitTestUtils; +import com.scalar.db.dataloader.core.dataimport.ImportMode; +import com.scalar.db.dataloader.core.dataimport.ImportOptions; +import 
com.scalar.db.dataloader.core.dataimport.controlfile.ControlFileValidationLevel; +import com.scalar.db.dataloader.core.dataimport.dao.ScalarDBDao; +import com.scalar.db.dataloader.core.dataimport.dao.ScalarDBDaoException; +import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunkStatus; +import com.scalar.db.dataloader.core.dataimport.log.LogMode; +import com.scalar.db.exception.transaction.TransactionException; +import java.util.HashMap; +import java.util.Map; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.Mock; +import org.mockito.Mockito; + +public class JsonImportProcessorTest { + @Mock ImportProcessorParams params; + @Mock ScalarDBMode scalarDBMode; + @Mock ImportOptions importOptions; + @Mock Map tableMetadataByTableName; + @Mock TableColumnDataTypes tableColumnDataTypes; + + ScalarDBDao dao; + @Mock DistributedStorage distributedStorage; + DistributedTransactionManager distributedTransactionManager; + JsonImportProcessor jsonImportProcessor; + + @BeforeEach + void setup() throws ScalarDBDaoException, TransactionException { + dao = Mockito.mock(ScalarDBDao.class); + distributedTransactionManager = mock(DistributedTransactionManager.class); + DistributedTransaction distributedTransaction = mock(DistributedTransaction.class); + when(distributedTransactionManager.start()).thenReturn(distributedTransaction); + tableMetadataByTableName = new HashMap<>(); + tableMetadataByTableName.put("namespace.table", UnitTestUtils.createTestTableMetadata()); + tableColumnDataTypes = UnitTestUtils.getTableColumnData(); + importOptions = + ImportOptions.builder() + .importMode(ImportMode.UPSERT) + .fileFormat(FileFormat.JSON) + .controlFile(UnitTestUtils.getControlFile()) + .controlFileValidationLevel(ControlFileValidationLevel.MAPPED) + .namespace("namespace") + .transactionBatchSize(1) + .dataChunkSize(5) + .tableName("table") + .logMode(LogMode.SINGLE_FILE) + .build(); 
+ Mockito.when( + dao.get( + "namespace", + "table", + UnitTestUtils.getPartitionKey(1), + UnitTestUtils.getClusteringKey(), + distributedStorage)) + .thenReturn(UnitTestUtils.getResult(1)); + Mockito.when( + dao.get( + "namespace", + "table", + UnitTestUtils.getPartitionKey(1), + UnitTestUtils.getClusteringKey(), + distributedTransaction)) + .thenReturn(UnitTestUtils.getResult(1)); + } + + @Test + void test_importProcessWithStorage() { + params = + ImportProcessorParams.builder() + .scalarDBMode(ScalarDBMode.STORAGE) + .importOptions(importOptions) + .dao(dao) + .distributedStorage(distributedStorage) + .distributedTransactionManager(distributedTransactionManager) + .scalarDBMode(scalarDBMode) + .tableColumnDataTypes(tableColumnDataTypes) + .tableMetadataByTableName(tableMetadataByTableName) + .build(); + jsonImportProcessor = new JsonImportProcessor(params); + Map statusList = + jsonImportProcessor.process(5, 1, UnitTestUtils.getJsonReader()); + assert statusList != null; + Assertions.assertEquals(1, statusList.size()); + } + + @Test + void test_importProcessWithTransaction() { + params = + ImportProcessorParams.builder() + .scalarDBMode(ScalarDBMode.TRANSACTION) + .importOptions(importOptions) + .dao(dao) + .distributedStorage(distributedStorage) + .distributedTransactionManager(distributedTransactionManager) + .scalarDBMode(scalarDBMode) + .tableColumnDataTypes(tableColumnDataTypes) + .tableMetadataByTableName(tableMetadataByTableName) + .build(); + jsonImportProcessor = new JsonImportProcessor(params); + Map statusList = + jsonImportProcessor.process(5, 1, UnitTestUtils.getJsonReader()); + assert statusList != null; + Assertions.assertEquals(1, statusList.size()); + } +} diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessorTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessorTest.java new file mode 100644 index 
0000000000..cd4e771bca --- /dev/null +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessorTest.java @@ -0,0 +1,119 @@ +package com.scalar.db.dataloader.core.dataimport.processor; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import com.scalar.db.api.DistributedStorage; +import com.scalar.db.api.DistributedTransaction; +import com.scalar.db.api.DistributedTransactionManager; +import com.scalar.db.api.TableMetadata; +import com.scalar.db.dataloader.core.FileFormat; +import com.scalar.db.dataloader.core.ScalarDBMode; +import com.scalar.db.dataloader.core.UnitTestUtils; +import com.scalar.db.dataloader.core.dataimport.ImportMode; +import com.scalar.db.dataloader.core.dataimport.ImportOptions; +import com.scalar.db.dataloader.core.dataimport.controlfile.ControlFileValidationLevel; +import com.scalar.db.dataloader.core.dataimport.dao.ScalarDBDao; +import com.scalar.db.dataloader.core.dataimport.dao.ScalarDBDaoException; +import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunkStatus; +import com.scalar.db.dataloader.core.dataimport.log.LogMode; +import com.scalar.db.exception.transaction.TransactionException; +import java.util.HashMap; +import java.util.Map; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.Mock; +import org.mockito.Mockito; + +public class JsonLinesImportProcessorTest { + @Mock ImportProcessorParams params; + @Mock ScalarDBMode scalarDBMode; + @Mock ImportOptions importOptions; + @Mock Map tableMetadataByTableName; + @Mock TableColumnDataTypes tableColumnDataTypes; + + ScalarDBDao dao; + @Mock DistributedStorage distributedStorage; + DistributedTransactionManager distributedTransactionManager; + JsonLinesImportProcessor jsonLinesImportProcessor; + + @BeforeEach + void setup() throws ScalarDBDaoException, TransactionException { + dao = 
Mockito.mock(ScalarDBDao.class); + distributedTransactionManager = mock(DistributedTransactionManager.class); + DistributedTransaction distributedTransaction = mock(DistributedTransaction.class); + when(distributedTransactionManager.start()).thenReturn(distributedTransaction); + tableMetadataByTableName = new HashMap<>(); + tableMetadataByTableName.put("namespace.table", UnitTestUtils.createTestTableMetadata()); + tableColumnDataTypes = UnitTestUtils.getTableColumnData(); + importOptions = + ImportOptions.builder() + .importMode(ImportMode.UPSERT) + .fileFormat(FileFormat.JSONL) + .controlFile(UnitTestUtils.getControlFile()) + .controlFileValidationLevel(ControlFileValidationLevel.MAPPED) + .namespace("namespace") + .transactionBatchSize(1) + .dataChunkSize(5) + .tableName("table") + .logMode(LogMode.SINGLE_FILE) + .build(); + Mockito.when( + dao.get( + "namespace", + "table", + UnitTestUtils.getPartitionKey(1), + UnitTestUtils.getClusteringKey(), + distributedStorage)) + .thenReturn(UnitTestUtils.getResult(1)); + Mockito.when( + dao.get( + "namespace", + "table", + UnitTestUtils.getPartitionKey(1), + UnitTestUtils.getClusteringKey(), + distributedTransaction)) + .thenReturn(UnitTestUtils.getResult(1)); + } + + @Test + void test_importProcessWithStorage() { + params = + ImportProcessorParams.builder() + .scalarDBMode(ScalarDBMode.STORAGE) + .importOptions(importOptions) + .dao(dao) + .distributedStorage(distributedStorage) + .distributedTransactionManager(distributedTransactionManager) + .scalarDBMode(scalarDBMode) + .tableColumnDataTypes(tableColumnDataTypes) + .tableMetadataByTableName(tableMetadataByTableName) + .build(); + jsonLinesImportProcessor = new JsonLinesImportProcessor(params); + Map statusList = + jsonLinesImportProcessor.process(5, 1, UnitTestUtils.getJsonLinesReader()); + assert statusList != null; + Assertions.assertEquals(1, statusList.size()); + } + + @Test + void test_importProcessWithTransaction() { + params = + ImportProcessorParams.builder() 
+ .scalarDBMode(ScalarDBMode.TRANSACTION) + .importOptions(importOptions) + .dao(dao) + .distributedStorage(distributedStorage) + .distributedTransactionManager(distributedTransactionManager) + .scalarDBMode(scalarDBMode) + .tableColumnDataTypes(tableColumnDataTypes) + .tableMetadataByTableName(tableMetadataByTableName) + .build(); + jsonLinesImportProcessor = new JsonLinesImportProcessor(params); + Map statusList = + jsonLinesImportProcessor.process(5, 1, UnitTestUtils.getJsonLinesReader()); + assert statusList != null; + Assertions.assertEquals(1, statusList.size()); + } +} diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ColumnUtilsTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ColumnUtilsTest.java index 44d8a90c94..09c94b3844 100644 --- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ColumnUtilsTest.java +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ColumnUtilsTest.java @@ -197,7 +197,6 @@ void getColumnsFromResult_withValidData_shouldReturnColumns() throws Base64Exception, ColumnParsingException { List> columns = ColumnUtils.getColumnsFromResult(scalarDBResult, sourceRecord, false, mockMetadata); - System.out.println(columns); assertEquals(8, columns.size()); } } From c835730e38987ac221f47be2fea532d77e0e9bef Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Tue, 25 Mar 2025 14:33:19 +0530 Subject: [PATCH 78/87] Removed unused test util methods [skip ci] --- .../db/dataloader/core/UnitTestUtils.java | 54 ------------------- 1 file changed, 54 deletions(-) diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/UnitTestUtils.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/UnitTestUtils.java index 8438d74660..a4bbfbf608 100644 --- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/UnitTestUtils.java +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/UnitTestUtils.java @@ -314,72 +314,18 @@ public 
static ControlFile getControlFile() { return new ControlFile(controlFileTables); } - public static BufferedReader getJsonReaderOrg() { - String jsonData = - "[{\"col1\":1,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}," - + "{\"col1\":2,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}," - + "{\"col1\":3,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}," - + "{\"col1\":4,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}," - + "{\"col1\":5,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}," - + "{\"col1\":6,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}," - + 
"{\"col1\":7,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}," - + "{\"col1\":8,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}," - + "{\"col1\":9,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}," - + "{\"col1\":10,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}," - + "{\"col1\":11,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}," - + "{\"col1\":2,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}," - + "{\"col1\":2,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}," - + 
"{\"col1\":2,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}]"; - return new BufferedReader(new StringReader(jsonData)); - } - public static BufferedReader getJsonReader() { String jsonData = "[{\"col1\":1,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}]"; return new BufferedReader(new StringReader(jsonData)); } - public static BufferedReader getJsonLinesReaderOrg() { - String jsonLinesData = - "{\"col1\":1,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}\n" - + "{\"col1\":2,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}\n" - + "{\"col1\":3,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}\n" - + "{\"col1\":4,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}\n" - + 
"{\"col1\":5,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}\n" - + "{\"col1\":6,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}\n" - + "{\"col1\":7,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}\n" - + "{\"col1\":8,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}\n" - + "{\"col1\":9,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}\n" - + "{\"col1\":10,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}\n" - + "{\"col1\":11,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}\n" 
- + "{\"col1\":2,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}\n" - + "{\"col1\":2,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}\n" - + "{\"col1\":2,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}\n"; - return new BufferedReader(new StringReader(jsonLinesData)); - } - public static BufferedReader getJsonLinesReader() { String jsonLinesData = "{\"col1\":1,\"col2\":\"1\",\"col3\":\"1\",\"col4\":\"1.4e-45\",\"col5\":\"5e-324\",\"col6\":\"VALUE!!s\",\"col7\":\"0x626C6F6220746573742076616C7565\",\"col8\":\"2000-01-01\",\"col9\":\"01:01:01.000000\",\"col10\":\"2000-01-01T01:01:00\",\"col11\":\"1970-01-21T03:20:41.740Z\"}\n"; return new BufferedReader(new StringReader(jsonLinesData)); } - public static BufferedReader getCsvReaderOrg() { - String csvData = - "col1,col2,col3,col4,col5,col6,col7,col8,col9,col10,col11 \n" - + "1,1,1,1.4E-45,5e-324,VALUE!!s,0x626C6F6220746573742076616C7565,2000-01-01,01:01:01.000000,2000-01-01T01:01:00,1970-01-21T03:20:41.740Z \n" - + "2,1,1,1.4E-45,5e-324,VALUE!!s,0x626C6F6220746573742076616C7565,2000-01-01,01:01:01.000000,2000-01-01T01:01:00,1970-01-21T03:20:41.740Z \n" - + "3,1,1,1.4E-45,5e-324,VALUE!!s,0x626C6F6220746573742076616C7565,2000-01-01,01:01:01.000000,2000-01-01T01:01:00,1970-01-21T03:20:41.740Z \n" - + 
"4,1,1,1.4E-45,5e-324,VALUE!!s,0x626C6F6220746573742076616C7565,2000-01-01,01:01:01.000000,2000-01-01T01:01:00,1970-01-21T03:20:41.740Z \n" - + "5,1,1,1.4E-45,5e-324,VALUE!!s,0x626C6F6220746573742076616C7565,2000-01-01,01:01:01.000000,2000-01-01T01:01:00,1970-01-21T03:20:41.740Z \n" - + "6,1,1,1.4E-45,5e-324,VALUE!!s,0x626C6F6220746573742076616C7565,2000-01-01,01:01:01.000000,2000-01-01T01:01:00,1970-01-21T03:20:41.740Z \n" - + "7,1,1,1.4E-45,5e-324,VALUE!!s,0x626C6F6220746573742076616C7565,2000-01-01,01:01:01.000000,2000-01-01T01:01:00,1970-01-21T03:20:41.740Z \n" - + "8,1,1,1.4E-45,5e-324,VALUE!!s,0x626C6F6220746573742076616C7565,2000-01-01,01:01:01.000000,2000-01-01T01:01:00,1970-01-21T03:20:41.740Z \n" - + "9,1,1,1.4E-45,5e-324,VALUE!!s,0x626C6F6220746573742076616C7565,2000-01-01,01:01:01.000000,2000-01-01T01:01:00,1970-01-21T03:20:41.740Z \n" - + "10,1,1,1.4E-45,5e-324,VALUE!!s,0x626C6F6220746573742076616C7565,2000-01-01,01:01:01.000000,2000-01-01T01:01:00,1970-01-21T03:20:41.740Z \n"; - return new BufferedReader(new StringReader(csvData)); - } - public static BufferedReader getCsvReader() { String csvData = From 8f7adc888d9485ae41b9b9256c39bf2be4f2bf05 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Tue, 25 Mar 2025 14:57:10 +0530 Subject: [PATCH 79/87] Fixed spotbugs test issues --- .../db/dataloader/core/UnitTestUtils.java | 47 +++---------------- 1 file changed, 6 insertions(+), 41 deletions(-) diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/UnitTestUtils.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/UnitTestUtils.java index a4bbfbf608..8378049b91 100644 --- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/UnitTestUtils.java +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/UnitTestUtils.java @@ -35,7 +35,12 @@ import java.time.LocalDate; import java.time.LocalDateTime; import java.time.LocalTime; -import java.util.*; +import java.util.ArrayList; +import java.util.Base64; +import 
java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; /** Utils for the service unit tests */ public class UnitTestUtils { @@ -347,47 +352,7 @@ public static Key getPartitionKey(int j) { } public static Optional getResult(long pk) { - Map> values = new HashMap<>(); - values.put(TEST_COLUMN_1_PK, BigIntColumn.of(TEST_COLUMN_1_PK, pk)); - values.put(TEST_COLUMN_2_CK, IntColumn.of(TEST_COLUMN_2_CK, 1)); - values.put(TEST_COLUMN_3_CK, BooleanColumn.of(TEST_COLUMN_3_CK, true)); - values.put(TEST_COLUMN_4, FloatColumn.of(TEST_COLUMN_4, TEST_VALUE_FLOAT)); - values.put(TEST_COLUMN_5, DoubleColumn.of(TEST_COLUMN_5, TEST_VALUE_DOUBLE)); - values.put(TEST_COLUMN_6, TextColumn.of(TEST_COLUMN_6, TEST_VALUE_TEXT)); - values.put(TEST_COLUMN_7, BlobColumn.of(TEST_COLUMN_7, TEST_VALUE_BLOB)); - values.put(TEST_COLUMN_8, DateColumn.of(TEST_COLUMN_8, TEST_VALUE_DATE)); - values.put(TEST_COLUMN_9, TimeColumn.of(TEST_COLUMN_9, TEST_VALUE_TIME)); - values.put(TEST_COLUMN_10, TimestampColumn.of(TEST_COLUMN_10, TEST_VALUE_DATE_TIME)); - values.put(TEST_COLUMN_11, TimestampTZColumn.of(TEST_COLUMN_11, TEST_VALUE_INSTANT)); - values.put( - Attribute.BEFORE_PREFIX + TEST_COLUMN_4, - FloatColumn.of(Attribute.BEFORE_PREFIX + TEST_COLUMN_4, TEST_VALUE_FLOAT)); - values.put( - Attribute.BEFORE_PREFIX + TEST_COLUMN_5, - DoubleColumn.of(Attribute.BEFORE_PREFIX + TEST_COLUMN_5, TEST_VALUE_DOUBLE)); - values.put( - Attribute.BEFORE_PREFIX + TEST_COLUMN_6, - TextColumn.of(Attribute.BEFORE_PREFIX + TEST_COLUMN_6, TEST_VALUE_TEXT)); - values.put( - Attribute.BEFORE_PREFIX + TEST_COLUMN_7, - BlobColumn.of(Attribute.BEFORE_PREFIX + TEST_COLUMN_7, TEST_VALUE_BLOB)); - values.put(Attribute.ID, TextColumn.of(Attribute.ID, TEST_VALUE_TX_ID)); - values.put(Attribute.STATE, IntColumn.of(Attribute.STATE, TEST_VALUE_INT)); - values.put(Attribute.VERSION, IntColumn.of(Attribute.VERSION, TEST_VALUE_INT)); - values.put(Attribute.PREPARED_AT, 
BigIntColumn.of(Attribute.PREPARED_AT, TEST_VALUE_LONG)); - values.put(Attribute.COMMITTED_AT, BigIntColumn.of(Attribute.COMMITTED_AT, TEST_VALUE_LONG)); - values.put(Attribute.BEFORE_ID, TextColumn.of(Attribute.BEFORE_ID, TEST_VALUE_TEXT)); - values.put(Attribute.BEFORE_STATE, IntColumn.of(Attribute.BEFORE_STATE, TEST_VALUE_INT)); - values.put(Attribute.BEFORE_VERSION, IntColumn.of(Attribute.BEFORE_VERSION, TEST_VALUE_INT)); - values.put( - Attribute.BEFORE_PREPARED_AT, - BigIntColumn.of(Attribute.BEFORE_PREPARED_AT, TEST_VALUE_LONG)); - values.put( - Attribute.BEFORE_COMMITTED_AT, - BigIntColumn.of(Attribute.BEFORE_COMMITTED_AT, TEST_VALUE_LONG)); - TableMetadata tableMetadata = createTestTableMetadata(); Result data = new ResultImpl(createTestValues(), createTestTableMetadata()); - return Optional.of(data); } } From 3aff01816166a92827bb3821aca95636051032c2 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Tue, 25 Mar 2025 17:17:09 +0530 Subject: [PATCH 80/87] reader data updated [skip ci] --- .../java/com/scalar/db/dataloader/core/UnitTestUtils.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/UnitTestUtils.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/UnitTestUtils.java index 8378049b91..3df487fa7b 100644 --- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/UnitTestUtils.java +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/UnitTestUtils.java @@ -332,11 +332,9 @@ public static BufferedReader getJsonLinesReader() { } public static BufferedReader getCsvReader() { - String csvData = "col1,col2,col3,col4,col5,col6,col7,col8,col9,col10,col11 \n" - + "1,1,1,1.4E-45,5e-324,VALUE!!s,0x626C6F6220746573742076616C7565,2000-01-01,01:01:01.000000,2000-01-01T01:01:00,1970-01-21T03:20:41.740Z \n" - + "2,1,1,1.4E-45,5e-324,VALUE!!s,0x626C6F6220746573742076616C7565,2000-01-01,01:01:01.000000,2000-01-01T01:01:00,1970-01-21T03:20:41.740Z \n"; + + 
"1,1,1,1.4E-45,5e-324,VALUE!!s,0x626C6F6220746573742076616C7565,2000-01-01,01:01:01.000000,2000-01-01T01:01:00,1970-01-21T03:20:41.740Z \n"; return new BufferedReader(new StringReader(csvData)); } From 24bfa37ad56b62a9473a4b09e190f1ea1274ec9e Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Tue, 1 Apr 2025 17:45:52 +0530 Subject: [PATCH 81/87] Changes --- .../scalar/db/dataloader/core/dataimport/ImportManager.java | 6 +----- .../core/dataimport/processor/CsvImportProcessor.java | 2 +- .../core/dataimport/processor/JsonImportProcessor.java | 2 +- .../core/dataimport/processor/JsonLinesImportProcessor.java | 2 +- .../core/dataimport/processor/CsvImportProcessorTest.java | 4 ++-- .../core/dataimport/processor/JsonImportProcessorTest.java | 4 ++-- .../dataimport/processor/JsonLinesImportProcessorTest.java | 4 ++-- 7 files changed, 10 insertions(+), 14 deletions(-) diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportManager.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportManager.java index 51da88c66d..f1984d6c26 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportManager.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportManager.java @@ -18,7 +18,6 @@ import java.util.List; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ExecutionException; import lombok.AllArgsConstructor; import lombok.NonNull; @@ -59,11 +58,8 @@ public class ImportManager implements ImportEventListener { * * @return a map of {@link ImportDataChunkStatus} objects containing the status of each processed * chunk - * @throws ExecutionException if there is an error during the execution of the import process - * @throws InterruptedException if the import process is interrupted */ - public ConcurrentHashMap startImport() - throws ExecutionException, InterruptedException { + public ConcurrentHashMap 
startImport() { ImportProcessorParams params = ImportProcessorParams.builder() .scalarDBMode(scalarDBMode) diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java index ecb0919bf9..aa2a923551 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java @@ -40,7 +40,7 @@ public class CsvImportProcessor extends ImportProcessor { private static final DataLoaderObjectMapper OBJECT_MAPPER = new DataLoaderObjectMapper(); private static final AtomicInteger dataChunkIdCounter = new AtomicInteger(0); - private static final int MAX_QUEUE_SIZE = 10; + private static final int MAX_QUEUE_SIZE = 256; /** * Creates a new CsvImportProcessor with the specified parameters. 
diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java index 738924268e..7297846dff 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java @@ -44,7 +44,7 @@ public class JsonImportProcessor extends ImportProcessor { private static final DataLoaderObjectMapper OBJECT_MAPPER = new DataLoaderObjectMapper(); private static final AtomicInteger dataChunkIdCounter = new AtomicInteger(0); - private static final int MAX_QUEUE_SIZE = 10; + private static final int MAX_QUEUE_SIZE = 256; public JsonImportProcessor(ImportProcessorParams params) { super(params); diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java index 30cb88fb37..c60690e83b 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java @@ -38,7 +38,7 @@ public class JsonLinesImportProcessor extends ImportProcessor { private static final DataLoaderObjectMapper OBJECT_MAPPER = new DataLoaderObjectMapper(); private static final AtomicInteger dataChunkIdCounter = new AtomicInteger(0); - private static final int MAX_QUEUE_SIZE = 10; + private static final int MAX_QUEUE_SIZE = 256; /** * Creates a new JsonLinesImportProcessor with the specified parameters. 
diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessorTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessorTest.java index f8d08bc66d..b4c4373106 100644 --- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessorTest.java +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessorTest.java @@ -26,8 +26,8 @@ import org.mockito.Mock; import org.mockito.Mockito; -public class CsvImportProcessorTest { - @Mock ImportProcessorParams params; +class CsvImportProcessorTest { + @Mock private ImportProcessorParams params; @Mock ScalarDBMode scalarDBMode; @Mock ImportOptions importOptions; @Mock Map tableMetadataByTableName; diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessorTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessorTest.java index 0168e96b9e..d60eb48ccb 100644 --- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessorTest.java +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessorTest.java @@ -26,8 +26,8 @@ import org.mockito.Mock; import org.mockito.Mockito; -public class JsonImportProcessorTest { - @Mock ImportProcessorParams params; +class JsonImportProcessorTest { + @Mock private ImportProcessorParams params; @Mock ScalarDBMode scalarDBMode; @Mock ImportOptions importOptions; @Mock Map tableMetadataByTableName; diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessorTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessorTest.java index cd4e771bca..6224236a70 100644 --- 
a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessorTest.java +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessorTest.java @@ -26,8 +26,8 @@ import org.mockito.Mock; import org.mockito.Mockito; -public class JsonLinesImportProcessorTest { - @Mock ImportProcessorParams params; +class JsonLinesImportProcessorTest { + @Mock private ImportProcessorParams params; @Mock ScalarDBMode scalarDBMode; @Mock ImportOptions importOptions; @Mock Map tableMetadataByTableName; From d9f239c133e23bfc25b31806ebd0ecaee63a5c97 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Thu, 3 Apr 2025 12:10:08 +0530 Subject: [PATCH 82/87] Thread exexcuter changes --- .../core/dataimport/processor/CsvImportProcessor.java | 2 +- .../core/dataimport/processor/JsonImportProcessor.java | 2 +- .../core/dataimport/processor/JsonLinesImportProcessor.java | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java index aa2a923551..ea908cb7af 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java @@ -68,7 +68,7 @@ public CsvImportProcessor(ImportProcessorParams params) { public ConcurrentHashMap process( int dataChunkSize, int transactionBatchSize, BufferedReader reader) { int numCores = Runtime.getRuntime().availableProcessors(); - ExecutorService dataChunkExecutor = Executors.newFixedThreadPool(numCores); + ExecutorService dataChunkExecutor = Executors.newSingleThreadExecutor(); BlockingQueue dataChunkQueue = new LinkedBlockingQueue<>(MAX_QUEUE_SIZE); try { diff --git 
a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java index 7297846dff..dd61b5dc85 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java @@ -67,7 +67,7 @@ public JsonImportProcessor(ImportProcessorParams params) { public ConcurrentHashMap process( int dataChunkSize, int transactionBatchSize, BufferedReader reader) { int numCores = Runtime.getRuntime().availableProcessors(); - ExecutorService dataChunkExecutor = Executors.newFixedThreadPool(numCores); + ExecutorService dataChunkExecutor = Executors.newSingleThreadExecutor(); BlockingQueue dataChunkQueue = new LinkedBlockingQueue<>(MAX_QUEUE_SIZE); try { diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java index c60690e83b..933d697cbb 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java @@ -66,7 +66,7 @@ public JsonLinesImportProcessor(ImportProcessorParams params) { public ConcurrentHashMap process( int dataChunkSize, int transactionBatchSize, BufferedReader reader) { int numCores = Runtime.getRuntime().availableProcessors(); - ExecutorService dataChunkExecutor = Executors.newFixedThreadPool(numCores); + ExecutorService dataChunkExecutor = Executors.newSingleThreadExecutor(); BlockingQueue dataChunkQueue = new LinkedBlockingQueue<>(MAX_QUEUE_SIZE); try { From 723bd514e7d6afe7e4a6b627c637c10a23601ae7 Mon Sep 17 
00:00:00 2001 From: Jishnu J Date: Thu, 3 Apr 2025 17:51:29 +0530 Subject: [PATCH 83/87] Changed few values to be configurable --- .../processor/CsvImportProcessor.java | 5 +- .../dataimport/processor/ImportProcessor.java | 3 +- .../processor/JsonImportProcessor.java | 5 +- .../processor/JsonLinesImportProcessor.java | 5 +- .../db/dataloader/core/util/ConfigUtil.java | 98 +++++++++++++++++++ .../core/src/main/resources/config.properties | 2 + .../dataloader/core/util/ConfigUtilTest.java | 66 +++++++++++++ 7 files changed, 177 insertions(+), 7 deletions(-) create mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ConfigUtil.java create mode 100644 data-loader/core/src/main/resources/config.properties create mode 100644 data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ConfigUtilTest.java diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java index ea908cb7af..691a6ce141 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java @@ -7,6 +7,7 @@ import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunk; import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunkStatus; import com.scalar.db.dataloader.core.dataimport.datachunk.ImportRow; +import com.scalar.db.dataloader.core.util.ConfigUtil; import java.io.BufferedReader; import java.io.IOException; import java.io.UncheckedIOException; @@ -40,7 +41,6 @@ public class CsvImportProcessor extends ImportProcessor { private static final DataLoaderObjectMapper OBJECT_MAPPER = new DataLoaderObjectMapper(); private static final AtomicInteger dataChunkIdCounter = new AtomicInteger(0); - private static final int 
MAX_QUEUE_SIZE = 256; /** * Creates a new CsvImportProcessor with the specified parameters. @@ -69,7 +69,8 @@ public ConcurrentHashMap process( int dataChunkSize, int transactionBatchSize, BufferedReader reader) { int numCores = Runtime.getRuntime().availableProcessors(); ExecutorService dataChunkExecutor = Executors.newSingleThreadExecutor(); - BlockingQueue dataChunkQueue = new LinkedBlockingQueue<>(MAX_QUEUE_SIZE); + BlockingQueue dataChunkQueue = + new LinkedBlockingQueue<>(ConfigUtil.getImportDataChunkQueueSize()); try { CompletableFuture readerFuture = diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessor.java index 594e6d99db..b788d3b957 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessor.java @@ -15,6 +15,7 @@ import com.scalar.db.dataloader.core.dataimport.transactionbatch.ImportTransactionBatch; import com.scalar.db.dataloader.core.dataimport.transactionbatch.ImportTransactionBatchResult; import com.scalar.db.dataloader.core.dataimport.transactionbatch.ImportTransactionBatchStatus; +import com.scalar.db.dataloader.core.util.ConfigUtil; import com.scalar.db.exception.transaction.TransactionException; import java.io.BufferedReader; import java.time.Duration; @@ -332,7 +333,7 @@ private ImportDataChunkStatus processDataChunkWithTransactions( List transactionBatches = splitIntoTransactionBatches(dataChunk, transactionBatchSize); ExecutorService transactionBatchExecutor = - Executors.newFixedThreadPool(Math.min(transactionBatches.size(), numCores * 2)); + Executors.newFixedThreadPool(ConfigUtil.getTransactionBatchThreadPoolSize()); List> transactionBatchFutures = new ArrayList<>(); AtomicInteger successCount = new AtomicInteger(0); 
AtomicInteger failureCount = new AtomicInteger(0); diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java index dd61b5dc85..1a193c61c4 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java @@ -9,6 +9,7 @@ import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunk; import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunkStatus; import com.scalar.db.dataloader.core.dataimport.datachunk.ImportRow; +import com.scalar.db.dataloader.core.util.ConfigUtil; import java.io.BufferedReader; import java.io.IOException; import java.util.ArrayList; @@ -44,7 +45,6 @@ public class JsonImportProcessor extends ImportProcessor { private static final DataLoaderObjectMapper OBJECT_MAPPER = new DataLoaderObjectMapper(); private static final AtomicInteger dataChunkIdCounter = new AtomicInteger(0); - private static final int MAX_QUEUE_SIZE = 256; public JsonImportProcessor(ImportProcessorParams params) { super(params); @@ -68,7 +68,8 @@ public ConcurrentHashMap process( int dataChunkSize, int transactionBatchSize, BufferedReader reader) { int numCores = Runtime.getRuntime().availableProcessors(); ExecutorService dataChunkExecutor = Executors.newSingleThreadExecutor(); - BlockingQueue dataChunkQueue = new LinkedBlockingQueue<>(MAX_QUEUE_SIZE); + BlockingQueue dataChunkQueue = + new LinkedBlockingQueue<>(ConfigUtil.getImportDataChunkQueueSize()); try { CompletableFuture readerFuture = diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java 
b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java index 933d697cbb..6a8a46fd1c 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java @@ -6,6 +6,7 @@ import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunk; import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunkStatus; import com.scalar.db.dataloader.core.dataimport.datachunk.ImportRow; +import com.scalar.db.dataloader.core.util.ConfigUtil; import java.io.BufferedReader; import java.io.IOException; import java.util.ArrayList; @@ -38,7 +39,6 @@ public class JsonLinesImportProcessor extends ImportProcessor { private static final DataLoaderObjectMapper OBJECT_MAPPER = new DataLoaderObjectMapper(); private static final AtomicInteger dataChunkIdCounter = new AtomicInteger(0); - private static final int MAX_QUEUE_SIZE = 256; /** * Creates a new JsonLinesImportProcessor with the specified parameters. 
@@ -67,7 +67,8 @@ public ConcurrentHashMap process( int dataChunkSize, int transactionBatchSize, BufferedReader reader) { int numCores = Runtime.getRuntime().availableProcessors(); ExecutorService dataChunkExecutor = Executors.newSingleThreadExecutor(); - BlockingQueue dataChunkQueue = new LinkedBlockingQueue<>(MAX_QUEUE_SIZE); + BlockingQueue dataChunkQueue = + new LinkedBlockingQueue<>(ConfigUtil.getImportDataChunkQueueSize()); try { CompletableFuture readerFuture = diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ConfigUtil.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ConfigUtil.java new file mode 100644 index 0000000000..ec750b5851 --- /dev/null +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ConfigUtil.java @@ -0,0 +1,98 @@ +package com.scalar.db.dataloader.core.util; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Properties; + +/** + * Utility class for loading and retrieving configuration properties. + * + *

This class reads properties from a {@code config.properties} file located in the classpath. + */ +public class ConfigUtil { + public static final String CONFIG_PROPERTIES = "config.properties"; + private static volatile Properties properties; + + /** + * Ensures that the configuration properties are loaded. If properties are not yet loaded, it + * loads them in a thread-safe manner. + */ + private static void ensurePropertiesLoaded() { + if (properties == null) { + synchronized (ConfigUtil.class) { + if (properties == null) { + loadProperties(); + } + } + } + } + + /** + * Loads the configuration properties from the {@code config.properties} file in the classpath. + * + *

If the file is missing or cannot be read, a {@link RuntimeException} is thrown. + * + * @throws RuntimeException if the properties file is not found or cannot be loaded + */ + private static void loadProperties() { + try (InputStream inputStream = + Thread.currentThread().getContextClassLoader().getResourceAsStream(CONFIG_PROPERTIES)) { + if (inputStream == null) { + throw new RuntimeException("config.properties file not found in classpath."); + } + properties = new Properties(); + properties.load(inputStream); + } catch (IOException e) { + throw new RuntimeException("Failed to load config.properties", e); + } + } + + /** + * Retrieves the queue size for importing data chunks. + * + *

This method reads the property {@code import.data.chunk.queue.size}. If the property is + * missing or invalid, the default value {@code 256} is returned. + * + * @return the configured queue size for data chunks, or {@code 256} if not specified + */ + public static Integer getImportDataChunkQueueSize() { + ensurePropertiesLoaded(); + return getIntegerProperty("import.data.chunk.queue.size", 256); + } + + /** + * Retrieves the thread pool size for processing transaction batches. + * + *

This method reads the property {@code transaction.batch.thread.pool.size}. If the property + * is missing or invalid, the default value {@code 16} is returned. + * + * @return the configured thread pool size for transaction batches, or {@code 16} if not specified + */ + public static Integer getTransactionBatchThreadPoolSize() { + ensurePropertiesLoaded(); + return getIntegerProperty("transaction.batch.thread.pool.size", 16); + } + + /** + * Retrieves an integer property value from the loaded properties. + * + *

If the property is not found, the default value is returned. If the property is present but + * not a valid integer, an {@link IllegalArgumentException} is thrown. + * + * @param key the property key to look up + * @param defaultValue the default value to return if the property is missing or empty + * @return the integer value of the property, or the default value if not specified + * @throws IllegalArgumentException if the property value is not a valid integer + */ + private static Integer getIntegerProperty(String key, int defaultValue) { + String value = properties.getProperty(key); + if (value == null || value.trim().isEmpty()) { + return defaultValue; + } + try { + return Integer.parseInt(value); + } catch (NumberFormatException e) { + throw new IllegalArgumentException("Invalid integer value for property: " + key, e); + } + } +} diff --git a/data-loader/core/src/main/resources/config.properties b/data-loader/core/src/main/resources/config.properties new file mode 100644 index 0000000000..7cd5bd7fed --- /dev/null +++ b/data-loader/core/src/main/resources/config.properties @@ -0,0 +1,2 @@ +transaction.batch.thread.pool.size=16 +import.data.chunk.queue.size=256 \ No newline at end of file diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ConfigUtilTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ConfigUtilTest.java new file mode 100644 index 0000000000..16bf369747 --- /dev/null +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ConfigUtilTest.java @@ -0,0 +1,66 @@ +package com.scalar.db.dataloader.core.util; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import java.lang.reflect.Field; +import java.util.Properties; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class ConfigUtilTest { + + private Properties mockProperties; + + 
@BeforeEach + void setUp() throws Exception { + mockProperties = new Properties(); + setPropertiesField(mockProperties); + } + + @AfterEach + void tearDown() throws Exception { + setPropertiesField(null); + } + + private void setPropertiesField(Properties properties) throws Exception { + Field field = ConfigUtil.class.getDeclaredField("properties"); + field.setAccessible(true); + field.set(null, properties); + } + + @Test + void testGetImportDataChunkQueueSize_WithValidValue() { + mockProperties.setProperty("import.data.chunk.queue.size", "512"); + assertEquals(512, ConfigUtil.getImportDataChunkQueueSize()); + } + + @Test + void testGetImportDataChunkQueueSize_WithNoValue_UsesDefault() { + assertEquals(256, ConfigUtil.getImportDataChunkQueueSize()); + } + + @Test + void testGetImportDataChunkQueueSize_WithInvalidValue_ThrowsException() { + mockProperties.setProperty("import.data.chunk.queue.size", "invalid"); + assertThrows(IllegalArgumentException.class, ConfigUtil::getImportDataChunkQueueSize); + } + + @Test + void testGetTransactionBatchThreadPoolSize_WithValidValue() { + mockProperties.setProperty("transaction.batch.thread.pool.size", "32"); + assertEquals(32, ConfigUtil.getTransactionBatchThreadPoolSize()); + } + + @Test + void testGetTransactionBatchThreadPoolSize_WithNoValue_UsesDefault() { + assertEquals(16, ConfigUtil.getTransactionBatchThreadPoolSize()); + } + + @Test + void testGetTransactionBatchThreadPoolSize_WithInvalidValue_ThrowsException() { + mockProperties.setProperty("transaction.batch.thread.pool.size", "invalid"); + assertThrows(IllegalArgumentException.class, ConfigUtil::getTransactionBatchThreadPoolSize); + } +} From 450aaea8d837467141e987b8f1c10b0e95a48b24 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Fri, 4 Apr 2025 10:48:59 +0530 Subject: [PATCH 84/87] Added new line --- data-loader/core/src/main/resources/config.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/data-loader/core/src/main/resources/config.properties b/data-loader/core/src/main/resources/config.properties index 7cd5bd7fed..17bf731259 100644 --- a/data-loader/core/src/main/resources/config.properties +++ b/data-loader/core/src/main/resources/config.properties @@ -1,2 +1,2 @@ transaction.batch.thread.pool.size=16 -import.data.chunk.queue.size=256 \ No newline at end of file +import.data.chunk.queue.size=256 From aeaa08f8e8de92f17663a37d5c1e6f544c5bbda0 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Sun, 6 Apr 2025 21:47:46 +0530 Subject: [PATCH 85/87] reverted config utils and add CLI options --- .../core/dataimport/ImportOptions.java | 1 + .../processor/CsvImportProcessor.java | 7 +- .../dataimport/processor/ImportProcessor.java | 22 ++--- .../processor/JsonImportProcessor.java | 7 +- .../processor/JsonLinesImportProcessor.java | 7 +- .../db/dataloader/core/util/ConfigUtil.java | 98 ------------------- .../core/src/main/resources/config.properties | 2 - .../dataloader/core/util/ConfigUtilTest.java | 66 ------------- 8 files changed, 20 insertions(+), 190 deletions(-) delete mode 100644 data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ConfigUtil.java delete mode 100644 data-loader/core/src/main/resources/config.properties delete mode 100644 data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ConfigUtilTest.java diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportOptions.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportOptions.java index 9cb6225d30..6d3206765e 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportOptions.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/ImportOptions.java @@ -35,4 +35,5 @@ public class ImportOptions { private final String tableName; private final int maxThreads; private final String customHeaderRow; + private final int dataChunkQueueSize; } diff --git 
a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java index 691a6ce141..c6e1a0275b 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java @@ -7,7 +7,6 @@ import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunk; import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunkStatus; import com.scalar.db.dataloader.core.dataimport.datachunk.ImportRow; -import com.scalar.db.dataloader.core.util.ConfigUtil; import java.io.BufferedReader; import java.io.IOException; import java.io.UncheckedIOException; @@ -67,10 +66,9 @@ public CsvImportProcessor(ImportProcessorParams params) { @Override public ConcurrentHashMap process( int dataChunkSize, int transactionBatchSize, BufferedReader reader) { - int numCores = Runtime.getRuntime().availableProcessors(); ExecutorService dataChunkExecutor = Executors.newSingleThreadExecutor(); BlockingQueue dataChunkQueue = - new LinkedBlockingQueue<>(ConfigUtil.getImportDataChunkQueueSize()); + new LinkedBlockingQueue<>(params.getImportOptions().getDataChunkQueueSize()); try { CompletableFuture readerFuture = @@ -83,7 +81,8 @@ public ConcurrentHashMap process( ImportDataChunk dataChunk = dataChunkQueue.poll(100, TimeUnit.MILLISECONDS); if (dataChunk != null) { ImportDataChunkStatus status = - processDataChunk(dataChunk, transactionBatchSize, numCores); + processDataChunk( + dataChunk, transactionBatchSize, params.getImportOptions().getMaxThreads()); result.put(status.getDataChunkId(), status); } } diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessor.java 
b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessor.java index b788d3b957..bd698ea6ce 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessor.java @@ -15,7 +15,6 @@ import com.scalar.db.dataloader.core.dataimport.transactionbatch.ImportTransactionBatch; import com.scalar.db.dataloader.core.dataimport.transactionbatch.ImportTransactionBatchResult; import com.scalar.db.dataloader.core.dataimport.transactionbatch.ImportTransactionBatchStatus; -import com.scalar.db.dataloader.core.util.ConfigUtil; import com.scalar.db.exception.transaction.TransactionException; import java.io.BufferedReader; import java.time.Duration; @@ -295,11 +294,11 @@ private ImportTaskResult processStorageRecord(ImportDataChunk dataChunk, ImportR * * @param dataChunk the data chunk to process * @param transactionBatchSize the size of transaction batches (used only in transaction mode) - * @param numCores the number of CPU cores to use for parallel processing + * @param threadSize the number of threads to use for parallel processing * @return an {@link ImportDataChunkStatus} containing the complete processing results and metrics */ protected ImportDataChunkStatus processDataChunk( - ImportDataChunk dataChunk, int transactionBatchSize, int numCores) { + ImportDataChunk dataChunk, int transactionBatchSize, int threadSize) { ImportDataChunkStatus status = ImportDataChunkStatus.builder() .dataChunkId(dataChunk.getDataChunkId()) @@ -310,9 +309,9 @@ protected ImportDataChunkStatus processDataChunk( ImportDataChunkStatus importDataChunkStatus; if (params.getScalarDBMode() == ScalarDBMode.TRANSACTION) { importDataChunkStatus = - processDataChunkWithTransactions(dataChunk, transactionBatchSize, numCores); + processDataChunkWithTransactions(dataChunk, transactionBatchSize, threadSize); } else { - 
importDataChunkStatus = processDataChunkWithoutTransactions(dataChunk, numCores); + importDataChunkStatus = processDataChunkWithoutTransactions(dataChunk, threadSize); } notifyDataChunkCompleted(importDataChunkStatus); return importDataChunkStatus; @@ -324,16 +323,15 @@ protected ImportDataChunkStatus processDataChunk( * * @param dataChunk the data chunk to process * @param transactionBatchSize the number of records per transaction batch - * @param numCores the maximum number of concurrent transactions to process + * @param threadSize the maximum number of concurrent transactions to process * @return an {@link ImportDataChunkStatus} containing processing results and metrics */ private ImportDataChunkStatus processDataChunkWithTransactions( - ImportDataChunk dataChunk, int transactionBatchSize, int numCores) { + ImportDataChunk dataChunk, int transactionBatchSize, int threadSize) { Instant startTime = Instant.now(); List transactionBatches = splitIntoTransactionBatches(dataChunk, transactionBatchSize); - ExecutorService transactionBatchExecutor = - Executors.newFixedThreadPool(ConfigUtil.getTransactionBatchThreadPoolSize()); + ExecutorService transactionBatchExecutor = Executors.newFixedThreadPool(threadSize); List> transactionBatchFutures = new ArrayList<>(); AtomicInteger successCount = new AtomicInteger(0); AtomicInteger failureCount = new AtomicInteger(0); @@ -393,15 +391,15 @@ private ImportDataChunkStatus processDataChunkWithTransactions( * are processed concurrently without transaction guarantees. 
* * @param dataChunk the data chunk to process - * @param numCores the number of records to process concurrently + * @param threadSize the number of records to process concurrently * @return an {@link ImportDataChunkStatus} containing processing results and metrics */ private ImportDataChunkStatus processDataChunkWithoutTransactions( - ImportDataChunk dataChunk, int numCores) { + ImportDataChunk dataChunk, int threadSize) { Instant startTime = Instant.now(); AtomicInteger successCount = new AtomicInteger(0); AtomicInteger failureCount = new AtomicInteger(0); - ExecutorService recordExecutor = Executors.newFixedThreadPool(numCores); + ExecutorService recordExecutor = Executors.newFixedThreadPool(threadSize); List> recordFutures = new ArrayList<>(); try { for (ImportRow importRow : dataChunk.getSourceData()) { diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java index 1a193c61c4..5b206d9b69 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java @@ -9,7 +9,6 @@ import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunk; import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunkStatus; import com.scalar.db.dataloader.core.dataimport.datachunk.ImportRow; -import com.scalar.db.dataloader.core.util.ConfigUtil; import java.io.BufferedReader; import java.io.IOException; import java.util.ArrayList; @@ -66,10 +65,9 @@ public JsonImportProcessor(ImportProcessorParams params) { @Override public ConcurrentHashMap process( int dataChunkSize, int transactionBatchSize, BufferedReader reader) { - int numCores = Runtime.getRuntime().availableProcessors(); ExecutorService dataChunkExecutor = 
Executors.newSingleThreadExecutor(); BlockingQueue dataChunkQueue = - new LinkedBlockingQueue<>(ConfigUtil.getImportDataChunkQueueSize()); + new LinkedBlockingQueue<>(params.getImportOptions().getDataChunkQueueSize()); try { CompletableFuture readerFuture = @@ -82,7 +80,8 @@ public ConcurrentHashMap process( ImportDataChunk dataChunk = dataChunkQueue.poll(100, TimeUnit.MILLISECONDS); if (dataChunk != null) { ImportDataChunkStatus status = - processDataChunk(dataChunk, transactionBatchSize, numCores); + processDataChunk( + dataChunk, transactionBatchSize, params.getImportOptions().getMaxThreads()); result.put(status.getDataChunkId(), status); } } diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java index 6a8a46fd1c..60ad6f9aa9 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java @@ -6,7 +6,6 @@ import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunk; import com.scalar.db.dataloader.core.dataimport.datachunk.ImportDataChunkStatus; import com.scalar.db.dataloader.core.dataimport.datachunk.ImportRow; -import com.scalar.db.dataloader.core.util.ConfigUtil; import java.io.BufferedReader; import java.io.IOException; import java.util.ArrayList; @@ -65,10 +64,9 @@ public JsonLinesImportProcessor(ImportProcessorParams params) { @Override public ConcurrentHashMap process( int dataChunkSize, int transactionBatchSize, BufferedReader reader) { - int numCores = Runtime.getRuntime().availableProcessors(); ExecutorService dataChunkExecutor = Executors.newSingleThreadExecutor(); BlockingQueue dataChunkQueue = - new LinkedBlockingQueue<>(ConfigUtil.getImportDataChunkQueueSize()); + new 
LinkedBlockingQueue<>(params.getImportOptions().getDataChunkQueueSize()); try { CompletableFuture readerFuture = @@ -81,7 +79,8 @@ public ConcurrentHashMap process( ImportDataChunk dataChunk = dataChunkQueue.poll(100, TimeUnit.MILLISECONDS); if (dataChunk != null) { ImportDataChunkStatus status = - processDataChunk(dataChunk, transactionBatchSize, numCores); + processDataChunk( + dataChunk, transactionBatchSize, params.getImportOptions().getMaxThreads()); result.put(status.getDataChunkId(), status); } } diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ConfigUtil.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ConfigUtil.java deleted file mode 100644 index ec750b5851..0000000000 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/util/ConfigUtil.java +++ /dev/null @@ -1,98 +0,0 @@ -package com.scalar.db.dataloader.core.util; - -import java.io.IOException; -import java.io.InputStream; -import java.util.Properties; - -/** - * Utility class for loading and retrieving configuration properties. - * - *

This class reads properties from a {@code config.properties} file located in the classpath. - */ -public class ConfigUtil { - public static final String CONFIG_PROPERTIES = "config.properties"; - private static volatile Properties properties; - - /** - * Ensures that the configuration properties are loaded. If properties are not yet loaded, it - * loads them in a thread-safe manner. - */ - private static void ensurePropertiesLoaded() { - if (properties == null) { - synchronized (ConfigUtil.class) { - if (properties == null) { - loadProperties(); - } - } - } - } - - /** - * Loads the configuration properties from the {@code config.properties} file in the classpath. - * - *

If the file is missing or cannot be read, a {@link RuntimeException} is thrown. - * - * @throws RuntimeException if the properties file is not found or cannot be loaded - */ - private static void loadProperties() { - try (InputStream inputStream = - Thread.currentThread().getContextClassLoader().getResourceAsStream(CONFIG_PROPERTIES)) { - if (inputStream == null) { - throw new RuntimeException("config.properties file not found in classpath."); - } - properties = new Properties(); - properties.load(inputStream); - } catch (IOException e) { - throw new RuntimeException("Failed to load config.properties", e); - } - } - - /** - * Retrieves the queue size for importing data chunks. - * - *

This method reads the property {@code import.data.chunk.queue.size}. If the property is - * missing or invalid, the default value {@code 256} is returned. - * - * @return the configured queue size for data chunks, or {@code 256} if not specified - */ - public static Integer getImportDataChunkQueueSize() { - ensurePropertiesLoaded(); - return getIntegerProperty("import.data.chunk.queue.size", 256); - } - - /** - * Retrieves the thread pool size for processing transaction batches. - * - *

This method reads the property {@code transaction.batch.thread.pool.size}. If the property - * is missing or invalid, the default value {@code 16} is returned. - * - * @return the configured thread pool size for transaction batches, or {@code 16} if not specified - */ - public static Integer getTransactionBatchThreadPoolSize() { - ensurePropertiesLoaded(); - return getIntegerProperty("transaction.batch.thread.pool.size", 16); - } - - /** - * Retrieves an integer property value from the loaded properties. - * - *

If the property is not found, the default value is returned. If the property is present but - * not a valid integer, an {@link IllegalArgumentException} is thrown. - * - * @param key the property key to look up - * @param defaultValue the default value to return if the property is missing or empty - * @return the integer value of the property, or the default value if not specified - * @throws IllegalArgumentException if the property value is not a valid integer - */ - private static Integer getIntegerProperty(String key, int defaultValue) { - String value = properties.getProperty(key); - if (value == null || value.trim().isEmpty()) { - return defaultValue; - } - try { - return Integer.parseInt(value); - } catch (NumberFormatException e) { - throw new IllegalArgumentException("Invalid integer value for property: " + key, e); - } - } -} diff --git a/data-loader/core/src/main/resources/config.properties b/data-loader/core/src/main/resources/config.properties deleted file mode 100644 index 17bf731259..0000000000 --- a/data-loader/core/src/main/resources/config.properties +++ /dev/null @@ -1,2 +0,0 @@ -transaction.batch.thread.pool.size=16 -import.data.chunk.queue.size=256 diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ConfigUtilTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ConfigUtilTest.java deleted file mode 100644 index 16bf369747..0000000000 --- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/util/ConfigUtilTest.java +++ /dev/null @@ -1,66 +0,0 @@ -package com.scalar.db.dataloader.core.util; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; - -import java.lang.reflect.Field; -import java.util.Properties; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -class ConfigUtilTest { - - private Properties mockProperties; - - @BeforeEach - void 
setUp() throws Exception { - mockProperties = new Properties(); - setPropertiesField(mockProperties); - } - - @AfterEach - void tearDown() throws Exception { - setPropertiesField(null); - } - - private void setPropertiesField(Properties properties) throws Exception { - Field field = ConfigUtil.class.getDeclaredField("properties"); - field.setAccessible(true); - field.set(null, properties); - } - - @Test - void testGetImportDataChunkQueueSize_WithValidValue() { - mockProperties.setProperty("import.data.chunk.queue.size", "512"); - assertEquals(512, ConfigUtil.getImportDataChunkQueueSize()); - } - - @Test - void testGetImportDataChunkQueueSize_WithNoValue_UsesDefault() { - assertEquals(256, ConfigUtil.getImportDataChunkQueueSize()); - } - - @Test - void testGetImportDataChunkQueueSize_WithInvalidValue_ThrowsException() { - mockProperties.setProperty("import.data.chunk.queue.size", "invalid"); - assertThrows(IllegalArgumentException.class, ConfigUtil::getImportDataChunkQueueSize); - } - - @Test - void testGetTransactionBatchThreadPoolSize_WithValidValue() { - mockProperties.setProperty("transaction.batch.thread.pool.size", "32"); - assertEquals(32, ConfigUtil.getTransactionBatchThreadPoolSize()); - } - - @Test - void testGetTransactionBatchThreadPoolSize_WithNoValue_UsesDefault() { - assertEquals(16, ConfigUtil.getTransactionBatchThreadPoolSize()); - } - - @Test - void testGetTransactionBatchThreadPoolSize_WithInvalidValue_ThrowsException() { - mockProperties.setProperty("transaction.batch.thread.pool.size", "invalid"); - assertThrows(IllegalArgumentException.class, ConfigUtil::getTransactionBatchThreadPoolSize); - } -} From 44bf503d063d17c98dafdca3ac069cb0dde2e8f3 Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Mon, 7 Apr 2025 08:54:21 +0530 Subject: [PATCH 86/87] Updated tests --- .../core/dataimport/processor/CsvImportProcessorTest.java | 2 ++ .../core/dataimport/processor/JsonImportProcessorTest.java | 2 ++ 
.../core/dataimport/processor/JsonLinesImportProcessorTest.java | 2 ++ 3 files changed, 6 insertions(+) diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessorTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessorTest.java index b4c4373106..94acd20ace 100644 --- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessorTest.java +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessorTest.java @@ -58,6 +58,8 @@ void setup() throws ScalarDBDaoException, TransactionException { .dataChunkSize(5) .tableName("table") .logMode(LogMode.SINGLE_FILE) + .maxThreads(8) + .dataChunkQueueSize(256) .build(); Mockito.when( dao.get( diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessorTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessorTest.java index d60eb48ccb..aa9a106a0c 100644 --- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessorTest.java +++ b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessorTest.java @@ -58,6 +58,8 @@ void setup() throws ScalarDBDaoException, TransactionException { .dataChunkSize(5) .tableName("table") .logMode(LogMode.SINGLE_FILE) + .maxThreads(8) + .dataChunkQueueSize(256) .build(); Mockito.when( dao.get( diff --git a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessorTest.java b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessorTest.java index 6224236a70..e3db391756 100644 --- a/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessorTest.java +++ 
b/data-loader/core/src/test/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessorTest.java @@ -57,6 +57,8 @@ void setup() throws ScalarDBDaoException, TransactionException { .transactionBatchSize(1) .dataChunkSize(5) .tableName("table") + .maxThreads(8) + .dataChunkQueueSize(256) .logMode(LogMode.SINGLE_FILE) .build(); Mockito.when( From a5c0b9132eb00ca496c5dd131648492294b078ce Mon Sep 17 00:00:00 2001 From: Jishnu J Date: Mon, 7 Apr 2025 09:43:04 +0530 Subject: [PATCH 87/87] Removed explicit passing of thread size and use it directly --- .../processor/CsvImportProcessor.java | 4 +--- .../dataimport/processor/ImportProcessor.java | 21 ++++++++----------- .../processor/JsonImportProcessor.java | 4 +--- .../processor/JsonLinesImportProcessor.java | 4 +--- 4 files changed, 12 insertions(+), 21 deletions(-) diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java index c6e1a0275b..0c68d5e566 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/CsvImportProcessor.java @@ -80,9 +80,7 @@ public ConcurrentHashMap process( while (!(dataChunkQueue.isEmpty() && readerFuture.isDone())) { ImportDataChunk dataChunk = dataChunkQueue.poll(100, TimeUnit.MILLISECONDS); if (dataChunk != null) { - ImportDataChunkStatus status = - processDataChunk( - dataChunk, transactionBatchSize, params.getImportOptions().getMaxThreads()); + ImportDataChunkStatus status = processDataChunk(dataChunk, transactionBatchSize); result.put(status.getDataChunkId(), status); } } diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessor.java
b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessor.java index bd698ea6ce..1a317a1a82 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/ImportProcessor.java @@ -294,11 +294,10 @@ private ImportTaskResult processStorageRecord(ImportDataChunk dataChunk, ImportR * * @param dataChunk the data chunk to process * @param transactionBatchSize the size of transaction batches (used only in transaction mode) - * @param threadSize the number of threads to use for parallel processing * @return an {@link ImportDataChunkStatus} containing the complete processing results and metrics */ protected ImportDataChunkStatus processDataChunk( - ImportDataChunk dataChunk, int transactionBatchSize, int threadSize) { + ImportDataChunk dataChunk, int transactionBatchSize) { ImportDataChunkStatus status = ImportDataChunkStatus.builder() .dataChunkId(dataChunk.getDataChunkId()) @@ -308,10 +307,9 @@ protected ImportDataChunkStatus processDataChunk( notifyDataChunkStarted(status); ImportDataChunkStatus importDataChunkStatus; if (params.getScalarDBMode() == ScalarDBMode.TRANSACTION) { - importDataChunkStatus = - processDataChunkWithTransactions(dataChunk, transactionBatchSize, threadSize); + importDataChunkStatus = processDataChunkWithTransactions(dataChunk, transactionBatchSize); } else { - importDataChunkStatus = processDataChunkWithoutTransactions(dataChunk, threadSize); + importDataChunkStatus = processDataChunkWithoutTransactions(dataChunk); } notifyDataChunkCompleted(importDataChunkStatus); return importDataChunkStatus; @@ -323,15 +321,15 @@ protected ImportDataChunkStatus processDataChunk( * * @param dataChunk the data chunk to process * @param transactionBatchSize the number of records per transaction batch - * @param threadSize the maximum number of concurrent transactions to process * 
@return an {@link ImportDataChunkStatus} containing processing results and metrics */ private ImportDataChunkStatus processDataChunkWithTransactions( - ImportDataChunk dataChunk, int transactionBatchSize, int threadSize) { + ImportDataChunk dataChunk, int transactionBatchSize) { Instant startTime = Instant.now(); List transactionBatches = splitIntoTransactionBatches(dataChunk, transactionBatchSize); - ExecutorService transactionBatchExecutor = Executors.newFixedThreadPool(threadSize); + ExecutorService transactionBatchExecutor = + Executors.newFixedThreadPool(params.getImportOptions().getMaxThreads()); List> transactionBatchFutures = new ArrayList<>(); AtomicInteger successCount = new AtomicInteger(0); AtomicInteger failureCount = new AtomicInteger(0); @@ -391,15 +389,14 @@ private ImportDataChunkStatus processDataChunkWithTransactions( * are processed concurrently without transaction guarantees. * * @param dataChunk the data chunk to process - * @param threadSize the number of records to process concurrently * @return an {@link ImportDataChunkStatus} containing processing results and metrics */ - private ImportDataChunkStatus processDataChunkWithoutTransactions( - ImportDataChunk dataChunk, int threadSize) { + private ImportDataChunkStatus processDataChunkWithoutTransactions(ImportDataChunk dataChunk) { Instant startTime = Instant.now(); AtomicInteger successCount = new AtomicInteger(0); AtomicInteger failureCount = new AtomicInteger(0); - ExecutorService recordExecutor = Executors.newFixedThreadPool(threadSize); + ExecutorService recordExecutor = + Executors.newFixedThreadPool(params.getImportOptions().getMaxThreads()); List> recordFutures = new ArrayList<>(); try { for (ImportRow importRow : dataChunk.getSourceData()) { diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java index 
5b206d9b69..733a5afa96 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonImportProcessor.java @@ -79,9 +79,7 @@ public ConcurrentHashMap process( while (!(dataChunkQueue.isEmpty() && readerFuture.isDone())) { ImportDataChunk dataChunk = dataChunkQueue.poll(100, TimeUnit.MILLISECONDS); if (dataChunk != null) { - ImportDataChunkStatus status = - processDataChunk( - dataChunk, transactionBatchSize, params.getImportOptions().getMaxThreads()); + ImportDataChunkStatus status = processDataChunk(dataChunk, transactionBatchSize); result.put(status.getDataChunkId(), status); } } diff --git a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java index 60ad6f9aa9..a121a106a5 100644 --- a/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java +++ b/data-loader/core/src/main/java/com/scalar/db/dataloader/core/dataimport/processor/JsonLinesImportProcessor.java @@ -78,9 +78,7 @@ public ConcurrentHashMap process( while (!(dataChunkQueue.isEmpty() && readerFuture.isDone())) { ImportDataChunk dataChunk = dataChunkQueue.poll(100, TimeUnit.MILLISECONDS); if (dataChunk != null) { - ImportDataChunkStatus status = - processDataChunk( - dataChunk, transactionBatchSize, params.getImportOptions().getMaxThreads()); + ImportDataChunkStatus status = processDataChunk(dataChunk, transactionBatchSize); result.put(status.getDataChunkId(), status); } }