diff --git a/dataset/load-from-test/fixed_list/fixed_list_with_null.csv b/dataset/load-from-test/fixed_list/fixed_list_with_null.csv new file mode 100644 index 0000000000..9cb9452599 --- /dev/null +++ b/dataset/load-from-test/fixed_list/fixed_list_with_null.csv @@ -0,0 +1,3 @@ +"[3324.123,342423.4375,432.123]" +"NULL" +"[1,4231,432.123]" diff --git a/src/common/vector/value_vector.cpp b/src/common/vector/value_vector.cpp index b6d36376e3..5d9101662c 100644 --- a/src/common/vector/value_vector.cpp +++ b/src/common/vector/value_vector.cpp @@ -294,6 +294,34 @@ std::unique_ptr ValueVector::getAsValue(uint64_t pos) { value->childrenSize = children.size(); value->children = std::move(children); } break; + case PhysicalTypeID::FIXED_LIST: { + auto childDataType = FixedListType::getChildType(&dataType); + auto numElements = FixedListType::getNumElementsInList(&dataType); + std::vector> children; + children.reserve(numElements); + switch (childDataType->getPhysicalType()) { + case PhysicalTypeID::INT64: { + FixedListVector::getAsValue(this, children, pos, numElements); + } break; + case PhysicalTypeID::INT32: { + FixedListVector::getAsValue(this, children, pos, numElements); + } break; + case PhysicalTypeID::INT16: { + FixedListVector::getAsValue(this, children, pos, numElements); + } break; + case PhysicalTypeID::DOUBLE: { + FixedListVector::getAsValue(this, children, pos, numElements); + } break; + case PhysicalTypeID::FLOAT: { + FixedListVector::getAsValue(this, children, pos, numElements); + } break; + default: // LCOV_EXCL_START + KU_UNREACHABLE; + // LCOV_EXCL_STOP + } + value->childrenSize = numElements; + value->children = std::move(children); + } break; case PhysicalTypeID::STRUCT: { auto& fieldVectors = StructVector::getFieldVectors(this); std::vector> children; @@ -536,6 +564,57 @@ void ListVector::sliceDataVector( } } +template<> +void FixedListVector::getAsValue(ValueVector* vector, + std::vector>& children, uint64_t pos, uint64_t numElements) { + for (auto i = 0u; i < numElements; ++i) { + children.push_back(Value::createDefaultValue(LogicalType{LogicalTypeID::INT64}).copy()); + children[i]->val.int64Val = + reinterpret_cast(vector->getData() + vector->getNumBytesPerValue() * pos)[i]; + } +} + +template<> +void FixedListVector::getAsValue(ValueVector* vector, + std::vector>& children, uint64_t pos, uint64_t numElements) { + for (auto i = 0u; i < numElements; ++i) { + children.push_back(Value::createDefaultValue(LogicalType{LogicalTypeID::INT32}).copy()); + children[i]->val.int32Val = + reinterpret_cast(vector->getData() + vector->getNumBytesPerValue() * pos)[i]; + } +} + +template<> +void FixedListVector::getAsValue(ValueVector* vector, + std::vector>& children, uint64_t pos, uint64_t numElements) { + for (auto i = 0u; i < numElements; ++i) { + children.push_back(Value::createDefaultValue(LogicalType{LogicalTypeID::INT16}).copy()); + children[i]->val.int16Val = + reinterpret_cast(vector->getData() + vector->getNumBytesPerValue() * pos)[i]; + } +} + +template<> +void FixedListVector::getAsValue(ValueVector* vector, + std::vector>& children, uint64_t pos, uint64_t numElements) { + for (auto i = 0u; i < numElements; ++i) { + children.push_back(Value::createDefaultValue(LogicalType{LogicalTypeID::FLOAT}).copy()); + children[i]->val.floatVal = + reinterpret_cast(vector->getData() + vector->getNumBytesPerValue() * pos)[i]; + } +} + +template<> +void FixedListVector::getAsValue(ValueVector* vector, + std::vector>& children, uint64_t pos, uint64_t numElements) { + // default: int64 + for (auto i = 0u; i < numElements; ++i) { + children.push_back(Value::createDefaultValue(LogicalType{LogicalTypeID::DOUBLE}).copy()); + children[i]->val.doubleVal = + reinterpret_cast(vector->getData() + vector->getNumBytesPerValue() * pos)[i]; + } +} + void StructVector::copyFromRowData(ValueVector* vector, uint32_t pos, const uint8_t* rowData) { KU_ASSERT(vector->dataType.getPhysicalType() == PhysicalTypeID::STRUCT); auto& structFields = getFieldVectors(vector); diff --git a/src/function/cast_string_to_functions.cpp b/src/function/cast_string_to_functions.cpp index 1d58be1640..15d46eeb99 100644 --- a/src/function/cast_string_to_functions.cpp +++ b/src/function/cast_string_to_functions.cpp @@ -20,6 +20,9 @@ struct CastStringHelper { uint64_t /*rowToAdd*/ = 0, const CSVReaderConfig* /*csvReaderConfig*/ = nullptr) { simpleIntegerCast(input, len, result, LogicalType{LogicalTypeID::INT64}); } + + static void castToFixedList(const char* input, uint64_t len, ValueVector* vector, + uint64_t rowToAdd, const CSVReaderConfig* csvReaderConfig); }; template<> @@ -167,7 +170,7 @@ static bool skipToClose(const char*& input, const char* end, uint64_t& lvl, char if (!skipToCloseQuotes(input, end)) { return false; } - } else if (*input == '{') { // must have closing brackets fro {, ] if they are not quoted + } else if (*input == '{') { // must have closing brackets {, ] if they are not quoted if (!skipToClose(input, end, lvl, '}', csvReaderConfig)) { return false; } @@ -331,7 +334,6 @@ struct SplitStringFixedListOperation { if (str.empty() || isNull(str)) { throw ConversionException("Cast failed. NULL is not allowed for FIXEDLIST."); } - auto type = FixedListType::getChildType(&resultVector->dataType); CastStringHelper::cast(start, str.length(), value); resultVector->setValue(offset, value); offset++; @@ -347,8 +349,8 @@ static void validateNumElementsInList(uint64_t numElementsRead, const LogicalTyp } } -void castStringToFixedList(const char* input, uint64_t len, ValueVector* vector, uint64_t rowToAdd, - const CSVReaderConfig* csvReaderConfig) { +void CastStringHelper::castToFixedList(const char* input, uint64_t len, ValueVector* vector, + uint64_t rowToAdd, const CSVReaderConfig* csvReaderConfig) { KU_ASSERT(vector->dataType.getLogicalTypeID() == LogicalTypeID::FIXED_LIST); auto childDataType = FixedListType::getChildType(&vector->dataType); @@ -359,7 +361,7 @@ void castStringToFixedList(const char* input, uint64_t len, ValueVector* vector, auto startOffset = state.count * rowToAdd; switch (childDataType->getLogicalTypeID()) { - // TODO: currently only allow these type + // TODO(Kebing): currently only allow these type case LogicalTypeID::INT64: { SplitStringFixedListOperation split{startOffset, vector}; startListCast(input, len, split, csvReaderConfig, vector); @@ -381,11 +383,17 @@ void castStringToFixedList(const char* input, uint64_t len, ValueVector* vector, startListCast(input, len, split, csvReaderConfig, vector); } break; default: { - throw NotImplementedException("Unsupported data type: Driver::castStringToFixedList"); + throw NotImplementedException("Unsupported data type: Function::castStringToFixedList"); } } } +void CastString::castToFixedList(const ku_string_t& input, ValueVector* resultVector, + uint64_t rowToAdd, const CSVReaderConfig* csvReaderConfig) { + CastStringHelper::castToFixedList(reinterpret_cast(input.getData()), input.len, + resultVector, rowToAdd, csvReaderConfig); +} + // ---------------------- cast String to Map ------------------------------ // struct SplitStringMapOperation { SplitStringMapOperation(uint64_t& offset, ValueVector* resultVector) @@ -420,7 +428,7 @@ static bool parseKeyOrValue(const char*& input, const char* end, T& state, bool if (*input == '"' || *input == '\'') { if (!skipToCloseQuotes(input, end)) { return false; - }; + } } else if (*input == '{') { if (!skipToClose(input, end, lvl, '}', csvReaderConfig)) { return false; @@ -429,7 +437,7 @@ static bool parseKeyOrValue(const char*& input, const char* end, T& state, bool if (!skipToClose( input, end, lvl, CopyConstants::DEFAULT_CSV_LIST_END_CHAR, csvReaderConfig)) { return false; - }; + } } else if (isKey && *input == '=') { return state.handleKey(start, input, csvReaderConfig); } else if (!isKey && (*input == csvReaderConfig->delimiter || *input == '}')) { @@ -500,9 +508,9 @@ void CastStringHelper::cast(const char* input, uint64_t len, map_entry_t& /*resu template<> void CastString::operation(const ku_string_t& input, map_entry_t& result, ValueVector* resultVector, - uint64_t rowToAdd, const CSVReaderConfig* CSVReaderConfig) { + uint64_t rowToAdd, const CSVReaderConfig* csvReaderConfig) { CastStringHelper::cast(reinterpret_cast(input.getData()), input.len, result, - resultVector, rowToAdd, CSVReaderConfig); + resultVector, rowToAdd, csvReaderConfig); } // ---------------------- cast String to Struct ------------------------------ // @@ -854,8 +862,8 @@ void CastString::copyStringToVector(ValueVector* vector, uint64_t rowToAdd, std: strVal.data(), strVal.length(), val, vector, rowToAdd, csvReaderConfig); } break; case LogicalTypeID::FIXED_LIST: { - // TODO: add fix list function wrapper - castStringToFixedList(strVal.data(), strVal.length(), vector, rowToAdd, csvReaderConfig); + CastStringHelper::castToFixedList( + strVal.data(), strVal.length(), vector, rowToAdd, csvReaderConfig); } break; case LogicalTypeID::STRUCT: { struct_entry_t val; diff --git a/src/function/vector_cast_functions.cpp b/src/function/vector_cast_functions.cpp index 4c1de73248..3e4a6e538c 100644 --- a/src/function/vector_cast_functions.cpp +++ b/src/function/vector_cast_functions.cpp @@ -3,7 +3,7 @@ #include "binder/binder.h" #include "binder/expression/literal_expression.h" #include "common/exception/binder.h" -#include "common/exception/not_implemented.h" +#include "common/exception/conversion.h" #include "function/cast/functions/cast_functions.h" #include "function/cast/functions/cast_rdf_variant.h" #include "function/cast/functions/cast_string_to_functions.h" @@ -13,6 +13,79 @@ using namespace kuzu::common; namespace kuzu { namespace function { +static void castFixedListToString( + ValueVector& param, uint64_t pos, ValueVector& resultVector, uint64_t resultPos) { + resultVector.setNull(resultPos, param.isNull(pos)); + if (param.isNull(pos)) { + return; + } + std::string result = "["; + auto numValuesPerList = FixedListType::getNumElementsInList(¶m.dataType); + auto childType = FixedListType::getChildType(¶m.dataType); + auto values = param.getData() + pos * param.getNumBytesPerValue(); + for (auto i = 0u; i < numValuesPerList - 1; ++i) { + // Note: FixedList can only store numeric types and doesn't allow nulls. + result += TypeUtils::castValueToString(*childType, values, nullptr /* vector */); + result += ","; + values += PhysicalTypeUtils::getFixedTypeSize(childType->getPhysicalType()); + } + result += TypeUtils::castValueToString(*childType, values, nullptr /* vector */); + result += "]"; + resultVector.setValue(resultPos, result); +} + +static void fixedListCastExecFunction(const std::vector>& params, + ValueVector& result, void* /*dataPtr*/ = nullptr) { + assert(params.size() == 1); + auto param = params[0]; + if (param->state->isFlat()) { + castFixedListToString(*param, param->state->selVector->selectedPositions[0], result, + result.state->selVector->selectedPositions[0]); + } else if (param->state->selVector->isUnfiltered()) { + for (auto i = 0u; i < param->state->selVector->selectedSize; i++) { + castFixedListToString(*param, i, result, i); + } + } else { + for (auto i = 0u; i < param->state->selVector->selectedSize; i++) { + castFixedListToString(*param, param->state->selVector->selectedPositions[i], result, + result.state->selVector->selectedPositions[i]); + } + } +} + +static void StringtoFixedListCastExecFunction( + const std::vector>& params, ValueVector& result, void* dataPtr) { + assert(params.size() == 1); + auto param = params[0]; + auto csvReaderConfig = &reinterpret_cast(dataPtr)->csvConfig; + if (param->state->isFlat()) { + auto inputPos = param->state->selVector->selectedPositions[0]; + auto resultPos = result.state->selVector->selectedPositions[0]; + result.setNull(resultPos, param->isNull(inputPos)); + if (!result.isNull(inputPos)) { + CastString::castToFixedList( + param->getValue(inputPos), &result, resultPos, csvReaderConfig); + } + } else if (param->state->selVector->isUnfiltered()) { + for (auto i = 0u; i < param->state->selVector->selectedSize; i++) { + result.setNull(i, param->isNull(i)); + if (!result.isNull(i)) { + CastString::castToFixedList( + param->getValue(i), &result, i, csvReaderConfig); + } + } + } else { + for (auto i = 0u; i < param->state->selVector->selectedSize; i++) { + auto pos = param->state->selVector->selectedPositions[i]; + result.setNull(pos, param->isNull(pos)); + if (!result.isNull(pos)) { + CastString::castToFixedList( + param->getValue(pos), &result, pos, csvReaderConfig); + } + } + } +} + bool CastFunction::hasImplicitCast(const LogicalType& srcType, const LogicalType& dstType) { // We allow cast between any numerical types if (LogicalTypeUtils::isNumerical(srcType) && LogicalTypeUtils::isNumerical(dstType)) { @@ -103,28 +176,28 @@ static std::unique_ptr bindCastFromStringFunction( case LogicalTypeID::UINT8: { execFunc = ScalarFunction::UnaryCastStringExecFunction; } break; - case common::LogicalTypeID::VAR_LIST: { - execFunc = ScalarFunction::UnaryCastStringExecFunction; + case LogicalTypeID::VAR_LIST: { + execFunc = + ScalarFunction::UnaryCastStringExecFunction; + } break; + case LogicalTypeID::FIXED_LIST: { + execFunc = StringtoFixedListCastExecFunction; } break; - case common::LogicalTypeID::MAP: { - execFunc = ScalarFunction::UnaryCastStringExecFunction; + case LogicalTypeID::MAP: { + execFunc = + ScalarFunction::UnaryCastStringExecFunction; } break; - case common::LogicalTypeID::STRUCT: { - execFunc = ScalarFunction::UnaryCastStringExecFunction; + case LogicalTypeID::STRUCT: { + execFunc = + ScalarFunction::UnaryCastStringExecFunction; } break; - case common::LogicalTypeID::UNION: { - execFunc = ScalarFunction::UnaryCastStringExecFunction; + case LogicalTypeID::UNION: { + execFunc = + ScalarFunction::UnaryCastStringExecFunction; } break; - // LCOV_EXCL_START default: - throw common::NotImplementedException{ - stringFormat("Unimplemented casting function from STRING to {}.", - LogicalTypeUtils::toString(targetTypeID))}; - // LCOV_EXCL_STOP + throw ConversionException{stringFormat("Unsupported casting function from STRING to {}.", + LogicalTypeUtils::toString(targetTypeID))}; } return std::make_unique( functionName, std::vector{LogicalTypeID::STRING}, targetTypeID, execFunc); @@ -201,8 +274,8 @@ static std::unique_ptr bindCastFromRdfVariantFunction( } break; // LCOV_EXCL_START default: - throw common::NotImplementedException{ - stringFormat("Unimplemented casting function from RDF_VARIANT to {}.", + throw ConversionException{ + stringFormat("Unsupported casting function from RDF_VARIANT to {}.", LogicalTypeUtils::toString(targetTypeID))}; // LCOV_EXCL_STOP } @@ -210,46 +283,6 @@ static std::unique_ptr bindCastFromRdfVariantFunction( std::vector{LogicalTypeID::RDF_VARIANT}, targetTypeID, execFunc); } -static void castFixedListToString( - ValueVector& param, uint64_t pos, ValueVector& resultVector, uint64_t resultPos) { - resultVector.setNull(resultPos, param.isNull(pos)); - if (param.isNull(pos)) { - return; - } - std::string result = "["; - auto numValuesPerList = FixedListType::getNumElementsInList(¶m.dataType); - auto childType = FixedListType::getChildType(¶m.dataType); - auto values = param.getData() + pos * param.getNumBytesPerValue(); - for (auto i = 0u; i < numValuesPerList - 1; ++i) { - // Note: FixedList can only store numeric types and doesn't allow nulls. - result += TypeUtils::castValueToString(*childType, values, nullptr /* vector */); - result += ","; - values += PhysicalTypeUtils::getFixedTypeSize(childType->getPhysicalType()); - } - result += TypeUtils::castValueToString(*childType, values, nullptr /* vector */); - result += "]"; - resultVector.setValue(resultPos, result); -} - -static void fixedListCastExecFunction(const std::vector>& params, - common::ValueVector& result, void* /*dataPtr*/ = nullptr) { - assert(params.size() == 1); - auto param = params[0]; - if (param->state->isFlat()) { - castFixedListToString(*param, param->state->selVector->selectedPositions[0], result, - result.state->selVector->selectedPositions[0]); - } else if (param->state->selVector->isUnfiltered()) { - for (auto i = 0u; i < param->state->selVector->selectedSize; i++) { - castFixedListToString(*param, i, result, i); - } - } else { - for (auto i = 0u; i < param->state->selVector->selectedSize; i++) { - castFixedListToString(*param, param->state->selVector->selectedPositions[i], result, - result.state->selVector->selectedPositions[i]); - } - } -} - static std::unique_ptr bindCastToStringFunction( const std::string& functionName, LogicalTypeID sourceTypeID) { scalar_exec_func func; @@ -323,11 +356,10 @@ static std::unique_ptr bindCastToStringFunction( case LogicalTypeID::UNION: { func = ScalarFunction::UnaryCastExecFunction; } break; + // ToDo(Kebing): RECURSIVE_REL to string // LCOV_EXCL_START default: - throw common::NotImplementedException{ - stringFormat("Unimplemented casting function from {} to STRING.", - LogicalTypeUtils::toString(sourceTypeID))}; + KU_UNREACHABLE; // LCOV_EXCL_STOP } return std::make_unique( @@ -339,69 +371,65 @@ static std::unique_ptr bindCastToNumericFunction( const std::string& functionName, LogicalTypeID sourceTypeID, LogicalTypeID targetTypeID) { scalar_exec_func func; switch (sourceTypeID) { - case common::LogicalTypeID::INT8: { + case LogicalTypeID::INT8: { func = ScalarFunction::UnaryExecFunction; } break; - case common::LogicalTypeID::INT16: { + case LogicalTypeID::INT16: { func = ScalarFunction::UnaryExecFunction; } break; - case common::LogicalTypeID::INT32: { + case LogicalTypeID::INT32: { func = ScalarFunction::UnaryExecFunction; } break; - case common::LogicalTypeID::SERIAL: - case common::LogicalTypeID::INT64: { + case LogicalTypeID::SERIAL: + case LogicalTypeID::INT64: { func = ScalarFunction::UnaryExecFunction; } break; - case common::LogicalTypeID::UINT8: { + case LogicalTypeID::UINT8: { func = ScalarFunction::UnaryExecFunction; } break; - case common::LogicalTypeID::UINT16: { + case LogicalTypeID::UINT16: { func = ScalarFunction::UnaryExecFunction; } break; - case common::LogicalTypeID::UINT32: { + case LogicalTypeID::UINT32: { func = ScalarFunction::UnaryExecFunction; } break; - case common::LogicalTypeID::UINT64: { + case LogicalTypeID::UINT64: { func = ScalarFunction::UnaryExecFunction; } break; - case common::LogicalTypeID::INT128: { - func = ScalarFunction::UnaryExecFunction; + case LogicalTypeID::INT128: { + func = ScalarFunction::UnaryExecFunction; } break; - case common::LogicalTypeID::FLOAT: { + case LogicalTypeID::FLOAT: { func = ScalarFunction::UnaryExecFunction; } break; - case common::LogicalTypeID::DOUBLE: { + case LogicalTypeID::DOUBLE: { func = ScalarFunction::UnaryExecFunction; } break; - // LCOV_EXCL_START default: - throw common::NotImplementedException{stringFormat( - "Unimplemented casting function from {} to {}.", + throw ConversionException{stringFormat("Unsupported casting function from {} to {}.", LogicalTypeUtils::toString(sourceTypeID), LogicalTypeUtils::toString(targetTypeID))}; - // LCOV_EXCL_STOP } return std::make_unique( - functionName, std::vector{sourceTypeID}, targetTypeID, func); + functionName, std::vector{sourceTypeID}, targetTypeID, func); } static std::unique_ptr bindCastToTimestampFunction( const std::string& functionName, LogicalTypeID sourceTypeID) { scalar_exec_func func; switch (sourceTypeID) { - case common::LogicalTypeID::DATE: { + case LogicalTypeID::DATE: { func = ScalarFunction::UnaryExecFunction; } break; default: - // LCOV_EXCL_START - throw common::NotImplementedException{"bindCastToTimestampFunction"}; - // LCOV_EXCL_STOP + throw ConversionException{stringFormat("Unsupported casting function from {} to TIMESTAMP.", + LogicalTypeUtils::toString(sourceTypeID))}; } - return std::make_unique(functionName, - std::vector{sourceTypeID}, LogicalTypeID::TIMESTAMP, func); + return std::make_unique( + functionName, std::vector{sourceTypeID}, LogicalTypeID::TIMESTAMP, func); } -std::unique_ptr CastFunction::bindCastFunction(const std::string& functionName, - common::LogicalTypeID sourceTypeID, common::LogicalTypeID targetTypeID) { +std::unique_ptr CastFunction::bindCastFunction( + const std::string& functionName, LogicalTypeID sourceTypeID, LogicalTypeID targetTypeID) { if (sourceTypeID == LogicalTypeID::STRING) { return bindCastFromStringFunction(functionName, targetTypeID); } @@ -463,10 +491,9 @@ std::unique_ptr CastFunction::bindCastFunction(const std::string case LogicalTypeID::TIMESTAMP: { return bindCastToTimestampFunction(functionName, sourceTypeID); } - // LCOV_EXCL_START default: { - throw common::NotImplementedException{"bindCastFunction"}; - // LCOV_EXCL_STOP + throw ConversionException{stringFormat("Unsupported casting function from {} to {}.", + LogicalTypeUtils::toString(sourceTypeID), LogicalTypeUtils::toString(targetTypeID))}; } } } @@ -684,7 +711,9 @@ std::unique_ptr CastAnyFunction::bindFunc( const binder::expression_vector& arguments, Function* function) { // check the size of the arguments if (arguments.size() != 2) { - throw BinderException("Invalid number of arguments for given function CAST."); + throw BinderException(stringFormat( + "Invalid number of arguments for given function CAST. Expected: 2, Actual: {}.", + arguments.size())); } auto inputTypeID = arguments[0]->dataType.getLogicalTypeID(); diff --git a/src/include/common/vector/value_vector.h b/src/include/common/vector/value_vector.h index 5304359159..19baa4ff0f 100644 --- a/src/include/common/vector/value_vector.h +++ b/src/include/common/vector/value_vector.h @@ -17,6 +17,7 @@ class Value; //! The capacity of a ValueVector is either 1 (sequence) or DEFAULT_VECTOR_CAPACITY. class ValueVector { friend class ListVector; + friend class FixedListVector; friend class ListAuxiliaryBuffer; friend class StructVector; friend class StringVector; @@ -70,8 +71,8 @@ class ValueVector { void copyFromVectorData( uint8_t* dstData, const ValueVector* srcVector, const uint8_t* srcVectorData); void copyFromVectorData(uint64_t dstPos, const ValueVector* srcVector, uint64_t srcPos); - void copyFromValue(uint64_t pos, const Value& value); + std::unique_ptr getAsValue(uint64_t pos); inline uint8_t* getData() const { return valueBuffer.get(); } @@ -181,6 +182,29 @@ class ListVector { static void sliceDataVector(ValueVector* vectorToSlice, uint64_t childIdx, uint64_t numValues); }; +class FixedListVector { +public: + template + static void getAsValue(ValueVector* vector, std::vector>& children, + uint64_t pos, uint64_t numElements); +}; + +template<> +void FixedListVector::getAsValue(ValueVector* vector, + std::vector>& children, uint64_t pos, uint64_t numElements); +template<> +void FixedListVector::getAsValue(ValueVector* vector, + std::vector>& children, uint64_t pos, uint64_t numElements); +template<> +void FixedListVector::getAsValue(ValueVector* vector, + std::vector>& children, uint64_t pos, uint64_t numElements); +template<> +void FixedListVector::getAsValue(ValueVector* vector, + std::vector>& children, uint64_t pos, uint64_t numElements); +template<> +void FixedListVector::getAsValue(ValueVector* vector, + std::vector>& children, uint64_t pos, uint64_t numElements); + class StructVector { public: static inline const std::vector>& getFieldVectors( diff --git a/src/include/function/cast/functions/cast_string_to_functions.h b/src/include/function/cast/functions/cast_string_to_functions.h index e908770169..43136d1e16 100644 --- a/src/include/function/cast/functions/cast_string_to_functions.h +++ b/src/include/function/cast/functions/cast_string_to_functions.h @@ -30,6 +30,9 @@ struct CastString { simpleIntegerCast(reinterpret_cast(input.getData()), input.len, result, LogicalType{LogicalTypeID::INT64}); } + + static void castToFixedList(const ku_string_t& input, ValueVector* resultVector, + uint64_t rowToAdd, const CSVReaderConfig* csvReaderConfig); }; template<> diff --git a/test/test_files/exceptions/binder/binder_error.test b/test/test_files/exceptions/binder/binder_error.test index bf9c1cd73e..5824625114 100644 --- a/test/test_files/exceptions/binder/binder_error.test +++ b/test/test_files/exceptions/binder/binder_error.test @@ -292,12 +292,17 @@ Binder exception: No file found that matches the pattern: wrong_path.parquet. -STATEMENT COPY person FROM "person_0_0.csv" (pk=",") ---- error Binder exception: Unrecognized csv parsing option: PK. --STATEMENT LOAD WITH HEADERS (list STRING[], str STRING, struct STRUCT(a STRUCT(b STRUCT(c INT64[])))) FROM "${KUZU_ROOT_DIRECTORY}/dataset/load-from-test/var_list/change_config.csv" (HEADER=true, DELIM="|", ESCAPE="~", QUOTE="'", LIST_BEGIN="(", LIST_END=")") RETURN * ; +-STATEMENT LOAD WITH HEADERS (list STRING[], str STRING, struct STRUCT(a STRUCT(b STRUCT(c INT64[])))) FROM "${KUZU_ROOT_DIRECTORY}/dataset/load-from-test/var_list/change_config.csv" (HEADER=true, DELIM="|", ESCAPE="~", QUOTE="'", LIST_END=")") RETURN * ; ---- error Binder exception: Unrecognized csv parsing option: LIST_END. --STATEMENT LOAD WITH HEADERS (map MAP(STRING, MAP(STRING, INT16))) FROM "${KUZU_ROOT_DIRECTORY}/dataset/load-from-test/map/nested_map_correct.csv" (YES="c") RETURN *; +-STATEMENT LOAD WITH HEADERS (map MAP(STRING, MAP(STRING, INT16))) FROM "${KUZU_ROOT_DIRECTORY}/dataset/load-from-test/map/nested_map_correct.csv" (LIST_BEGIN="(") RETURN *; ---- error -Binder exception: Unrecognized csv parsing option: YES. +Binder exception: Unrecognized csv parsing option: LIST_BEGIN. + +-LOG LoadCSVInvalidCol +-STATEMENT LOAD WITH HEADERS (list INT32[]) FROM "${KUZU_ROOT_DIRECTORY}/dataset/load-from-test/var_list/quote_error.csv" RETURN * ; +---- error +Binder exception: Number of columns mismatch. Expected 1 but got 3. -LOG CopyCSVInvalidSchemaName -STATEMENT COPY university FROM "person_0_0.csv" (pk=",") @@ -543,3 +548,11 @@ Binder exception: Invalid number of arguments for macro ADD4. -STATEMENT COPY (MATCH (a:person) RETURN a) TO 'person.npy'; ---- error Binder exception: COPY TO currently only supports csv and parquet files. + +-CASE InvalidArgCast +-STATEMENT RETURN cast("[sdf, fsd, fad]", "STRING[]", "3rd arg"); +---- error +Binder exception: Invalid number of arguments for given function CAST. Expected: 2, Actual: 3. +-STATEMENT RETURN cast("[sdf, fsd, fad]"); +---- error +Binder exception: Invalid number of arguments for given function CAST. Expected: 2, Actual: 1. diff --git a/test/test_files/tinysnb/cast/cast_error.test b/test/test_files/tinysnb/cast/cast_error.test index 6e07ec63be..ca28f9c42b 100644 --- a/test/test_files/tinysnb/cast/cast_error.test +++ b/test/test_files/tinysnb/cast/cast_error.test @@ -578,15 +578,6 @@ Conversion exception: Cast failed. bool is not in INT64[] range. -STATEMENT RETURN cast("[[231|4324]", "INT64[][]"); ---- error Conversion exception: Cast failed. [[231|4324] is not in INT64[][] range. --STATEMENT RETURN cast("[sdf, fsd, fad]", "STRING[]", "3rd arg"); ----- error -Binder exception: Invalid number of arguments for given function CAST. --STATEMENT MATCH (:person)-[e:studyAt]->(:organisation) return cast(e, "INT64"); ----- error -Unimplemented casting function from REL to INT64. --STATEMENT RETURN cast("dfsdfasdgv", "INTERNAL_ID"); ----- error -Unimplemented casting function from STRING to INTERNAL_ID. -CASE CastStringToNestedError -STATEMENT LOAD WITH HEADERS (list STRING[][]) FROM "${KUZU_ROOT_DIRECTORY}/dataset/load-from-test/var_list/delim_error.csv" (DELIM="|", ESCAPE="~", QUOTE="'") RETURN * ; @@ -595,9 +586,6 @@ Conversion exception: Cast failed. "[[hello],[bdfadf],]" is not in STRING[][] ra -STATEMENT LOAD WITH HEADERS (list STRING[][]) FROM "${KUZU_ROOT_DIRECTORY}/dataset/load-from-test/var_list/bracket_error.csv" (DELIM="|", ESCAPE="~") RETURN * ; ---- error Conversion exception: Cast failed. [[] is not in STRING[][] range. --STATEMENT LOAD WITH HEADERS (list INT32[]) FROM "${KUZU_ROOT_DIRECTORY}/dataset/load-from-test/var_list/quote_error.csv" RETURN * ; ----- error -Binder exception: Number of columns mismatch. Expected 1 but got 3. -STATEMENT LOAD WITH HEADERS (list STRING[]) FROM "${KUZU_ROOT_DIRECTORY}/dataset/load-from-test/var_list/single_quote.csv" RETURN *; ---- error Conversion exception: Cast failed. ['fdsfdsfe werw] is not in STRING[] range. @@ -669,7 +657,7 @@ Conversion exception: Cast failed. [4324, 432] fewrew is not in INT64[2] range. Binder exception: The number of elements in a fixed list must be greater than 0. Given: 0. -STATEMENT LOAD WITH HEADERS (fixedList UINT8[1]) FROM "${KUZU_ROOT_DIRECTORY}/dataset/load-from-test/fixed_list/unsupported_type2.csv" RETURN *; ---- error -Unsupported data type: Driver::castStringToFixedList +Unsupported data type: Function::castStringToFixedList -STATEMENT LOAD WITH HEADERS (fixedList INT64[1]) FROM "${KUZU_ROOT_DIRECTORY}/dataset/load-from-test/fixed_list/fixed_list_null.csv" RETURN *; ---- error Conversion exception: Cast failed. NULL is not allowed for FIXEDLIST. @@ -682,3 +670,35 @@ Copy exception: Invalid UTF8-encoded string. -STATEMENT LOAD WITH HEADERS (a UNION(v1 INT64, v2 INT32)) FROM "${KUZU_ROOT_DIRECTORY}/dataset/load-from-test/union/union_error.csv" RETURN *; ---- error Conversion exception: Could not convert to union type UNION(v1:INT64, v2:INT32): fdsaf. +-STATEMENT RETURN cast("[-32769]", "INT16[1]"); +---- error +Conversion exception: Cast failed. -32769 is not in INT16 range. +-STATEMENT RETURN cast("[42, 42] fdaf", "INT64[2]"); +---- error +Conversion exception: Cast failed. [42, 42] fdaf is not in INT64[2] range. +-STATEMENT RETURN cast("[42|42]", "DOUBLE[2]"); +---- error +Copy exception: Each fixed list should have fixed number of elements. Expected: 2, Actual: 1. +-STATEMENT RETURN cast("[42,42", "FLOAT[2]"); +---- error +Copy exception: Each fixed list should have fixed number of elements. Expected: 2, Actual: 1. +-STATEMENT RETURN cast("(42,42)", "INT32[2]"); +---- error +Copy exception: Each fixed list should have fixed number of elements. Expected: 2, Actual: 0. + +-LOG InvalidCastTypeError +-STATEMENT RETURN cast("dfsa", "INTERNAL_ID"); +---- error +Conversion exception: Unsupported casting function from STRING to INTERNAL_ID. +-STATEMENT RETURN cast(432, "INTERNAL_ID"); +---- error +Conversion exception: Unsupported casting function from INT64 to INTERNAL_ID. +-STATEMENT MATCH (:person)-[e:studyAt]->(:organisation) return cast(e, "INT64"); +---- error +Conversion exception: Unsupported casting function from REL to INT64. +-STATEMENT MATCH (:person)-[e:studyAt]->(:organisation) return cast(e, "TIMESTAMP"); +---- error +Conversion exception: Unsupported casting function from REL to TIMESTAMP. +-STATEMENT MATCH (:person)-[e:studyAt*1..3]->(:organisation) return cast(e, "INT64"); +---- error +Conversion exception: Unsupported casting function from RECURSIVE_REL to INT64. diff --git a/test/test_files/tinysnb/cast/cast_string_to_nested_types.test b/test/test_files/tinysnb/cast/cast_to_nested_types.test similarity index 91% rename from test/test_files/tinysnb/cast/cast_string_to_nested_types.test rename to test/test_files/tinysnb/cast/cast_to_nested_types.test index 9ebabe9ef5..6527fb5b67 100644 --- a/test/test_files/tinysnb/cast/cast_string_to_nested_types.test +++ b/test/test_files/tinysnb/cast/cast_to_nested_types.test @@ -16,7 +16,7 @@ []||[111,0,0] ||[0,111,12] |{num: 0, str: }|[0,0,0] --STATEMENT LOAD WITH HEADERS (list STRING[], str STRING, struct STRUCT(a STRUCT(b STRUCT(c INT64[])))) FROM "${KUZU_ROOT_DIRECTORY}/dataset/load-from-test/var_list/change_config.csv" (HEADER=true, DELIM="|", ESCAPE="~", QUOTE="'") RETURN * ; +-STATEMENT LOAD WITH HEADERS (list STRING[], str STRING, struct STRUCT(a STRUCT(b STRUCT(c INT64[])))) FROM "${KUZU_ROOT_DIRECTORY}/dataset/load-from-test/var_list/change_config.csv" (HEADER=true, DELIM="|", DELIM="|", ESCAPE="~", QUOTE="'") RETURN * ; ---- 3 [escape , is escape success? ~]| ' [ ] do not need to escape sepeical | []|{a: {b: {c: []}}} [this , is a word , normal not escape , ' [ ] | , ~ ' ]|try escape ~|{a: {b: {c: [3432423,-43423,31231,]}}} @@ -106,6 +106,25 @@ False|-4325||18446744073709551616.000000| dfsa [1,2,3]|[1,2,3]|[1,2,3]|[1,2,3]|[1,2,3]|[1,2,3]|[1,2,3]|[1,2,3]|[1,2,3]|[1.000000,2.000000,3.000000]|[1.000000,2.000000,3.000000] # timestamp/date/... list +-LOG CastToFixedList +-STATEMENT LOAD WITH HEADERS (fixedList STRING, bool BOOLEAN, intv INTERVAL) FROM "${KUZU_ROOT_DIRECTORY}/dataset/load-from-test/fixed_list/fixed_list_correct.csv" RETURN cast(fixedList, "FLOAT[1]"); +---- 6 +[23123.433594] +[-4321312.500000] +[-1.344455] +[43213123584.000000] +[-4212.423340] +[-2.000000] +-STATEMENT LOAD WITH HEADERS (stockPrice STRING) FROM "${KUZU_ROOT_DIRECTORY}/dataset/load-from-test/fixed_list/fixed_list_with_null.csv" RETURN cast(stockPrice, "DOUBLE[3]"); +---- 3 +[3324.123000,342423.437500,432.123000] + +[1.000000,4231.000000,432.123000] +-STATEMENT LOAD WITH HEADERS (stockPrice STRING) FROM "${KUZU_ROOT_DIRECTORY}/dataset/load-from-test/fixed_list/fixed_list_with_null.csv" where size(stockPrice) > 10 RETURN cast(stockPrice, "FLOAT[3]"); +---- 2 +[3324.123047,342423.437500,432.122986] +[1.000000,4231.000000,432.122986] + -LOG CastToMap -STATEMENT RETURN cast(" { c= {a = 3423 }, b = { g = 3421 } } ", "MAP(STRING, MAP(STRING, INT16))"), cast("{}", "MAP(STRING, MAP(STRING, INT16))"), cast("{d = {}}", "MAP(STRING, MAP(STRING, INT16))"); ---- 1 diff --git a/test/test_files/tinysnb/function/cast.test b/test/test_files/tinysnb/function/cast.test index a94503bd2e..7c5f084480 100644 --- a/test/test_files/tinysnb/function/cast.test +++ b/test/test_files/tinysnb/function/cast.test @@ -1083,3 +1083,8 @@ False -STATEMENT Return cast(cast(-15, "float"), "int128"), cast(cast(-1, "double"), "int128"), cast(cast(15, "float"), "int128"), cast(cast(1, "double"), "int128") ---- 1 -15|-1|15|1 + +-LOG CastStringToFixedList +-STATEMENT RETURN cast("[423, 321, 423]", "INT64[3]"), cast(null, "INT64[5]"), cast("[432.43214]", "FLOAT[1]"), cast("[4, -5]", "double[2]"), cast("[4234, 42312, 432, 1321]", "INT32[4]"), cast("[-32768]", "INT16[1]") +---- 1 +[423,321,423]||[432.432129]|[4.000000,-5.000000]|[4234,42312,432,1321]|[-32768]