Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support Cast(String, FixedList Type) #2369

Merged
merged 1 commit into from
Nov 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions dataset/load-from-test/fixed_list/fixed_list_with_null.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"[3324.123,342423.4375,432.123]"
"NULL"
"[1,4231,432.123]"
80 changes: 80 additions & 0 deletions src/common/vector/value_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,35 @@ std::unique_ptr<Value> ValueVector::getAsValue(uint64_t pos) {
value->childrenSize = children.size();
value->children = std::move(children);
} break;
case PhysicalTypeID::FIXED_LIST: {
auto childDataType = FixedListType::getChildType(&dataType);
auto numElements = FixedListType::getNumElementsInList(&dataType);
std::vector<std::unique_ptr<Value>> children;
children.reserve(numElements);
switch (childDataType->getPhysicalType()) {
case PhysicalTypeID::INT64: {
FixedListVector::getAsValue<int64_t>(this, children, pos, numElements);
} break;
case PhysicalTypeID::INT32: {
FixedListVector::getAsValue<int32_t>(this, children, pos, numElements);
} break;
case PhysicalTypeID::INT16: {
FixedListVector::getAsValue<int16_t>(this, children, pos, numElements);
} break;
case PhysicalTypeID::DOUBLE: {
FixedListVector::getAsValue<double>(this, children, pos, numElements);
} break;
case PhysicalTypeID::FLOAT: {
FixedListVector::getAsValue<float>(this, children, pos, numElements);
} break;
// LCOV_EXCL_START
default:
KU_UNREACHABLE;
AEsir777 marked this conversation as resolved.
Show resolved Hide resolved
// LCOV_EXCL_STOP
}
value->childrenSize = numElements;
value->children = std::move(children);
} break;
case PhysicalTypeID::STRUCT: {
auto& fieldVectors = StructVector::getFieldVectors(this);
std::vector<std::unique_ptr<Value>> children;
Expand Down Expand Up @@ -536,6 +565,57 @@ void ListVector::sliceDataVector(
}
}

// Appends the elements of the fixed-list entry at `pos` of `vector` to `children` as INT64 values.
//
// vector:      vector whose raw buffer (getData()) holds the fixed-list entries.
// children:    output; exactly `numElements` newly created values are appended to it.
// pos:         index of the entry; its bytes start at getData() + getNumBytesPerValue() * pos.
// numElements: number of consecutive int64_t elements read from that entry.
template<>
void FixedListVector::getAsValue<int64_t>(ValueVector* vector,
    std::vector<std::unique_ptr<Value>>& children, uint64_t pos, uint64_t numElements) {
    // The entry's start address does not depend on the element index, so compute it once.
    const auto* elements =
        reinterpret_cast<const int64_t*>(vector->getData() + vector->getNumBytesPerValue() * pos);
    // 64-bit index so it has the same width as `numElements`.
    for (uint64_t i = 0; i < numElements; ++i) {
        children.push_back(Value::createDefaultValue(LogicalType{LogicalTypeID::INT64}).copy());
        // Write through back() so the value just appended is the one filled in, even when
        // `children` was not empty on entry.
        children.back()->val.int64Val = elements[i];
    }
}

// Appends the elements of the fixed-list entry at `pos` of `vector` to `children` as INT32 values.
//
// vector:      vector whose raw buffer (getData()) holds the fixed-list entries.
// children:    output; exactly `numElements` newly created values are appended to it.
// pos:         index of the entry; its bytes start at getData() + getNumBytesPerValue() * pos.
// numElements: number of consecutive int32_t elements read from that entry.
template<>
void FixedListVector::getAsValue<int32_t>(ValueVector* vector,
    std::vector<std::unique_ptr<Value>>& children, uint64_t pos, uint64_t numElements) {
    // The entry's start address does not depend on the element index, so compute it once.
    const auto* elements =
        reinterpret_cast<const int32_t*>(vector->getData() + vector->getNumBytesPerValue() * pos);
    // 64-bit index so it has the same width as `numElements`.
    for (uint64_t i = 0; i < numElements; ++i) {
        children.push_back(Value::createDefaultValue(LogicalType{LogicalTypeID::INT32}).copy());
        // Write through back() so the value just appended is the one filled in, even when
        // `children` was not empty on entry.
        children.back()->val.int32Val = elements[i];
    }
}

// Appends the elements of the fixed-list entry at `pos` of `vector` to `children` as INT16 values.
//
// vector:      vector whose raw buffer (getData()) holds the fixed-list entries.
// children:    output; exactly `numElements` newly created values are appended to it.
// pos:         index of the entry; its bytes start at getData() + getNumBytesPerValue() * pos.
// numElements: number of consecutive int16_t elements read from that entry.
template<>
void FixedListVector::getAsValue<int16_t>(ValueVector* vector,
    std::vector<std::unique_ptr<Value>>& children, uint64_t pos, uint64_t numElements) {
    // The entry's start address does not depend on the element index, so compute it once.
    const auto* elements =
        reinterpret_cast<const int16_t*>(vector->getData() + vector->getNumBytesPerValue() * pos);
    // 64-bit index so it has the same width as `numElements`.
    for (uint64_t i = 0; i < numElements; ++i) {
        children.push_back(Value::createDefaultValue(LogicalType{LogicalTypeID::INT16}).copy());
        // Write through back() so the value just appended is the one filled in, even when
        // `children` was not empty on entry.
        children.back()->val.int16Val = elements[i];
    }
}

// Appends the elements of the fixed-list entry at `pos` of `vector` to `children` as FLOAT values.
//
// vector:      vector whose raw buffer (getData()) holds the fixed-list entries.
// children:    output; exactly `numElements` newly created values are appended to it.
// pos:         index of the entry; its bytes start at getData() + getNumBytesPerValue() * pos.
// numElements: number of consecutive float elements read from that entry.
template<>
void FixedListVector::getAsValue<float>(ValueVector* vector,
    std::vector<std::unique_ptr<Value>>& children, uint64_t pos, uint64_t numElements) {
    // The entry's start address does not depend on the element index, so compute it once.
    const auto* elements =
        reinterpret_cast<const float*>(vector->getData() + vector->getNumBytesPerValue() * pos);
    // 64-bit index so it has the same width as `numElements`.
    for (uint64_t i = 0; i < numElements; ++i) {
        children.push_back(Value::createDefaultValue(LogicalType{LogicalTypeID::FLOAT}).copy());
        // Write through back() so the value just appended is the one filled in, even when
        // `children` was not empty on entry.
        children.back()->val.floatVal = elements[i];
    }
}

// Appends the elements of the fixed-list entry at `pos` of `vector` to `children` as DOUBLE
// values.
//
// vector:      vector whose raw buffer (getData()) holds the fixed-list entries.
// children:    output; exactly `numElements` newly created values are appended to it.
// pos:         index of the entry; its bytes start at getData() + getNumBytesPerValue() * pos.
// numElements: number of consecutive double elements read from that entry.
template<>
void FixedListVector::getAsValue<double>(ValueVector* vector,
    std::vector<std::unique_ptr<Value>>& children, uint64_t pos, uint64_t numElements) {
    // The entry's start address does not depend on the element index, so compute it once.
    const auto* elements =
        reinterpret_cast<const double*>(vector->getData() + vector->getNumBytesPerValue() * pos);
    // 64-bit index so it has the same width as `numElements`.
    for (uint64_t i = 0; i < numElements; ++i) {
        children.push_back(Value::createDefaultValue(LogicalType{LogicalTypeID::DOUBLE}).copy());
        // Write through back() so the value just appended is the one filled in, even when
        // `children` was not empty on entry.
        children.back()->val.doubleVal = elements[i];
    }
}

void StructVector::copyFromRowData(ValueVector* vector, uint32_t pos, const uint8_t* rowData) {
KU_ASSERT(vector->dataType.getPhysicalType() == PhysicalTypeID::STRUCT);
auto& structFields = getFieldVectors(vector);
Expand Down
32 changes: 20 additions & 12 deletions src/function/cast_string_to_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ struct CastStringHelper {
uint64_t /*rowToAdd*/ = 0, const CSVReaderConfig* /*csvReaderConfig*/ = nullptr) {
simpleIntegerCast<int64_t>(input, len, result, LogicalType{LogicalTypeID::INT64});
}

static void castToFixedList(const char* input, uint64_t len, ValueVector* vector,
uint64_t rowToAdd, const CSVReaderConfig* csvReaderConfig);
};

template<>
Expand Down Expand Up @@ -167,7 +170,7 @@ static bool skipToClose(const char*& input, const char* end, uint64_t& lvl, char
if (!skipToCloseQuotes(input, end)) {
return false;
}
} else if (*input == '{') { // must have closing brackets fro {, ] if they are not quoted
} else if (*input == '{') { // must have closing brackets {, ] if they are not quoted
if (!skipToClose(input, end, lvl, '}', csvReaderConfig)) {
return false;
}
Expand Down Expand Up @@ -331,7 +334,6 @@ struct SplitStringFixedListOperation {
if (str.empty() || isNull(str)) {
throw ConversionException("Cast failed. NULL is not allowed for FIXEDLIST.");
}
auto type = FixedListType::getChildType(&resultVector->dataType);
CastStringHelper::cast(start, str.length(), value);
resultVector->setValue(offset, value);
offset++;
Expand All @@ -347,8 +349,8 @@ static void validateNumElementsInList(uint64_t numElementsRead, const LogicalTyp
}
}

void castStringToFixedList(const char* input, uint64_t len, ValueVector* vector, uint64_t rowToAdd,
const CSVReaderConfig* csvReaderConfig) {
void CastStringHelper::castToFixedList(const char* input, uint64_t len, ValueVector* vector,
uint64_t rowToAdd, const CSVReaderConfig* csvReaderConfig) {
KU_ASSERT(vector->dataType.getLogicalTypeID() == LogicalTypeID::FIXED_LIST);
auto childDataType = FixedListType::getChildType(&vector->dataType);

Expand All @@ -359,7 +361,7 @@ void castStringToFixedList(const char* input, uint64_t len, ValueVector* vector,

auto startOffset = state.count * rowToAdd;
switch (childDataType->getLogicalTypeID()) {
// TODO: currently only allow these type
// TODO(Kebing): currently only these child types are supported
case LogicalTypeID::INT64: {
SplitStringFixedListOperation<int64_t> split{startOffset, vector};
startListCast(input, len, split, csvReaderConfig, vector);
Expand All @@ -381,11 +383,17 @@ void castStringToFixedList(const char* input, uint64_t len, ValueVector* vector,
startListCast(input, len, split, csvReaderConfig, vector);
} break;
default: {
throw NotImplementedException("Unsupported data type: Driver::castStringToFixedList");
throw NotImplementedException("Unsupported data type: Function::castStringToFixedList");
}
}
}

// Casts the string `input` into `resultVector` as a fixed list by handing the string's bytes and
// length, together with `rowToAdd` and `csvReaderConfig`, to CastStringHelper::castToFixedList.
void CastString::castToFixedList(const ku_string_t& input, ValueVector* resultVector,
    uint64_t rowToAdd, const CSVReaderConfig* csvReaderConfig) {
    const auto* chars = reinterpret_cast<const char*>(input.getData());
    const auto numChars = input.len;
    CastStringHelper::castToFixedList(chars, numChars, resultVector, rowToAdd, csvReaderConfig);
}

// ---------------------- cast String to Map ------------------------------ //
struct SplitStringMapOperation {
SplitStringMapOperation(uint64_t& offset, ValueVector* resultVector)
Expand Down Expand Up @@ -420,7 +428,7 @@ static bool parseKeyOrValue(const char*& input, const char* end, T& state, bool
if (*input == '"' || *input == '\'') {
if (!skipToCloseQuotes(input, end)) {
return false;
};
}
} else if (*input == '{') {
if (!skipToClose(input, end, lvl, '}', csvReaderConfig)) {
return false;
Expand All @@ -429,7 +437,7 @@ static bool parseKeyOrValue(const char*& input, const char* end, T& state, bool
if (!skipToClose(
input, end, lvl, CopyConstants::DEFAULT_CSV_LIST_END_CHAR, csvReaderConfig)) {
return false;
};
}
} else if (isKey && *input == '=') {
return state.handleKey(start, input, csvReaderConfig);
} else if (!isKey && (*input == csvReaderConfig->delimiter || *input == '}')) {
Expand Down Expand Up @@ -500,9 +508,9 @@ void CastStringHelper::cast(const char* input, uint64_t len, map_entry_t& /*resu

template<>
void CastString::operation(const ku_string_t& input, map_entry_t& result, ValueVector* resultVector,
uint64_t rowToAdd, const CSVReaderConfig* CSVReaderConfig) {
uint64_t rowToAdd, const CSVReaderConfig* csvReaderConfig) {
CastStringHelper::cast(reinterpret_cast<const char*>(input.getData()), input.len, result,
resultVector, rowToAdd, CSVReaderConfig);
resultVector, rowToAdd, csvReaderConfig);
}

// ---------------------- cast String to Struct ------------------------------ //
Expand Down Expand Up @@ -854,8 +862,8 @@ void CastString::copyStringToVector(ValueVector* vector, uint64_t rowToAdd, std:
strVal.data(), strVal.length(), val, vector, rowToAdd, csvReaderConfig);
} break;
case LogicalTypeID::FIXED_LIST: {
// TODO: add fix list function wrapper
castStringToFixedList(strVal.data(), strVal.length(), vector, rowToAdd, csvReaderConfig);
CastStringHelper::castToFixedList(
strVal.data(), strVal.length(), vector, rowToAdd, csvReaderConfig);
} break;
case LogicalTypeID::STRUCT: {
struct_entry_t val;
Expand Down
Loading