Skip to content

Commit

Permalink
support Cast String to FixedList
Browse files Browse the repository at this point in the history
  • Loading branch information
AEsir777 committed Nov 7, 2023
1 parent 62f6f41 commit 4b1332c
Show file tree
Hide file tree
Showing 8 changed files with 265 additions and 104 deletions.
76 changes: 76 additions & 0 deletions src/common/vector/value_vector.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "common/vector/value_vector.h"

#include "common/exception/not_implemented.h"
#include "common/null_buffer.h"
#include "common/types/value/nested.h"
#include "common/types/value/value.h"
Expand Down Expand Up @@ -235,6 +236,52 @@ void ValueVector::copyFromValue(uint64_t pos, const Value& value) {
}
}

// Appends `numElements` INT64 Values to `children`, one per slot read from this vector at
// positions [pos, pos + numElements).
// - children: output vector; new Values are appended after any elements already present.
// - pos: index of the first element to read via getValue<int64_t>.
// - numElements: number of consecutive elements to read.
template<>
void ValueVector::getFixedListAsValue<int64_t>(
    std::vector<std::unique_ptr<Value>>& children, uint64_t pos, uint64_t numElements) {
    // The counter is 64-bit to match numElements; a 32-bit counter would wrap around and never
    // reach a bound larger than UINT32_MAX.
    for (uint64_t i = 0; i < numElements; ++i) {
        children.push_back(Value::createDefaultValue(LogicalType{LogicalTypeID::INT64}).copy());
        // Write into the element just appended, so the result does not depend on `children`
        // being empty on entry.
        children.back()->val.int64Val = getValue<int64_t>(pos + i);
    }
}

// Appends `numElements` INT32 Values to `children`, one per slot read from this vector at
// positions [pos, pos + numElements).
// - children: output vector; new Values are appended after any elements already present.
// - pos: index of the first element to read via getValue<int32_t>.
// - numElements: number of consecutive elements to read.
template<>
void ValueVector::getFixedListAsValue<int32_t>(
    std::vector<std::unique_ptr<Value>>& children, uint64_t pos, uint64_t numElements) {
    // The counter is 64-bit to match numElements; a 32-bit counter would wrap around and never
    // reach a bound larger than UINT32_MAX.
    for (uint64_t i = 0; i < numElements; ++i) {
        children.push_back(Value::createDefaultValue(LogicalType{LogicalTypeID::INT32}).copy());
        // Write into the element just appended, so the result does not depend on `children`
        // being empty on entry.
        children.back()->val.int32Val = getValue<int32_t>(pos + i);
    }
}

// Appends `numElements` INT16 Values to `children`, one per slot read from this vector at
// positions [pos, pos + numElements).
// - children: output vector; new Values are appended after any elements already present.
// - pos: index of the first element to read via getValue<int16_t>.
// - numElements: number of consecutive elements to read.
template<>
void ValueVector::getFixedListAsValue<int16_t>(
    std::vector<std::unique_ptr<Value>>& children, uint64_t pos, uint64_t numElements) {
    // The counter is 64-bit to match numElements; a 32-bit counter would wrap around and never
    // reach a bound larger than UINT32_MAX.
    for (uint64_t i = 0; i < numElements; ++i) {
        children.push_back(Value::createDefaultValue(LogicalType{LogicalTypeID::INT16}).copy());
        // Write into the element just appended, so the result does not depend on `children`
        // being empty on entry.
        children.back()->val.int16Val = getValue<int16_t>(pos + i);
    }
}

// Appends `numElements` FLOAT Values to `children`, one per slot read from this vector at
// positions [pos, pos + numElements).
// - children: output vector; new Values are appended after any elements already present.
// - pos: index of the first element to read via getValue<float>.
// - numElements: number of consecutive elements to read.
template<>
void ValueVector::getFixedListAsValue<float>(
    std::vector<std::unique_ptr<Value>>& children, uint64_t pos, uint64_t numElements) {
    // The counter is 64-bit to match numElements; a 32-bit counter would wrap around and never
    // reach a bound larger than UINT32_MAX.
    for (uint64_t i = 0; i < numElements; ++i) {
        children.push_back(Value::createDefaultValue(LogicalType{LogicalTypeID::FLOAT}).copy());
        // Write into the element just appended, so the result does not depend on `children`
        // being empty on entry.
        children.back()->val.floatVal = getValue<float>(pos + i);
    }
}

// Appends `numElements` DOUBLE Values to `children`, one per slot read from this vector at
// positions [pos, pos + numElements).
// - children: output vector; new Values are appended after any elements already present.
// - pos: index of the first element to read via getValue<double>.
// - numElements: number of consecutive elements to read.
template<>
void ValueVector::getFixedListAsValue<double>(
    std::vector<std::unique_ptr<Value>>& children, uint64_t pos, uint64_t numElements) {
    // The counter is 64-bit to match numElements; a 32-bit counter would wrap around and never
    // reach a bound larger than UINT32_MAX.
    for (uint64_t i = 0; i < numElements; ++i) {
        children.push_back(Value::createDefaultValue(LogicalType{LogicalTypeID::DOUBLE}).copy());
        // Write into the element just appended, so the result does not depend on `children`
        // being empty on entry.
        children.back()->val.doubleVal = getValue<double>(pos + i);
    }
}

std::unique_ptr<Value> ValueVector::getAsValue(uint64_t pos) {
if (isNull(pos)) {
return Value::createNullValue(dataType).copy();
Expand Down Expand Up @@ -294,6 +341,35 @@ std::unique_ptr<Value> ValueVector::getAsValue(uint64_t pos) {
value->childrenSize = children.size();
value->children = std::move(children);
} break;
case PhysicalTypeID::FIXED_LIST: {
auto childDataType = FixedListType::getChildType(&dataType);
auto numElements = FixedListType::getNumElementsInList(&dataType);
std::vector<std::unique_ptr<Value>> children;
children.reserve(numElements);
auto startPos = pos * numElements;
switch (childDataType->getPhysicalType()) {
case PhysicalTypeID::INT64: {
getFixedListAsValue<int64_t>(children, startPos, numElements);
} break;
case PhysicalTypeID::INT32: {
getFixedListAsValue<int32_t>(children, startPos, numElements);
} break;
case PhysicalTypeID::INT16: {
getFixedListAsValue<int16_t>(children, startPos, numElements);
} break;
case PhysicalTypeID::DOUBLE: {
getFixedListAsValue<double>(children, startPos, numElements);
} break;
case PhysicalTypeID::FLOAT: {
getFixedListAsValue<float>(children, startPos, numElements);
} break;
default: {
throw NotImplementedException{"ValueVector::getFixedListAsValue"};
}
}
value->childrenSize = numElements;
value->children = std::move(children);
} break;
case PhysicalTypeID::STRUCT: {
auto& fieldVectors = StructVector::getFieldVectors(this);
std::vector<std::unique_ptr<Value>> children;
Expand Down
32 changes: 20 additions & 12 deletions src/function/cast_string_to_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ struct CastStringHelper {
uint64_t /*rowToAdd*/ = 0, const CSVReaderConfig* /*csvReaderConfig*/ = nullptr) {
simpleIntegerCast<int64_t>(input, len, result, LogicalType{LogicalTypeID::INT64});
}

static void castToFixedList(const char* input, uint64_t len, ValueVector* vector, uint64_t rowToAdd,
const CSVReaderConfig* csvReaderConfig);
};

template<>
Expand Down Expand Up @@ -167,7 +170,7 @@ static bool skipToClose(const char*& input, const char* end, uint64_t& lvl, char
if (!skipToCloseQuotes(input, end)) {
return false;
}
} else if (*input == '{') { // must have closing brackets fro {, ] if they are not quoted
} else if (*input == '{') { // must have closing brackets {, ] if they are not quoted
if (!skipToClose(input, end, lvl, '}', csvReaderConfig)) {
return false;
}
Expand Down Expand Up @@ -331,7 +334,6 @@ struct SplitStringFixedListOperation {
if (str.empty() || isNull(str)) {
throw ConversionException("Cast failed. NULL is not allowed for FIXEDLIST.");
}
auto type = FixedListType::getChildType(&resultVector->dataType);
CastStringHelper::cast(start, str.length(), value);
resultVector->setValue(offset, value);
offset++;
Expand All @@ -347,8 +349,8 @@ static void validateNumElementsInList(uint64_t numElementsRead, const LogicalTyp
}
}

void castStringToFixedList(const char* input, uint64_t len, ValueVector* vector, uint64_t rowToAdd,
const CSVReaderConfig* csvReaderConfig) {
void CastStringHelper::castToFixedList(const char* input, uint64_t len, ValueVector* vector,
uint64_t rowToAdd, const CSVReaderConfig* csvReaderConfig) {
KU_ASSERT(vector->dataType.getLogicalTypeID() == LogicalTypeID::FIXED_LIST);
auto childDataType = FixedListType::getChildType(&vector->dataType);

Expand All @@ -359,7 +361,7 @@ void castStringToFixedList(const char* input, uint64_t len, ValueVector* vector,

auto startOffset = state.count * rowToAdd;
switch (childDataType->getLogicalTypeID()) {
// TODO: currently only allow these type
// TODO(Kebing): currently only these child types are supported
case LogicalTypeID::INT64: {
SplitStringFixedListOperation<int64_t> split{startOffset, vector};
startListCast(input, len, split, csvReaderConfig, vector);
Expand All @@ -381,11 +383,17 @@ void castStringToFixedList(const char* input, uint64_t len, ValueVector* vector,
startListCast(input, len, split, csvReaderConfig, vector);
} break;
default: {
throw NotImplementedException("Unsupported data type: Driver::castStringToFixedList");
throw NotImplementedException("Unsupported data type: Function::castStringToFixedList");
}
}
}

// Public entry point for casting a ku_string_t to a fixed list: exposes the string's bytes as a
// (pointer, length) pair and forwards to CastStringHelper::castToFixedList with the remaining
// arguments unchanged.
void CastString::castToFixedList(const ku_string_t& input, ValueVector* resultVector,
    uint64_t rowToAdd, const CSVReaderConfig* csvReaderConfig) {
    const auto* rawChars = reinterpret_cast<const char*>(input.getData());
    const uint64_t numBytes = input.len;
    CastStringHelper::castToFixedList(rawChars, numBytes, resultVector, rowToAdd, csvReaderConfig);
}

// ---------------------- cast String to Map ------------------------------ //
struct SplitStringMapOperation {
SplitStringMapOperation(uint64_t& offset, ValueVector* resultVector)
Expand Down Expand Up @@ -420,7 +428,7 @@ static bool parseKeyOrValue(const char*& input, const char* end, T& state, bool
if (*input == '"' || *input == '\'') {
if (!skipToCloseQuotes(input, end)) {
return false;
};
}
} else if (*input == '{') {
if (!skipToClose(input, end, lvl, '}', csvReaderConfig)) {
return false;
Expand All @@ -429,7 +437,7 @@ static bool parseKeyOrValue(const char*& input, const char* end, T& state, bool
if (!skipToClose(
input, end, lvl, CopyConstants::DEFAULT_CSV_LIST_END_CHAR, csvReaderConfig)) {
return false;
};
}
} else if (isKey && *input == '=') {
return state.handleKey(start, input, csvReaderConfig);
} else if (!isKey && (*input == csvReaderConfig->delimiter || *input == '}')) {
Expand Down Expand Up @@ -500,9 +508,9 @@ void CastStringHelper::cast(const char* input, uint64_t len, map_entry_t& /*resu

template<>
void CastString::operation(const ku_string_t& input, map_entry_t& result, ValueVector* resultVector,
uint64_t rowToAdd, const CSVReaderConfig* CSVReaderConfig) {
uint64_t rowToAdd, const CSVReaderConfig* csvReaderConfig) {
CastStringHelper::cast(reinterpret_cast<const char*>(input.getData()), input.len, result,
resultVector, rowToAdd, CSVReaderConfig);
resultVector, rowToAdd, csvReaderConfig);
}

// ---------------------- cast String to Struct ------------------------------ //
Expand Down Expand Up @@ -854,8 +862,8 @@ void CastString::copyStringToVector(ValueVector* vector, uint64_t rowToAdd, std:
strVal.data(), strVal.length(), val, vector, rowToAdd, csvReaderConfig);
} break;
case LogicalTypeID::FIXED_LIST: {
// TODO: add fix list function wrapper
castStringToFixedList(strVal.data(), strVal.length(), vector, rowToAdd, csvReaderConfig);
CastStringHelper::castToFixedList(
strVal.data(), strVal.length(), vector, rowToAdd, csvReaderConfig);
} break;
case LogicalTypeID::STRUCT: {
struct_entry_t val;
Expand Down
Loading

0 comments on commit 4b1332c

Please sign in to comment.