Skip to content

Commit

Permalink
allow CAST from string to VarList for CAST function
Browse files Browse the repository at this point in the history
  • Loading branch information
AEsir777 committed Nov 8, 2023
1 parent 9ab52f6 commit 4a7fb80
Show file tree
Hide file tree
Showing 10 changed files with 327 additions and 123 deletions.
3 changes: 3 additions & 0 deletions dataset/load-from-test/fixed_list/fixed_list_with_null.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"[3324.123,342423.4375,432.123]"
"NULL"
"[1,4231,432.123]"
80 changes: 80 additions & 0 deletions src/common/vector/value_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,35 @@ std::unique_ptr<Value> ValueVector::getAsValue(uint64_t pos) {
value->childrenSize = children.size();
value->children = std::move(children);
} break;
case PhysicalTypeID::FIXED_LIST: {
auto childDataType = FixedListType::getChildType(&dataType);
auto numElements = FixedListType::getNumElementsInList(&dataType);
std::vector<std::unique_ptr<Value>> children;
children.reserve(numElements);
switch (childDataType->getPhysicalType()) {
case PhysicalTypeID::INT64: {
FixedListVector::getAsValue<int64_t>(this, children, pos, numElements);
} break;
case PhysicalTypeID::INT32: {
FixedListVector::getAsValue<int32_t>(this, children, pos, numElements);
} break;
case PhysicalTypeID::INT16: {
FixedListVector::getAsValue<int16_t>(this, children, pos, numElements);
} break;
case PhysicalTypeID::DOUBLE: {
FixedListVector::getAsValue<double>(this, children, pos, numElements);
} break;
case PhysicalTypeID::FLOAT: {
FixedListVector::getAsValue<float>(this, children, pos, numElements);
} break;
// LCOV_EXCL_START
default:
KU_UNREACHABLE;
// LCOV_EXCL_STOP
}
value->childrenSize = numElements;
value->children = std::move(children);
} break;
case PhysicalTypeID::STRUCT: {
auto& fieldVectors = StructVector::getFieldVectors(this);
std::vector<std::unique_ptr<Value>> children;
Expand Down Expand Up @@ -536,6 +565,57 @@ void ListVector::sliceDataVector(
}
}

// Appends the elements of the FIXED_LIST entry at `pos` to `children` as INT64 values.
//
// vector:      vector whose data buffer is read.
// children:    output; one default INT64 Value is appended per element and then filled in.
// pos:         index of the entry; its bytes start at getData() + getNumBytesPerValue() * pos.
// numElements: number of int64_t elements read from that entry.
template<>
void FixedListVector::getAsValue<int64_t>(ValueVector* vector,
    std::vector<std::unique_ptr<Value>>& children, uint64_t pos, uint64_t numElements) {
    // The entry's start address does not depend on the element index, so compute it once.
    const auto* elements =
        reinterpret_cast<const int64_t*>(vector->getData() + vector->getNumBytesPerValue() * pos);
    // The counter has the same width as numElements, so it cannot wrap before reaching the bound.
    for (uint64_t i = 0; i < numElements; ++i) {
        children.push_back(Value::createDefaultValue(LogicalType{LogicalTypeID::INT64}).copy());
        // Write through back() so the freshly appended value is the one filled in, even if
        // `children` already held entries when this function was called.
        children.back()->val.int64Val = elements[i];
    }
}

// Appends the elements of the FIXED_LIST entry at `pos` to `children` as INT32 values.
//
// vector:      vector whose data buffer is read.
// children:    output; one default INT32 Value is appended per element and then filled in.
// pos:         index of the entry; its bytes start at getData() + getNumBytesPerValue() * pos.
// numElements: number of int32_t elements read from that entry.
template<>
void FixedListVector::getAsValue<int32_t>(ValueVector* vector,
    std::vector<std::unique_ptr<Value>>& children, uint64_t pos, uint64_t numElements) {
    // The entry's start address does not depend on the element index, so compute it once.
    const auto* elements =
        reinterpret_cast<const int32_t*>(vector->getData() + vector->getNumBytesPerValue() * pos);
    // The counter has the same width as numElements, so it cannot wrap before reaching the bound.
    for (uint64_t i = 0; i < numElements; ++i) {
        children.push_back(Value::createDefaultValue(LogicalType{LogicalTypeID::INT32}).copy());
        // Write through back() so the freshly appended value is the one filled in, even if
        // `children` already held entries when this function was called.
        children.back()->val.int32Val = elements[i];
    }
}

// Appends the elements of the FIXED_LIST entry at `pos` to `children` as INT16 values.
//
// vector:      vector whose data buffer is read.
// children:    output; one default INT16 Value is appended per element and then filled in.
// pos:         index of the entry; its bytes start at getData() + getNumBytesPerValue() * pos.
// numElements: number of int16_t elements read from that entry.
template<>
void FixedListVector::getAsValue<int16_t>(ValueVector* vector,
    std::vector<std::unique_ptr<Value>>& children, uint64_t pos, uint64_t numElements) {
    // The entry's start address does not depend on the element index, so compute it once.
    const auto* elements =
        reinterpret_cast<const int16_t*>(vector->getData() + vector->getNumBytesPerValue() * pos);
    // The counter has the same width as numElements, so it cannot wrap before reaching the bound.
    for (uint64_t i = 0; i < numElements; ++i) {
        children.push_back(Value::createDefaultValue(LogicalType{LogicalTypeID::INT16}).copy());
        // Write through back() so the freshly appended value is the one filled in, even if
        // `children` already held entries when this function was called.
        children.back()->val.int16Val = elements[i];
    }
}

// Appends the elements of the FIXED_LIST entry at `pos` to `children` as FLOAT values.
//
// vector:      vector whose data buffer is read.
// children:    output; one default FLOAT Value is appended per element and then filled in.
// pos:         index of the entry; its bytes start at getData() + getNumBytesPerValue() * pos.
// numElements: number of float elements read from that entry.
template<>
void FixedListVector::getAsValue<float>(ValueVector* vector,
    std::vector<std::unique_ptr<Value>>& children, uint64_t pos, uint64_t numElements) {
    // The entry's start address does not depend on the element index, so compute it once.
    const auto* elements =
        reinterpret_cast<const float*>(vector->getData() + vector->getNumBytesPerValue() * pos);
    // The counter has the same width as numElements, so it cannot wrap before reaching the bound.
    for (uint64_t i = 0; i < numElements; ++i) {
        children.push_back(Value::createDefaultValue(LogicalType{LogicalTypeID::FLOAT}).copy());
        // Write through back() so the freshly appended value is the one filled in, even if
        // `children` already held entries when this function was called.
        children.back()->val.floatVal = elements[i];
    }
}

// Appends the elements of the FIXED_LIST entry at `pos` to `children` as DOUBLE values.
//
// vector:      vector whose data buffer is read.
// children:    output; one default DOUBLE Value is appended per element and then filled in.
// pos:         index of the entry; its bytes start at getData() + getNumBytesPerValue() * pos.
// numElements: number of double elements read from that entry.
template<>
void FixedListVector::getAsValue<double>(ValueVector* vector,
    std::vector<std::unique_ptr<Value>>& children, uint64_t pos, uint64_t numElements) {
    // The entry's start address does not depend on the element index, so compute it once.
    const auto* elements =
        reinterpret_cast<const double*>(vector->getData() + vector->getNumBytesPerValue() * pos);
    // The counter has the same width as numElements, so it cannot wrap before reaching the bound.
    for (uint64_t i = 0; i < numElements; ++i) {
        children.push_back(Value::createDefaultValue(LogicalType{LogicalTypeID::DOUBLE}).copy());
        // Write through back() so the freshly appended value is the one filled in, even if
        // `children` already held entries when this function was called.
        children.back()->val.doubleVal = elements[i];
    }
}

void StructVector::copyFromRowData(ValueVector* vector, uint32_t pos, const uint8_t* rowData) {
KU_ASSERT(vector->dataType.getPhysicalType() == PhysicalTypeID::STRUCT);
auto& structFields = getFieldVectors(vector);
Expand Down
32 changes: 20 additions & 12 deletions src/function/cast_string_to_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ struct CastStringHelper {
uint64_t /*rowToAdd*/ = 0, const CSVReaderConfig* /*csvReaderConfig*/ = nullptr) {
simpleIntegerCast<int64_t>(input, len, result, LogicalType{LogicalTypeID::INT64});
}

static void castToFixedList(const char* input, uint64_t len, ValueVector* vector,
uint64_t rowToAdd, const CSVReaderConfig* csvReaderConfig);
};

template<>
Expand Down Expand Up @@ -167,7 +170,7 @@ static bool skipToClose(const char*& input, const char* end, uint64_t& lvl, char
if (!skipToCloseQuotes(input, end)) {
return false;
}
} else if (*input == '{') { // must have closing brackets fro {, ] if they are not quoted
} else if (*input == '{') { // must have closing brackets {, ] if they are not quoted
if (!skipToClose(input, end, lvl, '}', csvReaderConfig)) {
return false;
}
Expand Down Expand Up @@ -331,7 +334,6 @@ struct SplitStringFixedListOperation {
if (str.empty() || isNull(str)) {
throw ConversionException("Cast failed. NULL is not allowed for FIXEDLIST.");
}
auto type = FixedListType::getChildType(&resultVector->dataType);
CastStringHelper::cast(start, str.length(), value);
resultVector->setValue(offset, value);
offset++;
Expand All @@ -347,8 +349,8 @@ static void validateNumElementsInList(uint64_t numElementsRead, const LogicalTyp
}
}

void castStringToFixedList(const char* input, uint64_t len, ValueVector* vector, uint64_t rowToAdd,
const CSVReaderConfig* csvReaderConfig) {
void CastStringHelper::castToFixedList(const char* input, uint64_t len, ValueVector* vector,
uint64_t rowToAdd, const CSVReaderConfig* csvReaderConfig) {
KU_ASSERT(vector->dataType.getLogicalTypeID() == LogicalTypeID::FIXED_LIST);
auto childDataType = FixedListType::getChildType(&vector->dataType);

Expand All @@ -359,7 +361,7 @@ void castStringToFixedList(const char* input, uint64_t len, ValueVector* vector,

auto startOffset = state.count * rowToAdd;
switch (childDataType->getLogicalTypeID()) {
// TODO: currently only allow these type
// TODO(Kebing): currently only allow these type
case LogicalTypeID::INT64: {
SplitStringFixedListOperation<int64_t> split{startOffset, vector};
startListCast(input, len, split, csvReaderConfig, vector);
Expand All @@ -381,11 +383,17 @@ void castStringToFixedList(const char* input, uint64_t len, ValueVector* vector,
startListCast(input, len, split, csvReaderConfig, vector);
} break;
default: {
throw NotImplementedException("Unsupported data type: Driver::castStringToFixedList");
throw NotImplementedException("Unsupported data type: Function::castStringToFixedList");
}
}
}

// Adapter from a ku_string_t input to the raw-buffer overload: forwards the string's bytes and
// length, together with the remaining arguments, to CastStringHelper::castToFixedList.
void CastString::castToFixedList(const ku_string_t& input, ValueVector* resultVector,
    uint64_t rowToAdd, const CSVReaderConfig* csvReaderConfig) {
    const auto* rawText = reinterpret_cast<const char*>(input.getData());
    const auto textLength = input.len;
    CastStringHelper::castToFixedList(
        rawText, textLength, resultVector, rowToAdd, csvReaderConfig);
}

// ---------------------- cast String to Map ------------------------------ //
struct SplitStringMapOperation {
SplitStringMapOperation(uint64_t& offset, ValueVector* resultVector)
Expand Down Expand Up @@ -420,7 +428,7 @@ static bool parseKeyOrValue(const char*& input, const char* end, T& state, bool
if (*input == '"' || *input == '\'') {
if (!skipToCloseQuotes(input, end)) {
return false;
};
}
} else if (*input == '{') {
if (!skipToClose(input, end, lvl, '}', csvReaderConfig)) {
return false;
Expand All @@ -429,7 +437,7 @@ static bool parseKeyOrValue(const char*& input, const char* end, T& state, bool
if (!skipToClose(
input, end, lvl, CopyConstants::DEFAULT_CSV_LIST_END_CHAR, csvReaderConfig)) {
return false;
};
}
} else if (isKey && *input == '=') {
return state.handleKey(start, input, csvReaderConfig);
} else if (!isKey && (*input == csvReaderConfig->delimiter || *input == '}')) {
Expand Down Expand Up @@ -500,9 +508,9 @@ void CastStringHelper::cast(const char* input, uint64_t len, map_entry_t& /*resu

template<>
void CastString::operation(const ku_string_t& input, map_entry_t& result, ValueVector* resultVector,
uint64_t rowToAdd, const CSVReaderConfig* CSVReaderConfig) {
uint64_t rowToAdd, const CSVReaderConfig* csvReaderConfig) {
CastStringHelper::cast(reinterpret_cast<const char*>(input.getData()), input.len, result,
resultVector, rowToAdd, CSVReaderConfig);
resultVector, rowToAdd, csvReaderConfig);
}

// ---------------------- cast String to Struct ------------------------------ //
Expand Down Expand Up @@ -854,8 +862,8 @@ void CastString::copyStringToVector(ValueVector* vector, uint64_t rowToAdd, std:
strVal.data(), strVal.length(), val, vector, rowToAdd, csvReaderConfig);
} break;
case LogicalTypeID::FIXED_LIST: {
// TODO: add fix list function wrapper
castStringToFixedList(strVal.data(), strVal.length(), vector, rowToAdd, csvReaderConfig);
CastStringHelper::castToFixedList(
strVal.data(), strVal.length(), vector, rowToAdd, csvReaderConfig);
} break;
case LogicalTypeID::STRUCT: {
struct_entry_t val;
Expand Down
Loading

0 comments on commit 4a7fb80

Please sign in to comment.