Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix cast node, rel, recursive rel to string #2896

Merged
merged 1 commit into from
Feb 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
139 changes: 98 additions & 41 deletions src/common/type_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
namespace kuzu {
namespace common {

std::string TypeUtils::castValueToString(
const LogicalType& dataType, const uint8_t* value, void* vector) {
static std::string entryToString(
const LogicalType& dataType, const uint8_t* value, ValueVector* vector) {
auto valueVector = reinterpret_cast<ValueVector*>(vector);
switch (dataType.getLogicalTypeID()) {
case LogicalTypeID::BOOL:
Expand Down Expand Up @@ -53,17 +53,52 @@
return TypeUtils::toString(*reinterpret_cast<const ku_string_t*>(value));
case LogicalTypeID::INTERNAL_ID:
return TypeUtils::toString(*reinterpret_cast<const internalID_t*>(value));
case LogicalTypeID::FIXED_LIST:
return TypeUtils::fixedListToString(value, dataType, valueVector);
case LogicalTypeID::VAR_LIST:
return TypeUtils::toString(*reinterpret_cast<const list_entry_t*>(value), valueVector);
case LogicalTypeID::MAP:
return TypeUtils::toString(*reinterpret_cast<const map_entry_t*>(value), valueVector);
case LogicalTypeID::STRUCT:
return TypeUtils::toString(*reinterpret_cast<const struct_entry_t*>(value), valueVector);
case LogicalTypeID::UNION:
return TypeUtils::toString(*reinterpret_cast<const union_entry_t*>(value), valueVector);
case LogicalTypeID::UUID:
return TypeUtils::toString(*reinterpret_cast<const uuid_t*>(value));
case LogicalTypeID::NODE:
return TypeUtils::nodeToString(
*reinterpret_cast<const struct_entry_t*>(value), valueVector);
case LogicalTypeID::REL:
return TypeUtils::relToString(*reinterpret_cast<const struct_entry_t*>(value), valueVector);
default:
KU_UNREACHABLE;
}
}

// Stringifies the value stored at position `pos` of `vector`.
// A null position yields an empty string. Otherwise the address of the slot
// (data pointer + pos * bytes-per-value) is passed, together with the vector's
// own data type, to the type-dispatching entryToString overload.
static std::string entryToString(sel_t pos, ValueVector* vector) {
    if (!vector->isNull(pos)) {
        const auto* slot = vector->getData() + vector->getNumBytesPerValue() * pos;
        return entryToString(vector->dataType, slot, vector);
    }
    return "";
}

// Renders a FIXED_LIST value as "[v0,v1,...]" (comma-separated, no spaces).
//
// val:         points at the first child value; children are read back-to-back, each one
//              occupying the fixed size of the child's physical type.
// type:        the FIXED_LIST logical type; supplies the element count and the child type.
// dummyVector: only forwarded to entryToString; not otherwise used here.
//
// Returns "[]" when the list type declares zero elements.
std::string TypeUtils::fixedListToString(
    const uint8_t* val, const LogicalType& type, ValueVector* dummyVector) {
    const auto numValuesPerList = FixedListType::getNumValuesInList(&type);
    if (numValuesPerList == 0) {
        // Guard: a "count - 1" loop bound would wrap around for a zero count if the
        // count is an unsigned type, reading far past the end of the value.
        return "[]";
    }
    auto childType = FixedListType::getChildType(&type);
    // The element stride does not change between iterations, so compute it once.
    const auto childSize = PhysicalTypeUtils::getFixedTypeSize(childType->getPhysicalType());
    std::string result = "[";
    for (auto i = decltype(numValuesPerList){0}; i < numValuesPerList; ++i) {
        if (i != 0) {
            result += ",";
        }
        // Note: FixedList can only store numeric types and doesn't allow nulls.
        result += entryToString(*childType, val + i * childSize, dummyVector);
    }
    result += "]";
    return result;
}

template<>
std::string TypeUtils::toString(const int128_t& val, void* /*valueVector*/) {
return Int128_t::ToString(val);
Expand Down Expand Up @@ -136,90 +171,112 @@
return "[]";
}
std::string result = "[";
auto values = ListVector::getListValues(listVector, val);
auto childType = VarListType::getChildType(&listVector->dataType);
auto dataVector = ListVector::getDataVector(listVector);
for (auto i = 0u; i < val.size - 1; ++i) {
result += dataVector->isNull(val.offset + i) ?
"" :
castValueToString(*childType, values, dataVector);
result += entryToString(val.offset + i, dataVector);
result += ",";
values += ListVector::getDataVector(listVector)->getNumBytesPerValue();
}
result += dataVector->isNull(val.offset + val.size - 1) ?
"" :
castValueToString(*childType, values, dataVector);
result += entryToString(val.offset + val.size - 1, dataVector);
result += "]";
return result;
}

// Formats a single map entry as "<key>=<value>".
// `pos` is used to index dataVector, keyVector and valVector alike.
// When dataVector is null at `pos`, the entry is rendered as an empty string.
// NOTE: that null-entry branch was reported by Codecov as not covered by tests.
static std::string getMapEntryStr(
    sel_t pos, ValueVector* dataVector, ValueVector* keyVector, ValueVector* valVector) {
    if (!dataVector->isNull(pos)) {
        std::string entry = entryToString(pos, keyVector);
        entry += "=";
        entry += entryToString(pos, valVector);
        return entry;
    }
    return "";
}

// MAP specialization of toString.
// Renders the entries in [val.entry.offset, val.entry.offset + val.entry.size) as
// "{k=v, k=v}" (entries separated by ", "). An empty map is rendered as "{}".
// valueVector is cast to the ValueVector that owns the map entry.
//
// NOTE(review): this body contains two ways of producing each entry string — one through
// castValueToString with manually advanced keyValues/valValues pointers, and one through
// getMapEntryStr(pos, ...). `pos` is also declared twice in the function scope's tail and
// loop. This looks like removed and added diff lines shown together; confirm which set is
// the intended final code.
template<>
std::string TypeUtils::toString(const map_entry_t& val, void* valueVector) {
auto mapVector = (ValueVector*)valueVector;
// Empty map: nothing to iterate, and `size - 1` below must not be evaluated for size 0.
if (val.entry.size == 0) {
return "{}";
}
std::string result = "{";
auto keyType = MapType::getKeyType(&mapVector->dataType);
auto valType = MapType::getValueType(&mapVector->dataType);
auto dataVector = ListVector::getDataVector(mapVector);
auto keyVector = MapVector::getKeyVector(mapVector);
auto valVector = MapVector::getValueVector(mapVector);
// Raw pointers to the first key / value of this map entry.
auto keyValues = keyVector->getData() + keyVector->getNumBytesPerValue() * val.entry.offset;
auto valValues = valVector->getData() + valVector->getNumBytesPerValue() * val.entry.offset;
// All entries except the last are followed by the ", " separator.
for (auto i = 0u; i < val.entry.size - 1; ++i) {
result += dataVector->isNull(val.entry.offset + i) ?
"" :
castValueToString(*keyType, keyValues, dataVector) + "=" +
castValueToString(*valType, valValues, dataVector);
auto pos = val.entry.offset + i;
result += getMapEntryStr(pos, dataVector, keyVector, valVector);
result += ", ";
keyValues += keyVector->getNumBytesPerValue();
valValues += valVector->getNumBytesPerValue();
}
// Last entry: no trailing separator, then close the brace.
result += dataVector->isNull(val.entry.offset + val.entry.size - 1) ?
"" :
castValueToString(*keyType, keyValues, dataVector) + "=" +
castValueToString(*valType, valValues, dataVector);
auto pos = val.entry.offset + val.entry.size - 1;
result += getMapEntryStr(pos, dataVector, keyVector, valVector);
result += "}";
return result;
}

template<>
std::string TypeUtils::toString(const struct_entry_t& val, void* valVector) {
auto structVector = (ValueVector*)valVector;
auto fields = StructType::getFields(&structVector->dataType);
// Renders a STRUCT-shaped value as "{name: value, name: value}".
//
// SKIP_NULL_ENTRY: when true, fields whose field vector is null at val.pos are left out
//                  entirely; when false, every field is printed (entryToString renders a
//                  null value as an empty string).
// val:             identifies the row (val.pos) to read from each field vector.
// vector:          the struct vector; its dataType supplies the field list and names.
//
// A struct with no fields, or one whose fields were all skipped, is rendered as "{}".
template<bool SKIP_NULL_ENTRY>
static std::string structToString(const struct_entry_t& val, ValueVector* vector) {
    const auto numFields = StructType::getFields(&vector->dataType).size();
    std::string result = "{";
    // Track whether anything has been written so far. Deciding the separator from the
    // field index instead would emit a leading ", " whenever field 0 is skipped as null.
    bool wroteField = false;
    for (auto i = 0u; i < numFields; ++i) {
        auto fieldVector = StructVector::getFieldVector(vector, i);
        if constexpr (SKIP_NULL_ENTRY) {
            if (fieldVector->isNull(val.pos)) {
                continue;
            }
        }
        if (wroteField) {
            result += ", ";
        }
        result += StructType::getField(&vector->dataType, i)->getName();
        result += ": ";
        result += entryToString(val.pos, fieldVector.get());
        wroteField = true;
    }
    result += "}";
    return result;
}

// Renders a NODE value via structToString<true>.
// Field vector 0 is inspected first (the internal ID vector is the first field vector);
// if it is null at val.pos the whole node is rendered as an empty string.
// NOTE: that empty-string branch was reported by Codecov as not covered by tests.
std::string TypeUtils::nodeToString(const struct_entry_t& val, ValueVector* vector) {
    const bool internalIDIsNull = StructVector::getFieldVector(vector, 0)->isNull(val.pos);
    return internalIDIsNull ? std::string("") : structToString<true>(val, vector);
}

// Renders a REL value via structToString<true>.
// The field vector at index 3 is inspected first; if it is null at val.pos the whole
// rel is rendered as an empty string.
// NOTE(review): the previous comment called this "the third field vector", but index 3 is
// the fourth field when counting from zero — confirm which field holds the internal ID.
// NOTE: the empty-string branch was reported by Codecov as not covered by tests.
std::string TypeUtils::relToString(const struct_entry_t& val, ValueVector* vector) {
    const bool internalIDIsNull = StructVector::getFieldVector(vector, 3)->isNull(val.pos);
    return internalIDIsNull ? std::string("") : structToString<true>(val, vector);
}

// STRUCT specialization of toString: delegates to structToString<false>, so no field is
// skipped. valVector is the type-erased ValueVector that owns the struct entry.
template<>
std::string TypeUtils::toString(const struct_entry_t& val, void* valVector) {
    auto* structVector = static_cast<ValueVector*>(valVector);
    return structToString<false>(val, structVector);
}

// UNION specialization of toString.
// Reads the tag at val.entry.pos from the union's tag vector, uses it as the index of the
// value vector to read, and stringifies that vector's value at the same position.
template<>
std::string TypeUtils::toString(const union_entry_t& val, void* valVector) {
auto structVector = (ValueVector*)valVector;
// The tag value selects which of the union's value vectors holds this entry
// (presumably the currently active member — confirm against UnionVector).
auto unionFieldIdx =
UnionVector::getTagVector(structVector)->getValue<union_field_idx_t>(val.entry.pos);
auto unionFieldVector = UnionVector::getValVector(structVector, unionFieldIdx);
// NOTE(review): two consecutive return statements follow, so the second is unreachable as
// written. This looks like a removed diff line (castValueToString) and its replacement
// (entryToString) shown together; confirm which one is the intended final code.
return castValueToString(unionFieldVector->dataType,
unionFieldVector->getData() + unionFieldVector->getNumBytesPerValue() * val.entry.pos,
unionFieldVector);
return entryToString(val.entry.pos, unionFieldVector);
}

} // namespace common
Expand Down
16 changes: 3 additions & 13 deletions src/function/cast/cast_fixed_list.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,19 +103,9 @@ static void CastFixedListToString(
if (param.isNull(pos)) {
return;
}
std::string result = "[";
auto numValuesPerList = FixedListType::getNumValuesInList(&param.dataType);
auto childType = FixedListType::getChildType(&param.dataType);
auto values = param.getData() + pos * param.getNumBytesPerValue();
for (auto i = 0u; i < numValuesPerList - 1; ++i) {
// Note: FixedList can only store numeric types and doesn't allow nulls.
result += TypeUtils::castValueToString(*childType, values, nullptr /* vector */);
result += ",";
values += PhysicalTypeUtils::getFixedTypeSize(childType->getPhysicalType());
}
result += TypeUtils::castValueToString(*childType, values, nullptr /* vector */);
result += "]";
resultVector.setValue(resultPos, result);
auto value = param.getData() + pos * param.getNumBytesPerValue();
auto result = TypeUtils::fixedListToString(value, param.dataType, &param);
StringVector::addString(&resultVector, resultPos, result);
}

template<>
Expand Down
12 changes: 9 additions & 3 deletions src/function/vector_cast_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -469,8 +469,15 @@ static std::unique_ptr<ScalarFunction> bindCastToStringFunction(
func =
ScalarFunction::UnaryCastExecFunction<map_entry_t, ku_string_t, CastToString, EXECUTOR>;
} break;
case LogicalTypeID::NODE:
case LogicalTypeID::REL:
case LogicalTypeID::NODE: {
func = ScalarFunction::UnaryCastExecFunction<struct_entry_t, ku_string_t, CastNodeToString,
EXECUTOR>;
} break;
case LogicalTypeID::REL: {
func = ScalarFunction::UnaryCastExecFunction<struct_entry_t, ku_string_t, CastRelToString,
EXECUTOR>;
} break;
case LogicalTypeID::RECURSIVE_REL:
case LogicalTypeID::STRUCT: {
func = ScalarFunction::UnaryCastExecFunction<struct_entry_t, ku_string_t, CastToString,
EXECUTOR>;
Expand All @@ -479,7 +486,6 @@ static std::unique_ptr<ScalarFunction> bindCastToStringFunction(
func = ScalarFunction::UnaryCastExecFunction<union_entry_t, ku_string_t, CastToString,
EXECUTOR>;
} break;
// ToDo(Kebing): RECURSIVE_REL to string
default:
KU_UNREACHABLE;
}
Expand Down
12 changes: 8 additions & 4 deletions src/include/common/type_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
namespace kuzu {
namespace common {

class ValueVector;
struct blob_t;
struct uuid_t;

Expand All @@ -25,7 +26,6 @@ struct overload : Funcs... {
};

class TypeUtils {

public:
template<typename T>
static inline std::string toString(const T& val, void* /*valueVector*/ = nullptr) {
Expand All @@ -36,6 +36,13 @@ class TypeUtils {
std::is_same<T, double>::value || std::is_same<T, float>::value);
return std::to_string(val);
}
// A fixed list does not have a physical class, so it cannot reuse the toString template above.
// dummyVector is only there to avoid a clang-tidy check and should be removed once the
// in-memory layout of FIXED_LIST is unified with VAR_LIST.
static std::string fixedListToString(
const uint8_t* val, const common::LogicalType& type, ValueVector* dummyVector);
static std::string nodeToString(const struct_entry_t& val, ValueVector* vector);
static std::string relToString(const struct_entry_t& val, ValueVector* vector);

static inline void encodeOverflowPtr(
uint64_t& overflowPtr, page_idx_t pageIdx, uint16_t pageOffset) {
Expand All @@ -49,9 +56,6 @@ class TypeUtils {
memcpy(&pageOffset, ((uint8_t*)&overflowPtr) + 4, 2);
}

static std::string castValueToString(
const LogicalType& dataType, const uint8_t* value, void* vector);

template<typename T>
static inline constexpr common::PhysicalTypeID getPhysicalTypeIDForType() {
if constexpr (std::is_same_v<T, int64_t>) {
Expand Down
31 changes: 18 additions & 13 deletions src/include/function/cast/functions/cast_functions.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
#pragma once

#include "common/constants.h"
#include "common/exception/message.h"
#include "common/exception/overflow.h"
#include "common/string_format.h"
#include "common/type_utils.h"
Expand All @@ -15,17 +13,24 @@ struct CastToString {
template<typename T>
static inline void operation(T& input, common::ku_string_t& result,
common::ValueVector& inputVector, common::ValueVector& resultVector) {
std::string resultStr = common::TypeUtils::toString(input, (void*)&inputVector);
if (resultStr.length() > common::ku_string_t::SHORT_STR_LENGTH) {
if (resultStr.length() > common::BufferPoolConstants::PAGE_256KB_SIZE) {
throw common::RuntimeException(
common::ExceptionMessage::overLargeStringValueException(resultStr.length()));
}
result.overflowPtr = reinterpret_cast<uint64_t>(
common::StringVector::getInMemOverflowBuffer(&resultVector)
->allocateSpace(resultStr.length()));
}
result.set(resultStr);
auto str = common::TypeUtils::toString(input, (void*)&inputVector);
common::StringVector::addString(&resultVector, result, str);
}
};

// Cast functor for NODE -> STRING.
// Renders the node with TypeUtils::nodeToString and stores the text into `result`
// through StringVector::addString on the result vector.
struct CastNodeToString {
    static inline void operation(common::struct_entry_t& input, common::ku_string_t& result,
        common::ValueVector& inputVector, common::ValueVector& resultVector) {
        std::string rendered = common::TypeUtils::nodeToString(input, &inputVector);
        common::StringVector::addString(&resultVector, result, rendered);
    }
};

// Cast functor for REL -> STRING.
// Renders the rel with TypeUtils::relToString and stores the text into `result`
// through StringVector::addString on the result vector.
struct CastRelToString {
    static inline void operation(common::struct_entry_t& input, common::ku_string_t& result,
        common::ValueVector& inputVector, common::ValueVector& resultVector) {
        std::string rendered = common::TypeUtils::relToString(input, &inputVector);
        common::StringVector::addString(&resultVector, result, rendered);
    }
};

Expand Down
Loading
Loading