Skip to content

Commit

Permalink
Fix cast node, rel, recursive to string
Browse files Browse the repository at this point in the history
  • Loading branch information
andyfengHKU committed Feb 15, 2024
1 parent 7fc4519 commit e862dab
Show file tree
Hide file tree
Showing 8 changed files with 188 additions and 85 deletions.
138 changes: 97 additions & 41 deletions src/common/type_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
namespace kuzu {
namespace common {

std::string TypeUtils::castValueToString(
const LogicalType& dataType, const uint8_t* value, void* vector) {
static std::string entryToString(
const LogicalType& dataType, const uint8_t* value, ValueVector* vector) {
auto valueVector = reinterpret_cast<ValueVector*>(vector);
switch (dataType.getLogicalTypeID()) {
case LogicalTypeID::BOOL:
Expand Down Expand Up @@ -53,17 +53,51 @@ std::string TypeUtils::castValueToString(
return TypeUtils::toString(*reinterpret_cast<const ku_string_t*>(value));
case LogicalTypeID::INTERNAL_ID:
return TypeUtils::toString(*reinterpret_cast<const internalID_t*>(value));
case LogicalTypeID::FIXED_LIST:
return TypeUtils::fixedListToString(value, dataType);
case LogicalTypeID::VAR_LIST:
return TypeUtils::toString(*reinterpret_cast<const list_entry_t*>(value), valueVector);
case LogicalTypeID::MAP:
return TypeUtils::toString(*reinterpret_cast<const map_entry_t*>(value), valueVector);
case LogicalTypeID::STRUCT:
return TypeUtils::toString(*reinterpret_cast<const struct_entry_t*>(value), valueVector);
case LogicalTypeID::UNION:
return TypeUtils::toString(*reinterpret_cast<const union_entry_t*>(value), valueVector);
case LogicalTypeID::UUID:
return TypeUtils::toString(*reinterpret_cast<const uuid_t*>(value));
case LogicalTypeID::NODE:
return TypeUtils::nodeToString(
*reinterpret_cast<const struct_entry_t*>(value), valueVector);
case LogicalTypeID::REL:
return TypeUtils::relToString(*reinterpret_cast<const struct_entry_t*>(value), valueVector);
default:
KU_UNREACHABLE;
}
}

// Renders the entry stored at position `pos` of `vector` as text.
// A null entry is rendered as an empty string; otherwise the raw bytes of the entry are handed to
// the type-dispatching entryToString overload together with the vector's logical type.
static std::string entryToString(sel_t pos, ValueVector* vector) {
    if (!vector->isNull(pos)) {
        // Entries are laid out back to back, getNumBytesPerValue() bytes apart.
        const auto* entryData = vector->getData() + vector->getNumBytesPerValue() * pos;
        return entryToString(vector->dataType, entryData, vector);
    }
    return "";
}

// Renders a fixed-size list as "[v0,v1,...]".
//   val:  pointer to the first element; elements are read back to back, each one advancing the
//         pointer by the fixed size of the child's physical type.
//   type: logical type of the fixed list; supplies the element count and the child type.
// Returns the bracketed, comma-separated text, or "[]" when the list holds no elements.
std::string TypeUtils::fixedListToString(const uint8_t* val, const LogicalType& type) {
    auto numValuesPerList = FixedListType::getNumValuesInList(&type);
    auto childType = FixedListType::getChildType(&type);
    // The stride is identical for every element, so compute it once outside the loop.
    const auto childSize = PhysicalTypeUtils::getFixedTypeSize(childType->getPhysicalType());
    std::string result = "[";
    // Emit the separator before every element except the first. This avoids computing
    // `numValuesPerList - 1`, so a zero-length list can neither wrap the loop bound around nor
    // trigger a read of a "last" element that does not exist.
    for (auto i = 0u; i < numValuesPerList; ++i) {
        if (i != 0) {
            result += ",";
        }
        // Note: FixedList can only store numeric types and doesn't allow nulls.
        result += entryToString(*childType, val, nullptr /* vector */);
        val += childSize;
    }
    result += "]";
    return result;
}

template<>
std::string TypeUtils::toString(const int128_t& val, void* /*valueVector*/) {
return Int128_t::ToString(val);
Expand Down Expand Up @@ -136,90 +170,112 @@ std::string TypeUtils::toString(const list_entry_t& val, void* valueVector) {
return "[]";
}
std::string result = "[";
auto values = ListVector::getListValues(listVector, val);
auto childType = VarListType::getChildType(&listVector->dataType);
auto dataVector = ListVector::getDataVector(listVector);
for (auto i = 0u; i < val.size - 1; ++i) {
result += dataVector->isNull(val.offset + i) ?
"" :
castValueToString(*childType, values, dataVector);
result += entryToString(val.offset + i, dataVector);
result += ",";
values += ListVector::getDataVector(listVector)->getNumBytesPerValue();
}
result += dataVector->isNull(val.offset + val.size - 1) ?
"" :
castValueToString(*childType, values, dataVector);
result += entryToString(val.offset + val.size - 1, dataVector);
result += "]";
return result;
}

// Formats one map entry as "key=value".
//   pos:        position of the entry, used to index all three vectors.
//   dataVector: vector whose null flag decides whether the entry is rendered at all.
//   keyVector / valVector: vectors the key text and value text are read from.
// Returns an empty string when dataVector marks the entry at `pos` as null.
static std::string getMapEntryStr(
    sel_t pos, ValueVector* dataVector, ValueVector* keyVector, ValueVector* valVector) {
    std::string entryStr;
    if (!dataVector->isNull(pos)) {
        entryStr += entryToString(pos, keyVector);
        entryStr += "=";
        entryStr += entryToString(pos, valVector);
    }
    return entryStr;
}

template<>
std::string TypeUtils::toString(const map_entry_t& val, void* valueVector) {
auto mapVector = (ValueVector*)valueVector;
if (val.entry.size == 0) {
return "{}";
}
std::string result = "{";
auto keyType = MapType::getKeyType(&mapVector->dataType);
auto valType = MapType::getValueType(&mapVector->dataType);
auto dataVector = ListVector::getDataVector(mapVector);
auto keyVector = MapVector::getKeyVector(mapVector);
auto valVector = MapVector::getValueVector(mapVector);
auto keyValues = keyVector->getData() + keyVector->getNumBytesPerValue() * val.entry.offset;
auto valValues = valVector->getData() + valVector->getNumBytesPerValue() * val.entry.offset;
for (auto i = 0u; i < val.entry.size - 1; ++i) {
result += dataVector->isNull(val.entry.offset + i) ?
"" :
castValueToString(*keyType, keyValues, dataVector) + "=" +
castValueToString(*valType, valValues, dataVector);
auto pos = val.entry.offset + i;
result += getMapEntryStr(pos, dataVector, keyVector, valVector);
result += ", ";
keyValues += keyVector->getNumBytesPerValue();
valValues += valVector->getNumBytesPerValue();
}
result += dataVector->isNull(val.entry.offset + val.entry.size - 1) ?
"" :
castValueToString(*keyType, keyValues, dataVector) + "=" +
castValueToString(*valType, valValues, dataVector);
auto pos = val.entry.offset + val.entry.size - 1;
result += getMapEntryStr(pos, dataVector, keyVector, valVector);
result += "}";
return result;
}

template<>
std::string TypeUtils::toString(const struct_entry_t& val, void* valVector) {
auto structVector = (ValueVector*)valVector;
auto fields = StructType::getFields(&structVector->dataType);
template<bool SKIP_NULL_ENTRY>
static std::string structToString(const struct_entry_t& val, ValueVector* vector) {
auto fields = StructType::getFields(&vector->dataType);
if (fields.size() == 0) {
return "{}";
}
std::string result = "{";
auto i = 0u;
for (; i < fields.size() - 1; ++i) {
auto fieldVector = StructVector::getFieldVector(structVector, i);
result += StructType::getField(&structVector->dataType, i)->getName();
auto fieldVector = StructVector::getFieldVector(vector, i);
if constexpr (SKIP_NULL_ENTRY) {
if (fieldVector->isNull(val.pos)) {
continue;
}
}
if (i != 0) {
result += ", ";
}
result += StructType::getField(&vector->dataType, i)->getName();
result += ": ";
result += castValueToString(*fields[i]->getType(),
fieldVector->getData() + fieldVector->getNumBytesPerValue() * val.pos,
fieldVector.get());
result += entryToString(val.pos, fieldVector.get());
}
auto fieldVector = StructVector::getFieldVector(vector, i);
if constexpr (SKIP_NULL_ENTRY) {
if (fieldVector->isNull(val.pos)) {
result += "}";
return result;
}
}
if (i != 0) {
result += ", ";
}
auto fieldVector = StructVector::getFieldVector(structVector, i);
result += StructType::getField(&structVector->dataType, i)->getName();
result += StructType::getField(&vector->dataType, i)->getName();
result += ": ";
result += castValueToString(*fields[i]->getType(),
fieldVector->getData() + fieldVector->getNumBytesPerValue() * val.pos, fieldVector.get());
result += entryToString(val.pos, fieldVector.get());
result += "}";
return result;
}

// Renders a NODE entry as text.
// The internal ID vector is the field at index 0; when it is null at val.pos the whole node is
// rendered as an empty string. Otherwise the node is printed through structToString<true>.
std::string TypeUtils::nodeToString(const struct_entry_t& val, ValueVector* vector) {
    const auto& internalIDVector = StructVector::getFieldVector(vector, 0);
    if (!internalIDVector->isNull(val.pos)) {
        return structToString<true>(val, vector);
    }
    return "";
}

// Renders a REL entry as text.
// The internal ID vector is read from field index 3 (i.e. the fourth field); when it is null at
// val.pos the whole rel is rendered as an empty string. Otherwise the rel is printed through
// structToString<true>.
std::string TypeUtils::relToString(const struct_entry_t& val, ValueVector* vector) {
    const auto& internalIDVector = StructVector::getFieldVector(vector, 3);
    if (!internalIDVector->isNull(val.pos)) {
        return structToString<true>(val, vector);
    }
    return "";
}

// toString specialization for struct entries: `valVector` is the type-erased ValueVector holding
// the struct, and the entry is printed through structToString<false>.
template<>
std::string TypeUtils::toString(const struct_entry_t& val, void* valVector) {
    auto* structVector = static_cast<ValueVector*>(valVector);
    return structToString<false>(val, structVector);
}

template<>
std::string TypeUtils::toString(const union_entry_t& val, void* valVector) {
auto structVector = (ValueVector*)valVector;
auto unionFieldIdx =
UnionVector::getTagVector(structVector)->getValue<union_field_idx_t>(val.entry.pos);
auto unionFieldVector = UnionVector::getValVector(structVector, unionFieldIdx);
return castValueToString(unionFieldVector->dataType,
unionFieldVector->getData() + unionFieldVector->getNumBytesPerValue() * val.entry.pos,
unionFieldVector);
return entryToString(val.entry.pos, unionFieldVector);
}

} // namespace common
Expand Down
16 changes: 3 additions & 13 deletions src/function/cast/cast_fixed_list.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,19 +103,9 @@ static void CastFixedListToString(
if (param.isNull(pos)) {
return;
}
std::string result = "[";
auto numValuesPerList = FixedListType::getNumValuesInList(&param.dataType);
auto childType = FixedListType::getChildType(&param.dataType);
auto values = param.getData() + pos * param.getNumBytesPerValue();
for (auto i = 0u; i < numValuesPerList - 1; ++i) {
// Note: FixedList can only store numeric types and doesn't allow nulls.
result += TypeUtils::castValueToString(*childType, values, nullptr /* vector */);
result += ",";
values += PhysicalTypeUtils::getFixedTypeSize(childType->getPhysicalType());
}
result += TypeUtils::castValueToString(*childType, values, nullptr /* vector */);
result += "]";
resultVector.setValue(resultPos, result);
auto value = param.getData() + pos * param.getNumBytesPerValue();
auto result = TypeUtils::fixedListToString(value, param.dataType);
StringVector::addString(&resultVector, resultPos, result);
}

template<>
Expand Down
12 changes: 9 additions & 3 deletions src/function/vector_cast_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -469,8 +469,15 @@ static std::unique_ptr<ScalarFunction> bindCastToStringFunction(
func =
ScalarFunction::UnaryCastExecFunction<map_entry_t, ku_string_t, CastToString, EXECUTOR>;
} break;
case LogicalTypeID::NODE:
case LogicalTypeID::REL:
case LogicalTypeID::NODE: {
func = ScalarFunction::UnaryCastExecFunction<struct_entry_t, ku_string_t, CastNodeToString,
EXECUTOR>;
} break;
case LogicalTypeID::REL: {
func = ScalarFunction::UnaryCastExecFunction<struct_entry_t, ku_string_t, CastRelToString,
EXECUTOR>;
} break;
case LogicalTypeID::RECURSIVE_REL:
case LogicalTypeID::STRUCT: {
func = ScalarFunction::UnaryCastExecFunction<struct_entry_t, ku_string_t, CastToString,
EXECUTOR>;
Expand All @@ -479,7 +486,6 @@ static std::unique_ptr<ScalarFunction> bindCastToStringFunction(
func = ScalarFunction::UnaryCastExecFunction<union_entry_t, ku_string_t, CastToString,
EXECUTOR>;
} break;
// ToDo(Kebing): RECURSIVE_REL to string
default:
KU_UNREACHABLE;
}
Expand Down
9 changes: 5 additions & 4 deletions src/include/common/type_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
namespace kuzu {
namespace common {

class ValueVector;
struct blob_t;
struct uuid_t;

Expand All @@ -25,7 +26,6 @@ struct overload : Funcs... {
};

class TypeUtils {

public:
template<typename T>
static inline std::string toString(const T& val, void* /*valueVector*/ = nullptr) {
Expand All @@ -36,6 +36,10 @@ class TypeUtils {
std::is_same<T, double>::value || std::is_same<T, float>::value);
return std::to_string(val);
}
// Fixed list does not have a physical class, so we cannot reuse the toString template above.
static std::string fixedListToString(const uint8_t* val, const common::LogicalType& type);
static std::string nodeToString(const struct_entry_t& val, ValueVector* vector);
static std::string relToString(const struct_entry_t& val, ValueVector* vector);

static inline void encodeOverflowPtr(
uint64_t& overflowPtr, page_idx_t pageIdx, uint16_t pageOffset) {
Expand All @@ -49,9 +53,6 @@ class TypeUtils {
memcpy(&pageOffset, ((uint8_t*)&overflowPtr) + 4, 2);
}

static std::string castValueToString(
const LogicalType& dataType, const uint8_t* value, void* vector);

template<typename T>
static inline constexpr common::PhysicalTypeID getPhysicalTypeIDForType() {
if constexpr (std::is_same_v<T, int64_t>) {
Expand Down
29 changes: 18 additions & 11 deletions src/include/function/cast/functions/cast_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,24 @@ struct CastToString {
template<typename T>
static inline void operation(T& input, common::ku_string_t& result,
common::ValueVector& inputVector, common::ValueVector& resultVector) {
std::string resultStr = common::TypeUtils::toString(input, (void*)&inputVector);
if (resultStr.length() > common::ku_string_t::SHORT_STR_LENGTH) {
if (resultStr.length() > common::BufferPoolConstants::PAGE_256KB_SIZE) {
throw common::RuntimeException(
common::ExceptionMessage::overLargeStringValueException(resultStr.length()));
}
result.overflowPtr = reinterpret_cast<uint64_t>(
common::StringVector::getInMemOverflowBuffer(&resultVector)
->allocateSpace(resultStr.length()));
}
result.set(resultStr);
auto str = common::TypeUtils::toString(input, (void*)&inputVector);
common::StringVector::addString(&resultVector, result, str);
}
};

// Cast operator turning a NODE struct entry into a string.
struct CastNodeToString {
    // Formats `input` with TypeUtils::nodeToString (reading from `inputVector`) and hands the text
    // to StringVector::addString for `result` in `resultVector`.
    static inline void operation(common::struct_entry_t& input, common::ku_string_t& result,
        common::ValueVector& inputVector, common::ValueVector& resultVector) {
        common::StringVector::addString(
            &resultVector, result, common::TypeUtils::nodeToString(input, &inputVector));
    }
};

// Cast operator turning a REL struct entry into a string.
struct CastRelToString {
    // Formats `input` with TypeUtils::relToString (reading from `inputVector`) and hands the text
    // to StringVector::addString for `result` in `resultVector`.
    static inline void operation(common::struct_entry_t& input, common::ku_string_t& result,
        common::ValueVector& inputVector, common::ValueVector& resultVector) {
        common::StringVector::addString(
            &resultVector, result, common::TypeUtils::relToString(input, &inputVector));
    }
};

Expand Down
Loading

0 comments on commit e862dab

Please sign in to comment.