Skip to content
This repository was archived by the owner on Nov 1, 2024. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 40 additions & 1 deletion csrc/velox/column.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
*/

#include "column.h"
#include <boost/algorithm/string.hpp>
#include <memory>
#include "VariantToVector.h"
#include "bindings.h"
Expand Down Expand Up @@ -435,6 +436,30 @@ std::unique_ptr<OperatorHandle> OperatorHandle::fromCall(
std::move(callTypedExprs), &TorchArrowGlobalStatic::execContext()));
}

// Renders a list of function signatures as one human-readable string by
// joining each signature's toString() output with ", ".
//
// Returns an empty string when `signatures` is empty. Every entry is
// dereferenced unconditionally, so the list must not contain null pointers.
std::string udfSignaturesToString(
    const std::vector<const velox::exec::FunctionSignature*>& signatures) {
  std::string out;
  // Empty before the first element, ", " afterwards; this avoids an index
  // counter and the signed/unsigned comparison that came with it.
  const char* separator = "";
  for (const auto* signature : signatures) {
    out += separator;
    out += signature->toString();
    separator = ", ";
  }
  return out;
}

// Formats the child types of `inputRowType` as a lower-cased, parenthesized,
// comma-separated argument list: each child's toString() output joined with
// "," (no space), lower-cased, and wrapped in "(" and ")".
//
// A row type with no children yields "()". `inputRowType` is dereferenced
// unconditionally and must not be null.
std::string udfSignaturesToString(velox::RowTypePtr inputRowType) {
  // Bind by const reference so the child-type vector is not copied when
  // children() hands back a reference.
  const auto& children = inputRowType->children();
  std::string joined;
  // Empty before the first element, "," afterwards.
  const char* separator = "";
  for (const auto& child : children) {
    joined += separator;
    joined += child->toString();
    separator = ",";
  }
  return "(" + boost::algorithm::to_lower_copy(joined) + ")";
}

std::unique_ptr<OperatorHandle> OperatorHandle::fromUDF(
velox::RowTypePtr inputRowType,
const std::string& udfName) {
Expand All @@ -445,8 +470,22 @@ std::unique_ptr<OperatorHandle> OperatorHandle::fromUDF(

velox::TypePtr outputType =
velox::resolveFunction(udfName, inputRowType->children());

if (outputType == nullptr) {
throw std::runtime_error("Request for unknown Velox UDF: " + udfName);
std::string signature = udfSignaturesToString(inputRowType);

auto allSignatures = velox::getFunctionSignatures();
auto it = allSignatures.find(udfName);
if (it == allSignatures.end()) {
throw std::runtime_error(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shall we just use VELOX_USER_FAIL here? :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tried that a bit and found it's actually pretty verbose (see the example below), feel it's more concise to keep the current version.

WARNING: Logging before InitGoogleLogging() is written to STDERR
E20220624 11:25:39.216804 641654 Exceptions.h:68] Line: /Users/bearzx/facebookresearch-torcharrow/csrc/velox/column.cpp:483, Function:fromUDF, Expression: blaaaaaa (bigint, bigint), Source: USER, ErrorCode: INVALID_ARGUMENT
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/Users/bearzx/facebookresearch-torcharrow/torcharrow/functional.py", line 241, in firstx
    return _dispatch("firstx", col, num_to_copy)
  File "/Users/bearzx/facebookresearch-torcharrow/torcharrow/functional.py", line 53, in _dispatch
    return op(*args)
  File "/Users/bearzx/facebookresearch-torcharrow/torcharrow/velox_rt/functional.py", line 39, in dispatch
    result_col = ta.generic_udf_dispatch(op_name, *wrapped_args)
RuntimeError: Exception: VeloxUserError
Error Source: USER
Error Code: INVALID_ARGUMENT
Reason: blaaaaaa (bigint, bigint)
Retriable: False
Function: fromUDF
File: /Users/bearzx/facebookresearch-torcharrow/csrc/velox/column.cpp
Line: 483
Stack trace:
# 0
# 1
# 2
# 3
# 4
# 5
# 6
# 7
# 8
# 9
# 10
# 11
# 12
# 13
# 14
# 15
# 16
# 17
# 18
# 19
# 20
# 21
# 22
# 23
# 24
# 25
# 26
# 27
# 28
# 29
# 30
# 31
# 32
# 33
# 34
# 35

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think what we need to do is register the exception with Python so that we can catch it in Python, etc. But for now, let's use runtime_error :)

"Request for unknown Velox UDF: " + udfName + signature);
} else {
const auto& functionSignatures = it->second;
throw std::runtime_error(
"Velox UDF signature is not supported: " + signature +
". Supported signatures: " +
udfSignaturesToString(functionSignatures));
}
}
return OperatorHandle::fromCall(inputRowType, outputType, udfName);
}
Expand Down
19 changes: 18 additions & 1 deletion torcharrow/test/test_functional_cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,24 @@ def test_functional_dispatch(self):
# Validate that invoking unknown UDFs errors nicely.
with self.assertRaises(RuntimeError) as ex:
assert functional.idontexist(str_col)
self.assertEqual(str(ex.exception), "Request for unknown Velox UDF: idontexist")
self.assertTrue(
str(ex.exception).startswith("Request for unknown Velox UDF: idontexist")
)

# Validate that invoking known UDFs with unsupported signatures errors nicely too.
with self.assertRaises(RuntimeError) as ex:
assert functional.firstx(str_col, 1)
msg = str(ex.exception)
print(msg)
self.assertTrue(
msg.startswith("Velox UDF signature is not supported: (varchar,bigint)")
)

supported_sig = msg[msg.find("Supported signatures:") : :]
self.assertTrue(
"(array(bigint),bigint) -> array(bigint), (array(integer),bigint) -> array(integer), (array(bigint),integer) -> array(bigint), (array(integer),integer) -> array(integer)"
in supported_sig
)

def test_factory_dispatch(self):
rand_col = functional.rand(size=42)
Expand Down