From e558f383da7ccd869008d22480963e1d3cc5d075 Mon Sep 17 00:00:00 2001 From: Xiong Zhang Date: Fri, 24 Jun 2022 15:36:19 -0700 Subject: [PATCH] better error message with unsupported velox udfs (#400) Summary: As noted in issue https://github.com/pytorch/torcharrow/issues/230 we want to improve the error messages for missing Velox UDFs so that they specify which case applies: 1. The UDF doesn't exist at all. 2. The UDF exists, but doesn't accept the input types that the user provided. This is implemented using code similar to https://github.com/facebookincubator/velox/blob/041f3759d6e4c070c2ecd2e01eae6a7be9d93957/velox/parse/TypeResolver.cpp#L112-L124 Pull Request resolved: https://github.com/pytorch/torcharrow/pull/400 Differential Revision: D37399459 Pulled By: bearzx fbshipit-source-id: ab2c239f0c2a22e119092df6b4c787fc4e7cc3d4 --- csrc/velox/column.cpp | 41 +++++++++++++++++++++++++- torcharrow/test/test_functional_cpu.py | 19 +++++++++++- 2 files changed, 58 insertions(+), 2 deletions(-) diff --git a/csrc/velox/column.cpp b/csrc/velox/column.cpp index b99fffb45..c143fc6d0 100644 --- a/csrc/velox/column.cpp +++ b/csrc/velox/column.cpp @@ -7,6 +7,7 @@ */ #include "column.h" +#include #include #include "VariantToVector.h" #include "bindings.h" @@ -435,6 +436,30 @@ std::unique_ptr OperatorHandle::fromCall( std::move(callTypedExprs), &TorchArrowGlobalStatic::execContext())); } +std::string udfSignaturesToString( + const std::vector& signatures) { + std::stringstream out; + for (auto i = 0; i < signatures.size(); ++i) { + if (i > 0) { + out << ", "; + } + out << signatures[i]->toString(); + } + return out.str(); +} + +std::string udfSignaturesToString(velox::RowTypePtr inputRowType) { + auto children = inputRowType->children(); + std::stringstream out; + for (auto i = 0; i < children.size(); ++i) { + if (i > 0) { + out << ","; + } + out << children[i]->toString(); + } + return "(" + boost::algorithm::to_lower_copy(out.str()) + ")"; +} + std::unique_ptr OperatorHandle::fromUDF( velox::RowTypePtr 
inputRowType, const std::string& udfName) { @@ -445,8 +470,22 @@ std::unique_ptr OperatorHandle::fromUDF( velox::TypePtr outputType = velox::resolveFunction(udfName, inputRowType->children()); + if (outputType == nullptr) { - throw std::runtime_error("Request for unknown Velox UDF: " + udfName); + std::string signature = udfSignaturesToString(inputRowType); + + auto allSignatures = velox::getFunctionSignatures(); + auto it = allSignatures.find(udfName); + if (it == allSignatures.end()) { + throw std::runtime_error( + "Request for unknown Velox UDF: " + udfName + signature); + } else { + const auto& functionSignatures = it->second; + throw std::runtime_error( + "Velox UDF signature is not supported: " + signature + + ". Supported signatures: " + + udfSignaturesToString(functionSignatures)); + } } return OperatorHandle::fromCall(inputRowType, outputType, udfName); } diff --git a/torcharrow/test/test_functional_cpu.py b/torcharrow/test/test_functional_cpu.py index aa9f1a247..be4379523 100644 --- a/torcharrow/test/test_functional_cpu.py +++ b/torcharrow/test/test_functional_cpu.py @@ -51,7 +51,24 @@ def test_functional_dispatch(self): # Validate that invoking unknown UDFs errors nicely. with self.assertRaises(RuntimeError) as ex: assert functional.idontexist(str_col) - self.assertEqual(str(ex.exception), "Request for unknown Velox UDF: idontexist") + self.assertTrue( + str(ex.exception).startswith("Request for unknown Velox UDF: idontexist") + ) + + # Validate that invoking unknown UDFs with unsupported signatures errors nicely too. 
+ with self.assertRaises(RuntimeError) as ex: + assert functional.firstx(str_col, 1) + msg = str(ex.exception) + print(msg) + self.assertTrue( + msg.startswith("Velox UDF signature is not supported: (varchar,bigint)") + ) + + supported_sig = msg[msg.find("Supported signatures:") : :] + self.assertTrue( + "(array(bigint),bigint) -> array(bigint), (array(integer),bigint) -> array(integer), (array(bigint),integer) -> array(bigint), (array(integer),integer) -> array(integer)" + in supported_sig + ) def test_factory_dispatch(self): rand_col = functional.rand(size=42)