Document and test the UTF-8 error reporting of
ConverterEBCDIC::convertToEBCDIC.

The header gains a doc comment naming the two error codes. The unit tests
cover a truncated two-byte sequence (a lone 0xC2 lead byte) and a lead byte
followed by a non-continuation byte (0xC2 followed by 'A').

diff --git a/llvm/include/llvm/Support/ConvertEBCDIC.h b/llvm/include/llvm/Support/ConvertEBCDIC.h
index 1ed88b9a905a7..3caa70b153bc6 100644
--- a/llvm/include/llvm/Support/ConvertEBCDIC.h
+++ b/llvm/include/llvm/Support/ConvertEBCDIC.h
@@ -20,6 +20,10 @@
 
 namespace llvm {
 namespace ConverterEBCDIC {
+/// Converts UTF-8 text to EBCDIC-1047.
+///
+/// Returns std::errc::illegal_byte_sequence for malformed UTF-8 and
+/// std::errc::invalid_argument for truncated UTF-8 input.
 LLVM_ABI std::error_code convertToEBCDIC(StringRef Source,
                                          SmallVectorImpl<char> &Result);
 
diff --git a/llvm/unittests/Support/ConvertEBCDICTest.cpp b/llvm/unittests/Support/ConvertEBCDICTest.cpp
index 557f29c391f9c..a541bafcaab02 100644
--- a/llvm/unittests/Support/ConvertEBCDICTest.cpp
+++ b/llvm/unittests/Support/ConvertEBCDICTest.cpp
@@ -40,6 +40,8 @@ static const char AccentE[] = "\xaa\x4a\xb1\xc1\x63\x67\x9e\xc5\x74\x71\x72"
 
 // String with Cyrillic character ya.
 static const char CyrillicUTF[] = "\xd0\xaf";
+static const char TruncatedUTF[] = "\xc2";
+static const char MalformedUTF[] = {'\xc2', 'A', '\0'};
 
 TEST(ConverterEBCDIC, convertToEBCDIC) {
   // Hello string.
@@ -94,4 +96,20 @@ TEST(ConverterEBCDIC, convertFromEBCDIC) {
   Dst.clear();
 }
 
+TEST(ConverterEBCDIC, convertToEBCDICRejectsTruncatedUTF8) {
+  SmallString<8> Dst;
+
+  std::error_code EC =
+      ConverterEBCDIC::convertToEBCDIC(StringRef(TruncatedUTF, 1), Dst);
+  EXPECT_EQ(EC, std::errc::invalid_argument);
+}
+
+TEST(ConverterEBCDIC, convertToEBCDICRejectsMalformedUTF8Continuation) {
+  SmallString<8> Dst;
+
+  std::error_code EC =
+      ConverterEBCDIC::convertToEBCDIC(StringRef(MalformedUTF, 2), Dst);
+  EXPECT_EQ(EC, std::errc::illegal_byte_sequence);
+}
+
 } // namespace