diff --git a/lldb/include/lldb/Core/Mangled.h b/lldb/include/lldb/Core/Mangled.h
index dcaa7a8cda6cc..e9e7f52bf3fde 100644
--- a/lldb/include/lldb/Core/Mangled.h
+++ b/lldb/include/lldb/Core/Mangled.h
@@ -43,6 +43,7 @@ class Mangled {
     eManglingSchemeNone = 0,
     eManglingSchemeMSVC,
     eManglingSchemeItanium,
+    eManglingSchemeOxCaml,
     eManglingSchemeRustV0,
     eManglingSchemeD
   };
diff --git a/lldb/source/Core/Mangled.cpp b/lldb/source/Core/Mangled.cpp
index 0c4d9f78c4402..5d1e0db40d486 100644
--- a/lldb/source/Core/Mangled.cpp
+++ b/lldb/source/Core/Mangled.cpp
@@ -44,6 +44,9 @@ Mangled::ManglingScheme Mangled::GetManglingScheme(llvm::StringRef const name) {
   if (name.startswith("?"))
     return Mangled::eManglingSchemeMSVC;
 
+  if (name.startswith("_O")) // NOTE(review): also matches e.g. _OBJC_CLASS_$_X
+    return Mangled::eManglingSchemeOxCaml;
+
   if (name.startswith("_R"))
     return Mangled::eManglingSchemeRustV0;
 
@@ -167,6 +170,19 @@ static char *GetItaniumDemangledStr(const char *M) {
   return demangled_cstr;
 }
 
+static char *GetOxCamlDemangledStr(const char *M) {
+  char *demangled_cstr = llvm::oxcamlDemangle(M);
+  // llvm::oxcamlDemangle yields nullptr when M cannot be decoded.
+  if (Log *log = GetLog(LLDBLog::Demangle)) {
+    if (demangled_cstr && demangled_cstr[0])
+      LLDB_LOG(log, "demangled oxcaml: {0} -> \"{1}\"", M, demangled_cstr);
+    else
+      LLDB_LOG(log, "demangled oxcaml: {0} -> error: failed to demangle", M);
+  }
+  // Passed on unchanged; presumably the caller frees it - TODO confirm.
+  return demangled_cstr;
+}
+
 static char *GetRustV0DemangledStr(const char *M) {
   char *demangled_cstr = llvm::rustDemangle(M);
 
@@ -242,6 +258,7 @@ bool Mangled::GetRichManglingInfo(RichManglingContext &context,
     }
   }
 
+  case eManglingSchemeOxCaml:
   case eManglingSchemeRustV0:
   case eManglingSchemeD:
     // Rich demangling scheme is not supported
@@ -275,6 +292,9 @@ ConstString Mangled::GetDemangledName() const {
         demangled_name = GetItaniumDemangledStr(mangled_name);
         break;
       }
+      case eManglingSchemeOxCaml:
+        demangled_name = GetOxCamlDemangledStr(mangled_name);
+        break;
       case eManglingSchemeRustV0:
         demangled_name = GetRustV0DemangledStr(mangled_name);
         break;
diff --git 
a/lldb/source/Symbol/Symtab.cpp b/lldb/source/Symbol/Symtab.cpp
index 40777e03be784..47c49a60f9e56 100644
--- a/lldb/source/Symbol/Symtab.cpp
+++ b/lldb/source/Symbol/Symtab.cpp
@@ -255,6 +255,7 @@ static bool lldb_skip_name(llvm::StringRef mangled,
 
   // No filters for this scheme yet. Include all names in indexing.
   case Mangled::eManglingSchemeMSVC:
+  case Mangled::eManglingSchemeOxCaml:
   case Mangled::eManglingSchemeRustV0:
   case Mangled::eManglingSchemeD:
     return false;
diff --git a/llvm/include/llvm/Demangle/Demangle.h b/llvm/include/llvm/Demangle/Demangle.h
index 6133d0b95bbfc..c0231e0bf1af2 100644
--- a/llvm/include/llvm/Demangle/Demangle.h
+++ b/llvm/include/llvm/Demangle/Demangle.h
@@ -57,6 +57,9 @@ char *microsoftDemangle(const char *mangled_name, size_t *n_read, char *buf,
                         size_t *n_buf, int *status,
                         MSDemangleFlags Flags = MSDF_None);
 
+// Demangles an OxCaml mangled symbol. Returns a malloc'ed string or nullptr.
+char *oxcamlDemangle(const char *MangledName);
+
 // Demangles a Rust v0 mangled symbol.
 char *rustDemangle(const char *MangledName);
 
diff --git a/llvm/lib/Demangle/CMakeLists.txt b/llvm/lib/Demangle/CMakeLists.txt
index eb7d212a02449..50c2a7f699381 100644
--- a/llvm/lib/Demangle/CMakeLists.txt
+++ b/llvm/lib/Demangle/CMakeLists.txt
@@ -3,6 +3,7 @@ add_llvm_component_library(LLVMDemangle
   ItaniumDemangle.cpp
   MicrosoftDemangle.cpp
   MicrosoftDemangleNodes.cpp
+  OxCamlDemangle.cpp
   RustDemangle.cpp
   DLangDemangle.cpp
 
diff --git a/llvm/lib/Demangle/Demangle.cpp b/llvm/lib/Demangle/Demangle.cpp
index 9d128424cabf4..e32881d28d1ba 100644
--- a/llvm/lib/Demangle/Demangle.cpp
+++ b/llvm/lib/Demangle/Demangle.cpp
@@ -19,6 +19,9 @@ static bool isItaniumEncoding(const char *S) {
   return std::strncmp(S, "_Z", 2) == 0 || std::strncmp(S, "___Z", 4) == 0;
 }
 
+static bool isOxCamlEncoding(const std::string &S) {
+  return S.size() >= 2 && S[0] == '_' && S[1] == 'O'; }
+
 static bool isRustEncoding(const char *S) { return S[0] == '_' && S[1] == 'R'; }
 
 static bool isDLangEncoding(const std::string &MangledName) {
@@ 
-54,6 +57,8 @@ bool llvm::nonMicrosoftDemangle(const char *MangledName, std::string &Result) {
     Demangled = rustDemangle(MangledName);
   else if (isDLangEncoding(MangledName))
     Demangled = dlangDemangle(MangledName);
+  else if (isOxCamlEncoding(MangledName))
+    Demangled = oxcamlDemangle(MangledName);
 
   if (!Demangled)
     return false;
diff --git a/llvm/lib/Demangle/OxCamlDemangle.cpp b/llvm/lib/Demangle/OxCamlDemangle.cpp
new file mode 100644
index 0000000000000..bf112f0efd66e
--- /dev/null
+++ b/llvm/lib/Demangle/OxCamlDemangle.cpp
@@ -0,0 +1,251 @@
+//===--- OxCamlDemangle.cpp -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a demangler for the new mangling scheme devised for OxCaml
+//
+// A mangled name is "_O" followed by path items and, optionally, an underscore
+// that introduces a unique id, which the demangler ignores. Path items are:
+//
+//   M<ident>  module                F<ident>  named function
+//   L<ident>  anonymous function    S<ident>  anonymous module
+//   P         partial application   <ident>   module (no prefix)
+//
+// <ident> is <len><bytes>, or u<len><coded>_<raw> when bytes are escaped.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cassert>
+#include <cstdlib>
+#include <exception>
+
+#include "llvm/Demangle/Demangle.h"
+#include "llvm/Demangle/StringView.h"
+#include "llvm/Demangle/Utility.h"
+
+using llvm::itanium_demangle::OutputBuffer;
+using llvm::itanium_demangle::StringView;
+
+// Returned by the number parsers when no usable number could be read.
+static constexpr unsigned InvalidNumber = ~0u;
+
+// Reads the longest run of characters in [First, First + Base) at the front of
+// SV as a base-Base number and consumes it. Returns InvalidNumber if the run
+// is empty or the value does not fit below InvalidNumber.
+static unsigned ConsumeNumber(StringView &SV, char First, unsigned Base) {
+  unsigned Value = 0;
+  size_t Len = 0;
+  for (; Len < SV.size(); ++Len) {
+    char C = SV[Len];
+    if (C < First || C - First >= static_cast<int>(Base))
+      break;
+    unsigned Digit = C - First;
+    // Reject values that overflow or collide with the sentinel.
+    if (Value > (InvalidNumber - 1 - Digit) / Base)
+      return InvalidNumber;
+    Value = Value * Base + Digit;
+  }
+  if (Len == 0)
+    return InvalidNumber;
+  SV = SV.dropFront(Len);
+  return Value;
+}
+
+static unsigned ConsumeUnsignedDecimal(StringView &SV) {
+  return ConsumeNumber(SV, '0', 10);
+}
+
+// Base 26 with 'A' = 0 ... 'Z' = 25.
+static unsigned ConsumeUnsigned26(StringView &SV) {
+  return ConsumeNumber(SV, 'A', 26);
+}
+
+static bool islowerhex(char C) {
+  return (C >= '0' && C <= '9') || (C >= 'a' && C <= 'f');
+}
+
+static unsigned lowerhex(char C) {
+  if (C >= '0' && C <= '9')
+    return C - '0';
+  assert(C >= 'a' && C <= 'f');
+  return C - 'a' + 10;
+}
+
+// Decode the payload of a unicode-escaped identifier: <len><coded>_<raw>.
+// <len> is the decimal size of "<coded>_<raw>". <coded> alternates a base-26
+// count of bytes to copy from <raw> with the escaped bytes that follow them,
+// written as pairs of lowercase hex digits.
+// Returns true on success, false on error
+static bool DecodeUnicodeEscaped(StringView &Mangled, OutputBuffer &Demangled) {
+  unsigned Len = ConsumeUnsignedDecimal(Mangled);
+  if (Len == InvalidNumber || Len == 0 || Len > Mangled.size())
+    return false;
+
+  // The first underscore of the payload separates <coded> from <raw>.
+  size_t Split = Mangled.find('_');
+  if (Split >= Len)
+    return false;
+
+  StringView Coded = Mangled.substr(0, Split);
+  StringView Raw = Mangled.substr(Split + 1, Len - Split - 1);
+
+  while (!Coded.empty()) {
+    unsigned ChunkLen = ConsumeUnsigned26(Coded);
+    if (ChunkLen == InvalidNumber || ChunkLen > Raw.size())
+      return false;
+    Demangled << Raw.substr(0, ChunkLen);
+    Raw = Raw.dropFront(ChunkLen);
+
+    size_t I = 0;
+    for (; I + 1 < Coded.size() && islowerhex(Coded[I]); I += 2) {
+      if (!islowerhex(Coded[I + 1]))
+        return false;
+      Demangled << static_cast<char>(lowerhex(Coded[I]) << 4 |
+                                     lowerhex(Coded[I + 1]));
+    }
+    Coded = Coded.dropFront(I);
+  }
+
+  // Whatever remains of <raw> follows the last escape verbatim.
+  if (!Raw.empty())
+    Demangled << Raw;
+
+  Mangled = Mangled.dropFront(Len);
+  return true;
+}
+
+// Decode identifier (either plain or unicode-escaped)
+// Handles: <len><bytes> or u<len><coded>_<raw>
+// Returns true on success, false on error
+static bool DecodeIdentifier(StringView &Mangled, OutputBuffer &Demangled) {
+  if (Mangled.consumeFront('u'))
+    return DecodeUnicodeEscaped(Mangled, Demangled);
+
+  // Plain identifier with length prefix
+  unsigned Len = ConsumeUnsignedDecimal(Mangled);
+  if (Len == InvalidNumber || Len == 0 || Len > Mangled.size())
+    return false;
+  Demangled << Mangled.substr(0, Len);
+  Mangled = Mangled.dropFront(Len);
+  return true;
+}
+
+// Decode anonymous location (format: filename_line_col)
+// Anonymous functions/modules are encoded as: fn(filename:line:col)
+// Returns true on success, false on error
+static bool DecodeAnonymousLocation(StringView &Mangled,
+                                    OutputBuffer &Demangled) {
+  if (Mangled.empty())
+    return false;
+
+  // Decode into a scratch buffer first, because the "fn(" prefix is only
+  // emitted once the decoded text is known to contain two underscores. The
+  // buffer is read and freed through getBuffer(), never through the pointer
+  // passed in, so a reallocation inside OutputBuffer cannot leave it dangling.
+  size_t Capacity = Mangled.size();
+  char *Scratch = static_cast<char *>(std::malloc(Capacity));
+  if (Scratch == nullptr)
+    std::terminate();
+  OutputBuffer Temp(Scratch, Capacity);
+
+  bool Ok = DecodeIdentifier(Mangled, Temp);
+  if (Ok) {
+    const char *Buf = Temp.getBuffer();
+    size_t Len = Temp.getCurrentPosition();
+
+    // Find the last two underscores: <filename>_<line>_<col>. Len doubles as
+    // "not found" since it is never a valid index.
+    size_t LineSep = Len, ColSep = Len;
+    for (size_t I = Len; I > 0; --I) {
+      if (Buf[I - 1] != '_')
+        continue;
+      if (ColSep == Len) {
+        ColSep = I - 1;
+      } else {
+        LineSep = I - 1;
+        break;
+      }
+    }
+
+    if (LineSep != Len) {
+      Demangled << "fn(" << StringView(Buf, Buf + LineSep) << ':'
+                << StringView(Buf + LineSep + 1, Buf + ColSep) << ':'
+                << StringView(Buf + ColSep + 1, Buf + Len) << ')';
+    } else {
+      // Fallback: just output the identifier as-is
+      Demangled << StringView(Buf, Buf + Len);
+    }
+  }
+
+  std::free(Temp.getBuffer());
+  return Ok;
+}
+
+// Decode every path item at the front of Mangled, stopping at the end of the
+// input or at the underscore that introduces the unique id.
+// Returns true on success, false on error
+static bool DecodePath(StringView &Mangled, OutputBuffer &Demangled) {
+  while (!Mangled.empty() && Mangled[0] != '_') {
+    char Kind = Mangled[0];
+    if (Kind == 'P') { // PartialFunction (no identifier, no dot separator)
+      Mangled = Mangled.dropFront(1);
+      Demangled << "(partially_applied)";
+      continue;
+    }
+
+    if (!Demangled.empty())
+      Demangled << '.';
+
+    bool Ok;
+    switch (Kind) {
+    case 'M': // Module
+    case 'F': // NamedFunction
+      Mangled = Mangled.dropFront(1);
+      Ok = DecodeIdentifier(Mangled, Demangled);
+      break;
+    case 'L': // AnonymousFunction
+    case 'S': // AnonymousModule
+      Mangled = Mangled.dropFront(1);
+      Ok = DecodeAnonymousLocation(Mangled, Demangled);
+      break;
+    default: // No prefix means Module (legacy compatibility)
+      Ok = DecodeIdentifier(Mangled, Demangled);
+      break;
+    }
+    if (!Ok)
+      return false;
+  }
+  return true;
+}
+
+char *llvm::oxcamlDemangle(const char *MangledName) {
+  if (MangledName == nullptr)
+    return nullptr;
+
+  StringView Mangled(MangledName);
+  if (!Mangled.consumeFront("_O"))
+    return nullptr;
+
+  // Allocate the buffer at a reasonable size, as OutputBuffer allocates 992
+  // bytes when starting from an empty buffer. The extra byte keeps the request
+  // non-zero for a bare "_O": malloc(0) is allowed to return a null pointer.
+  size_t Capacity = Mangled.size() + 1;
+  char *Buffer = static_cast<char *>(std::malloc(Capacity));
+  if (Buffer == nullptr)
+    std::terminate();
+  OutputBuffer Demangled(Buffer, Capacity);
+
+  // A symbol without any path item (e.g. "_O" or "_O_123") has no name to
+  // report, so it is rejected rather than demangled to an empty string.
+  if (!DecodePath(Mangled, Demangled) || Demangled.empty()) {
+    std::free(Demangled.getBuffer());
+    return nullptr;
+  }
+
+  Demangled << '\0';
+  return Demangled.getBuffer();
+}
diff --git a/llvm/unittests/Demangle/CMakeLists.txt b/llvm/unittests/Demangle/CMakeLists.txt
index d6071bc36bda8..aa3c4aa3df2a2 100644
--- a/llvm/unittests/Demangle/CMakeLists.txt
+++ b/llvm/unittests/Demangle/CMakeLists.txt
@@ -10,5 +10,6 @@ add_llvm_unittest(DemangleTests
   OutputBufferTest.cpp
   PartialDemangleTest.cpp
   RustDemangleTest.cpp
+  OxCamlDemangleTest.cpp
   StringViewTest.cpp
   )
diff --git a/llvm/unittests/Demangle/OxCamlDemangleTest.cpp b/llvm/unittests/Demangle/OxCamlDemangleTest.cpp
new file mode 100644
index 0000000000000..d9c1543ca9b16
--- /dev/null
+++ 
b/llvm/unittests/Demangle/OxCamlDemangleTest.cpp
@@ -0,0 +1,128 @@
+//===------------------ OxCamlDemangleTest.cpp ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Demangle/Demangle.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+#include <cstdlib>
+
+TEST(OxCamlDemangle, Success) {
+  char *Demangled = nullptr;
+
+  // NamedFunction
+  Demangled = llvm::oxcamlDemangle("_OM4MainF9say_hello_345_code");
+  EXPECT_STREQ(Demangled, "Main.say_hello");
+  std::free(Demangled);
+
+  // Nested Modules with function
+  Demangled = llvm::oxcamlDemangle("_OM4Main4TestF3foo_345");
+  EXPECT_STREQ(Demangled, "Main.Test.foo");
+  std::free(Demangled);
+
+  // Nested Modules with function
+  Demangled = llvm::oxcamlDemangle("_OM4Demo8PositiveF4make_2_code");
+  EXPECT_STREQ(Demangled, "Demo.Positive.make");
+  std::free(Demangled);
+
+  Demangled = llvm::oxcamlDemangle("_OM12Stdlib__ListF3map_113_code");
+  EXPECT_STREQ(Demangled, "Stdlib__List.map");
+  std::free(Demangled);
+}
+
+TEST(OxCamlDemangle, PathItemPrefixes) {
+  char *Demangled = nullptr;
+
+  // Module prefix 'M'
+  Demangled = llvm::oxcamlDemangle("_OM4Main_123");
+  EXPECT_STREQ(Demangled, "Main");
+  std::free(Demangled);
+
+  // NamedFunction prefix 'F'
+  Demangled = llvm::oxcamlDemangle("_OM4MainF3foo_456");
+  EXPECT_STREQ(Demangled, "Main.foo");
+  std::free(Demangled);
+
+  // Multiple modules with function
+  Demangled = llvm::oxcamlDemangle("_OM4MainM4TestF3bar_789");
+  EXPECT_STREQ(Demangled, "Main.Test.bar");
+  std::free(Demangled);
+
+  // PartialFunction prefix 'P' (no identifier)
+  Demangled = llvm::oxcamlDemangle("_OM4MainF3fooP_100");
+  EXPECT_STREQ(Demangled, "Main.foo(partially_applied)");
+  std::free(Demangled);
+
+  // AnonymousFunction prefix 'L' (filename with unicode escaping for '.')
+  // "main.ml_10_20": 'main' (4 chars) + '.' (encoded as 2e) + 'ml_10_20' (8 chars)
+  // Coded: "E2e" (E=4 in base-26), Raw: "mainml_10_20" (12 chars), Total: 16
+  Demangled = llvm::oxcamlDemangle("_OM4MainLu16E2e_mainml_10_20_200");
+  EXPECT_STREQ(Demangled, "Main.fn(main.ml:10:20)");
+  std::free(Demangled);
+
+  // AnonymousModule prefix 'S' (filename with unicode escaping for '.')
+  // "test.ml_5_15": 'test' (4 chars) + '.' (encoded as 2e) + 'ml_5_15' (7 chars)
+  // Coded: "E2e" (E=4), Raw: "testml_5_15" (11 chars), Total: 15
+  Demangled = llvm::oxcamlDemangle("_OM4MainSu15E2e_testml_5_15_300");
+  EXPECT_STREQ(Demangled, "Main.fn(test.ml:5:15)");
+  std::free(Demangled);
+}
+
+TEST(OxCamlDemangle, UnicodeEscaping) {
+  char *Demangled = nullptr;
+
+  // "a.b": copy 1 byte ('B' = 1 in base-26), then '.' escaped as 2e, then the
+  // rest of the raw bytes. Payload "B2e_ab" is 6 characters long.
+  Demangled = llvm::oxcamlDemangle("_OM4MainFu6B2e_ab_12");
+  EXPECT_STREQ(Demangled, "Main.a.b");
+  std::free(Demangled);
+
+  // "a+b*c": two escapes (2b and 2a), each preceded by a 1-byte raw chunk.
+  // Payload "B2bB2a_abc" is 10 characters long.
+  Demangled = llvm::oxcamlDemangle("_OM4MainFu10B2bB2a_abc_7");
+  EXPECT_STREQ(Demangled, "Main.a+b*c");
+  std::free(Demangled);
+}
+
+TEST(OxCamlDemangle, MixedEncoding) {
+  char *Demangled = nullptr;
+
+  // Mix of prefixed modules and nested modules
+  Demangled = llvm::oxcamlDemangle("_OM4MainM8PositiveF4make_2");
+  EXPECT_STREQ(Demangled, "Main.Positive.make");
+  std::free(Demangled);
+
+  // Actual symbols from Stdlib with nested module names using __
+  Demangled = llvm::oxcamlDemangle("_OM12Stdlib__ListF3map_123");
+  EXPECT_STREQ(Demangled, "Stdlib__List.map");
+  std::free(Demangled);
+}
+
+TEST(OxCamlDemangle, Invalid) {
+  char *Demangled = nullptr;
+
+  // Invalid prefix.
+  Demangled = llvm::oxcamlDemangle("_ABCDEF");
+  EXPECT_EQ(Demangled, nullptr);
+
+  // Correct prefix but still invalid.
+  Demangled = llvm::oxcamlDemangle("_Ocaml");
+  EXPECT_EQ(Demangled, nullptr);
+
+  // Identifier length exceeds the remaining input.
+  Demangled = llvm::oxcamlDemangle("_OM10Main");
+  EXPECT_EQ(Demangled, nullptr);
+
+  // Zero-length identifier.
+  Demangled = llvm::oxcamlDemangle("_OM0_1");
+  EXPECT_EQ(Demangled, nullptr);
+
+  // Escape with a single hex digit.
+  Demangled = llvm::oxcamlDemangle("_OM4MainFu6B2_ab_12");
+  EXPECT_EQ(Demangled, nullptr);
+}
diff --git a/llvm/utils/gn/secondary/llvm/lib/Demangle/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Demangle/BUILD.gn
index 88ac00b2cc101..9123c63f4fa57 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Demangle/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Demangle/BUILD.gn
@@ -7,6 +7,7 @@ static_library("Demangle") {
     "ItaniumDemangle.cpp",
     "MicrosoftDemangle.cpp",
     "MicrosoftDemangleNodes.cpp",
+    "OxCamlDemangle.cpp",
     "RustDemangle.cpp",
   ]
 }