From 84cf912dd019edb79e069383f159fb8121312473 Mon Sep 17 00:00:00 2001 From: Michael Buch Date: Tue, 7 Oct 2025 09:51:52 +0100 Subject: [PATCH] [llvm][DebugInfo] Support for versioned DISourceLanguageName --- llvm/include/llvm/AsmParser/LLToken.h | 43 ++++++++-------- llvm/lib/AsmParser/LLLexer.cpp | 1 + llvm/lib/AsmParser/LLParser.cpp | 49 ++++++++++++++++--- llvm/lib/Bitcode/Reader/MetadataLoader.cpp | 16 ++++-- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 8 ++- llvm/lib/IR/AsmWriter.cpp | 14 ++++-- ...compileunit-conflicting-language-fields.ll | 4 ++ .../dicompileunit-invalid-language.ll | 22 +++++++++ .../invalid-dicompileunit-missing-language.ll | 2 +- .../Bitcode/dwarf-source-language-name.ll | 15 ++++++ 10 files changed, 136 insertions(+), 38 deletions(-) create mode 100644 llvm/test/Assembler/dicompileunit-conflicting-language-fields.ll create mode 100644 llvm/test/Assembler/dicompileunit-invalid-language.ll create mode 100644 llvm/test/Bitcode/dwarf-source-language-name.ll diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h index d976d40e5e956..6de99fe182ad9 100644 --- a/llvm/include/llvm/AsmParser/LLToken.h +++ b/llvm/include/llvm/AsmParser/LLToken.h @@ -488,27 +488,28 @@ enum Kind { SummaryID, // ^42 // String valued tokens (StrVal). 
- LabelStr, // foo: - GlobalVar, // @foo @"foo" - ComdatVar, // $foo - LocalVar, // %foo %"foo" - MetadataVar, // !foo - StringConstant, // "foo" - DwarfTag, // DW_TAG_foo - DwarfAttEncoding, // DW_ATE_foo - DwarfVirtuality, // DW_VIRTUALITY_foo - DwarfLang, // DW_LANG_foo - DwarfCC, // DW_CC_foo - EmissionKind, // lineTablesOnly - NameTableKind, // GNU - FixedPointKind, // Fixed point - DwarfOp, // DW_OP_foo - DIFlag, // DIFlagFoo - DISPFlag, // DISPFlagFoo - DwarfMacinfo, // DW_MACINFO_foo - ChecksumKind, // CSK_foo - DbgRecordType, // dbg_foo - DwarfEnumKind, // DW_APPLE_ENUM_KIND_foo + LabelStr, // foo: + GlobalVar, // @foo @"foo" + ComdatVar, // $foo + LocalVar, // %foo %"foo" + MetadataVar, // !foo + StringConstant, // "foo" + DwarfTag, // DW_TAG_foo + DwarfAttEncoding, // DW_ATE_foo + DwarfVirtuality, // DW_VIRTUALITY_foo + DwarfLang, // DW_LANG_foo + DwarfSourceLangName, // DW_LNAME_foo + DwarfCC, // DW_CC_foo + EmissionKind, // lineTablesOnly + NameTableKind, // GNU + FixedPointKind, // Fixed point + DwarfOp, // DW_OP_foo + DIFlag, // DIFlagFoo + DISPFlag, // DISPFlagFoo + DwarfMacinfo, // DW_MACINFO_foo + ChecksumKind, // CSK_foo + DbgRecordType, // dbg_foo + DwarfEnumKind, // DW_APPLE_ENUM_KIND_foo // Type valued tokens (TyVal). 
Type, diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index f6937d38eb38c..50d1d4730007a 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -982,6 +982,7 @@ lltok::Kind LLLexer::LexIdentifier() { DWKEYWORD(ATE, DwarfAttEncoding); DWKEYWORD(VIRTUALITY, DwarfVirtuality); DWKEYWORD(LANG, DwarfLang); + DWKEYWORD(LNAME, DwarfSourceLangName); DWKEYWORD(CC, DwarfCC); DWKEYWORD(OP, DwarfOp); DWKEYWORD(MACINFO, DwarfMacinfo); diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 55899660fa84a..380b19296a3c4 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -4740,6 +4740,10 @@ struct DwarfLangField : public MDUnsignedField { DwarfLangField() : MDUnsignedField(0, dwarf::DW_LANG_hi_user) {} }; +struct DwarfSourceLangNameField : public MDUnsignedField { + DwarfSourceLangNameField() : MDUnsignedField(0, UINT32_MAX) {} +}; + struct DwarfCCField : public MDUnsignedField { DwarfCCField() : MDUnsignedField(0, dwarf::DW_CC_hi_user) {} }; @@ -4997,6 +5001,25 @@ bool LLParser::parseMDField(LocTy Loc, StringRef Name, DwarfLangField &Result) { return false; } +template <> +bool LLParser::parseMDField(LocTy Loc, StringRef Name, + DwarfSourceLangNameField &Result) { + if (Lex.getKind() == lltok::APSInt) + return parseMDField(Loc, Name, static_cast<MDUnsignedField &>(Result)); + + if (Lex.getKind() != lltok::DwarfSourceLangName) + return tokError("expected DWARF source language name"); + + unsigned Lang = dwarf::getSourceLanguageName(Lex.getStrVal()); + if (!Lang) + return tokError("invalid DWARF source language name" + Twine(" '") + + Lex.getStrVal() + "'"); + assert(Lang <= Result.Max && "Expected valid DWARF source language name"); + Result.assign(Lang); + Lex.Lex(); + return false; +} + template <> bool LLParser::parseMDField(LocTy Loc, StringRef Name, DwarfCCField &Result) { if (Lex.getKind() == lltok::APSInt) @@ -5836,9 +5859,12 @@ bool LLParser::parseDICompileUnit(MDNode 
*&Result, bool IsDistinct) { if (!IsDistinct) return tokError("missing 'distinct', required for !DICompileUnit"); + LocTy Loc = Lex.getLoc(); + #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ - REQUIRED(language, DwarfLangField, ); \ REQUIRED(file, MDField, (/* AllowNull */ false)); \ + OPTIONAL(language, DwarfLangField, ); \ + OPTIONAL(sourceLanguageName, DwarfSourceLangNameField, ); \ OPTIONAL(producer, MDStringField, ); \ OPTIONAL(isOptimized, MDBoolField, ); \ OPTIONAL(flags, MDStringField, ); \ @@ -5860,12 +5886,23 @@ bool LLParser::parseDICompileUnit(MDNode *&Result, bool IsDistinct) { PARSE_MD_FIELDS(); #undef VISIT_MD_FIELDS + if (!language.Seen && !sourceLanguageName.Seen) + return error(Loc, "missing one of 'language' or 'sourceLanguageName', " + "required for !DICompileUnit"); + + if (language.Seen && sourceLanguageName.Seen) + return error(Loc, "can only specify one of 'language' and " + "'sourceLanguageName' on !DICompileUnit"); + Result = DICompileUnit::getDistinct( - Context, DISourceLanguageName(language.Val), file.Val, producer.Val, - isOptimized.Val, flags.Val, runtimeVersion.Val, splitDebugFilename.Val, - emissionKind.Val, enums.Val, retainedTypes.Val, globals.Val, imports.Val, - macros.Val, dwoId.Val, splitDebugInlining.Val, debugInfoForProfiling.Val, - nameTableKind.Val, rangesBaseAddress.Val, sysroot.Val, sdk.Val); + Context, + language.Seen ? 
DISourceLanguageName(language.Val) + : DISourceLanguageName(sourceLanguageName.Val, 0), + file.Val, producer.Val, isOptimized.Val, flags.Val, runtimeVersion.Val, + splitDebugFilename.Val, emissionKind.Val, enums.Val, retainedTypes.Val, + globals.Val, imports.Val, macros.Val, dwoId.Val, splitDebugInlining.Val, + debugInfoForProfiling.Val, nameTableKind.Val, rangesBaseAddress.Val, + sysroot.Val, sdk.Val); return false; } diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp index a4d1b8372dfac..cdcf7a80ffac7 100644 --- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp @@ -1867,12 +1867,18 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( // distinct. It's always distinct. IsDistinct = true; + const auto LangVersionMask = (uint64_t(1) << 63); + const bool HasVersionedLanguage = Record[1] & LangVersionMask; + auto *CU = DICompileUnit::getDistinct( - Context, DISourceLanguageName(Record[1]), getMDOrNull(Record[2]), - getMDString(Record[3]), Record[4], getMDString(Record[5]), Record[6], - getMDString(Record[7]), Record[8], getMDOrNull(Record[9]), - getMDOrNull(Record[10]), getMDOrNull(Record[12]), - getMDOrNull(Record[13]), + Context, + HasVersionedLanguage + ? DISourceLanguageName(Record[1] & ~LangVersionMask, 0) + : DISourceLanguageName(Record[1]), + getMDOrNull(Record[2]), getMDString(Record[3]), Record[4], + getMDString(Record[5]), Record[6], getMDString(Record[7]), Record[8], + getMDOrNull(Record[9]), getMDOrNull(Record[10]), + getMDOrNull(Record[12]), getMDOrNull(Record[13]), Record.size() <= 15 ? nullptr : getMDOrNull(Record[15]), Record.size() <= 14 ? 0 : Record[14], Record.size() <= 16 ? 
true : Record[16], diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 7ed140d392fca..0ca55a26f8013 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -2108,7 +2108,13 @@ void ModuleBitcodeWriter::writeDICompileUnit(const DICompileUnit *N, assert(N->isDistinct() && "Expected distinct compile units"); Record.push_back(/* IsDistinct */ true); - Record.push_back(N->getSourceLanguage().getUnversionedName()); + auto Lang = N->getSourceLanguage(); + Record.push_back(Lang.getName()); + // Set bit so the MetadataLoader can distinguish between versioned and + // unversioned names. + if (Lang.hasVersionedName()) + Record.back() ^= (uint64_t(1) << 63); + Record.push_back(VE.getMetadataOrNullID(N->getFile())); Record.push_back(VE.getMetadataOrNullID(N->getRawProducer())); Record.push_back(N->isOptimized()); diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index ae086bcd3902d..0bc877d1c4471 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -2370,10 +2370,16 @@ static void writeDICompileUnit(raw_ostream &Out, const DICompileUnit *N, Out << "!DICompileUnit("; MDFieldPrinter Printer(Out, WriterCtx); - Printer.printDwarfEnum("language", - N->getSourceLanguage().getUnversionedName(), - dwarf::LanguageString, - /* ShouldSkipZero */ false); + auto Lang = N->getSourceLanguage(); + if (Lang.hasVersionedName()) + Printer.printDwarfEnum( + "sourceLanguageName", + static_cast<llvm::dwarf::SourceLanguageName>(Lang.getName()), + dwarf::SourceLanguageNameString, + /* ShouldSkipZero */ false); + else + Printer.printDwarfEnum("language", Lang.getName(), dwarf::LanguageString, + /* ShouldSkipZero */ false); Printer.printMetadata("file", N->getRawFile(), /* ShouldSkipNull */ false); Printer.printString("producer", N->getProducer()); diff --git a/llvm/test/Assembler/dicompileunit-conflicting-language-fields.ll b/llvm/test/Assembler/dicompileunit-conflicting-language-fields.ll new 
file mode 100644 index 0000000000000..3aad27ba8a87a --- /dev/null +++ b/llvm/test/Assembler/dicompileunit-conflicting-language-fields.ll @@ -0,0 +1,4 @@ +; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s + +; CHECK: <stdin>:[[@LINE+1]]:15: error: can only specify one of 'language' and 'sourceLanguageName' on !DICompileUnit +!0 = distinct !DICompileUnit(language: DW_LANG_C, sourceLanguageName: DW_LNAME_C, file: !DIFile(filename: "a", directory: "b")) diff --git a/llvm/test/Assembler/dicompileunit-invalid-language.ll b/llvm/test/Assembler/dicompileunit-invalid-language.ll new file mode 100644 index 0000000000000..da93c4f20e333 --- /dev/null +++ b/llvm/test/Assembler/dicompileunit-invalid-language.ll @@ -0,0 +1,22 @@ +; RUN: split-file %s %t +; RUN: not llvm-as < %t/invalid_dw_lang.ll -disable-output 2>&1 | FileCheck %s --check-prefix=INVALID_DW_LANG +; RUN: not llvm-as < %t/invalid_dw_lang_2.ll -disable-output 2>&1 | FileCheck %s --check-prefix=INVALID_DW_LANG_2 +; RUN: not llvm-as < %t/invalid_dw_lname.ll -disable-output 2>&1 | FileCheck %s --check-prefix=INVALID_DW_LNAME +; RUN: not llvm-as < %t/invalid_dw_lname_2.ll -disable-output 2>&1 | FileCheck %s --check-prefix=INVALID_DW_LNAME_2 + +; INVALID_DW_LANG: invalid DWARF language 'DW_LANG_blah' +; INVALID_DW_LANG_2: expected DWARF language +; INVALID_DW_LNAME: invalid DWARF source language name 'DW_LNAME_blah' +; INVALID_DW_LNAME_2: expected DWARF source language name + +;--- invalid_dw_lang.ll +!0 = distinct !DICompileUnit(language: DW_LANG_blah) + +;--- invalid_dw_lang_2.ll +!0 = distinct !DICompileUnit(language: DW_LNAME_C) + +;--- invalid_dw_lname.ll +!0 = distinct !DICompileUnit(sourceLanguageName: DW_LNAME_blah) + +;--- invalid_dw_lname_2.ll +!0 = distinct !DICompileUnit(sourceLanguageName: DW_LANG_C) diff --git a/llvm/test/Assembler/invalid-dicompileunit-missing-language.ll b/llvm/test/Assembler/invalid-dicompileunit-missing-language.ll index 8e4cb0261dbbf..ebc86e3410d66 100644 --- 
a/llvm/test/Assembler/invalid-dicompileunit-missing-language.ll +++ b/llvm/test/Assembler/invalid-dicompileunit-missing-language.ll @@ -1,4 +1,4 @@ ; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s -; CHECK: <stdin>:[[@LINE+1]]:74: error: missing required field 'language' +; CHECK: <stdin>:[[@LINE+1]]:15: error: missing one of 'language' or 'sourceLanguageName', required for !DICompileUnit !0 = distinct !DICompileUnit(file: !DIFile(filename: "a", directory: "b")) diff --git a/llvm/test/Bitcode/dwarf-source-language-name.ll b/llvm/test/Bitcode/dwarf-source-language-name.ll new file mode 100644 index 0000000000000..e9893638f61b5 --- /dev/null +++ b/llvm/test/Bitcode/dwarf-source-language-name.ll @@ -0,0 +1,15 @@ +; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s + +; CHECK: sourceLanguageName: DW_LNAME_ObjC_plus_plus + +source_filename = "cu.cpp" +target triple = "arm64-apple-macosx" + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} + +!0 = distinct !DICompileUnit(sourceLanguageName: DW_LNAME_ObjC_plus_plus, file: !1, producer: "handwritten", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, globals: !2, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/") +!1 = !DIFile(filename: "cu.cpp", directory: "/tmp") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 5} +!4 = !{i32 2, !"Debug Info Version", i32 3}