Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[lldb] Use Python script to generate SBLanguages.h #90753

Merged
merged 3 commits into llvm:main from the generate-sbapi-dwarf-enum.py branch
May 1, 2024

Conversation

JDevlieghere
Copy link
Member

Use a Python script to generate SBLanguages.h instead of piggybacking on LLDB TableGen. This addresses Nico Weber's post-commit feedback.

Use a Python script to generate SBLanguages.h instead of piggybacking on
LLDB TableGen. This addresses Nico Weber's post-commit feedback.
@llvmbot
Copy link
Collaborator

llvmbot commented May 1, 2024

@llvm/pr-subscribers-lldb

Author: Jonas Devlieghere (JDevlieghere)

Changes

Use a Python script to generate SBLanguages.h instead of piggybacking on LLDB TableGen. This addresses Nico Weber's post-commit feedback.


Full diff: https://github.com/llvm/llvm-project/pull/90753.diff

5 Files Affected:

  • (added) lldb/scripts/generate-sbapi-dwarf-enum.py (+65)
  • (modified) lldb/source/API/CMakeLists.txt (+14-4)
  • (modified) lldb/utils/TableGen/CMakeLists.txt (-1)
  • (removed) lldb/utils/TableGen/LLDBSBAPIDWARFEnum.cpp (-67)
  • (modified) lldb/utils/TableGen/LLDBTableGen.cpp (-8)
diff --git a/lldb/scripts/generate-sbapi-dwarf-enum.py b/lldb/scripts/generate-sbapi-dwarf-enum.py
new file mode 100755
index 00000000000000..c4252223430ed6
--- /dev/null
+++ b/lldb/scripts/generate-sbapi-dwarf-enum.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python3
+
+import argparse
+import re
+
+HEADER = """\
+//===-- SBLanguages.h -----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_API_SBLANGUAGE_H
+#define LLDB_API_SBLANGUAGE_H
+/// Used by \\ref SBExpressionOptions.
+/// These enumerations use the same language enumerations as the DWARF
+/// specification for ease of use and consistency.
+enum SBSourceLanguageName : uint16_t {
+"""
+
+FOOTER = """\
+};
+
+#endif
+"""
+
+REGEX = re.compile(r'(^ *HANDLE_DW_LNAME *\( *([^,]+), ([^,]+), )"(.*)",.*\).*')
+
+
+def emit_enum(input, output):
+    # Read the input and break it up by lines.
+    lines = []
+    with open(input, "r") as f:
+        lines = f.readlines()
+
+    # Write the output.
+    with open(output, "w") as f:
+        # Emit the header.
+        f.write(HEADER)
+
+        # Emit the enum values.
+        for line in lines:
+            match = REGEX.search(line)
+            if not match:
+                continue
+            f.write(f"  /// {match.group(4)}.\n")
+            f.write(f"  eLanguageName{match.group(3)} = {match.group(2)},\n")
+
+        # Emit the footer
+        f.write(FOOTER)
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--output", "-o")
+    parser.add_argument("input")
+    args = parser.parse_args()
+
+    emit_enum(args.input, args.output)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/lldb/source/API/CMakeLists.txt b/lldb/source/API/CMakeLists.txt
index ad960403ae70bf..a64c0d4a333425 100644
--- a/lldb/source/API/CMakeLists.txt
+++ b/lldb/source/API/CMakeLists.txt
@@ -20,9 +20,19 @@ if(LLDB_ENABLE_LUA)
   set(lldb_lua_wrapper ${lua_bindings_dir}/LLDBWrapLua.cpp)
 endif()
 
-lldb_tablegen(../../include/lldb/API/SBLanguages.h -gen-lldb-sbapi-dwarf-enum
-  SOURCE ${LLVM_MAIN_INCLUDE_DIR}/llvm/BinaryFormat/Dwarf.def
-  TARGET lldb-sbapi-dwarf-enums)
+# Target to generate SBLanguages.h from Dwarf.def.
+set(sb_languages_file
+  ${CMAKE_CURRENT_BINARY_DIR}/../../include/lldb/API/SBLanguages.h)
+add_custom_target(
+  lldb-sbapi-dwarf-enums
+  "${Python3_EXECUTABLE}"
+      ${LLDB_SOURCE_DIR}/scripts/generate-sbapi-dwarf-enum.py
+      ${LLVM_MAIN_INCLUDE_DIR}/llvm/BinaryFormat/Dwarf.def
+      -o ${sb_languages_file}
+  BYPRODUCTS ${sb_languages_file}
+  DEPENDS ${LLVM_MAIN_INCLUDE_DIR}/llvm/BinaryFormat/Dwarf.def
+  WORKING_DIRECTORY ${LLVM_LIBRARY_OUTPUT_INTDIR}
+)
 
 add_lldb_library(liblldb SHARED ${option_framework}
   SBAddress.cpp
@@ -106,7 +116,7 @@ add_lldb_library(liblldb SHARED ${option_framework}
 
   DEPENDS
     lldb-sbapi-dwarf-enums
-  
+
   LINK_LIBS
     lldbBreakpoint
     lldbCore
diff --git a/lldb/utils/TableGen/CMakeLists.txt b/lldb/utils/TableGen/CMakeLists.txt
index 68547fe13e1aeb..47a6400b4287e2 100644
--- a/lldb/utils/TableGen/CMakeLists.txt
+++ b/lldb/utils/TableGen/CMakeLists.txt
@@ -10,7 +10,6 @@ if (NOT DEFINED LLDB_TABLEGEN_EXE)
     add_tablegen(lldb-tblgen LLDB
       LLDBOptionDefEmitter.cpp
       LLDBPropertyDefEmitter.cpp
-      LLDBSBAPIDWARFEnum.cpp
       LLDBTableGen.cpp
       LLDBTableGenUtils.cpp
       )
diff --git a/lldb/utils/TableGen/LLDBSBAPIDWARFEnum.cpp b/lldb/utils/TableGen/LLDBSBAPIDWARFEnum.cpp
deleted file mode 100644
index 084284ed6aa82a..00000000000000
--- a/lldb/utils/TableGen/LLDBSBAPIDWARFEnum.cpp
+++ /dev/null
@@ -1,67 +0,0 @@
-//===- LLDBPropertyDefEmitter.cpp -----------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Produce the list of source languages header file fragment for the SBAPI.
-//
-//===----------------------------------------------------------------------===//
-
-#include <fstream>
-#include <llvm/ADT/StringRef.h>
-#include <regex>
-
-namespace lldb_private {
-int EmitSBAPIDWARFEnum(int argc, char **argv) {
-  std::string InputFilename;
-  std::string OutputFilename;
-  std::string DepFilename;
-  // This command line option parser is as robust as the worst shell script.
-  for (int i = 0; i < argc; ++i) {
-    if (llvm::StringRef(argv[i]).ends_with("Dwarf.def"))
-      InputFilename = std::string(argv[i]);
-    if (llvm::StringRef(argv[i]) == "-o" && i + 1 < argc)
-      OutputFilename = std::string(argv[i + 1]);
-    if (llvm::StringRef(argv[i]) == "-d" && i + 1 < argc)
-      DepFilename = std::string(argv[i + 1]);
-  }
-  std::ifstream input(InputFilename);
-  std::ofstream output(OutputFilename);
-  output
-      << R"(//===-- SBLanguages.h -----------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLDB_API_SBLANGUAGE_H
-#define LLDB_API_SBLANGUAGE_H
-/// Used by \ref SBExpressionOptions.
-/// These enumerations use the same language enumerations as the DWARF
-/// specification for ease of use and consistency.
-enum SBSourceLanguageName : uint16_t {
-)";
-  std::string line;
-  std::regex macro_regex(R"(^ *HANDLE_DW_LNAME *\( *([^,]+), ([^,]+), )"
-                         "\"(.*)\",.*\\).*",
-                         std::regex::extended);
-  while (std::getline(input, line)) {
-    std::smatch match;
-    if (!std::regex_match(line, match, macro_regex))
-      continue;
-
-    output << "  /// " << match[3] << ".\n";
-    output << "  eLanguageName" << match[2] << " = " << match[1] << ",\n";
-  }
-  output << "};\n\n";
-  output << "#endif\n";
-  // Emit the dependencies file.
-  std::ofstream(DepFilename) << OutputFilename << ": " << InputFilename << '\n';
-  return 0;
-}
-} // namespace lldb_private
diff --git a/lldb/utils/TableGen/LLDBTableGen.cpp b/lldb/utils/TableGen/LLDBTableGen.cpp
index 75d16755604758..c63ca76c0d48f1 100644
--- a/lldb/utils/TableGen/LLDBTableGen.cpp
+++ b/lldb/utils/TableGen/LLDBTableGen.cpp
@@ -27,7 +27,6 @@ enum ActionType {
   GenOptionDefs,
   GenPropertyDefs,
   GenPropertyEnumDefs,
-  GenSBAPIDWARFEnum
 };
 
 static cl::opt<ActionType> Action(
@@ -41,8 +40,6 @@ static cl::opt<ActionType> Action(
                clEnumValN(GenPropertyDefs, "gen-lldb-property-defs",
                           "Generate lldb property definitions"),
                clEnumValN(GenPropertyEnumDefs, "gen-lldb-property-enum-defs",
-                          "Generate lldb property enum definitions"),
-               clEnumValN(GenSBAPIDWARFEnum, "gen-lldb-sbapi-dwarf-enum",
                           "Generate lldb property enum definitions")));
 
 static bool LLDBTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
@@ -62,8 +59,6 @@ static bool LLDBTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
   case GenPropertyEnumDefs:
     EmitPropertyEnumDefs(Records, OS);
     break;
-  case GenSBAPIDWARFEnum:
-    llvm_unreachable("already handled");
   }
   return false;
 }
@@ -74,9 +69,6 @@ int main(int argc, char **argv) {
   cl::ParseCommandLineOptions(argc, argv);
   llvm_shutdown_obj Y;
 
-  if (Action == GenSBAPIDWARFEnum)
-    return EmitSBAPIDWARFEnum(argc, argv);
-
   return TableGenMain(argv[0], &LLDBTableGenMain);
 }
 


# Emit the enum values.
for line in lines:
match = REGEX.search(line)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

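Since the regex is anchored to the start of the line (^), this should probably call REGEX.match rather than REGEX.search; match only ever matches at the beginning of the string, so the ^ becomes redundant and you can remove it if you want.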

Comment on lines 48 to 49
f.write(f" /// {match.group(4)}.\n")
f.write(f" eLanguageName{match.group(3)} = {match.group(2)},\n")
Copy link
Contributor

@kastiglione kastiglione May 1, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These lines would be easier to read with named capture groups instead of numeric group indices, e.g.:

Suggested change
f.write(f" /// {match.group(4)}.\n")
f.write(f" eLanguageName{match.group(3)} = {match.group(2)},\n")
f.write(f" /// {match.group("name")}.\n")
f.write(f" eLanguageName{match.group("ident")} = {match.group("value")},\n")

#endif
"""

REGEX = re.compile(r'(^ *HANDLE_DW_LNAME *\( *([^,]+), ([^,]+), )"(.*)",.*\).*')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
REGEX = re.compile(r'(^ *HANDLE_DW_LNAME *\( *([^,]+), ([^,]+), )"(.*)",.*\).*')
REGEX = re.compile(r'^ *HANDLE_DW_LNAME *\( *(?P<value>[^,]+), (?P<ident>[^,]+), "(?P<name>.*)",.*\)')

"""

REGEX = re.compile(
r'^ *HANDLE_DW_LNAME *\( *(?P<value>[^,]+), (?P<comment>[^,]+), "(?P<name>.*)",.*\)'
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

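It looks like you have the comment and name group names swapped: the second macro argument is the identifier that becomes the enumerator name, and the quoted third argument is the description that becomes the doc comment.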

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, is it possible to make the third group (?P<comment>[^"]*) instead of using .*? That would be safer, because the capture could then never run past the closing quote.

Copy link
Collaborator

@adrian-prantl adrian-prantl left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks!

"""

REGEX = re.compile(
r'^ *HANDLE_DW_LNAME *\( *(?P<value>[^,]+), (?P<comment>[^,]+), "(?P<name>.*)",.*\)'
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, is it possible to make the third group (?P<comment>[^"]*) instead of using .*? That would be safer, because the capture could then never run past the closing quote.

@JDevlieghere JDevlieghere merged commit dcbf0fc into llvm:main May 1, 2024
4 checks passed
@JDevlieghere JDevlieghere deleted the generate-sbapi-dwarf-enum.py branch May 1, 2024 20:02
adrian-prantl pushed a commit to adrian-prantl/llvm-project that referenced this pull request May 1, 2024
Use a Python script to generate SBLanguages.h instead of piggybacking on
LLDB TableGen. This addresses Nico Weber's post-commit feedback.

(cherry picked from commit dcbf0fc)
adrian-prantl pushed a commit to adrian-prantl/llvm-project that referenced this pull request May 1, 2024
Use a Python script to generate SBLanguages.h instead of piggybacking on
LLDB TableGen. This addresses Nico Weber's post-commit feedback.

(cherry picked from commit dcbf0fc)
"""

REGEX = re.compile(
r'^ *HANDLE_DW_LNAME *\( *(?P<value>[^,]+), (?P<comment>[^"]+), "(?P<name>.*)",.*\)'
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

to combine my feedback with Adrian's:

r'^ *HANDLE_DW_LNAME *\( *(?P<value>[^,]+), (?P<name>[^,]+), "(?P<comment>[^"]*)",.*\)'

# Target to generate SBLanguages.h from Dwarf.def.
set(sb_languages_file
${CMAKE_CURRENT_BINARY_DIR}/../../include/lldb/API/SBLanguages.h)
add_custom_target(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe this should be using add_custom_command(OUTPUT ...) instead. Per the CMake docs, add_custom_target "... has no output file and is always considered out of date even if the commands try to create a file with the name of the target". As a result, even an incremental build in which nothing has changed always re-runs this command, and then also rebuilds anything that depends on the generated header.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

adrian-prantl pushed a commit to adrian-prantl/llvm-project that referenced this pull request Jun 12, 2024
Use a Python script to generate SBLanguages.h instead of piggybacking on
LLDB TableGen. This addresses Nico Weber's post-commit feedback.

(cherry picked from commit dcbf0fc)
(cherry picked from commit 88d242e)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

5 participants