Enable parsing of pre processor tokens as include path

martis42 · Sep 18, 2023 · b2b8f50 · b2b8f50
1 parent 556b278
commit b2b8f50
Show file tree

Hide file tree

Showing 7 changed files with 57 additions and 16 deletions.
diff --git a/README.md b/README.md
@@ -10,7 +10,6 @@
   - [Skipping Targets](#skipping-targets)
   - [Recursion](#recursion)
   - [Implementation_deps](#Implementation_deps)
-  - [Known limitations](#known-limitations)
   - [Applying automatic fixes](#applying-automatic-fixes)
 - [Assumptions of use](#assumptions-of-use)
 - [Supported Platforms](#supported-platforms)
@@ -167,16 +166,6 @@ your_aspect = dwyu_aspect_factory(use_implementation_deps = True)
 
 Examples for this can be seen at the [implementation_deps test cases](test/aspect/implementation_deps).
 
-## Known limitations
-
-Includes which are added through a preprocessor token are not recognized.
-For example the following won't be analyzed properly:
-
-```cpp
-#define INCLUDE_PATH "some/header.h"
-#include INCLUDE_PATH
-```
-
 ## Applying automatic fixes
 
 DWYU offers a tool to automatically fix some detected problems.

diff --git a/src/analyze_includes/parse_source.py b/src/analyze_includes/parse_source.py
@@ -78,7 +78,11 @@ def is_ignored(self, include: str) -> bool:
 
 def get_includes_from_file(file: Path, defines: List[str], include_paths: List[str]) -> List[Include]:
     """
-    Parse a C/C++ file and extract include statements which are neither commented nor disabled through a define.
+    Parse a C/C++ file and extract include statements which are neither commented nor disabled through preprocessor
+    branching (e.g. #ifdef).
+
+    The preprocessor removes all comments and inactive code branches. This then allows us to find all include
+    statements with a simple regex.
     """
     with open(file, encoding="utf-8") as fin:
         pre_processor = make_pre_processor()
@@ -91,10 +95,20 @@ def get_includes_from_file(file: Path, defines: List[str], include_paths: List[s
         output_sink = StringIO()
         pre_processor.write(output_sink)
 
-        return [
-            Include(file=file, include=include)
-            for include in re.findall(r'^\s*#include\s*["<](.+)[">]', output_sink.getvalue(), re.MULTILINE)
-        ]
+        included_paths = []
+        for include in re.findall(r"^\s*#include\s*(.+)", output_sink.getvalue(), re.MULTILINE):
+            if include.startswith(('"', "<")) and include.endswith(('"', ">")):
+                included_paths.append(include)
+            else:
+                # Either a malformed include statement or an include path defined through a pre processor token.
+                # We ignore malformed include paths as they violate our assumptions of use.
+                if include in pre_processor.macros:
+                    # 'macros' is a {str: 'Macro'} dictionary based on pcpp.parser.Macro.
+                    # The value is a list of 'LexToken' classes from 'ply.lex.LexToken'.
+                    # In all our tests with include statements the list always had just one element.
+                    included_paths.append(pre_processor.macros[include].value[0].value)
+
+        return [Include(file=file, include=include.lstrip('"<').rstrip('">')) for include in included_paths]
 
 
 def filter_includes(includes: List[Include], ignored_includes: IgnoredIncludes) -> List[Include]:

diff --git a/src/analyze_includes/test/BUILD b/src/analyze_includes/test/BUILD
@@ -28,6 +28,7 @@ py_test(
         "data/commented_includes/single_line_comments.h",
         "data/empty_header.h",
         "data/header_with_defines.h",
+        "data/include_based_on_pre_processor_token.h",
         "data/some_defines.h",
         "data/some_header.h",
         "data/use_defines.h",

diff --git a/src/analyze_includes/test/data/include_based_on_pre_processor_token.h b/src/analyze_includes/test/data/include_based_on_pre_processor_token.h
@@ -0,0 +1,3 @@
+#define MY_HEADER "some/header.h"
+
+#include MY_HEADER
diff --git a/src/analyze_includes/test/parse_source_test.py b/src/analyze_includes/test/parse_source_test.py
@@ -166,6 +166,13 @@ def test_includes_selected_through_defines_from_header(self):
         self.assertTrue(Include(file=test_file, include="expected/include_a.h") in result)
         self.assertTrue(Include(file=test_file, include="expected/include_b.h") in result)
 
+    def test_include_based_on_pre_processor_token(self):
+        test_file = Path("src/analyze_includes/test/data/include_based_on_pre_processor_token.h")
+        result = get_includes_from_file(test_file, defines=[], include_paths=[])
+
+        self.assertEqual(len(result), 1)
+        self.assertTrue(Include(file=test_file, include="some/header.h") in result)
+
 
 class TestGetRelevantIncludesFromFiles(unittest.TestCase):
     def test_get_relevant_includes_from_files(self):

diff --git a/test/aspect/defines/BUILD b/test/aspect/defines/BUILD
@@ -30,3 +30,15 @@ cc_library(
         "//test/aspect/defines/support:lib_b",
     ],
 )
+
+cc_library(
+    name = "include_using_pre_processor_token",
+    hdrs = ["include_using_pre_processor_token.h"],
+    copts = ['-DCONDITIONAL_DEFINES_HEADER=\"test/aspect/defines/support/conditional_defines.h\"'],
+    deps = [
+        "//test/aspect/defines/support:conditional_defines",
+        "//test/aspect/defines/support:lib_a",
+        "//test/aspect/defines/support:lib_b",
+        "//test/aspect/defines/support:some_defines",
+    ],
+)
diff --git a/test/aspect/defines/include_using_pre_processor_token.h b/test/aspect/defines/include_using_pre_processor_token.h
@@ -0,0 +1,15 @@
+#define HEADER_PATH_A "test/aspect/defines/support/some_defines.h"
+
+#include HEADER_PATH_A
+
+// Ensure the 'some_defines.h' header was included and parsed correctly by using some of its content
+#if SOME_SWITCH_VALUE > 100
+#include "test/aspect/defines/support/a.h"
+#endif
+
+#include CONDITIONAL_DEFINES_HEADER
+
+// Ensure the 'conditional_defines.h' header was included and parsed correctly by using some of its content
+#if SOME_VALUE > 40
+#include "test/aspect/defines/support/b.h"
+#else