diff --git a/.github/workflows/formatting.yml b/.github/workflows/formatting.yml index f81cf639..da6fa7df 100644 --- a/.github/workflows/formatting.yml +++ b/.github/workflows/formatting.yml @@ -32,4 +32,3 @@ jobs: with: source: "." extensions: "h,hpp,c,cc,cpp,cxx" - exclude: "./crates/bender-slang/vendor" diff --git a/Cargo.lock b/Cargo.lock index 801cd0cc..4fed06a7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -148,7 +148,7 @@ dependencies = [ [[package]] name = "bender-slang" -version = "0.1.1" +version = "0.2.0" dependencies = [ "cmake", "cxx", diff --git a/Cargo.toml b/Cargo.toml index 25669ce4..64768a3e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,7 @@ inherits = "release" lto = "thin" [dependencies] -bender-slang = { version = "0.1.1", path = "crates/bender-slang", optional = true } +bender-slang = { version = "0.2.0", path = "crates/bender-slang", optional = true } serde = { version = "1", features = ["derive"] } serde_yaml_ng = "0.10" diff --git a/crates/bender-slang/Cargo.toml b/crates/bender-slang/Cargo.toml index 7e59f576..5d7eec64 100644 --- a/crates/bender-slang/Cargo.toml +++ b/crates/bender-slang/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "bender-slang" -version = "0.1.1" +version = "0.2.0" edition = "2024" description = "Internal bender crate: Rust bindings for the Slang SystemVerilog parser" license = "Apache-2.0" diff --git a/crates/bender-slang/build.rs b/crates/bender-slang/build.rs index 6d056aed..ef7518ab 100644 --- a/crates/bender-slang/build.rs +++ b/crates/bender-slang/build.rs @@ -1,38 +1,6 @@ // Copyright (c) 2025 ETH Zurich // Tim Fischer -// Generates cpp/compile_flags.txt so that clangd gets the correct include paths -// for the C++ bridge files. The file is written to the cpp/ directory and should -// be gitignored. It is picked up automatically by clangd for all files in that directory. -fn generate_compile_flags( - manifest_dir: &std::path::Path, - dst: &std::path::Path, - includes: &[&std::path::Path], - defines: &[(&str, &str)], -) { - use std::ffi::OsStr; - - let Some(target_root) = dst - .ancestors() - .find(|p| p.file_name() == Some(OsStr::new("target"))) - else { - return; - }; - - let flags: Vec = ["-x", "c++", "-std=c++20", "-fno-cxx-modules"] - .map(str::to_string) - .into_iter() - .chain(includes.iter().map(|p| format!("-I{}", p.display()))) - .chain([format!("-I{}", target_root.join("cxxbridge").display())]) - .chain(defines.iter().map(|(k, v)| format!("-D{}={}", k, v))) - .collect(); - - let _ = std::fs::write( - manifest_dir.join("cpp/compile_flags.txt"), - flags.join("\n") + "\n", - ); -} - fn main() { let manifest_dir = std::path::PathBuf::from(std::env::var("CARGO_MANIFEST_DIR").unwrap()); @@ -171,3 +139,39 @@ fn main() { println!("cargo:rerun-if-changed=cpp/print.cpp"); println!("cargo:rerun-if-changed=cpp/analysis.cpp"); } + +// Generates cpp/compile_flags.txt so that clangd gets the correct include paths +// for the C++ bridge files. The file is written to the cpp/ directory and should +// be gitignored. It is picked up automatically by clangd for all files in that directory. +fn generate_compile_flags( + manifest_dir: &std::path::Path, + dst: &std::path::Path, + includes: &[&std::path::Path], + defines: &[(&str, &str)], +) { + use std::ffi::OsStr; + + let Some(target_root) = dst + .ancestors() + .find(|p| p.file_name() == Some(OsStr::new("target"))) + else { + return; + }; + + let bridge_crate_include = manifest_dir.parent().unwrap_or(manifest_dir); + let flags: Vec = ["-x", "c++", "-std=c++20", "-fno-cxx-modules"] + .map(str::to_string) + .into_iter() + .chain(includes.iter().map(|p| format!("-I{}", p.display()))) + .chain([ + format!("-I{}", target_root.join("cxxbridge").display()), + format!("-I{}", bridge_crate_include.display()), + ]) + .chain(defines.iter().map(|(k, v)| format!("-D{}={}", k, v))) + .collect(); + + let _ = std::fs::write( + manifest_dir.join("cpp/compile_flags.txt"), + flags.join("\n") + "\n", + ); +} diff --git a/crates/bender-slang/cpp/analysis.cpp b/crates/bender-slang/cpp/analysis.cpp index 1233d261..36a78e66 100644 --- a/crates/bender-slang/cpp/analysis.cpp +++ b/crates/bender-slang/cpp/analysis.cpp @@ -1,6 +1,7 @@ // Copyright (c) 2025 ETH Zurich // Tim Fischer +#include "bender-slang/src/lib.rs.h" #include "slang/syntax/AllSyntax.h" #include "slang_bridge.h" @@ -32,8 +33,28 @@ static bool stderr_is_tty() { static const slang::DiagCode kDuplicateTopLevelDecl(slang::DiagSubsystem::General, 9999); static constexpr std::string_view kDuplicateTopLevelDeclFormat = "module '{}' overwrites previous definition in '{}'"; -rust::Vec reachable_tree_indices(const SlangSession& session, const rust::Vec& tops) { - const auto& treeVec = session.trees(); +// Converts an internal per-tree record into the cxx shared struct handed across the bridge. +static ParsedTree to_parsed(const TreeEntry& entry) { + ParsedTree pt; + pt.tree = entry.tree; + pt.path = rust::String(entry.path); + pt.parsed_ok = entry.parsedOk; + pt.encrypted = entry.encrypted; + return pt; +} + +// Returns every parsed tree in the session, each bundled with its per-file facts. The order +// matches parse order across all parse_group calls. +rust::Vec all_trees(const SlangSession& session) { + rust::Vec out; + for (const auto& entry : session.entries()) { + out.push_back(to_parsed(entry)); + } + return out; +} + +rust::Vec reachable_trees(const SlangSession& session, const rust::Vec& tops) { + const auto& entries = session.entries(); // One engine+client per distinct SourceManager. Each parse group creates its own // SourceManager (see SlangContext), so trees from different groups need different @@ -60,8 +81,8 @@ rust::Vec reachable_tree_indices(const SlangSession& session, con // Build the name-to-tree-index map with last-wins semantics, emitting a warning // whenever a later definition overwrites an earlier one. std::unordered_map nameToTreeIndex; - for (size_t i = 0; i < treeVec.size(); ++i) { - const auto& metadata = treeVec[i]->getMetadata(); + for (size_t i = 0; i < entries.size(); ++i) { + const auto& metadata = entries[i].tree->getMetadata(); auto checkAndInsert = [&](std::string_view name, slang::SourceLocation loc) { if (name.empty()) @@ -70,12 +91,12 @@ rust::Vec reachable_tree_indices(const SlangSession& session, con if (inserted) return; - const auto& prevBufferIds = treeVec[it->second]->getSourceBufferIds(); - std::string_view prevFile = prevBufferIds.empty() - ? std::string_view("") - : treeVec[it->second]->sourceManager().getRawFileName(prevBufferIds[0]); + const auto& prevBufferIds = entries[it->second].tree->getSourceBufferIds(); + std::string_view prevFile = + prevBufferIds.empty() ? std::string_view("") + : entries[it->second].tree->sourceManager().getRawFileName(prevBufferIds[0]); - auto& state = diagFor(treeVec[i]->sourceManager()); + auto& state = diagFor(entries[i].tree->sourceManager()); slang::Diagnostic diag(kDuplicateTopLevelDecl, loc); diag << name; diag << prevFile; @@ -93,9 +114,9 @@ rust::Vec reachable_tree_indices(const SlangSession& session, con // Build a dependency graph where each tree points to the trees that declare // symbols it references. - std::vector> deps(treeVec.size()); - for (size_t i = 0; i < treeVec.size(); ++i) { - const auto& metadata = treeVec[i]->getMetadata(); + std::vector> deps(entries.size()); + for (size_t i = 0; i < entries.size(); ++i) { + const auto& metadata = entries[i].tree->getMetadata(); std::unordered_set seen; for (auto ref : metadata.getReferencedSymbols()) { auto it = nameToTreeIndex.find(ref); @@ -121,7 +142,7 @@ rust::Vec reachable_tree_indices(const SlangSession& session, con } // Perform a DFS from the top modules to find all reachable trees. - std::vector reachable(treeVec.size(), false); + std::vector reachable(entries.size(), false); std::function dfs = [&](size_t index) { if (reachable[index]) { return; @@ -136,26 +157,24 @@ rust::Vec reachable_tree_indices(const SlangSession& session, con dfs(start); } - rust::Vec result; + rust::Vec result; for (size_t i = 0; i < reachable.size(); ++i) { if (reachable[i]) { - result.push_back(static_cast(i)); + result.push_back(to_parsed(entries[i])); } } return result; } // Returns the deduped, canonical filesystem paths of every header file that was actually loaded -// via `include directives while parsing the requested trees. Trees from different parse groups -// may live in different SourceManagers, so the lookup is per-tree. -rust::Vec resolved_include_paths_for(const SlangSession& session, - const rust::Vec& tree_indices) { - const auto& treeVec = session.trees(); +// via `include directives while parsing the given trees. Trees from different parse groups may +// live in different SourceManagers, so the lookup is per-tree. +rust::Vec resolved_include_paths_for(const rust::Vec& trees) { std::unordered_set uniquePaths; - for (auto idx : tree_indices) { - if (idx >= treeVec.size()) + for (const auto& parsed : trees) { + const auto& tree = parsed.tree; + if (!tree) continue; - const auto& tree = treeVec[idx]; const auto& sm = tree->sourceManager(); for (const auto& inc : tree->getIncludeDirectives()) { if (!inc.buffer.id.valid()) diff --git a/crates/bender-slang/cpp/session.cpp b/crates/bender-slang/cpp/session.cpp index 783e4bd8..2a0b7add 100644 --- a/crates/bender-slang/cpp/session.cpp +++ b/crates/bender-slang/cpp/session.cpp @@ -1,14 +1,15 @@ // Copyright (c) 2025 ETH Zurich // Tim Fischer +#include "slang/diagnostics/PreprocessorDiags.h" #include "slang_bridge.h" +#include #include using namespace slang; using namespace slang::syntax; -using std::shared_ptr; using std::string; using std::string_view; @@ -34,19 +35,23 @@ void SlangContext::set_defines(const rust::Vec& defs) { } } -// Parses a list of source files and returns the resulting syntax trees as a vector (of shared pointers). -// If any file fails to parse, an exception is thrown with the error message(s) from the diagnostic engine. -std::vector> SlangContext::parse_files(const rust::Vec& paths) { +// Parses a list of source files and returns one TreeEntry per file, each bundling the resulting +// syntax tree with the per-file facts slang reported (path, parse success, encryption). +// System-level errors (file unreadable, etc.) throw; per-file parse errors are surfaced +// non-fatally via the TreeEntry::parsedOk flag so the caller can apply policy. +std::vector SlangContext::parse_files(const rust::Vec& paths) { Bag options; options.set(ppOptions); - std::vector> out; + std::vector out; out.reserve(paths.size()); for (const auto& path : paths) { string_view pathView(path.data(), path.size()); auto result = SyntaxTree::fromFile(pathView, sourceManager, options); + // A system-level failure (file unreadable, etc.) is still fatal: the caller asked for + // this path and we couldn't even open it. Parse errors are tolerated below. if (!result) { auto& err = result.error(); std::string msg = "System Error loading '" + std::string(err.second) + "': " + err.first.message(); @@ -58,20 +63,24 @@ std::vector> SlangContext::parse_files(const rust::V diagEngine.clearIncludeStack(); bool hasErrors = false; + bool hasProtectDiag = false; for (const auto& diag : tree->diagnostics()) { hasErrors |= diag.isError(); + if (diag.code == slang::diag::ProtectedEnvelope) { + hasProtectDiag = true; + } diagEngine.issue(diag); } + // Surface diagnostics for any file with errors, but keep going — the Rust side decides + // what to do with the (possibly partial) tree. The encrypted flag lets the Rust side + // discriminate IEEE-1735 encrypted IP (auto-tolerated) from real syntax bugs (fail by + // default; tolerate with --allow-broken). if (hasErrors) { - std::string rendered = diagClient->getString(); - if (rendered.empty()) { - rendered = "Failed to parse '" + std::string(pathView) + "'."; - } - throw std::runtime_error(rendered); + std::cerr << diagClient->getString(); } - out.push_back(tree); + out.push_back(TreeEntry{tree, std::string(path.data(), path.size()), !hasErrors, hasProtectDiag}); } return out; @@ -86,23 +95,13 @@ void SlangSession::parse_group(const rust::Vec& files, const rust: ctx->set_includes(includes); ctx->set_defines(defines); - // Parse the files and store the resulting syntax trees in the session. + // Parse the files and append the resulting per-tree records to the session, so callers can + // decide how to handle partially-parsed files. auto parsed = ctx->parse_files(files); - allTrees.reserve(allTrees.size() + parsed.size()); - for (const auto& tree : parsed) { - allTrees.push_back(tree); + treeEntries.reserve(treeEntries.size() + parsed.size()); + for (auto& entry : parsed) { + treeEntries.push_back(std::move(entry)); } contexts.push_back(std::move(ctx)); } - -// Returns the number of syntax trees currently stored in the session. -std::size_t tree_count(const SlangSession& session) { return session.trees().size(); } - -// Returns the syntax tree at the given index in the session. -std::shared_ptr tree_at(const SlangSession& session, std::size_t index) { - if (index >= session.trees().size()) { - throw std::runtime_error("Tree index out of bounds."); - } - return session.trees()[index]; -} diff --git a/crates/bender-slang/cpp/slang_bridge.h b/crates/bender-slang/cpp/slang_bridge.h index 79840a66..69b78bb7 100644 --- a/crates/bender-slang/cpp/slang_bridge.h +++ b/crates/bender-slang/cpp/slang_bridge.h @@ -20,6 +20,24 @@ #include struct SlangPrintOpts; +struct ParsedTree; + +// Internal per-tree record kept by the session. Plain C++ (no cxx types) so the session can own +// it without depending on the generated header; the bridge functions convert it to `ParsedTree` +// at the FFI boundary. +struct TreeEntry { + std::shared_ptr tree; + // The source path exactly as it was handed to parse_group (so the Rust side can match it + // back to its own SourceFile entry without a separate index map). + std::string path; + // False if slang reported any error diagnostic while parsing this file. + bool parsedOk = true; + // True if slang emitted at least one `pragma protect` envelope diagnostic (the + // lexer/preprocessor signal that the file contains IEEE-1735 encrypted content). Lets the + // Rust side discriminate "encrypted IP" (auto-tolerated) from "real syntax bug" (fail by + // default; tolerate with --allow-broken). + bool encrypted = false; +}; class SlangContext { public: @@ -28,7 +46,7 @@ class SlangContext { void set_includes(const rust::Vec& includes); void set_defines(const rust::Vec& defines); - std::vector> parse_files(const rust::Vec& paths); + std::vector parse_files(const rust::Vec& paths); private: slang::SourceManager sourceManager; @@ -42,11 +60,11 @@ class SlangSession { void parse_group(const rust::Vec& files, const rust::Vec& includes, const rust::Vec& defines); - const std::vector>& trees() const { return allTrees; } + const std::vector& entries() const { return treeEntries; } private: std::vector> contexts; - std::vector> allTrees; + std::vector treeEntries; }; class SyntaxTreeRewriter { @@ -77,11 +95,9 @@ rust::String print_tree(std::shared_ptr tree, SlangPr rust::String dump_tree_json(std::shared_ptr tree); -rust::Vec reachable_tree_indices(const SlangSession& session, const rust::Vec& tops); -rust::Vec resolved_include_paths_for(const SlangSession& session, - const rust::Vec& tree_indices); -std::size_t tree_count(const SlangSession& session); -std::shared_ptr tree_at(const SlangSession& session, std::size_t index); +rust::Vec all_trees(const SlangSession& session); +rust::Vec reachable_trees(const SlangSession& session, const rust::Vec& tops); +rust::Vec resolved_include_paths_for(const rust::Vec& trees); std::uint64_t renamed_declarations(const SyntaxTreeRewriter& rewriter); std::uint64_t renamed_references(const SyntaxTreeRewriter& rewriter); diff --git a/crates/bender-slang/src/lib.rs b/crates/bender-slang/src/lib.rs index 9a789662..240330ab 100644 --- a/crates/bender-slang/src/lib.rs +++ b/crates/bender-slang/src/lib.rs @@ -16,10 +16,6 @@ pub enum SlangError { ParseGroup { message: String }, #[error("Failed to trim files by top modules: {message}")] TrimByTop { message: String }, - #[error("Failed to access parsed syntax tree: {message}")] - TreeAccess { message: String }, - #[error("Failed to rewrite syntax trees: {message}")] - Rewrite { message: String }, } #[derive(Debug, Clone, Copy, Default)] @@ -39,6 +35,18 @@ mod ffi { squash_newlines: bool, } + /// A parsed syntax tree bundled with the per-file facts slang reported while parsing it. + struct ParsedTree { + /// The parsed syntax tree (possibly partial if `parsed_ok` is false). + tree: SharedPtr, + /// The source path as it was handed to `parse_group`. + path: String, + /// False if slang reported any error diagnostic for this file. + parsed_ok: bool, + /// True if slang emitted a `pragma protect` envelope diag (IEEE-1735 encrypted IP). + encrypted: bool, + } + unsafe extern "C++" { include!("bender-slang/cpp/slang_bridge.h"); include!("slang/syntax/SyntaxTree.h"); @@ -60,16 +68,11 @@ mod ffi { defines: &Vec, ) -> Result<()>; - fn reachable_tree_indices(session: &SlangSession, tops: &Vec) -> Result>; - - fn resolved_include_paths_for( - session: &SlangSession, - tree_indices: &Vec, - ) -> Result>; + fn all_trees(session: &SlangSession) -> Vec; - fn tree_count(session: &SlangSession) -> usize; + fn reachable_trees(session: &SlangSession, tops: &Vec) -> Result>; - fn tree_at(session: &SlangSession, index: usize) -> Result>; + fn resolved_include_paths_for(trees: &Vec) -> Vec; fn new_syntax_tree_rewriter() -> UniquePtr; fn set_suffix(self: Pin<&mut SyntaxTreeRewriter>, suffix: &str); @@ -103,6 +106,33 @@ pub struct SyntaxTree<'a> { _session: PhantomData<&'a SlangSession>, } +/// A parsed syntax tree bundled with the per-file facts slang reported while parsing it. +pub struct ParsedTree<'a> { + /// The parsed syntax tree (possibly partial when `parsed_ok` is false). + pub tree: SyntaxTree<'a>, + /// The source path exactly as it was handed to [`SlangSession::parse_group`]. + pub path: String, + /// False if slang reported any error diagnostic for this file. + pub parsed_ok: bool, + /// True if slang emitted a `pragma protect` envelope diagnostic, i.e. the file is + /// IEEE-1735 encrypted IP rather than a genuinely broken source file. + pub encrypted: bool, +} + +impl<'a> ParsedTree<'a> { + fn from_ffi(parsed: ffi::ParsedTree) -> Self { + Self { + tree: SyntaxTree { + inner: parsed.tree, + _session: PhantomData, + }, + path: parsed.path, + parsed_ok: parsed.parsed_ok, + encrypted: parsed.encrypted, + } + } +} + pub struct SyntaxTreeRewriter { inner: UniquePtr, } @@ -159,82 +189,54 @@ impl SlangSession { files: &[String], includes: &[String], defines: &[String], - ) -> Result> { + ) -> Result<()> { let files_vec = files.to_vec(); let includes_vec = normalize_include_dirs(includes)?; let defines_vec = defines.to_vec(); - let start = self.tree_count(); self.inner .pin_mut() .parse_group(&files_vec, &includes_vec, &defines_vec) .map_err(|cause| SlangError::ParseGroup { message: cause.to_string(), - })?; - - let end = self.tree_count(); - Ok((start..end).collect()) + }) } - /// Returns the total number of parsed syntax trees in the session. - pub fn tree_count(&self) -> usize { - ffi::tree_count(self.inner.as_ref().unwrap()) + /// Returns every parsed tree in the session, each bundled with its per-file facts (path, + /// parse success, encryption). The order matches parse order across all `parse_group` calls. + pub fn all_trees(&self) -> Vec> { + ffi::all_trees(self.inner.as_ref().unwrap()) + .into_iter() + .map(ParsedTree::from_ffi) + .collect() } - /// Returns all parsed syntax trees in the session. - pub fn all_trees(&self) -> Result>> { - let count = self.tree_count(); - let mut out = Vec::with_capacity(count); - for idx in 0..count { - out.push(self.tree(idx)?); - } - Ok(out) - } - - /// Returns the indices of syntax trees reachable from the given top modules. - pub fn reachable_indices(&self, tops: &[String]) -> Result> { + /// Returns the parsed trees reachable from the given top modules, each bundled with its + /// per-file facts. + pub fn reachable_trees(&self, tops: &[String]) -> Result>> { let tops = tops.to_vec(); - let indices = - ffi::reachable_tree_indices(self.inner.as_ref().unwrap(), &tops).map_err(|cause| { - SlangError::TrimByTop { - message: cause.to_string(), - } - })?; - Ok(indices.into_iter().map(|i| i as usize).collect()) + let trees = ffi::reachable_trees(self.inner.as_ref().unwrap(), &tops).map_err(|cause| { + SlangError::TrimByTop { + message: cause.to_string(), + } + })?; + Ok(trees.into_iter().map(ParsedTree::from_ffi).collect()) } /// Returns the canonical filesystem paths of every header that was actually loaded via an /// `include directive while parsing the given trees. Useful for figuring out which include /// directories were actually consulted. - pub fn resolved_include_paths(&self, tree_indices: &[usize]) -> Result> { - let indices: Vec = tree_indices.iter().map(|&i| i as u32).collect(); - let paths = ffi::resolved_include_paths_for(self.inner.as_ref().unwrap(), &indices) - .map_err(|cause| SlangError::TrimByTop { - message: cause.to_string(), - })?; - Ok(paths.into_iter().collect()) - } - - /// Returns syntax trees reachable from the given top modules. - pub fn reachable_trees(&self, tops: &[String]) -> Result>> { - let indices = self.reachable_indices(tops)?; - let mut out = Vec::with_capacity(indices.len()); - for idx in indices { - out.push(self.tree(idx)?); - } - Ok(out) - } - - /// Returns a handle to the syntax tree at the given index. - pub fn tree(&self, index: usize) -> Result> { - Ok(SyntaxTree { - inner: ffi::tree_at(self.inner.as_ref().unwrap(), index).map_err(|cause| { - SlangError::TreeAccess { - message: cause.to_string(), - } - })?, - _session: PhantomData, - }) + pub fn resolved_include_paths(&self, trees: &[ParsedTree]) -> Vec { + let ffi_trees: Vec = trees + .iter() + .map(|t| ffi::ParsedTree { + tree: t.tree.inner.clone(), + path: t.path.clone(), + parsed_ok: t.parsed_ok, + encrypted: t.encrypted, + }) + .collect(); + ffi::resolved_include_paths_for(&ffi_trees) } } diff --git a/crates/bender-slang/tests/basic.rs b/crates/bender-slang/tests/basic.rs index 5b405f26..315ef51a 100644 --- a/crates/bender-slang/tests/basic.rs +++ b/crates/bender-slang/tests/basic.rs @@ -18,22 +18,32 @@ fn parse_valid_file_succeeds() { let includes = vec![fixture_path("include")]; let defines = vec![]; assert!(session.parse_group(&files, &includes, &defines).is_ok()); - assert_eq!(session.tree_count(), 1); + assert_eq!(session.all_trees().len(), 1); } #[test] -fn parse_invalid_file_returns_parse_error() { +fn parse_invalid_file_reported_via_parsed_ok() { + // The contract: parse_group is lenient — system errors still throw, but per-file parse + // errors are surfaced via ParsedTree::parsed_ok. Callers (bender script, bender pickle) + // layer their own policy (allow / refuse / discriminate) on top. let mut session = bender_slang::SlangSession::new(); let files = vec![fixture_path("src/broken.sv")]; let includes = vec![]; let defines = vec![]; - let result = session.parse_group(&files, &includes, &defines); + session + .parse_group(&files, &includes, &defines) + .expect("parse_group is lenient and should return Ok even on parse errors"); - match result { - Err(bender_slang::SlangError::ParseGroup { .. }) => {} - Err(other) => panic!("expected SlangError::ParseGroup, got {other}"), - Ok(_) => panic!("expected parse to fail"), - } + let trees = session.all_trees(); + assert_eq!(trees.len(), 1); + assert!( + !trees[0].parsed_ok, + "broken.sv should be reported as a failed parse" + ); + assert!( + !trees[0].encrypted, + "broken.sv has no pragma protect envelope" + ); } #[test] @@ -46,13 +56,13 @@ fn rewriter_build_from_trees_is_repeatable() { .parse_group(&files, &includes, &defines) .expect("parse should succeed"); - let trees = session.all_trees().expect("tree collection should succeed"); + let trees = session.all_trees(); let mut rewriter_once = bender_slang::SyntaxTreeRewriter::new(); rewriter_once.set_prefix("p_"); rewriter_once.set_suffix("_s"); let first_pass_trees: Vec<_> = trees .iter() - .map(|t| rewriter_once.rewrite_declarations(t)) + .map(|t| rewriter_once.rewrite_declarations(&t.tree)) .collect(); let renamed_once = rewriter_once.rewrite_references( first_pass_trees @@ -76,7 +86,7 @@ fn rewriter_build_from_trees_is_repeatable() { rewriter_twice.set_suffix("_s"); let first_pass_trees: Vec<_> = trees .iter() - .map(|t| rewriter_twice.rewrite_declarations(t)) + .map(|t| rewriter_twice.rewrite_declarations(&t.tree)) .collect(); let renamed_twice = rewriter_twice.rewrite_references( first_pass_trees diff --git a/src/cmd/pickle.rs b/src/cmd/pickle.rs index 130b2138..3d80f023 100644 --- a/src/cmd/pickle.rs +++ b/src/cmd/pickle.rs @@ -220,8 +220,27 @@ pub fn run(sess: &Session, args: PickleArgs) -> Result<()> { .into_diagnostic()?; } + // Pickle rewrites and reprints each tree, so a partial parse silently emits broken output + // (encrypted protect envelopes get mangled, etc.). Refuse rather than corrupt — the file + // filter in `bender script` is fine with partial trees, but pickle is not. Each tree carries + // its own source path, so we can name the offenders directly. + let all_trees = session.all_trees(); + let failed: Vec<&str> = all_trees + .iter() + .filter(|t| !t.parsed_ok) + .map(|t| t.path.as_str()) + .collect(); + if !failed.is_empty() { + return Err(miette::miette!( + "pickle cannot rewrite {} file(s) with slang parse errors (output would be corrupt): {}\n\ + see diagnostics above; use `bender script --top ...` if you only need a file list", + failed.len(), + failed.join(", ") + )); + } + let trees = if args.top.is_empty() { - session.all_trees().into_diagnostic()? + all_trees } else { session.reachable_trees(&args.top).into_diagnostic()? }; @@ -234,7 +253,7 @@ pub fn run(sess: &Session, args: PickleArgs) -> Result<()> { // Pass 1: build rename map across all trees. let trees: Vec<_> = trees .iter() - .map(|tree| rewriter.rewrite_declarations(tree)) + .map(|parsed| rewriter.rewrite_declarations(&parsed.tree)) .collect(); // Pass 2: rewrite declarations and references using the complete map. diff --git a/src/cmd/script.rs b/src/cmd/script.rs index a7c41919..f1f5d5bc 100644 --- a/src/cmd/script.rs +++ b/src/cmd/script.rs @@ -6,10 +6,10 @@ use std::io::Write; use std::path::{Path, PathBuf}; -#[cfg(unix)] +#[cfg(all(unix, feature = "slang"))] use std::fs::canonicalize; -#[cfg(windows)] +#[cfg(all(windows, feature = "slang"))] use dunce::canonicalize; use clap::{ArgAction, Args, Subcommand, ValueEnum}; @@ -89,7 +89,7 @@ pub struct ScriptArgs { /// Trim unreachable Verilog files via the given top-level module(s) #[cfg(feature = "slang")] - #[arg(long, global = true, help_heading = "General Script Options")] + #[arg(long, global = true, help_heading = "Slang Options")] pub top: Vec, /// Drop unused include directories from the generated script @@ -99,10 +99,22 @@ pub struct ScriptArgs { value_enum, default_value_t, global = true, - help_heading = "General Script Options" + help_heading = "Slang Options" )] pub trim_incdirs: TrimIncdirs, + /// What to do with files slang reports parse errors on with no `pragma protect` envelope + /// [implicit default: error when slang runs; no effect otherwise] + #[cfg(feature = "slang")] + #[arg(long, value_enum, global = true, help_heading = "Slang Options")] + pub broken: Option, + + /// What to do with IEEE-1735 encrypted files slang cannot fully parse + /// [implicit default: keep when slang runs; no effect otherwise] + #[cfg(feature = "slang")] + #[arg(long, value_enum, global = true, help_heading = "Slang Options")] + pub encrypted: Option, + /// Format of the generated script #[command(subcommand)] pub format: ScriptFormat, @@ -133,6 +145,19 @@ pub enum TrimIncdirs { Never, } +/// What to do with a class of files slang reports parse errors on. +#[cfg(feature = "slang")] +#[derive(Copy, Clone, Debug, PartialEq, Eq, ValueEnum, Serialize)] +#[serde(rename_all = "snake_case")] +pub enum ParsePolicy { + /// Abort the run if any file of this class is found + Error, + /// Tolerate these files and include them in the script + Keep, + /// Tolerate these files but drop them from the script + Drop, +} + /// Common arguments for Vivado scripts #[derive(Args, Debug, Clone)] pub struct OnlyArgs { @@ -352,20 +377,42 @@ pub fn run(sess: &Session, args: &ScriptArgs) -> Result<()> { .collect::>>()?; // Slang-based filtering: trim unreachable Verilog files (when `--top` is given) and/or - // unused include directories (per `--trim-incdirs`). + // unused include directories (per `--trim-incdirs`), with per-class policies for files + // slang couldn't fully parse (`--broken`, `--encrypted`). #[cfg(feature = "slang")] - let srcs = { + let (srcs, unparseable_paths) = { let trim_incdirs = match args.trim_incdirs { TrimIncdirs::Always => true, TrimIncdirs::Never => false, TrimIncdirs::Auto => !args.top.is_empty(), }; - if args.top.is_empty() && !trim_incdirs { - srcs + // Skip the slang pass entirely when no flag requires it. `Keep` (the implicit default + // for encrypted, and what an explicit `--broken keep` / `--encrypted keep` says) is a + // no-op without slang — we'd keep the file anyway since no filter touched it. Only an + // explicit `error` or `drop` value needs slang to actually classify files. + let broken_policy = args.broken.unwrap_or(ParsePolicy::Error); + let encrypted_policy = args.encrypted.unwrap_or(ParsePolicy::Keep); + let policies_need_slang = matches!( + args.broken, + Some(ParsePolicy::Error) | Some(ParsePolicy::Drop) + ) || matches!( + args.encrypted, + Some(ParsePolicy::Error) | Some(ParsePolicy::Drop) + ); + if args.top.is_empty() && !trim_incdirs && !policies_need_slang { + (srcs, std::collections::HashSet::::new()) } else { - apply_slang_filters(srcs, &args.top, trim_incdirs)? + apply_slang_filters( + srcs, + &args.top, + trim_incdirs, + broken_policy, + encrypted_policy, + )? } }; + #[cfg(not(feature = "slang"))] + let unparseable_paths = std::collections::HashSet::::new(); let mut tera_context = Context::new(); let mut only_args = OnlyArgs { @@ -448,7 +495,15 @@ pub fn run(sess: &Session, args: &ScriptArgs) -> Result<()> { ScriptFormat::TemplateJson => JSON, }; - emit_template(sess, tera_context, template_content, args, only_args, srcs) + emit_template( + sess, + tera_context, + template_content, + args, + only_args, + srcs, + &unparseable_paths, + ) } /// Subdivide the source files in a group. @@ -487,16 +542,30 @@ where /// dropped (VHDL and untyped files are always retained, and any group that ends up with no /// files is dropped). When `trim_incdirs` is true, include directories slang did not resolve /// an `include through are dropped from `include_dirs` and `export_incdirs`. +/// +/// Files slang couldn't fully parse are classified into: +/// * encrypted — slang emitted a `ProtectedEnvelope` diag (IEEE-1735 protect block) +/// * broken — parse errors with no such diag (looks like a real syntax bug) +/// Each class is handled per its `ParsePolicy`: `Error` aborts the run, `Keep` includes the +/// file in the script, `Drop` tolerates but excludes it. Sensible defaults are `broken=Error` +/// and `encrypted=Keep`. +/// +/// Non-Verilog files (VHDL, untyped) intentionally bypass every filter here — slang doesn't +/// process them, so we have no reachability information and conservatively retain them all. +/// +/// Returns the filtered groups plus the set of unparseable file paths that survived filtering, +/// so the caller can annotate them in `source_annotations` output. #[cfg(feature = "slang")] fn apply_slang_filters<'a>( srcs: Vec>, top: &[String], trim_incdirs: bool, -) -> Result>> { - use std::collections::{HashMap, HashSet}; + broken_policy: ParsePolicy, + encrypted_policy: ParsePolicy, +) -> Result<(Vec>, std::collections::HashSet)> { + use std::collections::HashSet; let mut session = SlangSession::new(); - let mut index_to_path: HashMap = HashMap::new(); for src_group in &srcs { // Collect include dirs @@ -536,28 +605,69 @@ fn apply_slang_filters<'a>( .iter() .map(|p| p.to_string_lossy().into_owned()) .collect(); - let indices = session + session .parse_group(&file_paths, &include_dirs, &defines) .into_diagnostic()?; - for (idx, path) in indices.into_iter().zip(&paths) { - index_to_path.insert(idx, path); - } } } + // Discriminate encrypted IP (legal SystemVerilog, just hard to parse) from genuinely broken + // files (real syntax bugs). Encrypted ⇔ slang emitted a `ProtectedEnvelope` diag on that + // tree. Each tree carries its own source path, so we identify files by path directly. + let all_trees = session.all_trees(); + let mut encrypted_paths: HashSet = HashSet::new(); + let mut broken_paths: HashSet = HashSet::new(); + for parsed in &all_trees { + if parsed.parsed_ok { + continue; + } + if parsed.encrypted { + encrypted_paths.insert(PathBuf::from(&parsed.path)); + } else { + broken_paths.insert(PathBuf::from(&parsed.path)); + } + } + + let list = |set: &HashSet| -> String { + let mut v: Vec = set.iter().map(|p| p.display().to_string()).collect(); + v.sort(); + v.join("\n ") + }; + + // Policy enforcement: abort up front for any `Error` class. + if broken_policy == ParsePolicy::Error && !broken_paths.is_empty() { + return Err(miette::miette!( + "slang reported parse errors in {} file(s) with no `pragma protect` envelope (looks like real syntax bugs):\n {}\n\ + see diagnostics above; pass --broken keep or --broken drop to continue", + broken_paths.len(), + list(&broken_paths) + )); + } + if encrypted_policy == ParsePolicy::Error && !encrypted_paths.is_empty() { + return Err(miette::miette!( + "slang reported parse errors in {} encrypted file(s) and --encrypted error was requested:\n {}", + encrypted_paths.len(), + list(&encrypted_paths) + )); + } + + // After Error-class abort: any remaining unparseable file is either Kept or Dropped. + let broken_kept = broken_policy == ParsePolicy::Keep; + let encrypted_kept = encrypted_policy == ParsePolicy::Keep; + // Determine which trees feed into the include / file-retention questions. With `--top` we // only look at trees reachable from those top modules; without `--top` we use every tree // (relevant when the caller asked for include-dir trimming but no file filtering). let filter_files = !top.is_empty(); - let kept_indices: Vec = if filter_files { - session.reachable_indices(top).into_diagnostic()? + let kept_trees = if filter_files { + session.reachable_trees(top).into_diagnostic()? } else { - (0..session.tree_count()).collect() + all_trees }; let kept_paths: HashSet<&Path> = if filter_files { - kept_indices + kept_trees .iter() - .filter_map(|i| index_to_path.get(i).copied()) + .map(|t| Path::new(t.path.as_str())) .collect() } else { HashSet::new() @@ -568,8 +678,7 @@ fn apply_slang_filters<'a>( // cause spurious mismatches. let resolved_includes: Vec = if trim_incdirs { session - .resolved_include_paths(&kept_indices) - .into_diagnostic()? + .resolved_include_paths(&kept_trees) .into_iter() .map(PathBuf::from) .collect() @@ -581,12 +690,28 @@ fn apply_slang_filters<'a>( resolved_includes.iter().any(|f| f.starts_with(&canon)) }; - Ok(srcs + // Single retention rule per Verilog file: + // * broken files keep iff `broken_policy == Keep` (regardless of reachability) + // * encrypted files keep iff `encrypted_policy == Keep` (regardless of reachability) + // * everything else keep iff no --top filter is active, or it's reachable from one + let retain_verilog = |p: &Path| -> bool { + if broken_paths.contains(p) { + broken_kept + } else if encrypted_paths.contains(p) { + encrypted_kept + } else { + !filter_files || kept_paths.contains(p) + } + }; + let drop_anything = broken_policy == ParsePolicy::Drop || encrypted_policy == ParsePolicy::Drop; + let run_file_filter = filter_files || drop_anything; + + let filtered: Vec> = srcs .into_iter() .map(|mut group| { - if filter_files { + if run_file_filter { group.files.retain(|f| match f { - SourceFile::File(p, Some(SourceType::Verilog)) => kept_paths.contains(p), + SourceFile::File(p, Some(SourceType::Verilog)) => retain_verilog(p), _ => true, }); } @@ -600,7 +725,40 @@ fn apply_slang_filters<'a>( }) // Remove empty groups that may have resulted from filtering out all Verilog files. .filter(|group| !group.files.is_empty()) - .collect()) + .collect(); + + // Summary lines: report encrypted and broken classes separately so users can tell apart + // automatic vs explicit tolerance and the inclusion verdict at a glance. + let kept_unparseable: HashSet = encrypted_paths + .iter() + .filter(|_| encrypted_kept) + .chain(broken_paths.iter().filter(|_| broken_kept)) + .cloned() + .collect(); + let class_summary = |label: &str, set: &HashSet, policy: ParsePolicy| { + if set.is_empty() || policy == ParsePolicy::Error { + // Error was handled above; Keep/Drop are the only branches that reach here. + return; + } + let verb = match policy { + ParsePolicy::Keep => "kept in script output", + ParsePolicy::Drop => "dropped", + ParsePolicy::Error => unreachable!(), + }; + let mut names: Vec = set.iter().map(|p| p.display().to_string()).collect(); + names.sort(); + eprintln!( + "warning: {} {} file(s) ({}): {}", + names.len(), + label, + verb, + names.join(", "), + ); + }; + class_summary("encrypted", &encrypted_paths, encrypted_policy); + class_summary("broken", &broken_paths, broken_policy); + + Ok((filtered, kept_unparseable)) } static HEADER_AUTOGEN: &str = "This script was generated automatically by bender."; @@ -623,7 +781,17 @@ fn emit_template( args: &ScriptArgs, only: OnlyArgs, srcs: Vec, + unparseable_paths: &std::collections::HashSet, ) -> Result<()> { + // Helper for annotating FileEntry.comment on files that survived filtering despite slang + // failing to parse them; visible to users with `--source-annotations`. + let unparseable_comment = |p: &Path| -> Option { + if unparseable_paths.contains(p) { + Some("UNPARSEABLE: slang reported parse errors".to_string()) + } else { + None + } + }; tera_context.insert("HEADER_AUTOGEN", HEADER_AUTOGEN); tera_context.insert("root", sess.root); // tera_context.insert("srcs", &srcs); @@ -714,7 +882,7 @@ fn emit_template( }, None => FileEntry { file: file.0.to_path_buf(), - comment: file.1, + comment: file.1.or_else(|| unparseable_comment(file.0)), }, } }) @@ -781,7 +949,7 @@ fn emit_template( }, None => FileEntry { file: p.to_path_buf(), - comment: None, + comment: unparseable_comment(p), }, } } diff --git a/tests/pickle/Bender.yml b/tests/pickle/Bender.yml index 861040b1..e1f658fe 100644 --- a/tests/pickle/Bender.yml +++ b/tests/pickle/Bender.yml @@ -25,3 +25,13 @@ sources: - src/dup_a.sv - src/dup_b.sv - src/dup_top.sv + + - target: encrypted + files: + - src/encrypted_ip.sv + - src/encrypted_user.sv + - src/encrypted_top.sv + + - target: broken + files: + - src/broken.sv diff --git a/tests/pickle/src/encrypted_ip.sv b/tests/pickle/src/encrypted_ip.sv new file mode 100644 index 00000000..62119a42 --- /dev/null +++ b/tests/pickle/src/encrypted_ip.sv @@ -0,0 +1,12 @@ +// Realistic-shape IEEE-1735 encrypted module: slang lexes/skips the protect +// envelope but the parser still trips on the surrounding endmodule, which is +// what we want this fixture to exercise. +module encrypted_ip (); +`pragma protect begin_protected +`pragma protect encrypt_agent = "Test" +`pragma protect data_method = "aes128-cbc" +`pragma protect encoding = ( enctype = "base64", bytes = 33 ) +`pragma protect data_block +QUJDREVGRzEyMzQ1Njc4OTBhYmNkZWZnaGlqa2xtbm9wcXIK +`pragma protect end_protected +endmodule diff --git a/tests/pickle/src/encrypted_top.sv b/tests/pickle/src/encrypted_top.sv new file mode 100644 index 00000000..74dc0962 --- /dev/null +++ b/tests/pickle/src/encrypted_top.sv @@ -0,0 +1,3 @@ +module encrypted_top (); + encrypted_user u(); +endmodule diff --git a/tests/pickle/src/encrypted_user.sv b/tests/pickle/src/encrypted_user.sv new file mode 100644 index 00000000..f7fa526f --- /dev/null +++ b/tests/pickle/src/encrypted_user.sv @@ -0,0 +1,6 @@ +// Plain RTL that instantiates the encrypted IP. Slang parses this file fine +// but the encrypted_ip reference has no corresponding tree (the encrypted +// file failed to parse), so the dangling-ref tolerance kicks in. +module encrypted_user (); + encrypted_ip u_enc(); +endmodule diff --git a/tests/script.rs b/tests/script.rs index ca0b39a7..a3d3b0ed 100644 --- a/tests/script.rs +++ b/tests/script.rs @@ -26,6 +26,26 @@ mod tests { String::from_utf8(out.stdout).expect("stdout must be utf-8") } + /// Run the script subcommand expecting failure; returns the captured stderr. + fn run_script_failing(args: &[&str]) -> String { + let mut full_args = vec!["-d", "tests/pickle", "script"]; + full_args.extend(args); + + let out = cargo::cargo_bin_cmd!() + .args(&full_args) + .output() + .expect("Failed to execute bender binary"); + + assert!( + !out.status.success(), + "script command unexpectedly succeeded.\nstdout:\n{}\nstderr:\n{}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + + String::from_utf8(out.stderr).expect("stderr must be utf-8") + } + /// Return the path component after the last `/` or `\`. On Windows, bender's source paths /// can come out mixed (e.g. `D:\workspace\tests\pickle/src/top.sv`) while incdir paths are /// all-backslash because the Bender.yml entry has no embedded separator — so splitting on @@ -171,6 +191,171 @@ mod tests { assert!(!files.contains("unused_top.sv")); } + /// Encrypted RTL (IEEE-1735 protect envelopes) makes slang trip at the surrounding + /// `endmodule` even though the envelope itself is skipped. The filter must: + /// * not abort `bender script --top` because of slang errors in encrypted IP, and + /// * preserve the encrypted file in the output even though no internal reference resolves + /// to it (its module symbol is hidden behind the protect envelope). + #[test] + fn script_top_keeps_encrypted_file() { + let out = run_script(&[ + "--target", + "encrypted", + "--top", + "encrypted_top", + "flist-plus", + ]); + let files = source_basenames(&out); + assert!( + files.contains("encrypted_top.sv"), + "top file missing: {files:?}" + ); + assert!( + files.contains("encrypted_user.sv"), + "user of encrypted IP missing: {files:?}" + ); + assert!( + files.contains("encrypted_ip.sv"), + "encrypted IP must be force-kept despite parse errors: {files:?}" + ); + } + + /// `--encrypted drop` removes encrypted files from the output even when `--top` keeps + /// the rest of the reachable design. + #[test] + fn script_encrypted_drop_excludes_encrypted_file() { + let out = run_script(&[ + "--target", + "encrypted", + "--top", + "encrypted_top", + "--encrypted", + "drop", + "flist-plus", + ]); + let files = source_basenames(&out); + assert!(files.contains("encrypted_top.sv")); + assert!(files.contains("encrypted_user.sv")); + assert!( + !files.contains("encrypted_ip.sv"), + "encrypted IP should be dropped by --encrypted drop: {files:?}" + ); + } + + /// `--encrypted error` makes the run abort when any encrypted file is encountered — useful + /// as a CI lint for codebases that should be encryption-free. + #[test] + fn script_encrypted_error_aborts() { + let stderr = run_script_failing(&[ + "--target", + "encrypted", + "--encrypted", + "error", + "flist-plus", + ]); + assert!( + stderr.contains("encrypted file(s)") && stderr.contains("--encrypted error"), + "expected encrypted-lint abort message, got stderr:\n{stderr}" + ); + } + + /// `--source-annotations` adds a `// UNPARSEABLE: ...` comment above the kept encrypted file + /// so users reading the script can tell which entries slang couldn't analyze. + #[test] + fn script_source_annotations_marks_unparseable() { + let out = run_script(&[ + "--target", + "encrypted", + "--top", + "encrypted_top", + "--source-annotations", + "flist-plus", + ]); + assert!( + out.contains("// UNPARSEABLE: slang reported parse errors"), + "expected UNPARSEABLE annotation in output:\n{out}" + ); + } + + /// A file with a real syntax error (no `pragma protect` envelope) should abort the run by + /// default — `--broken` defaults to `error`, while encrypted IP is auto-tolerated. + #[test] + fn script_broken_file_fails_by_default() { + let stderr = run_script_failing(&["--target", "broken", "--top", "broken", "flist-plus"]); + assert!( + stderr.contains("looks like real syntax bugs"), + "expected real-bug error message, got stderr:\n{stderr}" + ); + assert!( + stderr.contains("--broken keep") || stderr.contains("--broken drop"), + "error should point at the --broken keep/drop escape hatches:\n{stderr}" + ); + } + + /// `--broken keep` lets users opt into tolerance for non-encrypted parse errors. The broken + /// file is kept in the output with a warning instead of aborting. + #[test] + fn script_broken_keep_includes_broken_file() { + let out = run_script(&[ + "--target", + "broken", + "--top", + "broken", + "--broken", + "keep", + "flist-plus", + ]); + let files = source_basenames(&out); + assert!( + files.contains("broken.sv"), + "broken.sv should survive with --broken keep: {files:?}" + ); + } + + /// `--broken drop` tolerates broken files but excludes them from the script — useful when + /// the downstream tool would choke on the broken file but you still want bender to finish. + #[test] + fn script_broken_drop_excludes_broken_file() { + let out = run_script(&[ + "--target", + "broken", + "--top", + "broken", + "--broken", + "drop", + "flist-plus", + ]); + let files = source_basenames(&out); + assert!( + !files.contains("broken.sv"), + "broken.sv should be dropped with --broken drop: {files:?}" + ); + } + + /// Explicit `--broken drop` triggers slang even without `--top`, so users can drop broken + /// files from a plain `bender script` output without setting up reachability filtering. + #[test] + fn script_broken_drop_without_top_excludes_broken_file() { + let out = run_script(&["--target", "broken", "--broken", "drop", "flist-plus"]); + let files = source_basenames(&out); + assert!( + !files.contains("broken.sv"), + "broken.sv should be dropped without --top when --broken drop is set: {files:?}" + ); + } + + /// `--broken keep` is a no-op without `--top` (keeping is what happens anyway when slang + /// doesn't run). Verifies broken.sv passes through with no warnings emitted. + #[test] + fn script_broken_keep_without_top_no_slang() { + let out = run_script(&["--target", "broken", "--broken", "keep", "flist-plus"]); + let files = source_basenames(&out); + assert!( + files.contains("broken.sv"), + "broken.sv should pass through with --broken keep: {files:?}" + ); + } + /// Regression test: when two files define the same module name, last-wins semantics apply. /// The file parsed last (dup_b.sv) wins; the earlier definition (dup_a.sv) is dropped. #[test]