From e4c6990185c714c2538d90ea790eb1f6cedec7d5 Mon Sep 17 00:00:00 2001
From: svkeerthy
Date: Wed, 8 Oct 2025 18:20:17 +0000
Subject: [PATCH] IR2Vec Flow-aware fix

---
 llvm/lib/Analysis/IR2Vec.cpp | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Analysis/IR2Vec.cpp b/llvm/lib/Analysis/IR2Vec.cpp
index 688535161d4b9..1794a604b991d 100644
--- a/llvm/lib/Analysis/IR2Vec.cpp
+++ b/llvm/lib/Analysis/IR2Vec.cpp
@@ -239,10 +239,21 @@ void FlowAwareEmbedder::computeEmbeddings(const BasicBlock &BB) const {
       // If the operand is defined elsewhere, we use its embedding
       if (const auto *DefInst = dyn_cast<Instruction>(Op)) {
         auto DefIt = InstVecMap.find(DefInst);
-        assert(DefIt != InstVecMap.end() &&
-               "Instruction should have been processed before its operands");
-        ArgEmb += DefIt->second;
-        continue;
+        // Fixme (#159171): Ideally we should never miss an instruction
+        // embedding here.
+        // But when we have cyclic dependencies (e.g., phi
+        // nodes), we might miss the embedding. In such cases, we fall back to
+        // using the vocabulary embedding. This can be fixed by iterating to a
+        // fixed-point, or by using a simple solver for the set of simultaneous
+        // equations.
+        // Another case when we might miss an instruction embedding is when
+        // the operand instruction is in a different basic block that has not
+        // been processed yet. This can be fixed by processing the basic blocks
+        // in a topological order.
+        if (DefIt != InstVecMap.end())
+          ArgEmb += DefIt->second;
+        else
+          ArgEmb += Vocab[*Op];
       }
       // If the operand is not defined by an instruction, we use the vocabulary
       else {