Skip to content

Conversation

svkeerthy
Copy link
Contributor

No description provided.

@llvmbot llvmbot added mlgo llvm:analysis Includes value tracking, cost tables and constant folding labels Oct 2, 2025
@llvmbot
Copy link
Member

llvmbot commented Oct 2, 2025

@llvm/pr-subscribers-llvm-analysis

@llvm/pr-subscribers-mlgo

Author: S. VenkataKeerthy (svkeerthy)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/161711.diff

2 Files Affected:

  • (modified) llvm/include/llvm/Analysis/IR2Vec.h (+7-2)
  • (modified) llvm/lib/Analysis/IR2Vec.cpp (+43-43)
diff --git a/llvm/include/llvm/Analysis/IR2Vec.h b/llvm/include/llvm/Analysis/IR2Vec.h
index b7c301580a8a4..ed43f19b4a7d3 100644
--- a/llvm/include/llvm/Analysis/IR2Vec.h
+++ b/llvm/include/llvm/Analysis/IR2Vec.h
@@ -210,6 +210,13 @@ class VocabStorage {
   const_iterator end() const {
     return const_iterator(this, getNumSections(), 0);
   }
+
+  using VocabMap = std::map<std::string, Embedding>;
+  /// Parse the Key section of ParsedVocabValue into TargetVocab and set Dim
+  /// to its vector size; errors if missing, unparsable, or sizes mismatch.
+  static Error parseVocabSection(StringRef Key,
+                                 const json::Value &ParsedVocabValue,
+                                 VocabMap &TargetVocab, unsigned &Dim);
 };
 
 /// Class for storing and accessing the IR2Vec vocabulary.
@@ -600,8 +607,6 @@ class IR2VecVocabAnalysis : public AnalysisInfoMixin<IR2VecVocabAnalysis> {
 
   Error readVocabulary(VocabMap &OpcVocab, VocabMap &TypeVocab,
                        VocabMap &ArgVocab);
-  Error parseVocabSection(StringRef Key, const json::Value &ParsedVocabValue,
-                          VocabMap &TargetVocab, unsigned &Dim);
   void generateVocabStorage(VocabMap &OpcVocab, VocabMap &TypeVocab,
                             VocabMap &ArgVocab);
   void emitError(Error Err, LLVMContext &Ctx);
diff --git a/llvm/lib/Analysis/IR2Vec.cpp b/llvm/lib/Analysis/IR2Vec.cpp
index af30422b73759..295b6d33525d9 100644
--- a/llvm/lib/Analysis/IR2Vec.cpp
+++ b/llvm/lib/Analysis/IR2Vec.cpp
@@ -330,6 +330,43 @@ bool VocabStorage::const_iterator::operator!=(
   return !(*this == Other);
 }
 
+Error VocabStorage::parseVocabSection(StringRef Key,
+                                      const json::Value &ParsedVocabValue,
+                                      VocabMap &TargetVocab, unsigned &Dim) {
+  json::Path::Root Path("");
+  const json::Object *RootObj = ParsedVocabValue.getAsObject();
+  if (!RootObj)
+    return createStringError(errc::invalid_argument,
+                             "JSON root is not an object");
+
+  const json::Value *SectionValue = RootObj->get(Key);
+  if (!SectionValue)
+    return createStringError(errc::invalid_argument,
+                             "Missing '" + std::string(Key) +
+                                 "' section in vocabulary file");
+  if (!json::fromJSON(*SectionValue, TargetVocab, Path))
+    return createStringError(errc::illegal_byte_sequence,
+                             "Unable to parse '" + std::string(Key) +
+                                 "' section from vocabulary");
+
+  // An empty section has no first entry to read; report it as dimension zero.
+  Dim = TargetVocab.empty() ? 0 : TargetVocab.begin()->second.size();
+  if (Dim == 0)
+    return createStringError(errc::illegal_byte_sequence,
+                             "Dimension of '" + std::string(Key) +
+                                 "' section of the vocabulary is zero");
+
+  // Bind each entry by const reference so the check does not copy embeddings.
+  if (!std::all_of(TargetVocab.begin(), TargetVocab.end(),
+                   [Dim](const auto &E) { return E.second.size() == Dim; }))
+    return createStringError(
+        errc::illegal_byte_sequence,
+        "All vectors in the '" + std::string(Key) +
+            "' section of the vocabulary are not of the same dimension");
+
+  return Error::success();
+}
+
 // ==----------------------------------------------------------------------===//
 // Vocabulary
 //===----------------------------------------------------------------------===//
@@ -460,43 +497,6 @@ VocabStorage Vocabulary::createDummyVocabForTest(unsigned Dim) {
 // IR2VecVocabAnalysis
 //===----------------------------------------------------------------------===//
 
-Error IR2VecVocabAnalysis::parseVocabSection(
-    StringRef Key, const json::Value &ParsedVocabValue, VocabMap &TargetVocab,
-    unsigned &Dim) {
-  json::Path::Root Path("");
-  const json::Object *RootObj = ParsedVocabValue.getAsObject();
-  if (!RootObj)
-    return createStringError(errc::invalid_argument,
-                             "JSON root is not an object");
-
-  const json::Value *SectionValue = RootObj->get(Key);
-  if (!SectionValue)
-    return createStringError(errc::invalid_argument,
-                             "Missing '" + std::string(Key) +
-                                 "' section in vocabulary file");
-  if (!json::fromJSON(*SectionValue, TargetVocab, Path))
-    return createStringError(errc::illegal_byte_sequence,
-                             "Unable to parse '" + std::string(Key) +
-                                 "' section from vocabulary");
-
-  Dim = TargetVocab.begin()->second.size();
-  if (Dim == 0)
-    return createStringError(errc::illegal_byte_sequence,
-                             "Dimension of '" + std::string(Key) +
-                                 "' section of the vocabulary is zero");
-
-  if (!std::all_of(TargetVocab.begin(), TargetVocab.end(),
-                   [Dim](const std::pair<StringRef, Embedding> &Entry) {
-                     return Entry.second.size() == Dim;
-                   }))
-    return createStringError(
-        errc::illegal_byte_sequence,
-        "All vectors in the '" + std::string(Key) +
-            "' section of the vocabulary are not of the same dimension");
-
-  return Error::success();
-}
-
 // FIXME: Make this optional. We can avoid file reads
 // by auto-generating a default vocabulary during the build time.
 Error IR2VecVocabAnalysis::readVocabulary(VocabMap &OpcVocab,
@@ -513,16 +513,16 @@ Error IR2VecVocabAnalysis::readVocabulary(VocabMap &OpcVocab,
     return ParsedVocabValue.takeError();
 
   unsigned OpcodeDim = 0, TypeDim = 0, ArgDim = 0;
-  if (auto Err =
-          parseVocabSection("Opcodes", *ParsedVocabValue, OpcVocab, OpcodeDim))
+  if (auto Err = VocabStorage::parseVocabSection("Opcodes", *ParsedVocabValue,
+                                                 OpcVocab, OpcodeDim))
     return Err;
 
-  if (auto Err =
-          parseVocabSection("Types", *ParsedVocabValue, TypeVocab, TypeDim))
+  if (auto Err = VocabStorage::parseVocabSection("Types", *ParsedVocabValue,
+                                                 TypeVocab, TypeDim))
     return Err;
 
-  if (auto Err =
-          parseVocabSection("Arguments", *ParsedVocabValue, ArgVocab, ArgDim))
+  if (auto Err = VocabStorage::parseVocabSection("Arguments", *ParsedVocabValue,
+                                                 ArgVocab, ArgDim))
     return Err;
 
   if (!(OpcodeDim == TypeDim && TypeDim == ArgDim))

@svkeerthy svkeerthy merged commit 79d1524 into main Oct 2, 2025
10 of 11 checks passed
@svkeerthy svkeerthy deleted the users/svkeerthy/10-02-ir2vec-nfc branch October 2, 2025 23:35
mahesh-attarde pushed a commit to mahesh-attarde/llvm-project that referenced this pull request Oct 3, 2025
MixedMatched pushed a commit to MixedMatched/llvm-project that referenced this pull request Oct 3, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
llvm:analysis Includes value tracking, cost tables and constant folding mlgo
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants