From 99a306f96a883857afb0f8045e01cea570ce3b27 Mon Sep 17 00:00:00 2001
From: Shangdi Yu
Date: Tue, 19 Aug 2025 11:41:52 -0700
Subject: [PATCH 1/2] refactor code from javascript to rust

---
 src/lib.rs          | 343 +++++++++++++++++++++++++++++++++++++++++++-
 src/provenance.html |   7 +-
 src/provenance.js   | 264 +++++-----------------------
 src/types.rs        |   2 +-
 4 files changed, 383 insertions(+), 233 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index b039de6..15b2e73 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1196,6 +1196,17 @@ pub fn parse_path(path: &PathBuf, config: &ParseConfig) -> anyhow::Result anyhow::Result serde_json::Value {
+    // Parse the node mappings JSON
+    let node_mappings: serde_json::Value = match serde_json::from_str(node_mappings_content) {
+        Ok(mappings) => mappings,
+        Err(_) => return serde_json::json!({}),
+    };
+
+    // Helper function to check if a line is valid (not empty and doesn't start with comment)
+    fn valid_line(line: &str, symbol: &str) -> bool {
+        let stripped = line.trim();
+        !stripped.is_empty() && !stripped.starts_with(symbol)
+    }
+
+    // Helper function to extract node name from a line
+    fn extract_node_name(line: &str) -> Option<String> {
+        let trimmed = line.trim();
+        if valid_line(trimmed, "#") {
+            // Split on '=' and take everything before it
+            let before_equals = trimmed.split('=').next()?;
+            // Split on ':' and take everything before it
+            let node_name = before_equals.split(':').next()?.trim();
+            if !node_name.is_empty() {
+                return Some(node_name.to_string());
+            }
+        }
+        None
+    }
+
+    // Helper function to build node-to-line lookup map from graph content
+    fn build_node_to_lines_map(content: &str) -> std::collections::HashMap<String, usize> {
+        let mut node_to_lines = std::collections::HashMap::new();
+        for (i, line) in content.lines().enumerate() {
+            if let Some(node_name) = extract_node_name(line) {
+                node_to_lines.insert(node_name, i + 1); // 1-based line numbers
+            }
+        }
+        node_to_lines
+    }
+
+    // Helper function to build Python kernel-to-lines lookup map
+    fn build_python_kernel_to_lines_map(
+        content: &str,
+        kernel_names: &[&str],
+    ) -> std::collections::HashMap<String, Vec<usize>> {
+        let content = content
+            .lines()
+            .skip_while(|line| line.is_empty())
+            .collect::<Vec<&str>>()
+            .join("\n");
+        let mut kernel_to_lines = std::collections::HashMap::new();
+
+        // Find the line number of "def call(args)" - allowing for whitespace between tokens
+        let run_impl_line = content
+            .lines()
+            .position(|line| {
+                line.contains("def") && line.contains("call") && line.contains("(args)")
+            })
+            .unwrap_or(0);
+        let first_line_number = content
+            .lines()
+            .position(|line| line.contains("# AOT ID:"))
+            .unwrap_or(0);
+
+        println!("run_impl_line: {}", run_impl_line);
+        for (i, line) in content.lines().enumerate().skip(run_impl_line) {
+            if valid_line(line, "#") {
+                for kernel_name in kernel_names {
+                    if line.contains(kernel_name) {
+                        kernel_to_lines
+                            .entry(kernel_name.to_string())
+                            .or_insert_with(Vec::new)
+                            .push(i + 1 - first_line_number);
+                    }
+                }
+            }
+        }
+        kernel_to_lines
+    }
+
+    // Helper function to build C++ kernel-to-lines lookup map
+    // We only consider lines after "::run_impl(" and skip the empty lines at the beginning when computing line numbers
+    fn build_cpp_kernel_to_lines_map(
+        content: &str,
+        kernel_names: &[&str],
+    ) -> std::collections::HashMap<String, Vec<usize>> {
+        // remove empty lines at the beginning and end of the content
+        // We need to do this because empty lines are ignored in html <pre> tags
+        let content = content
+            .lines()
+            .skip_while(|line| line.is_empty())
+            .collect::<Vec<&str>>()
+            .join("\n");
+        let mut kernel_to_lines = std::collections::HashMap::new();
+
+        // Find the line number of "::run_impl("
+        let run_impl_line = content
+            .lines()
+            .position(|line| line.contains("::run_impl("))
+            .unwrap_or(0);
+        for (i, line) in content.lines().enumerate().skip(run_impl_line) {
+            if valid_line(line, "//")
+                && valid_line(line, "def")
+                && valid_line(line, "static inline void")
+            {
+                for kernel_name in kernel_names {
+                    if line.contains(&format!("{}(", kernel_name)) {
+                        kernel_to_lines
+                            .entry(kernel_name.to_string())
+                            .or_insert_with(Vec::new)
+                            .push(i + 1);
+                    }
+                }
+            }
+        }
+        kernel_to_lines
+    }
+
+    // Helper function to process mappings from source to target
+    fn process_mappings<F>(
+        source_mappings: &serde_json::Map<String, serde_json::Value>,
+        source_lookup: &std::collections::HashMap<String, usize>,
+        target_lookup: &std::collections::HashMap<String, usize>,
+        target_line_processor: F,
+    ) -> std::collections::HashMap<usize, Vec<usize>>
+    where
+        F: Fn(&str) -> Option<usize>,
+    {
+        let mut result = std::collections::HashMap::new();
+
+        for (source_node, target_nodes) in source_mappings {
+            if let Some(source_line) = source_lookup.get(source_node) {
+                let mut target_lines = Vec::new();
+                if let Some(target_nodes_array) = target_nodes.as_array() {
+                    for target_node in target_nodes_array {
+                        if let Some(target_node_str) = target_node.as_str() {
+                            if let Some(target_line) = target_line_processor(target_node_str) {
+                                target_lines.push(target_line);
+                            }
+                        }
+                    }
+                }
+                if !target_lines.is_empty() {
+                    result.insert(*source_line, target_lines);
+                }
+            }
+        }
+        result
+    }
+
+    // Helper function to process kernel-to-post mappings
+    fn process_kernel_to_post_mappings(
+        kernel_mappings: &serde_json::Map<String, serde_json::Value>,
+        kernel_lookup: &std::collections::HashMap<String, Vec<usize>>,
+        post_lookup: &std::collections::HashMap<String, usize>,
+    ) -> std::collections::HashMap<usize, Vec<usize>> {
+        let mut result = std::collections::HashMap::new();
+
+        for (kernel_name, post_nodes) in kernel_mappings {
+            if let Some(kernel_lines) = kernel_lookup.get(kernel_name) {
+                for kernel_line in kernel_lines {
+                    let mut target_lines = Vec::new();
+                    if let Some(post_nodes_array) = post_nodes.as_array() {
+                        for post_node in post_nodes_array {
+                            if let Some(post_node_str) = post_node.as_str() {
+                                if let Some(post_line) = post_lookup.get(post_node_str) {
+                                    target_lines.push(*post_line);
+                                }
+                            }
+                        }
+                    }
+                    if !target_lines.is_empty() {
+                        result.insert(*kernel_line, target_lines);
+                    }
+                }
+            }
+        }
+        result
+    }
+
+    // Helper function to process post-to-kernel mappings
+    fn process_post_to_kernel_mappings(
+        post_mappings: &serde_json::Map<String, serde_json::Value>,
+        post_lookup: &std::collections::HashMap<String, usize>,
+        kernel_lookup: &std::collections::HashMap<String, Vec<usize>>,
+    ) -> std::collections::HashMap<usize, Vec<usize>> {
+        let mut result = std::collections::HashMap::new();
+
+        for (post_node, kernel_names) in post_mappings {
+            if let Some(post_line) = post_lookup.get(post_node) {
+                let mut target_lines = Vec::new();
+                if let Some(kernel_names_array) = kernel_names.as_array() {
+                    for kernel_name in kernel_names_array {
+                        if let Some(kernel_name_str) = kernel_name.as_str() {
+                            if let Some(kernel_lines) = kernel_lookup.get(kernel_name_str) {
+                                target_lines.extend(kernel_lines);
+                            }
+                        }
+                    }
+                }
+                if !target_lines.is_empty() {
+                    result.insert(*post_line, target_lines);
+                }
+            }
+        }
+        result
+    }
+
+    // Helper function to convert HashMap to JSON Map
+    fn hashmap_to_json_map(
+        map: std::collections::HashMap<usize, Vec<usize>>,
+    ) -> serde_json::Map<String, serde_json::Value> {
+        map.into_iter()
+            .map(|(k, v)| (k.to_string(), serde_json::json!(v)))
+            .collect()
+    }
+
+    let kernel_names: Vec<&str> = node_mappings
+        .get("cppCodeToPost")
+        .and_then(|v| v.as_object())
+        .map(|obj| obj.keys().map(|k| k.as_str()).collect())
+        .unwrap_or_default();
+
+    // Build lookup maps
+    let pre_grad_node_to_lines = build_node_to_lines_map(pre_grad_graph_content);
+    let post_grad_node_to_lines = build_node_to_lines_map(post_grad_graph_content);
+    let py_kernel_to_lines = build_python_kernel_to_lines_map(output_code_content, &kernel_names);
+    let cpp_code_to_lines = build_cpp_kernel_to_lines_map(aot_code_content, &kernel_names);
+    println!("py_kernel_to_lines: {:?}", py_kernel_to_lines);
+    println!("cpp_kernel_names: {:?}", kernel_names);
+    println!("cpp_code_to_lines: {:?}", cpp_code_to_lines);
+
+    // Process all mappings using helper functions
+    let line_pre_to_post =
+        if let Some(pre_to_post) = node_mappings.get("preToPost").and_then(|v| v.as_object()) {
+            process_mappings(
+                pre_to_post,
+                &pre_grad_node_to_lines,
+                &post_grad_node_to_lines,
+                |node_name| post_grad_node_to_lines.get(node_name).copied(),
+            )
+        } else {
+            std::collections::HashMap::new()
+        };
+
+    let line_post_to_pre =
+        if let Some(post_to_pre) = node_mappings.get("postToPre").and_then(|v| v.as_object()) {
+            process_mappings(
+                post_to_pre,
+                &post_grad_node_to_lines,
+                &pre_grad_node_to_lines,
+                |node_name| pre_grad_node_to_lines.get(node_name).copied(),
+            )
+        } else {
+            std::collections::HashMap::new()
+        };
+
+    let line_cpp_code_to_post = if let Some(cpp_code_to_post) = node_mappings
+        .get("cppCodeToPost")
+        .and_then(|v| v.as_object())
+    {
+        process_kernel_to_post_mappings(
+            cpp_code_to_post,
+            &cpp_code_to_lines,
+            &post_grad_node_to_lines,
+        )
+    } else {
+        std::collections::HashMap::new()
+    };
+
+    let line_post_to_cpp_code = if let Some(post_to_cpp_code) = node_mappings
+        .get("postToCppCode")
+        .and_then(|v| v.as_object())
+    {
+        process_post_to_kernel_mappings(
+            post_to_cpp_code,
+            &post_grad_node_to_lines,
+            &cpp_code_to_lines,
+        )
+    } else {
+        std::collections::HashMap::new()
+    };
+
+    let line_py_code_to_post = if let Some(cpp_code_to_post) = node_mappings
+        .get("cppCodeToPost")
+        .and_then(|v| v.as_object())
+    {
+        process_kernel_to_post_mappings(
+            cpp_code_to_post,
+            &py_kernel_to_lines,
+            &post_grad_node_to_lines,
+        )
+    } else {
+        std::collections::HashMap::new()
+    };
+
+    let line_post_to_py_code = if let Some(post_to_cpp_code) = node_mappings
+        .get("postToCppCode")
+        .and_then(|v| v.as_object())
+    {
+        process_post_to_kernel_mappings(
+            post_to_cpp_code,
+            &post_grad_node_to_lines,
+            &py_kernel_to_lines,
+        )
+    } else {
+        std::collections::HashMap::new()
+    };
+
+    // Convert all HashMaps to JSON objects
+    serde_json::json!({
+        "preToPost": hashmap_to_json_map(line_pre_to_post),
+        "postToPre": hashmap_to_json_map(line_post_to_pre),
+        "pyCodeToPost": hashmap_to_json_map(line_py_code_to_post),
+        "postToPyCode": hashmap_to_json_map(line_post_to_py_code),
+        "cppCodeToPost": hashmap_to_json_map(line_cpp_code_to_post),
+        "postToCppCode": hashmap_to_json_map(line_post_to_cpp_code)
+    })
+}
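The new Rust path mirrors what the JavaScript below used to do: build a node-name to line-number lookup for each graph dump, then rewrite the node-level mappings into line-level ones. A small, self-contained sketch of that flow on toy data follows; the graph text and node names are invented for illustration, and the helpers are simplified stand-ins for the ones in the diff above.

    use std::collections::HashMap;

    // Pull the node name out of a graph line such as
    // "    mul: f32[8, 10] = torch.ops.aten.mul.Tensor(x, 2)".
    fn node_name(line: &str) -> Option<&str> {
        let trimmed = line.trim();
        if trimmed.is_empty() || trimmed.starts_with('#') {
            return None;
        }
        let name = trimmed.split('=').next()?.split(':').next()?.trim();
        (!name.is_empty()).then_some(name)
    }

    fn main() {
        // Toy pre-grad and post-grad graph dumps (illustrative only).
        let pre_grad = "graph():\n    x = placeholder()\n    mul = aten.mul.Tensor(x, 2)";
        let post_grad = "graph():\n    arg0_1 = placeholder()\n    mul_1 = aten.mul.Tensor(arg0_1, 2)";

        // Node name -> 1-based line number, as build_node_to_lines_map does.
        let to_lines = |graph: &str| -> HashMap<String, usize> {
            graph
                .lines()
                .enumerate()
                .filter_map(|(i, l)| node_name(l).map(|n| (n.to_string(), i + 1)))
                .collect()
        };
        let pre = to_lines(pre_grad);
        let post = to_lines(post_grad);

        // A node-level "preToPost" mapping rewritten into line numbers,
        // the same shape process_mappings produces.
        let pre_to_post_nodes: HashMap<&str, Vec<&str>> = HashMap::from([("mul", vec!["mul_1"])]);
        let mut line_pre_to_post: HashMap<usize, Vec<usize>> = HashMap::new();
        for (src, targets) in &pre_to_post_nodes {
            if let Some(&src_line) = pre.get(*src) {
                let lines: Vec<usize> =
                    targets.iter().filter_map(|t| post.get(*t).copied()).collect();
                if !lines.is_empty() {
                    line_pre_to_post.insert(src_line, lines);
                }
            }
        }
        println!("{:?}", line_pre_to_post); // prints {3: [3]}
    }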
diff --git a/src/provenance.html b/src/provenance.html
index 1690963..09201e4 100644
--- a/src/provenance.html
+++ b/src/provenance.html
@@ -25,9 +25,12 @@
         
     
 
+    
+    
+
     
 
diff --git a/src/provenance.js b/src/provenance.js
index 55804de..52f4d56 100644
--- a/src/provenance.js
+++ b/src/provenance.js
@@ -10,246 +10,52 @@ let postToPyCode = {};
 let postToCppCode = {};
 let cppCodeToPost = {};
 
-let jsonData = null;
+let lineMappings = null;
 
 /**
- * Converts node-based mappings to line number-based mappings for visualization.
+ * Initializes the line number mappings from the pre-processed data.
  * 
- * This function processes four types of files and their relationships:
- * 1. Pre-grad graph (FX IR before autograd and any pre_grad pass)
- * 2. Post-grad graph (FX IR after autograd)
- * 3. Generated Python triton code (produced by JIT inductor)
- * 3. Generated C++ code  (produced by AOT inductor)
- * 
- * The conversion happens in several steps:
- * 
- * 1. First, it creates lookup maps that associate node names with line numbers:
- *    - For pre/post grad graphs: Extracts node names from lines like "node_name = ..." or "node_name: ... = ..."
- *    - For C++/python code: Identifies kernel definitions and their associated line ranges
- * 
- * 2. Then, it processes four types of mappings:
- *    - preToPost: Maps pre-grad nodes to post-grad nodes
- *    - postToPre: Maps post-grad nodes back to pre-grad nodes
- *    - cppCodeToPost: Maps C++/triton kernel lines to post-grad nodes
- *    - postToCppCode: Maps post-grad nodes to C++ kernel lines
- * 
- * 3. For each mapping type, it:
- *    - Looks up the line numbers for the source nodes
- *    - Looks up the line numbers for the target nodes
- *    - Creates a new mapping using line numbers instead of node names
- * 
- * Special handling for C++ code:
- * - C++ kernels span multiple lines (from kernel definition to launch)
- * - Each kernel's line range includes:
- *   * The nullptr check line
- *   * All lines up to and including the launchKernel call
- *   * One line after the launch for completeness
- * 
- * The function updates these global variables:
+ * This function expects the line mappings to have already been converted from node
+ * mappings to line-number mappings by the Rust backend. The mappings should contain:
  * - preToPost: {sourceLineNum: [targetLineNums]}
  * - postToPre: {sourceLineNum: [targetLineNums]}
+ * - pyCodeToPost: {sourceLineNum: [targetLineNums]}
+ * - postToPyCode: {sourceLineNum: [targetLineNums]}
  * - cppCodeToPost: {sourceLineNum: [targetLineNums]}
  * - postToCppCode: {sourceLineNum: [targetLineNums]}
  * 
  * These mappings enable the UI to highlight corresponding lines
  * across different views when a user clicks on a line.
  */
-function convertNodeMappingsToLineNumbers() {
-    if (!nodeMappings) {
-        console.warn('No node mappings available');
-        return;
-    }
-
-    function validLine(line, symbol = "#") {
-        const stripped = line.trim();
-        return stripped && !stripped.startsWith(symbol);
-    }
-
-    // Create lookup maps for both files
-    const preGradNodeToLines = {};
-    const postGradNodeToLines = {};
-    const pyKernelToLines = {};
-    const cppCodeToLines = {};
-
-    // Build pre_grad graph lookup map
-    preGradGraphData.forEach((line, i) => {
-        if (validLine(line)) {
-            // Split on '=' and take everything before it
-            const beforeEquals = line.trim().split("=")[0];
-            // Split on ':' and take everything before it
-            const nodeName = beforeEquals.split(":")[0].trim();
-            if (nodeName) {
-                preGradNodeToLines[nodeName] = i + 1;  // 1-based line numbers
-            }
-        }
-    });
-
-    // Build post_grad lookup map
-    postGradGraphData.forEach((line, i) => {
-        if (validLine(line)) {
-            // Split on '=' and take everything before it
-            const beforeEquals = line.trim().split("=")[0];
-            // Split on ':' and take everything before it
-            const nodeName = beforeEquals.split(":")[0].trim();
-            if (nodeName) {
-                postGradNodeToLines[nodeName] = i + 1;  // 1-based line numbers
-            }
-        }
-    });
-
-    // Build generated python code lookup map
-    let currentKernelName = null;
-    let currentKernelLines = [];
-
-    if (codeData) {
-        codeData.forEach((line, i) => {
-            if (validLine(line)) {
-                if (line.includes('async_compile.triton(')) {
-                    currentKernelName = line.split('=')[0].trim();
-                    currentKernelLines = [i + 1];  // Start collecting lines
-                } else if (line.includes("''', device_str='cuda')") && currentKernelName) {
-                    currentKernelLines.push(i + 1);  // Add the last line
-                    pyKernelToLines[currentKernelName] = currentKernelLines;
-                    currentKernelName = null;
-                    currentKernelLines = [];
-                } else if (currentKernelName) {
-                    currentKernelLines.push(i + 1);  // Add lines in between
-                }
-            }
-        });
-    }
-
-    if (cppCodeData) {
-        let kernelNames = Object.keys(nodeMappings["cppCodeToPost"]);
-
-        // Build generated cpp wrapper code lookup map
-        for (let i = 0; i < cppCodeData.length; i++) {
-            const line = cppCodeData[i];
-            // check if the line include any of the kernel names
-            // Skip definition lines, highlight the launch line
-            if (validLine(line, "//") && validLine(line, "def") && validLine(line, "static inline void") && kernelNames.some(kernelName => line.includes(kernelName + "("))) {
-                // let kernelName be the first match
-                const kernelName = kernelNames.find(kernelName => line.includes(kernelName + "("));
-                // create an array for the kernel name if it doesn't exist
-                if (!cppCodeToLines[kernelName]) {
-                    cppCodeToLines[kernelName] = [];
-                }
-                // add the line number to the array
-                cppCodeToLines[kernelName].push(i + 1);
-            }
-        }
-    }
-
-    // Process all mappings
-    const linePreToPost = {};
-    const linePostToPre = {};
-    const linePyCodeToPost = {};
-    const linePostToPyCode = {};
-    const lineCppCodeToPost = {};
-    const linePostToCppCode = {};
-
-    // Process preToPost using lookup maps
-    for (const [fxNodeName, genCodeNodes] of Object.entries(nodeMappings["preToPost"])) {
-        if (fxNodeName in preGradNodeToLines) {
-            const fxLineNum = preGradNodeToLines[fxNodeName];
-            linePreToPost[fxLineNum] = [];
-            for (const genNodeName of genCodeNodes) {
-                if (genNodeName in postGradNodeToLines) {
-                    linePreToPost[fxLineNum].push(postGradNodeToLines[genNodeName]);
-                }
-            }
-        }
-    }
-
-    // Process postToPre using lookup maps
-    for (const [genNodeName, fxNodeNames] of Object.entries(nodeMappings["postToPre"])) {
-        if (genNodeName in postGradNodeToLines) {
-            const genLineNum = postGradNodeToLines[genNodeName];
-            linePostToPre[genLineNum] = [];
-            for (const fxNodeName of fxNodeNames) {
-                if (fxNodeName in preGradNodeToLines) {
-                    linePostToPre[genLineNum].push(preGradNodeToLines[fxNodeName]);
-                }
-            }
-        }
-    }
-
-    // Process pyCodeToPost using lookup maps
-    for (const [pyKernelName, postGradNodeNames] of Object.entries(nodeMappings["cppCodeToPost"] || {})) {
-        if (pyKernelName in pyKernelToLines) {
-            const genLineNums = pyKernelToLines[pyKernelName];
-            for (const genLineNum of genLineNums) {
-                if (!(genLineNum in linePyCodeToPost)) {
-                    linePyCodeToPost[genLineNum] = [];
-                }
-                for (const postGradNodeName of postGradNodeNames) {
-                    if (postGradNodeName in postGradNodeToLines) {
-                        linePyCodeToPost[genLineNum].push(postGradNodeToLines[postGradNodeName]);
-                    }
-                }
-            }
-        }
-    }
-
-    // Process postToPyCode using lookup maps
-    for (const [postGradNode, pyKernelNames] of Object.entries(nodeMappings["postToCppCode"] || {})) {
-        if (postGradNode in postGradNodeToLines) {
-            const genLineNum = postGradNodeToLines[postGradNode];
-            linePostToPyCode[genLineNum] = [];
-            for (const pyKernelName of pyKernelNames) {
-                if (pyKernelName in pyKernelToLines) {
-                    linePostToPyCode[genLineNum].push(...pyKernelToLines[pyKernelName]);
-                }
-            }
-        }
-    }
-
-    // Process cppCodeToPost using lookup maps
-    for (const [cppCodeKernelName, postGradNodeNames] of Object.entries(nodeMappings["cppCodeToPost"])) {
-        if (cppCodeKernelName in cppCodeToLines) {
-            const genLineNums = cppCodeToLines[cppCodeKernelName];
-            for (const genLineNum of genLineNums) {
-                if (!(genLineNum in lineCppCodeToPost)) {
-                    lineCppCodeToPost[genLineNum] = [];
-                }
-                for (const postGradNodeName of postGradNodeNames) {
-                    if (postGradNodeName in postGradNodeToLines) {
-                        lineCppCodeToPost[genLineNum].push(postGradNodeToLines[postGradNodeName]);
-                    }
-                }
-            }
-        }
-    }
-
-    // Process postToCppCode using lookup maps
-    for (const [postGradNode, cppCodeKernelNames] of Object.entries(nodeMappings["postToCppCode"])) {
-        if (postGradNode in postGradNodeToLines) {
-            const genLineNum = postGradNodeToLines[postGradNode];
-            linePostToCppCode[genLineNum] = [];
-            for (const cppCodeKernelName of cppCodeKernelNames) {
-                if (cppCodeKernelName in cppCodeToLines) {
-                    linePostToCppCode[genLineNum].push(...cppCodeToLines[cppCodeKernelName]);
-                }
-            }
+function initializeLineMappings() {
+    try {
+        // Get the line mappings from the embedded JSON data
+        const lineMappingsElement = document.getElementById('lineMappings');
+        if (lineMappingsElement) {
+            lineMappings = JSON.parse(lineMappingsElement.textContent);
+            
+            // Update global variables with the line mappings
+            preToPost = lineMappings.preToPost || {};
+            postToPre = lineMappings.postToPre || {};
+            pyCodeToPost = lineMappings.pyCodeToPost || {};
+            postToPyCode = lineMappings.postToPyCode || {};
+            cppCodeToPost = lineMappings.cppCodeToPost || {};
+            postToCppCode = lineMappings.postToCppCode || {};
+            
+            console.log('Line mappings initialized:', {
+                preToPost,
+                postToPre,
+                pyCodeToPost,
+                postToPyCode,
+                cppCodeToPost,
+                postToCppCode
+            });
+        } else {
+            console.warn('No line mappings element found');
         }
+    } catch (error) {
+        console.error('Error initializing line mappings:', error);
     }
-
-    // Update global variables
-    preToPost = linePreToPost;
-    postToPre = linePostToPre;
-    pyCodeToPost = linePyCodeToPost;
-    postToPyCode = linePostToPyCode;
-    cppCodeToPost = lineCppCodeToPost;
-    postToCppCode = linePostToCppCode;
-
-    console.log('Mappings converted to line numbers:', {
-        preToPost,
-        postToPre,
-        pyCodeToPost,
-        postToPyCode,
-        cppCodeToPost,
-        postToCppCode
-    });
 }
 
 
@@ -374,8 +180,8 @@ function initializeData() {
             }
         }
 
-        // Convert node mappings to line numbers
-        convertNodeMappingsToLineNumbers();
+        // Initialize line mappings from pre-processed data
+        initializeLineMappings();
 
         // Setup highlighting
         setupEditorContent('preGradGraph', preGradGraphData);
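For reference, the object initializeLineMappings parses is the one produced on the Rust side: six keys, each mapping a source line number (serialized as a string key) to an array of target line numbers. A minimal serde_json sketch of that shape, and of the lookup the UI performs on a click; the concrete line numbers here are invented.

    use serde_json::json;

    fn main() {
        // Illustrative line-level mappings; the real values come from
        // convert_node_mappings_to_line_numbers in src/lib.rs.
        let line_mappings = json!({
            "preToPost":     { "3": [4], "5": [6, 7] },
            "postToPre":     { "4": [3], "6": [5], "7": [5] },
            "pyCodeToPost":  { "42": [6] },
            "postToPyCode":  { "6": [42] },
            "cppCodeToPost": { "118": [6] },
            "postToCppCode": { "6": [118] }
        });

        // What the frontend does when line 5 of the pre-grad graph is clicked:
        // look up which post-grad lines to highlight.
        let clicked_line = 5;
        let targets = line_mappings["preToPost"][clicked_line.to_string()]
            .as_array()
            .cloned()
            .unwrap_or_default();
        println!("highlight post-grad lines: {:?}", targets); // [6, 7]
    }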
diff --git a/src/types.rs b/src/types.rs
index 0351dca..5b7dc7a 100644
--- a/src/types.rs
+++ b/src/types.rs
@@ -900,7 +900,7 @@ pub struct ProvenanceContext<'a> {
     pub post_grad_graph_content: String,
     pub output_code_content: String,
     pub aot_code_content: String,
-    pub node_mappings_content: String,
+    pub line_mappings_content: String,
 }
 
 #[derive(Debug, Clone, Copy, Default, serde::Serialize, serde::Deserialize)]

From ecac70f1efba9c960e860caea645badb845889be Mon Sep 17 00:00:00 2001
From: Shangdi Yu 
Date: Tue, 19 Aug 2025 12:42:00 -0700
Subject: [PATCH 2/2] add debug handle

---
 src/lib.rs                                    |  110 +-
 ...ductor_provenance_aot_debug_handle_log.txt | 2740 +++++++++++++++++
 ...ductor_provenance_jit_debug_handle_log.txt | 2331 ++++++++++++++
 tests/integration_test.rs                     | 1144 ++++++-
 4 files changed, 6309 insertions(+), 16 deletions(-)
 create mode 100644 tests/inputs/inductor_provenance_aot_debug_handle_log.txt
 create mode 100644 tests/inputs/inductor_provenance_jit_debug_handle_log.txt

diff --git a/src/lib.rs b/src/lib.rs
index 15b2e73..523ad06 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1413,6 +1413,13 @@ fn convert_node_mappings_to_line_numbers(
         Err(_) => return serde_json::json!({}),
     };
 
+    let version = node_mappings
+        .get("version")
+        .and_then(|v| v.as_f64())
+        .unwrap_or(1.0) as i64;
+    #[cfg(debug_assertions)]
+    println!("Inductor Provenance Tracking Mapping Version: {}", version);
+
     // Helper function to check if a line is valid (not empty and doesn't start with comment)
     fn valid_line(line: &str, symbol: &str) -> bool {
         let stripped = line.trim();
@@ -1449,6 +1456,7 @@ fn convert_node_mappings_to_line_numbers(
     fn build_python_kernel_to_lines_map(
         content: &str,
         kernel_names: &[&str],
+        _version: i64,
     ) -> std::collections::HashMap<String, Vec<usize>> {
         let content = content
             .lines()
@@ -1469,11 +1477,43 @@ fn convert_node_mappings_to_line_numbers(
             .position(|line| line.contains("# AOT ID:"))
             .unwrap_or(0);
 
-        println!("run_impl_line: {}", run_impl_line);
-        for (i, line) in content.lines().enumerate().skip(run_impl_line) {
-            if valid_line(line, "#") {
-                for kernel_name in kernel_names {
+        // For each kernel name (e.g. triton_poi_fused_mul_1:2):
+        // - Extract pure_kernel_name (triton_poi_fused_mul_1) before the ':'
+        // - If the full kernel name is found: map to the next line containing pure_kernel_name
+        // - If the full kernel name is not found: map to all lines containing pure_kernel_name
+        for kernel_name in kernel_names {
+            // Get pure kernel name before ':' if it exists
+            let pure_kernel_name = if let Some(idx) = kernel_name.find(':') {
+                &kernel_name[..idx]
+            } else {
+                kernel_name
+            };
+
+            let mut found = false;
+            // If kernel_name contains a debug handle and we found it, we can stop after first match
+            if kernel_name.contains(':') {
+                for (i, line) in content.lines().enumerate().skip(run_impl_line) {
                     if line.contains(kernel_name) {
+                        // Found kernel name, look for next line with pure_kernel_name
+                        for (j, next_line) in content.lines().enumerate().skip(i + 1) {
+                            if next_line.contains(pure_kernel_name) {
+                                kernel_to_lines
+                                    .entry(kernel_name.to_string())
+                                    .or_insert_with(Vec::new)
+                                    .push(j + 1 - first_line_number);
+                                found = true;
+                                break;
+                            }
+                        }
+                        break;
+                    }
+                }
+            }
+
+            // If exact kernel name not found, map all lines with pure kernel name
+            if !found {
+                for (i, line) in content.lines().enumerate().skip(run_impl_line) {
+                    if line.contains(pure_kernel_name) {
                         kernel_to_lines
                             .entry(kernel_name.to_string())
                             .or_insert_with(Vec::new)
@@ -1490,6 +1530,7 @@ fn convert_node_mappings_to_line_numbers(
     fn build_cpp_kernel_to_lines_map(
         content: &str,
         kernel_names: &[&str],
+        _version: i64,
     ) -> std::collections::HashMap<String, Vec<usize>> {
         // remove empty lines at the beginning and end of the content
         // We need to do this because empty lines are ignored in html <pre> tags
@@ -1505,13 +1546,47 @@ fn convert_node_mappings_to_line_numbers(
             .lines()
             .position(|line| line.contains("::run_impl("))
             .unwrap_or(0);
-        for (i, line) in content.lines().enumerate().skip(run_impl_line) {
-            if valid_line(line, "//")
-                && valid_line(line, "def")
-                && valid_line(line, "static inline void")
-            {
-                for kernel_name in kernel_names {
-                    if line.contains(&format!("{}(", kernel_name)) {
+
+        // For each kernel name (e.g. triton_poi_fused_mul_1:2):
+        // - Extract pure_kernel_name (triton_poi_fused_mul_1) before the ':'
+        // - If the full kernel name is found: map to the next line containing pure_kernel_name
+        // - If the full kernel name is not found: map to all lines containing pure_kernel_name
+        for kernel_name in kernel_names {
+            // Get pure kernel name before ':' if it exists
+            let pure_kernel_name = if let Some(idx) = kernel_name.find(':') {
+                &kernel_name[..idx]
+            } else {
+                kernel_name
+            };
+
+            let mut found = false;
+            if kernel_name.contains(':') {
+                for (i, line) in content.lines().enumerate().skip(run_impl_line) {
+                    if valid_line(line, "def")
+                        && valid_line(line, "static inline void")
+                        && line.contains(kernel_name)
+                    {
+                        // Found exact kernel name - map to next matching line
+                        let next_line = content
+                            .lines()
+                            .skip(i + 1)
+                            .position(|l| l.contains(pure_kernel_name))
+                            .map(|pos| i + pos + 2);
+
+                        if let Some(line_num) = next_line {
+                            kernel_to_lines
+                                .entry(kernel_name.to_string())
+                                .or_insert_with(Vec::new)
+                                .push(line_num);
+                            found = true;
+                            break;
+                        }
+                    }
+                }
+            }
+            if !found {
+                for (i, line) in content.lines().enumerate().skip(run_impl_line) {
+                    if line.contains(pure_kernel_name) {
                         kernel_to_lines
                             .entry(kernel_name.to_string())
                             .or_insert_with(Vec::new)
@@ -1527,7 +1602,7 @@ fn convert_node_mappings_to_line_numbers(
     fn process_mappings<F>(
         source_mappings: &serde_json::Map<String, serde_json::Value>,
         source_lookup: &std::collections::HashMap<String, usize>,
-        target_lookup: &std::collections::HashMap<String, usize>,
+        _target_lookup: &std::collections::HashMap<String, usize>,
         target_line_processor: F,
     ) -> std::collections::HashMap<usize, Vec<usize>>
     where
@@ -1631,10 +1706,15 @@ fn convert_node_mappings_to_line_numbers(
     // Build lookup maps
     let pre_grad_node_to_lines = build_node_to_lines_map(pre_grad_graph_content);
     let post_grad_node_to_lines = build_node_to_lines_map(post_grad_graph_content);
-    let py_kernel_to_lines = build_python_kernel_to_lines_map(output_code_content, &kernel_names);
-    let cpp_code_to_lines = build_cpp_kernel_to_lines_map(aot_code_content, &kernel_names);
+    let py_kernel_to_lines =
+        build_python_kernel_to_lines_map(output_code_content, &kernel_names, version);
+    let cpp_code_to_lines = build_cpp_kernel_to_lines_map(aot_code_content, &kernel_names, version);
+
+    #[cfg(debug_assertions)]
+    println!("kernel_names: {:?}", kernel_names);
+    #[cfg(debug_assertions)]
     println!("py_kernel_to_lines: {:?}", py_kernel_to_lines);
-    println!("cpp_kernel_names: {:?}", kernel_names);
+    #[cfg(debug_assertions)]
     println!("cpp_code_to_lines: {:?}", cpp_code_to_lines);
 
     // Process all mappings using helper functions
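The matching logic above revolves around kernel names that may carry a debug handle after a colon (e.g. triton_poi_fused_mul_1:2). Below is a standalone sketch of the two-step strategy, with a toy wrapper-code fragment invented for illustration: if the annotated name appears, map to the next line mentioning the pure kernel name; otherwise fall back to every line mentioning it. The real functions additionally filter out comment and definition lines via valid_line.

    // Resolve a (possibly debug-handle-annotated) kernel name to 1-based line numbers.
    fn kernel_lines(content: &str, kernel_name: &str) -> Vec<usize> {
        // "triton_poi_fused_mul_1:2" -> "triton_poi_fused_mul_1"
        let pure = kernel_name.split(':').next().unwrap_or(kernel_name);

        // Step 1: if the annotated name appears, map to the next line that
        // mentions the pure name.
        if kernel_name.contains(':') {
            if let Some(i) = content.lines().position(|l| l.contains(kernel_name)) {
                if let Some(j) = content.lines().skip(i + 1).position(|l| l.contains(pure)) {
                    return vec![i + j + 2]; // 1-based number of that next line (cf. i + pos + 2 above)
                }
            }
        }

        // Step 2: otherwise fall back to every line mentioning the pure name.
        content
            .lines()
            .enumerate()
            .filter(|(_, l)| l.contains(pure))
            .map(|(i, _)| i + 1)
            .collect()
    }

    fn main() {
        // Invented wrapper-code fragment, just to exercise the lookup.
        let code = [
            "// kernel: triton_poi_fused_mul_1:2",
            "call_triton_poi_fused_mul_1(buf0, arg0_1);",
            "call_triton_poi_fused_mul_1(buf1, arg1_1);",
        ]
        .join("\n");

        assert_eq!(kernel_lines(&code, "triton_poi_fused_mul_1:2"), vec![2]);
        assert_eq!(kernel_lines(&code, "triton_poi_fused_mul_1"), vec![1, 2, 3]);
        println!("ok");
    }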
diff --git a/tests/inputs/inductor_provenance_aot_debug_handle_log.txt b/tests/inputs/inductor_provenance_aot_debug_handle_log.txt
new file mode 100644
index 0000000..e2c6525
--- /dev/null
+++ b/tests/inputs/inductor_provenance_aot_debug_handle_log.txt
@@ -0,0 +1,2740 @@
+V0819 12:17:11.089000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/__run_lpar_main__.py", 0]}
+V0819 12:17:11.090000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/__par__/meta_only/bootstrap.py", 1]}
+V0819 12:17:11.090000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/__par__/bootstrap.py", 2]}
+V0819 12:17:11.091000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/usr/local/fbcode/platform010/lib/python3.10/runpy.py", 3]}
+V0819 12:17:11.091000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/testinfra/testpilot/integration/python/adapters/unittest.py", 4]}
+V0819 12:17:11.092000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/testinfra/testpilot/integration/python/adapters/base.py", 5]}
+V0819 12:17:11.092000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/usr/local/fbcode/platform010/lib/python3.10/unittest/runner.py", 6]}
+V0819 12:17:11.093000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/usr/local/fbcode/platform010/lib/python3.10/unittest/suite.py", 7]}
+V0819 12:17:11.093000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/usr/local/fbcode/platform010/lib/python3.10/unittest/case.py", 8]}
+V0819 12:17:11.093000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/testing/_internal/common_utils.py", 9]}
+V0819 12:17:11.094000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/usr/local/fbcode/platform010/lib/python3.10/contextlib.py", 10]}
+V0819 12:17:11.094000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py", 11]}
+V0819 12:17:11.095000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/export/__init__.py", 12]}
+V0819 12:17:11.095000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/export/_trace.py", 13]}
+V0819 12:17:11.096000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/export/exported_program.py", 14]}
+V0819 12:17:11.096000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_export/non_strict_utils.py", 15]}
+V0819 12:17:11.096000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/utils/_pytree.py", 16]}
+V0819 12:17:11.097000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_subclasses/fake_tensor.py", 17]}
+V0819 12:17:11.097000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_subclasses/meta_utils.py", 18]}
+V0819 12:17:11.098000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:270] {"describe_storage": {"id": 0, "describer_id": 0, "size": 320}, "stack": [{"line": 39, "name": "", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 621, "name": "test_kernel_information_generation", "filename": 11, "loc": "ep = torch.export.export(model, inputs, strict=False)"}, {"line": 274, "name": "export", "filename": 12, "loc": "return _export("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2192, "name": "_export", "filename": 13, "loc": "ep = _export_for_training("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2055, "name": "_export_for_training", "filename": 13, "loc": "export_artifact = export_func("}, {"line": 1952, "name": "_non_strict_export", "filename": 13, "loc": ") = make_fake_inputs("}, {"line": 403, "name": "make_fake_inputs", "filename": 15, "loc": "fake_args, fake_kwargs = tree_map_with_path("}, {"line": 2056, "name": "tree_map_with_path", "filename": 16, "loc": "return 
treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 1193, "name": "unflatten", "filename": 16, "loc": "leaves = list(leaves)"}, {"line": 2056, "name": "", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 404, "name": "", "filename": 15, "loc": "lambda kp, val: fakify("}, {"line": 232, "name": "fakify", "filename": 15, "loc": "fake = mode.from_tensor(t, source=source, symbolic_context=symbolic_context)"}, {"line": 2989, "name": "from_tensor", "filename": 17, "loc": "return self.fake_tensor_converter.from_real_tensor("}, {"line": 404, "name": "from_real_tensor", "filename": 17, "loc": "out = self.meta_converter("}, {"line": 1895, "name": "__call__", "filename": 18, "loc": "t_desc = self.describer.describe_tensor(t, trace=trace)"}, {"line": 310, "name": "describe_tensor", "filename": 18, "loc": "storage = self.describe_storage(t.untyped_storage(), trace=trace)"}, {"line": 270, "name": "describe_storage", "filename": 18, "loc": "trace_structured("}]}
+V0819 12:17:11.099000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:487] {"describe_tensor": {"id": 0, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [8, 10], "is_leaf": true, "stride": [10, 1], "storage": 0, "view_func": "_CustomViewFunc(func=)", "describer_id": 0}, "stack": [{"line": 39, "name": "", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 621, "name": "test_kernel_information_generation", "filename": 11, "loc": "ep = torch.export.export(model, inputs, strict=False)"}, {"line": 274, "name": "export", "filename": 12, "loc": "return _export("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2192, "name": "_export", "filename": 13, "loc": "ep = _export_for_training("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2055, "name": "_export_for_training", "filename": 13, "loc": "export_artifact = export_func("}, {"line": 1952, "name": "_non_strict_export", "filename": 13, "loc": ") = make_fake_inputs("}, {"line": 403, 
"name": "make_fake_inputs", "filename": 15, "loc": "fake_args, fake_kwargs = tree_map_with_path("}, {"line": 2056, "name": "tree_map_with_path", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 1193, "name": "unflatten", "filename": 16, "loc": "leaves = list(leaves)"}, {"line": 2056, "name": "", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 404, "name": "", "filename": 15, "loc": "lambda kp, val: fakify("}, {"line": 232, "name": "fakify", "filename": 15, "loc": "fake = mode.from_tensor(t, source=source, symbolic_context=symbolic_context)"}, {"line": 2989, "name": "from_tensor", "filename": 17, "loc": "return self.fake_tensor_converter.from_real_tensor("}, {"line": 404, "name": "from_real_tensor", "filename": 17, "loc": "out = self.meta_converter("}, {"line": 1895, "name": "__call__", "filename": 18, "loc": "t_desc = self.describer.describe_tensor(t, trace=trace)"}, {"line": 487, "name": "describe_tensor", "filename": 18, "loc": "trace_structured("}]}
+V0819 12:17:11.100000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:1899] {"describe_source": {"describer_id": 0, "id": 0, "source": "L['x']"}, "stack": [{"line": 39, "name": "", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 621, "name": "test_kernel_information_generation", "filename": 11, "loc": "ep = torch.export.export(model, inputs, strict=False)"}, {"line": 274, "name": "export", "filename": 12, "loc": "return _export("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2192, "name": "_export", "filename": 13, "loc": "ep = _export_for_training("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2055, "name": "_export_for_training", "filename": 13, "loc": "export_artifact = export_func("}, {"line": 1952, "name": "_non_strict_export", "filename": 13, "loc": ") = make_fake_inputs("}, {"line": 403, "name": "make_fake_inputs", "filename": 15, "loc": "fake_args, fake_kwargs = tree_map_with_path("}, {"line": 2056, "name": "tree_map_with_path", "filename": 16, "loc": 
"return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 1193, "name": "unflatten", "filename": 16, "loc": "leaves = list(leaves)"}, {"line": 2056, "name": "", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 404, "name": "", "filename": 15, "loc": "lambda kp, val: fakify("}, {"line": 232, "name": "fakify", "filename": 15, "loc": "fake = mode.from_tensor(t, source=source, symbolic_context=symbolic_context)"}, {"line": 2989, "name": "from_tensor", "filename": 17, "loc": "return self.fake_tensor_converter.from_real_tensor("}, {"line": 404, "name": "from_real_tensor", "filename": 17, "loc": "out = self.meta_converter("}, {"line": 1899, "name": "__call__", "filename": 18, "loc": "trace_structured("}]}
+V0819 12:17:11.102000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:270] {"describe_storage": {"id": 1, "describer_id": 0, "size": 800}, "stack": [{"line": 39, "name": "", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 621, "name": "test_kernel_information_generation", "filename": 11, "loc": "ep = torch.export.export(model, inputs, strict=False)"}, {"line": 274, "name": "export", "filename": 12, "loc": "return _export("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2192, "name": "_export", "filename": 13, "loc": "ep = _export_for_training("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2055, "name": "_export_for_training", "filename": 13, "loc": "export_artifact = export_func("}, {"line": 1952, "name": "_non_strict_export", "filename": 13, "loc": ") = make_fake_inputs("}, {"line": 403, "name": "make_fake_inputs", "filename": 15, "loc": "fake_args, fake_kwargs = tree_map_with_path("}, {"line": 2056, "name": "tree_map_with_path", "filename": 16, "loc": "return 
treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 1193, "name": "unflatten", "filename": 16, "loc": "leaves = list(leaves)"}, {"line": 2056, "name": "", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 404, "name": "", "filename": 15, "loc": "lambda kp, val: fakify("}, {"line": 232, "name": "fakify", "filename": 15, "loc": "fake = mode.from_tensor(t, source=source, symbolic_context=symbolic_context)"}, {"line": 2989, "name": "from_tensor", "filename": 17, "loc": "return self.fake_tensor_converter.from_real_tensor("}, {"line": 404, "name": "from_real_tensor", "filename": 17, "loc": "out = self.meta_converter("}, {"line": 1895, "name": "__call__", "filename": 18, "loc": "t_desc = self.describer.describe_tensor(t, trace=trace)"}, {"line": 310, "name": "describe_tensor", "filename": 18, "loc": "storage = self.describe_storage(t.untyped_storage(), trace=trace)"}, {"line": 270, "name": "describe_storage", "filename": 18, "loc": "trace_structured("}]}
+V0819 12:17:11.103000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:487] {"describe_tensor": {"id": 1, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [10, 20], "is_leaf": true, "stride": [20, 1], "storage": 1, "view_func": "_CustomViewFunc(func=)", "describer_id": 0}, "stack": [{"line": 39, "name": "", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 621, "name": "test_kernel_information_generation", "filename": 11, "loc": "ep = torch.export.export(model, inputs, strict=False)"}, {"line": 274, "name": "export", "filename": 12, "loc": "return _export("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2192, "name": "_export", "filename": 13, "loc": "ep = _export_for_training("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2055, "name": "_export_for_training", "filename": 13, "loc": "export_artifact = export_func("}, {"line": 1952, "name": "_non_strict_export", "filename": 13, "loc": ") = make_fake_inputs("}, {"line": 403, "name": "make_fake_inputs", "filename": 15, "loc": "fake_args, fake_kwargs = tree_map_with_path("}, {"line": 2056, "name": "tree_map_with_path", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 1193, "name": "unflatten", "filename": 16, "loc": "leaves = list(leaves)"}, {"line": 2056, "name": "", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 404, "name": "", "filename": 15, "loc": "lambda kp, val: fakify("}, {"line": 232, "name": "fakify", "filename": 15, "loc": "fake = mode.from_tensor(t, source=source, symbolic_context=symbolic_context)"}, {"line": 2989, "name": "from_tensor", "filename": 17, "loc": "return self.fake_tensor_converter.from_real_tensor("}, {"line": 404, "name": "from_real_tensor", "filename": 17, "loc": "out = self.meta_converter("}, {"line": 1895, "name": "__call__", "filename": 18, "loc": "t_desc = self.describer.describe_tensor(t, trace=trace)"}, {"line": 487, "name": "describe_tensor", "filename": 18, "loc": "trace_structured("}]}
+V0819 12:17:11.104000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:1899] {"describe_source": {"describer_id": 0, "id": 1, "source": "L['a']"}, "stack": [{"line": 39, "name": "", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 621, "name": "test_kernel_information_generation", "filename": 11, "loc": "ep = torch.export.export(model, inputs, strict=False)"}, {"line": 274, "name": "export", "filename": 12, "loc": "return _export("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2192, "name": "_export", "filename": 13, "loc": "ep = _export_for_training("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2055, "name": "_export_for_training", "filename": 13, "loc": "export_artifact = export_func("}, {"line": 1952, "name": "_non_strict_export", "filename": 13, "loc": ") = make_fake_inputs("}, {"line": 403, "name": "make_fake_inputs", "filename": 15, "loc": "fake_args, fake_kwargs = tree_map_with_path("}, {"line": 2056, "name": "tree_map_with_path", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 1193, "name": "unflatten", "filename": 16, "loc": "leaves = list(leaves)"}, {"line": 2056, "name": "", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 404, "name": "", "filename": 15, "loc": "lambda kp, val: fakify("}, {"line": 232, "name": "fakify", "filename": 15, "loc": "fake = mode.from_tensor(t, source=source, symbolic_context=symbolic_context)"}, {"line": 2989, "name": "from_tensor", "filename": 17, "loc": "return self.fake_tensor_converter.from_real_tensor("}, {"line": 404, "name": "from_real_tensor", "filename": 17, "loc": "out = self.meta_converter("}, {"line": 1899, "name": "__call__", "filename": 18, "loc": "trace_structured("}]}
+V0819 12:17:11.106000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:270] {"describe_storage": {"id": 2, "describer_id": 0, "size": 2400}, "stack": [{"line": 39, "name": "", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 621, "name": "test_kernel_information_generation", "filename": 11, "loc": "ep = torch.export.export(model, inputs, strict=False)"}, {"line": 274, "name": "export", "filename": 12, "loc": "return _export("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2192, "name": "_export", "filename": 13, "loc": "ep = _export_for_training("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2055, "name": "_export_for_training", "filename": 13, "loc": "export_artifact = export_func("}, {"line": 1952, "name": "_non_strict_export", "filename": 13, "loc": ") = make_fake_inputs("}, {"line": 403, "name": "make_fake_inputs", "filename": 15, "loc": "fake_args, fake_kwargs = tree_map_with_path("}, {"line": 2056, "name": "tree_map_with_path", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 1193, "name": "unflatten", "filename": 16, "loc": "leaves = list(leaves)"}, {"line": 2056, "name": "", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 404, "name": "", "filename": 15, "loc": "lambda kp, val: fakify("}, {"line": 232, "name": "fakify", "filename": 15, "loc": "fake = mode.from_tensor(t, source=source, symbolic_context=symbolic_context)"}, {"line": 2989, "name": "from_tensor", "filename": 17, "loc": "return self.fake_tensor_converter.from_real_tensor("}, {"line": 404, "name": "from_real_tensor", "filename": 17, "loc": "out = self.meta_converter("}, {"line": 1895, "name": "__call__", "filename": 18, "loc": "t_desc = self.describer.describe_tensor(t, trace=trace)"}, {"line": 310, "name": "describe_tensor", "filename": 18, "loc": "storage = self.describe_storage(t.untyped_storage(), trace=trace)"}, {"line": 270, "name": "describe_storage", "filename": 18, "loc": "trace_structured("}]}
+V0819 12:17:11.107000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:487] {"describe_tensor": {"id": 2, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [20, 30], "is_leaf": true, "stride": [30, 1], "storage": 2, "view_func": "_CustomViewFunc(func=)", "describer_id": 0}, "stack": [{"line": 39, "name": "", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 621, "name": "test_kernel_information_generation", "filename": 11, "loc": "ep = torch.export.export(model, inputs, strict=False)"}, {"line": 274, "name": "export", "filename": 12, "loc": "return _export("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2192, "name": "_export", "filename": 13, "loc": "ep = _export_for_training("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2055, "name": "_export_for_training", "filename": 13, "loc": "export_artifact = export_func("}, {"line": 1952, "name": "_non_strict_export", "filename": 13, "loc": ") = make_fake_inputs("}, {"line": 403, "name": "make_fake_inputs", "filename": 15, "loc": "fake_args, fake_kwargs = tree_map_with_path("}, {"line": 2056, "name": "tree_map_with_path", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 1193, "name": "unflatten", "filename": 16, "loc": "leaves = list(leaves)"}, {"line": 2056, "name": "", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 404, "name": "", "filename": 15, "loc": "lambda kp, val: fakify("}, {"line": 232, "name": "fakify", "filename": 15, "loc": "fake = mode.from_tensor(t, source=source, symbolic_context=symbolic_context)"}, {"line": 2989, "name": "from_tensor", "filename": 17, "loc": "return self.fake_tensor_converter.from_real_tensor("}, {"line": 404, "name": "from_real_tensor", "filename": 17, "loc": "out = self.meta_converter("}, {"line": 1895, "name": "__call__", "filename": 18, "loc": "t_desc = self.describer.describe_tensor(t, trace=trace)"}, {"line": 487, "name": "describe_tensor", "filename": 18, "loc": "trace_structured("}]}
+V0819 12:17:11.107000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:1899] {"describe_source": {"describer_id": 0, "id": 2, "source": "L['b']"}, "stack": [{"line": 39, "name": "", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 621, "name": "test_kernel_information_generation", "filename": 11, "loc": "ep = torch.export.export(model, inputs, strict=False)"}, {"line": 274, "name": "export", "filename": 12, "loc": "return _export("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2192, "name": "_export", "filename": 13, "loc": "ep = _export_for_training("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2055, "name": "_export_for_training", "filename": 13, "loc": "export_artifact = export_func("}, {"line": 1952, "name": "_non_strict_export", "filename": 13, "loc": ") = make_fake_inputs("}, {"line": 403, "name": "make_fake_inputs", "filename": 15, "loc": "fake_args, fake_kwargs = tree_map_with_path("}, {"line": 2056, "name": "tree_map_with_path", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 1193, "name": "unflatten", "filename": 16, "loc": "leaves = list(leaves)"}, {"line": 2056, "name": "", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 404, "name": "", "filename": 15, "loc": "lambda kp, val: fakify("}, {"line": 232, "name": "fakify", "filename": 15, "loc": "fake = mode.from_tensor(t, source=source, symbolic_context=symbolic_context)"}, {"line": 2989, "name": "from_tensor", "filename": 17, "loc": "return self.fake_tensor_converter.from_real_tensor("}, {"line": 404, "name": "from_real_tensor", "filename": 17, "loc": "out = self.meta_converter("}, {"line": 1899, "name": "__call__", "filename": 18, "loc": "trace_structured("}]}
+V0819 12:17:11.109000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:270] {"describe_storage": {"id": 3, "describer_id": 0, "size": 1200}, "stack": [{"line": 39, "name": "", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 621, "name": "test_kernel_information_generation", "filename": 11, "loc": "ep = torch.export.export(model, inputs, strict=False)"}, {"line": 274, "name": "export", "filename": 12, "loc": "return _export("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2192, "name": "_export", "filename": 13, "loc": "ep = _export_for_training("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2055, "name": "_export_for_training", "filename": 13, "loc": "export_artifact = export_func("}, {"line": 1952, "name": "_non_strict_export", "filename": 13, "loc": ") = make_fake_inputs("}, {"line": 403, "name": "make_fake_inputs", "filename": 15, "loc": "fake_args, fake_kwargs = tree_map_with_path("}, {"line": 2056, "name": "tree_map_with_path", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 1193, "name": "unflatten", "filename": 16, "loc": "leaves = list(leaves)"}, {"line": 2056, "name": "", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 404, "name": "", "filename": 15, "loc": "lambda kp, val: fakify("}, {"line": 232, "name": "fakify", "filename": 15, "loc": "fake = mode.from_tensor(t, source=source, symbolic_context=symbolic_context)"}, {"line": 2989, "name": "from_tensor", "filename": 17, "loc": "return self.fake_tensor_converter.from_real_tensor("}, {"line": 404, "name": "from_real_tensor", "filename": 17, "loc": "out = self.meta_converter("}, {"line": 1895, "name": "__call__", "filename": 18, "loc": "t_desc = self.describer.describe_tensor(t, trace=trace)"}, {"line": 310, "name": "describe_tensor", "filename": 18, "loc": "storage = self.describe_storage(t.untyped_storage(), trace=trace)"}, {"line": 270, "name": "describe_storage", "filename": 18, "loc": "trace_structured("}]}
+V0819 12:17:11.110000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:487] {"describe_tensor": {"id": 3, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [10, 30], "is_leaf": true, "stride": [30, 1], "storage": 3, "view_func": "_CustomViewFunc(func=)", "describer_id": 0}, "stack": [{"line": 39, "name": "", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 621, "name": "test_kernel_information_generation", "filename": 11, "loc": "ep = torch.export.export(model, inputs, strict=False)"}, {"line": 274, "name": "export", "filename": 12, "loc": "return _export("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2192, "name": "_export", "filename": 13, "loc": "ep = _export_for_training("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2055, "name": "_export_for_training", "filename": 13, "loc": "export_artifact = export_func("}, {"line": 1952, "name": "_non_strict_export", "filename": 13, "loc": ") = make_fake_inputs("}, {"line": 403, "name": "make_fake_inputs", "filename": 15, "loc": "fake_args, fake_kwargs = tree_map_with_path("}, {"line": 2056, "name": "tree_map_with_path", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 1193, "name": "unflatten", "filename": 16, "loc": "leaves = list(leaves)"}, {"line": 2056, "name": "", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 404, "name": "", "filename": 15, "loc": "lambda kp, val: fakify("}, {"line": 232, "name": "fakify", "filename": 15, "loc": "fake = mode.from_tensor(t, source=source, symbolic_context=symbolic_context)"}, {"line": 2989, "name": "from_tensor", "filename": 17, "loc": "return self.fake_tensor_converter.from_real_tensor("}, {"line": 404, "name": "from_real_tensor", "filename": 17, "loc": "out = self.meta_converter("}, {"line": 1895, "name": "__call__", "filename": 18, "loc": "t_desc = self.describer.describe_tensor(t, trace=trace)"}, {"line": 487, "name": "describe_tensor", "filename": 18, "loc": "trace_structured("}]}
+V0819 12:17:11.111000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:1899] {"describe_source": {"describer_id": 0, "id": 3, "source": "L['c']"}, "stack": [{"line": 39, "name": "", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 621, "name": "test_kernel_information_generation", "filename": 11, "loc": "ep = torch.export.export(model, inputs, strict=False)"}, {"line": 274, "name": "export", "filename": 12, "loc": "return _export("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2192, "name": "_export", "filename": 13, "loc": "ep = _export_for_training("}, {"line": 1124, "name": "wrapper", "filename": 13, "loc": "ep = fn(*args, **kwargs)"}, {"line": 124, "name": "wrapper", "filename": 14, "loc": "return fn(*args, **kwargs)"}, {"line": 2055, "name": "_export_for_training", "filename": 13, "loc": "export_artifact = export_func("}, {"line": 1952, "name": "_non_strict_export", "filename": 13, "loc": ") = make_fake_inputs("}, {"line": 403, "name": "make_fake_inputs", "filename": 15, "loc": "fake_args, fake_kwargs = tree_map_with_path("}, {"line": 2056, "name": "tree_map_with_path", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 1193, "name": "unflatten", "filename": 16, "loc": "leaves = list(leaves)"}, {"line": 2056, "name": "", "filename": 16, "loc": "return treespec.unflatten(func(*xs) for xs in zip(*all_keypath_leaves))"}, {"line": 404, "name": "", "filename": 15, "loc": "lambda kp, val: fakify("}, {"line": 232, "name": "fakify", "filename": 15, "loc": "fake = mode.from_tensor(t, source=source, symbolic_context=symbolic_context)"}, {"line": 2989, "name": "from_tensor", "filename": 17, "loc": "return self.fake_tensor_converter.from_real_tensor("}, {"line": 404, "name": "from_real_tensor", "filename": 17, "loc": "out = self.meta_converter("}, {"line": 1899, "name": "__call__", "filename": 18, "loc": "trace_structured("}]}
+V0819 12:17:11.212000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "6aca4c2393e9ee762ae85ded683fe3e1"}
+	{
+	"name": "compile_fx_aot",
+	"ts": 1755631031212288.8,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:11.215000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "9fc9b4d8e207af26c84b91013d416c20"}
+	{
+	"name": "inductor_codecache_torch_key",
+	"ts": 1755631031215360.2,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:11.217000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "3cc7ee90f5aecb2fcf07b8816a341089"}
+	{
+	"name": "inductor_codecache_torch_key",
+	"ts": 1755631031217169.0,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:11.224000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_inductor/__init__.py", 19]}
+V0819 12:17:11.224000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_inductor/debug.py", 20]}
+V0819 12:17:11.225000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_inductor/compile_fx.py", 21]}
+V0819 12:17:11.227000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_inductor/compile_fx.py:2223] {"artifact": {"name": "before_pre_grad_graph", "encoding": "string"}, "stack": [{"line": 39, "name": "", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 624, "name": "test_kernel_information_generation", "filename": 11, "loc": "torch._inductor.aoti_compile_and_package(ep, package_path=pt2_file)"}, {"line": 151, "name": "aoti_compile_and_package", "filename": 19, "loc": "return aot_inductor_minifier_wrapper("}, {"line": 1254, "name": "aot_inductor_minifier_wrapper", "filename": 20, "loc": "return func("}, {"line": 194, "name": "_aoti_compile_and_package_inner", "filename": 19, "loc": "aoti_files = aot_compile(gm, args, kwargs, options=inductor_configs)"}, {"line": 301, "name": "aot_compile", "filename": 19, "loc": "return compile_fx_aot("}, {"line": 1900, "name": "compile_fx_aot", "filename": 21, "loc": "compiled_artifacts = compile_fx("}, {"line": 2116, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2173, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2223, "name": "compile_fx", "filename": 21, "loc": "trace_structured("}], "has_payload": "4a0a3d9a0a4da4e2c240ac1983842cbb"}
+	class GraphModule(torch.nn.Module):
+	    def forward(self, x: "f32[8, 10][10, 1]cuda:0", a: "f32[10, 20][20, 1]cuda:0", b: "f32[20, 30][30, 1]cuda:0", c: "f32[10, 30][30, 1]cuda:0"):
+	        # No stacktrace found for following nodes
+	        fc1_weight: "f32[16, 10][10, 1]cuda:0" = self.fc1.weight
+	        fc1_bias: "f32[16][1]cuda:0" = self.fc1.bias
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/nn/modules/linear.py:134 in forward, code: return F.linear(input, self.weight, self.bias)
+	        linear: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.linear.default(x, fc1_weight, fc1_bias);  x = fc1_weight = fc1_bias = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/nn/modules/activation.py:144 in forward, code: return F.relu(input, inplace=self.inplace)
+	        relu: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.relu.default(linear);  linear = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/nn/modules/activation.py:359 in forward, code: return torch.sigmoid(input)
+	        sigmoid: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.sigmoid.default(relu);  relu = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:82 in forward, code: d = a * 3.14
+	        mul: "f32[10, 20][20, 1]cuda:0" = torch.ops.aten.mul.Tensor(a, 3.14);  a = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:83 in forward, code: y = torch.addmm(c, d, b)
+	        addmm: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.addmm.default(c, mul, b);  c = mul = b = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:84 in forward, code: z = torch.nn.functional.gelu(y)
+	        gelu: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.gelu.default(addmm);  addmm = None
+	        return (sigmoid, gelu)
+	        
+	
+	 # graph id: 140239416832880
+V0819 12:17:11.228000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "feaae61087882c73fd487f611da2f3f6"}
+	{
+	"name": "_recursive_pre_grad_passes",
+	"ts": 1755631031228600.5,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:11.248000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "c61bd9aba6cba7a9f938607528578adc"}
+	{
+	"name": "_recursive_pre_grad_passes",
+	"ts": 1755631031248490.8,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:11.256000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_inductor/compile_fx.py:2254] {"artifact": {"name": "after_pre_grad_graph", "encoding": "string"}, "stack": [{"line": 39, "name": "", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 624, "name": "test_kernel_information_generation", "filename": 11, "loc": "torch._inductor.aoti_compile_and_package(ep, package_path=pt2_file)"}, {"line": 151, "name": "aoti_compile_and_package", "filename": 19, "loc": "return aot_inductor_minifier_wrapper("}, {"line": 1254, "name": "aot_inductor_minifier_wrapper", "filename": 20, "loc": "return func("}, {"line": 194, "name": "_aoti_compile_and_package_inner", "filename": 19, "loc": "aoti_files = aot_compile(gm, args, kwargs, options=inductor_configs)"}, {"line": 301, "name": "aot_compile", "filename": 19, "loc": "return compile_fx_aot("}, {"line": 1900, "name": "compile_fx_aot", "filename": 21, "loc": "compiled_artifacts = compile_fx("}, {"line": 2116, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2173, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2254, "name": "compile_fx", "filename": 21, "loc": "trace_structured("}], "has_payload": "4a0a3d9a0a4da4e2c240ac1983842cbb"}
+	class GraphModule(torch.nn.Module):
+	    def forward(self, x: "f32[8, 10][10, 1]cuda:0", a: "f32[10, 20][20, 1]cuda:0", b: "f32[20, 30][30, 1]cuda:0", c: "f32[10, 30][30, 1]cuda:0"):
+	        # No stacktrace found for following nodes
+	        fc1_weight: "f32[16, 10][10, 1]cuda:0" = self.fc1.weight
+	        fc1_bias: "f32[16][1]cuda:0" = self.fc1.bias
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/nn/modules/linear.py:134 in forward, code: return F.linear(input, self.weight, self.bias)
+	        linear: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.linear.default(x, fc1_weight, fc1_bias);  x = fc1_weight = fc1_bias = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/nn/modules/activation.py:144 in forward, code: return F.relu(input, inplace=self.inplace)
+	        relu: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.relu.default(linear);  linear = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/nn/modules/activation.py:359 in forward, code: return torch.sigmoid(input)
+	        sigmoid: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.sigmoid.default(relu);  relu = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:82 in forward, code: d = a * 3.14
+	        mul: "f32[10, 20][20, 1]cuda:0" = torch.ops.aten.mul.Tensor(a, 3.14);  a = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:83 in forward, code: y = torch.addmm(c, d, b)
+	        addmm: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.addmm.default(c, mul, b);  c = mul = b = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:84 in forward, code: z = torch.nn.functional.gelu(y)
+	        gelu: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.gelu.default(addmm);  addmm = None
+	        return (sigmoid, gelu)
+	        
+	
+	 # graph id: 140239416832880
+V0819 12:17:11.258000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "261b4a27f8afe5281619e6ce09acc6b1"}
+	{
+	"name": "create_aot_dispatcher_function",
+	"ts": 1755631031258767.5,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:11.263000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "f899ec11974508bf54cfd2ddda8573a8"}
+	{
+	"name": "aot_collect_metadata",
+	"ts": 1755631031263436.0,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:11.286000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "f6de1051f01eecef01c84f1f8e6a074c"}
+	{
+	"name": "aot_collect_metadata",
+	"ts": 1755631031285986.0,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:11.327000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_functorch/aot_autograd.py", 22]}
+V0819 12:17:11.328000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_functorch/_aot_autograd/graph_compile.py", 23]}
+V0819 12:17:11.328000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_functorch/_aot_autograd/graph_capture.py", 24]}
+V0819 12:17:11.329000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_functorch/_aot_autograd/graph_capture.py:301] {"artifact": {"name": "aot_forward_graph_fw_metadata", "encoding": "string"}, "stack": [{"line": 39, "name": "", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 624, "name": "test_kernel_information_generation", "filename": 11, "loc": "torch._inductor.aoti_compile_and_package(ep, package_path=pt2_file)"}, {"line": 151, "name": "aoti_compile_and_package", "filename": 19, "loc": "return aot_inductor_minifier_wrapper("}, {"line": 1254, "name": "aot_inductor_minifier_wrapper", "filename": 20, "loc": "return func("}, {"line": 194, "name": "_aoti_compile_and_package_inner", "filename": 19, "loc": "aoti_files = aot_compile(gm, args, kwargs, options=inductor_configs)"}, {"line": 301, "name": "aot_compile", "filename": 19, "loc": "return compile_fx_aot("}, {"line": 1900, "name": "compile_fx_aot", "filename": 21, "loc": "compiled_artifacts = compile_fx("}, {"line": 2116, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2173, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2458, "name": "compile_fx", "filename": 21, "loc": "gm, graph_signature = aot_export_module("}, {"line": 1444, "name": "aot_export_module", "filename": 22, "loc": "fx_g, metadata, in_spec, out_spec = _aot_export_function("}, {"line": 1703, "name": "_aot_export_function", "filename": 22, "loc": "aot_graph_capture = aot_stage1_graph_capture(aot_state, flat_fn)"}, {"line": 171, "name": "aot_stage1_graph_capture", "filename": 23, "loc": "aot_dispatch_base_graph(  # type: ignore[assignment]"}, {"line": 301, "name": "aot_dispatch_base_graph", "filename": 24, "loc": "trace_structured("}], "has_payload": "4d5e8ee520aca9cec301c0adf555bcb8"}
+	ViewAndMutationMeta(input_info=[InputAliasInfo(is_leaf=True,
+	                                              mutates_data=False,
+	                                              mutates_metadata=False,
+	                                              mutations_hidden_from_autograd=True,
+	                                              mutations_under_no_grad_or_inference_mode=False,
+	                                              mutation_inductor_storage_resize=False,
+	                                              mutates_storage_metadata=False,
+	                                              requires_grad=True,
+	                                              keep_input_mutations=False),
+	                               InputAliasInfo(is_leaf=True,
+	                                              mutates_data=False,
+	                                              mutates_metadata=False,
+	                                              mutations_hidden_from_autograd=True,
+	                                              mutations_under_no_grad_or_inference_mode=False,
+	                                              mutation_inductor_storage_resize=False,
+	                                              mutates_storage_metadata=False,
+	                                              requires_grad=True,
+	                                              keep_input_mutations=False),
+	                               InputAliasInfo(is_leaf=True,
+	                                              mutates_data=False,
+	                                              mutates_metadata=False,
+	                                              mutations_hidden_from_autograd=True,
+	                                              mutations_under_no_grad_or_inference_mode=False,
+	                                              mutation_inductor_storage_resize=False,
+	                                              mutates_storage_metadata=False,
+	                                              requires_grad=False,
+	                                              keep_input_mutations=False),
+	                               InputAliasInfo(is_leaf=True,
+	                                              mutates_data=False,
+	                                              mutates_metadata=False,
+	                                              mutations_hidden_from_autograd=True,
+	                                              mutations_under_no_grad_or_inference_mode=False,
+	                                              mutation_inductor_storage_resize=False,
+	                                              mutates_storage_metadata=False,
+	                                              requires_grad=False,
+	                                              keep_input_mutations=False),
+	                               InputAliasInfo(is_leaf=True,
+	                                              mutates_data=False,
+	                                              mutates_metadata=False,
+	                                              mutations_hidden_from_autograd=True,
+	                                              mutations_under_no_grad_or_inference_mode=False,
+	                                              mutation_inductor_storage_resize=False,
+	                                              mutates_storage_metadata=False,
+	                                              requires_grad=False,
+	                                              keep_input_mutations=False),
+	                               InputAliasInfo(is_leaf=True,
+	                                              mutates_data=False,
+	                                              mutates_metadata=False,
+	                                              mutations_hidden_from_autograd=True,
+	                                              mutations_under_no_grad_or_inference_mode=False,
+	                                              mutation_inductor_storage_resize=False,
+	                                              mutates_storage_metadata=False,
+	                                              requires_grad=False,
+	                                              keep_input_mutations=False)],
+	                    output_info=[OutputAliasInfo(output_type=,
+	                                                raw_type=,
+	                                                base_idx=None,
+	                                                dynamic_dims=set(),
+	                                                requires_grad=False,
+	                                                functional_tensor=None),
+	                                OutputAliasInfo(output_type=,
+	                                                raw_type=,
+	                                                base_idx=None,
+	                                                dynamic_dims=set(),
+	                                                requires_grad=False,
+	                                                functional_tensor=None)],
+	                    num_intermediate_bases=0,
+	                    keep_input_mutations=False,
+	                    traced_tangents=[],
+	                    traced_tangents_descs=[],
+	                    subclass_inp_meta=[PlainTensorMeta(unwrapped_idx=0,
+	                                                      memory_format=None),
+	                                      PlainTensorMeta(unwrapped_idx=1,
+	                                                      memory_format=None),
+	                                      PlainTensorMeta(unwrapped_idx=2,
+	                                                      memory_format=None),
+	                                      PlainTensorMeta(unwrapped_idx=3,
+	                                                      memory_format=None),
+	                                      PlainTensorMeta(unwrapped_idx=4,
+	                                                      memory_format=None),
+	                                      PlainTensorMeta(unwrapped_idx=5,
+	                                                      memory_format=None)],
+	                    subclass_fw_graph_out_meta=[PlainTensorMeta(unwrapped_idx=0,
+	                                                               memory_format=None),
+	                                               PlainTensorMeta(unwrapped_idx=1,
+	                                                               memory_format=None)],
+	                    subclass_tangent_meta=[],
+	                    is_train=False,
+	                    traced_tangent_metas=None,
+	                    num_symints_saved_for_bw=None,
+	                    grad_enabled_mutation=None,
+	                    deterministic=False,
+	                    static_input_indices=[],
+	                    tokens={},
+	                    indices_of_inputs_that_requires_grad_with_mutations_in_bw=[],
+	                    bw_donated_idxs=None,
+	                    num_backward_tokens=0,
+	                    num_graphsafe_rng_states=0,
+	                    graphsafe_rng_state_index=None)
+V0819 12:17:11.332000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_functorch/_aot_autograd/graph_capture.py:319] {"aot_inference_graph": {}, "stack": [{"line": 39, "name": "", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 624, "name": "test_kernel_information_generation", "filename": 11, "loc": "torch._inductor.aoti_compile_and_package(ep, package_path=pt2_file)"}, {"line": 151, "name": "aoti_compile_and_package", "filename": 19, "loc": "return aot_inductor_minifier_wrapper("}, {"line": 1254, "name": "aot_inductor_minifier_wrapper", "filename": 20, "loc": "return func("}, {"line": 194, "name": "_aoti_compile_and_package_inner", "filename": 19, "loc": "aoti_files = aot_compile(gm, args, kwargs, options=inductor_configs)"}, {"line": 301, "name": "aot_compile", "filename": 19, "loc": "return compile_fx_aot("}, {"line": 1900, "name": "compile_fx_aot", "filename": 21, "loc": "compiled_artifacts = compile_fx("}, {"line": 2116, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2173, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2458, "name": "compile_fx", "filename": 21, "loc": "gm, graph_signature = aot_export_module("}, {"line": 1444, "name": "aot_export_module", "filename": 22, "loc": "fx_g, 
metadata, in_spec, out_spec = _aot_export_function("}, {"line": 1703, "name": "_aot_export_function", "filename": 22, "loc": "aot_graph_capture = aot_stage1_graph_capture(aot_state, flat_fn)"}, {"line": 171, "name": "aot_stage1_graph_capture", "filename": 23, "loc": "aot_dispatch_base_graph(  # type: ignore[assignment]"}, {"line": 319, "name": "aot_dispatch_base_graph", "filename": 24, "loc": "trace_structured("}], "has_payload": "b951429148939c22d26b0940141f8b77"}
+	class <lambda>(torch.nn.Module):
+	    def forward(
+	        self,
+	        arg0_1: "f32[16, 10][10, 1]cuda:0",  # PlainAOTInput(idx=0)
+	        arg1_1: "f32[16][1]cuda:0",  # PlainAOTInput(idx=1)
+	        arg2_1: "f32[8, 10][10, 1]cuda:0",  # PlainAOTInput(idx=2)
+	        arg3_1: "f32[10, 20][20, 1]cuda:0",  # PlainAOTInput(idx=3)
+	        arg4_1: "f32[20, 30][30, 1]cuda:0",  # PlainAOTInput(idx=4)
+	        arg5_1: "f32[10, 30][30, 1]cuda:0",  # PlainAOTInput(idx=5)
+	    ):
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/nn/modules/linear.py:134 in forward, code: return F.linear(input, self.weight, self.bias)
+	        permute: "f32[10, 16][1, 10]cuda:0" = torch.ops.aten.permute.default(arg0_1, [1, 0]);  arg0_1 = None
+	        addmm: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.addmm.default(arg1_1, arg2_1, permute);  arg1_1 = arg2_1 = permute = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/nn/modules/activation.py:144 in forward, code: return F.relu(input, inplace=self.inplace)
+	        relu: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.relu.default(addmm);  addmm = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/nn/modules/activation.py:359 in forward, code: return torch.sigmoid(input)
+	        sigmoid: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.sigmoid.default(relu);  relu = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:82 in forward, code: d = a * 3.14
+	        mul: "f32[10, 20][20, 1]cuda:0" = torch.ops.aten.mul.Tensor(arg3_1, 3.14);  arg3_1 = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:83 in forward, code: y = torch.addmm(c, d, b)
+	        addmm_1: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.addmm.default(arg5_1, mul, arg4_1);  arg5_1 = mul = arg4_1 = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:84 in forward, code: z = torch.nn.functional.gelu(y)
+	        mul_1: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.mul.Tensor(addmm_1, 0.5)
+	        mul_2: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.mul.Tensor(addmm_1, 0.7071067811865476);  addmm_1 = None
+	        erf: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.erf.default(mul_2);  mul_2 = None
+	        add: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.add.Tensor(erf, 1);  erf = None
+	        mul_3: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_1, add);  mul_1 = add = None
+	        return (
+	            sigmoid,  # PlainAOTOutput(idx=0)
+	            mul_3,  # PlainAOTOutput(idx=1)
+	        )
+	        
+V0819 12:17:11.336000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "7becc45412f1402ef7762c00ad5932f3"}
+	{
+	"name": "create_aot_dispatcher_function",
+	"ts": 1755631031336150.8,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:11.340000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "ceef05227dc68692b0c2a5ca2505a5d2"}
+	{
+	"name": "compile_fx..fw_compiler_base",
+	"ts": 1755631031340807.2,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:11.341000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "daf6b8f1a3ef1f763d4b8047cb580bb0"}
+	{
+	"name": "_recursive_joint_graph_passes",
+	"ts": 1755631031341705.8,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:11.573000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "8b14f19de16b7539b26676fb7d25e657"}
+	{
+	"name": "pad_mm_benchmark",
+	"ts": 1755631031573546.8,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:11.576000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "57000f52b79007b58fea7dadb68faec6"}
+	{
+	"name": "pad_mm_benchmark_get_do_bench",
+	"ts": 1755631031575236.0,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:11.577000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "684ab6cc51e1a97fc83a2617d9193a89"}
+	{
+	"name": "pad_mm_benchmark_get_do_bench",
+	"ts": 1755631031577450.5,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:13.574000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "9e2289a2565d8614dbaf7f591a0c71a0"}
+	{
+	"name": "TritonBenchmarker.benchmark_gpu",
+	"ts": 1755631033574302.8,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:13.938000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "8723cf30905af166a9f27e688633d534"}
+	{
+	"name": "TritonBenchmarker.benchmark_gpu",
+	"ts": 1755631033938727.2,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:13.940000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "782f5055c2285b1347238bb617a06264"}
+	{
+	"name": "TritonBenchmarker.benchmark_gpu",
+	"ts": 1755631033940683.0,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:14.082000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "5c19ba3069bd0fc6c56053cc0ee17d21"}
+	{
+	"name": "TritonBenchmarker.benchmark_gpu",
+	"ts": 1755631034082677.0,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:14.085000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "2d8af9f8b4561019e88ccdd4a7c79239"}
+	{
+	"name": "pad_mm_benchmark",
+	"ts": 1755631034084961.5,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:14.088000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "1d53c73457381df48799916c32b9d6e0"}
+	{
+	"name": "pad_mm_benchmark",
+	"ts": 1755631034088690.0,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:14.089000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "adc3f87a3f7d41f8df12f5b1d234d9b4"}
+	{
+	"name": "pad_mm_benchmark_get_do_bench",
+	"ts": 1755631034089778.0,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:14.090000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "9c9aa97686737676e9b28517c8d5592c"}
+	{
+	"name": "pad_mm_benchmark_get_do_bench",
+	"ts": 1755631034090892.0,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:14.092000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "02668a64656757d7a10dd08691939306"}
+	{
+	"name": "TritonBenchmarker.benchmark_gpu",
+	"ts": 1755631034092335.0,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:14.204000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "b4d4b07879507fe50a3dfea58faf9422"}
+	{
+	"name": "TritonBenchmarker.benchmark_gpu",
+	"ts": 1755631034204193.5,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:14.206000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "9705a76501f21cbef7177d5ca3445fd0"}
+	{
+	"name": "TritonBenchmarker.benchmark_gpu",
+	"ts": 1755631034206306.8,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:14.334000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "7b4dcaef7e0675dea4417a9f60dc57e2"}
+	{
+	"name": "TritonBenchmarker.benchmark_gpu",
+	"ts": 1755631034334180.0,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:14.336000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "98cf7a2fddbd6e5264c110037b4b9a10"}
+	{
+	"name": "pad_mm_benchmark",
+	"ts": 1755631034336454.5,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:14.338000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "b9ba61789c85cd982e45683b482ce64f"}
+	{
+	"name": "_recursive_joint_graph_passes",
+	"ts": 1755631034338054.0,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:14.342000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "bd1ad090c0cbcad5010ae1d99865a581"}
+	{
+	"name": "inductor_compile",
+	"ts": 1755631034342007.8,
+	"args": {
+	"fn_name": "compile_fx_inner",
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:14.354000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "4c64e47ea44971bba809d1f5bf8a81e5"}
+	{
+	"name": "fx_codegen_and_compile",
+	"ts": 1755631034353917.0,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:14.366000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_functorch/_aot_autograd/schemas.py", 25]}
+V0819 12:17:14.367000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_dynamo/repro/after_aot.py", 26]}
+V0819 12:17:14.367000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_inductor/fb/utils.py", 27]}
+V0819 12:17:14.367000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_inductor/compile_fx.py:1230] {"artifact": {"name": "fx_graph_runnable", "encoding": "string"}, "stack": [{"line": 39, "name": "", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 624, "name": "test_kernel_information_generation", "filename": 11, "loc": "torch._inductor.aoti_compile_and_package(ep, package_path=pt2_file)"}, {"line": 151, "name": "aoti_compile_and_package", "filename": 19, "loc": "return aot_inductor_minifier_wrapper("}, {"line": 1254, "name": "aot_inductor_minifier_wrapper", "filename": 20, "loc": "return func("}, {"line": 194, "name": "_aoti_compile_and_package_inner", "filename": 19, "loc": "aoti_files = aot_compile(gm, args, kwargs, options=inductor_configs)"}, {"line": 301, "name": "aot_compile", "filename": 19, "loc": "return compile_fx_aot("}, {"line": 1900, "name": "compile_fx_aot", "filename": 21, "loc": "compiled_artifacts = compile_fx("}, {"line": 2116, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2173, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2511, "name": "compile_fx", "filename": 21, "loc": "return inference_compiler(unlifted_gm, example_inputs_)"}, {"line": 1267, "name": "__call__", 
"filename": 25, "loc": "return self.compiler_fn(gm, example_inputs)"}, {"line": 2374, "name": "fw_compiler_base", "filename": 21, "loc": "return inner_compile("}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 776, "name": "compile_fx_inner", "filename": 21, "loc": "return wrap_compiler_debug(_compile_fx_inner, compiler_name=\"inductor\")("}, {"line": 141, "name": "debug_wrapper", "filename": 26, "loc": "inner_compiled_fn = compiler_fn(gm, example_inputs)"}, {"line": 167, "name": "newFunction", "filename": 27, "loc": "return old_func(*args, **kwargs)"}, {"line": 955, "name": "_compile_fx_inner", "filename": 21, "loc": "mb_compiled_graph = fx_codegen_and_compile("}, {"line": 1654, "name": "fx_codegen_and_compile", "filename": 21, "loc": "return scheme.codegen_and_compile(gm, example_inputs, inputs_to_check, graph_kwargs)"}, {"line": 1230, "name": "codegen_and_compile", "filename": 21, "loc": "trace_structured("}], "has_payload": "cb0ac49018c0e74516fc42058b037ae6"}
+	
+	import os
+	os.environ['PYTORCH_TEST_FBCODE'] = '1'
+	os.environ['TORCH_TRACE'] = '/home/shangdiy/my_trace_log_dir'
+	os.environ['PYTORCH_TEST_REMOTE_GPU'] = '1'
+	os.environ['PYTORCH_DDP_USE_SIDE_STREAM'] = '0'
+	os.environ['TRITON_ALLOW_NON_CONSTEXPR_GLOBALS'] = '1'
+	os.environ['TRITON_LIBHIP_PATH'] = '/usr/local/fbcode/platform010/lib/rocm-6.2.1/lib/libamdhip64.so'
+	os.environ['TRITON_CUPTI_LIB_PATH'] = '/usr/local/fbcode/platform010/lib/libcupti.so'
+	os.environ['TRITON_HOME'] = '/tmp/shangdiy'
+	os.environ['TORCHINDUCTOR_CACHE_DIR'] = '/tmp/tmpspd28pc5'
+	os.environ['TRITON_CACHE_DIR'] = '/tmp/tmpspd28pc5/triton'
+	
+	import torch
+	from torch import tensor, device
+	import torch.fx as fx
+	from torch._dynamo.testing import rand_strided
+	from math import inf
+	import torch._inductor.inductor_prims
+	
+	
+	
+	import torch._dynamo.config
+	import torch._inductor.config
+	import torch._functorch.config
+	import torch.fx.experimental._config
+	torch._dynamo.config.specialize_int = False
+	torch._dynamo.config.specialize_float = False
+	torch._dynamo.config.assume_static_by_default = True
+	torch._dynamo.config.automatic_dynamic_shapes = True
+	torch._dynamo.config.suppress_errors = False
+	torch._dynamo.config.capture_scalar_outputs = False
+	torch._dynamo.config.capture_dynamic_output_shape_ops = False
+	torch._dynamo.config.prefer_deferred_runtime_asserts_over_guards = False
+	torch._dynamo.config.do_not_emit_runtime_asserts = False
+	torch._dynamo.config.raise_on_ctx_manager_usage = True
+	torch._dynamo.config.allow_rnn = False
+	torch._dynamo.config.log_compilation_metrics = False
+	torch._inductor.config.fx_graph_cache = True
+	torch._inductor.config.cpp_wrapper = True
+	torch._inductor.config.compile_threads = 32
+	torch._inductor.config.triton.cudagraphs = False
+	torch._inductor.config.triton.autotune_cublasLt = False
+	torch._inductor.config.triton.autotune_at_compile_time = True
+	torch._inductor.config.triton.store_cubin = True
+	torch._inductor.config.aot_inductor.output_path = 'cwhkamk7hukdm5d55b4fxkyyok5x57mzbc2hzfy243x4xp2dcbtz'
+	torch._inductor.config.aot_inductor.serialized_in_spec = '[1, {"type": "builtins.tuple", "context": "null", "children_spec": [{"type": "builtins.tuple", "context": "null", "children_spec": [{"type": null, "context": null, "children_spec": []}, {"type": null, "context": null, "children_spec": []}, {"type": null, "context": null, "children_spec": []}, {"type": null, "context": null, "children_spec": []}]}, {"type": "builtins.dict", "context": "[]", "children_spec": []}]}]'
+	torch._inductor.config.aot_inductor.serialized_out_spec = '[1, {"type": "builtins.tuple", "context": "null", "children_spec": [{"type": null, "context": null, "children_spec": []}, {"type": null, "context": null, "children_spec": []}]}]'
+	torch._inductor.config.aot_inductor.package = True
+	torch._inductor.config.trace.provenance_tracking_level = 1
+	torch._functorch.config.functionalize_rng_ops = False
+	torch._functorch.config.enable_autograd_cache = True
+	torch._functorch.config.fake_tensor_allow_unsafe_data_ptr_access = True
+	torch._functorch.config.unlift_effect_tokens = False
+	
+	
+	
+	isolate_fails_code_str = None
+	
+	torch.ops.load_library("//caffe2/torch/fb/sparsenn:sparsenn_operators_gpu")
+	torch.ops.load_library("//caffe2/torch/fb/sparsenn:sparsenn_operators")
+	torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_cpu")
+	torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")
+	
+	"""
+	To run this script in fbcode:
+	- Create a directory (//scripts/{your_unixname}/repro)
+	- Put this file in scripts/{your_unixname}/repro/fx_graph_runnable.py
+	- Add a TARGETS file that looks like the following
+	- `buck2 run //scripts/{your_unixname}/repro:repro`
+	
+	NOTE: you may need additional deps to actually be able to run the script.
+	```
+	# Contents of TARGETS file
+	load("@fbcode_macros//build_defs:python_binary.bzl", "python_binary")
+	
+	python_binary(
+	    name = "repro",
+	    main_src = "fx_graph_runnable.py",
+	    deps = [
+	        "//caffe2:torch",
+	        "//caffe2/torch/fb/sparsenn:sparsenn_operators_gpu",
+	        "//caffe2/torch/fb/sparsenn:sparsenn_operators",
+	        "//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_cpu",
+	        "//deeplearning/fbgemm/fbgemm_gpu:sparse_ops",
+	    ],
+	)
+	```
+	"""
+	
+	# torch version: 2.9.0a0+fb
+	# torch cuda version: 12.4.0
+	# CUDA Info: 
+	# nvcc: NVIDIA (R) Cuda compiler driver 
+	# Copyright (c) 2005-2024 NVIDIA Corporation 
+	# Built on Tue_Oct_29_23:50:19_PDT_2024 
+	# Cuda compilation tools, release 12.6, V12.6.85 
+	# Build cuda_12.6.r12.6/compiler.35059454_0 
+	
+	# GPU Hardware Info: 
+	# NVIDIA PG509-210 : 8 
+	
+	
+	from torch.nn import *
+	class Repro(torch.nn.Module):
+	    def __init__(self) -> None:
+	        super().__init__()
+	        self.fc1 = Module().cuda()
+	
+	    
+	    
+	    def forward(self):
+	        arg2_1, arg3_1, arg4_1, arg5_1, = fx_pytree.tree_flatten_spec([], self._in_spec)
+	        fc1_weight = self.fc1.weight
+	        fc1_bias = self.fc1.bias
+	        permute = torch.ops.aten.permute.default(fc1_weight, [1, 0]);  fc1_weight = None
+	        addmm = torch.ops.aten.addmm.default(fc1_bias, arg2_1, permute);  fc1_bias = arg2_1 = permute = None
+	        relu = torch.ops.aten.relu.default(addmm);  addmm = None
+	        sigmoid = torch.ops.aten.sigmoid.default(relu);  relu = None
+	        mul = torch.ops.aten.mul.Tensor(arg3_1, 3.14);  arg3_1 = None
+	        addmm_1 = torch.ops.aten.addmm.default(arg5_1, mul, arg4_1);  arg5_1 = mul = arg4_1 = None
+	        mul_1 = torch.ops.aten.mul.Tensor(addmm_1, 0.5)
+	        mul_2 = torch.ops.aten.mul.Tensor(addmm_1, 0.7071067811865476);  addmm_1 = None
+	        erf = torch.ops.aten.erf.default(mul_2);  mul_2 = None
+	        add = torch.ops.aten.add.Tensor(erf, 1);  erf = None
+	        mul_3 = torch.ops.aten.mul.Tensor(mul_1, add);  mul_1 = add = None
+	        return (sigmoid, mul_3)
+	        
+	def load_args(reader):
+	    buf0 = reader.storage(None, 320, device=device(type='cuda', index=0))
+	    reader.tensor(buf0, (8, 10), is_leaf=True)  # arg2_1
+	    buf1 = reader.storage(None, 800, device=device(type='cuda', index=0))
+	    reader.tensor(buf1, (10, 20), is_leaf=True)  # arg3_1
+	    buf2 = reader.storage(None, 2400, device=device(type='cuda', index=0))
+	    reader.tensor(buf2, (20, 30), is_leaf=True)  # arg4_1
+	    buf3 = reader.storage(None, 1200, device=device(type='cuda', index=0))
+	    reader.tensor(buf3, (10, 30), is_leaf=True)  # arg5_1
+	load_args._version = 0
+	mod = Repro()
+	if __name__ == '__main__':
+	    from torch._dynamo.repro.after_aot import run_repro
+	    with torch.no_grad():
+	        run_repro(mod, load_args, accuracy=False, command='run', save_dir=None, tracing_mode='real', check_str=None)
+	        # To run it separately, do 
+	        # mod, args = run_repro(mod, load_args, accuracy=False, command='get_args', save_dir=None, tracing_mode='real', check_str=None)
+	        # mod(*args)
+V0819 12:17:14.369000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "ebf308296b1f8689a0d63c41a1db992a"}
+	{
+	"name": "additional_fake_tensor_prop",
+	"ts": 1755631034368948.8,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:14.380000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "97d42b48ddb9905a491bdf3cba1c6527"}
+	{
+	"name": "additional_fake_tensor_prop",
+	"ts": 1755631034380876.2,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:14.385000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_inductor/compile_fx.py:1279] {"artifact": {"name": "before_post_grad_graph", "encoding": "string"}, "stack": [{"line": 39, "name": "", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 624, "name": "test_kernel_information_generation", "filename": 11, "loc": "torch._inductor.aoti_compile_and_package(ep, package_path=pt2_file)"}, {"line": 151, "name": "aoti_compile_and_package", "filename": 19, "loc": "return aot_inductor_minifier_wrapper("}, {"line": 1254, "name": "aot_inductor_minifier_wrapper", "filename": 20, "loc": "return func("}, {"line": 194, "name": "_aoti_compile_and_package_inner", "filename": 19, "loc": "aoti_files = aot_compile(gm, args, kwargs, options=inductor_configs)"}, {"line": 301, "name": "aot_compile", "filename": 19, "loc": "return compile_fx_aot("}, {"line": 1900, "name": "compile_fx_aot", "filename": 21, "loc": "compiled_artifacts = compile_fx("}, {"line": 2116, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2173, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2511, "name": "compile_fx", "filename": 21, "loc": "return inference_compiler(unlifted_gm, example_inputs_)"}, {"line": 1267, "name": "__call__", 
"filename": 25, "loc": "return self.compiler_fn(gm, example_inputs)"}, {"line": 2374, "name": "fw_compiler_base", "filename": 21, "loc": "return inner_compile("}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 776, "name": "compile_fx_inner", "filename": 21, "loc": "return wrap_compiler_debug(_compile_fx_inner, compiler_name=\"inductor\")("}, {"line": 141, "name": "debug_wrapper", "filename": 26, "loc": "inner_compiled_fn = compiler_fn(gm, example_inputs)"}, {"line": 167, "name": "newFunction", "filename": 27, "loc": "return old_func(*args, **kwargs)"}, {"line": 955, "name": "_compile_fx_inner", "filename": 21, "loc": "mb_compiled_graph = fx_codegen_and_compile("}, {"line": 1654, "name": "fx_codegen_and_compile", "filename": 21, "loc": "return scheme.codegen_and_compile(gm, example_inputs, inputs_to_check, graph_kwargs)"}, {"line": 1279, "name": "codegen_and_compile", "filename": 21, "loc": "trace_structured("}], "has_payload": "40cdb2b1144299274a9c2e5a11a97ed8"}
+	class (torch.nn.Module):
+	    def forward(self):
+	        arg2_1: "f32[8, 10][10, 1]cuda:0"; arg3_1: "f32[10, 20][20, 1]cuda:0"; arg4_1: "f32[20, 30][30, 1]cuda:0"; arg5_1: "f32[10, 30][30, 1]cuda:0"; 
+	    
+	        arg2_1, arg3_1, arg4_1, arg5_1, = fx_pytree.tree_flatten_spec([], self._in_spec)
+	        # No stacktrace found for following nodes
+	        fc1_weight: "f32[16, 10][10, 1]cuda:0" = self.fc1.weight
+	        fc1_bias: "f32[16][1]cuda:0" = self.fc1.bias
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/nn/modules/linear.py:134 in forward, code: return F.linear(input, self.weight, self.bias)
+	        permute: "f32[10, 16][1, 10]cuda:0" = torch.ops.aten.permute.default(fc1_weight, [1, 0]);  fc1_weight = None
+	        addmm: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.addmm.default(fc1_bias, arg2_1, permute);  fc1_bias = arg2_1 = permute = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/nn/modules/activation.py:144 in forward, code: return F.relu(input, inplace=self.inplace)
+	        relu: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.relu.default(addmm);  addmm = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/nn/modules/activation.py:359 in forward, code: return torch.sigmoid(input)
+	        sigmoid: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.sigmoid.default(relu);  relu = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:82 in forward, code: d = a * 3.14
+	        mul: "f32[10, 20][20, 1]cuda:0" = torch.ops.aten.mul.Tensor(arg3_1, 3.14);  arg3_1 = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:83 in forward, code: y = torch.addmm(c, d, b)
+	        addmm_1: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.addmm.default(arg5_1, mul, arg4_1);  arg5_1 = mul = arg4_1 = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:84 in forward, code: z = torch.nn.functional.gelu(y)
+	        mul_1: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.mul.Tensor(addmm_1, 0.5)
+	        mul_2: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.mul.Tensor(addmm_1, 0.7071067811865476);  addmm_1 = None
+	        erf: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.erf.default(mul_2);  mul_2 = None
+	        add: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.add.Tensor(erf, 1);  erf = None
+	        mul_3: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_1, add);  mul_1 = add = None
+	        return (sigmoid, mul_3)
+	        
+V0819 12:17:14.386000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "c2e1ab260461351e26f60feba0fbef35"}
+	{
+	"name": "_recursive_post_grad_passes",
+	"ts": 1755631034386761.5,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:14.439000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "a663ba4038609b5264cc740a664dd102"}
+	{
+	"name": "_recursive_post_grad_passes",
+	"ts": 1755631034439715.5,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:14.444000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_inductor/compile_fx.py:1317] {"artifact": {"name": "after_post_grad_graph", "encoding": "string"}, "stack": [{"line": 39, "name": "", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 624, "name": "test_kernel_information_generation", "filename": 11, "loc": "torch._inductor.aoti_compile_and_package(ep, package_path=pt2_file)"}, {"line": 151, "name": "aoti_compile_and_package", "filename": 19, "loc": "return aot_inductor_minifier_wrapper("}, {"line": 1254, "name": "aot_inductor_minifier_wrapper", "filename": 20, "loc": "return func("}, {"line": 194, "name": "_aoti_compile_and_package_inner", "filename": 19, "loc": "aoti_files = aot_compile(gm, args, kwargs, options=inductor_configs)"}, {"line": 301, "name": "aot_compile", "filename": 19, "loc": "return compile_fx_aot("}, {"line": 1900, "name": "compile_fx_aot", "filename": 21, "loc": "compiled_artifacts = compile_fx("}, {"line": 2116, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2173, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2511, "name": "compile_fx", "filename": 21, "loc": "return inference_compiler(unlifted_gm, example_inputs_)"}, {"line": 1267, "name": "__call__", 
"filename": 25, "loc": "return self.compiler_fn(gm, example_inputs)"}, {"line": 2374, "name": "fw_compiler_base", "filename": 21, "loc": "return inner_compile("}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 776, "name": "compile_fx_inner", "filename": 21, "loc": "return wrap_compiler_debug(_compile_fx_inner, compiler_name=\"inductor\")("}, {"line": 141, "name": "debug_wrapper", "filename": 26, "loc": "inner_compiled_fn = compiler_fn(gm, example_inputs)"}, {"line": 167, "name": "newFunction", "filename": 27, "loc": "return old_func(*args, **kwargs)"}, {"line": 955, "name": "_compile_fx_inner", "filename": 21, "loc": "mb_compiled_graph = fx_codegen_and_compile("}, {"line": 1654, "name": "fx_codegen_and_compile", "filename": 21, "loc": "return scheme.codegen_and_compile(gm, example_inputs, inputs_to_check, graph_kwargs)"}, {"line": 1317, "name": "codegen_and_compile", "filename": 21, "loc": "trace_structured("}], "has_payload": "6eaa709538210c0772f354a3d61c2e68"}
+	class (torch.nn.Module):
+	    def forward(self):
+	        arg2_1: "f32[8, 10][10, 1]cuda:0"; arg3_1: "f32[10, 20][20, 1]cuda:0"; arg4_1: "f32[20, 30][30, 1]cuda:0"; arg5_1: "f32[10, 30][30, 1]cuda:0"; 
+	    
+	        arg2_1, arg3_1, arg4_1, arg5_1, = fx_pytree.tree_flatten_spec([], self._in_spec)
+	        # No stacktrace found for following nodes
+	        fc1_weight: "f32[16, 10][10, 1]cuda:0" = self.fc1.weight
+	        fc1_bias: "f32[16][1]cuda:0" = self.fc1.bias
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/nn/modules/linear.py:134 in forward, code: return F.linear(input, self.weight, self.bias)
+	        permute: "f32[10, 16][1, 10]cuda:0" = torch.ops.aten.permute.default(fc1_weight, [1, 0]);  fc1_weight = None
+	        mm_default_1: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.mm.default(arg2_1, permute);  arg2_1 = permute = None
+	        add_tensor_1: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_1, fc1_bias);  mm_default_1 = fc1_bias = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/nn/modules/activation.py:144 in forward, code: return F.relu(input, inplace=self.inplace)
+	        relu: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.relu.default(add_tensor_1);  add_tensor_1 = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/nn/modules/activation.py:359 in forward, code: return torch.sigmoid(input)
+	        sigmoid: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.sigmoid.default(relu);  relu = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:82 in forward, code: d = a * 3.14
+	        mul: "f32[10, 20][20, 1]cuda:0" = torch.ops.aten.mul.Tensor(arg3_1, 3.14);  arg3_1 = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:83 in forward, code: y = torch.addmm(c, d, b)
+	        mm_default: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.mm.default(mul, arg4_1);  mul = arg4_1 = None
+	        add_tensor: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default, arg5_1);  mm_default = arg5_1 = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:84 in forward, code: z = torch.nn.functional.gelu(y)
+	        mul_1: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_tensor, 0.5)
+	        mul_2: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_tensor, 0.7071067811865476);  add_tensor = None
+	        erf: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.erf.default(mul_2);  mul_2 = None
+	        add: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.add.Tensor(erf, 1);  erf = None
+	        mul_3: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_1, add);  mul_1 = add = None
+	        return (sigmoid, mul_3)
+	        
+V0819 12:17:14.450000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "84b2439db28331c02d0b456cf808b57c"}
+	{
+	"name": "GraphLowering.run",
+	"ts": 1755631034450230.8,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:14.515000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "2625ae8656c861528d7a57471a08c3fe"}
+	{
+	"name": "GraphLowering.run",
+	"ts": 1755631034515895.0,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:14.516000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "12e90de2e22a17a64deac9d90988dc67"}
+	{
+	"name": "GraphLowering.compile_to_fn",
+	"ts": 1755631034516910.5,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:14.517000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "815bef5a99f93dedcc4f5c7586e01b49"}
+	{
+	"name": "GraphLowering.codegen",
+	"ts": 1755631034517755.8,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:14.920000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "9bb19a2d03217feeb3f52c7df60d9cfa"}
+	{
+	"name": "Scheduler.__init__",
+	"ts": 1755631034920225.8,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:14.952000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "5ddae14ce346a1093e4c60c7519015a0"}
+	{
+	"name": "Scheduler.fused_nodes",
+	"ts": 1755631034951976.5,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:14.953000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "a5473fc85fb86639239f35f450617eda"}
+	{
+	"name": "Scheduler.fused_nodes",
+	"ts": 1755631034953532.8,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:14.959000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "286c5594c9a995a35144acab005004e5"}
+	{
+	"name": "Scheduler.__init__",
+	"ts": 1755631034959233.8,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:14.960000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "887c175a8f855c440d9be1e2cf0db86e"}
+	{
+	"name": "Scheduler.codegen",
+	"ts": 1755631034960085.8,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:15.022000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "e1212b2f0699fa5c4ebb2ad3f3edefc5"}
+	{
+	"name": "Scheduler.codegen",
+	"ts": 1755631035022240.5,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:15.023000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "ee9c787f272ac480a8d7673ef75615e1"}
+	{
+	"name": "CppWrapperGpu.generate",
+	"ts": 1755631035023390.0,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:15.024000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "8874e63300b9ea71451acc470c2fa525"}
+	{
+	"name": "CppWrapperCpu.generate",
+	"ts": 1755631035024265.2,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:15.027000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "4d6f15f1961f4086d9ef7d34973a3f0d"}
+	{
+	"name": "PythonWrapperCodegen.generate",
+	"ts": 1755631035027388.5,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:15.032000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "e6a3888f177717a05d134fcc172b1813"}
+	{
+	"name": "async_compile.precompile",
+	"ts": 1755631035032324.8,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:15.147000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "907b4e0c82e1b74115a5e22ffe08c050"}
+	{
+	"name": "CachingAutotuner.synchronize",
+	"ts": 1755631035146913.0,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:15.148000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "28dd1a6930067e5ef34c1467e2a6d60f"}
+	{
+	"name": "CachingAutotuner.synchronize",
+	"ts": 1755631035148481.0,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:15.200000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "7b5aa873eb861796cef055859afaa37e"}
+	{
+	"name": "async_compile.precompile",
+	"ts": 1755631035200462.5,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:15.205000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "500a2823cc070f4fd8266361346dc5a8"}
+	{
+	"name": "async_compile.precompile",
+	"ts": 1755631035205102.2,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:15.385000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "3ef3c138073641f2c2571b395e16f91a"}
+	{
+	"name": "CachingAutotuner.synchronize",
+	"ts": 1755631035385462.2,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:15.387000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "e24be073aa4b7b88e8ea57265b65b8af"}
+	{
+	"name": "CachingAutotuner.synchronize",
+	"ts": 1755631035386995.8,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:15.389000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "f64718dc768c55058ef38857f102d619"}
+	{
+	"name": "async_compile.precompile",
+	"ts": 1755631035389500.0,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:15.393000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "fccd5234583c742aea7c7115daa1311f"}
+	{
+	"name": "async_compile.precompile",
+	"ts": 1755631035393448.8,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:15.573000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "14b9d56f2a6ee3948b54a66902fcecec"}
+	{
+	"name": "CachingAutotuner.synchronize",
+	"ts": 1755631035572957.2,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:15.574000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "8cf869e1a2d7513f7d1f051bc3752248"}
+	{
+	"name": "CachingAutotuner.synchronize",
+	"ts": 1755631035574477.8,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:15.577000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "1520e9418c35f8b68a956c5a133d7103"}
+	{
+	"name": "async_compile.precompile",
+	"ts": 1755631035576984.2,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:15.580000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "a49e9a0416c8316f4d9c9c56b2c0ced0"}
+	{
+	"name": "async_compile.wait",
+	"ts": 1755631035580767.5,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:15.581000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "d58f64a9c8f0d510a46f58227488e6fb"}
+	{
+	"name": "async_compile.wait",
+	"ts": 1755631035581827.0,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:15.585000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_inductor/graph.py", 28]}
+V0819 12:17:15.586000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_inductor/codegen/cpp_wrapper_gpu.py", 29]}
+V0819 12:17:15.586000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_inductor/codegen/cpp_wrapper_cpu.py", 30]}
+V0819 12:17:15.586000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_inductor/codegen/wrapper.py", 31]}
+V0819 12:17:15.586000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["", 32]}
+V0819 12:17:15.587000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_inductor/async_compile.py", 33]}
+V0819 12:17:15.587000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_inductor/async_compile.py:117] {"artifact": {"name": "triton_kernel_info", "encoding": "json"}, "stack": [{"line": 39, "name": "", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 624, "name": "test_kernel_information_generation", "filename": 11, "loc": "torch._inductor.aoti_compile_and_package(ep, package_path=pt2_file)"}, {"line": 151, "name": "aoti_compile_and_package", "filename": 19, "loc": "return aot_inductor_minifier_wrapper("}, {"line": 1254, "name": "aot_inductor_minifier_wrapper", "filename": 20, "loc": "return func("}, {"line": 194, "name": "_aoti_compile_and_package_inner", "filename": 19, "loc": "aoti_files = aot_compile(gm, args, kwargs, options=inductor_configs)"}, {"line": 301, "name": "aot_compile", "filename": 19, "loc": "return compile_fx_aot("}, {"line": 1900, "name": "compile_fx_aot", "filename": 21, "loc": "compiled_artifacts = compile_fx("}, {"line": 2116, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2173, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2511, "name": "compile_fx", "filename": 21, "loc": "return inference_compiler(unlifted_gm, example_inputs_)"}, {"line": 1267, "name": "__call__", 
"filename": 25, "loc": "return self.compiler_fn(gm, example_inputs)"}, {"line": 2374, "name": "fw_compiler_base", "filename": 21, "loc": "return inner_compile("}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 776, "name": "compile_fx_inner", "filename": 21, "loc": "return wrap_compiler_debug(_compile_fx_inner, compiler_name=\"inductor\")("}, {"line": 141, "name": "debug_wrapper", "filename": 26, "loc": "inner_compiled_fn = compiler_fn(gm, example_inputs)"}, {"line": 167, "name": "newFunction", "filename": 27, "loc": "return old_func(*args, **kwargs)"}, {"line": 955, "name": "_compile_fx_inner", "filename": 21, "loc": "mb_compiled_graph = fx_codegen_and_compile("}, {"line": 1654, "name": "fx_codegen_and_compile", "filename": 21, "loc": "return scheme.codegen_and_compile(gm, example_inputs, inputs_to_check, graph_kwargs)"}, {"line": 1465, "name": "codegen_and_compile", "filename": 21, "loc": "wrapper_code, kernel_code = graph.codegen_with_cpp_wrapper()"}, {"line": 2219, "name": "codegen_with_cpp_wrapper", "filename": 28, "loc": "return self.codegen()"}, {"line": 2270, "name": "codegen", "filename": 28, "loc": "result = self.wrapper_code.generate(self.is_inference)"}, {"line": 355, "name": "generate", "filename": 29, "loc": "return super().generate(is_inference)"}, {"line": 977, "name": "generate", "filename": 30, "loc": "return super().generate(is_inference)"}, {"line": 1465, "name": "generate", "filename": 31, "loc": "return self._generate(is_inference)"}, {"line": 1528, "name": "_generate", "filename": 31, "loc": "self.generate_and_run_autotune_block()"}, {"line": 1606, "name": "generate_and_run_autotune_block", "filename": 31, "loc": "exec(tuning_code, scope)"}, {"line": 115, "name": "", "filename": 32, "loc": ""}, {"line": 583, "name": "wait", "filename": 33, "loc": "_compile_end()"}, {"line": 117, "name": "_compile_end", "filename": 33, "loc": "torch._logging.trace_structured("}], "has_payload": "64835d9f685ee082909facee8a5ee175"}
+	{"triton_poi_fused_addmm_gelu_2": {"autotune_cache_state": "miss", "num_configs": 2, "compile_time_us": 182513}, "triton_poi_fused_addmm_relu_sigmoid_0": {"autotune_cache_state": "only 1 config", "only_config": [["XBLOCK", 128], ["num_warps", 4], ["num_stages", 1]], "compile_time_us": 166812}, "triton_poi_fused_mul_1": {"autotune_cache_state": "miss", "num_configs": 2, "compile_time_us": 183348}}
+V0819 12:17:15.590000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "ad4ab41028b8d64322890f1ea6e52020"}
+	{
+	"name": "CachingAutotuner.benchmark_all_configs",
+	"ts": 1755631035590284.2,
+	"args": {
+	"kernel_name": "triton_poi_fused_mul_1",
+	"is_backward": false,
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:15.591000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "15d8d0a903e4a45cdb7c6e2f083a30d3"}
+	{
+	"name": "TritonBenchmarker.benchmark_gpu",
+	"ts": 1755631035591686.5,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:15.638000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "d6ea5aba8c6809f793323c5c722ae78b"}
+	{
+	"name": "TritonBenchmarker.benchmark_gpu",
+	"ts": 1755631035638345.5,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:15.639000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "6012720b1ae40bd8f126bb4afa3194a6"}
+	{
+	"name": "TritonBenchmarker.benchmark_gpu",
+	"ts": 1755631035639761.5,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:15.688000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "226d671a240d857cdd6131619ce51ff0"}
+	{
+	"name": "TritonBenchmarker.benchmark_gpu",
+	"ts": 1755631035687937.5,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:15.688000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "4bae60db312c469ca4ab3076f437d4b8"}
+	{
+	"name": "CachingAutotuner.benchmark_all_configs",
+	"ts": 1755631035688898.0,
+	"args": {
+	"kernel_name": "triton_poi_fused_mul_1",
+	"is_backward": false,
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:15.694000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "8254544d8e0f9c83be896a08de9dd48c"}
+	{
+	"name": "CachingAutotuner.benchmark_all_configs",
+	"ts": 1755631035694228.8,
+	"args": {
+	"kernel_name": "triton_poi_fused_addmm_gelu_2",
+	"is_backward": false,
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:15.695000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "bc095e74b312f69c84cf06b71d6f94ea"}
+	{
+	"name": "TritonBenchmarker.benchmark_gpu",
+	"ts": 1755631035695570.5,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:15.755000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "b803ca2ed15090323113e6f24e54e727"}
+	{
+	"name": "TritonBenchmarker.benchmark_gpu",
+	"ts": 1755631035755541.8,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:15.757000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "acbb78661f81ed5b283e84a3e46f2237"}
+	{
+	"name": "TritonBenchmarker.benchmark_gpu",
+	"ts": 1755631035757007.2,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:15.815000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "4f44b57f9e263de22a3dfa9d7bb4686d"}
+	{
+	"name": "TritonBenchmarker.benchmark_gpu",
+	"ts": 1755631035815819.0,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:15.816000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "bd6eb280833425e5fd7a410c577cb8ec"}
+	{
+	"name": "CachingAutotuner.benchmark_all_configs",
+	"ts": 1755631035816716.2,
+	"args": {
+	"kernel_name": "triton_poi_fused_addmm_gelu_2",
+	"is_backward": false,
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:15.826000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "d11d04c0060a6c5ab3111b49cfd51389"}
+	{
+	"name": "PythonWrapperCodegen.generate",
+	"ts": 1755631035826016.2,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:15.826000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "d23f889cf870423c0766500f2436a532"}
+	{
+	"name": "CppWrapperCpu.generate",
+	"ts": 1755631035826839.8,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:15.830000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "93f015aa33a6d3a9590a0b8aeb2de231"}
+	{
+	"name": "CppWrapperGpu.generate",
+	"ts": 1755631035830160.5,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:15.833000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "466bf05776662915805ecd996f683c05"}
+	{
+	"name": "GraphLowering.codegen",
+	"ts": 1755631035833401.0,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:15.836000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "6a85f95f036828ca68e6367ef8ccfd26"}
+	{
+	"name": "AotCodeCompiler.compile",
+	"ts": 1755631035836648.2,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:15.846000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_inductor/codecache.py", 34]}
+V0819 12:17:15.846000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_inductor/codecache.py:1790] {"graph_dump": {"name": "inductor_aot_wrapper_code", "type": "cpp", "filename": "/tmp/tmpspd28pc5/cwhkamk7hukdm5d55b4fxkyyok5x57mzbc2hzfy243x4xp2dcbtz/c2zi7pbvbb6r2z2ilqqn22mpt7jxdy72w5fymrtjqrpewk5akujk.wrapper.cpp"}, "stack": [{"line": 39, "name": "", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 624, "name": "test_kernel_information_generation", "filename": 11, "loc": "torch._inductor.aoti_compile_and_package(ep, package_path=pt2_file)"}, {"line": 151, "name": "aoti_compile_and_package", "filename": 19, "loc": "return aot_inductor_minifier_wrapper("}, {"line": 1254, "name": "aot_inductor_minifier_wrapper", "filename": 20, "loc": "return func("}, {"line": 194, "name": "_aoti_compile_and_package_inner", "filename": 19, "loc": "aoti_files = aot_compile(gm, args, kwargs, options=inductor_configs)"}, {"line": 301, "name": "aot_compile", "filename": 19, "loc": "return compile_fx_aot("}, {"line": 1900, "name": "compile_fx_aot", "filename": 21, "loc": "compiled_artifacts = compile_fx("}, {"line": 2116, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2173, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 
2511, "name": "compile_fx", "filename": 21, "loc": "return inference_compiler(unlifted_gm, example_inputs_)"}, {"line": 1267, "name": "__call__", "filename": 25, "loc": "return self.compiler_fn(gm, example_inputs)"}, {"line": 2374, "name": "fw_compiler_base", "filename": 21, "loc": "return inner_compile("}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 776, "name": "compile_fx_inner", "filename": 21, "loc": "return wrap_compiler_debug(_compile_fx_inner, compiler_name=\"inductor\")("}, {"line": 141, "name": "debug_wrapper", "filename": 26, "loc": "inner_compiled_fn = compiler_fn(gm, example_inputs)"}, {"line": 167, "name": "newFunction", "filename": 27, "loc": "return old_func(*args, **kwargs)"}, {"line": 955, "name": "_compile_fx_inner", "filename": 21, "loc": "mb_compiled_graph = fx_codegen_and_compile("}, {"line": 1654, "name": "fx_codegen_and_compile", "filename": 21, "loc": "return scheme.codegen_and_compile(gm, example_inputs, inputs_to_check, graph_kwargs)"}, {"line": 1490, "name": "codegen_and_compile", "filename": 21, "loc": "compiled_fn = AotCodeCompiler.compile("}, {"line": 1790, "name": "compile", "filename": 34, "loc": "trace_structured("}], "has_payload": "78b00c43b8ee3127faca1bd40f61ff57"}
+	
+	#include 
+	// Definition of AOTI runtime interface functions
+	
+	#include 
+	#include 
+	
+	#include 
+	#include 
+	
+	#define CONVERT_EXCEPTION_TO_ERROR_CODE(...)      \
+	  try {                                           \
+	    __VA_ARGS__                                   \
+	  } catch (const std::exception& e) {             \
+	    std::cerr << "Error: " << e.what() << '\n';   \
+	    return AOTI_RUNTIME_FAILURE;                  \
+	  } catch (...) {                                 \
+	    std::cerr << "Unknown exception occurred.\n"; \
+	    return AOTI_RUNTIME_FAILURE;                  \
+	  }                                               \
+	  return AOTI_RUNTIME_SUCCESS;
+	
+	#define AOTI_VECTOR_SIZE_CHECK(actual_size, expected_size, name)  \
+	  do {                                                            \
+	    AOTI_RUNTIME_CHECK(                                           \
+	        actual_size == expected_size,                             \
+	        "expected " + std::string(name) + " vector size to be " + \
+	            std::to_string(expected_size) + ", but got " +        \
+	            std::to_string(actual_size));                         \
+	  } while (0)
+	
+	// AOTInductor uses at::addmm_out, which doesn't support
+	// arguments that require gradient. For this reason, we
+	// enforce no_grad context for run APIs.
+	//
+	// A RAII, thread local (!) guard that enables or disables grad mode upon
+	// construction, and sets it back to the original value upon destruction.
+	struct AOTINoGradGuard {
+	  AOTINoGradGuard() {
+	    aoti_torch_grad_mode_set_enabled(false);
+	  }
+	  AOTINoGradGuard(const AOTINoGradGuard&) = delete;
+	  AOTINoGradGuard(AOTINoGradGuard&&) noexcept = delete;
+	  ~AOTINoGradGuard() {
+	    aoti_torch_grad_mode_set_enabled(prev_mode);
+	  }
+	  AOTINoGradGuard& operator=(const AOTINoGradGuard&) = delete;
+	  AOTINoGradGuard& operator=(AOTINoGradGuard&&) noexcept = delete;
+	  bool prev_mode{aoti_torch_grad_mode_is_enabled()};
+	};
+	
+	extern "C" {
+	
+	AOTIRuntimeError AOTInductorModelContainerCreate(
+	    AOTInductorModelContainerHandle* container_handle,
+	    size_t num_models,
+	    bool is_cpu,
+	    const char* cubin_dir) {
+	      return AOTInductorModelContainerCreateWithDevice(
+	        container_handle,
+	        num_models,
+	        is_cpu ? "cpu" : "cuda",
+	        cubin_dir);
+	}
+	
+	AOTIRuntimeError AOTInductorModelContainerCreateWithDevice(
+	    AOTInductorModelContainerHandle* container_handle,
+	    size_t num_models,
+	    const char* device_str,
+	    const char* cubin_dir) {
+	  if (num_models == 0) {
+	    std::cerr << "Error: num_models must be positive, but got 0\n";
+	    return AOTI_RUNTIME_FAILURE;
+	  }
+	  CONVERT_EXCEPTION_TO_ERROR_CODE({
+	    std::optional<std::string> cubin_dir_opt;
+	    if (cubin_dir != nullptr) {
+	      cubin_dir_opt.emplace(cubin_dir);
+	    }
+	    auto* container = new torch::aot_inductor::AOTInductorModelContainer(
+	        num_models, std::string(device_str), cubin_dir_opt);
+	    *container_handle =
+	        reinterpret_cast<AOTInductorModelContainerHandle>(container);
+	  })
+	}
+	
+	AOTIRuntimeError AOTInductorModelContainerDelete(
+	    AOTInductorModelContainerHandle container_handle) {
+	  CONVERT_EXCEPTION_TO_ERROR_CODE({
+	    auto* container =
+	      reinterpret_cast<torch::aot_inductor::AOTInductorModelContainer*>(
+	            container_handle);
+	    delete container;
+	  });
+	}
+	
+	AOTIRuntimeError AOTInductorModelContainerRun(
+	    AOTInductorModelContainerHandle container_handle,
+	    AtenTensorHandle* input_handles, // array of input AtenTensorHandle; handles
+	                                     // are stolen; the array itself is borrowed
+	    size_t num_inputs,
+	    AtenTensorHandle*
+	        output_handles, // array for writing output AtenTensorHandle; handles
+	                        // will be stolen by the caller; the array itself is
+	                        // borrowed
+	    size_t num_outputs,
+	    AOTInductorStreamHandle stream_handle,
+	    AOTIProxyExecutorHandle proxy_executor_handle) {
+	  auto* container =
+	      reinterpret_cast<torch::aot_inductor::AOTInductorModelContainer*>(
+	          container_handle);
+	  AOTI_VECTOR_SIZE_CHECK(num_inputs, container->num_inputs(), "inputs");
+	  AOTI_VECTOR_SIZE_CHECK(num_outputs, container->num_outputs(), "outputs");
+	
+	  auto stream =
+	      reinterpret_cast<torch::aot_inductor::DeviceStreamType>(stream_handle);
+	  CONVERT_EXCEPTION_TO_ERROR_CODE({
+	    AOTINoGradGuard guard;
+	    container->run(
+	        input_handles, output_handles, stream, proxy_executor_handle);
+	  })
+	}
+	
+	AOTIRuntimeError AOTInductorModelContainerRunSingleThreaded(
+	    AOTInductorModelContainerHandle container_handle,
+	    AtenTensorHandle* input_handles, // array of input AtenTensorHandle; handles
+	                                     // are stolen; the array itself is borrowed
+	    size_t num_inputs,
+	    AtenTensorHandle*
+	        output_handles, // array for writing output AtenTensorHandle; handles
+	                        // will be stolen by the caller; the array itself is
+	                        // borrowed
+	    size_t num_outputs,
+	    AOTInductorStreamHandle stream_handle,
+	    AOTIProxyExecutorHandle proxy_executor_handle) {
+	  auto* container =
+	      reinterpret_cast<torch::aot_inductor::AOTInductorModelContainer*>(
+	          container_handle);
+	  AOTI_VECTOR_SIZE_CHECK(num_inputs, container->num_inputs(), "inputs");
+	  AOTI_VECTOR_SIZE_CHECK(num_outputs, container->num_outputs(), "outputs");
+	
+	  auto stream =
+	      reinterpret_cast<torch::aot_inductor::DeviceStreamType>(stream_handle);
+	  CONVERT_EXCEPTION_TO_ERROR_CODE({
+	    AOTINoGradGuard guard;
+	    container->run_single_threaded(
+	        input_handles, output_handles, stream, proxy_executor_handle);
+	  })
+	}
+	
+	AOTIRuntimeError AOTInductorModelContainerGetNumConstants(
+	    AOTInductorModelContainerHandle container_handle,
+	    size_t* num_constants) {
+	  auto* container =
+	      reinterpret_cast<torch::aot_inductor::AOTInductorModelContainer*>(
+	          container_handle);
+	  CONVERT_EXCEPTION_TO_ERROR_CODE(
+	    { *num_constants = container->num_constants(); })
+	}
+	
+	AOTIRuntimeError AOTInductorModelContainerGetConstantName(
+	    AOTInductorModelContainerHandle container_handle,
+	    size_t idx,
+	    const char** name) {
+	  auto* container =
+	      reinterpret_cast<torch::aot_inductor::AOTInductorModelContainer*>(
+	          container_handle);
+	  CONVERT_EXCEPTION_TO_ERROR_CODE(
+	    { *name = container->constant_name(idx); })
+	}
+	
+	AOTIRuntimeError AOTInductorModelContainerGetConstantOriginalFQN(
+	    AOTInductorModelContainerHandle container_handle,
+	    size_t idx,
+	    const char** original_fqn) {
+	  auto* container =
+	      reinterpret_cast<torch::aot_inductor::AOTInductorModelContainer*>(
+	          container_handle);
+	  CONVERT_EXCEPTION_TO_ERROR_CODE(
+	    { *original_fqn = container->constant_original_fqn(idx); })
+	}
+	
+	AOTIRuntimeError AOTInductorModelContainerGetConstantFromFolded(
+	    AOTInductorModelContainerHandle container_handle,
+	    size_t idx,
+	    bool* from_folded) {
+	  auto* container =
+	      reinterpret_cast<torch::aot_inductor::AOTInductorModelContainer*>(container_handle);
+	  CONVERT_EXCEPTION_TO_ERROR_CODE({ *from_folded = container->constant_from_folded(idx); })
+	}
+	
+	AOTIRuntimeError AOTInductorModelContainerGetConstantType(
+	    AOTInductorModelContainerHandle container_handle,
+	    size_t idx,
+	    int32_t* type) {
+	  auto* container =
+	      reinterpret_cast<torch::aot_inductor::AOTInductorModelContainer*>(container_handle);
+	  CONVERT_EXCEPTION_TO_ERROR_CODE({ *type = container->constant_type(idx); })
+	}
+	
+	AOTIRuntimeError AOTInductorModelContainerGetConstantDtype(
+	    AOTInductorModelContainerHandle container_handle,
+	    size_t idx,
+	    int32_t* dtype) {
+	  auto* container =
+	      reinterpret_cast<torch::aot_inductor::AOTInductorModelContainer*>(
+	          container_handle);
+	  CONVERT_EXCEPTION_TO_ERROR_CODE(
+	    { *dtype = container->constant_dtype(idx); })
+	}
+	
+	AOTIRuntimeError AOTInductorModelContainerGetConstantDataSize(
+	  AOTInductorModelContainerHandle container_handle,
+	  size_t idx,
+	  size_t* data_size) {
+	  auto* container =
+	    reinterpret_cast<torch::aot_inductor::AOTInductorModelContainer*>(
+	        container_handle);
+	  CONVERT_EXCEPTION_TO_ERROR_CODE(
+	    { *data_size = container->constant_data_size(idx); })
+	}
+	
+	AOTIRuntimeError AOTInductorModelContainerExtractConstantsMap(
+	    AOTInductorModelContainerHandle container_handle,
+	    AOTInductorConstantMapHandle constant_map_handle,
+	    bool use_inactive) {
+	  auto* container =
+	      reinterpret_cast<torch::aot_inductor::AOTInductorModelContainer*>(
+	          container_handle);
+	  auto constants_map = reinterpret_cast<std::unordered_map<std::string, AtenTensorHandle>*>(constant_map_handle);
+	  CONVERT_EXCEPTION_TO_ERROR_CODE(
+	    { const auto ret = container->extract_constants_map(use_inactive);
+	      for (const auto& pair: ret) {
+	        constants_map->emplace(pair.first, pair.second);
+	      }
+	    })
+	}
+	
+	AOTIRuntimeError AOTInductorModelContainerUpdateUserManagedConstantBuffer(
+	    AOTInductorModelContainerHandle container_handle,
+	    AOTInductorConstantMapHandle constant_map_handle,
+	    bool use_inactive,
+	    bool validate_full_update) {
+	  auto* container =
+	      reinterpret_cast<torch::aot_inductor::AOTInductorModelContainer*>(
+	          container_handle);
+	  auto input_map = reinterpret_cast<std::unordered_map<std::string, AtenTensorHandle>*>(constant_map_handle);
+	  CONVERT_EXCEPTION_TO_ERROR_CODE({
+	    container->update_constant_buffer(
+	        *input_map, use_inactive, validate_full_update, /* user_managed = */ true);
+	  })
+	}
+	
+	AOTIRuntimeError AOTInductorModelContainerUpdateConstantBuffer(
+	    AOTInductorModelContainerHandle container_handle,
+	    AOTInductorConstantMapHandle constant_map_handle,
+	    bool use_inactive,
+	    bool validate_full_update) {
+	  auto* container =
+	      reinterpret_cast<torch::aot_inductor::AOTInductorModelContainer*>(
+	          container_handle);
+	  auto input_map = reinterpret_cast<std::unordered_map<std::string, AtenTensorHandle>*>(constant_map_handle);
+	  CONVERT_EXCEPTION_TO_ERROR_CODE({
+	    container->update_constant_buffer(
+	        *input_map, use_inactive, validate_full_update);
+	  })
+	}
+	
+	AOTIRuntimeError AOTInductorModelContainerUpdateInactiveConstantBuffer(
+	    AOTInductorModelContainerHandle container_handle,
+	    AOTInductorConstantMapHandle constant_map_handle) {
+	  return AOTInductorModelContainerUpdateConstantBuffer(container_handle,
+	          constant_map_handle,
+	          /*use_inactive*/ true,
+	          /*validate_full_update*/ true);
+	}
+	
+	AOTIRuntimeError AOTInductorModelContainerFreeInactiveConstantBuffer(
+	    AOTInductorModelContainerHandle container_handle) {
+	  auto* container =
+	      reinterpret_cast<torch::aot_inductor::AOTInductorModelContainer*>(
+	          container_handle);
+	  CONVERT_EXCEPTION_TO_ERROR_CODE({
+	    container->free_inactive_constant_buffer();
+	  })
+	}
+	
+	AOTIRuntimeError AOTInductorModelContainerRunConstantFolding(
+	    AOTInductorModelContainerHandle container_handle,
+	    bool use_inactive,
+	    AOTInductorStreamHandle stream_handle,
+	    AOTIProxyExecutorHandle proxy_executor_handle) {
+	  auto* container =
+	      reinterpret_cast<torch::aot_inductor::AOTInductorModelContainer*>(
+	          container_handle);
+	  auto stream =
+	      reinterpret_cast<torch::aot_inductor::DeviceStreamType>(stream_handle);
+	  CONVERT_EXCEPTION_TO_ERROR_CODE({
+	    AOTINoGradGuard guard;
+	    container->run_const_fold(use_inactive, stream, proxy_executor_handle);
+	  })
+	}
+	
+	AOTIRuntimeError AOTInductorModelContainerSwapConstantBuffer(
+	    AOTInductorModelContainerHandle container_handle) {
+	  auto* container =
+	      reinterpret_cast<torch::aot_inductor::AOTInductorModelContainer*>(
+	          container_handle);
+	  CONVERT_EXCEPTION_TO_ERROR_CODE({
+	    container->swap_constant_buffer();
+	  })
+	}
+	
+	AOTIRuntimeError AOTInductorModelContainerGetNumInputs(
+	    AOTInductorModelContainerHandle container_handle,
+	    size_t* ret_num_inputs) {
+	  auto* container =
+	      reinterpret_cast<torch::aot_inductor::AOTInductorModelContainer*>(
+	          container_handle);
+	  CONVERT_EXCEPTION_TO_ERROR_CODE(
+	      { *ret_num_inputs = container->num_inputs(); })
+	}
+	
+	AOTIRuntimeError AOTInductorModelContainerGetInputName(
+	    AOTInductorModelContainerHandle container_handle,
+	    size_t input_idx,
+	    const char** ret_input_names) {
+	  auto* container =
+	      reinterpret_cast<torch::aot_inductor::AOTInductorModelContainer*>(
+	          container_handle);
+	  CONVERT_EXCEPTION_TO_ERROR_CODE(
+	      { *ret_input_names = container->input_name(input_idx); })
+	}
+	
+	AOTIRuntimeError AOTInductorModelContainerGetNumOutputs(
+	    AOTInductorModelContainerHandle container_handle,
+	    size_t* ret_num_outputs) {
+	  auto* container =
+	      reinterpret_cast<torch::aot_inductor::AOTInductorModelContainer*>(
+	          container_handle);
+	  CONVERT_EXCEPTION_TO_ERROR_CODE(
+	      { *ret_num_outputs = container->num_outputs(); })
+	}
+	
+	AOTIRuntimeError AOTInductorModelContainerGetOutputName(
+	    AOTInductorModelContainerHandle container_handle,
+	    size_t output_idx,
+	    const char** ret_output_names) {
+	  auto* container =
+	      reinterpret_cast<torch::aot_inductor::AOTInductorModelContainer*>(
+	          container_handle);
+	  CONVERT_EXCEPTION_TO_ERROR_CODE(
+	      { *ret_output_names = container->output_name(output_idx); })
+	}
+	
+	AOTIRuntimeError AOTInductorModelContainerGetCallSpec(
+	    AOTInductorModelContainerHandle container_handle,
+	    const char** in_spec,
+	    const char** out_spec) {
+	  auto* container =
+	      reinterpret_cast<torch::aot_inductor::AOTInductorModelContainer*>(
+	          container_handle);
+	  CONVERT_EXCEPTION_TO_ERROR_CODE({
+	    *in_spec = container->get_in_spec();
+	    *out_spec = container->get_out_spec();
+	  })
+	}
+	
+	AOTIRuntimeError AOTInductorModelCreate(
+	    AOTInductorModelHandle* model_handle,
+	    AOTInductorConstantMapHandle constant_map_handle){
+	    CONVERT_EXCEPTION_TO_ERROR_CODE({
+	      auto constant_map = std::make_shared<ConstantMap>();
+	      auto constant_array = std::make_shared<std::vector<ConstantHandle>>();
+	      auto input_map = reinterpret_cast<std::unordered_map<std::string, AtenTensorHandle>*>(constant_map_handle);
+	
+	      auto model = new torch::aot_inductor::AOTInductorModel(
+	          constant_map,
+	          constant_array,
+	          "cpu", // device_str is hardcoded, as AOTInductorModelCreate is only used for CPU models
+	          ""
+	      );
+	
+	      if (input_map) {
+	        for (auto const& kv : *input_map) {
+	          constant_map->emplace(kv.first, kv.second);
+	        }
+	      } else {
+	        model->load_constants();
+	      }
+	
+	      *model_handle = reinterpret_cast<AOTInductorModelHandle>(model);
+	    })}
+	
+	AOTIRuntimeError AOTInductorModelRun(
+	    AOTInductorModelHandle model_handle,
+	    AtenTensorHandle* input_handles,
+	    AtenTensorHandle* output_handles) {
+	  auto model =
+	      reinterpret_cast<torch::aot_inductor::AOTInductorModel*>(model_handle);
+	  CONVERT_EXCEPTION_TO_ERROR_CODE({
+	    AOTINoGradGuard guard;
+	    model->run_impl(
+	        input_handles,
+	        output_handles,
+	        (torch::aot_inductor::DeviceStreamType) nullptr,
+	        nullptr);
+	  })
+	}
+	
+	AOTIRuntimeError AOTInductorModelDelete(AOTInductorModelHandle model_handle){
+	    CONVERT_EXCEPTION_TO_ERROR_CODE({
+	      auto model = reinterpret_cast<torch::aot_inductor::AOTInductorModel*>(
+	          model_handle);
+	      delete model;
+	    })}
+	
+	AOTIRuntimeError AOTInductorModelGetNumOutputs(
+	    AOTInductorModelHandle model_handle,
+	    size_t* ret_num_outputs) {
+	  CONVERT_EXCEPTION_TO_ERROR_CODE({
+	      auto model = reinterpret_cast<torch::aot_inductor::AOTInductorModel*>(model_handle);
+	      *ret_num_outputs = model->num_outputs();
+	  })
+	}
+	
+	AOTIRuntimeError AOTInductorModelUpdateConstantsMap(
+	    AOTInductorModelHandle model_handle,
+	    AOTInductorConstantMapHandle constant_map_handle) {
+	  auto model =
+	      reinterpret_cast<torch::aot_inductor::AOTInductorModel*>(model_handle);
+	  CONVERT_EXCEPTION_TO_ERROR_CODE({
+	    auto constant_map = std::make_shared<ConstantMap>();
+	    auto input_map =
+	        reinterpret_cast<std::unordered_map<std::string, AtenTensorHandle>*>(
+	            constant_map_handle);
+	
+	    for (auto const& kv : *input_map) {
+	      constant_map->emplace(kv.first, kv.second);
+	    }
+	    model->update_constants_map(std::move(constant_map));
+	  })
+	}
+	
+	} // extern "C"
+	
+	
+	#define CUDA_DRIVER_CHECK(EXPR)                    \
+	do {                                               \
+	    CUresult code = EXPR;                          \
+	    const char *msg;                               \
+	    CUresult code_get_error = cuGetErrorString(code, &msg); \
+	    if (code_get_error != CUDA_SUCCESS) {          \
+	        throw std::runtime_error(                  \
+	            std::string("CUDA driver error: ") +   \
+	            std::string("invalid error code!"));   \
+	    }                                              \
+	    if (code != CUDA_SUCCESS) {                    \
+	        throw std::runtime_error(                  \
+	            std::string("CUDA driver error: ") +   \
+	            std::string(msg));                     \
+	    }                                              \
+	} while (0);
+	
+	static inline CUfunction loadKernel(
+	        std::string filePath,
+	        const std::string &funcName,
+	        uint32_t sharedMemBytes,
+	        const std::optional<std::string> &cubinDir = std::nullopt) {
+	    if (cubinDir) {
+	        std::filesystem::path p1{*cubinDir};
+	        std::filesystem::path p2{filePath};
+	        filePath = (p1 / p2.filename()).string();
+	    }
+	
+	    CUmodule mod;
+	    CUfunction func;
+	    CUDA_DRIVER_CHECK(cuModuleLoad(&mod, filePath.c_str()));
+	    CUDA_DRIVER_CHECK(cuModuleGetFunction(&func, mod, funcName.c_str()));
+	    if (sharedMemBytes > 0) {
+	        CUDA_DRIVER_CHECK(cuFuncSetAttribute(
+	            func,
+	            CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES,
+	            sharedMemBytes
+	        ))
+	    }
+	    return func;
+	}
+	
+	static inline CUfunction loadKernel(const void* start, const std::string &funcName, uint32_t sharedMemBytes) {
+	    CUmodule mod;
+	    CUfunction func;
+	    CUDA_DRIVER_CHECK(cuModuleLoadData(&mod, start));
+	    CUDA_DRIVER_CHECK(cuModuleGetFunction(&func, mod, funcName.c_str()));
+	    if (sharedMemBytes > 0) {
+	        CUDA_DRIVER_CHECK(cuFuncSetAttribute(
+	            func,
+	            CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES,
+	            sharedMemBytes
+	        ))
+	    }
+	    return func;
+	}
+	
+	static inline void launchKernel(
+	        CUfunction func,
+	        uint32_t gridX,
+	        uint32_t gridY,
+	        uint32_t gridZ,
+	        uint32_t numWarps,
+	        uint32_t sharedMemBytes,
+	        void* args[],
+	        cudaStream_t stream) {
+	    CUDA_DRIVER_CHECK(cuLaunchKernel(
+	        func, gridX, gridY, gridZ, 32*numWarps, 1, 1, sharedMemBytes, stream, args, nullptr
+	    ));
+	}
+	CACHE_TORCH_DTYPE(float32);
+	CACHE_TORCH_DEVICE(cuda);
+	CACHE_TORCH_LAYOUT(strided);
+	namespace torch::aot_inductor {
+	namespace {
+	class AOTInductorModelKernels : public AOTInductorModelKernelsBase {
+	  public:
+	    CUfunction triton_poi_fused_addmm_gelu_2{nullptr};
+	    CUfunction triton_poi_fused_addmm_relu_sigmoid_0{nullptr};
+	    CUfunction triton_poi_fused_mul_1{nullptr};
+	};
+	}  // namespace
+	
+	
+	
+	AOTInductorModel::AOTInductorModel(std::shared_ptr<ConstantMap> constants_map,
+	                                   std::shared_ptr<std::vector<ConstantHandle>> constants_array,
+	                                   const std::string& device_str,
+	                                   std::optional<std::string> cubin_dir)
+	    : AOTInductorModelBase(4,
+	                           2,
+	                           2,
+	                           device_str,
+	                           std::move(cubin_dir),
+	                           true) {
+	    inputs_info_[0].name = "arg2_1";
+	    inputs_info_[1].name = "arg3_1";
+	    inputs_info_[2].name = "arg4_1";
+	    inputs_info_[3].name = "arg5_1";
+	    constants_info_[0].name = "fc1_weight";
+	    constants_info_[0].dtype = static_cast<int32_t>(cached_torch_dtype_float32);
+	    constants_info_[0].offset = 0;
+	    constants_info_[0].data_size = 640;
+	    constants_info_[0].from_folded = false;
+	    constants_info_[0].type = static_cast<int32_t>(torch::aot_inductor::ConstantType::Parameter);
+	    constants_info_[0].shape = {16, 10};
+	    constants_info_[0].stride = {10, 1};
+	    constants_info_[0].layout = static_cast<int32_t>(cached_torch_layout_strided);
+	    constants_info_[0].original_fqn = "fc1.weight";
+	    constants_info_[1].name = "fc1_bias";
+	    constants_info_[1].dtype = static_cast<int32_t>(cached_torch_dtype_float32);
+	    constants_info_[1].offset = 0;
+	    constants_info_[1].data_size = 64;
+	    constants_info_[1].from_folded = false;
+	    constants_info_[1].type = static_cast<int32_t>(torch::aot_inductor::ConstantType::Parameter);
+	    constants_info_[1].shape = {16};
+	    constants_info_[1].stride = {1};
+	    constants_info_[1].layout = static_cast<int32_t>(cached_torch_layout_strided);
+	    constants_info_[1].original_fqn = "fc1.bias";
+	    update_constants_map(std::move(constants_map));
+	    update_constants_array(std::move(constants_array));
+	    in_spec_ = R"([1, {"type": "builtins.tuple", "context": "null", "children_spec": [{"type": "builtins.tuple", "context": "null", "children_spec": [{"type": null, "context": null, "children_spec": []}, {"type": null, "context": null, "children_spec": []}, {"type": null, "context": null, "children_spec": []}, {"type": null, "context": null, "children_spec": []}]}, {"type": "builtins.dict", "context": "[]", "children_spec": []}]}])";
+	    out_spec_ = R"([1, {"type": "builtins.tuple", "context": "null", "children_spec": [{"type": null, "context": null, "children_spec": []}, {"type": null, "context": null, "children_spec": []}]}])";
+	    outputs_info_[0].name = "output0";
+	    outputs_info_[1].name = "output1";
+	    this->kernels_ = std::make_unique<AOTInductorModelKernels>();
+	}
+	
+	std::unordered_map<std::string, AtenTensorHandle> AOTInductorModel::const_run_impl(
+	    DeviceStreamType stream,
+	    AOTIProxyExecutorHandle proxy_executor,
+	    bool initialization
+	) {
+	
+	    if (!initialization) {
+	        std::cerr << "[WARNING] Calling constant_folding in model, but compiled with config: "
+	                  << "aot_inductor.use_runtime_constant_folding=False\n";
+	    }
+	    return {};
+	}
+	} // namespace torch::aot_inductor
+	using namespace torch::aot_inductor;
+	
+	template <typename in_out_ptr0_type_, typename in_ptr0_type_, typename kernels_type_>
+	static inline void call_triton_poi_fused_addmm_relu_sigmoid_0(
+	    const in_out_ptr0_type_& in_out_ptr0,
+	    const in_ptr0_type_& in_ptr0,
+	    int64_t xnumel,
+	    int32_t device_idx_,
+	    cudaStream_t stream_,
+	    kernels_type_& kernels_,
+	    const std::optional<std::string>& cubin_dir_ = std::nullopt
+	){
+	    /*
+	    async_compile.triton('triton_poi_fused_addmm_relu_sigmoid_0', '''
+	    import triton
+	    import triton.language as tl
+	
+	    from torch._inductor.runtime import triton_helpers, triton_heuristics
+	    from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math
+	    from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, DeviceProperties
+	    triton_helpers.set_driver_to_gpu()
+	
+	    @triton_heuristics.pointwise(
+	        size_hints={'x': 128}, 
+	        filename=__file__,
+	        triton_meta={'signature': {'in_out_ptr0': '*fp32', 'in_ptr0': '*fp32', 'xnumel': 'i32', 'XBLOCK': 'constexpr'}, 'device': DeviceProperties(type='cuda', index=0, multi_processor_count=108, cc=80, major=8, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, warp_size=32), 'constants': {}, 'configs': [{(0,): [['tt.divisibility', 16]], (1,): [['tt.divisibility', 16]], (2,): [['tt.divisibility', 16]]}]},
+	        inductor_meta={'grid_type': 'Grid1D', 'autotune_hints': set(), 'kernel_name': 'triton_poi_fused_addmm_relu_sigmoid_0', 'mutated_arg_names': ['in_out_ptr0'], 'optimize_mem': True, 'no_x_dim': False, 'num_load': 2, 'num_reduction': 0, 'backend_hash': '3E91F1C483CA40D8EC1B9AFBB282475C75659A34F6F2D59AE8336D7E5E05BEAA', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': True, 'is_fbcode': True},
+	        min_elem_per_thread=0
+	    )
+	    @triton.jit
+	    def triton_poi_fused_addmm_relu_sigmoid_0(in_out_ptr0, in_ptr0, xnumel, XBLOCK : tl.constexpr):
+	        xnumel = 128
+	        xoffset = tl.program_id(0) * XBLOCK
+	        xindex = xoffset + tl.arange(0, XBLOCK)[:]
+	        xmask = xindex < xnumel
+	        x2 = xindex
+	        x0 = (xindex % 16)
+	        tmp0 = tl.load(in_out_ptr0 + (x2), xmask)
+	        tmp1 = tl.load(in_ptr0 + (x0), xmask, eviction_policy='evict_last')
+	        tmp2 = tmp0 + tmp1
+	        tmp3 = tl.full([1], 0, tl.int32)
+	        tmp4 = triton_helpers.maximum(tmp3, tmp2)
+	        tmp5 = tl.sigmoid(tmp4)
+	        tl.store(in_out_ptr0 + (x2), tmp5, xmask)
+	    ''', device_str='cuda')
+	    */
+	    uint32_t grid_0 = ((xnumel + (128 - 1)) / (128));
+	    uint32_t grid_1 = 1;
+	    uint32_t grid_2 = 1;
+	    if (grid_0 == 0 || grid_1 == 0 || grid_2 == 0) return;
+	    if (kernels_.triton_poi_fused_addmm_relu_sigmoid_0 == nullptr) {
+	        kernels_.triton_poi_fused_addmm_relu_sigmoid_0 = loadKernel("/tmp/tmpspd28pc5/cwhkamk7hukdm5d55b4fxkyyok5x57mzbc2hzfy243x4xp2dcbtz/clccbkkoi5xmntp42dsrk4vjjdegwzvxj2dkuqgrmyblionocbn2.cubin", "triton_poi_fused_addmm_relu_sigmoid_0", 0, cubin_dir_); 
+	    }
+	    CUdeviceptr var_0 = reinterpret_cast<CUdeviceptr>(in_out_ptr0.data_ptr());
+	    CUdeviceptr var_1 = reinterpret_cast<CUdeviceptr>(in_ptr0.data_ptr());
+	    int var_2 = xnumel;
+	    CUdeviceptr global_scratch_scratch_3 = 0;
+	    void* kernel_args_[] = {&var_0, &var_1, &var_2, &global_scratch_scratch_3};
+	    launchKernel(kernels_.triton_poi_fused_addmm_relu_sigmoid_0, grid_0, grid_1, grid_2, 4, 0, kernel_args_, stream_);
+	}
+	
+	template <typename in_ptr0_type_, typename out_ptr0_type_, typename kernels_type_>
+	static inline void call_triton_poi_fused_mul_1(
+	    const in_ptr0_type_& in_ptr0,
+	    const out_ptr0_type_& out_ptr0,
+	    int64_t xnumel,
+	    int32_t device_idx_,
+	    cudaStream_t stream_,
+	    kernels_type_& kernels_,
+	    const std::optional<std::string>& cubin_dir_ = std::nullopt
+	){
+	    /*
+	    async_compile.triton('triton_poi_fused_mul_1', '''
+	    import triton
+	    import triton.language as tl
+	
+	    from torch._inductor.runtime import triton_helpers, triton_heuristics
+	    from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math
+	    from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, DeviceProperties
+	    triton_helpers.set_driver_to_gpu()
+	
+	    @triton_heuristics.pointwise(
+	        size_hints={'x': 256}, 
+	        filename=__file__,
+	        triton_meta={'signature': {'in_ptr0': '*fp32', 'out_ptr0': '*fp32', 'xnumel': 'i32', 'XBLOCK': 'constexpr'}, 'device': DeviceProperties(type='cuda', index=0, multi_processor_count=108, cc=80, major=8, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, warp_size=32), 'constants': {}, 'configs': [{(0,): [['tt.divisibility', 16]], (1,): [['tt.divisibility', 16]]}]},
+	        inductor_meta={'grid_type': 'Grid1D', 'autotune_hints': set(), 'kernel_name': 'triton_poi_fused_mul_1', 'mutated_arg_names': [], 'optimize_mem': True, 'no_x_dim': False, 'num_load': 1, 'num_reduction': 0, 'backend_hash': '3E91F1C483CA40D8EC1B9AFBB282475C75659A34F6F2D59AE8336D7E5E05BEAA', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': True, 'is_fbcode': True},
+	        min_elem_per_thread=0
+	    )
+	    @triton.jit
+	    def triton_poi_fused_mul_1(in_ptr0, out_ptr0, xnumel, XBLOCK : tl.constexpr):
+	        xnumel = 200
+	        xoffset = tl.program_id(0) * XBLOCK
+	        xindex = xoffset + tl.arange(0, XBLOCK)[:]
+	        xmask = xindex < xnumel
+	        x0 = xindex
+	        tmp0 = tl.load(in_ptr0 + (x0), xmask)
+	        tmp1 = 3.14
+	        tmp2 = tmp0 * tmp1
+	        tl.store(out_ptr0 + (x0), tmp2, xmask)
+	    ''', device_str='cuda')
+	    */
+	    uint32_t grid_0 = ((xnumel + (256 - 1)) / (256));
+	    uint32_t grid_1 = 1;
+	    uint32_t grid_2 = 1;
+	    if (grid_0 == 0 || grid_1 == 0 || grid_2 == 0) return;
+	    if (kernels_.triton_poi_fused_mul_1 == nullptr) {
+	        kernels_.triton_poi_fused_mul_1 = loadKernel("/tmp/tmpspd28pc5/cwhkamk7hukdm5d55b4fxkyyok5x57mzbc2hzfy243x4xp2dcbtz/cwu4klxzdejby66dnoubpn5xgs6wb5eomanc62dcz5a42lgsz7uz.cubin", "triton_poi_fused_mul_1", 0, cubin_dir_); 
+	    }
+	    CUdeviceptr var_4 = reinterpret_cast<CUdeviceptr>(in_ptr0.data_ptr());
+	    CUdeviceptr var_5 = reinterpret_cast<CUdeviceptr>(out_ptr0.data_ptr());
+	    int var_6 = xnumel;
+	    CUdeviceptr global_scratch_scratch_7 = 0;
+	    void* kernel_args_[] = {&var_4, &var_5, &var_6, &global_scratch_scratch_7};
+	    launchKernel(kernels_.triton_poi_fused_mul_1, grid_0, grid_1, grid_2, 4, 0, kernel_args_, stream_);
+	}
+	
+	template <typename in_out_ptr0_type_, typename in_ptr0_type_, typename kernels_type_>
+	static inline void call_triton_poi_fused_addmm_gelu_2(
+	    const in_out_ptr0_type_& in_out_ptr0,
+	    const in_ptr0_type_& in_ptr0,
+	    int64_t xnumel,
+	    int32_t device_idx_,
+	    cudaStream_t stream_,
+	    kernels_type_& kernels_,
+	    const std::optional<std::string>& cubin_dir_ = std::nullopt
+	){
+	    /*
+	    async_compile.triton('triton_poi_fused_addmm_gelu_2', '''
+	    import triton
+	    import triton.language as tl
+	
+	    from torch._inductor.runtime import triton_helpers, triton_heuristics
+	    from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math
+	    from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, DeviceProperties
+	    triton_helpers.set_driver_to_gpu()
+	
+	    @triton_heuristics.pointwise(
+	        size_hints={'x': 512}, 
+	        filename=__file__,
+	        triton_meta={'signature': {'in_out_ptr0': '*fp32', 'in_ptr0': '*fp32', 'xnumel': 'i32', 'XBLOCK': 'constexpr'}, 'device': DeviceProperties(type='cuda', index=0, multi_processor_count=108, cc=80, major=8, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, warp_size=32), 'constants': {}, 'configs': [{(0,): [['tt.divisibility', 16]], (1,): [['tt.divisibility', 16]]}]},
+	        inductor_meta={'grid_type': 'Grid1D', 'autotune_hints': set(), 'kernel_name': 'triton_poi_fused_addmm_gelu_2', 'mutated_arg_names': ['in_out_ptr0'], 'optimize_mem': True, 'no_x_dim': False, 'num_load': 2, 'num_reduction': 0, 'backend_hash': '3E91F1C483CA40D8EC1B9AFBB282475C75659A34F6F2D59AE8336D7E5E05BEAA', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': True, 'is_fbcode': True},
+	        min_elem_per_thread=0
+	    )
+	    @triton.jit
+	    def triton_poi_fused_addmm_gelu_2(in_out_ptr0, in_ptr0, xnumel, XBLOCK : tl.constexpr):
+	        xnumel = 300
+	        xoffset = tl.program_id(0) * XBLOCK
+	        xindex = xoffset + tl.arange(0, XBLOCK)[:]
+	        xmask = xindex < xnumel
+	        x0 = xindex
+	        tmp0 = tl.load(in_out_ptr0 + (x0), xmask)
+	        tmp1 = tl.load(in_ptr0 + (x0), xmask)
+	        tmp2 = tmp0 + tmp1
+	        tmp3 = 0.5
+	        tmp4 = tmp2 * tmp3
+	        tmp5 = 0.7071067811865476
+	        tmp6 = tmp2 * tmp5
+	        tmp7 = libdevice.erf(tmp6)
+	        tmp8 = 1.0
+	        tmp9 = tmp7 + tmp8
+	        tmp10 = tmp4 * tmp9
+	        tl.store(in_out_ptr0 + (x0), tmp10, xmask)
+	    ''', device_str='cuda')
+	    */
+	    uint32_t grid_0 = ((xnumel + (256 - 1)) / (256));
+	    uint32_t grid_1 = 1;
+	    uint32_t grid_2 = 1;
+	    if (grid_0 == 0 || grid_1 == 0 || grid_2 == 0) return;
+	    if (kernels_.triton_poi_fused_addmm_gelu_2 == nullptr) {
+	        kernels_.triton_poi_fused_addmm_gelu_2 = loadKernel("/tmp/tmpspd28pc5/cwhkamk7hukdm5d55b4fxkyyok5x57mzbc2hzfy243x4xp2dcbtz/ccwrkckegnvy2eonhehywcr42tj5q645p2oguulvb3gphpowfpp3.cubin", "triton_poi_fused_addmm_gelu_2", 0, cubin_dir_); 
+	    }
+	    CUdeviceptr var_8 = reinterpret_cast<CUdeviceptr>(in_out_ptr0.data_ptr());
+	    CUdeviceptr var_9 = reinterpret_cast<CUdeviceptr>(in_ptr0.data_ptr());
+	    int var_10 = xnumel;
+	    CUdeviceptr global_scratch_scratch_11 = 0;
+	    void* kernel_args_[] = {&var_8, &var_9, &var_10, &global_scratch_scratch_11};
+	    launchKernel(kernels_.triton_poi_fused_addmm_gelu_2, grid_0, grid_1, grid_2, 4, 0, kernel_args_, stream_);
+	}
+	
+	namespace torch::aot_inductor {
+	
+	void AOTInductorModel::_const_run_impl(
+	    std::vector<AtenTensorHandle>& output_handles,
+	    DeviceStreamType stream,
+	    AOTIProxyExecutorHandle proxy_executor
+	) {}
+	
+	AOTI_NOINLINE static void check_input_0(
+	    AtenTensorHandle* input_handles
+	) {
+	    ConstantHandle arg2_1 = ConstantHandle(input_handles[0]);
+	    int32_t arg2_1_dtype;
+	    AOTI_TORCH_ERROR_CODE_CHECK(aoti_torch_get_dtype(arg2_1, &arg2_1_dtype));
+	
+	    int32_t arg2_1_expected_dtype = aoti_torch_dtype_float32();
+	    if (arg2_1_expected_dtype != arg2_1_dtype) {
+	        std::stringstream ss;
+	        ss << "input_handles[0]: unmatched dtype, "
+	           << "expected: " << arg2_1_expected_dtype << "(at::kFloat), "
+	           << "but got: " << arg2_1_dtype << "\n";
+	        throw std::runtime_error(ss.str());
+	    }
+	    auto arg2_1_size = arg2_1.sizes();
+	
+	    if (8 != arg2_1_size[0]) {
+	        std::stringstream ss;
+	        ss << "input_handles[0]: unmatched dim value at 0, "
+	           << "expected: 8, " << "but got: " << arg2_1_size[0]
+	           << "\n";
+	        throw std::runtime_error(ss.str());
+	    }
+	
+	    if (10 != arg2_1_size[1]) {
+	        std::stringstream ss;
+	        ss << "input_handles[0]: unmatched dim value at 1, "
+	           << "expected: 10, " << "but got: " << arg2_1_size[1]
+	           << "\n";
+	        throw std::runtime_error(ss.str());
+	    }
+	    auto arg2_1_stride = arg2_1.strides();
+	
+	    if (10 != arg2_1_stride[0]) {
+	        std::stringstream ss;
+	        ss << "input_handles[0]: unmatched stride value at 0, "
+	           << "expected: 10, " << "but got: " << arg2_1_stride[0]
+	           << "\n";
+	        throw std::runtime_error(ss.str());
+	    }
+	
+	    if (1 != arg2_1_stride[1]) {
+	        std::stringstream ss;
+	        ss << "input_handles[0]: unmatched stride value at 1, "
+	           << "expected: 1, " << "but got: " << arg2_1_stride[1]
+	           << "\n";
+	        throw std::runtime_error(ss.str());
+	    }
+	    int32_t arg2_1_device_type;
+	    AOTI_TORCH_ERROR_CODE_CHECK(aoti_torch_get_device_type(arg2_1, &arg2_1_device_type));
+	
+	    int32_t arg2_1_expected_device_type = 1;
+	    if (arg2_1_expected_device_type != arg2_1_device_type) {
+	        std::stringstream ss;
+	        ss << "input_handles[0]: unmatched device type, "
+	        << "expected: " << arg2_1_expected_device_type << "1(cuda), "
+	        << "but got: " << arg2_1_device_type << "\n";
+	        throw std::runtime_error(ss.str());
+	    }
+	}
+	
+	AOTI_NOINLINE static void check_input_1(
+	    AtenTensorHandle* input_handles
+	) {
+	    ConstantHandle arg3_1 = ConstantHandle(input_handles[1]);
+	    int32_t arg3_1_dtype;
+	    AOTI_TORCH_ERROR_CODE_CHECK(aoti_torch_get_dtype(arg3_1, &arg3_1_dtype));
+	
+	    int32_t arg3_1_expected_dtype = aoti_torch_dtype_float32();
+	    if (arg3_1_expected_dtype != arg3_1_dtype) {
+	        std::stringstream ss;
+	        ss << "input_handles[1]: unmatched dtype, "
+	           << "expected: " << arg3_1_expected_dtype << "(at::kFloat), "
+	           << "but got: " << arg3_1_dtype << "\n";
+	        throw std::runtime_error(ss.str());
+	    }
+	    auto arg3_1_size = arg3_1.sizes();
+	
+	    if (10 != arg3_1_size[0]) {
+	        std::stringstream ss;
+	        ss << "input_handles[1]: unmatched dim value at 0, "
+	           << "expected: 10, " << "but got: " << arg3_1_size[0]
+	           << "\n";
+	        throw std::runtime_error(ss.str());
+	    }
+	
+	    if (20 != arg3_1_size[1]) {
+	        std::stringstream ss;
+	        ss << "input_handles[1]: unmatched dim value at 1, "
+	           << "expected: 20, " << "but got: " << arg3_1_size[1]
+	           << "\n";
+	        throw std::runtime_error(ss.str());
+	    }
+	    auto arg3_1_stride = arg3_1.strides();
+	
+	    if (20 != arg3_1_stride[0]) {
+	        std::stringstream ss;
+	        ss << "input_handles[1]: unmatched stride value at 0, "
+	           << "expected: 20, " << "but got: " << arg3_1_stride[0]
+	           << "\n";
+	        throw std::runtime_error(ss.str());
+	    }
+	
+	    if (1 != arg3_1_stride[1]) {
+	        std::stringstream ss;
+	        ss << "input_handles[1]: unmatched stride value at 1, "
+	           << "expected: 1, " << "but got: " << arg3_1_stride[1]
+	           << "\n";
+	        throw std::runtime_error(ss.str());
+	    }
+	    int32_t arg3_1_device_type;
+	    AOTI_TORCH_ERROR_CODE_CHECK(aoti_torch_get_device_type(arg3_1, &arg3_1_device_type));
+	
+	    int32_t arg3_1_expected_device_type = 1;
+	    if (arg3_1_expected_device_type != arg3_1_device_type) {
+	        std::stringstream ss;
+	        ss << "input_handles[1]: unmatched device type, "
+	        << "expected: " << arg3_1_expected_device_type << "1(cuda), "
+	        << "but got: " << arg3_1_device_type << "\n";
+	        throw std::runtime_error(ss.str());
+	    }
+	}
+	
+	AOTI_NOINLINE static void check_input_2(
+	    AtenTensorHandle* input_handles
+	) {
+	    ConstantHandle arg4_1 = ConstantHandle(input_handles[2]);
+	    int32_t arg4_1_dtype;
+	    AOTI_TORCH_ERROR_CODE_CHECK(aoti_torch_get_dtype(arg4_1, &arg4_1_dtype));
+	
+	    int32_t arg4_1_expected_dtype = aoti_torch_dtype_float32();
+	    if (arg4_1_expected_dtype != arg4_1_dtype) {
+	        std::stringstream ss;
+	        ss << "input_handles[2]: unmatched dtype, "
+	           << "expected: " << arg4_1_expected_dtype << "(at::kFloat), "
+	           << "but got: " << arg4_1_dtype << "\n";
+	        throw std::runtime_error(ss.str());
+	    }
+	    auto arg4_1_size = arg4_1.sizes();
+	
+	    if (20 != arg4_1_size[0]) {
+	        std::stringstream ss;
+	        ss << "input_handles[2]: unmatched dim value at 0, "
+	           << "expected: 20, " << "but got: " << arg4_1_size[0]
+	           << "\n";
+	        throw std::runtime_error(ss.str());
+	    }
+	
+	    if (30 != arg4_1_size[1]) {
+	        std::stringstream ss;
+	        ss << "input_handles[2]: unmatched dim value at 1, "
+	           << "expected: 30, " << "but got: " << arg4_1_size[1]
+	           << "\n";
+	        throw std::runtime_error(ss.str());
+	    }
+	    auto arg4_1_stride = arg4_1.strides();
+	
+	    if (30 != arg4_1_stride[0]) {
+	        std::stringstream ss;
+	        ss << "input_handles[2]: unmatched stride value at 0, "
+	           << "expected: 30, " << "but got: " << arg4_1_stride[0]
+	           << "\n";
+	        throw std::runtime_error(ss.str());
+	    }
+	
+	    if (1 != arg4_1_stride[1]) {
+	        std::stringstream ss;
+	        ss << "input_handles[2]: unmatched stride value at 1, "
+	           << "expected: 1, " << "but got: " << arg4_1_stride[1]
+	           << "\n";
+	        throw std::runtime_error(ss.str());
+	    }
+	    int32_t arg4_1_device_type;
+	    AOTI_TORCH_ERROR_CODE_CHECK(aoti_torch_get_device_type(arg4_1, &arg4_1_device_type));
+	
+	    int32_t arg4_1_expected_device_type = 1;
+	    if (arg4_1_expected_device_type != arg4_1_device_type) {
+	        std::stringstream ss;
+	        ss << "input_handles[2]: unmatched device type, "
+	        << "expected: " << arg4_1_expected_device_type << "1(cuda), "
+	        << "but got: " << arg4_1_device_type << "\n";
+	        throw std::runtime_error(ss.str());
+	    }
+	}
+	
+	AOTI_NOINLINE static void check_input_3(
+	    AtenTensorHandle* input_handles
+	) {
+	    ConstantHandle arg5_1 = ConstantHandle(input_handles[3]);
+	    int32_t arg5_1_dtype;
+	    AOTI_TORCH_ERROR_CODE_CHECK(aoti_torch_get_dtype(arg5_1, &arg5_1_dtype));
+	
+	    int32_t arg5_1_expected_dtype = aoti_torch_dtype_float32();
+	    if (arg5_1_expected_dtype != arg5_1_dtype) {
+	        std::stringstream ss;
+	        ss << "input_handles[3]: unmatched dtype, "
+	           << "expected: " << arg5_1_expected_dtype << "(at::kFloat), "
+	           << "but got: " << arg5_1_dtype << "\n";
+	        throw std::runtime_error(ss.str());
+	    }
+	    auto arg5_1_size = arg5_1.sizes();
+	
+	    if (10 != arg5_1_size[0]) {
+	        std::stringstream ss;
+	        ss << "input_handles[3]: unmatched dim value at 0, "
+	           << "expected: 10, " << "but got: " << arg5_1_size[0]
+	           << "\n";
+	        throw std::runtime_error(ss.str());
+	    }
+	
+	    if (30 != arg5_1_size[1]) {
+	        std::stringstream ss;
+	        ss << "input_handles[3]: unmatched dim value at 1, "
+	           << "expected: 30, " << "but got: " << arg5_1_size[1]
+	           << "\n";
+	        throw std::runtime_error(ss.str());
+	    }
+	    auto arg5_1_stride = arg5_1.strides();
+	
+	    if (30 != arg5_1_stride[0]) {
+	        std::stringstream ss;
+	        ss << "input_handles[3]: unmatched stride value at 0, "
+	           << "expected: 30, " << "but got: " << arg5_1_stride[0]
+	           << "\n";
+	        throw std::runtime_error(ss.str());
+	    }
+	
+	    if (1 != arg5_1_stride[1]) {
+	        std::stringstream ss;
+	        ss << "input_handles[3]: unmatched stride value at 1, "
+	           << "expected: 1, " << "but got: " << arg5_1_stride[1]
+	           << "\n";
+	        throw std::runtime_error(ss.str());
+	    }
+	    int32_t arg5_1_device_type;
+	    AOTI_TORCH_ERROR_CODE_CHECK(aoti_torch_get_device_type(arg5_1, &arg5_1_device_type));
+	
+	    int32_t arg5_1_expected_device_type = 1;
+	    if (arg5_1_expected_device_type != arg5_1_device_type) {
+	        std::stringstream ss;
+	        ss << "input_handles[3]: unmatched device type, "
+	        << "expected: " << arg5_1_expected_device_type << "1(cuda), "
+	        << "but got: " << arg5_1_device_type << "\n";
+	        throw std::runtime_error(ss.str());
+	    }
+	}
+	
+	static bool _check_aoti_runtime_check_inputs_env() {
+	    const static char* env_var_value = getenv("AOTI_RUNTIME_CHECK_INPUTS");
+	    const static bool result = env_var_value != nullptr && env_var_value[0] != '0';
+	    return result;
+	}
+	
+	AOTI_NOINLINE static void __check_inputs_outputs(
+	    AtenTensorHandle* input_handles,
+	    AtenTensorHandle* output_handles) {
+	    if (!_check_aoti_runtime_check_inputs_env()){
+	        return;
+	    }
+	    check_input_0(input_handles);
+	    check_input_1(input_handles);
+	    check_input_2(input_handles);
+	    check_input_3(input_handles);
+	}
+	
+	void AOTInductorModel::run_impl(
+	    AtenTensorHandle*
+	        input_handles, // array of input AtenTensorHandle; handles
+	                        // are stolen; the array itself is borrowed
+	    AtenTensorHandle*
+	        output_handles, // array for writing output AtenTensorHandle; handles
+	                        // will be stolen by the caller; the array itself is
+	                        // borrowed
+	    DeviceStreamType stream,
+	    AOTIProxyExecutorHandle proxy_executor
+	) {
+	    __check_inputs_outputs(input_handles, output_handles);
+	
+	    auto inputs = steal_from_raw_handles_to_raii_handles(input_handles, 4);
+	    auto arg2_1 = std::move(inputs[0]);
+	    auto arg3_1 = std::move(inputs[1]);
+	    auto arg4_1 = std::move(inputs[2]);
+	    auto arg5_1 = std::move(inputs[3]);
+	    [[maybe_unused]] auto& fc1_weight = constants_->at(0);
+	    [[maybe_unused]] auto& fc1_bias = constants_->at(1);
+	    inputs.clear();
+	    [[maybe_unused]] auto& kernels = static_cast(*this->kernels_.get());
+	
+	    AOTICudaStreamGuard stream_guard(stream, this->device_idx_);
+	    static constexpr int64_t int_array_0[] = {8L, 16L};
+	    static constexpr int64_t int_array_1[] = {16L, 1L};
+	    AtenTensorHandle buf0_handle;
+	    AOTI_TORCH_ERROR_CODE_CHECK(aoti_torch_empty_strided(2, int_array_0, int_array_1, cached_torch_dtype_float32, cached_torch_device_type_cuda, this->device_idx_, &buf0_handle));
+	    RAIIAtenTensorHandle buf0(buf0_handle);
+	    // Topologically Sorted Source Nodes: [linear, ], Original ATen: [aten.t, aten.addmm]
+	    static constexpr int64_t int_array_2[] = {10L, 16L};
+	    static constexpr int64_t int_array_3[] = {1L, 10L};
+	    // [Provenance debug handles] aoti_torch_cuda_mm_out:4
+	    AOTI_TORCH_ERROR_CODE_CHECK(aoti_torch_cuda_mm_out(buf0, arg2_1, wrap_with_raii_handle_if_needed(reinterpret_tensor_wrapper(fc1_weight, 2, int_array_2, int_array_3, 0L))));
+	    arg2_1.reset();
+	    auto buf1 = std::move(buf0);  // reuse
+	    // Topologically Sorted Source Nodes: [, relu, sigmoid], Original ATen: [aten.addmm, aten.relu, aten.sigmoid]
+	    // [Provenance debug handles] triton_poi_fused_addmm_relu_sigmoid_0:1
+	    call_triton_poi_fused_addmm_relu_sigmoid_0(buf1, fc1_bias, 128L, this->device_idx_, stream, kernels, this->cubin_dir_);
+	    static constexpr int64_t int_array_4[] = {10L, 20L};
+	    static constexpr int64_t int_array_5[] = {20L, 1L};
+	    AtenTensorHandle buf2_handle;
+	    AOTI_TORCH_ERROR_CODE_CHECK(aoti_torch_empty_strided(2, int_array_4, int_array_5, cached_torch_dtype_float32, cached_torch_device_type_cuda, this->device_idx_, &buf2_handle));
+	    RAIIAtenTensorHandle buf2(buf2_handle);
+	    // Topologically Sorted Source Nodes: [mul], Original ATen: [aten.mul]
+	    // [Provenance debug handles] triton_poi_fused_mul_1:2
+	    call_triton_poi_fused_mul_1(arg3_1, buf2, 200L, this->device_idx_, stream, kernels, this->cubin_dir_);
+	    arg3_1.reset();
+	    static constexpr int64_t int_array_6[] = {10L, 30L};
+	    static constexpr int64_t int_array_7[] = {30L, 1L};
+	    AtenTensorHandle buf3_handle;
+	    AOTI_TORCH_ERROR_CODE_CHECK(aoti_torch_empty_strided(2, int_array_6, int_array_7, cached_torch_dtype_float32, cached_torch_device_type_cuda, this->device_idx_, &buf3_handle));
+	    RAIIAtenTensorHandle buf3(buf3_handle);
+	    // Topologically Sorted Source Nodes: [mul, ], Original ATen: [aten.mul, aten.addmm]
+	    // [Provenance debug handles] aoti_torch_cuda_mm_out:5
+	    AOTI_TORCH_ERROR_CODE_CHECK(aoti_torch_cuda_mm_out(buf3, buf2, arg4_1));
+	    arg4_1.reset();
+	    buf2.reset();
+	    auto buf4 = std::move(buf3);  // reuse
+	    // Topologically Sorted Source Nodes: [, gelu], Original ATen: [aten.addmm, aten.gelu]
+	    // [Provenance debug handles] triton_poi_fused_addmm_gelu_2:3
+	    call_triton_poi_fused_addmm_gelu_2(buf4, arg5_1, 300L, this->device_idx_, stream, kernels, this->cubin_dir_);
+	    arg5_1.reset();
+	    output_handles[0] = buf1.release();
+	    output_handles[1] = buf4.release();
+	} // AOTInductorModel::run_impl
+	} // namespace torch::aot_inductor
+	
+	
+	
+	
+V0819 12:17:15.848000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_inductor/codecache.py:1799] {"graph_dump": {"name": "inductor_aot_kernel_code", "type": "cpp", "filename": "/tmp/tmpspd28pc5/cwhkamk7hukdm5d55b4fxkyyok5x57mzbc2hzfy243x4xp2dcbtz/cb4ashn4alx6bnx7tb5oh4tbrnwu3vng2clhvxxaf45plbd7wmsn.kernel.cpp"}, "stack": [{"line": 39, "name": "", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 624, "name": "test_kernel_information_generation", "filename": 11, "loc": "torch._inductor.aoti_compile_and_package(ep, package_path=pt2_file)"}, {"line": 151, "name": "aoti_compile_and_package", "filename": 19, "loc": "return aot_inductor_minifier_wrapper("}, {"line": 1254, "name": "aot_inductor_minifier_wrapper", "filename": 20, "loc": "return func("}, {"line": 194, "name": "_aoti_compile_and_package_inner", "filename": 19, "loc": "aoti_files = aot_compile(gm, args, kwargs, options=inductor_configs)"}, {"line": 301, "name": "aot_compile", "filename": 19, "loc": "return compile_fx_aot("}, {"line": 1900, "name": "compile_fx_aot", "filename": 21, "loc": "compiled_artifacts = compile_fx("}, {"line": 2116, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2173, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2511, 
"name": "compile_fx", "filename": 21, "loc": "return inference_compiler(unlifted_gm, example_inputs_)"}, {"line": 1267, "name": "__call__", "filename": 25, "loc": "return self.compiler_fn(gm, example_inputs)"}, {"line": 2374, "name": "fw_compiler_base", "filename": 21, "loc": "return inner_compile("}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 776, "name": "compile_fx_inner", "filename": 21, "loc": "return wrap_compiler_debug(_compile_fx_inner, compiler_name=\"inductor\")("}, {"line": 141, "name": "debug_wrapper", "filename": 26, "loc": "inner_compiled_fn = compiler_fn(gm, example_inputs)"}, {"line": 167, "name": "newFunction", "filename": 27, "loc": "return old_func(*args, **kwargs)"}, {"line": 955, "name": "_compile_fx_inner", "filename": 21, "loc": "mb_compiled_graph = fx_codegen_and_compile("}, {"line": 1654, "name": "fx_codegen_and_compile", "filename": 21, "loc": "return scheme.codegen_and_compile(gm, example_inputs, inputs_to_check, graph_kwargs)"}, {"line": 1490, "name": "codegen_and_compile", "filename": 21, "loc": "compiled_fn = AotCodeCompiler.compile("}, {"line": 1799, "name": "compile", "filename": 34, "loc": "trace_structured("}], "has_payload": "b01447e33a18a899a90703e2f10b6675"}
+	// Triton kernels are embedded as comments in /tmp/tmpspd28pc5/cwhkamk7hukdm5d55b4fxkyyok5x57mzbc2hzfy243x4xp2dcbtz/c2zi7pbvbb6r2z2ilqqn22mpt7jxdy72w5fymrtjqrpewk5akujk.wrapper.cpp
+	
+V0819 12:17:15.856000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "2902a73f3334687482e4d85513a4e5b8"}
+	{
+	"name": "compile_file",
+	"ts": 1755631035856672.8,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:20.379000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "8368a694a2061e76b9bf8bd3681297c6"}
+	{
+	"name": "compile_file",
+	"ts": 1755631040379538.2,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:20.381000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "3cfc6d83d56b375202e1981736f37720"}
+	{
+	"name": "compile_file",
+	"ts": 1755631040381197.2,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:20.412000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "737dac716e47f7382fdcf8719e6285c7"}
+	{
+	"name": "compile_file",
+	"ts": 1755631040412038.2,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:20.416000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "5db9b489851c626c900408e3a2b39535"}
+	{
+	"name": "compile_file",
+	"ts": 1755631040416577.0,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:20.445000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "d1f583a80da80193320f0b83cee5115e"}
+	{
+	"name": "compile_file",
+	"ts": 1755631040445140.5,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:20.449000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "8cd5c84d309cccb04e67b5c8640f9d2a"}
+	{
+	"name": "compile_file",
+	"ts": 1755631040449043.0,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:20.486000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "ef79323064969b3cdaeb047d0bab0fcf"}
+	{
+	"name": "compile_file",
+	"ts": 1755631040485729.2,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:20.487000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "3d30c5d871af47f489170e67717d1543"}
+	{
+	"name": "AotCodeCompiler.compile",
+	"ts": 1755631040487847.2,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:20.491000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "6d0e402e60e25eb6033cef1f2ed7ec09"}
+	{
+	"name": "GraphLowering.compile_to_fn",
+	"ts": 1755631040491533.8,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:20.496000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_dynamo/utils.py", 35]}
+V0819 12:17:20.496000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1985] {"chromium_event": {}, "stack": [{"line": 39, "name": "", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 624, "name": "test_kernel_information_generation", "filename": 11, "loc": "torch._inductor.aoti_compile_and_package(ep, package_path=pt2_file)"}, {"line": 151, "name": "aoti_compile_and_package", "filename": 19, "loc": "return aot_inductor_minifier_wrapper("}, {"line": 1254, "name": "aot_inductor_minifier_wrapper", "filename": 20, "loc": "return func("}, {"line": 194, "name": "_aoti_compile_and_package_inner", "filename": 19, "loc": "aoti_files = aot_compile(gm, args, kwargs, options=inductor_configs)"}, {"line": 301, "name": "aot_compile", "filename": 19, "loc": "return compile_fx_aot("}, {"line": 1900, "name": "compile_fx_aot", "filename": 21, "loc": "compiled_artifacts = compile_fx("}, {"line": 2116, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2173, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2511, "name": "compile_fx", "filename": 21, "loc": "return inference_compiler(unlifted_gm, example_inputs_)"}, {"line": 1267, "name": "__call__", "filename": 25, "loc": "return self.compiler_fn(gm, 
example_inputs)"}, {"line": 2374, "name": "fw_compiler_base", "filename": 21, "loc": "return inner_compile("}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 776, "name": "compile_fx_inner", "filename": 21, "loc": "return wrap_compiler_debug(_compile_fx_inner, compiler_name=\"inductor\")("}, {"line": 141, "name": "debug_wrapper", "filename": 26, "loc": "inner_compiled_fn = compiler_fn(gm, example_inputs)"}, {"line": 167, "name": "newFunction", "filename": 27, "loc": "return old_func(*args, **kwargs)"}, {"line": 1025, "name": "_compile_fx_inner", "filename": 21, "loc": "CompileEventLogger.instant("}, {"line": 616, "name": "instant", "filename": 35, "loc": "CompileEventLogger.log_instant_event("}, {"line": 410, "name": "log_instant_event", "filename": 35, "loc": "chromium_log.log_instant_event("}, {"line": 1985, "name": "log_instant_event", "filename": 35, "loc": "torch._logging.trace_structured("}], "has_payload": "899dba35067fad12333b53168351cea7"}
+	{
+	"name": "fx_graph_cache_disabled",
+	"ts": 1755631034354989.8,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "i",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0,
+	"s": "p"
+	}
+V0819 12:17:20.497000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "5dc602214c67c8b4c3fd5121c69f7566"}
+	{
+	"name": "fx_codegen_and_compile",
+	"ts": 1755631040497217.5,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:20.500000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_inductor/compile_fx.py:1063] {"artifact": {"name": "inductor_provenance_tracking_node_mappings", "encoding": "json"}, "stack": [{"line": 39, "name": "", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 624, "name": "test_kernel_information_generation", "filename": 11, "loc": "torch._inductor.aoti_compile_and_package(ep, package_path=pt2_file)"}, {"line": 151, "name": "aoti_compile_and_package", "filename": 19, "loc": "return aot_inductor_minifier_wrapper("}, {"line": 1254, "name": "aot_inductor_minifier_wrapper", "filename": 20, "loc": "return func("}, {"line": 194, "name": "_aoti_compile_and_package_inner", "filename": 19, "loc": "aoti_files = aot_compile(gm, args, kwargs, options=inductor_configs)"}, {"line": 301, "name": "aot_compile", "filename": 19, "loc": "return compile_fx_aot("}, {"line": 1900, "name": "compile_fx_aot", "filename": 21, "loc": "compiled_artifacts = compile_fx("}, {"line": 2116, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2173, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2511, "name": "compile_fx", "filename": 21, "loc": "return inference_compiler(unlifted_gm, example_inputs_)"}, {"line": 1267, "name": 
"__call__", "filename": 25, "loc": "return self.compiler_fn(gm, example_inputs)"}, {"line": 2374, "name": "fw_compiler_base", "filename": 21, "loc": "return inner_compile("}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 776, "name": "compile_fx_inner", "filename": 21, "loc": "return wrap_compiler_debug(_compile_fx_inner, compiler_name=\"inductor\")("}, {"line": 141, "name": "debug_wrapper", "filename": 26, "loc": "inner_compiled_fn = compiler_fn(gm, example_inputs)"}, {"line": 167, "name": "newFunction", "filename": 27, "loc": "return old_func(*args, **kwargs)"}, {"line": 1063, "name": "_compile_fx_inner", "filename": 21, "loc": "trace_structured("}], "has_payload": "589a4aa33cba4db6a5e0f1e82b52c551"}
+	{"preToPost": {"linear": ["permute", "mm_default_1", "add_tensor_1"], "relu": ["relu"], "sigmoid": ["sigmoid"], "mul": ["mul"], "addmm": ["mm_default", "add_tensor"], "gelu": ["mul_1", "mul_2", "erf", "add", "mul_3"]}, "postToPre": {"permute": ["linear"], "mm_default_1": ["linear"], "add_tensor_1": ["linear"], "relu": ["relu"], "sigmoid": ["sigmoid"], "mul": ["mul"], "mm_default": ["addmm"], "add_tensor": ["addmm"], "mul_1": ["gelu"], "mul_2": ["gelu"], "erf": ["gelu"], "add": ["gelu"], "mul_3": ["gelu"]}, "cppCodeToPost": {"triton_poi_fused_addmm_relu_sigmoid_0:1": ["sigmoid", "relu", "add_tensor_1"], "triton_poi_fused_mul_1:2": ["mul"], "triton_poi_fused_addmm_gelu_2:3": ["mul_3", "mul_1", "add_tensor", "add", "erf", "mul_2"], "aoti_torch_cuda_mm_out:4": ["mm_default_1"], "aoti_torch_cuda_mm_out:5": ["mm_default"]}, "postToCppCode": {"sigmoid": ["triton_poi_fused_addmm_relu_sigmoid_0:1"], "relu": ["triton_poi_fused_addmm_relu_sigmoid_0:1"], "add_tensor_1": ["triton_poi_fused_addmm_relu_sigmoid_0:1"], "mul": ["triton_poi_fused_mul_1:2"], "mul_3": ["triton_poi_fused_addmm_gelu_2:3"], "mul_1": ["triton_poi_fused_addmm_gelu_2:3"], "add_tensor": ["triton_poi_fused_addmm_gelu_2:3"], "add": ["triton_poi_fused_addmm_gelu_2:3"], "erf": ["triton_poi_fused_addmm_gelu_2:3"], "mul_2": ["triton_poi_fused_addmm_gelu_2:3"], "mm_default_1": ["aoti_torch_cuda_mm_out:4"], "mm_default": ["aoti_torch_cuda_mm_out:5"]}, "version": 2.0}
+V0819 12:17:20.501000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_inductor/compile_fx.py:1073] {"artifact": {"name": "inductor_provenance_tracking_kernel_stack_traces", "encoding": "json"}, "stack": [{"line": 39, "name": "", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 624, "name": "test_kernel_information_generation", "filename": 11, "loc": "torch._inductor.aoti_compile_and_package(ep, package_path=pt2_file)"}, {"line": 151, "name": "aoti_compile_and_package", "filename": 19, "loc": "return aot_inductor_minifier_wrapper("}, {"line": 1254, "name": "aot_inductor_minifier_wrapper", "filename": 20, "loc": "return func("}, {"line": 194, "name": "_aoti_compile_and_package_inner", "filename": 19, "loc": "aoti_files = aot_compile(gm, args, kwargs, options=inductor_configs)"}, {"line": 301, "name": "aot_compile", "filename": 19, "loc": "return compile_fx_aot("}, {"line": 1900, "name": "compile_fx_aot", "filename": 21, "loc": "compiled_artifacts = compile_fx("}, {"line": 2116, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2173, "name": "compile_fx", "filename": 21, "loc": "return compile_fx("}, {"line": 2511, "name": "compile_fx", "filename": 21, "loc": "return inference_compiler(unlifted_gm, example_inputs_)"}, {"line": 1267, 
"name": "__call__", "filename": 25, "loc": "return self.compiler_fn(gm, example_inputs)"}, {"line": 2374, "name": "fw_compiler_base", "filename": 21, "loc": "return inner_compile("}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, {"line": 776, "name": "compile_fx_inner", "filename": 21, "loc": "return wrap_compiler_debug(_compile_fx_inner, compiler_name=\"inductor\")("}, {"line": 141, "name": "debug_wrapper", "filename": 26, "loc": "inner_compiled_fn = compiler_fn(gm, example_inputs)"}, {"line": 167, "name": "newFunction", "filename": 27, "loc": "return old_func(*args, **kwargs)"}, {"line": 1073, "name": "_compile_fx_inner", "filename": 21, "loc": "trace_structured("}], "has_payload": "2542f0a704bc078bccd4359742da5bd6"}
+	{"triton_poi_fused_addmm_relu_sigmoid_0:1": ["File \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py\", line 81, in forward\n    x = self.sigmoid(x)\n  File \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/nn/modules/activation.py\", line 359, in forward\n    return torch.sigmoid(input)", "File \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py\", line 80, in forward\n    x = self.relu(x)\n  File \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/nn/modules/activation.py\", line 144, in forward\n    return F.relu(input, inplace=self.inplace)", "File \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py\", line 79, in forward\n    x = self.fc1(x)\n  File \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/nn/modules/linear.py\", line 134, in forward\n    return F.linear(input, self.weight, self.bias)"], "triton_poi_fused_mul_1:2": ["File \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py\", line 82, in forward\n    d = a * 3.14"], "triton_poi_fused_addmm_gelu_2:3": ["File \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py\", line 84, in forward\n    z = torch.nn.functional.gelu(y)", "File \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py\", line 83, in forward\n    y = torch.addmm(c, d, b)"], "aoti_torch_cuda_mm_out:4": ["File \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py\", line 79, in forward\n    x = self.fc1(x)\n  File \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/nn/modules/linear.py\", line 134, in forward\n    return F.linear(input, self.weight, self.bias)"], "aoti_torch_cuda_mm_out:5": ["File \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py\", line 83, in forward\n    y = torch.addmm(c, d, b)"]}
+V0819 12:17:20.503000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "80f190ba3c221310881910c65dc1036b"}
+	{
+	"name": "inductor_compile",
+	"ts": 1755631040502942.2,
+	"args": {
+	"fn_name": "compile_fx_inner",
+	"compile_id": "None",
+	"is_backward": false,
+	"cache_state": "disabled",
+	"cache_event_time": 1755631034354989637,
+	"key": null,
+	"components": null,
+	"cache_bypass_reason": "cache not enabled",
+	"remote_cache_enabled": false,
+	"local_cache_enabled": true
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:20.506000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "cbf3c70c8c3726d682875cddbf996f29"}
+	{
+	"name": "compile_fx..fw_compiler_base",
+	"ts": 1755631040506393.5,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:17:20.510000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_dynamo/metrics_context.py", 36]}
+V0819 12:17:20.511000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1641] {"compilation_metrics": {"compile_id": null, "frame_key": null, "co_name": null, "co_filename": null, "co_firstlineno": null, "cache_size": null, "accumulated_cache_size": null, "guard_count": null, "shape_env_guard_count": null, "graph_op_count": null, "graph_node_count": null, "graph_input_count": null, "start_time": 1755631031.215075, "entire_frame_compile_time_s": null, "backend_compile_time_s": null, "inductor_compile_time_s": null, "code_gen_time_s": null, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": null, "compliant_custom_ops": null, "restart_reasons": null, "dynamo_time_before_restart_s": null, "stack_trace": null, "graph_node_shapes": null, "has_guarded_code": null, "remote_cache_time_saved_s": null, "structured_logging_overhead_s": null, "config_suppress_errors": false, "config_inline_inbuilt_nn_modules": true, "specialize_float": null, "dynamo_config": "{\"_autograd_backward_strict_mode_conditional_banned_ops\": [\"stride\", \"storage_offset\", \"is_contiguous\"], \"_unsafe_skip_fsdp_module_guards\": false, \"accumulated_recompile_limit\": 256, \"allow_complex_guards_as_runtime_asserts\": false, \"allow_empty_graphs\": false, \"allow_ignore_mark_dynamic\": false, \"allow_rnn\": false, \"allow_unspec_int_on_nn_module\": false, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch._prims\", \"torch._refs\", \"torch.distributions\", \"torch.testing\"], \"assume_dunder_attributes_remain_unchanged\": true, \"assume_static_by_default\": true, \"automatic_dynamic_local_pgo\": true, \"automatic_dynamic_remote_pgo\": null, \"automatic_dynamic_shapes\": true, \"automatic_dynamic_shapes_mark_as\": \"dynamic\", \"caching_precompile\": false, \"capture_autograd_function\": true, \"capture_dynamic_output_shape_ops\": false, \"capture_func_transforms\": true, \"capture_scalar_outputs\": false, \"capture_sparse_compute\": false, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"cprofile\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"dead_code_elimination\": true, \"disable\": false, \"do_not_emit_runtime_asserts\": false, \"dont_skip_tracing\": false, \"dynamic_shapes\": true, \"enable_compiler_collectives\": false, \"enable_cpp_framelocals_guard_eval\": true, \"enable_cpp_guard_manager\": true, \"enable_cpp_symbolic_shape_guards\": false, \"enable_faithful_generator_behavior\": true, \"enable_trace_contextlib\": true, \"enable_trace_unittest\": false, \"error_on_nested_fx_trace\": true, \"error_on_nested_jit_trace\": true, \"error_on_recompile\": false, \"fail_on_recompile_limit_hit\": false, \"fake_tensor_cache_crosscheck_enabled\": false, \"fake_tensor_cache_enabled\": true, \"fake_tensor_disable_inference_mode\": true, \"force_nn_module_property_static_shapes\": true, \"force_parameter_static_shapes\": true, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"graph_break_on_nn_param_ctor\": true, \"graph_deduplication_lint\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"inline_inbuilt_nn_modules\": true, \"install_free_tensors\": false, \"issue_3_13_0_warning\": true, \"max_saved_pointers_for_recursive_dict_tags_check\": 256, \"minimum_call_count\": 1, \"numpy_default_complex\": \"complex128\", \"numpy_default_float\": \"float64\", 
\"numpy_default_int\": \"int64\", \"only_allow_pt2_compliant_ops\": false, \"optimize_ddp\": true, \"optimize_ddp_lazy_compile\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"prepare_freezing\": false, \"pt2_compile_id_prefix\": null, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"recompile_limit\": 8, \"record_compile_time_instruction_count\": false, \"record_runtime_overhead\": true, \"replay_record_enabled\": false, \"report_guard_failures\": true, \"rewrite_assert_with_torch_assert\": true, \"run_gc_after_compile\": true, \"skip_code_recursive_on_recompile_limit_hit\": true, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_guards_on_constant_func_defaults\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"skip_tensor_guards_with_matching_dict_tags\": true, \"skip_torchrec\": true, \"skipfiles_inline_module_allowlist\": {}, \"specialize_float\": false, \"specialize_int\": false, \"suppress_errors\": false, \"trace_numpy\": true, \"track_nodes_for_deduplication\": false, \"use_graph_deduplication\": false, \"use_lamba_guard_for_object_aliasing\": true, \"use_lazy_graph_module\": true, \"use_numpy_random_stream\": false, \"use_recursive_dict_tags_for_guards\": true, \"verify_correctness\": false, \"wrap_top_frame\": false}", "is_forward": null, "num_triton_bundles": null, "remote_fx_graph_cache_get_time_ms": null, "remote_fx_graph_cache_put_time_ms": null, "start_time_us": 1755631031215075, "duration_us": null, "dynamo_cumulative_compile_time_us": null, "aot_autograd_cumulative_compile_time_us": null, "inductor_cumulative_compile_time_us": null, "inductor_code_gen_cumulative_compile_time_us": null, "triton_compile_time_us": null, "runtime_cudagraphify_time_us": null, "runtime_triton_autotune_time_us": null, "dynamo_compile_time_before_restart_us": null, "distributed_ephemeral_timeout_us": null, "structured_logging_overhead_us": null, "remote_fx_graph_cache_get_time_us": null, "remote_fx_graph_cache_put_time_us": null, "backward_cumulative_compile_time_us": null, "end_time_us": 1755631040507360, "pre_grad_pass_time_us": null, "post_grad_pass_time_us": null, "joint_graph_pass_time_us": null, "log_format_version": 3, "inductor_config": "{\"TYPE_CHECKING\": false, \"_cache_config_ignore_prefix\": [\"trace\", \"cuda.cutlass_dir\", \"worker_start_method\", \"compile_threads\", \"post_grad_custom_post_pass\", \"post_grad_custom_pre_pass\", \"joint_custom_pre_pass\", \"joint_custom_post_pass\", \"_fuse_ddp_communication_passes\", \"_pre_fusion_custom_pass\", \"always_complex_memory_overlap_TESTING_ONLY\", \"fx_graph_cache\", \"fx_graph_remote_cache\", \"autotune_local_cache\", \"autotune_remote_cache\"], \"_collective.auto_select\": false, \"_collective.one_shot_all_reduce_threshold_bytes\": 131072, \"_fuse_ddp_bucket_size\": 25, \"_fuse_ddp_communication\": false, \"_fuse_ddp_communication_passes\": [\"fuse_ddp_with_concat_op\", \"schedule_comm_wait\"], \"_micro_pipeline_tp\": false, \"_post_fusion_custom_pass\": null, \"_pre_fusion_custom_pass\": null, \"_profile_var\": \"\", \"_raise_error_for_testing\": false, \"_save_config_ignore\": [\"trace.upload_tar\", \"joint_custom_pre_pass\", \"joint_custom_post_pass\", \"pre_grad_custom_pass\", \"aot_inductor.repro_level\", \"aot_inductor.dump_aoti_minifier\", \"post_grad_custom_pre_pass\", \"post_grad_custom_post_pass\", \"_fuse_ddp_communication_passes\", \"_pre_fusion_custom_pass\"], \"add_pre_grad_passes\": null, 
\"aggressive_fusion\": false, \"alignment_asserts\": false, \"allow_buffer_reuse\": true, \"always_complex_memory_overlap_TESTING_ONLY\": false, \"always_keep_tensor_constants\": false, \"annotate_training\": false, \"aot_inductor.allow_stack_allocation\": false, \"aot_inductor.compile_standalone\": false, \"aot_inductor.compile_wrapper_opt_level\": \"O1\", \"aot_inductor.custom_op_libs\": null, \"aot_inductor.custom_ops_to_c_shims\": {}, \"aot_inductor.debug_compile\": false, \"aot_inductor.debug_intermediate_value_printer\": \"0\", \"aot_inductor.dump_aoti_minifier\": false, \"aot_inductor.embed_kernel_binary\": null, \"aot_inductor.emit_multi_arch_kernel\": null, \"aot_inductor.enable_lto\": false, \"aot_inductor.filtered_kernel_names\": null, \"aot_inductor.force_mmap_weights\": false, \"aot_inductor.metadata\": {\"AOTI_DEVICE_KEY\": \"cuda\"}, \"aot_inductor.model_name_for_generated_files\": null, \"aot_inductor.output_path\": \"\", \"aot_inductor.package\": false, \"aot_inductor.package_constants_in_so\": true, \"aot_inductor.package_constants_on_disk\": false, \"aot_inductor.package_cpp_only\": null, \"aot_inductor.precompile_headers\": false, \"aot_inductor.presets\": {}, \"aot_inductor.raise_error_on_ignored_optimization\": true, \"aot_inductor.repro_level\": 2, \"aot_inductor.serialized_in_spec\": \"\", \"aot_inductor.serialized_out_spec\": \"\", \"aot_inductor.use_consts_asm_build\": true, \"aot_inductor.use_minimal_arrayref_interface\": false, \"aot_inductor.use_runtime_constant_folding\": false, \"aot_inductor.weight_use_caching_allocator\": false, \"assert_indirect_indexing\": true, \"assume_aligned_inputs\": false, \"assume_unaligned_fallback_output\": false, \"autoheuristic_collect\": \"\", \"autoheuristic_log_path\": \"DEFAULT\", \"autoheuristic_use\": \"mixed_mm\", \"autotune_fallback_to_aten\": false, \"autotune_in_subproc\": false, \"autotune_local_cache\": true, \"autotune_lookup_table\": {}, \"autotune_multi_device\": false, \"autotune_num_choices_displayed\": 10, \"autotune_remote_cache\": null, \"b2b_gemm_pass\": false, \"batch_fusion\": true, \"benchmark_combo_kernel\": false, \"benchmark_epilogue_fusion\": true, \"benchmark_fusion\": false, \"benchmark_harness\": true, \"benchmark_kernel\": false, \"bfloat16_atomic_adds_enabled\": true, \"bucket_all_gathers_fx\": \"none\", \"bucket_all_gathers_fx_bucket_size_determinator\": null, \"bucket_reduce_scatters_fx\": \"none\", \"bucket_reduce_scatters_fx_bucket_size_determinator\": null, \"bundle_triton_into_fx_graph_cache\": null, \"bundled_autotune_remote_cache\": null, \"bw_outputs_user_visible\": true, \"can_inplace_pad_graph_input\": false, \"check_stack_no_cycles_TESTING_ONLY\": false, \"combo_kernel_allow_mixed_sizes\": 1, \"combo_kernel_foreach_dynamic_shapes\": true, \"combo_kernels\": false, \"combo_kernels_autotune\": 1, \"comment_origin\": false, \"compile_threads\": 32, \"comprehensive_padding\": true, \"compute_all_bounds\": false, \"constant_and_index_propagation\": true, \"conv_1x1_as_mm\": false, \"coordinate_descent_check_all_directions\": false, \"coordinate_descent_search_radius\": 1, \"coordinate_descent_tuning\": false, \"cpp.cxx\": [null, \"g++\"], \"cpp.descriptive_names\": \"original_aten\", \"cpp.dynamic_threads\": false, \"cpp.enable_concat_linear\": false, \"cpp.enable_floating_point_contract_flag\": \"off\", \"cpp.enable_grouped_gemm_template\": false, \"cpp.enable_kernel_profile\": false, \"cpp.enable_loop_tail_vec\": true, \"cpp.enable_tiling_heuristics\": true, 
\"cpp.enable_unsafe_math_opt_flag\": false, \"cpp.fallback_scatter_reduce_sum\": true, \"cpp.force_inline_kernel\": false, \"cpp.gemm_cache_blocking\": null, \"cpp.gemm_max_k_slices\": 1, \"cpp.gemm_thread_factors\": null, \"cpp.inject_log1p_bug_TESTING_ONLY\": null, \"cpp.inject_relu_bug_TESTING_ONLY\": null, \"cpp.max_horizontal_fusion_size\": 16, \"cpp.min_chunk_size\": 512, \"cpp.no_redundant_loops\": true, \"cpp.simdlen\": null, \"cpp.threads\": -1, \"cpp.use_decompose_tanh\": false, \"cpp.use_small_dequant_buffer\": false, \"cpp.vec_isa_ok\": null, \"cpp.weight_prepack\": true, \"cpp_cache_precompile_headers\": false, \"cpp_wrapper\": false, \"cpp_wrapper_build_separate\": false, \"cpu_backend\": \"cpp\", \"cuda.arch\": null, \"cuda.binary_remote_cache_force_write\": false, \"cuda.compile_opt_level\": \"-O1\", \"cuda.cuda_cxx\": null, \"cuda.cutlass_backend_min_gemm_size\": 1, \"cuda.cutlass_dir\": \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/third_party/cutlass\", \"cuda.cutlass_enabled_ops\": \"all\", \"cuda.cutlass_epilogue_fusion_enabled\": false, \"cuda.cutlass_hash_with_compile_cmd\": false, \"cuda.cutlass_instantiation_level\": \"0\", \"cuda.cutlass_max_profiling_configs\": null, \"cuda.cutlass_max_profiling_swizzle_options\": [1, 2, 4, 8], \"cuda.cutlass_op_allowlist_regex\": null, \"cuda.cutlass_op_denylist_regex\": null, \"cuda.cutlass_prescreening\": true, \"cuda.cutlass_presets\": null, \"cuda.cutlass_tma_only\": false, \"cuda.enable_caching_codegen\": true, \"cuda.enable_cuda_lto\": false, \"cuda.enable_debug_info\": false, \"cuda.enable_ptxas_info\": false, \"cuda.generate_test_runner\": false, \"cuda.upload_to_binary_remote_cache\": false, \"cuda.use_binary_remote_cache\": true, \"cuda.use_fast_math\": false, \"cuda.version\": null, \"cuda_backend\": \"triton\", \"dce\": false, \"debug\": false, \"debug_fusion\": false, \"debug_index_asserts\": false, \"debug_ir_traceback\": false, \"decompose_mem_bound_mm\": false, \"developer_warnings\": true, \"disable_cpp_codegen\": false, \"disable_padding_cpu\": true, \"disable_progress\": true, \"dynamic_scale_rblock\": true, \"efficient_conv_bn_eval_fx_passes\": false, \"emulate_precision_casts\": false, \"enable_auto_functionalized_v2\": true, \"enable_caching_generated_triton_templates\": true, \"enable_linear_binary_folding\": false, \"enabled_metric_tables\": \"\", \"epilogue_fusion\": true, \"epilogue_fusion_first\": false, \"estimate_op_runtime\": \"default\", \"external_matmul\": [], \"fallback_random\": false, \"force_fuse_int_mm_with_mul\": false, \"force_layout_optimization\": false, \"force_pointwise_cat\": false, \"force_same_precision\": false, \"force_shape_pad\": false, \"freezing\": false, \"freezing_discard_parameters\": false, \"fx_graph_cache\": true, \"fx_graph_remote_cache\": null, \"fx_passes_numeric_check\": {\"num_iterations\": 1, \"pre_grad\": false, \"precision\": 0.0001, \"requires_optimizer\": true}, \"generate_intermediate_hooks\": false, \"global_cache_dir\": null, \"graph_partition\": false, \"group_fusion\": false, \"halide.asserts\": false, \"halide.cpu_target\": \"host\", \"halide.debug\": false, \"halide.gpu_target\": \"host-cuda\", \"halide.scan_kernels\": false, \"halide.scheduler_cpu\": \"Adams2019\", \"halide.scheduler_cuda\": \"Anderson2021\", \"implicit_fallbacks\": true, \"inplace_buffers\": true, \"inplace_padding\": true, \"inter_node_bw\": 25, \"intra_node_bw\": 300, 
\"is_nightly_or_source\": false, \"is_predispatch\": false, \"joint_custom_post_pass\": null, \"joint_custom_pre_pass\": null, \"joint_graph_constant_folding\": true, \"keep_output_stride\": true, \"kernel_name_max_ops\": 10, \"layout_opt_default\": \"1\", \"layout_optimization\": true, \"log_tlparse\": false, \"loop_ordering_after_fusion\": false, \"max_autotune\": false, \"max_autotune_conv_backends\": \"ATEN,TRITON\", \"max_autotune_flex_search_space\": \"DEFAULT\", \"max_autotune_gemm\": false, \"max_autotune_gemm_backends\": \"ATEN,TRITON,CPP\", \"max_autotune_gemm_search_space\": \"DEFAULT\", \"max_autotune_pointwise\": false, \"max_autotune_report_choices_stats\": true, \"max_autotune_subproc_graceful_timeout_seconds\": 0.0, \"max_autotune_subproc_result_timeout_seconds\": 60.0, \"max_autotune_subproc_terminate_timeout_seconds\": 0.0, \"max_epilogue_benchmarked_choices\": 1, \"max_fusion_buffer_group_pairwise_attempts\": 64, \"max_fusion_size\": 64, \"max_pointwise_cat_inputs\": 8, \"memory_planning\": false, \"memory_pool\": \"intermediates\", \"min_num_split\": 0, \"mixed_mm_choice\": \"heuristic\", \"multi_kernel_hints\": [], \"nan_asserts\": false, \"non_blocking_remote_cache_write\": true, \"online_softmax\": true, \"optimize_scatter_upon_const_tensor\": true, \"pad_channels_last\": false, \"pad_outputs\": false, \"padding_alignment_bytes\": 128, \"padding_stride_threshold\": 1024, \"pattern_matcher\": true, \"permute_fusion\": false, \"pick_loop_orders\": true, \"post_grad_custom_post_pass\": null, \"post_grad_custom_pre_pass\": null, \"post_grad_fusion_options\": {}, \"pre_grad_custom_pass\": null, \"pre_grad_fusion_options\": {}, \"precompilation_timeout_seconds\": 3600, \"profile_bandwidth\": false, \"profile_bandwidth_output\": null, \"profile_bandwidth_regex\": \"\", \"profile_bandwidth_with_do_bench_using_profiling\": false, \"profiler_mark_wrapper_call\": false, \"prologue_fusion\": true, \"quiesce_async_compile_pool\": false, \"realize_acc_reads_size_threshold\": null, \"realize_acc_reads_threshold\": 8, \"realize_opcount_threshold\": 30, \"realize_reads_threshold\": 4, \"remote_gemm_autotune_cache\": false, \"remove_pre_grad_passes\": null, \"reorder_for_compute_comm_overlap\": false, \"reorder_for_compute_comm_overlap_passes\": [\"reorder_compute_for_overlap\", \"sink_waits\", \"raise_comms\"], \"reorder_for_locality\": true, \"reorder_for_peak_memory\": true, \"reorder_prefetch_limit\": null, \"rocm.arch\": [], \"rocm.ck_dir\": null, \"rocm.ck_max_profiling_configs\": null, \"rocm.ck_supported_arch\": [\"gfx90a\", \"gfx942\", \"gfx950\"], \"rocm.ck_tile_max_profiling_configs\": null, \"rocm.compile_opt_level\": \"-O2\", \"rocm.flush_denormals\": true, \"rocm.generate_test_runner\": false, \"rocm.is_debug\": false, \"rocm.kBatch_sweep\": null, \"rocm.n_max_profiling_configs\": null, \"rocm.print_kernel_resource_usage\": false, \"rocm.rocm_home\": null, \"rocm.save_temps\": false, \"rocm.split_k_threshold\": 16, \"rocm.use_fast_math\": true, \"rocm.use_preselected_instances\": false, \"save_args\": false, \"scalar_asserts\": true, \"score_fusion_memory_threshold\": 10, \"search_autotune_cache\": false, \"shape_padding\": true, \"size_asserts\": true, \"sleep_sec_TESTING_ONLY\": null, \"split_cat_fx_passes\": true, \"split_reductions\": true, \"static_launch_user_defined_triton_kernels\": false, \"static_weight_shapes\": true, \"strict_static_cuda_launcher\": false, \"test_configs.autotune_choice_desc_regex\": null, \"test_configs.autotune_choice_name_regex\": null, 
\"test_configs.force_extern_kernel_in_multi_template\": false, \"test_configs.graphsafe_rng_func_ignores_fallback_random\": false, \"test_configs.max_mm_configs\": null, \"test_configs.runtime_triton_dtype_assert\": false, \"test_configs.static_cpp_dtype_assert\": false, \"test_configs.track_memory_lifecycle\": null, \"test_configs.use_libtorch\": false, \"torchinductor_worker_logpath\": \"\", \"trace.compile_profile\": false, \"trace.debug_dir\": null, \"trace.debug_log\": false, \"trace.dot_graph_shape\": null, \"trace.draw_orig_fx_graph\": false, \"trace.enabled\": false, \"trace.fx_graph\": true, \"trace.fx_graph_transformed\": true, \"trace.graph_diagram\": false, \"trace.info_log\": false, \"trace.ir_post_fusion\": true, \"trace.ir_pre_fusion\": true, \"trace.log_autotuning_results\": false, \"trace.log_url_for_graph_xform\": null, \"trace.output_code\": true, \"trace.provenance_tracking_level\": 1, \"trace.save_real_tensors\": false, \"trace.upload_tar\": null, \"triton.autotune_at_compile_time\": null, \"triton.autotune_cublasLt\": true, \"triton.autotune_pointwise\": true, \"triton.autotune_with_sample_inputs\": false, \"triton.coalesce_tiling_analysis\": false, \"triton.codegen_upcast_to_fp32\": true, \"triton.cooperative_reductions\": false, \"triton.cudagraph_capture_sizes\": null, \"triton.cudagraph_dynamic_shape_warn_limit\": 50, \"triton.cudagraph_skip_dynamic_graphs\": false, \"triton.cudagraph_support_input_mutation\": false, \"triton.cudagraph_trees\": true, \"triton.cudagraph_trees_history_recording\": false, \"triton.cudagraph_unexpected_rerecord_limit\": 128, \"triton.cudagraphs\": false, \"triton.debug_sync_graph\": false, \"triton.debug_sync_kernel\": false, \"triton.decompose_k_threshold\": 32, \"triton.dense_indexing\": false, \"triton.descriptive_names\": \"original_aten\", \"triton.disallow_failing_autotune_kernels_TESTING_ONLY\": false, \"triton.divisible_by_16\": true, \"triton.enable_persistent_tma_matmul\": false, \"triton.fast_path_cudagraph_asserts\": false, \"triton.force_cooperative_reductions\": false, \"triton.force_cudagraph_sync\": false, \"triton.force_cudagraphs_warmup\": false, \"triton.inject_relu_bug_TESTING_ONLY\": null, \"triton.max_tiles\": null, \"triton.min_split_scan_rblock\": 256, \"triton.multi_kernel\": 0, \"triton.num_decompose_k_splits\": 10, \"triton.persistent_reductions\": true, \"triton.prefer_nd_tiling\": false, \"triton.skip_cudagraph_warmup\": false, \"triton.skip_l1_cache\": false, \"triton.slow_path_cudagraph_asserts\": true, \"triton.spill_threshold\": 16, \"triton.store_cubin\": false, \"triton.tile_reductions\": false, \"triton.tiling_prevents_pointwise_fusion\": true, \"triton.tiling_prevents_reduction_fusion\": true, \"triton.unique_kernel_names\": true, \"triton.unique_user_kernel_names\": false, \"triton.use_block_ptr\": false, \"triton.use_tensor_descriptor\": false, \"triton_kernel_default_layout_constraint\": \"needs_fixed_stride_order\", \"unbacked_symint_fallback\": 8192, \"unroll_reductions_threshold\": 8, \"unsafe_ignore_unsupported_triton_autotune_args\": false, \"unsafe_marked_cacheable_functions\": {}, \"unsafe_skip_cache_dynamic_shape_guards\": false, \"use_experimental_benchmarker\": false, \"use_fast_math\": false, \"use_mixed_mm\": true, \"use_static_cuda_launcher\": true, \"verbose_progress\": false, \"warn_mix_layout\": false, \"worker_log_path\": \"/logs/dedicated_log_torch_compile_worker_rank\", \"worker_start_method\": \"subprocess\", \"worker_suppress_logging\": true}", "remote_cache_version": null, 
"inductor_fx_remote_cache_hit_count": null, "inductor_fx_remote_cache_miss_count": null, "inductor_fx_remote_cache_backend_type": null, "inductor_fx_remote_cache_hit_keys": null, "inductor_fx_remote_cache_miss_keys": null, "cuda_version": "12.4.0", "triton_version": "3.3.1+fb", "feature_usage": {"fx_cache": false, "parallel_compile_post_warmup": false}, "compile_time_autotune_time_us": null, "is_runtime": false, "gc_time_us": null, "tensorify_float_attempt": null, "tensorify_float_success": null, "tensorify_float_failure": null, "guard_latency_us": null, "recompile_reason": null, "num_graph_breaks": 0, "triton_kernel_compile_times_us": "[[\"triton_poi_fused_mul_1\", 183348], [\"triton_poi_fused_addmm_gelu_2\", 182513], [\"triton_poi_fused_addmm_relu_sigmoid_0\", 166812]]", "ir_count": null, "cudagraph_skip_reason": null, "python_version": "3.10.9+fb (3.10:1dd9be6, May  4 2022, 01:23:45) [Clang 17.0.4 (mononoke://mononoke.internal.tfbnw.net/fbsource 447fcd878ef9ed82d", "pgo_put_remote_code_state_time_us": null, "pgo_get_remote_code_state_time_us": null, "param_numel": null, "param_bytes": null, "param_count": null, "recompile_user_contexts": null, "inline_inbuilt_nn_modules_candidate": false}, "stack": [{"line": 39, "name": "", "filename": 0, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 0, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 1, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 2, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 3, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 3, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "", "filename": 4, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 4, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 5, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 4, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 4, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 4, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 4, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 6, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 7, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 9, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 9, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 8, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 8, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 10, "loc": "return func(*args, **kwds)"}, 
{"line": 624, "name": "test_kernel_information_generation", "filename": 11, "loc": "torch._inductor.aoti_compile_and_package(ep, package_path=pt2_file)"}, {"line": 151, "name": "aoti_compile_and_package", "filename": 19, "loc": "return aot_inductor_minifier_wrapper("}, {"line": 1254, "name": "aot_inductor_minifier_wrapper", "filename": 20, "loc": "return func("}, {"line": 194, "name": "_aoti_compile_and_package_inner", "filename": 19, "loc": "aoti_files = aot_compile(gm, args, kwargs, options=inductor_configs)"}, {"line": 301, "name": "aot_compile", "filename": 19, "loc": "return compile_fx_aot("}, {"line": 1890, "name": "compile_fx_aot", "filename": 21, "loc": "with ("}, {"line": 96, "name": "__exit__", "filename": 36, "loc": "self._on_exit("}, {"line": 1641, "name": "record_compilation_metrics", "filename": 35, "loc": "torch._logging.trace_structured("}]}
+V0819 12:17:20.512000 255690 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "e7be130af6e49eafb61d62ac7a68a2ba"}
+	{
+	"name": "compile_fx_aot",
+	"ts": 1755631040512053.8,
+	"args": {
+	"compile_id": "None",
+	"num_graph_breaks": 0,
+	"frame_key": null,
+	"co_name": null,
+	"co_filename": null,
+	"co_firstlineno": null,
+	"cache_size": null,
+	"accumulated_cache_size": null,
+	"guard_count": null,
+	"shape_env_guard_count": null,
+	"graph_op_count": null,
+	"graph_node_count": null,
+	"graph_input_count": null,
+	"fail_type": null,
+	"fail_reason": null,
+	"fail_user_frame_filename": null,
+	"fail_user_frame_lineno": null,
+	"non_compliant_ops": null,
+	"compliant_custom_ops": null,
+	"restart_reasons": null,
+	"dynamo_time_before_restart_s": null,
+	"has_guarded_code": null,
+	"dynamo_config": "{\"_autograd_backward_strict_mode_conditional_banned_ops\": [\"stride\", \"storage_offset\", \"is_contiguous\"], \"_unsafe_skip_fsdp_module_guards\": false, \"accumulated_recompile_limit\": 256, \"allow_complex_guards_as_runtime_asserts\": false, \"allow_empty_graphs\": false, \"allow_ignore_mark_dynamic\": false, \"allow_rnn\": false, \"allow_unspec_int_on_nn_module\": false, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch._prims\", \"torch._refs\", \"torch.distributions\", \"torch.testing\"], \"assume_dunder_attributes_remain_unchanged\": true, \"assume_static_by_default\": true, \"automatic_dynamic_local_pgo\": true, \"automatic_dynamic_remote_pgo\": null, \"automatic_dynamic_shapes\": true, \"automatic_dynamic_shapes_mark_as\": \"dynamic\", \"caching_precompile\": false, \"capture_autograd_function\": true, \"capture_dynamic_output_shape_ops\": false, \"capture_func_transforms\": true, \"capture_scalar_outputs\": false, \"capture_sparse_compute\": false, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"cprofile\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"dead_code_elimination\": true, \"disable\": false, \"do_not_emit_runtime_asserts\": false, \"dont_skip_tracing\": false, \"dynamic_shapes\": true, \"enable_compiler_collectives\": false, \"enable_cpp_framelocals_guard_eval\": true, \"enable_cpp_guard_manager\": true, \"enable_cpp_symbolic_shape_guards\": false, \"enable_faithful_generator_behavior\": true, \"enable_trace_contextlib\": true, \"enable_trace_unittest\": false, \"error_on_nested_fx_trace\": true, \"error_on_nested_jit_trace\": true, \"error_on_recompile\": false, \"fail_on_recompile_limit_hit\": false, \"fake_tensor_cache_crosscheck_enabled\": false, \"fake_tensor_cache_enabled\": true, \"fake_tensor_disable_inference_mode\": true, \"force_nn_module_property_static_shapes\": true, \"force_parameter_static_shapes\": true, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"graph_break_on_nn_param_ctor\": true, \"graph_deduplication_lint\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"inline_inbuilt_nn_modules\": true, \"install_free_tensors\": false, \"issue_3_13_0_warning\": true, \"max_saved_pointers_for_recursive_dict_tags_check\": 256, \"minimum_call_count\": 1, \"numpy_default_complex\": \"complex128\", \"numpy_default_float\": \"float64\", \"numpy_default_int\": \"int64\", \"only_allow_pt2_compliant_ops\": false, \"optimize_ddp\": true, \"optimize_ddp_lazy_compile\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"prepare_freezing\": false, \"pt2_compile_id_prefix\": null, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"recompile_limit\": 8, \"record_compile_time_instruction_count\": false, \"record_runtime_overhead\": true, \"replay_record_enabled\": false, \"report_guard_failures\": true, \"rewrite_assert_with_torch_assert\": true, \"run_gc_after_compile\": true, \"skip_code_recursive_on_recompile_limit_hit\": true, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_guards_on_constant_func_defaults\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"skip_tensor_guards_with_matching_dict_tags\": true, \"skip_torchrec\": true, \"skipfiles_inline_module_allowlist\": {}, \"specialize_float\": false, \"specialize_int\": false, \"suppress_errors\": false, 
\"trace_numpy\": true, \"track_nodes_for_deduplication\": false, \"use_graph_deduplication\": false, \"use_lamba_guard_for_object_aliasing\": true, \"use_lazy_graph_module\": true, \"use_numpy_random_stream\": false, \"use_recursive_dict_tags_for_guards\": true, \"verify_correctness\": false, \"wrap_top_frame\": false}"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
diff --git a/tests/inputs/inductor_provenance_jit_debug_handle_log.txt b/tests/inputs/inductor_provenance_jit_debug_handle_log.txt
new file mode 100644
index 0000000..c2d8fb6
--- /dev/null
+++ b/tests/inputs/inductor_provenance_jit_debug_handle_log.txt
@@ -0,0 +1,2331 @@
+V0819 12:42:50.385000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "ee061e0671fdfa82117c12d6330aa35c"}
+	{
+	"name": "dynamo",
+	"ts": 1755632570385434.8,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:50.389000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "b12a0979e0046b278cd41d7c1f5c8692"}
+	{
+	"name": "entire_frame_compile",
+	"ts": 1755632570389315.0,
+	"args": {
+	"fn_name": "_compile.compile_inner",
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:50.391000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_dynamo/convert_frame.py", 0]}
+V0819 12:42:50.392000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/__run_lpar_main__.py", 1]}
+V0819 12:42:50.392000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/__par__/meta_only/bootstrap.py", 2]}
+V0819 12:42:50.393000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/__par__/bootstrap.py", 3]}
+V0819 12:42:50.393000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/usr/local/fbcode/platform010/lib/python3.10/runpy.py", 4]}
+V0819 12:42:50.393000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/testinfra/testpilot/integration/python/adapters/unittest.py", 5]}
+V0819 12:42:50.393000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/testinfra/testpilot/integration/python/adapters/base.py", 6]}
+V0819 12:42:50.394000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/usr/local/fbcode/platform010/lib/python3.10/unittest/runner.py", 7]}
+V0819 12:42:50.394000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/usr/local/fbcode/platform010/lib/python3.10/unittest/suite.py", 8]}
+V0819 12:42:50.394000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/usr/local/fbcode/platform010/lib/python3.10/unittest/case.py", 9]}
+V0819 12:42:50.395000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/testing/_internal/common_utils.py", 10]}
+V0819 12:42:50.395000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/usr/local/fbcode/platform010/lib/python3.10/contextlib.py", 11]}
+V0819 12:42:50.395000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py", 12]}
+V0819 12:42:50.396000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_dynamo/eval_frame.py", 13]}
+V0819 12:42:50.396000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_logging/structured.py:28] {"str": ["/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/nn/modules/module.py", 14]}
+V0819 12:42:50.396000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/convert_frame.py:246] {"dynamo_start": {"stack": [{"line": 39, "name": "", "filename": 1, "loc": "__invoke_main()"}, {"line": 36, "name": "__invoke_main", "filename": 1, "loc": "run_as_main(module, main_function)"}, {"line": 105, "name": "run_as_main", "filename": 2, "loc": "oss_run_as_main("}, {"line": 70, "name": "run_as_main", "filename": 3, "loc": "runpy._run_module_as_main(main_module, alter_argv=False)"}, {"line": 196, "name": "_run_module_as_main", "filename": 4, "loc": "return _run_code(code, main_globals, None,"}, {"line": 86, "name": "_run_code", "filename": 4, "loc": "exec(code, run_globals)"}, {"line": 731, "name": "", "filename": 5, "loc": "sys.exit(main())"}, {"line": 727, "name": "main", "filename": 5, "loc": "return UnittestTestPilotAdapter().run(sys.argv)"}, {"line": 325, "name": "run", "filename": 6, "loc": "return self.run_human_interface(argv=argv_minus_cvg)"}, {"line": 620, "name": "run_human_interface", "filename": 5, "loc": "return self.get_test_program(argv=argv).run()"}, {"line": 582, "name": "run", "filename": 5, "loc": "result = self.run_tests(test_suite)"}, {"line": 554, "name": "run_tests", "filename": 5, "loc": "return self._run_suite_and_maybe_profile(runner, test_suite)"}, {"line": 508, "name": "_run_suite_and_maybe_profile", "filename": 5, "loc": "result = runner.run(test_suite)"}, {"line": 184, "name": "run", "filename": 7, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 8, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 8, "loc": "test(result)"}, {"line": 84, "name": "__call__", "filename": 8, "loc": "return self.run(*args, **kwds)"}, {"line": 122, "name": "run", "filename": 8, "loc": "test(result)"}, {"line": 650, "name": "__call__", "filename": 9, "loc": "return self.run(*args, **kwds)"}, {"line": 3406, "name": "run", "filename": 10, "loc": "self._run_custom("}, {"line": 3376, "name": "_run_custom", "filename": 10, "loc": "super_run(result=result)"}, {"line": 591, "name": "run", "filename": 9, "loc": "self._callTestMethod(testMethod)"}, {"line": 549, "name": "_callTestMethod", "filename": 9, "loc": "method()"}, {"line": 79, "name": "inner", "filename": 11, "loc": "return func(*args, **kwds)"}, {"line": 576, "name": "test_tlparse_kernel_stack_traces", "filename": 12, "loc": "compiled(*example_inputs)"}, {"line": 413, "name": "__call__", "filename": 13, "loc": "return super().__call__(*args, **kwargs)"}, {"line": 1775, "name": "_wrapped_call_impl", "filename": 14, "loc": "return self._call_impl(*args, **kwargs)"}, {"line": 1786, "name": "_call_impl", "filename": 14, "loc": "return forward_call(*args, **kwargs)"}, {"line": 804, "name": "compile_wrapper", "filename": 13, "loc": "return fn(*args, **kwargs)"}, {"line": 78, "name": "forward", "filename": 12}]}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V0819 12:42:50.398000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "a7969404cec220df0ce9e388e8d2bc2a"}
+	{
+	"name": "compile_attempt_0",
+	"ts": 1755632570397980.0,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:50.403000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "30c8fed6345f96a7150ec36bdd3889c9"}
+	{
+	"name": "bytecode_tracing",
+	"ts": 1755632570403825.2,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:50.412000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:270] {"describe_storage": {"id": 0, "describer_id": 0, "size": 640}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V0819 12:42:50.412000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:487] {"describe_tensor": {"id": 0, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [16, 10], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [10, 1], "storage": 0, "view_func": "_CustomViewFunc(func=)", "describer_id": 0}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V0819 12:42:50.413000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:1899] {"describe_source": {"describer_id": 0, "id": 0, "source": "L['self']._modules['fc1']._parameters['weight']"}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V0819 12:42:50.415000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:270] {"describe_storage": {"id": 1, "describer_id": 0, "size": 64}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V0819 12:42:50.416000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:487] {"describe_tensor": {"id": 1, "ndim": 1, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [16], "is_leaf": true, "requires_grad": true, "is_parameter": true, "stride": [1], "storage": 1, "view_func": "_CustomViewFunc(func=)", "describer_id": 0}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V0819 12:42:50.416000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:1899] {"describe_source": {"describer_id": 0, "id": 1, "source": "L['self']._modules['fc1']._parameters['bias']"}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V0819 12:42:50.430000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:270] {"describe_storage": {"id": 2, "describer_id": 0, "size": 320}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V0819 12:42:50.430000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:487] {"describe_tensor": {"id": 2, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [8, 10], "is_leaf": true, "stride": [10, 1], "storage": 2, "view_func": "_CustomViewFunc(func=)", "describer_id": 0}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V0819 12:42:50.431000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:1899] {"describe_source": {"describer_id": 0, "id": 2, "source": "L['x']"}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V0819 12:42:50.452000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:270] {"describe_storage": {"id": 3, "describer_id": 0, "size": 800}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V0819 12:42:50.452000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:487] {"describe_tensor": {"id": 11, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [10, 20], "is_leaf": true, "stride": [20, 1], "storage": 3, "view_func": "_CustomViewFunc(func=)", "describer_id": 0}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V0819 12:42:50.453000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:1899] {"describe_source": {"describer_id": 0, "id": 11, "source": "L['a']"}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V0819 12:42:50.456000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:270] {"describe_storage": {"id": 4, "describer_id": 0, "size": 1200}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V0819 12:42:50.457000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:487] {"describe_tensor": {"id": 13, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [10, 30], "is_leaf": true, "stride": [30, 1], "storage": 4, "view_func": "_CustomViewFunc(func=)", "describer_id": 0}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V0819 12:42:50.457000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:1899] {"describe_source": {"describer_id": 0, "id": 13, "source": "L['c']"}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V0819 12:42:50.459000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:270] {"describe_storage": {"id": 5, "describer_id": 0, "size": 2400}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V0819 12:42:50.459000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:487] {"describe_tensor": {"id": 14, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [20, 30], "is_leaf": true, "stride": [30, 1], "storage": 5, "view_func": "_CustomViewFunc(func=)", "describer_id": 0}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V0819 12:42:50.460000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_subclasses/meta_utils.py:1899] {"describe_source": {"describer_id": 0, "id": 14, "source": "L['b']"}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V0819 12:42:50.469000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "bc61fdd766c8ecf323030cc0d73af522"}
+	{
+	"name": "bytecode_tracing",
+	"ts": 1755632570469877.5,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:50.481000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/output_graph.py:1752] {"dynamo_output_graph": {"sizes": {"l_self_modules_fc1_parameters_weight_": [16, 10], "l_self_modules_fc1_parameters_bias_": [16], "l_x_": [8, 10], "l_a_": [10, 20], "l_c_": [10, 30], "l_b_": [20, 30], "x": [8, 16], "x_1": [8, 16], "x_2": [8, 16], "d": [10, 20], "y": [10, 30], "z": [10, 30]}}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "51757187640803aabe4a6ec7c7b1fbcd"}
+	class GraphModule(torch.nn.Module):
+	    def forward(self, L_self_modules_fc1_parameters_weight_: "f32[16, 10][10, 1]cuda:0", L_self_modules_fc1_parameters_bias_: "f32[16][1]cuda:0", L_x_: "f32[8, 10][10, 1]cuda:0", L_a_: "f32[10, 20][20, 1]cuda:0", L_c_: "f32[10, 30][30, 1]cuda:0", L_b_: "f32[20, 30][30, 1]cuda:0"):
+	        l_self_modules_fc1_parameters_weight_ = L_self_modules_fc1_parameters_weight_
+	        l_self_modules_fc1_parameters_bias_ = L_self_modules_fc1_parameters_bias_
+	        l_x_ = L_x_
+	        l_a_ = L_a_
+	        l_c_ = L_c_
+	        l_b_ = L_b_
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:79 in forward, code: x = self.fc1(x)
+	        x: "f32[8, 16][16, 1]cuda:0" = torch._C._nn.linear(l_x_, l_self_modules_fc1_parameters_weight_, l_self_modules_fc1_parameters_bias_);  l_x_ = l_self_modules_fc1_parameters_weight_ = l_self_modules_fc1_parameters_bias_ = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:80 in forward, code: x = self.relu(x)
+	        x_1: "f32[8, 16][16, 1]cuda:0" = torch.nn.functional.relu(x, inplace = False);  x = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:81 in forward, code: x = self.sigmoid(x)
+	        x_2: "f32[8, 16][16, 1]cuda:0" = torch.sigmoid(x_1);  x_1 = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:82 in forward, code: d = a * 3.14
+	        d: "f32[10, 20][20, 1]cuda:0" = l_a_ * 3.14;  l_a_ = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:83 in forward, code: y = torch.addmm(c, d, b)
+	        y: "f32[10, 30][30, 1]cuda:0" = torch.addmm(l_c_, d, l_b_);  l_c_ = d = l_b_ = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:84 in forward, code: z = torch.nn.functional.gelu(y)
+	        z: "f32[10, 30][30, 1]cuda:0" = torch._C._nn.gelu(y);  y = None
+	        return (x_2, z)
+	        
+V0819 12:42:50.482000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "7289af1b8f240af1db72a535ac36385b"}
+	{
+	"name": "backend_compile",
+	"ts": 1755632570482874.8,
+	"args": {
+	"fn_name": "OutputGraph.call_user_compiler",
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:50.484000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "8a90ae080ee58b2927e670861a303e26"}
+	{
+	"name": "inductor_codecache_torch_key",
+	"ts": 1755632570484152.5,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:50.485000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "fc50911a855e642e0f2314e83610e5c0"}
+	{
+	"name": "inductor_codecache_torch_key",
+	"ts": 1755632570485175.2,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:50.489000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_inductor/compile_fx.py:2223] {"artifact": {"name": "before_pre_grad_graph", "encoding": "string"}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "703537dea59058ca7276763cbbacee63"}
+	class GraphModule(torch.nn.Module):
+	    def forward(self, L_self_modules_fc1_parameters_weight_: "f32[16, 10][10, 1]cuda:0", L_self_modules_fc1_parameters_bias_: "f32[16][1]cuda:0", L_x_: "f32[8, 10][10, 1]cuda:0", L_a_: "f32[10, 20][20, 1]cuda:0", L_c_: "f32[10, 30][30, 1]cuda:0", L_b_: "f32[20, 30][30, 1]cuda:0"):
+	        l_self_modules_fc1_parameters_weight_ = L_self_modules_fc1_parameters_weight_
+	        l_self_modules_fc1_parameters_bias_ = L_self_modules_fc1_parameters_bias_
+	        l_x_ = L_x_
+	        l_a_ = L_a_
+	        l_c_ = L_c_
+	        l_b_ = L_b_
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:79 in forward, code: x = self.fc1(x)
+	        x: "f32[8, 16][16, 1]cuda:0" = torch._C._nn.linear(l_x_, l_self_modules_fc1_parameters_weight_, l_self_modules_fc1_parameters_bias_);  l_x_ = l_self_modules_fc1_parameters_weight_ = l_self_modules_fc1_parameters_bias_ = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:80 in forward, code: x = self.relu(x)
+	        x_1: "f32[8, 16][16, 1]cuda:0" = torch.nn.functional.relu(x, inplace = False);  x = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:81 in forward, code: x = self.sigmoid(x)
+	        x_2: "f32[8, 16][16, 1]cuda:0" = torch.sigmoid(x_1);  x_1 = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:82 in forward, code: d = a * 3.14
+	        d: "f32[10, 20][20, 1]cuda:0" = l_a_ * 3.14;  l_a_ = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:83 in forward, code: y = torch.addmm(c, d, b)
+	        y: "f32[10, 30][30, 1]cuda:0" = torch.addmm(l_c_, d, l_b_);  l_c_ = d = l_b_ = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:84 in forward, code: z = torch.nn.functional.gelu(y)
+	        z: "f32[10, 30][30, 1]cuda:0" = torch._C._nn.gelu(y);  y = None
+	        return (x_2, z)
+	        
+	
+	 # graph id: 139826961857216
+V0819 12:42:50.490000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "b8d5938125640ed1585cc17e9c884cee"}
+	{
+	"name": "_recursive_pre_grad_passes",
+	"ts": 1755632570490403.5,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:50.506000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "09c5549264438b7d01ec173db28a83bd"}
+	{
+	"name": "_recursive_pre_grad_passes",
+	"ts": 1755632570506792.0,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:50.511000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_inductor/compile_fx.py:2254] {"artifact": {"name": "after_pre_grad_graph", "encoding": "string"}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "703537dea59058ca7276763cbbacee63"}
+	class GraphModule(torch.nn.Module):
+	    def forward(self, L_self_modules_fc1_parameters_weight_: "f32[16, 10][10, 1]cuda:0", L_self_modules_fc1_parameters_bias_: "f32[16][1]cuda:0", L_x_: "f32[8, 10][10, 1]cuda:0", L_a_: "f32[10, 20][20, 1]cuda:0", L_c_: "f32[10, 30][30, 1]cuda:0", L_b_: "f32[20, 30][30, 1]cuda:0"):
+	        l_self_modules_fc1_parameters_weight_ = L_self_modules_fc1_parameters_weight_
+	        l_self_modules_fc1_parameters_bias_ = L_self_modules_fc1_parameters_bias_
+	        l_x_ = L_x_
+	        l_a_ = L_a_
+	        l_c_ = L_c_
+	        l_b_ = L_b_
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:79 in forward, code: x = self.fc1(x)
+	        x: "f32[8, 16][16, 1]cuda:0" = torch._C._nn.linear(l_x_, l_self_modules_fc1_parameters_weight_, l_self_modules_fc1_parameters_bias_);  l_x_ = l_self_modules_fc1_parameters_weight_ = l_self_modules_fc1_parameters_bias_ = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:80 in forward, code: x = self.relu(x)
+	        x_1: "f32[8, 16][16, 1]cuda:0" = torch.nn.functional.relu(x, inplace = False);  x = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:81 in forward, code: x = self.sigmoid(x)
+	        x_2: "f32[8, 16][16, 1]cuda:0" = torch.sigmoid(x_1);  x_1 = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:82 in forward, code: d = a * 3.14
+	        d: "f32[10, 20][20, 1]cuda:0" = l_a_ * 3.14;  l_a_ = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:83 in forward, code: y = torch.addmm(c, d, b)
+	        y: "f32[10, 30][30, 1]cuda:0" = torch.addmm(l_c_, d, l_b_);  l_c_ = d = l_b_ = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:84 in forward, code: z = torch.nn.functional.gelu(y)
+	        z: "f32[10, 30][30, 1]cuda:0" = torch._C._nn.gelu(y);  y = None
+	        return (x_2, z)
+	        
+	
+	 # graph id: 139826961857216
+V0819 12:42:50.516000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1985] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "d25074c098bc1e25f62043e18088fb33"}
+	{
+	"name": "autograd_cache_bypass",
+	"ts": 1755632570515849.2,
+	"args": {
+	"cache_bypass_reason": "FX graph cache is not enabled",
+	"cache_bypass_exception_type": "BypassAOTAutogradCache",
+	"cache_bypass_traceback": [
+	"Traceback (most recent call last):",
+	"  File \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_functorch/_aot_autograd/autograd_cache.py\", line 1151, in try_load",
+	"    cache_key, debug_lines = autograd_cache_key(",
+	"  File \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_functorch/_aot_autograd/autograd_cache.py\", line 485, in autograd_cache_key",
+	"    check_cacheable(gm)",
+	"  File \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_functorch/_aot_autograd/autograd_cache.py\", line 266, in check_cacheable",
+	"    raise BypassAOTAutogradCache(\"FX graph cache is not enabled\")",
+	"torch._functorch._aot_autograd.autograd_cache.BypassAOTAutogradCache: FX graph cache is not enabled",
+	""
+	],
+	"cache_bypass_hard_exception": false,
+	"key": null,
+	"cache_state": "bypass",
+	"components": [],
+	"compile_id": "0/0"
+	},
+	"ph": "i",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0,
+	"s": "p"
+	}
+V0819 12:42:50.517000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_functorch/_aot_autograd/autograd_cache.py:1268] {"artifact": {"name": "aotautograd_cache_bypass", "encoding": "json"}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "5682669b7d56332b3e6454cf411f14fe"}
+	{"cache_bypass_reason": "FX graph cache is not enabled", "cache_bypass_exception_type": "BypassAOTAutogradCache", "cache_bypass_traceback": ["Traceback (most recent call last):", "  File \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_functorch/_aot_autograd/autograd_cache.py\", line 1151, in try_load", "    cache_key, debug_lines = autograd_cache_key(", "  File \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_functorch/_aot_autograd/autograd_cache.py\", line 485, in autograd_cache_key", "    check_cacheable(gm)", "  File \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/torch/_functorch/_aot_autograd/autograd_cache.py\", line 266, in check_cacheable", "    raise BypassAOTAutogradCache(\"FX graph cache is not enabled\")", "torch._functorch._aot_autograd.autograd_cache.BypassAOTAutogradCache: FX graph cache is not enabled", ""], "cache_bypass_hard_exception": false, "key": null, "cache_state": "bypass", "components": [], "compile_id": "0/0"}
+V0819 12:42:50.518000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "3bf749ade16656da28aab3294d6dcdd8"}
+	{
+	"name": "create_aot_dispatcher_function",
+	"ts": 1755632570518030.0,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:50.522000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "3a1909f64d7606d354e727f172c1f6a5"}
+	{
+	"name": "aot_collect_metadata",
+	"ts": 1755632570522826.0,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:50.548000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "91f332636a88712e0716b309cd4634e4"}
+	{
+	"name": "aot_collect_metadata",
+	"ts": 1755632570548722.5,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:50.553000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "4676b8a7c7a3280ef353b40e1515b619"}
+	{
+	"name": "aot_trace_joint_graph",
+	"ts": 1755632570553368.2,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:50.628000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "642d4be2ea608720036d3e09fb8cee54"}
+	{
+	"name": "aot_trace_joint_graph",
+	"ts": 1755632570628324.2,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:50.635000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_functorch/_aot_autograd/graph_compile.py:1356] {"aot_joint_graph": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "83ff0df1921db9f62b4e00eafe25910a"}
+	class inner_f(torch.nn.Module):
+	    def forward(
+	        self,
+	        primals,
+	        tangents,
+	    ):
+	        primals_1: "f32[16, 10][10, 1]cuda:0"  # PlainAOTInput(idx=0)
+	        primals_2: "f32[16][1]cuda:0"  # PlainAOTInput(idx=1)
+	        primals_3: "f32[8, 10][10, 1]cuda:0"  # PlainAOTInput(idx=2)
+	        primals_4: "f32[10, 20][20, 1]cuda:0"  # PlainAOTInput(idx=3)
+	        primals_5: "f32[10, 30][30, 1]cuda:0"  # PlainAOTInput(idx=4)
+	        primals_6: "f32[20, 30][30, 1]cuda:0"  # PlainAOTInput(idx=5)
+	        tangents_1: "f32[8, 16][16, 1]cuda:0"  # TangentAOTInput(output=PlainAOTOutput(idx=0))
+	        primals_1, primals_2, primals_3, primals_4, primals_5, primals_6, tangents_1, = fx_pytree.tree_flatten_spec([primals, tangents], self._in_spec)
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:79 in forward, code: x = self.fc1(x)
+	        permute: "f32[10, 16][1, 10]cuda:0" = torch.ops.aten.permute.default(primals_1, [1, 0]);  primals_1 = None
+	        addmm: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.addmm.default(primals_2, primals_3, permute);  primals_2 = permute = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:80 in forward, code: x = self.relu(x)
+	        relu: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.relu.default(addmm);  addmm = None
+	        alias: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.alias.default(relu)
+	        alias_1: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.alias.default(alias);  alias = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:81 in forward, code: x = self.sigmoid(x)
+	        sigmoid: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.sigmoid.default(relu);  relu = None
+	        alias_2: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.alias.default(sigmoid)
+	        alias_3: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.alias.default(alias_2);  alias_2 = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:82 in forward, code: d = a * 3.14
+	        mul: "f32[10, 20][20, 1]cuda:0" = torch.ops.aten.mul.Tensor(primals_4, 3.14);  primals_4 = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:83 in forward, code: y = torch.addmm(c, d, b)
+	        addmm_1: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.addmm.default(primals_5, mul, primals_6);  primals_5 = mul = primals_6 = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:84 in forward, code: z = torch.nn.functional.gelu(y)
+	        mul_1: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.mul.Tensor(addmm_1, 0.5)
+	        mul_2: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.mul.Tensor(addmm_1, 0.7071067811865476);  addmm_1 = None
+	        erf: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.erf.default(mul_2);  mul_2 = None
+	        add: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.add.Tensor(erf, 1);  erf = None
+	        mul_3: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_1, add);  mul_1 = add = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:81 in forward, code: x = self.sigmoid(x)
+	        alias_4: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.alias.default(alias_3);  alias_3 = None
+	        alias_5: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.alias.default(alias_4);  alias_4 = None
+	        sub: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, alias_5)
+	        mul_4: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.mul.Tensor(alias_5, sub);  alias_5 = sub = None
+	        mul_5: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.mul.Tensor(tangents_1, mul_4);  tangents_1 = mul_4 = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:80 in forward, code: x = self.relu(x)
+	        alias_6: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.alias.default(alias_1);  alias_1 = None
+	        alias_7: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.alias.default(alias_6);  alias_6 = None
+	        le: "b8[8, 16][16, 1]cuda:0" = torch.ops.aten.le.Scalar(alias_7, 0);  alias_7 = None
+	        scalar_tensor: "f32[][]cuda:0" = torch.ops.aten.scalar_tensor.default(0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0))
+	        where: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.where.self(le, scalar_tensor, mul_5);  le = scalar_tensor = mul_5 = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:79 in forward, code: x = self.fc1(x)
+	        permute_1: "f32[16, 8][1, 16]cuda:0" = torch.ops.aten.permute.default(where, [1, 0])
+	        mm: "f32[16, 10][10, 1]cuda:0" = torch.ops.aten.mm.default(permute_1, primals_3);  permute_1 = primals_3 = None
+	        permute_2: "f32[10, 16][1, 10]cuda:0" = torch.ops.aten.permute.default(mm, [1, 0]);  mm = None
+	        sum_1: "f32[1, 16][16, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(where, [0], True);  where = None
+	        view: "f32[16][1]cuda:0" = torch.ops.aten.view.default(sum_1, [16]);  sum_1 = None
+	        permute_3: "f32[16, 10][10, 1]cuda:0" = torch.ops.aten.permute.default(permute_2, [1, 0]);  permute_2 = None
+	        return pytree.tree_unflatten([
+	            sigmoid,  # PlainAOTOutput(idx=0)
+	            mul_3,  # PlainAOTOutput(idx=1)
+	            permute_3,  # GradAOTOutput(grad_of=PlainAOTInput(idx=0))
+	            view,  # GradAOTOutput(grad_of=PlainAOTInput(idx=1))
+	            None,  # None
+	            None,  # None
+	            None,  # None
+	            None,  # None
+	        ], self._out_spec)
+	        
+V0819 12:42:50.637000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "bdc8bba01d8b08c768eada6f95570ba0"}
+	{
+	"name": "_recursive_joint_graph_passes",
+	"ts": 1755632570637191.0,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:50.894000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "75117e73cd59195f64202776ef960079"}
+	{
+	"name": "pad_mm_benchmark",
+	"ts": 1755632570893891.5,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:50.895000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "39f202144014618afffca1e0970dffb5"}
+	{
+	"name": "pad_mm_benchmark_get_do_bench",
+	"ts": 1755632570895634.8,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:50.897000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "c7e0cecd8954904b29d474753f84511b"}
+	{
+	"name": "pad_mm_benchmark_get_do_bench",
+	"ts": 1755632570896941.2,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:52.530000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "4fa863a30f3bdee4d0d0143e4579001b"}
+	{
+	"name": "TritonBenchmarker.benchmark_gpu",
+	"ts": 1755632572530213.0,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:52.786000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "8ad7e7cb38bc8d9670e6e865bbbc844e"}
+	{
+	"name": "TritonBenchmarker.benchmark_gpu",
+	"ts": 1755632572785958.8,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:52.788000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "ac567ff45c9d9539a7594fddc5a5cef9"}
+	{
+	"name": "TritonBenchmarker.benchmark_gpu",
+	"ts": 1755632572787982.2,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:52.926000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "ac4c6d19311555460e45049cec594a25"}
+	{
+	"name": "TritonBenchmarker.benchmark_gpu",
+	"ts": 1755632572926602.2,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:52.928000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "1c7f0d06b2caea6bb2d382f0fc54c398"}
+	{
+	"name": "pad_mm_benchmark",
+	"ts": 1755632572928892.0,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:52.933000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "4af2863759964af7373dd5d08f415830"}
+	{
+	"name": "pad_mm_benchmark",
+	"ts": 1755632572933058.2,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:52.934000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "a6510a78b15d2a0d9d1b12729b4a413a"}
+	{
+	"name": "pad_mm_benchmark_get_do_bench",
+	"ts": 1755632572934238.0,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:52.935000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "c4b095c5a635168bcdbba626d7606580"}
+	{
+	"name": "pad_mm_benchmark_get_do_bench",
+	"ts": 1755632572935410.5,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:52.937000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "7bb206105a4cfcfc0d375ea25437076a"}
+	{
+	"name": "TritonBenchmarker.benchmark_gpu",
+	"ts": 1755632572937122.5,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:53.050000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "80794bf1b786ae2a1bc080d7ccd984fa"}
+	{
+	"name": "TritonBenchmarker.benchmark_gpu",
+	"ts": 1755632573050662.2,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:53.052000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "e1d0639ab1327309823b036b5b1fbd8b"}
+	{
+	"name": "TritonBenchmarker.benchmark_gpu",
+	"ts": 1755632573052484.5,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:53.180000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "ec94f85bb5e39ac894efe141c0613b52"}
+	{
+	"name": "TritonBenchmarker.benchmark_gpu",
+	"ts": 1755632573180214.5,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:53.182000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "bd4a2f73e2eedf1673b2fd0ebb0ea154"}
+	{
+	"name": "pad_mm_benchmark",
+	"ts": 1755632573182192.8,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:53.185000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "d6b1b3da214ca57febb76fb9982860f3"}
+	{
+	"name": "pad_mm_benchmark",
+	"ts": 1755632573185574.2,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:53.186000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "6ca53d799344816743c46ab4aa29df2a"}
+	{
+	"name": "pad_mm_benchmark_get_do_bench",
+	"ts": 1755632573186680.0,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:53.187000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "ac3c30d5940e852b421699e7cd8215cb"}
+	{
+	"name": "pad_mm_benchmark_get_do_bench",
+	"ts": 1755632573187882.8,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:53.189000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "90cfce6952e588dfd5f2b9c0e97effb5"}
+	{
+	"name": "TritonBenchmarker.benchmark_gpu",
+	"ts": 1755632573189445.0,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:53.300000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "9ad638d6061e02ae7503d88e0b172785"}
+	{
+	"name": "TritonBenchmarker.benchmark_gpu",
+	"ts": 1755632573300644.0,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:53.302000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "3fd10706551dba29ec3acfbd76700a1c"}
+	{
+	"name": "TritonBenchmarker.benchmark_gpu",
+	"ts": 1755632573302799.2,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:53.433000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "f9125dc9b7ff4ea9a9819a6e03c8b621"}
+	{
+	"name": "TritonBenchmarker.benchmark_gpu",
+	"ts": 1755632573432949.0,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:53.435000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "c507175927dd0ad388c7b457c6771120"}
+	{
+	"name": "pad_mm_benchmark",
+	"ts": 1755632573435024.0,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:53.437000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "95c79239257c999978473c69aade44fb"}
+	{
+	"name": "_recursive_joint_graph_passes",
+	"ts": 1755632573437146.5,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:53.441000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "e899485d992b0351f768a90d21f315b6"}
+	{
+	"name": "min_cut_rematerialization_partition",
+	"ts": 1755632573441181.2,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:53.468000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "f00e072ec7442bf6ce01961fa9765ab4"}
+	{
+	"name": "min_cut_rematerialization_partition",
+	"ts": 1755632573467993.5,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:53.472000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_functorch/_aot_autograd/graph_compile.py:1466] {"artifact": {"name": "torch._functorch.config", "encoding": "string"}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "7007154042740213bd56d02775b8adb8"}
+	{
+	"TYPE_CHECKING": false,
+	"functionalize_rng_ops": false,
+	"fake_tensor_allow_meta": true,
+	"debug_assert": false,
+	"debug_partitioner": false,
+	"decompose_custom_triton_ops": true,
+	"static_weight_shapes": true,
+	"treat_parameters_as_free_to_save": true,
+	"cse": true,
+	"enable_autograd_cache": true,
+	"autograd_cache_allow_custom_autograd_functions": false,
+	"bundled_autograd_cache": false,
+	"autograd_cache_normalize_inputs": false,
+	"enable_remote_autograd_cache": null,
+	"view_replay_for_aliased_outputs": false,
+	"max_dist_from_bw": 1000,
+	"ban_recompute_used_far_apart": true,
+	"ban_recompute_long_fusible_chains": true,
+	"ban_recompute_materialized_backward": true,
+	"ban_recompute_not_in_allowlist": true,
+	"ban_recompute_reductions": true,
+	"recompute_views": false,
+	"activation_memory_budget": 1.0,
+	"activation_memory_budget_runtime_estimator": "flops",
+	"activation_memory_budget_solver": "dp",
+	"visualize_memory_budget_pareto": false,
+	"memory_budget_pareto_dir": null,
+	"aggressive_recomputation": false,
+	"fake_tensor_allow_unsafe_data_ptr_access": true,
+	"unlift_effect_tokens": true,
+	"custom_op_default_layout_constraint": "needs_exact_strides",
+	"fake_tensor_crossref": false,
+	"fake_tensor_propagate_real_tensors": false,
+	"backward_pass_autocast": "same_as_forward",
+	"donated_buffer": false,
+	"torch_compile_graph_format": "svg",
+	"generate_fake_kernels_from_real_mismatches": false,
+	"fake_tensor_prefer_device_type": null,
+	"graphsafe_rng_functionalization": true,
+	"strict_autograd_cache": false,
+	"unsafe_allow_optimization_of_collectives": false,
+	"disable_guess_zero_tangent_for_mutated_input_subclass": false,
+	"guess_tangent_strides_as_outputs": false,
+	"_sync_decision_cross_ranks": false,
+	"saved_tensors_hooks_filtering_mode": "donated"
+	}
+V0819 12:42:53.476000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_functorch/_aot_autograd/graph_compile.py:1615] {"artifact": {"name": "aot_forward_graph_fw_metadata", "encoding": "string"}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "9918bf222549381412745764dda8a320"}
+	ViewAndMutationMeta(input_info=[InputAliasInfo(is_leaf=True,
+	                                              mutates_data=False,
+	                                              mutates_metadata=False,
+	                                              mutations_hidden_from_autograd=True,
+	                                              mutations_under_no_grad_or_inference_mode=False,
+	                                              mutation_inductor_storage_resize=False,
+	                                              mutates_storage_metadata=False,
+	                                              requires_grad=True,
+	                                              keep_input_mutations=True),
+	                               InputAliasInfo(is_leaf=True,
+	                                              mutates_data=False,
+	                                              mutates_metadata=False,
+	                                              mutations_hidden_from_autograd=True,
+	                                              mutations_under_no_grad_or_inference_mode=False,
+	                                              mutation_inductor_storage_resize=False,
+	                                              mutates_storage_metadata=False,
+	                                              requires_grad=True,
+	                                              keep_input_mutations=True),
+	                               InputAliasInfo(is_leaf=True,
+	                                              mutates_data=False,
+	                                              mutates_metadata=False,
+	                                              mutations_hidden_from_autograd=True,
+	                                              mutations_under_no_grad_or_inference_mode=False,
+	                                              mutation_inductor_storage_resize=False,
+	                                              mutates_storage_metadata=False,
+	                                              requires_grad=False,
+	                                              keep_input_mutations=True),
+	                               InputAliasInfo(is_leaf=True,
+	                                              mutates_data=False,
+	                                              mutates_metadata=False,
+	                                              mutations_hidden_from_autograd=True,
+	                                              mutations_under_no_grad_or_inference_mode=False,
+	                                              mutation_inductor_storage_resize=False,
+	                                              mutates_storage_metadata=False,
+	                                              requires_grad=False,
+	                                              keep_input_mutations=True),
+	                               InputAliasInfo(is_leaf=True,
+	                                              mutates_data=False,
+	                                              mutates_metadata=False,
+	                                              mutations_hidden_from_autograd=True,
+	                                              mutations_under_no_grad_or_inference_mode=False,
+	                                              mutation_inductor_storage_resize=False,
+	                                              mutates_storage_metadata=False,
+	                                              requires_grad=False,
+	                                              keep_input_mutations=True),
+	                               InputAliasInfo(is_leaf=True,
+	                                              mutates_data=False,
+	                                              mutates_metadata=False,
+	                                              mutations_hidden_from_autograd=True,
+	                                              mutations_under_no_grad_or_inference_mode=False,
+	                                              mutation_inductor_storage_resize=False,
+	                                              mutates_storage_metadata=False,
+	                                              requires_grad=False,
+	                                              keep_input_mutations=True)],
+	                    output_info=[OutputAliasInfo(output_type=,
+	                                                raw_type=,
+	                                                base_idx=None,
+	                                                dynamic_dims=set(),
+	                                                requires_grad=True,
+	                                                functional_tensor=None),
+	                                OutputAliasInfo(output_type=,
+	                                                raw_type=,
+	                                                base_idx=None,
+	                                                dynamic_dims=set(),
+	                                                requires_grad=False,
+	                                                functional_tensor=None)],
+	                    num_intermediate_bases=0,
+	                    keep_input_mutations=True,
+	                    traced_tangents=[FakeTensor(..., device='cuda:0', size=(8, 16))],
+	                    traced_tangents_descs=[TangentAOTInput(output=PlainAOTOutput(idx=0))],
+	                    subclass_inp_meta=[PlainTensorMeta(unwrapped_idx=0,
+	                                                      memory_format=None),
+	                                      PlainTensorMeta(unwrapped_idx=1,
+	                                                      memory_format=None),
+	                                      PlainTensorMeta(unwrapped_idx=2,
+	                                                      memory_format=None),
+	                                      PlainTensorMeta(unwrapped_idx=3,
+	                                                      memory_format=None),
+	                                      PlainTensorMeta(unwrapped_idx=4,
+	                                                      memory_format=None),
+	                                      PlainTensorMeta(unwrapped_idx=5,
+	                                                      memory_format=None)],
+	                    subclass_fw_graph_out_meta=[PlainTensorMeta(unwrapped_idx=0,
+	                                                               memory_format=None),
+	                                               PlainTensorMeta(unwrapped_idx=1,
+	                                                               memory_format=None)],
+	                    subclass_tangent_meta=[PlainTensorMeta(unwrapped_idx=0,
+	                                                          memory_format=MemoryFormatMeta(size=None,
+	                                                                                         stride=None,
+	                                                                                         memory_format=torch.contiguous_format))],
+	                    is_train=True,
+	                    traced_tangent_metas=None,
+	                    num_symints_saved_for_bw=0,
+	                    grad_enabled_mutation=None,
+	                    deterministic=False,
+	                    static_input_indices=[0, 1],
+	                    tokens={},
+	                    indices_of_inputs_that_requires_grad_with_mutations_in_bw=[],
+	                    bw_donated_idxs=None,
+	                    num_backward_tokens=0,
+	                    num_graphsafe_rng_states=0,
+	                    graphsafe_rng_state_index=None)
+V0819 12:42:53.477000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_functorch/_aot_autograd/graph_compile.py:1633] {"aot_forward_graph": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "3010b909f4e8aff8b4836e455482cf22"}
+	class GraphModule(torch.nn.Module):
+	    def forward(
+	        self,
+	        primals_1: "f32[16, 10][10, 1]cuda:0",  # PlainAOTInput(idx=0)
+	        primals_2: "f32[16][1]cuda:0",  # PlainAOTInput(idx=1)
+	        primals_3: "f32[8, 10][10, 1]cuda:0",  # PlainAOTInput(idx=2)
+	        primals_4: "f32[10, 20][20, 1]cuda:0",  # PlainAOTInput(idx=3)
+	        primals_5: "f32[10, 30][30, 1]cuda:0",  # PlainAOTInput(idx=4)
+	        primals_6: "f32[20, 30][30, 1]cuda:0",  # PlainAOTInput(idx=5)
+	    ):
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:79 in forward, code: x = self.fc1(x)
+	        permute: "f32[10, 16][1, 10]cuda:0" = torch.ops.aten.permute.default(primals_1, [1, 0]);  primals_1 = None
+	        addmm: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.addmm.default(primals_2, primals_3, permute);  primals_2 = permute = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:80 in forward, code: x = self.relu(x)
+	        relu: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.relu.default(addmm);  addmm = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:81 in forward, code: x = self.sigmoid(x)
+	        sigmoid: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.sigmoid.default(relu)
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:82 in forward, code: d = a * 3.14
+	        mul: "f32[10, 20][20, 1]cuda:0" = torch.ops.aten.mul.Tensor(primals_4, 3.14);  primals_4 = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:83 in forward, code: y = torch.addmm(c, d, b)
+	        addmm_1: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.addmm.default(primals_5, mul, primals_6);  primals_5 = mul = primals_6 = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:84 in forward, code: z = torch.nn.functional.gelu(y)
+	        mul_1: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.mul.Tensor(addmm_1, 0.5)
+	        mul_2: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.mul.Tensor(addmm_1, 0.7071067811865476);  addmm_1 = None
+	        erf: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.erf.default(mul_2);  mul_2 = None
+	        add: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.add.Tensor(erf, 1);  erf = None
+	        mul_3: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_1, add);  mul_1 = add = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:80 in forward, code: x = self.relu(x)
+	        le: "b8[8, 16][16, 1]cuda:0" = torch.ops.aten.le.Scalar(relu, 0);  relu = None
+	        return (
+	            sigmoid,  # PlainAOTOutput(idx=0)
+	            mul_3,  # PlainAOTOutput(idx=1)
+	            primals_3,  # SavedForBackwardsAOTOutput(idx=0)
+	            sigmoid,  # SavedForBackwardsAOTOutput(idx=1)
+	            le,  # SavedForBackwardsAOTOutput(idx=2)
+	        )
+	        
+V0819 12:42:53.478000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_functorch/_aot_autograd/graph_compile.py:1637] {"aot_backward_graph": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "b06342ef51acd1126229bc821c58771f"}
+	class GraphModule(torch.nn.Module):
+	    def forward(
+	        self,
+	        primals_3: "f32[8, 10][10, 1]cuda:0",  # PlainAOTInput(idx=2)
+	        sigmoid: "f32[8, 16][16, 1]cuda:0",
+	        le: "b8[8, 16][16, 1]cuda:0",
+	        tangents_1: "f32[8, 16][16, 1]cuda:0",  # TangentAOTInput(output=PlainAOTOutput(idx=0))
+	    ):
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:81 in forward, code: x = self.sigmoid(x)
+	        sub: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.sub.Tensor(1, sigmoid)
+	        mul_4: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.mul.Tensor(sigmoid, sub);  sigmoid = sub = None
+	        mul_5: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.mul.Tensor(tangents_1, mul_4);  tangents_1 = mul_4 = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:80 in forward, code: x = self.relu(x)
+	        full_default: "f32[][]cuda:0" = torch.ops.aten.full.default([], 0.0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)
+	        where: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.where.self(le, full_default, mul_5);  le = full_default = mul_5 = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:79 in forward, code: x = self.fc1(x)
+	        permute_1: "f32[16, 8][1, 16]cuda:0" = torch.ops.aten.permute.default(where, [1, 0])
+	        mm: "f32[16, 10][10, 1]cuda:0" = torch.ops.aten.mm.default(permute_1, primals_3);  permute_1 = primals_3 = None
+	        sum_1: "f32[1, 16][16, 1]cuda:0" = torch.ops.aten.sum.dim_IntList(where, [0], True);  where = None
+	        view: "f32[16][1]cuda:0" = torch.ops.aten.view.default(sum_1, [16]);  sum_1 = None
+	        return (
+	            mm,  # GradAOTOutput(grad_of=PlainAOTInput(idx=0))
+	            view,  # GradAOTOutput(grad_of=PlainAOTInput(idx=1))
+	            None,  # None
+	            None,  # None
+	            None,  # None
+	            None,  # None
+	        )
+	        
+V0819 12:42:53.479000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "f559caff936906c061c98930f06d83aa"}
+	{
+	"name": "compile_fx..fw_compiler_base",
+	"ts": 1755632573478953.5,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:53.480000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "747a22c3326da6580bda9cd466d4c24e"}
+	{
+	"name": "inductor_compile",
+	"ts": 1755632573480172.5,
+	"args": {
+	"fn_name": "compile_fx_inner",
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:53.491000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "52e7aa6f1fa8226e3d6bc64f2e45a0a1"}
+	{
+	"name": "fx_codegen_and_compile",
+	"ts": 1755632573491278.5,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:53.502000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_inductor/compile_fx.py:1230] {"artifact": {"name": "fx_graph_runnable", "encoding": "string"}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "002bd4e1d2a83064e7eba20f485fef64"}
+	
+	import os
+	os.environ['PYTORCH_TEST_FBCODE'] = '1'
+	os.environ['TORCH_TRACE'] = '/home/shangdiy/my_trace_log_dir'
+	os.environ['PYTORCH_TEST_REMOTE_GPU'] = '1'
+	os.environ['PYTORCH_DDP_USE_SIDE_STREAM'] = '0'
+	os.environ['TRITON_ALLOW_NON_CONSTEXPR_GLOBALS'] = '1'
+	os.environ['TRITON_LIBHIP_PATH'] = '/usr/local/fbcode/platform010/lib/rocm-6.2.1/lib/libamdhip64.so'
+	os.environ['TRITON_CUPTI_LIB_PATH'] = '/usr/local/fbcode/platform010/lib/libcupti.so'
+	os.environ['TRITON_HOME'] = '/tmp/shangdiy'
+	os.environ['TORCHINDUCTOR_CACHE_DIR'] = '/tmp/tmp4zkba_w7'
+	os.environ['TRITON_CACHE_DIR'] = '/tmp/tmp4zkba_w7/triton'
+	
+	import torch
+	from torch import tensor, device
+	import torch.fx as fx
+	from torch._dynamo.testing import rand_strided
+	from math import inf
+	import torch._inductor.inductor_prims
+	
+	
+	
+	import torch._dynamo.config
+	import torch._inductor.config
+	import torch._functorch.config
+	import torch.fx.experimental._config
+	torch._dynamo.config.suppress_errors = False
+	torch._dynamo.config.raise_on_ctx_manager_usage = True
+	torch._dynamo.config.log_compilation_metrics = False
+	torch._inductor.config.fx_graph_cache = False
+	torch._inductor.config.compile_threads = 32
+	torch._inductor.config.trace.provenance_tracking_level = 2
+	torch._functorch.config.functionalize_rng_ops = False
+	torch._functorch.config.enable_autograd_cache = True
+	torch._functorch.config.fake_tensor_allow_unsafe_data_ptr_access = True
+	torch._functorch.config.unlift_effect_tokens = True
+	
+	
+	
+	isolate_fails_code_str = None
+	
+	torch.ops.load_library("//caffe2/torch/fb/sparsenn:sparsenn_operators_gpu")
+	torch.ops.load_library("//caffe2/torch/fb/sparsenn:sparsenn_operators")
+	torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_cpu")
+	torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")
+	
+	"""
+	To run this script in fbcode:
+	- Create a directory (//scripts/{your_unixname}/repro)
+	- Put this file in scripts/{your_unixname}/repro/fx_graph_runnable.py
+	- Add a TARGETS file that looks like the following
+	- `buck2 run //scripts/{your_unixname}/repro:repro`
+	
+	NOTE: you may need additional deps to actually be able to run the script.
+	```
+	# Contents of TARGETS file
+	load("@fbcode_macros//build_defs:python_binary.bzl", "python_binary")
+	
+	python_binary(
+	    name = "repro",
+	    main_src = "fx_graph_runnable.py",
+	    deps = [
+	        "//caffe2:torch",
+	        "//caffe2/torch/fb/sparsenn:sparsenn_operators_gpu",
+	        "//caffe2/torch/fb/sparsenn:sparsenn_operators",
+	        "//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_cpu",
+	        "//deeplearning/fbgemm/fbgemm_gpu:sparse_ops",
+	    ],
+	)
+	```
+	"""
+	
+	# torch version: 2.9.0a0+fb
+	# torch cuda version: 12.4.0
+	# CUDA Info: 
+	# nvcc: NVIDIA (R) Cuda compiler driver 
+	# Copyright (c) 2005-2024 NVIDIA Corporation 
+	# Built on Tue_Oct_29_23:50:19_PDT_2024 
+	# Cuda compilation tools, release 12.6, V12.6.85 
+	# Build cuda_12.6.r12.6/compiler.35059454_0 
+	
+	# GPU Hardware Info: 
+	# NVIDIA PG509-210 : 8 
+	
+	
+	from torch.nn import *
+	class Repro(torch.nn.Module):
+	    def __init__(self) -> None:
+	        super().__init__()
+	
+	    
+	    
+	    def forward(self, primals_1, primals_2, primals_3, primals_4, primals_5, primals_6):
+	        permute = torch.ops.aten.permute.default(primals_1, [1, 0]);  primals_1 = None
+	        addmm = torch.ops.aten.addmm.default(primals_2, primals_3, permute);  primals_2 = permute = None
+	        relu = torch.ops.aten.relu.default(addmm);  addmm = None
+	        sigmoid = torch.ops.aten.sigmoid.default(relu)
+	        mul = torch.ops.aten.mul.Tensor(primals_4, 3.14);  primals_4 = None
+	        addmm_1 = torch.ops.aten.addmm.default(primals_5, mul, primals_6);  primals_5 = mul = primals_6 = None
+	        mul_1 = torch.ops.aten.mul.Tensor(addmm_1, 0.5)
+	        mul_2 = torch.ops.aten.mul.Tensor(addmm_1, 0.7071067811865476);  addmm_1 = None
+	        erf = torch.ops.aten.erf.default(mul_2);  mul_2 = None
+	        add = torch.ops.aten.add.Tensor(erf, 1);  erf = None
+	        mul_3 = torch.ops.aten.mul.Tensor(mul_1, add);  mul_1 = add = None
+	        le = torch.ops.aten.le.Scalar(relu, 0);  relu = None
+	        return (sigmoid, mul_3, primals_3, sigmoid, le)
+	        
+	def load_args(reader):
+	    buf0 = reader.storage(None, 640, device=device(type='cuda', index=0))
+	    reader.tensor(buf0, (16, 10), is_leaf=True)  # primals_1
+	    buf1 = reader.storage(None, 64, device=device(type='cuda', index=0))
+	    reader.tensor(buf1, (16,), is_leaf=True)  # primals_2
+	    buf2 = reader.storage(None, 320, device=device(type='cuda', index=0))
+	    reader.tensor(buf2, (8, 10), is_leaf=True)  # primals_3
+	    buf3 = reader.storage(None, 800, device=device(type='cuda', index=0))
+	    reader.tensor(buf3, (10, 20), is_leaf=True)  # primals_4
+	    buf4 = reader.storage(None, 1200, device=device(type='cuda', index=0))
+	    reader.tensor(buf4, (10, 30), is_leaf=True)  # primals_5
+	    buf5 = reader.storage(None, 2400, device=device(type='cuda', index=0))
+	    reader.tensor(buf5, (20, 30), is_leaf=True)  # primals_6
+	load_args._version = 0
+	mod = Repro()
+	if __name__ == '__main__':
+	    from torch._dynamo.repro.after_aot import run_repro
+	    with torch.no_grad():
+	        run_repro(mod, load_args, accuracy=False, command='run', save_dir=None, tracing_mode='real', check_str=None)
+	        # To run it separately, do 
+	        # mod, args = run_repro(mod, load_args, accuracy=False, command='get_args', save_dir=None, tracing_mode='real', check_str=None)
+	        # mod(*args)
+V0819 12:42:53.503000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "ac7c2882c731332363d72b954984d1e7"}
+	{
+	"name": "additional_fake_tensor_prop",
+	"ts": 1755632573503927.8,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:53.514000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "2fb798aaeeef6a00280e52d9b4f31a39"}
+	{
+	"name": "additional_fake_tensor_prop",
+	"ts": 1755632573514025.5,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:53.518000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_inductor/compile_fx.py:1279] {"artifact": {"name": "before_post_grad_graph", "encoding": "string"}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "d7397799ecb65f1b291aeba43c7b27ee"}
+	class GraphModule(torch.nn.Module):
+	    def forward(self, primals_1: "f32[16, 10][10, 1]cuda:0", primals_2: "f32[16][1]cuda:0", primals_3: "f32[8, 10][10, 1]cuda:0", primals_4: "f32[10, 20][20, 1]cuda:0", primals_5: "f32[10, 30][30, 1]cuda:0", primals_6: "f32[20, 30][30, 1]cuda:0"):
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:79 in forward, code: x = self.fc1(x)
+	        permute: "f32[10, 16][1, 10]cuda:0" = torch.ops.aten.permute.default(primals_1, [1, 0]);  primals_1 = None
+	        addmm: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.addmm.default(primals_2, primals_3, permute);  primals_2 = permute = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:80 in forward, code: x = self.relu(x)
+	        relu: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.relu.default(addmm);  addmm = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:81 in forward, code: x = self.sigmoid(x)
+	        sigmoid: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.sigmoid.default(relu)
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:82 in forward, code: d = a * 3.14
+	        mul: "f32[10, 20][20, 1]cuda:0" = torch.ops.aten.mul.Tensor(primals_4, 3.14);  primals_4 = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:83 in forward, code: y = torch.addmm(c, d, b)
+	        addmm_1: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.addmm.default(primals_5, mul, primals_6);  primals_5 = mul = primals_6 = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:84 in forward, code: z = torch.nn.functional.gelu(y)
+	        mul_1: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.mul.Tensor(addmm_1, 0.5)
+	        mul_2: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.mul.Tensor(addmm_1, 0.7071067811865476);  addmm_1 = None
+	        erf: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.erf.default(mul_2);  mul_2 = None
+	        add: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.add.Tensor(erf, 1);  erf = None
+	        mul_3: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_1, add);  mul_1 = add = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:80 in forward, code: x = self.relu(x)
+	        le: "b8[8, 16][16, 1]cuda:0" = torch.ops.aten.le.Scalar(relu, 0);  relu = None
+	        return (sigmoid, mul_3, primals_3, sigmoid, le)
+	        
+V0819 12:42:53.519000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "4d3611a08af2c259b5b3178ce834c433"}
+	{
+	"name": "_recursive_post_grad_passes",
+	"ts": 1755632573519769.8,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.023000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "de5f68a0837548d83e164e4c28a4bd0f"}
+	{
+	"name": "_recursive_post_grad_passes",
+	"ts": 1755632574023809.0,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.029000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_inductor/compile_fx.py:1317] {"artifact": {"name": "after_post_grad_graph", "encoding": "string"}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "55c355b98406711dcfc2b53cc8260d85"}
+	class GraphModule(torch.nn.Module):
+	    def forward(self, primals_1: "f32[16, 10][10, 1]cuda:0", primals_2: "f32[16][1]cuda:0", primals_3: "f32[8, 10][10, 1]cuda:0", primals_4: "f32[10, 20][20, 1]cuda:0", primals_5: "f32[10, 30][30, 1]cuda:0", primals_6: "f32[20, 30][30, 1]cuda:0"):
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:79 in forward, code: x = self.fc1(x)
+	        permute: "f32[10, 16][1, 10]cuda:0" = torch.ops.aten.permute.default(primals_1, [1, 0]);  primals_1 = None
+	        mm_default_1: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.mm.default(primals_3, permute);  permute = None
+	        add_tensor_1: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default_1, primals_2);  mm_default_1 = primals_2 = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:80 in forward, code: x = self.relu(x)
+	        relu: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.relu.default(add_tensor_1);  add_tensor_1 = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:81 in forward, code: x = self.sigmoid(x)
+	        sigmoid: "f32[8, 16][16, 1]cuda:0" = torch.ops.aten.sigmoid.default(relu)
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:82 in forward, code: d = a * 3.14
+	        mul: "f32[10, 20][20, 1]cuda:0" = torch.ops.aten.mul.Tensor(primals_4, 3.14);  primals_4 = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:83 in forward, code: y = torch.addmm(c, d, b)
+	        mm_default: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.mm.default(mul, primals_6);  mul = primals_6 = None
+	        add_tensor: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.add.Tensor(mm_default, primals_5);  mm_default = primals_5 = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:84 in forward, code: z = torch.nn.functional.gelu(y)
+	        mul_1: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_tensor, 0.5)
+	        mul_2: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.mul.Tensor(add_tensor, 0.7071067811865476);  add_tensor = None
+	        erf: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.erf.default(mul_2);  mul_2 = None
+	        add: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.add.Tensor(erf, 1);  erf = None
+	        mul_3: "f32[10, 30][30, 1]cuda:0" = torch.ops.aten.mul.Tensor(mul_1, add);  mul_1 = add = None
+	        
+	         # File: /data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py:80 in forward, code: x = self.relu(x)
+	        le: "b8[8, 16][16, 1]cuda:0" = torch.ops.aten.le.Scalar(relu, 0);  relu = None
+	        return (sigmoid, mul_3, primals_3, sigmoid, le)
+	        
+V0819 12:42:54.035000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "ae156d92edb776c6a4120149b5e2a0b3"}
+	{
+	"name": "GraphLowering.run",
+	"ts": 1755632574035641.0,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.103000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "ab2fa8f71a871e97346613110ab9798d"}
+	{
+	"name": "GraphLowering.run",
+	"ts": 1755632574103576.8,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.104000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "e452fcf169605b042b784ecbc8cc6504"}
+	{
+	"name": "GraphLowering.compile_to_fn",
+	"ts": 1755632574104815.2,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.105000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "3ccfa3ce291d782ecf716ea7d972cdee"}
+	{
+	"name": "code_gen",
+	"ts": 1755632574105677.0,
+	"args": {
+	"fn_name": "GraphLowering.compile_to_module",
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.106000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "8118a56adb46013ffcc39a3c3a9a5d12"}
+	{
+	"name": "GraphLowering.codegen",
+	"ts": 1755632574106488.5,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.110000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "88e3bca74e0397644913a65f93405f0c"}
+	{
+	"name": "Scheduler.__init__",
+	"ts": 1755632574110509.0,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.141000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "57133080862abf658a847f48b3f39832"}
+	{
+	"name": "Scheduler.fused_nodes",
+	"ts": 1755632574141727.2,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.154000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "76e52cec0d716c1d47d4405be0c4dd4d"}
+	{
+	"name": "Scheduler.fused_nodes",
+	"ts": 1755632574154321.8,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.160000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "d5069c06c5ea22c79b0da36b325bd93d"}
+	{
+	"name": "Scheduler.__init__",
+	"ts": 1755632574160444.2,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.161000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "7162b93587b8feb7bafab2173b358b1e"}
+	{
+	"name": "Scheduler.codegen",
+	"ts": 1755632574161351.0,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.227000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "739d673b7f9beb0d875b1efd77be4824"}
+	{
+	"name": "Scheduler.codegen",
+	"ts": 1755632574226857.0,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.228000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "59fe5546a4b209031e1bcfccbb5c5aa6"}
+	{
+	"name": "PythonWrapperCodegen.generate",
+	"ts": 1755632574228339.5,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.233000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "ef20165e23ed56e6e66829f7f65a8a27"}
+	{
+	"name": "PythonWrapperCodegen.generate",
+	"ts": 1755632574233032.5,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.234000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "1154a7e498b305efee89318b5c3d4135"}
+	{
+	"name": "GraphLowering.codegen",
+	"ts": 1755632574234045.5,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.238000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_inductor/graph.py:2390] {"inductor_output_code": {"filename": "/tmp/tmp4zkba_w7/lr/clrftghodm4tm4zqkq3os2ku43gh6fxhq2gvsumm3gyvlongh3ut.py"}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "2aca68c4d8cdb34111248013b1275458"}
+	# AOT ID: ['0_forward']
+	from ctypes import c_void_p, c_long, c_int
+	import torch
+	import math
+	import random
+	import os
+	import tempfile
+	from math import inf, nan
+	from cmath import nanj
+	from torch._inductor.hooks import run_intermediate_hooks
+	from torch._inductor.utils import maybe_profile
+	from torch._inductor.codegen.memory_planning import _align as align
+	from torch import device, empty_strided
+	from torch._inductor.async_compile import AsyncCompile
+	from torch._inductor.select_algorithm import extern_kernels
+	import triton
+	import triton.language as tl
+	from torch._inductor.runtime.triton_heuristics import start_graph, end_graph
+	from torch._C import _cuda_getCurrentRawStream as get_raw_stream
+	from torch._C import _cuda_getCurrentRawStream as get_raw_stream
+	
+	aten = torch.ops.aten
+	inductor_ops = torch.ops.inductor
+	_quantized = torch.ops._quantized
+	assert_size_stride = torch._C._dynamo.guards.assert_size_stride
+	assert_alignment = torch._C._dynamo.guards.assert_alignment
+	empty_strided_cpu = torch._C._dynamo.guards._empty_strided_cpu
+	empty_strided_cpu_pinned = torch._C._dynamo.guards._empty_strided_cpu_pinned
+	empty_strided_cuda = torch._C._dynamo.guards._empty_strided_cuda
+	empty_strided_xpu = torch._C._dynamo.guards._empty_strided_xpu
+	empty_strided_mtia = torch._C._dynamo.guards._empty_strided_mtia
+	reinterpret_tensor = torch._C._dynamo.guards._reinterpret_tensor
+	alloc_from_pool = torch.ops.inductor._alloc_from_pool
+	async_compile = AsyncCompile()
+	empty_strided_p2p = torch._C._distributed_c10d._SymmetricMemory.empty_strided_p2p
+	
+	
+	# kernel path: /tmp/tmp4zkba_w7/jw/cjwtvb3tnvjggpind3kxph3ilqkw3wboapeietzp536phkvymnyh.py
+	# Topologically Sorted Source Nodes: [x, x_1, x_2], Original ATen: [aten.addmm, aten.relu, aten.sigmoid, aten.threshold_backward]
+	# Source node to ATen node mapping:
+	#   x => add_tensor_1
+	#   x_1 => relu
+	#   x_2 => sigmoid
+	# Graph fragment:
+	#   %mm_default_1 : Tensor "f32[8, 16][16, 1]cuda:0" = PlaceHolder[target=mm_default_1]
+	#   %primals_2 : Tensor "f32[16][1]cuda:0" = PlaceHolder[target=primals_2]
+	#   %add_tensor_1 : Tensor "f32[8, 16][16, 1]cuda:0"[num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mm_default_1, %primals_2), kwargs = {})
+	#   %relu : Tensor "f32[8, 16][16, 1]cuda:0"[num_users=2] = call_function[target=torch.ops.aten.relu.default](args = (%add_tensor_1,), kwargs = {})
+	#   %sigmoid : Tensor "f32[8, 16][16, 1]cuda:0"[num_users=1] = call_function[target=torch.ops.aten.sigmoid.default](args = (%relu,), kwargs = {})
+	#   %le : Tensor "b8[8, 16][16, 1]cuda:0"[num_users=1] = call_function[target=torch.ops.aten.le.Scalar](args = (%relu, 0), kwargs = {})
+	#   return %sigmoid,%le
+	triton_poi_fused_addmm_relu_sigmoid_threshold_backward_0 = async_compile.triton('triton_poi_fused_addmm_relu_sigmoid_threshold_backward_0', '''
+	import triton
+	import triton.language as tl
+	
+	from torch._inductor.runtime import triton_helpers, triton_heuristics
+	from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math
+	from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, DeviceProperties
+	triton_helpers.set_driver_to_gpu()
+	
+	@triton_heuristics.pointwise(
+	    size_hints={'x': 128}, 
+	    filename=__file__,
+	    triton_meta={'signature': {'in_ptr0': '*fp32', 'in_ptr1': '*fp32', 'out_ptr0': '*fp32', 'out_ptr1': '*i1', 'xnumel': 'i32', 'XBLOCK': 'constexpr'}, 'device': DeviceProperties(type='cuda', index=0, multi_processor_count=108, cc=80, major=8, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, warp_size=32), 'constants': {}, 'configs': [{(0,): [['tt.divisibility', 16]], (1,): [['tt.divisibility', 16]], (2,): [['tt.divisibility', 16]], (3,): [['tt.divisibility', 16]], (4,): [['tt.divisibility', 16]]}]},
+	    inductor_meta={'grid_type': 'Grid1D', 'autotune_hints': set(), 'kernel_name': 'triton_poi_fused_addmm_relu_sigmoid_threshold_backward_0', 'mutated_arg_names': [], 'optimize_mem': False, 'no_x_dim': False, 'num_load': 2, 'num_reduction': 0, 'backend_hash': '3E91F1C483CA40D8EC1B9AFBB282475C75659A34F6F2D59AE8336D7E5E05BEAA', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False, 'is_fbcode': True},
+	    min_elem_per_thread=0
+	)
+	@triton.jit
+	def triton_poi_fused_addmm_relu_sigmoid_threshold_backward_0(in_ptr0, in_ptr1, out_ptr0, out_ptr1, xnumel, XBLOCK : tl.constexpr):
+	    xnumel = 128
+	    xoffset = tl.program_id(0) * XBLOCK
+	    xindex = xoffset + tl.arange(0, XBLOCK)[:]
+	    xmask = xindex < xnumel
+	    x2 = xindex
+	    x0 = (xindex % 16)
+	    tmp0 = tl.load(in_ptr0 + (x2), xmask)
+	    tmp1 = tl.load(in_ptr1 + (x0), xmask, eviction_policy='evict_last')
+	    tmp2 = tmp0 + tmp1
+	    tmp3 = tl.full([1], 0, tl.int32)
+	    tmp4 = triton_helpers.maximum(tmp3, tmp2)
+	    tmp5 = tl.sigmoid(tmp4)
+	    tmp6 = 0.0
+	    tmp7 = tmp4 <= tmp6
+	    tl.store(out_ptr0 + (x2), tmp5, xmask)
+	    tl.store(out_ptr1 + (x2), tmp7, xmask)
+	''', device_str='cuda')
+	
+	
+	# kernel path: /tmp/tmp4zkba_w7/sw/csw3fwcx7phxodgoonbshla6edvt7ptbs6rwnb5dqrbtuxbikvsd.py
+	# Topologically Sorted Source Nodes: [d], Original ATen: [aten.mul]
+	# Source node to ATen node mapping:
+	#   d => mul
+	# Graph fragment:
+	#   %primals_4 : Tensor "f32[10, 20][20, 1]cuda:0" = PlaceHolder[target=primals_4]
+	#   %mul : Tensor "f32[10, 20][20, 1]cuda:0"[num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%primals_4, 3.14), kwargs = {})
+	#   return %mul
+	triton_poi_fused_mul_1 = async_compile.triton('triton_poi_fused_mul_1', '''
+	import triton
+	import triton.language as tl
+	
+	from torch._inductor.runtime import triton_helpers, triton_heuristics
+	from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math
+	from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, DeviceProperties
+	triton_helpers.set_driver_to_gpu()
+	
+	@triton_heuristics.pointwise(
+	    size_hints={'x': 256}, 
+	    filename=__file__,
+	    triton_meta={'signature': {'in_ptr0': '*fp32', 'out_ptr0': '*fp32', 'xnumel': 'i32', 'XBLOCK': 'constexpr'}, 'device': DeviceProperties(type='cuda', index=0, multi_processor_count=108, cc=80, major=8, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, warp_size=32), 'constants': {}, 'configs': [{(0,): [['tt.divisibility', 16]], (1,): [['tt.divisibility', 16]]}]},
+	    inductor_meta={'grid_type': 'Grid1D', 'autotune_hints': set(), 'kernel_name': 'triton_poi_fused_mul_1', 'mutated_arg_names': [], 'optimize_mem': False, 'no_x_dim': False, 'num_load': 1, 'num_reduction': 0, 'backend_hash': '3E91F1C483CA40D8EC1B9AFBB282475C75659A34F6F2D59AE8336D7E5E05BEAA', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False, 'is_fbcode': True},
+	    min_elem_per_thread=0
+	)
+	@triton.jit
+	def triton_poi_fused_mul_1(in_ptr0, out_ptr0, xnumel, XBLOCK : tl.constexpr):
+	    xnumel = 200
+	    xoffset = tl.program_id(0) * XBLOCK
+	    xindex = xoffset + tl.arange(0, XBLOCK)[:]
+	    xmask = xindex < xnumel
+	    x0 = xindex
+	    tmp0 = tl.load(in_ptr0 + (x0), xmask)
+	    tmp1 = 3.14
+	    tmp2 = tmp0 * tmp1
+	    tl.store(out_ptr0 + (x0), tmp2, xmask)
+	''', device_str='cuda')
+	
+	
+	# kernel path: /tmp/tmp4zkba_w7/l7/cl72yepoy7kxj4gf4i6tobtpamazzhfyrhurp4tqvdrio2cva2q2.py
+	# Topologically Sorted Source Nodes: [y, z], Original ATen: [aten.addmm, aten.gelu]
+	# Source node to ATen node mapping:
+	#   y => add_tensor
+	#   z => add, erf, mul_1, mul_2, mul_3
+	# Graph fragment:
+	#   %mm_default : Tensor "f32[10, 30][30, 1]cuda:0" = PlaceHolder[target=mm_default]
+	#   %primals_5 : Tensor "f32[10, 30][30, 1]cuda:0" = PlaceHolder[target=primals_5]
+	#   %add_tensor : Tensor "f32[10, 30][30, 1]cuda:0"[num_users=2] = call_function[target=torch.ops.aten.add.Tensor](args = (%mm_default, %primals_5), kwargs = {})
+	#   %mul_1 : Tensor "f32[10, 30][30, 1]cuda:0"[num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_tensor, 0.5), kwargs = {})
+	#   %mul_2 : Tensor "f32[10, 30][30, 1]cuda:0"[num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_tensor, 0.7071067811865476), kwargs = {})
+	#   %erf : Tensor "f32[10, 30][30, 1]cuda:0"[num_users=1] = call_function[target=torch.ops.aten.erf.default](args = (%mul_2,), kwargs = {})
+	#   %add : Tensor "f32[10, 30][30, 1]cuda:0"[num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%erf, 1), kwargs = {})
+	#   %mul_3 : Tensor "f32[10, 30][30, 1]cuda:0"[num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_1, %add), kwargs = {})
+	#   return %mul_3
+	triton_poi_fused_addmm_gelu_2 = async_compile.triton('triton_poi_fused_addmm_gelu_2', '''
+	import triton
+	import triton.language as tl
+	
+	from torch._inductor.runtime import triton_helpers, triton_heuristics
+	from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math
+	from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, DeviceProperties
+	triton_helpers.set_driver_to_gpu()
+	
+	@triton_heuristics.pointwise(
+	    size_hints={'x': 512}, 
+	    filename=__file__,
+	    triton_meta={'signature': {'in_out_ptr0': '*fp32', 'in_ptr0': '*fp32', 'xnumel': 'i32', 'XBLOCK': 'constexpr'}, 'device': DeviceProperties(type='cuda', index=0, multi_processor_count=108, cc=80, major=8, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, warp_size=32), 'constants': {}, 'configs': [{(0,): [['tt.divisibility', 16]], (1,): [['tt.divisibility', 16]]}]},
+	    inductor_meta={'grid_type': 'Grid1D', 'autotune_hints': set(), 'kernel_name': 'triton_poi_fused_addmm_gelu_2', 'mutated_arg_names': ['in_out_ptr0'], 'optimize_mem': False, 'no_x_dim': False, 'num_load': 2, 'num_reduction': 0, 'backend_hash': '3E91F1C483CA40D8EC1B9AFBB282475C75659A34F6F2D59AE8336D7E5E05BEAA', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False, 'is_fbcode': True},
+	    min_elem_per_thread=0
+	)
+	@triton.jit
+	def triton_poi_fused_addmm_gelu_2(in_out_ptr0, in_ptr0, xnumel, XBLOCK : tl.constexpr):
+	    xnumel = 300
+	    xoffset = tl.program_id(0) * XBLOCK
+	    xindex = xoffset + tl.arange(0, XBLOCK)[:]
+	    xmask = xindex < xnumel
+	    x0 = xindex
+	    tmp0 = tl.load(in_out_ptr0 + (x0), xmask)
+	    tmp1 = tl.load(in_ptr0 + (x0), xmask)
+	    tmp2 = tmp0 + tmp1
+	    tmp3 = 0.5
+	    tmp4 = tmp2 * tmp3
+	    tmp5 = 0.7071067811865476
+	    tmp6 = tmp2 * tmp5
+	    tmp7 = libdevice.erf(tmp6)
+	    tmp8 = 1.0
+	    tmp9 = tmp7 + tmp8
+	    tmp10 = tmp4 * tmp9
+	    tl.store(in_out_ptr0 + (x0), tmp10, xmask)
+	''', device_str='cuda')
+	
+	
+	async_compile.wait(globals())
+	del async_compile
+	
+	def call(args):
+	    primals_1, primals_2, primals_3, primals_4, primals_5, primals_6 = args
+	    args.clear()
+	    assert_size_stride(primals_1, (16, 10), (10, 1))
+	    assert_size_stride(primals_2, (16, ), (1, ))
+	    assert_size_stride(primals_3, (8, 10), (10, 1))
+	    assert_size_stride(primals_4, (10, 20), (20, 1))
+	    assert_size_stride(primals_5, (10, 30), (30, 1))
+	    assert_size_stride(primals_6, (20, 30), (30, 1))
+	    with torch.cuda._DeviceGuard(0):
+	        torch.cuda.set_device(0)
+	        buf0 = empty_strided_cuda((8, 16), (16, 1), torch.float32)
+	        # Topologically Sorted Source Nodes: [x], Original ATen: [aten.t, aten.addmm]
+	        # [Provenance debug handles] extern_kernels.mm:4
+	        extern_kernels.mm(primals_3, reinterpret_tensor(primals_1, (10, 16), (1, 10), 0), out=buf0)
+	        del primals_1
+	        buf1 = empty_strided_cuda((8, 16), (16, 1), torch.float32)
+	        buf5 = empty_strided_cuda((8, 16), (16, 1), torch.bool)
+	        # Topologically Sorted Source Nodes: [x, x_1, x_2], Original ATen: [aten.addmm, aten.relu, aten.sigmoid, aten.threshold_backward]
+	        # [Provenance debug handles] triton_poi_fused_addmm_relu_sigmoid_threshold_backward_0:1
+	        stream0 = get_raw_stream(0)
+	        triton_poi_fused_addmm_relu_sigmoid_threshold_backward_0.run(buf0, primals_2, buf1, buf5, 128, stream=stream0)
+	        del buf0
+	        del primals_2
+	        buf2 = empty_strided_cuda((10, 20), (20, 1), torch.float32)
+	        # Topologically Sorted Source Nodes: [d], Original ATen: [aten.mul]
+	        # [Provenance debug handles] triton_poi_fused_mul_1:2
+	        stream0 = get_raw_stream(0)
+	        triton_poi_fused_mul_1.run(primals_4, buf2, 200, stream=stream0)
+	        del primals_4
+	        buf3 = empty_strided_cuda((10, 30), (30, 1), torch.float32)
+	        # Topologically Sorted Source Nodes: [d, y], Original ATen: [aten.mul, aten.addmm]
+	        # [Provenance debug handles] extern_kernels.mm:5
+	        extern_kernels.mm(buf2, primals_6, out=buf3)
+	        del buf2
+	        del primals_6
+	        buf4 = buf3; del buf3  # reuse
+	        # Topologically Sorted Source Nodes: [y, z], Original ATen: [aten.addmm, aten.gelu]
+	        # [Provenance debug handles] triton_poi_fused_addmm_gelu_2:3
+	        stream0 = get_raw_stream(0)
+	        triton_poi_fused_addmm_gelu_2.run(buf4, primals_5, 300, stream=stream0)
+	        del primals_5
+	    return (buf1, buf4, primals_3, buf1, buf5, )
+	
+	
+	def benchmark_compiled_module(times=10, repeat=10):
+	    from torch._dynamo.testing import rand_strided
+	    from torch._inductor.utils import print_performance
+	    primals_1 = rand_strided((16, 10), (10, 1), device='cuda:0', dtype=torch.float32)
+	    primals_2 = rand_strided((16, ), (1, ), device='cuda:0', dtype=torch.float32)
+	    primals_3 = rand_strided((8, 10), (10, 1), device='cuda:0', dtype=torch.float32)
+	    primals_4 = rand_strided((10, 20), (20, 1), device='cuda:0', dtype=torch.float32)
+	    primals_5 = rand_strided((10, 30), (30, 1), device='cuda:0', dtype=torch.float32)
+	    primals_6 = rand_strided((20, 30), (30, 1), device='cuda:0', dtype=torch.float32)
+	    fn = lambda: call([primals_1, primals_2, primals_3, primals_4, primals_5, primals_6])
+	    return print_performance(fn, times=times, repeat=repeat)
+	
+	
+	if __name__ == "__main__":
+	    from torch._inductor.wrapper_benchmark import compiled_module_main
+	    compiled_module_main('None', benchmark_compiled_module)
+	
+V0819 12:42:54.239000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "5c342a257698eac725fbe89854619148"}
+	{
+	"name": "PyCodeCache.load_by_key_path",
+	"ts": 1755632574239640.2,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.249000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "3632eb66f9772710599a44efb37232c9"}
+	{
+	"name": "async_compile.precompile",
+	"ts": 1755632574249049.0,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.371000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "c74b154bca8a89e6485c166aa5ec6733"}
+	{
+	"name": "CachingAutotuner.synchronize",
+	"ts": 1755632574371379.2,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.373000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "31a45ed766500eb3769f9345fa2ff430"}
+	{
+	"name": "CachingAutotuner.synchronize",
+	"ts": 1755632574372975.8,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.374000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "8fc72fe2afb99fe571afb7dc1299ef99"}
+	{
+	"name": "async_compile.precompile",
+	"ts": 1755632574374455.0,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.378000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "310496f61306f45da0c59da9ecb59f02"}
+	{
+	"name": "async_compile.precompile",
+	"ts": 1755632574378392.8,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.536000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "0bb4c8686fb3742ef4e92338c60b31ba"}
+	{
+	"name": "CachingAutotuner.synchronize",
+	"ts": 1755632574536041.8,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.537000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "7f7cb93aea8eb05798dcc5e98e52f889"}
+	{
+	"name": "CachingAutotuner.synchronize",
+	"ts": 1755632574537687.8,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.539000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "c5b3ad5e35735da09b16986be6f3c58c"}
+	{
+	"name": "async_compile.precompile",
+	"ts": 1755632574539459.0,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.543000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "91b724efd2961babb4167c4be95db295"}
+	{
+	"name": "async_compile.precompile",
+	"ts": 1755632574543414.5,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.723000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "dee4369ce4575519cfb838ee97455b2c"}
+	{
+	"name": "CachingAutotuner.synchronize",
+	"ts": 1755632574723032.0,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.724000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "e14813d4bf717a7079eb89eec8053e09"}
+	{
+	"name": "CachingAutotuner.synchronize",
+	"ts": 1755632574724583.8,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.726000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "d6f9ff96a9810f90282fa0b1a880f65b"}
+	{
+	"name": "async_compile.precompile",
+	"ts": 1755632574726362.0,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.730000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "8bb30a695354ed73001095feb8ba193e"}
+	{
+	"name": "async_compile.wait",
+	"ts": 1755632574730066.5,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.731000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "af4c39608061a2fd56f5ba0481d845c2"}
+	{
+	"name": "async_compile.wait",
+	"ts": 1755632574731118.0,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.734000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_inductor/async_compile.py:117] {"artifact": {"name": "triton_kernel_info", "encoding": "json"}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "9a834116f1119d7f72a4db3a52a1d0cc"}
+	{"triton_poi_fused_addmm_gelu_2": {"autotune_cache_state": "miss", "num_configs": 2, "compile_time_us": 181999}, "triton_poi_fused_addmm_relu_sigmoid_threshold_backward_0": {"autotune_cache_state": "only 1 config", "only_config": [["XBLOCK", 128], ["num_warps", 4], ["num_stages", 1]], "compile_time_us": 124070}, "triton_poi_fused_mul_1": {"autotune_cache_state": "miss", "num_configs": 2, "compile_time_us": 159751}}
+V0819 12:42:54.735000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "38a254a981190e2096065acc0b28cf6f"}
+	{
+	"name": "PyCodeCache.load_by_key_path",
+	"ts": 1755632574735427.8,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.738000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "0e87c466f2822b5d7b530c07e2154f29"}
+	{
+	"name": "code_gen",
+	"ts": 1755632574738822.0,
+	"args": {
+	"fn_name": "GraphLowering.compile_to_module",
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.742000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "27f724bbbaf5d7b79fefd6c590eb37b5"}
+	{
+	"name": "GraphLowering.compile_to_fn",
+	"ts": 1755632574742125.5,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.746000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1985] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "dd9227a53fd5b42a6a26178879c73f3f"}
+	{
+	"name": "fx_graph_cache_disabled",
+	"ts": 1755632573492397.8,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "i",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0,
+	"s": "p"
+	}
+V0819 12:42:54.746000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "180452f3e201a30dc89ed77d8403c074"}
+	{
+	"name": "fx_codegen_and_compile",
+	"ts": 1755632574746730.5,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.750000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_inductor/compile_fx.py:1063] {"artifact": {"name": "inductor_provenance_tracking_node_mappings", "encoding": "json"}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "693bab66f261c3fd7e68883cd9b971aa"}
+	{"preToPost": {"x": ["permute", "mm_default_1", "add_tensor_1"], "x_1": ["relu"], "x_2": ["sigmoid"], "d": ["mul"], "y": ["mm_default", "add_tensor"], "z": ["mul_1", "mul_2", "erf", "add", "mul_3"]}, "postToPre": {"permute": ["x"], "mm_default_1": ["x"], "add_tensor_1": ["x"], "relu": ["x_1"], "sigmoid": ["x_2"], "mul": ["d"], "mm_default": ["y"], "add_tensor": ["y"], "mul_1": ["z"], "mul_2": ["z"], "erf": ["z"], "add": ["z"], "mul_3": ["z"]}, "cppCodeToPost": {"triton_poi_fused_addmm_relu_sigmoid_threshold_backward_0:1": ["sigmoid", "relu", "add_tensor_1", "le"], "triton_poi_fused_mul_1:2": ["mul"], "triton_poi_fused_addmm_gelu_2:3": ["mul_3", "mul_1", "add_tensor", "add", "erf", "mul_2"], "extern_kernels.mm:4": ["mm_default_1"], "extern_kernels.mm:5": ["mm_default"]}, "postToCppCode": {"sigmoid": ["triton_poi_fused_addmm_relu_sigmoid_threshold_backward_0:1"], "relu": ["triton_poi_fused_addmm_relu_sigmoid_threshold_backward_0:1"], "add_tensor_1": ["triton_poi_fused_addmm_relu_sigmoid_threshold_backward_0:1"], "le": ["triton_poi_fused_addmm_relu_sigmoid_threshold_backward_0:1"], "mul": ["triton_poi_fused_mul_1:2"], "mul_3": ["triton_poi_fused_addmm_gelu_2:3"], "mul_1": ["triton_poi_fused_addmm_gelu_2:3"], "add_tensor": ["triton_poi_fused_addmm_gelu_2:3"], "add": ["triton_poi_fused_addmm_gelu_2:3"], "erf": ["triton_poi_fused_addmm_gelu_2:3"], "mul_2": ["triton_poi_fused_addmm_gelu_2:3"], "mm_default_1": ["extern_kernels.mm:4"], "mm_default": ["extern_kernels.mm:5"]}, "version": 2.0}
+V0819 12:42:54.751000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_inductor/compile_fx.py:1073] {"artifact": {"name": "inductor_provenance_tracking_kernel_stack_traces", "encoding": "json"}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "e9ad04d87757893f736df0f03ad65de2"}
+	{"triton_poi_fused_addmm_relu_sigmoid_threshold_backward_0:1": ["  File \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py\", line 81, in forward\n    x = self.sigmoid(x)\n", "  File \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py\", line 80, in forward\n    x = self.relu(x)\n", "  File \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py\", line 79, in forward\n    x = self.fc1(x)\n"], "triton_poi_fused_mul_1:2": ["  File \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py\", line 82, in forward\n    d = a * 3.14\n"], "triton_poi_fused_addmm_gelu_2:3": ["  File \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py\", line 84, in forward\n    z = torch.nn.functional.gelu(y)\n", "  File \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py\", line 83, in forward\n    y = torch.addmm(c, d, b)\n"], "extern_kernels.mm:4": ["  File \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py\", line 79, in forward\n    x = self.fc1(x)\n"], "extern_kernels.mm:5": ["  File \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py\", line 83, in forward\n    y = torch.addmm(c, d, b)\n"]}
+V0819 12:42:54.752000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "7ef155dbae9b3ec3186bbca643396998"}
+	{
+	"name": "inductor_compile",
+	"ts": 1755632574752206.5,
+	"args": {
+	"fn_name": "compile_fx_inner",
+	"compile_id": "0/0",
+	"is_backward": false,
+	"cache_state": "disabled",
+	"cache_event_time": 1755632573492397803,
+	"key": null,
+	"components": null,
+	"cache_bypass_reason": "cache not enabled",
+	"remote_cache_enabled": false,
+	"local_cache_enabled": false
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.756000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "38ef8fedb5a9894b523ec979c96dec19"}
+	{
+	"name": "compile_fx.<locals>.fw_compiler_base",
+	"ts": 1755632574755921.8,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.760000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "06f5d81d016638edb7977c8cbe67b066"}
+	{
+	"name": "create_aot_dispatcher_function",
+	"ts": 1755632574760736.0,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.764000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "aebd90b663e7467e4cb6c07ae02ecdfb"}
+	{
+	"name": "backend_compile",
+	"ts": 1755632574764390.5,
+	"args": {
+	"fn_name": "OutputGraph.call_user_compiler",
+	"compile_id": "0/0",
+	"cache_state": "bypass",
+	"cache_event_time": 1755632570515849137,
+	"key": null,
+	"components": [],
+	"cache_bypass_reason": "FX graph cache is not enabled",
+	"remote_cache_enabled": false,
+	"local_cache_enabled": true,
+	"requires_subclass_dispatch": false,
+	"dispatch_mode": "autograd"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.769000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "ea123fb53b862dc8c19c25841d50103e"}
+	{
+	"name": "compile_attempt_0",
+	"ts": 1755632574769712.5,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.773000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "eb03b2b9eb962a92f6b8fc97d36c58ce"}
+	{
+	"name": "build_guards",
+	"ts": 1755632574773313.8,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.798000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/guards.py:3456] {"dynamo_cpp_guards_str": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "686495bda144a45bb3019a5204d7ebbd"}
+	
+	TREE_GUARD_MANAGER:
+	+- RootGuardManager
+	| +- LAMBDA_GUARD: torch._functorch.aot_autograd.utils.top_saved_tensors_hooks ids == None  # _dynamo/output_graph.py:655 in init_ambient_guards
+	| +- DEFAULT_DEVICE: utils_device.CURRENT_DEVICE == None                           # _dynamo/output_graph.py:643 in init_ambient_guards
+	| +- GLOBAL_STATE: ___check_global_state()
+	| +- TORCH_FUNCTION_MODE_STACK: ___check_torch_function_mode_stack()
+	| +- GuardManager: source=L['a'], accessed_by=FrameLocalsGuardAccessor(key='a', framelocals_idx=2), type=, tag_safe=(True, False)
+	| | +- TENSOR_MATCH: check_tensor(L['a'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[10, 20], stride=[20, 1])  # d = a * 3.14  # caffe2/test/inductor/test_provenance_tracing.py:82 in forward
+	| | +- NO_HASATTR: hasattr(L['a'], '_dynamo_dynamic_indices') == False           # d = a * 3.14  # caffe2/test/inductor/test_provenance_tracing.py:82 in forward
+	| | +- NO_TENSOR_ALIASING: check_no_aliasing(L['a'], L['b'], L['c'], L['x'])
+	| +- GuardManager: source=L['b'], accessed_by=FrameLocalsGuardAccessor(key='b', framelocals_idx=3), type=, tag_safe=(True, False)
+	| | +- TENSOR_MATCH: check_tensor(L['b'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[20, 30], stride=[30, 1])  # y = torch.addmm(c, d, b)  # caffe2/test/inductor/test_provenance_tracing.py:83 in forward
+	| | +- NO_HASATTR: hasattr(L['b'], '_dynamo_dynamic_indices') == False           # y = torch.addmm(c, d, b)  # caffe2/test/inductor/test_provenance_tracing.py:83 in forward
+	| | +- NO_TENSOR_ALIASING
+	| +- GuardManager: source=L['c'], accessed_by=FrameLocalsGuardAccessor(key='c', framelocals_idx=4), type=, tag_safe=(True, False)
+	| | +- TENSOR_MATCH: check_tensor(L['c'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[10, 30], stride=[30, 1])  # y = torch.addmm(c, d, b)  # caffe2/test/inductor/test_provenance_tracing.py:83 in forward
+	| | +- NO_HASATTR: hasattr(L['c'], '_dynamo_dynamic_indices') == False           # y = torch.addmm(c, d, b)  # caffe2/test/inductor/test_provenance_tracing.py:83 in forward
+	| | +- NO_TENSOR_ALIASING
+	| +- GuardManager: source=L['x'], accessed_by=FrameLocalsGuardAccessor(key='x', framelocals_idx=1), type=, tag_safe=(True, False)
+	| | +- TENSOR_MATCH: check_tensor(L['x'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[8, 10], stride=[10, 1])  # return F.linear(input, self.weight, self.bias)  # nn/modules/linear.py:134 in forward
+	| | +- NO_HASATTR: hasattr(L['x'], '_dynamo_dynamic_indices') == False           # return F.linear(input, self.weight, self.bias)  # nn/modules/linear.py:134 in forward
+	| | +- NO_TENSOR_ALIASING
+	| +- GuardManager: source=G, accessed_by=GlobalsGuardAccessor, type=, tag_safe=(False, False)
+	| | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_linear'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot_nn_dot_modules_dot_linear'), type=, tag_safe=(False, False)
+	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_modules_dot_linear'], 139827799258304)  # return F.linear(input, self.weight, self.bias)  # nn/modules/linear.py:134 in forward
+	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_linear'].F, accessed_by=GetAttrGuardAccessor(F), type=, tag_safe=(False, False)
+	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_modules_dot_linear'].F, 139827799260464)  # return F.linear(input, self.weight, self.bias)  # nn/modules/linear.py:134 in forward
+	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_linear'].F.gelu, accessed_by=GetAttrGuardAccessor(gelu), type=, tag_safe=(True, False)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_modules_dot_linear'].F.gelu, 139827806389488)  # z = torch.nn.functional.gelu(y)  # caffe2/test/inductor/test_provenance_tracing.py:84 in forward
+	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_linear'].F.relu, accessed_by=GetAttrGuardAccessor(relu), type=, tag_safe=(True, False)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_modules_dot_linear'].F.relu, 139827795318320)  # return F.relu(input, inplace=self.inplace)  # nn/modules/activation.py:144 in forward
+	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_linear'].F.linear, accessed_by=GetAttrGuardAccessor(linear), type=, tag_safe=(True, False)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_modules_dot_linear'].F.linear, 139827806390608)  # return F.linear(input, self.weight, self.bias)  # nn/modules/linear.py:134 in forward
+	| | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_module'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot_nn_dot_modules_dot_module'), type=, tag_safe=(False, False)
+	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_modules_dot_module'], 139827802391712)  # x = self.fc1(x)  # caffe2/test/inductor/test_provenance_tracing.py:79 in forward
+	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_module']._global_forward_hooks, accessed_by=GetAttrGuardAccessor(_global_forward_hooks), type=, tag_safe=(True, False)
+	| | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_nn_dot_modules_dot_module']._global_forward_hooks, 139829228758288)  # x = self.fc1(x)  # caffe2/test/inductor/test_provenance_tracing.py:79 in forward
+	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_module']._global_backward_hooks, accessed_by=GetAttrGuardAccessor(_global_backward_hooks), type=, tag_safe=(True, False)
+	| | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_nn_dot_modules_dot_module']._global_backward_hooks, 139829228758288)  # x = self.fc1(x)  # caffe2/test/inductor/test_provenance_tracing.py:79 in forward
+	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_module']._global_forward_pre_hooks, accessed_by=GetAttrGuardAccessor(_global_forward_pre_hooks), type=, tag_safe=(True, False)
+	| | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_nn_dot_modules_dot_module']._global_forward_pre_hooks, 139829228758288)  # x = self.fc1(x)  # caffe2/test/inductor/test_provenance_tracing.py:79 in forward
+	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_module']._global_backward_pre_hooks, accessed_by=GetAttrGuardAccessor(_global_backward_pre_hooks), type=, tag_safe=(True, False)
+	| | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_nn_dot_modules_dot_module']._global_backward_pre_hooks, 139829228758288)  # x = self.fc1(x)  # caffe2/test/inductor/test_provenance_tracing.py:79 in forward
+	| | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_activation'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot_nn_dot_modules_dot_activation'), type=, tag_safe=(False, False)
+	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_modules_dot_activation'], 139827796023056)  # return F.relu(input, inplace=self.inplace)  # nn/modules/activation.py:144 in forward
+	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_activation'].torch, accessed_by=GetAttrGuardAccessor(torch), type=, tag_safe=(False, False)
+	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_modules_dot_activation'].torch, 139828009059728)  # return torch.sigmoid(input)  # nn/modules/activation.py:359 in forward
+	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_activation'].torch.nn, accessed_by=GetAttrGuardAccessor(nn), type=, tag_safe=(False, False)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_modules_dot_activation'].torch.nn, 139827802301568)  # z = torch.nn.functional.gelu(y)  # caffe2/test/inductor/test_provenance_tracing.py:84 in forward
+	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_activation'].torch.addmm, accessed_by=GetAttrGuardAccessor(addmm), type=, tag_safe=(True, False)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_modules_dot_activation'].torch.addmm, 139827891983824)  # y = torch.addmm(c, d, b)  # caffe2/test/inductor/test_provenance_tracing.py:83 in forward
+	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_activation'].torch.sigmoid, accessed_by=GetAttrGuardAccessor(sigmoid), type=, tag_safe=(True, False)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_modules_dot_activation'].torch.sigmoid, 139827891979024)  # return torch.sigmoid(input)  # nn/modules/activation.py:359 in forward
+	| +- GuardManager: source=L['self'], accessed_by=FrameLocalsGuardAccessor(key='self', framelocals_idx=0), type=, tag_safe=(True, True)
+	| | +- TYPE_MATCH: ___check_type_id(L['self'], 139827223933968)                  # x = self.fc1(x)  # caffe2/test/inductor/test_provenance_tracing.py:79 in forward
+	| | +- GuardManager: source=L['self'].__dict__, accessed_by=GetGenericDictGuardAccessor, type=, tag_safe=(True, False)
+	| | | +- GuardManager: source=L['self']._modules, accessed_by=DictGetItemGuardAccessor('_modules'), type=, tag_safe=(True, False)
+	| | | | +- TYPE_MATCH: ___check_type_id(L['self']._modules, 139829228698104)         # x = self.fc1(x)  # caffe2/test/inductor/test_provenance_tracing.py:79 in forward
+	| | | | +- GuardManager: source=L['self']._modules['fc1'], accessed_by=DictGetItemGuardAccessor('fc1'), type=, tag_safe=(True, False)
+	| | | | | +- TYPE_MATCH: ___check_type_id(L['self']._modules['fc1'], 139827924299792)  # x = self.fc1(x)  # caffe2/test/inductor/test_provenance_tracing.py:79 in forward
+	| | | | | +- GuardManager: source=L['self']._modules['fc1'].__dict__, accessed_by=GetGenericDictGuardAccessor, type=, tag_safe=(True, False)
+	| | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['self']._modules['fc1'].__dict__)  # x = self.fc1(x)  # caffe2/test/inductor/test_provenance_tracing.py:79 in forward
+	| | | | | | +- GuardManager: source=L['self']._modules['fc1']._parameters, accessed_by=DictGetItemGuardAccessor('_parameters'), type=, tag_safe=(True, False)
+	| | | | | | | +- TYPE_MATCH: ___check_type_id(L['self']._modules['fc1']._parameters, 139829228698104)  # return F.linear(input, self.weight, self.bias)  # nn/modules/linear.py:134 in forward
+	| | | | | | | +- GuardManager: source=L['self']._modules['fc1']._parameters['bias'], accessed_by=DictGetItemGuardAccessor('bias'), type=, tag_safe=(True, False)
+	| | | | | | | | +- TENSOR_MATCH: check_tensor(L['self']._modules['fc1']._parameters['bias'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[16], stride=[1])  # return F.linear(input, self.weight, self.bias)  # nn/modules/linear.py:134 in forward
+	| | | | | | | +- GuardManager: source=L['self']._modules['fc1']._parameters['weight'], accessed_by=DictGetItemGuardAccessor('weight'), type=, tag_safe=(True, False)
+	| | | | | | | | +- TENSOR_MATCH: check_tensor(L['self']._modules['fc1']._parameters['weight'], Parameter, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=True, size=[16, 10], stride=[10, 1])  # return F.linear(input, self.weight, self.bias)  # nn/modules/linear.py:134 in forward
+	| | | | +- GuardManager: source=L['self']._modules['relu'], accessed_by=DictGetItemGuardAccessor('relu'), type=, tag_safe=(True, False)
+	| | | | | +- TYPE_MATCH: ___check_type_id(L['self']._modules['relu'], 139827924398096)  # x = self.relu(x)  # caffe2/test/inductor/test_provenance_tracing.py:80 in forward
+	| | | | | +- GuardManager: source=L['self']._modules['relu'].__dict__, accessed_by=GetGenericDictGuardAccessor, type=, tag_safe=(True, False)
+	| | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['self']._modules['relu'].__dict__)  # x = self.relu(x)  # caffe2/test/inductor/test_provenance_tracing.py:80 in forward
+	| | | | | | +- GuardManager: source=L['self']._modules['relu'].inplace, accessed_by=DictGetItemGuardAccessor('inplace'), type=, tag_safe=(True, False)
+	| | | | | | | +- FALSE_MATCH: L['self']._modules['relu'].inplace == False                   # return F.relu(input, inplace=self.inplace)  # nn/modules/activation.py:144 in forward
+	| | | | +- GuardManager: source=L['self']._modules['sigmoid'], accessed_by=DictGetItemGuardAccessor('sigmoid'), type=, tag_safe=(True, False)
+	| | | | | +- TYPE_MATCH: ___check_type_id(L['self']._modules['sigmoid'], 139827925541904)  # x = self.sigmoid(x)  # caffe2/test/inductor/test_provenance_tracing.py:81 in forward
+	| | | | | +- GuardManager: source=L['self']._modules['sigmoid'].__dict__, accessed_by=GetGenericDictGuardAccessor, type=, tag_safe=(True, False)
+	| | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['self']._modules['sigmoid'].__dict__)  # x = self.sigmoid(x)  # caffe2/test/inductor/test_provenance_tracing.py:81 in forward
+	| | | +- GuardManager: source=L['self']._parameters, accessed_by=DictGetItemGuardAccessor('_parameters'), type=, tag_safe=(True, False)
+	| | | | +- TYPE_MATCH: ___check_type_id(L['self']._parameters, 139829228698104)      # x = self.fc1(x)  # caffe2/test/inductor/test_provenance_tracing.py:79 in forward
+	+- LAMBDA_GUARD: G['__import_torch_dot_nn_dot_modules_dot_activation'].torch is G['torch']  # y = torch.addmm(c, d, b)  # caffe2/test/inductor/test_provenance_tracing.py:83 in forward
+	+- LAMBDA_GUARD: G['__import_torch_dot_nn_dot_modules_dot_linear'].F is G['__import_torch_dot_nn_dot_modules_dot_activation'].F  # return F.relu(input, inplace=self.inplace)  # nn/modules/activation.py:144 in forward
+	+- LAMBDA_GUARD: G['__import_torch_dot_nn_dot_modules_dot_linear'].F is G['__import_torch_dot_nn_dot_modules_dot_activation'].torch.nn.functional  # z = torch.nn.functional.gelu(y)  # caffe2/test/inductor/test_provenance_tracing.py:84 in forward
+	
+	Guard latency = 33.54 us
+V0819 12:42:54.799000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "bf4d22f90b804f34c266d5b03ea06df0"}
+	{
+	"name": "build_guards",
+	"ts": 1755632574799701.8,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.804000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "f444b10d3520d5d59ca27e3557790487"}
+	{
+	"name": "gc",
+	"ts": 1755632574804025.2,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.809000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "7370e2c2bd146593f20b3361b506926b"}
+	{
+	"name": "gc",
+	"ts": 1755632574809118.8,
+	"args": {
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.810000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "ce74400bade59f2c5ac2ecbdbc502e23"}
+	{
+	"name": "entire_frame_compile",
+	"ts": 1755632574810257.8,
+	"args": {
+	"fn_name": "_compile.compile_inner",
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.814000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1641] {"compilation_metrics": {"compile_id": "0/0", "frame_key": "1", "co_name": "forward", "co_filename": "/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py", "co_firstlineno": 78, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 57, "shape_env_guard_count": 0, "graph_op_count": 6, "graph_node_count": 13, "graph_input_count": 6, "start_time": 1755632570.389302, "entire_frame_compile_time_s": 4.420942, "backend_compile_time_s": 4.281515, "inductor_compile_time_s": 1.272034, "code_gen_time_s": 0.633145, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": [], "dynamo_time_before_restart_s": 0.0, "stack_trace": ["Line: 39, Name: , Filename: 1", "Line: 36, Name: __invoke_main, Filename: 1", "Line: 105, Name: run_as_main, Filename: 2", "Line: 70, Name: run_as_main, Filename: 3", "Line: 196, Name: _run_module_as_main, Filename: 4", "Line: 86, Name: _run_code, Filename: 4", "Line: 731, Name: , Filename: 5", "Line: 727, Name: main, Filename: 5", "Line: 325, Name: run, Filename: 6", "Line: 620, Name: run_human_interface, Filename: 5", "Line: 582, Name: run, Filename: 5", "Line: 554, Name: run_tests, Filename: 5", "Line: 508, Name: _run_suite_and_maybe_profile, Filename: 5", "Line: 184, Name: run, Filename: 7", "Line: 84, Name: __call__, Filename: 8", "Line: 122, Name: run, Filename: 8", "Line: 84, Name: __call__, Filename: 8", "Line: 122, Name: run, Filename: 8", "Line: 84, Name: __call__, Filename: 8", "Line: 122, Name: run, Filename: 8", "Line: 650, Name: __call__, Filename: 9", "Line: 3406, Name: run, Filename: 10", "Line: 3376, Name: _run_custom, Filename: 10", "Line: 591, Name: run, Filename: 9", "Line: 549, Name: _callTestMethod, Filename: 9", "Line: 79, Name: inner, Filename: 11", "Line: 576, Name: test_tlparse_kernel_stack_traces, Filename: 12", "Line: 413, Name: __call__, Filename: 13", "Line: 1775, Name: _wrapped_call_impl, Filename: 14", "Line: 1786, Name: _call_impl, Filename: 14", "Line: 804, Name: compile_wrapper, Filename: 13", "Line: 78, Name: forward, Filename: 12"], "graph_node_shapes": "{'l_self_modules_fc1_parameters_weight_': [16, 10], 'l_self_modules_fc1_parameters_bias_': [16], 'l_x_': [8, 10], 'l_a_': [10, 20], 'l_c_': [10, 30], 'l_b_': [20, 30], 'x': [8, 16], 'x_1': [8, 16], 'x_2': [8, 16], 'd': [10, 20], 'y': [10, 30], 'z': [10, 30]}", "has_guarded_code": true, "remote_cache_time_saved_s": null, "structured_logging_overhead_s": 0.080655, "config_suppress_errors": false, "config_inline_inbuilt_nn_modules": true, "specialize_float": false, "dynamo_config": "{\"_autograd_backward_strict_mode_conditional_banned_ops\": [\"stride\", \"storage_offset\", \"is_contiguous\"], \"_unsafe_skip_fsdp_module_guards\": false, \"accumulated_recompile_limit\": 256, \"allow_complex_guards_as_runtime_asserts\": false, \"allow_empty_graphs\": false, \"allow_ignore_mark_dynamic\": false, \"allow_rnn\": false, \"allow_unspec_int_on_nn_module\": false, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch._prims\", \"torch._refs\", \"torch.distributions\", \"torch.testing\"], \"assume_dunder_attributes_remain_unchanged\": true, \"assume_static_by_default\": true, \"automatic_dynamic_local_pgo\": true, 
\"automatic_dynamic_remote_pgo\": null, \"automatic_dynamic_shapes\": true, \"automatic_dynamic_shapes_mark_as\": \"dynamic\", \"caching_precompile\": false, \"capture_autograd_function\": true, \"capture_dynamic_output_shape_ops\": false, \"capture_func_transforms\": true, \"capture_scalar_outputs\": false, \"capture_sparse_compute\": false, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"cprofile\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"dead_code_elimination\": true, \"disable\": false, \"do_not_emit_runtime_asserts\": false, \"dont_skip_tracing\": false, \"dynamic_shapes\": true, \"enable_compiler_collectives\": false, \"enable_cpp_framelocals_guard_eval\": true, \"enable_cpp_guard_manager\": true, \"enable_cpp_symbolic_shape_guards\": false, \"enable_faithful_generator_behavior\": true, \"enable_trace_contextlib\": true, \"enable_trace_unittest\": false, \"error_on_nested_fx_trace\": true, \"error_on_nested_jit_trace\": true, \"error_on_recompile\": false, \"fail_on_recompile_limit_hit\": false, \"fake_tensor_cache_crosscheck_enabled\": false, \"fake_tensor_cache_enabled\": true, \"fake_tensor_disable_inference_mode\": true, \"force_nn_module_property_static_shapes\": true, \"force_parameter_static_shapes\": true, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"graph_break_on_nn_param_ctor\": true, \"graph_deduplication_lint\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"inline_inbuilt_nn_modules\": true, \"install_free_tensors\": false, \"issue_3_13_0_warning\": true, \"max_saved_pointers_for_recursive_dict_tags_check\": 256, \"minimum_call_count\": 1, \"numpy_default_complex\": \"complex128\", \"numpy_default_float\": \"float64\", \"numpy_default_int\": \"int64\", \"only_allow_pt2_compliant_ops\": false, \"optimize_ddp\": true, \"optimize_ddp_lazy_compile\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"prepare_freezing\": false, \"pt2_compile_id_prefix\": null, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"recompile_limit\": 8, \"record_compile_time_instruction_count\": false, \"record_runtime_overhead\": true, \"replay_record_enabled\": false, \"report_guard_failures\": true, \"rewrite_assert_with_torch_assert\": true, \"run_gc_after_compile\": true, \"skip_code_recursive_on_recompile_limit_hit\": true, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_guards_on_constant_func_defaults\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"skip_tensor_guards_with_matching_dict_tags\": true, \"skip_torchrec\": true, \"skipfiles_inline_module_allowlist\": {}, \"specialize_float\": false, \"specialize_int\": false, \"suppress_errors\": false, \"trace_numpy\": true, \"track_nodes_for_deduplication\": false, \"use_graph_deduplication\": false, \"use_lamba_guard_for_object_aliasing\": true, \"use_lazy_graph_module\": true, \"use_numpy_random_stream\": false, \"use_recursive_dict_tags_for_guards\": true, \"verify_correctness\": false, \"wrap_top_frame\": false}", "is_forward": true, "num_triton_bundles": null, "remote_fx_graph_cache_get_time_ms": null, "remote_fx_graph_cache_put_time_ms": null, "start_time_us": 1755632570389302, "duration_us": 4420942, "dynamo_cumulative_compile_time_us": 4420942, "aot_autograd_cumulative_compile_time_us": 4281515, "inductor_cumulative_compile_time_us": 1272034, 
"inductor_code_gen_cumulative_compile_time_us": 633145, "triton_compile_time_us": 470470, "runtime_cudagraphify_time_us": null, "runtime_triton_autotune_time_us": null, "dynamo_compile_time_before_restart_us": 0, "distributed_ephemeral_timeout_us": null, "structured_logging_overhead_us": 80655, "remote_fx_graph_cache_get_time_us": null, "remote_fx_graph_cache_put_time_us": null, "backward_cumulative_compile_time_us": null, "end_time_us": 1755632574811075, "pre_grad_pass_time_us": 16388, "post_grad_pass_time_us": 504039, "joint_graph_pass_time_us": 2799955, "log_format_version": 3, "inductor_config": "{\"TYPE_CHECKING\": false, \"_cache_config_ignore_prefix\": [\"trace\", \"cuda.cutlass_dir\", \"worker_start_method\", \"compile_threads\", \"post_grad_custom_post_pass\", \"post_grad_custom_pre_pass\", \"joint_custom_pre_pass\", \"joint_custom_post_pass\", \"_fuse_ddp_communication_passes\", \"_pre_fusion_custom_pass\", \"always_complex_memory_overlap_TESTING_ONLY\", \"fx_graph_cache\", \"fx_graph_remote_cache\", \"autotune_local_cache\", \"autotune_remote_cache\"], \"_collective.auto_select\": false, \"_collective.one_shot_all_reduce_threshold_bytes\": 131072, \"_fuse_ddp_bucket_size\": 25, \"_fuse_ddp_communication\": false, \"_fuse_ddp_communication_passes\": [\"fuse_ddp_with_concat_op\", \"schedule_comm_wait\"], \"_micro_pipeline_tp\": false, \"_post_fusion_custom_pass\": null, \"_pre_fusion_custom_pass\": null, \"_profile_var\": \"\", \"_raise_error_for_testing\": false, \"_save_config_ignore\": [\"trace.upload_tar\", \"joint_custom_pre_pass\", \"joint_custom_post_pass\", \"pre_grad_custom_pass\", \"aot_inductor.repro_level\", \"aot_inductor.dump_aoti_minifier\", \"post_grad_custom_pre_pass\", \"post_grad_custom_post_pass\", \"_fuse_ddp_communication_passes\", \"_pre_fusion_custom_pass\"], \"add_pre_grad_passes\": null, \"aggressive_fusion\": false, \"alignment_asserts\": false, \"allow_buffer_reuse\": true, \"always_complex_memory_overlap_TESTING_ONLY\": false, \"always_keep_tensor_constants\": false, \"annotate_training\": false, \"aot_inductor.allow_stack_allocation\": false, \"aot_inductor.compile_standalone\": false, \"aot_inductor.compile_wrapper_opt_level\": \"O1\", \"aot_inductor.custom_op_libs\": null, \"aot_inductor.custom_ops_to_c_shims\": {}, \"aot_inductor.debug_compile\": false, \"aot_inductor.debug_intermediate_value_printer\": \"0\", \"aot_inductor.dump_aoti_minifier\": false, \"aot_inductor.embed_kernel_binary\": null, \"aot_inductor.emit_multi_arch_kernel\": null, \"aot_inductor.enable_lto\": false, \"aot_inductor.filtered_kernel_names\": null, \"aot_inductor.force_mmap_weights\": false, \"aot_inductor.metadata\": {}, \"aot_inductor.model_name_for_generated_files\": null, \"aot_inductor.output_path\": \"\", \"aot_inductor.package\": false, \"aot_inductor.package_constants_in_so\": true, \"aot_inductor.package_constants_on_disk\": false, \"aot_inductor.package_cpp_only\": null, \"aot_inductor.precompile_headers\": false, \"aot_inductor.presets\": {}, \"aot_inductor.raise_error_on_ignored_optimization\": true, \"aot_inductor.repro_level\": 2, \"aot_inductor.serialized_in_spec\": \"\", \"aot_inductor.serialized_out_spec\": \"\", \"aot_inductor.use_consts_asm_build\": true, \"aot_inductor.use_minimal_arrayref_interface\": false, \"aot_inductor.use_runtime_constant_folding\": false, \"aot_inductor.weight_use_caching_allocator\": false, \"assert_indirect_indexing\": true, \"assume_aligned_inputs\": false, \"assume_unaligned_fallback_output\": false, \"autoheuristic_collect\": 
\"\", \"autoheuristic_log_path\": \"DEFAULT\", \"autoheuristic_use\": \"mixed_mm\", \"autotune_fallback_to_aten\": false, \"autotune_in_subproc\": false, \"autotune_local_cache\": true, \"autotune_lookup_table\": {}, \"autotune_multi_device\": false, \"autotune_num_choices_displayed\": 10, \"autotune_remote_cache\": null, \"b2b_gemm_pass\": false, \"batch_fusion\": true, \"benchmark_combo_kernel\": false, \"benchmark_epilogue_fusion\": true, \"benchmark_fusion\": false, \"benchmark_harness\": true, \"benchmark_kernel\": false, \"bfloat16_atomic_adds_enabled\": true, \"bucket_all_gathers_fx\": \"none\", \"bucket_all_gathers_fx_bucket_size_determinator\": null, \"bucket_reduce_scatters_fx\": \"none\", \"bucket_reduce_scatters_fx_bucket_size_determinator\": null, \"bundle_triton_into_fx_graph_cache\": null, \"bundled_autotune_remote_cache\": null, \"bw_outputs_user_visible\": true, \"can_inplace_pad_graph_input\": false, \"check_stack_no_cycles_TESTING_ONLY\": false, \"combo_kernel_allow_mixed_sizes\": 1, \"combo_kernel_foreach_dynamic_shapes\": true, \"combo_kernels\": false, \"combo_kernels_autotune\": 1, \"comment_origin\": false, \"compile_threads\": 32, \"comprehensive_padding\": true, \"compute_all_bounds\": false, \"constant_and_index_propagation\": true, \"conv_1x1_as_mm\": false, \"coordinate_descent_check_all_directions\": false, \"coordinate_descent_search_radius\": 1, \"coordinate_descent_tuning\": false, \"cpp.cxx\": [null, \"g++\"], \"cpp.descriptive_names\": \"original_aten\", \"cpp.dynamic_threads\": false, \"cpp.enable_concat_linear\": false, \"cpp.enable_floating_point_contract_flag\": \"off\", \"cpp.enable_grouped_gemm_template\": false, \"cpp.enable_kernel_profile\": false, \"cpp.enable_loop_tail_vec\": true, \"cpp.enable_tiling_heuristics\": true, \"cpp.enable_unsafe_math_opt_flag\": false, \"cpp.fallback_scatter_reduce_sum\": true, \"cpp.force_inline_kernel\": false, \"cpp.gemm_cache_blocking\": null, \"cpp.gemm_max_k_slices\": 1, \"cpp.gemm_thread_factors\": null, \"cpp.inject_log1p_bug_TESTING_ONLY\": null, \"cpp.inject_relu_bug_TESTING_ONLY\": null, \"cpp.max_horizontal_fusion_size\": 16, \"cpp.min_chunk_size\": 512, \"cpp.no_redundant_loops\": true, \"cpp.simdlen\": null, \"cpp.threads\": -1, \"cpp.use_decompose_tanh\": false, \"cpp.use_small_dequant_buffer\": false, \"cpp.vec_isa_ok\": null, \"cpp.weight_prepack\": true, \"cpp_cache_precompile_headers\": false, \"cpp_wrapper\": false, \"cpp_wrapper_build_separate\": false, \"cpu_backend\": \"cpp\", \"cuda.arch\": null, \"cuda.binary_remote_cache_force_write\": false, \"cuda.compile_opt_level\": \"-O1\", \"cuda.cuda_cxx\": null, \"cuda.cutlass_backend_min_gemm_size\": 1, \"cuda.cutlass_dir\": \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/third_party/cutlass\", \"cuda.cutlass_enabled_ops\": \"all\", \"cuda.cutlass_epilogue_fusion_enabled\": false, \"cuda.cutlass_hash_with_compile_cmd\": false, \"cuda.cutlass_instantiation_level\": \"0\", \"cuda.cutlass_max_profiling_configs\": null, \"cuda.cutlass_max_profiling_swizzle_options\": [1, 2, 4, 8], \"cuda.cutlass_op_allowlist_regex\": null, \"cuda.cutlass_op_denylist_regex\": null, \"cuda.cutlass_prescreening\": true, \"cuda.cutlass_presets\": null, \"cuda.cutlass_tma_only\": false, \"cuda.enable_caching_codegen\": true, \"cuda.enable_cuda_lto\": false, \"cuda.enable_debug_info\": false, \"cuda.enable_ptxas_info\": false, \"cuda.generate_test_runner\": false, 
\"cuda.upload_to_binary_remote_cache\": false, \"cuda.use_binary_remote_cache\": true, \"cuda.use_fast_math\": false, \"cuda.version\": null, \"cuda_backend\": \"triton\", \"dce\": false, \"debug\": false, \"debug_fusion\": false, \"debug_index_asserts\": false, \"debug_ir_traceback\": false, \"decompose_mem_bound_mm\": false, \"developer_warnings\": true, \"disable_cpp_codegen\": false, \"disable_padding_cpu\": true, \"disable_progress\": true, \"dynamic_scale_rblock\": true, \"efficient_conv_bn_eval_fx_passes\": false, \"emulate_precision_casts\": false, \"enable_auto_functionalized_v2\": true, \"enable_caching_generated_triton_templates\": true, \"enable_linear_binary_folding\": false, \"enabled_metric_tables\": \"\", \"epilogue_fusion\": true, \"epilogue_fusion_first\": false, \"estimate_op_runtime\": \"default\", \"external_matmul\": [], \"fallback_random\": false, \"force_fuse_int_mm_with_mul\": false, \"force_layout_optimization\": false, \"force_pointwise_cat\": false, \"force_same_precision\": false, \"force_shape_pad\": false, \"freezing\": false, \"freezing_discard_parameters\": false, \"fx_graph_cache\": false, \"fx_graph_remote_cache\": null, \"fx_passes_numeric_check\": {\"num_iterations\": 1, \"pre_grad\": false, \"precision\": 0.0001, \"requires_optimizer\": true}, \"generate_intermediate_hooks\": false, \"global_cache_dir\": null, \"graph_partition\": false, \"group_fusion\": false, \"halide.asserts\": false, \"halide.cpu_target\": \"host\", \"halide.debug\": false, \"halide.gpu_target\": \"host-cuda\", \"halide.scan_kernels\": false, \"halide.scheduler_cpu\": \"Adams2019\", \"halide.scheduler_cuda\": \"Anderson2021\", \"implicit_fallbacks\": true, \"inplace_buffers\": true, \"inplace_padding\": true, \"inter_node_bw\": 25, \"intra_node_bw\": 300, \"is_nightly_or_source\": false, \"is_predispatch\": false, \"joint_custom_post_pass\": null, \"joint_custom_pre_pass\": null, \"joint_graph_constant_folding\": true, \"keep_output_stride\": true, \"kernel_name_max_ops\": 10, \"layout_opt_default\": \"1\", \"layout_optimization\": true, \"log_tlparse\": false, \"loop_ordering_after_fusion\": false, \"max_autotune\": false, \"max_autotune_conv_backends\": \"ATEN,TRITON\", \"max_autotune_flex_search_space\": \"DEFAULT\", \"max_autotune_gemm\": false, \"max_autotune_gemm_backends\": \"ATEN,TRITON,CPP\", \"max_autotune_gemm_search_space\": \"DEFAULT\", \"max_autotune_pointwise\": false, \"max_autotune_report_choices_stats\": true, \"max_autotune_subproc_graceful_timeout_seconds\": 0.0, \"max_autotune_subproc_result_timeout_seconds\": 60.0, \"max_autotune_subproc_terminate_timeout_seconds\": 0.0, \"max_epilogue_benchmarked_choices\": 1, \"max_fusion_buffer_group_pairwise_attempts\": 64, \"max_fusion_size\": 64, \"max_pointwise_cat_inputs\": 8, \"memory_planning\": false, \"memory_pool\": \"intermediates\", \"min_num_split\": 0, \"mixed_mm_choice\": \"heuristic\", \"multi_kernel_hints\": [], \"nan_asserts\": false, \"non_blocking_remote_cache_write\": true, \"online_softmax\": true, \"optimize_scatter_upon_const_tensor\": true, \"pad_channels_last\": false, \"pad_outputs\": false, \"padding_alignment_bytes\": 128, \"padding_stride_threshold\": 1024, \"pattern_matcher\": true, \"permute_fusion\": false, \"pick_loop_orders\": true, \"post_grad_custom_post_pass\": null, \"post_grad_custom_pre_pass\": null, \"post_grad_fusion_options\": {}, \"pre_grad_custom_pass\": null, \"pre_grad_fusion_options\": {}, \"precompilation_timeout_seconds\": 3600, \"profile_bandwidth\": false, 
\"profile_bandwidth_output\": null, \"profile_bandwidth_regex\": \"\", \"profile_bandwidth_with_do_bench_using_profiling\": false, \"profiler_mark_wrapper_call\": false, \"prologue_fusion\": true, \"quiesce_async_compile_pool\": false, \"realize_acc_reads_size_threshold\": null, \"realize_acc_reads_threshold\": 8, \"realize_opcount_threshold\": 30, \"realize_reads_threshold\": 4, \"remote_gemm_autotune_cache\": false, \"remove_pre_grad_passes\": null, \"reorder_for_compute_comm_overlap\": false, \"reorder_for_compute_comm_overlap_passes\": [\"reorder_compute_for_overlap\", \"sink_waits\", \"raise_comms\"], \"reorder_for_locality\": true, \"reorder_for_peak_memory\": true, \"reorder_prefetch_limit\": null, \"rocm.arch\": [], \"rocm.ck_dir\": null, \"rocm.ck_max_profiling_configs\": null, \"rocm.ck_supported_arch\": [\"gfx90a\", \"gfx942\", \"gfx950\"], \"rocm.ck_tile_max_profiling_configs\": null, \"rocm.compile_opt_level\": \"-O2\", \"rocm.flush_denormals\": true, \"rocm.generate_test_runner\": false, \"rocm.is_debug\": false, \"rocm.kBatch_sweep\": null, \"rocm.n_max_profiling_configs\": null, \"rocm.print_kernel_resource_usage\": false, \"rocm.rocm_home\": null, \"rocm.save_temps\": false, \"rocm.split_k_threshold\": 16, \"rocm.use_fast_math\": true, \"rocm.use_preselected_instances\": false, \"save_args\": false, \"scalar_asserts\": true, \"score_fusion_memory_threshold\": 10, \"search_autotune_cache\": false, \"shape_padding\": true, \"size_asserts\": true, \"sleep_sec_TESTING_ONLY\": null, \"split_cat_fx_passes\": true, \"split_reductions\": true, \"static_launch_user_defined_triton_kernels\": false, \"static_weight_shapes\": true, \"strict_static_cuda_launcher\": false, \"test_configs.autotune_choice_desc_regex\": null, \"test_configs.autotune_choice_name_regex\": null, \"test_configs.force_extern_kernel_in_multi_template\": false, \"test_configs.graphsafe_rng_func_ignores_fallback_random\": false, \"test_configs.max_mm_configs\": null, \"test_configs.runtime_triton_dtype_assert\": false, \"test_configs.static_cpp_dtype_assert\": false, \"test_configs.track_memory_lifecycle\": null, \"test_configs.use_libtorch\": false, \"torchinductor_worker_logpath\": \"\", \"trace.compile_profile\": false, \"trace.debug_dir\": null, \"trace.debug_log\": false, \"trace.dot_graph_shape\": null, \"trace.draw_orig_fx_graph\": false, \"trace.enabled\": false, \"trace.fx_graph\": true, \"trace.fx_graph_transformed\": true, \"trace.graph_diagram\": false, \"trace.info_log\": false, \"trace.ir_post_fusion\": true, \"trace.ir_pre_fusion\": true, \"trace.log_autotuning_results\": false, \"trace.log_url_for_graph_xform\": null, \"trace.output_code\": true, \"trace.provenance_tracking_level\": 2, \"trace.save_real_tensors\": false, \"trace.upload_tar\": null, \"triton.autotune_at_compile_time\": null, \"triton.autotune_cublasLt\": true, \"triton.autotune_pointwise\": true, \"triton.autotune_with_sample_inputs\": false, \"triton.coalesce_tiling_analysis\": false, \"triton.codegen_upcast_to_fp32\": true, \"triton.cooperative_reductions\": false, \"triton.cudagraph_capture_sizes\": null, \"triton.cudagraph_dynamic_shape_warn_limit\": 50, \"triton.cudagraph_skip_dynamic_graphs\": false, \"triton.cudagraph_support_input_mutation\": false, \"triton.cudagraph_trees\": true, \"triton.cudagraph_trees_history_recording\": false, \"triton.cudagraph_unexpected_rerecord_limit\": 128, \"triton.cudagraphs\": false, \"triton.debug_sync_graph\": false, \"triton.debug_sync_kernel\": false, \"triton.decompose_k_threshold\": 32, 
\"triton.dense_indexing\": false, \"triton.descriptive_names\": \"original_aten\", \"triton.disallow_failing_autotune_kernels_TESTING_ONLY\": false, \"triton.divisible_by_16\": true, \"triton.enable_persistent_tma_matmul\": false, \"triton.fast_path_cudagraph_asserts\": false, \"triton.force_cooperative_reductions\": false, \"triton.force_cudagraph_sync\": false, \"triton.force_cudagraphs_warmup\": false, \"triton.inject_relu_bug_TESTING_ONLY\": null, \"triton.max_tiles\": null, \"triton.min_split_scan_rblock\": 256, \"triton.multi_kernel\": 0, \"triton.num_decompose_k_splits\": 10, \"triton.persistent_reductions\": true, \"triton.prefer_nd_tiling\": false, \"triton.skip_cudagraph_warmup\": false, \"triton.skip_l1_cache\": false, \"triton.slow_path_cudagraph_asserts\": true, \"triton.spill_threshold\": 16, \"triton.store_cubin\": false, \"triton.tile_reductions\": false, \"triton.tiling_prevents_pointwise_fusion\": true, \"triton.tiling_prevents_reduction_fusion\": true, \"triton.unique_kernel_names\": true, \"triton.unique_user_kernel_names\": false, \"triton.use_block_ptr\": false, \"triton.use_tensor_descriptor\": false, \"triton_kernel_default_layout_constraint\": \"needs_fixed_stride_order\", \"unbacked_symint_fallback\": 8192, \"unroll_reductions_threshold\": 8, \"unsafe_ignore_unsupported_triton_autotune_args\": false, \"unsafe_marked_cacheable_functions\": {}, \"unsafe_skip_cache_dynamic_shape_guards\": false, \"use_experimental_benchmarker\": false, \"use_fast_math\": false, \"use_mixed_mm\": true, \"use_static_cuda_launcher\": true, \"verbose_progress\": false, \"warn_mix_layout\": false, \"worker_log_path\": \"/logs/dedicated_log_torch_compile_worker_rank\", \"worker_start_method\": \"subprocess\", \"worker_suppress_logging\": true}", "remote_cache_version": null, "inductor_fx_remote_cache_hit_count": null, "inductor_fx_remote_cache_miss_count": null, "inductor_fx_remote_cache_backend_type": null, "inductor_fx_remote_cache_hit_keys": null, "inductor_fx_remote_cache_miss_keys": null, "cuda_version": "12.4.0", "triton_version": "3.3.1+fb", "feature_usage": {"aot_autograd_remote_cache": false, "fx_cache": false, "parallel_compile_post_warmup": false, "static_cuda_launcher": true}, "compile_time_autotune_time_us": 2533583, "is_runtime": false, "gc_time_us": 5093, "tensorify_float_attempt": null, "tensorify_float_success": null, "tensorify_float_failure": null, "guard_latency_us": 33, "recompile_reason": null, "num_graph_breaks": 0, "triton_kernel_compile_times_us": "[[\"triton_poi_fused_addmm_gelu_2\", 181999], [\"triton_poi_fused_mul_1\", 159751], [\"triton_poi_fused_addmm_relu_sigmoid_threshold_backward_0\", 124070]]", "ir_count": 37, "cudagraph_skip_reason": null, "python_version": "3.10.9+fb (3.10:1dd9be6, May  4 2022, 01:23:45) [Clang 17.0.4 (mononoke://mononoke.internal.tfbnw.net/fbsource 447fcd878ef9ed82d", "pgo_put_remote_code_state_time_us": null, "pgo_get_remote_code_state_time_us": null, "param_numel": null, "param_bytes": null, "param_count": null, "recompile_user_contexts": null, "inline_inbuilt_nn_modules_candidate": false}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V0819 12:42:54.815000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "c4c0fbce8b1c0adf01f73be09d366f15"}
+	{
+	"name": "dynamo",
+	"ts": 1755632574815486.0,
+	"args": {
+	"compile_id": "0/0",
+	"num_graph_breaks": 0,
+	"guard_latency_us": 33,
+	"frame_key": "1",
+	"co_name": "forward",
+	"co_filename": "/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/caffe2/test/inductor/test_provenance_tracing.py",
+	"co_firstlineno": 78,
+	"cache_size": 0,
+	"accumulated_cache_size": 0,
+	"guard_count": 57,
+	"shape_env_guard_count": 0,
+	"graph_op_count": 6,
+	"graph_node_count": 13,
+	"graph_input_count": 6,
+	"fail_type": null,
+	"fail_reason": null,
+	"fail_user_frame_filename": null,
+	"fail_user_frame_lineno": null,
+	"non_compliant_ops": [],
+	"compliant_custom_ops": [],
+	"restart_reasons": [],
+	"dynamo_time_before_restart_s": 0.0,
+	"has_guarded_code": true,
+	"dynamo_config": "{\"_autograd_backward_strict_mode_conditional_banned_ops\": [\"stride\", \"storage_offset\", \"is_contiguous\"], \"_unsafe_skip_fsdp_module_guards\": false, \"accumulated_recompile_limit\": 256, \"allow_complex_guards_as_runtime_asserts\": false, \"allow_empty_graphs\": false, \"allow_ignore_mark_dynamic\": false, \"allow_rnn\": false, \"allow_unspec_int_on_nn_module\": false, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch._prims\", \"torch._refs\", \"torch.distributions\", \"torch.testing\"], \"assume_dunder_attributes_remain_unchanged\": true, \"assume_static_by_default\": true, \"automatic_dynamic_local_pgo\": true, \"automatic_dynamic_remote_pgo\": null, \"automatic_dynamic_shapes\": true, \"automatic_dynamic_shapes_mark_as\": \"dynamic\", \"caching_precompile\": false, \"capture_autograd_function\": true, \"capture_dynamic_output_shape_ops\": false, \"capture_func_transforms\": true, \"capture_scalar_outputs\": false, \"capture_sparse_compute\": false, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"cprofile\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"dead_code_elimination\": true, \"disable\": false, \"do_not_emit_runtime_asserts\": false, \"dont_skip_tracing\": false, \"dynamic_shapes\": true, \"enable_compiler_collectives\": false, \"enable_cpp_framelocals_guard_eval\": true, \"enable_cpp_guard_manager\": true, \"enable_cpp_symbolic_shape_guards\": false, \"enable_faithful_generator_behavior\": true, \"enable_trace_contextlib\": true, \"enable_trace_unittest\": false, \"error_on_nested_fx_trace\": true, \"error_on_nested_jit_trace\": true, \"error_on_recompile\": false, \"fail_on_recompile_limit_hit\": false, \"fake_tensor_cache_crosscheck_enabled\": false, \"fake_tensor_cache_enabled\": true, \"fake_tensor_disable_inference_mode\": true, \"force_nn_module_property_static_shapes\": true, \"force_parameter_static_shapes\": true, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"graph_break_on_nn_param_ctor\": true, \"graph_deduplication_lint\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"inline_inbuilt_nn_modules\": true, \"install_free_tensors\": false, \"issue_3_13_0_warning\": true, \"max_saved_pointers_for_recursive_dict_tags_check\": 256, \"minimum_call_count\": 1, \"numpy_default_complex\": \"complex128\", \"numpy_default_float\": \"float64\", \"numpy_default_int\": \"int64\", \"only_allow_pt2_compliant_ops\": false, \"optimize_ddp\": true, \"optimize_ddp_lazy_compile\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"prepare_freezing\": false, \"pt2_compile_id_prefix\": null, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"recompile_limit\": 8, \"record_compile_time_instruction_count\": false, \"record_runtime_overhead\": true, \"replay_record_enabled\": false, \"report_guard_failures\": true, \"rewrite_assert_with_torch_assert\": true, \"run_gc_after_compile\": true, \"skip_code_recursive_on_recompile_limit_hit\": true, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_guards_on_constant_func_defaults\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"skip_tensor_guards_with_matching_dict_tags\": true, \"skip_torchrec\": true, \"skipfiles_inline_module_allowlist\": {}, \"specialize_float\": false, \"specialize_int\": false, \"suppress_errors\": false, 
\"trace_numpy\": true, \"track_nodes_for_deduplication\": false, \"use_graph_deduplication\": false, \"use_lamba_guard_for_object_aliasing\": true, \"use_lazy_graph_module\": true, \"use_numpy_random_stream\": false, \"use_recursive_dict_tags_for_guards\": true, \"verify_correctness\": false, \"wrap_top_frame\": false}"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.821000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "3c9fdd83a3a08af9e99211d0c1d99731"}
+	{
+	"name": "CachingAutotuner.benchmark_all_configs",
+	"ts": 1755632574821199.5,
+	"args": {
+	"kernel_name": "triton_poi_fused_mul_1",
+	"is_backward": false,
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.822000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "6ea01e692005358d42613296d879d446"}
+	{
+	"name": "TritonBenchmarker.benchmark_gpu",
+	"ts": 1755632574822487.5,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.871000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "78669134527e70ea06ae02cdf52bddae"}
+	{
+	"name": "TritonBenchmarker.benchmark_gpu",
+	"ts": 1755632574871350.0,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.873000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "1cb1d3f39a4389a634d2abb0ccb0af87"}
+	{
+	"name": "TritonBenchmarker.benchmark_gpu",
+	"ts": 1755632574872921.5,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.921000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "cdc89db91f86cbefaab9b2edd3421bc1"}
+	{
+	"name": "TritonBenchmarker.benchmark_gpu",
+	"ts": 1755632574921528.0,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.923000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "37b148dd760d2350ededc4c967973809"}
+	{
+	"name": "CachingAutotuner.benchmark_all_configs",
+	"ts": 1755632574922968.8,
+	"args": {
+	"kernel_name": "triton_poi_fused_mul_1",
+	"is_backward": false,
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.927000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "2c5e179186e90e71f157e87381102e76"}
+	{
+	"name": "CachingAutotuner.benchmark_all_configs",
+	"ts": 1755632574927581.0,
+	"args": {
+	"kernel_name": "triton_poi_fused_addmm_gelu_2",
+	"is_backward": false,
+	"compile_id": "0/0"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.928000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "d9dc0465ca53578a3c588b05eb9fa6ef"}
+	{
+	"name": "TritonBenchmarker.benchmark_gpu",
+	"ts": 1755632574928566.8,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.982000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "b6be5bc3a106bc0640ef8c16bce09013"}
+	{
+	"name": "TritonBenchmarker.benchmark_gpu",
+	"ts": 1755632574982805.0,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:54.983000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "b90bb92baf7e32988123a06878237301"}
+	{
+	"name": "TritonBenchmarker.benchmark_gpu",
+	"ts": 1755632574983700.0,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:55.037000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "80cc8cb4510c09a1926bb813dd3c9e87"}
+	{
+	"name": "TritonBenchmarker.benchmark_gpu",
+	"ts": 1755632575037656.5,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:55.038000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1946] {"chromium_event": {}, "has_payload": "afb5e60dbea0ca85ef94b1df955a0687"}
+	{
+	"name": "CachingAutotuner.benchmark_all_configs",
+	"ts": 1755632575038720.8,
+	"args": {
+	"kernel_name": "triton_poi_fused_addmm_gelu_2",
+	"is_backward": false,
+	"compile_id": "0/0"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V0819 12:42:55.046000 888578 /data/users/shangdiy/fbsource/fbcode/caffe2/torch/_dynamo/utils.py:1641] {"compilation_metrics_runtime": {"compile_id": "0/0", "frame_key": null, "co_name": null, "co_filename": null, "co_firstlineno": null, "cache_size": null, "accumulated_cache_size": null, "guard_count": null, "shape_env_guard_count": null, "graph_op_count": null, "graph_node_count": null, "graph_input_count": null, "start_time": 1755632574.926778, "entire_frame_compile_time_s": null, "backend_compile_time_s": null, "inductor_compile_time_s": null, "code_gen_time_s": null, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": null, "compliant_custom_ops": null, "restart_reasons": null, "dynamo_time_before_restart_s": null, "stack_trace": null, "graph_node_shapes": null, "has_guarded_code": null, "remote_cache_time_saved_s": null, "structured_logging_overhead_s": null, "config_suppress_errors": false, "config_inline_inbuilt_nn_modules": true, "specialize_float": null, "dynamo_config": "{\"_autograd_backward_strict_mode_conditional_banned_ops\": [\"stride\", \"storage_offset\", \"is_contiguous\"], \"_unsafe_skip_fsdp_module_guards\": false, \"accumulated_recompile_limit\": 256, \"allow_complex_guards_as_runtime_asserts\": false, \"allow_empty_graphs\": false, \"allow_ignore_mark_dynamic\": false, \"allow_rnn\": false, \"allow_unspec_int_on_nn_module\": false, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch._prims\", \"torch._refs\", \"torch.distributions\", \"torch.testing\"], \"assume_dunder_attributes_remain_unchanged\": true, \"assume_static_by_default\": true, \"automatic_dynamic_local_pgo\": true, \"automatic_dynamic_remote_pgo\": null, \"automatic_dynamic_shapes\": true, \"automatic_dynamic_shapes_mark_as\": \"dynamic\", \"caching_precompile\": false, \"capture_autograd_function\": true, \"capture_dynamic_output_shape_ops\": false, \"capture_func_transforms\": true, \"capture_scalar_outputs\": false, \"capture_sparse_compute\": false, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"cprofile\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"dead_code_elimination\": true, \"disable\": false, \"do_not_emit_runtime_asserts\": false, \"dont_skip_tracing\": false, \"dynamic_shapes\": true, \"enable_compiler_collectives\": false, \"enable_cpp_framelocals_guard_eval\": true, \"enable_cpp_guard_manager\": true, \"enable_cpp_symbolic_shape_guards\": false, \"enable_faithful_generator_behavior\": true, \"enable_trace_contextlib\": true, \"enable_trace_unittest\": false, \"error_on_nested_fx_trace\": true, \"error_on_nested_jit_trace\": true, \"error_on_recompile\": false, \"fail_on_recompile_limit_hit\": false, \"fake_tensor_cache_crosscheck_enabled\": false, \"fake_tensor_cache_enabled\": true, \"fake_tensor_disable_inference_mode\": true, \"force_nn_module_property_static_shapes\": true, \"force_parameter_static_shapes\": true, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"graph_break_on_nn_param_ctor\": true, \"graph_deduplication_lint\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"inline_inbuilt_nn_modules\": true, \"install_free_tensors\": false, \"issue_3_13_0_warning\": true, \"max_saved_pointers_for_recursive_dict_tags_check\": 256, \"minimum_call_count\": 1, \"numpy_default_complex\": \"complex128\", \"numpy_default_float\": \"float64\", 
\"numpy_default_int\": \"int64\", \"only_allow_pt2_compliant_ops\": false, \"optimize_ddp\": true, \"optimize_ddp_lazy_compile\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"prepare_freezing\": false, \"pt2_compile_id_prefix\": null, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"recompile_limit\": 8, \"record_compile_time_instruction_count\": false, \"record_runtime_overhead\": true, \"replay_record_enabled\": false, \"report_guard_failures\": true, \"rewrite_assert_with_torch_assert\": true, \"run_gc_after_compile\": true, \"skip_code_recursive_on_recompile_limit_hit\": true, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_guards_on_constant_func_defaults\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"skip_tensor_guards_with_matching_dict_tags\": true, \"skip_torchrec\": true, \"skipfiles_inline_module_allowlist\": {}, \"specialize_float\": false, \"specialize_int\": false, \"suppress_errors\": false, \"trace_numpy\": true, \"track_nodes_for_deduplication\": false, \"use_graph_deduplication\": false, \"use_lamba_guard_for_object_aliasing\": true, \"use_lazy_graph_module\": true, \"use_numpy_random_stream\": false, \"use_recursive_dict_tags_for_guards\": true, \"verify_correctness\": false, \"wrap_top_frame\": false}", "is_forward": true, "num_triton_bundles": null, "remote_fx_graph_cache_get_time_ms": null, "remote_fx_graph_cache_put_time_ms": null, "start_time_us": 1755632574926778, "duration_us": 212908, "dynamo_cumulative_compile_time_us": null, "aot_autograd_cumulative_compile_time_us": null, "inductor_cumulative_compile_time_us": null, "inductor_code_gen_cumulative_compile_time_us": null, "triton_compile_time_us": null, "runtime_cudagraphify_time_us": null, "runtime_triton_autotune_time_us": 212908, "dynamo_compile_time_before_restart_us": null, "distributed_ephemeral_timeout_us": null, "structured_logging_overhead_us": null, "remote_fx_graph_cache_get_time_us": null, "remote_fx_graph_cache_put_time_us": null, "backward_cumulative_compile_time_us": null, "end_time_us": 1755632575043063, "pre_grad_pass_time_us": null, "post_grad_pass_time_us": null, "joint_graph_pass_time_us": null, "log_format_version": 3, "inductor_config": "{\"TYPE_CHECKING\": false, \"_cache_config_ignore_prefix\": [\"trace\", \"cuda.cutlass_dir\", \"worker_start_method\", \"compile_threads\", \"post_grad_custom_post_pass\", \"post_grad_custom_pre_pass\", \"joint_custom_pre_pass\", \"joint_custom_post_pass\", \"_fuse_ddp_communication_passes\", \"_pre_fusion_custom_pass\", \"always_complex_memory_overlap_TESTING_ONLY\", \"fx_graph_cache\", \"fx_graph_remote_cache\", \"autotune_local_cache\", \"autotune_remote_cache\"], \"_collective.auto_select\": false, \"_collective.one_shot_all_reduce_threshold_bytes\": 131072, \"_fuse_ddp_bucket_size\": 25, \"_fuse_ddp_communication\": false, \"_fuse_ddp_communication_passes\": [\"fuse_ddp_with_concat_op\", \"schedule_comm_wait\"], \"_micro_pipeline_tp\": false, \"_post_fusion_custom_pass\": null, \"_pre_fusion_custom_pass\": null, \"_profile_var\": \"\", \"_raise_error_for_testing\": false, \"_save_config_ignore\": [\"trace.upload_tar\", \"joint_custom_pre_pass\", \"joint_custom_post_pass\", \"pre_grad_custom_pass\", \"aot_inductor.repro_level\", \"aot_inductor.dump_aoti_minifier\", \"post_grad_custom_pre_pass\", \"post_grad_custom_post_pass\", \"_fuse_ddp_communication_passes\", \"_pre_fusion_custom_pass\"], \"add_pre_grad_passes\": null, 
\"aggressive_fusion\": false, \"alignment_asserts\": false, \"allow_buffer_reuse\": true, \"always_complex_memory_overlap_TESTING_ONLY\": false, \"always_keep_tensor_constants\": false, \"annotate_training\": false, \"aot_inductor.allow_stack_allocation\": false, \"aot_inductor.compile_standalone\": false, \"aot_inductor.compile_wrapper_opt_level\": \"O1\", \"aot_inductor.custom_op_libs\": null, \"aot_inductor.custom_ops_to_c_shims\": {}, \"aot_inductor.debug_compile\": false, \"aot_inductor.debug_intermediate_value_printer\": \"0\", \"aot_inductor.dump_aoti_minifier\": false, \"aot_inductor.embed_kernel_binary\": null, \"aot_inductor.emit_multi_arch_kernel\": null, \"aot_inductor.enable_lto\": false, \"aot_inductor.filtered_kernel_names\": null, \"aot_inductor.force_mmap_weights\": false, \"aot_inductor.metadata\": {}, \"aot_inductor.model_name_for_generated_files\": null, \"aot_inductor.output_path\": \"\", \"aot_inductor.package\": false, \"aot_inductor.package_constants_in_so\": true, \"aot_inductor.package_constants_on_disk\": false, \"aot_inductor.package_cpp_only\": null, \"aot_inductor.precompile_headers\": false, \"aot_inductor.presets\": {}, \"aot_inductor.raise_error_on_ignored_optimization\": true, \"aot_inductor.repro_level\": 2, \"aot_inductor.serialized_in_spec\": \"\", \"aot_inductor.serialized_out_spec\": \"\", \"aot_inductor.use_consts_asm_build\": true, \"aot_inductor.use_minimal_arrayref_interface\": false, \"aot_inductor.use_runtime_constant_folding\": false, \"aot_inductor.weight_use_caching_allocator\": false, \"assert_indirect_indexing\": true, \"assume_aligned_inputs\": false, \"assume_unaligned_fallback_output\": false, \"autoheuristic_collect\": \"\", \"autoheuristic_log_path\": \"DEFAULT\", \"autoheuristic_use\": \"mixed_mm\", \"autotune_fallback_to_aten\": false, \"autotune_in_subproc\": false, \"autotune_local_cache\": true, \"autotune_lookup_table\": {}, \"autotune_multi_device\": false, \"autotune_num_choices_displayed\": 10, \"autotune_remote_cache\": null, \"b2b_gemm_pass\": false, \"batch_fusion\": true, \"benchmark_combo_kernel\": false, \"benchmark_epilogue_fusion\": true, \"benchmark_fusion\": false, \"benchmark_harness\": true, \"benchmark_kernel\": false, \"bfloat16_atomic_adds_enabled\": true, \"bucket_all_gathers_fx\": \"none\", \"bucket_all_gathers_fx_bucket_size_determinator\": null, \"bucket_reduce_scatters_fx\": \"none\", \"bucket_reduce_scatters_fx_bucket_size_determinator\": null, \"bundle_triton_into_fx_graph_cache\": null, \"bundled_autotune_remote_cache\": null, \"bw_outputs_user_visible\": true, \"can_inplace_pad_graph_input\": false, \"check_stack_no_cycles_TESTING_ONLY\": false, \"combo_kernel_allow_mixed_sizes\": 1, \"combo_kernel_foreach_dynamic_shapes\": true, \"combo_kernels\": false, \"combo_kernels_autotune\": 1, \"comment_origin\": false, \"compile_threads\": 32, \"comprehensive_padding\": true, \"compute_all_bounds\": false, \"constant_and_index_propagation\": true, \"conv_1x1_as_mm\": false, \"coordinate_descent_check_all_directions\": false, \"coordinate_descent_search_radius\": 1, \"coordinate_descent_tuning\": false, \"cpp.cxx\": [null, \"g++\"], \"cpp.descriptive_names\": \"original_aten\", \"cpp.dynamic_threads\": false, \"cpp.enable_concat_linear\": false, \"cpp.enable_floating_point_contract_flag\": \"off\", \"cpp.enable_grouped_gemm_template\": false, \"cpp.enable_kernel_profile\": false, \"cpp.enable_loop_tail_vec\": true, \"cpp.enable_tiling_heuristics\": true, \"cpp.enable_unsafe_math_opt_flag\": false, 
\"cpp.fallback_scatter_reduce_sum\": true, \"cpp.force_inline_kernel\": false, \"cpp.gemm_cache_blocking\": null, \"cpp.gemm_max_k_slices\": 1, \"cpp.gemm_thread_factors\": null, \"cpp.inject_log1p_bug_TESTING_ONLY\": null, \"cpp.inject_relu_bug_TESTING_ONLY\": null, \"cpp.max_horizontal_fusion_size\": 16, \"cpp.min_chunk_size\": 512, \"cpp.no_redundant_loops\": true, \"cpp.simdlen\": null, \"cpp.threads\": -1, \"cpp.use_decompose_tanh\": false, \"cpp.use_small_dequant_buffer\": false, \"cpp.vec_isa_ok\": null, \"cpp.weight_prepack\": true, \"cpp_cache_precompile_headers\": false, \"cpp_wrapper\": false, \"cpp_wrapper_build_separate\": false, \"cpu_backend\": \"cpp\", \"cuda.arch\": null, \"cuda.binary_remote_cache_force_write\": false, \"cuda.compile_opt_level\": \"-O1\", \"cuda.cuda_cxx\": null, \"cuda.cutlass_backend_min_gemm_size\": 1, \"cuda.cutlass_dir\": \"/data/users/shangdiy/fbsource/buck-out/v2/gen/fbcode/4de56deb453463b3/caffe2/test/inductor/__provenance_tracing__/provenance_tracing#link-tree/third_party/cutlass\", \"cuda.cutlass_enabled_ops\": \"all\", \"cuda.cutlass_epilogue_fusion_enabled\": false, \"cuda.cutlass_hash_with_compile_cmd\": false, \"cuda.cutlass_instantiation_level\": \"0\", \"cuda.cutlass_max_profiling_configs\": null, \"cuda.cutlass_max_profiling_swizzle_options\": [1, 2, 4, 8], \"cuda.cutlass_op_allowlist_regex\": null, \"cuda.cutlass_op_denylist_regex\": null, \"cuda.cutlass_prescreening\": true, \"cuda.cutlass_presets\": null, \"cuda.cutlass_tma_only\": false, \"cuda.enable_caching_codegen\": true, \"cuda.enable_cuda_lto\": false, \"cuda.enable_debug_info\": false, \"cuda.enable_ptxas_info\": false, \"cuda.generate_test_runner\": false, \"cuda.upload_to_binary_remote_cache\": false, \"cuda.use_binary_remote_cache\": true, \"cuda.use_fast_math\": false, \"cuda.version\": null, \"cuda_backend\": \"triton\", \"dce\": false, \"debug\": false, \"debug_fusion\": false, \"debug_index_asserts\": false, \"debug_ir_traceback\": false, \"decompose_mem_bound_mm\": false, \"developer_warnings\": true, \"disable_cpp_codegen\": false, \"disable_padding_cpu\": true, \"disable_progress\": true, \"dynamic_scale_rblock\": true, \"efficient_conv_bn_eval_fx_passes\": false, \"emulate_precision_casts\": false, \"enable_auto_functionalized_v2\": true, \"enable_caching_generated_triton_templates\": true, \"enable_linear_binary_folding\": false, \"enabled_metric_tables\": \"\", \"epilogue_fusion\": true, \"epilogue_fusion_first\": false, \"estimate_op_runtime\": \"default\", \"external_matmul\": [], \"fallback_random\": false, \"force_fuse_int_mm_with_mul\": false, \"force_layout_optimization\": false, \"force_pointwise_cat\": false, \"force_same_precision\": false, \"force_shape_pad\": false, \"freezing\": false, \"freezing_discard_parameters\": false, \"fx_graph_cache\": false, \"fx_graph_remote_cache\": null, \"fx_passes_numeric_check\": {\"num_iterations\": 1, \"pre_grad\": false, \"precision\": 0.0001, \"requires_optimizer\": true}, \"generate_intermediate_hooks\": false, \"global_cache_dir\": null, \"graph_partition\": false, \"group_fusion\": false, \"halide.asserts\": false, \"halide.cpu_target\": \"host\", \"halide.debug\": false, \"halide.gpu_target\": \"host-cuda\", \"halide.scan_kernels\": false, \"halide.scheduler_cpu\": \"Adams2019\", \"halide.scheduler_cuda\": \"Anderson2021\", \"implicit_fallbacks\": true, \"inplace_buffers\": true, \"inplace_padding\": true, \"inter_node_bw\": 25, \"intra_node_bw\": 300, \"is_nightly_or_source\": false, \"is_predispatch\": false, 
\"joint_custom_post_pass\": null, \"joint_custom_pre_pass\": null, \"joint_graph_constant_folding\": true, \"keep_output_stride\": true, \"kernel_name_max_ops\": 10, \"layout_opt_default\": \"1\", \"layout_optimization\": true, \"log_tlparse\": false, \"loop_ordering_after_fusion\": false, \"max_autotune\": false, \"max_autotune_conv_backends\": \"ATEN,TRITON\", \"max_autotune_flex_search_space\": \"DEFAULT\", \"max_autotune_gemm\": false, \"max_autotune_gemm_backends\": \"ATEN,TRITON,CPP\", \"max_autotune_gemm_search_space\": \"DEFAULT\", \"max_autotune_pointwise\": false, \"max_autotune_report_choices_stats\": true, \"max_autotune_subproc_graceful_timeout_seconds\": 0.0, \"max_autotune_subproc_result_timeout_seconds\": 60.0, \"max_autotune_subproc_terminate_timeout_seconds\": 0.0, \"max_epilogue_benchmarked_choices\": 1, \"max_fusion_buffer_group_pairwise_attempts\": 64, \"max_fusion_size\": 64, \"max_pointwise_cat_inputs\": 8, \"memory_planning\": false, \"memory_pool\": \"intermediates\", \"min_num_split\": 0, \"mixed_mm_choice\": \"heuristic\", \"multi_kernel_hints\": [], \"nan_asserts\": false, \"non_blocking_remote_cache_write\": true, \"online_softmax\": true, \"optimize_scatter_upon_const_tensor\": true, \"pad_channels_last\": false, \"pad_outputs\": false, \"padding_alignment_bytes\": 128, \"padding_stride_threshold\": 1024, \"pattern_matcher\": true, \"permute_fusion\": false, \"pick_loop_orders\": true, \"post_grad_custom_post_pass\": null, \"post_grad_custom_pre_pass\": null, \"post_grad_fusion_options\": {}, \"pre_grad_custom_pass\": null, \"pre_grad_fusion_options\": {}, \"precompilation_timeout_seconds\": 3600, \"profile_bandwidth\": false, \"profile_bandwidth_output\": null, \"profile_bandwidth_regex\": \"\", \"profile_bandwidth_with_do_bench_using_profiling\": false, \"profiler_mark_wrapper_call\": false, \"prologue_fusion\": true, \"quiesce_async_compile_pool\": false, \"realize_acc_reads_size_threshold\": null, \"realize_acc_reads_threshold\": 8, \"realize_opcount_threshold\": 30, \"realize_reads_threshold\": 4, \"remote_gemm_autotune_cache\": false, \"remove_pre_grad_passes\": null, \"reorder_for_compute_comm_overlap\": false, \"reorder_for_compute_comm_overlap_passes\": [\"reorder_compute_for_overlap\", \"sink_waits\", \"raise_comms\"], \"reorder_for_locality\": true, \"reorder_for_peak_memory\": true, \"reorder_prefetch_limit\": null, \"rocm.arch\": [], \"rocm.ck_dir\": null, \"rocm.ck_max_profiling_configs\": null, \"rocm.ck_supported_arch\": [\"gfx90a\", \"gfx942\", \"gfx950\"], \"rocm.ck_tile_max_profiling_configs\": null, \"rocm.compile_opt_level\": \"-O2\", \"rocm.flush_denormals\": true, \"rocm.generate_test_runner\": false, \"rocm.is_debug\": false, \"rocm.kBatch_sweep\": null, \"rocm.n_max_profiling_configs\": null, \"rocm.print_kernel_resource_usage\": false, \"rocm.rocm_home\": null, \"rocm.save_temps\": false, \"rocm.split_k_threshold\": 16, \"rocm.use_fast_math\": true, \"rocm.use_preselected_instances\": false, \"save_args\": false, \"scalar_asserts\": true, \"score_fusion_memory_threshold\": 10, \"search_autotune_cache\": false, \"shape_padding\": true, \"size_asserts\": true, \"sleep_sec_TESTING_ONLY\": null, \"split_cat_fx_passes\": true, \"split_reductions\": true, \"static_launch_user_defined_triton_kernels\": false, \"static_weight_shapes\": true, \"strict_static_cuda_launcher\": false, \"test_configs.autotune_choice_desc_regex\": null, \"test_configs.autotune_choice_name_regex\": null, \"test_configs.force_extern_kernel_in_multi_template\": false, 
\"test_configs.graphsafe_rng_func_ignores_fallback_random\": false, \"test_configs.max_mm_configs\": null, \"test_configs.runtime_triton_dtype_assert\": false, \"test_configs.static_cpp_dtype_assert\": false, \"test_configs.track_memory_lifecycle\": null, \"test_configs.use_libtorch\": false, \"torchinductor_worker_logpath\": \"\", \"trace.compile_profile\": false, \"trace.debug_dir\": null, \"trace.debug_log\": false, \"trace.dot_graph_shape\": null, \"trace.draw_orig_fx_graph\": false, \"trace.enabled\": false, \"trace.fx_graph\": true, \"trace.fx_graph_transformed\": true, \"trace.graph_diagram\": false, \"trace.info_log\": false, \"trace.ir_post_fusion\": true, \"trace.ir_pre_fusion\": true, \"trace.log_autotuning_results\": false, \"trace.log_url_for_graph_xform\": null, \"trace.output_code\": true, \"trace.provenance_tracking_level\": 2, \"trace.save_real_tensors\": false, \"trace.upload_tar\": null, \"triton.autotune_at_compile_time\": null, \"triton.autotune_cublasLt\": true, \"triton.autotune_pointwise\": true, \"triton.autotune_with_sample_inputs\": false, \"triton.coalesce_tiling_analysis\": false, \"triton.codegen_upcast_to_fp32\": true, \"triton.cooperative_reductions\": false, \"triton.cudagraph_capture_sizes\": null, \"triton.cudagraph_dynamic_shape_warn_limit\": 50, \"triton.cudagraph_skip_dynamic_graphs\": false, \"triton.cudagraph_support_input_mutation\": false, \"triton.cudagraph_trees\": true, \"triton.cudagraph_trees_history_recording\": false, \"triton.cudagraph_unexpected_rerecord_limit\": 128, \"triton.cudagraphs\": false, \"triton.debug_sync_graph\": false, \"triton.debug_sync_kernel\": false, \"triton.decompose_k_threshold\": 32, \"triton.dense_indexing\": false, \"triton.descriptive_names\": \"original_aten\", \"triton.disallow_failing_autotune_kernels_TESTING_ONLY\": false, \"triton.divisible_by_16\": true, \"triton.enable_persistent_tma_matmul\": false, \"triton.fast_path_cudagraph_asserts\": false, \"triton.force_cooperative_reductions\": false, \"triton.force_cudagraph_sync\": false, \"triton.force_cudagraphs_warmup\": false, \"triton.inject_relu_bug_TESTING_ONLY\": null, \"triton.max_tiles\": null, \"triton.min_split_scan_rblock\": 256, \"triton.multi_kernel\": 0, \"triton.num_decompose_k_splits\": 10, \"triton.persistent_reductions\": true, \"triton.prefer_nd_tiling\": false, \"triton.skip_cudagraph_warmup\": false, \"triton.skip_l1_cache\": false, \"triton.slow_path_cudagraph_asserts\": true, \"triton.spill_threshold\": 16, \"triton.store_cubin\": false, \"triton.tile_reductions\": false, \"triton.tiling_prevents_pointwise_fusion\": true, \"triton.tiling_prevents_reduction_fusion\": true, \"triton.unique_kernel_names\": true, \"triton.unique_user_kernel_names\": false, \"triton.use_block_ptr\": false, \"triton.use_tensor_descriptor\": false, \"triton_kernel_default_layout_constraint\": \"needs_fixed_stride_order\", \"unbacked_symint_fallback\": 8192, \"unroll_reductions_threshold\": 8, \"unsafe_ignore_unsupported_triton_autotune_args\": false, \"unsafe_marked_cacheable_functions\": {}, \"unsafe_skip_cache_dynamic_shape_guards\": false, \"use_experimental_benchmarker\": false, \"use_fast_math\": false, \"use_mixed_mm\": true, \"use_static_cuda_launcher\": true, \"verbose_progress\": false, \"warn_mix_layout\": false, \"worker_log_path\": \"/logs/dedicated_log_torch_compile_worker_rank\", \"worker_start_method\": \"subprocess\", \"worker_suppress_logging\": true}", "remote_cache_version": null, "inductor_fx_remote_cache_hit_count": null, 
"inductor_fx_remote_cache_miss_count": null, "inductor_fx_remote_cache_backend_type": null, "inductor_fx_remote_cache_hit_keys": null, "inductor_fx_remote_cache_miss_keys": null, "cuda_version": "12.4.0", "triton_version": "3.3.1+fb", "feature_usage": null, "compile_time_autotune_time_us": null, "is_runtime": true, "gc_time_us": null, "tensorify_float_attempt": null, "tensorify_float_success": null, "tensorify_float_failure": null, "guard_latency_us": null, "recompile_reason": null, "num_graph_breaks": null, "triton_kernel_compile_times_us": null, "ir_count": null, "cudagraph_skip_reason": null, "python_version": "3.10.9+fb (3.10:1dd9be6, May  4 2022, 01:23:45) [Clang 17.0.4 (mononoke://mononoke.internal.tfbnw.net/fbsource 447fcd878ef9ed82d", "pgo_put_remote_code_state_time_us": null, "pgo_get_remote_code_state_time_us": null, "param_numel": null, "param_bytes": null, "param_count": null, "recompile_user_contexts": null, "inline_inbuilt_nn_modules_candidate": false}, "frame_id": 0, "frame_compile_id": 0}
diff --git a/tests/integration_test.rs b/tests/integration_test.rs
index fa67171..2569cb7 100644
--- a/tests/integration_test.rs
+++ b/tests/integration_test.rs
@@ -398,7 +398,7 @@ fn test_export_guard_report() {
 }
 
 #[test]
-fn test_provenance_tracking() {
+fn test_provenance_tracking_aot_cuda() {
     let expected_files = [
         "-_-_-_-/before_pre_grad_graph_0.txt",
         "-_-_-_-/after_post_grad_graph_6.txt",
@@ -423,6 +423,1148 @@ fn test_provenance_tracking() {
             prefix
         );
     }
+
+    // Read the HTML file and verify the line mappings
+    let html_path = map
+        .keys()
+        .find(|p| {
+            p.to_str()
+                .unwrap()
+                .contains("provenance_tracking_-_-_-_-.html")
+        })
+        .unwrap();
+    let html_content = map.get(html_path).unwrap();
+
+    // Extract the line mappings JSON from the script tag
+    // NOTE: the opening-tag marker is assumed here: locate the mappings JSON as the
+    // body of the first <script> tag in the generated HTML.
+    let script_start = html_content.find("<script").unwrap();
+    let json_start = html_content[script_start..].find('>').unwrap() + script_start + 1;
+    let json_end = html_content[json_start..].find("</script>").unwrap() + json_start;
+    let line_mappings_str = &html_content[json_start..json_end];
+    let line_mappings: serde_json::Value = serde_json::from_str(line_mappings_str).unwrap();
+
+    // Verify the line mappings match the expected values
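+    // Keys and values are 1-based line numbers in the corresponding artifacts: for
+    // example, "postToCppCode" maps a line of the post_grad graph dump to the line(s)
+    // of the generated cpp wrapper code it corresponds to, and "preToPost" links the
+    // pre_grad and post_grad graph dumps (semantics inferred from the map names).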
+    let expected_mappings = serde_json::json!({
+        "cppCodeToPost": {
+            "1060": [14, 27, 24],
+            "1064": [21, 18, 15],
+            "1071": [24],
+            "1079": [14, 27, 24],
+            "1084": [35, 31, 28, 34, 33, 32]
+        },
+        "postToCppCode": {
+            "14": [1060, 1079],
+            "15": [1064],
+            "18": [1064],
+            "21": [1064],
+            "24": [1071, 1060, 1079],
+            "27": [1060, 1079],
+            "28": [1084],
+            "31": [1084],
+            "32": [1084],
+            "33": [1084],
+            "34": [1084],
+            "35": [1084]
+        },
+        "postToPre": {
+            "11": [8],
+            "14": [8],
+            "15": [8],
+            "18": [11],
+            "21": [14],
+            "24": [17],
+            "27": [20],
+            "28": [20],
+            "31": [23],
+            "32": [23],
+            "33": [23],
+            "34": [23],
+            "35": [23]
+        },
+        "postToPyCode": {},
+        "preToPost": {
+            "11": [18],
+            "14": [21],
+            "17": [24],
+            "20": [27, 28],
+            "23": [31, 32, 33, 34, 35],
+            "8": [11, 14, 15]
+        },
+        "pyCodeToPost": {}
+    });
+
+    assert_eq!(line_mappings, expected_mappings);
+}
+
+#[test]
+fn test_provenance_tracking_aot_debug_handle() {
+    let expected_files = [
+        "-_-_-_-/before_pre_grad_graph_0.txt",
+        "-_-_-_-/after_post_grad_graph_6.txt",
+        "provenance_tracking_-_-_-_-.html",
+        "-_-_-_-/inductor_provenance_tracking_node_mappings_10.json",
+    ];
+
+    let path = Path::new("tests/inputs/inductor_provenance_aot_debug_handle_log.txt").to_path_buf();
+    let config = tlparse::ParseConfig {
+        inductor_provenance: true,
+        ..Default::default()
+    };
+    let output = tlparse::parse_path(&path, &config);
+    assert!(output.is_ok());
+    let map: HashMap = output.unwrap().into_iter().collect();
+
+    // Check all files are present
+    for prefix in expected_files {
+        assert!(
+            prefix_exists(&map, prefix),
+            "{} not found in output",
+            prefix
+        );
+    }
+
+    // Read the HTML file and verify the line mappings
+    let html_path = map
+        .keys()
+        .find(|p| {
+            p.to_str()
+                .unwrap()
+                .contains("provenance_tracking_-_-_-_-.html")
+        })
+        .unwrap();
+    let html_content = map.get(html_path).unwrap();
+
+    // Extract the line mappings JSON from the script tag
+    // NOTE: the opening-tag marker is assumed; take the body of the first <script> tag.
+    let script_start = html_content.find("<script").unwrap();
+    let json_start = html_content[script_start..].find('>').unwrap() + script_start + 1;
+    let json_end = html_content[json_start..].find("</script>").unwrap() + json_start;
+    let line_mappings_str = &html_content[json_start..json_end];
+    let line_mappings: serde_json::Value = serde_json::from_str(line_mappings_str).unwrap();
+
+    // Verify the line mappings match the expected values for the debug handle version
+    let expected_mappings = serde_json::json!({
+      "cppCodeToPost": {
+        "1074": [
+          12
+        ],
+        "1079": [
+          19,
+          16,
+          13
+        ],
+        "1087": [
+          22
+        ],
+        "1096": [
+          25
+        ],
+        "1102": [
+          33,
+          29,
+          26,
+          32,
+          31,
+          30
+        ]
+      },
+      "postToCppCode": {
+        "12": [
+          1074
+        ],
+        "13": [
+          1079
+        ],
+        "16": [
+          1079
+        ],
+        "19": [
+          1079
+        ],
+        "22": [
+          1087
+        ],
+        "25": [
+          1096
+        ],
+        "26": [
+          1102
+        ],
+        "29": [
+          1102
+        ],
+        "30": [
+          1102
+        ],
+        "31": [
+          1102
+        ],
+        "32": [
+          1102
+        ],
+        "33": [
+          1102
+        ]
+      },
+      "postToPre": {
+        "11": [
+          8
+        ],
+        "12": [
+          8
+        ],
+        "13": [
+          8
+        ],
+        "16": [
+          11
+        ],
+        "19": [
+          14
+        ],
+        "22": [
+          17
+        ],
+        "25": [
+          20
+        ],
+        "26": [
+          20
+        ],
+        "29": [
+          23
+        ],
+        "30": [
+          23
+        ],
+        "31": [
+          23
+        ],
+        "32": [
+          23
+        ],
+        "33": [
+          23
+        ]
+      },
+      "postToPyCode": {},
+      "preToPost": {
+        "11": [
+          16
+        ],
+        "14": [
+          19
+        ],
+        "17": [
+          22
+        ],
+        "20": [
+          25,
+          26
+        ],
+        "23": [
+          29,
+          30,
+          31,
+          32,
+          33
+        ],
+        "8": [
+          11,
+          12,
+          13
+        ]
+      },
+      "pyCodeToPost": {}
+    });
+
+    assert_eq!(line_mappings, expected_mappings);
+}
+
+#[test]
+fn test_provenance_tracking_aot_log() {
+    let expected_files = [
+        "-_-_-_-/before_pre_grad_graph_0.txt",
+        "-_-_-_-/after_post_grad_graph_6.txt",
+        "provenance_tracking_-_-_-_-.html",
+        "-_-_-_-/inductor_provenance_tracking_node_mappings_11.json",
+    ];
+
+    let path = Path::new("tests/inputs/inductor_provenance_aot_log.txt").to_path_buf();
+    let config = tlparse::ParseConfig {
+        inductor_provenance: true,
+        ..Default::default()
+    };
+    let output = tlparse::parse_path(&path, &config);
+    assert!(output.is_ok());
+    let map: HashMap = output.unwrap().into_iter().collect();
+
+    // Check all files are present
+    for prefix in expected_files {
+        assert!(
+            prefix_exists(&map, prefix),
+            "{} not found in output",
+            prefix
+        );
+    }
+
+    // Read the HTML file and verify the line mappings
+    let html_path = map
+        .keys()
+        .find(|p| {
+            p.to_str()
+                .unwrap()
+                .contains("provenance_tracking_-_-_-_-.html")
+        })
+        .unwrap();
+    let html_content = map.get(html_path).unwrap();
+
+    // Extract the line mappings JSON from the script tag
+    // NOTE: the opening-tag marker is assumed; take the body of the first <script> tag.
+    let script_start = html_content.find("<script").unwrap();
+    let json_start = html_content[script_start..].find('>').unwrap() + script_start + 1;
+    let json_end = html_content[json_start..].find("</script>").unwrap() + json_start;
+    let line_mappings_str = &html_content[json_start..json_end];
+    let line_mappings: serde_json::Value = serde_json::from_str(line_mappings_str).unwrap();
+
+    // For the aot log, we expect a structure similar to the aot cuda test but with different line numbers
+    let expected_mappings = serde_json::json!({
+      "cppCodeToPost": {
+        "813": [
+          12,
+          24,
+          21
+        ],
+        "821": [
+          18,
+          15,
+          21
+        ],
+        "829": [
+          12,
+          24,
+          21
+        ],
+        "834": [
+          31,
+          27,
+          30,
+          29,
+          28
+        ]
+      },
+      "postToCppCode": {
+        "12": [
+          813,
+          829
+        ],
+        "15": [
+          821
+        ],
+        "18": [
+          821
+        ],
+        "21": [
+          821,
+          813,
+          829
+        ],
+        "24": [
+          813,
+          829
+        ],
+        "27": [
+          834
+        ],
+        "28": [
+          834
+        ],
+        "29": [
+          834
+        ],
+        "30": [
+          834
+        ],
+        "31": [
+          834
+        ]
+      },
+      "postToPre": {
+        "11": [
+          8
+        ],
+        "12": [
+          8
+        ],
+        "15": [
+          11
+        ],
+        "18": [
+          14
+        ],
+        "21": [
+          17
+        ],
+        "24": [
+          20
+        ],
+        "27": [
+          23
+        ],
+        "28": [
+          23
+        ],
+        "29": [
+          23
+        ],
+        "30": [
+          23
+        ],
+        "31": [
+          23
+        ]
+      },
+      "postToPyCode": {},
+      "preToPost": {
+        "11": [
+          15
+        ],
+        "14": [
+          18
+        ],
+        "17": [
+          21
+        ],
+        "20": [
+          24
+        ],
+        "23": [
+          27,
+          28,
+          29,
+          30,
+          31
+        ],
+        "8": [
+          11,
+          12
+        ]
+      },
+      "pyCodeToPost": {}
+    });
+
+    assert_eq!(line_mappings, expected_mappings);
+}
+
+#[test]
+fn test_provenance_tracking_aot_log_old() {
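+    // Older fixture: it still uses the inductor_pre_grad_graph_* /
+    // inductor_post_grad_graph_* artifact names rather than before_/after_
+    // (see the expected file list below).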
+    let expected_files = [
+        "-_-_-_-/inductor_pre_grad_graph_0.txt",
+        "-_-_-_-/inductor_post_grad_graph_8.txt",
+        "provenance_tracking_-_-_-_-.html",
+        "-_-_-_-/inductor_provenance_tracking_node_mappings_11.json",
+    ];
+
+    let path = Path::new("tests/inputs/inductor_provenance_aot_log_old.txt").to_path_buf();
+    let config = tlparse::ParseConfig {
+        inductor_provenance: true,
+        ..Default::default()
+    };
+    let output = tlparse::parse_path(&path, &config);
+    assert!(output.is_ok());
+    let map: HashMap = output.unwrap().into_iter().collect();
+
+    // Check all files are present
+    for prefix in expected_files {
+        assert!(
+            prefix_exists(&map, prefix),
+            "{} not found in output",
+            prefix
+        );
+    }
+
+    // Read the HTML file and verify the line mappings
+    let html_path = map
+        .keys()
+        .find(|p| {
+            p.to_str()
+                .unwrap()
+                .contains("provenance_tracking_-_-_-_-.html")
+        })
+        .unwrap();
+    let html_content = map.get(html_path).unwrap();
+
+    // Extract the line mappings JSON from the script tag
+    // NOTE: the opening-tag marker is assumed; take the body of the first <script> tag.
+    let script_start = html_content.find("<script").unwrap();
+    let json_start = html_content[script_start..].find('>').unwrap() + script_start + 1;
+    let json_end = html_content[json_start..].find("</script>").unwrap() + json_start;
+    let line_mappings_str = &html_content[json_start..json_end];
+    let line_mappings: serde_json::Value = serde_json::from_str(line_mappings_str).unwrap();
+
+    // For the old log format, we expect the same mapping structure as the regular aot log
+    let expected_mappings = serde_json::json!({
+      "cppCodeToPost": {
+        "704": [
+          21
+        ],
+        "717": [
+          31,
+          27,
+          30,
+          29,
+          28
+        ]
+      },
+      "postToCppCode": {
+        "21": [
+          704
+        ],
+        "27": [
+          717
+        ],
+        "28": [
+          717
+        ],
+        "29": [
+          717
+        ],
+        "30": [
+          717
+        ],
+        "31": [
+          717
+        ]
+      },
+      "postToPre": {
+        "11": [
+          8
+        ],
+        "12": [
+          8
+        ],
+        "15": [
+          11
+        ],
+        "18": [
+          14
+        ],
+        "21": [
+          17
+        ],
+        "24": [
+          20
+        ],
+        "27": [
+          23
+        ],
+        "28": [
+          23
+        ],
+        "29": [
+          23
+        ],
+        "30": [
+          23
+        ],
+        "31": [
+          23
+        ]
+      },
+      "postToPyCode": {},
+      "preToPost": {
+        "11": [
+          15
+        ],
+        "14": [
+          18
+        ],
+        "17": [
+          21
+        ],
+        "20": [
+          24
+        ],
+        "23": [
+          27,
+          28,
+          29,
+          30,
+          31
+        ],
+        "8": [
+          11,
+          12
+        ]
+      },
+      "pyCodeToPost": {}
+    });
+
+    assert_eq!(line_mappings, expected_mappings);
+}
+
+#[test]
+fn test_provenance_tracking_jit_cuda() {
+    let expected_files = [
+        "-_0_0_0/before_pre_grad_graph_1.txt",
+        "-_0_0_0/after_post_grad_graph_8.txt",
+        "provenance_tracking_-_0_0_0.html",
+        "-_0_0_0/inductor_provenance_tracking_node_mappings_14.json",
+    ];
+
+    let path = Path::new("tests/inputs/inductor_provenance_jit_cuda_log.txt").to_path_buf();
+    let config = tlparse::ParseConfig {
+        inductor_provenance: true,
+        ..Default::default()
+    };
+    let output = tlparse::parse_path(&path, &config);
+    assert!(output.is_ok());
+    let map: HashMap = output.unwrap().into_iter().collect();
+
+    // Check all files are present
+    for prefix in expected_files {
+        assert!(
+            prefix_exists(&map, prefix),
+            "{} not found in output",
+            prefix
+        );
+    }
+
+    // Read the HTML file and verify the line mappings
+    let html_path = map
+        .keys()
+        .find(|p| {
+            p.to_str()
+                .unwrap()
+                .contains("provenance_tracking_-_0_0_0.html")
+        })
+        .unwrap();
+    let html_content = map.get(html_path).unwrap();
+
+    // Extract the line mappings JSON from the script tag
+    // NOTE: the opening-tag marker is assumed; take the body of the first <script> tag.
+    let script_start = html_content.find("<script").unwrap();
+    let json_start = html_content[script_start..].find('>').unwrap() + script_start + 1;
+    let json_end = html_content[json_start..].find("</script>").unwrap() + json_start;
+    let line_mappings_str = &html_content[json_start..json_end];
+    let line_mappings: serde_json::Value = serde_json::from_str(line_mappings_str).unwrap();
+
+    // Verify the line mappings match the expected values for jit cuda
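+    // In the JIT case the generated wrapper is Python code, so postToPyCode /
+    // pyCodeToPost are populated while the cpp mappings stay empty (contrast with
+    // the aot tests above).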
+    let expected_mappings = serde_json::json!({
+      "cppCodeToPost": {},
+      "postToCppCode": {},
+      "postToPre": {
+        "11": [
+          14
+        ],
+        "14": [
+          17
+        ],
+        "17": [
+          20
+        ],
+        "20": [
+          23
+        ],
+        "21": [
+          23
+        ],
+        "24": [
+          26
+        ],
+        "25": [
+          26
+        ],
+        "26": [
+          26
+        ],
+        "27": [
+          26
+        ],
+        "28": [
+          26
+        ],
+        "4": [
+          11
+        ],
+        "7": [
+          11
+        ],
+        "8": [
+          11
+        ]
+      },
+      "postToPyCode": {
+        "11": [
+          192
+        ],
+        "14": [
+          192
+        ],
+        "17": [
+          197,
+          186,
+          201
+        ],
+        "20": [
+          186,
+          201
+        ],
+        "21": [
+          207
+        ],
+        "24": [
+          207
+        ],
+        "25": [
+          207
+        ],
+        "26": [
+          207
+        ],
+        "27": [
+          207
+        ],
+        "28": [
+          207
+        ],
+        "7": [
+          186,
+          201
+        ],
+        "8": [
+          192
+        ]
+      },
+      "preToPost": {
+        "11": [
+          4,
+          7,
+          8
+        ],
+        "14": [
+          11
+        ],
+        "17": [
+          14
+        ],
+        "20": [
+          17
+        ],
+        "23": [
+          20,
+          21
+        ],
+        "26": [
+          24,
+          25,
+          26,
+          27,
+          28
+        ]
+      },
+      "pyCodeToPost": {
+        "186": [
+          7,
+          20,
+          17
+        ],
+        "192": [
+          14,
+          11,
+          8
+        ],
+        "197": [
+          17
+        ],
+        "201": [
+          7,
+          20,
+          17
+        ],
+        "207": [
+          28,
+          24,
+          21,
+          27,
+          26,
+          25
+        ]
+      }
+    });
+
+    assert_eq!(line_mappings, expected_mappings);
+}
+
+#[test]
+fn test_provenance_tracking_jit_log() {
+    let expected_files = [
+        "-_0_0_0/before_pre_grad_graph_1.txt",
+        "-_0_0_0/after_post_grad_graph_8.txt",
+        "provenance_tracking_-_0_0_0.html",
+        "-_0_0_0/inductor_provenance_tracking_node_mappings_13.json",
+    ];
+
+    let path = Path::new("tests/inputs/inductor_provenance_jit_log.txt").to_path_buf();
+    let config = tlparse::ParseConfig {
+        inductor_provenance: true,
+        ..Default::default()
+    };
+    let output = tlparse::parse_path(&path, &config);
+    assert!(output.is_ok());
+    let map: HashMap = output.unwrap().into_iter().collect();
+
+    // Check all files are present
+    for prefix in expected_files {
+        assert!(
+            prefix_exists(&map, prefix),
+            "{} not found in output",
+            prefix
+        );
+    }
+
+    // Read the HTML file and verify the line mappings
+    let html_path = map
+        .keys()
+        .find(|p| {
+            p.to_str()
+                .unwrap()
+                .contains("provenance_tracking_-_0_0_0.html")
+        })
+        .unwrap();
+    let html_content = map.get(html_path).unwrap();
+
+    // Extract the line mappings JSON from the script tag
+    // NOTE: the opening-tag marker is assumed; take the body of the first <script> tag.
+    let script_start = html_content.find("<script").unwrap();
+    let json_start = html_content[script_start..].find('>').unwrap() + script_start + 1;
+    let json_end = html_content[json_start..].find("</script>").unwrap() + json_start;
+    let line_mappings_str = &html_content[json_start..json_end];
+    let line_mappings: serde_json::Value = serde_json::from_str(line_mappings_str).unwrap();
+
+    // For the jit log, we expect a structure similar to the jit cuda test but with different kernel names
+    let expected_mappings = serde_json::json!({
+      "cppCodeToPost": {},
+      "postToCppCode": {},
+      "postToPre": {
+        "11": [
+          17
+        ],
+        "14": [
+          20
+        ],
+        "17": [
+          23
+        ],
+        "20": [
+          26
+        ],
+        "21": [
+          26
+        ],
+        "22": [
+          26
+        ],
+        "23": [
+          26
+        ],
+        "24": [
+          26
+        ],
+        "4": [
+          11
+        ],
+        "5": [
+          11
+        ],
+        "8": [
+          14
+        ]
+      },
+      "postToPyCode": {
+        "11": [
+          138
+        ],
+        "14": [
+          138,
+          132,
+          142
+        ],
+        "17": [
+          132,
+          142
+        ],
+        "20": [
+          147
+        ],
+        "21": [
+          147
+        ],
+        "22": [
+          147
+        ],
+        "23": [
+          147
+        ],
+        "24": [
+          147
+        ],
+        "5": [
+          132,
+          142
+        ],
+        "8": [
+          138
+        ]
+      },
+      "preToPost": {
+        "11": [
+          4,
+          5
+        ],
+        "14": [
+          8
+        ],
+        "17": [
+          11
+        ],
+        "20": [
+          14
+        ],
+        "23": [
+          17
+        ],
+        "26": [
+          20,
+          21,
+          22,
+          23,
+          24
+        ]
+      },
+      "pyCodeToPost": {
+        "132": [
+          5,
+          17,
+          14
+        ],
+        "138": [
+          11,
+          8,
+          14
+        ],
+        "142": [
+          5,
+          17,
+          14
+        ],
+        "147": [
+          24,
+          20,
+          23,
+          22,
+          21
+        ]
+      }
+    });
+
+    assert_eq!(line_mappings, expected_mappings);
+}
+
+#[test]
+fn test_provenance_tracking_jit_debug_handle() {
+    let expected_files = [
+        "-_0_0_0/before_pre_grad_graph_1.txt",
+        "-_0_0_0/after_post_grad_graph_11.txt",
+        "provenance_tracking_-_0_0_0.html",
+        "-_0_0_0/inductor_provenance_tracking_node_mappings_14.json",
+    ];
+
+    let path = Path::new("tests/inputs/inductor_provenance_jit_debug_handle_log.txt").to_path_buf();
+    let config = tlparse::ParseConfig {
+        inductor_provenance: true,
+        ..Default::default()
+    };
+    let output = tlparse::parse_path(&path, &config);
+    assert!(output.is_ok());
+    let map: HashMap = output.unwrap().into_iter().collect();
+
+    // Check all files are present
+    for prefix in expected_files {
+        assert!(
+            prefix_exists(&map, prefix),
+            "{} not found in output",
+            prefix
+        );
+    }
+
+    // Read the HTML file and verify the line mappings
+    let html_path = map
+        .keys()
+        .find(|p| {
+            p.to_str()
+                .unwrap()
+                .contains("provenance_tracking_-_0_0_0.html")
+        })
+        .unwrap();
+    let html_content = map.get(html_path).unwrap();
+
+    // Extract the line mappings JSON from the script tag
+    // NOTE: the opening-tag marker is assumed; take the body of the first <script> tag.
+    let script_start = html_content.find("<script").unwrap();
+    let json_start = html_content[script_start..].find('>').unwrap() + script_start + 1;
+    let json_end = html_content[json_start..].find("</script>").unwrap() + json_start;
+    let line_mappings_str = &html_content[json_start..json_end];
+    let line_mappings: serde_json::Value = serde_json::from_str(line_mappings_str).unwrap();
+
+    // For the jit debug handle log, we expect a structure similar to the jit cuda test
+    let expected_mappings = serde_json::json!({
+      "cppCodeToPost": {},
+      "postToCppCode": {},
+      "postToPre": {
+        "12": [
+          17
+        ],
+        "15": [
+          20
+        ],
+        "18": [
+          23
+        ],
+        "19": [
+          23
+        ],
+        "22": [
+          26
+        ],
+        "23": [
+          26
+        ],
+        "24": [
+          26
+        ],
+        "25": [
+          26
+        ],
+        "26": [
+          26
+        ],
+        "4": [
+          11
+        ],
+        "5": [
+          11
+        ],
+        "6": [
+          11
+        ],
+        "9": [
+          14
+        ]
+      },
+      "postToPyCode": {
+        "12": [
+          204
+        ],
+        "15": [
+          211
+        ],
+        "18": [
+          216
+        ],
+        "19": [
+          223
+        ],
+        "22": [
+          223
+        ],
+        "23": [
+          223
+        ],
+        "24": [
+          223
+        ],
+        "25": [
+          223
+        ],
+        "26": [
+          223
+        ],
+        "29": [
+          204
+        ],
+        "5": [
+          197
+        ],
+        "6": [
+          204
+        ],
+        "9": [
+          204
+        ]
+      },
+      "preToPost": {
+        "11": [
+          4,
+          5,
+          6
+        ],
+        "14": [
+          9
+        ],
+        "17": [
+          12
+        ],
+        "20": [
+          15
+        ],
+        "23": [
+          18,
+          19
+        ],
+        "26": [
+          22,
+          23,
+          24,
+          25,
+          26
+        ]
+      },
+      "pyCodeToPost": {
+        "197": [
+          5
+        ],
+        "204": [
+          12,
+          9,
+          6,
+          29
+        ],
+        "211": [
+          15
+        ],
+        "216": [
+          18
+        ],
+        "223": [
+          26,
+          22,
+          19,
+          25,
+          24,
+          23
+        ]
+      }
+    });
+
+    assert_eq!(line_mappings, expected_mappings);
 }
 
 #[test]