From 76dac58c9a77d9fb78a33c832f80d40f236ecd66 Mon Sep 17 00:00:00 2001
From: Guray Ozen <guray.ozen@gmail.com>
Date: Tue, 18 Nov 2025 12:56:42 +0100
Subject: [PATCH 01/52] [MLIR][NVVM] Move the docs to markdown file (#168375)

---
 mlir/docs/Dialects/NVVM/_index.md           | 84 +++++++++++++++++++++
 mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 78 -------------------
 2 files changed, 84 insertions(+), 78 deletions(-)
 create mode 100644 mlir/docs/Dialects/NVVM/_index.md

diff --git a/mlir/docs/Dialects/NVVM/_index.md b/mlir/docs/Dialects/NVVM/_index.md
new file mode 100644
index 0000000000000..f4832f76f86ad
--- /dev/null
+++ b/mlir/docs/Dialects/NVVM/_index.md
@@ -0,0 +1,84 @@
+# NVVM Dialect
+
+The NVVM dialect is MLIR's LLVM-IR-based, NVIDIA-specific backend dialect. It
+models NVVM intrinsics and public ISA functionality and introduces NVIDIA
+extensions to the MLIR/LLVM type system and address spaces (e.g., global,
+shared, and cluster memory), enabling faithful lowering of GPU kernels to the
+NVPTX toolchain. While an NVVM op usually maps to a single LLVM IR intrinsic,
+the NVVM dialect uses type polymorphism and other attributes so that a single
+NVVM op can map to different LLVM intrinsics.
+
+## Scope and Capabilities
+
+The dialect covers core GPU features such as thread/block builtins, barriers
+and atomics, warp-level collectives (e.g., shuffle/vote), matrix/tensor core
+operations (e.g., `mma.sync`, `wgmma`), tensor memory accelerator (TMA)
+operations, asynchronous copies (`cp.async`, bulk/tensor variants) with memory
+barriers, cache and prefetch controls, and NVVM-specific attributes and enums
+(e.g., FP rounding modes, memory scopes, and MMA types/layouts).
+
+## Placement in the Lowering Pipeline
+
+NVVM sits below target-agnostic dialects like `gpu` and NVIDIA's `nvgpu`.
+Typical pipelines convert `gpu`/`nvgpu` ops into NVVM using
+`-convert-gpu-to-nvvm` and `-convert-nvgpu-to-nvvm`, then translate into LLVM
+for final code generation via the NVPTX backend.
+
+## Target Configuration and Serialization
+
+NVVM provides a `#nvvm.target` attribute to describe the GPU target (SM,
+features, and flags). In conjunction with `gpu` serialization (e.g.,
+`gpu-module-to-binary`), this enables producing architecture-specific GPU
+binaries (such as CUBIN) from nested GPU modules.
+
+## Inline PTX
+
+When an intrinsic is unavailable or a performance-critical sequence must be
+expressed directly, NVVM provides an `nvvm.inline_ptx` op to embed PTX inline
+as a last-resort escape hatch, with explicit operands and results.
+
+## Memory Spaces
+
+The NVVM dialect introduces the following memory spaces, each with distinct
+scopes and lifetimes:
+
+| Memory Space      | Address Space | Scope                | Lifetime          |
+|-------------------|---------------|----------------------|-------------------|
+| `generic`         | 0             | All threads          | Context-dependent |
+| `global`          | 1             | All threads (device) | Application       |
+| `shared`          | 3             | Thread block (CTA)   | Kernel execution  |
+| `constant`        | 4             | All threads          | Application       |
+| `local`           | 5             | Single thread        | Kernel execution  |
+| `tensor`          | 6             | Thread block (CTA)   | Kernel execution  |
+| `shared_cluster`  | 7             | Thread block cluster | Kernel execution  |
+
+### Memory Space Details
+
+- **generic**: Can point to any memory space; requires runtime resolution of
+  the actual address space. Use when the pointer origin is unknown at compile
+  time. Performance varies based on the underlying memory space.
+- **global**: Accessible by all threads across all blocks; persists across
+  kernel launches. Highest latency but largest capacity (device memory). Best
+  for large data and inter-kernel communication.
+- **shared**: Shared within a thread block (CTA); very fast on-chip memory for
+  cooperation between threads in the same block. Limited capacity. Ideal for
+  block-level collaboration, caching, and reducing global memory traffic.
+- **constant**: Read-only memory cached per SM. Size typically limited to 64KB.
+  Best for read-only data and uniform values accessed by all threads.
+- **local**: Private to each thread. Use for per-thread private data and
+  automatic variables that don't fit in registers.
+- **tensor**: Special memory space for tensor core operations. Used by
+  `tcgen05` instructions on SM 100+ for tensor input/output operations.
+- **shared_cluster**: Distributed shared memory across thread blocks within a
+  cluster (SM 90+). Enables collaboration beyond single-block scope with fast
+  access across cluster threads.
+
+
+## Non-Goals
+
+NVVM is not a place for convenience or "wrapper" ops. It is not intended to
+introduce high-level ops that expand into multiple unrelated NVVM intrinsics or
+that lower to no intrinsic at all. Such abstractions belong in higher-level
+dialects (e.g., `nvgpu`, `gpu`, or project-specific dialects). The design
+intent is a thin, predictable, low-level surface with near-mechanical lowering
+to NVVM/LLVM IR.
\ No newline at end of file
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 87c73c4587485..524b9f820f290 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -37,84 +37,6 @@ def LLVM_PointerSharedCluster : LLVM_PointerInAddressSpace<7>;
 //===----------------------------------------------------------------------===//
 
 def NVVM_Dialect : Dialect {
-  let summary = "The NVVM dialect that models NVIDIA's public ISA";
-
-  let description = [{
-    The NVVM dialect is MLIR's LLVM-IR-based, NVIDIA-specific backend dialect. It
-    models NVVM intrinsics and public ISA functionality and introduces NVIDIA
-    extensions to the MLIR/LLVM type system and address spaces (e.g., global,
-    shared, and cluster memory), enabling faithful lowering of GPU kernels to the
-    NVPTX toolchain. While a NVVM op usually maps to a single LLVM IR intrinsic,
-    the NVVM dialect uses type polymorphism and other attributes so that a single
-    NVVM op can map to different LLVM intrinsics.
-
-    **Scope and capabilities:** The dialect covers core GPU features such as
-    thread/block builtins, barriers and atomics, warp-level collectives (e.g.,
-    shuffle/vote), matrix/tensor core operations (e.g., `mma.sync`, `wgmma`),
-    tensor memory accelerator (TMA) operations, asynchronous copies (`cp.async`,
-    bulk/tensor variants) with memory barriers, cache and prefetch controls, and
-    NVVM-specific attributes and enums (e.g., FP rounding modes, memory scopes,
-    and MMA types/layouts).
-
-    **Non-goals:** NVVM is not a place for convenience or “wrapper” ops. It is
-    not intended to introduce high-level ops that expand into multiple unrelated
-    NVVM intrinsics or that lower to no intrinsic at all. Such abstractions belong
-    in higher-level dialects (e.g., `nvgpu`, `gpu`, or project-specific dialects).
-    The design intent is a thin, predictable, low-level surface with
-    near-mechanical lowering to NVVM/LLVM IR.
-
-    **Placement in the lowering pipeline:** NVVM sits below target-agnostic
-    dialects like `gpu` and NVIDIA's `nvgpu`. Typical pipelines convert
-    `gpu`/`nvgpu` ops into NVVM using `-convert-gpu-to-nvvm` and
-    `-convert-nvgpu-to-nvvm`, then translate into LLVM for final code
-    generation via NVPTX backend.
-
-    **Target configuration and serialization:** NVVM provides a `#nvvm.target`
-    attribute to describe the GPU target (SM, features, and flags). In
-    conjunction with `gpu` serialization (e.g., `gpu-module-to-binary`), this
-    enables producing architecture-specific GPU binaries (such as CUBIN) from
-    nested GPU modules.
-
-    **Inline PTX:** When an intrinsic is unavailable or a performance-critical
-    sequence must be expressed directly, NVVM provides an `nvvm.inline_ptx` op to
-    embed PTX inline as a last-resort escape hatch, with explicit operands and
-    results.
-
-
-    **Memory Spaces:** The NVVM dialect introduces the following memory spaces,
-    each with distinct scopes and lifetimes:
-```
-    | Memory Space      | Address Space | Scope                | Lifetime          |
-    |-------------------|---------------|----------------------|-------------------|
-    | `generic`         | 0             | All threads          | Context-dependent |
-    | `global`          | 1             | All threads (device) | Application       |
-    | `shared`          | 3             | Thread block (CTA)   | Kernel execution  |
-    | `constant`        | 4             | All threads (RO)     | Application       |
-    | `local`           | 5             | Single thread        | Kernel execution  |
-    | `tensor`          | 6             | Thread block (CTA)   | Kernel execution  |
-    | `shared_cluster`  | 7             | Thread block cluster | Kernel execution  |
-```
-    **Memory Space Details:**
-    - **generic**: Can point to any memory space; requires runtime resolution of
-      actual address space. Use when pointer origin is unknown at compile time.
-      Performance varies based on the underlying memory space.
-    - **global**: Accessible by all threads across all blocks; persists across
-      kernel launches. Highest latency but largest capacity (device memory). Best
-      for large data and inter-kernel communication.
-    - **shared**: Shared within a thread block (CTA); very fast on-chip memory for
-      cooperation between threads in the same block. Limited capacity. Ideal for 
-      block-level collaboration, caching, and reducing global memory traffic.
-    - **constant**: Read-only memory cached per SM. Size typically limited to 
-      64KB. Best for read-only data and uniform values accessed by all threads.
-    - **local**: Private to each thread. Use for per-thread private data and
-      automatic variables that don't fit in registers.
-    - **tensor**: Special memory space for tensor core operations. Used by
-      `tcgen05` instructions on SM 100+ for tensor input/output operations.
-    - **shared_cluster**: Distributed shared memory across thread blocks within
-      a cluster (SM 90+). Enables collaboration beyond single-block scope with
-      fast access across cluster threads.
-  }];
-
   let name = "nvvm";
   let cppNamespace = "::mlir::NVVM";
   let dependentDialects = ["LLVM::LLVMDialect"];

From 4ecfaa602f56a29ea8acd3fd39cf0cf3958b4dae Mon Sep 17 00:00:00 2001
From: David Green <david.green@arm.com>
Date: Tue, 18 Nov 2025 12:05:02 +0000
Subject: [PATCH 02/52] [AArch64][GlobalISel] Add better basic legalization for
 llround. (#168427)

This adds handling for f16 and f128 lround/llround under LP64 targets,
promoting the f16 where needed and using a libcall for f128. The resulting
codegen is now identical to the SelectionDAG version.
---
 llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp      | 12 ++++++++++++
 .../Target/AArch64/GISel/AArch64LegalizerInfo.cpp    |  8 +++++---
 llvm/test/CodeGen/AArch64/llround-conv-fp16.ll       |  8 ++------
 llvm/test/CodeGen/AArch64/llround-conv.ll            |  5 +----
 llvm/test/CodeGen/AArch64/lround-conv-fp16.ll        |  8 ++------
 llvm/test/CodeGen/AArch64/lround-conv.ll             |  5 +----
 6 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index cacb292acee18..ba28e4dda3313 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3439,6 +3439,18 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
     Observer.changedInstr(MI);
     return Legalized;
   }
+  case TargetOpcode::G_LROUND:
+  case TargetOpcode::G_LLROUND:
+    Observer.changingInstr(MI);
+
+    if (TypeIdx == 0)
+      widenScalarDst(MI, WideTy);
+    else
+      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
+
+    Observer.changedInstr(MI);
+    return Legalized;
+
   case TargetOpcode::G_INTTOPTR:
     if (TypeIdx != 1)
       return UnableToLegalize;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index a88817c9d2d19..fdf69b04bf676 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -449,10 +449,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .minScalar(0, s32)
       .libcallFor({{s32, s32}, {s64, s32}, {s128, s32}});
 
-  // TODO: Libcall support for s128.
-  // TODO: s16 should be legal with full FP16 support.
   getActionDefinitionsBuilder({G_LROUND, G_LLROUND})
-      .legalFor({{s64, s32}, {s64, s64}});
+      .legalFor({{s64, s32}, {s64, s64}})
+      .legalFor(HasFP16, {{s64, s16}})
+      .minScalar(0, s64)
+      .minScalar(1, s32)
+      .libcallFor({{s64, s128}});
 
   // TODO: Custom legalization for mismatched types.
   getActionDefinitionsBuilder(G_FCOPYSIGN)
diff --git a/llvm/test/CodeGen/AArch64/llround-conv-fp16.ll b/llvm/test/CodeGen/AArch64/llround-conv-fp16.ll
index cb042757a4a42..3a4be1bda7cd6 100644
--- a/llvm/test/CodeGen/AArch64/llround-conv-fp16.ll
+++ b/llvm/test/CodeGen/AArch64/llround-conv-fp16.ll
@@ -1,12 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK-NOFP16
 ; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK-FP16
-; RUN: llc < %s -mtriple=aarch64 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK-NOFP16,CHECK-GI
-; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK-FP16,CHECK-GI
-
-; CHECK-GI:       warning: Instruction selection used fallback path for testmhhs
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for testmhws
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for testmhxs
+; RUN: llc < %s -mtriple=aarch64 -global-isel | FileCheck %s --check-prefixes=CHECK-NOFP16
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel | FileCheck %s --check-prefixes=CHECK-FP16
 
 define i16 @testmhhs(half %x) {
 ; CHECK-NOFP16-LABEL: testmhhs:
diff --git a/llvm/test/CodeGen/AArch64/llround-conv.ll b/llvm/test/CodeGen/AArch64/llround-conv.ll
index 4cc089804ce97..bdee73076347a 100644
--- a/llvm/test/CodeGen/AArch64/llround-conv.ll
+++ b/llvm/test/CodeGen/AArch64/llround-conv.ll
@@ -1,9 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
 ; RUN: llc < %s -mtriple=aarch64 -mattr=+neon | FileCheck %s
-; RUN: llc < %s -mtriple=aarch64 -mattr=+neon -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-
-; CHECK-GI:       warning: Instruction selection used fallback path for testmswl
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for testmsll
+; RUN: llc < %s -mtriple=aarch64 -mattr=+neon -global-isel | FileCheck %s
 
 define i32 @testmsws(float %x) {
 ; CHECK-LABEL: testmsws:
diff --git a/llvm/test/CodeGen/AArch64/lround-conv-fp16.ll b/llvm/test/CodeGen/AArch64/lround-conv-fp16.ll
index a29dea0eb9f9f..0b18f220067ca 100644
--- a/llvm/test/CodeGen/AArch64/lround-conv-fp16.ll
+++ b/llvm/test/CodeGen/AArch64/lround-conv-fp16.ll
@@ -1,12 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK-NOFP16
 ; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK-FP16
-; RUN: llc < %s -mtriple=aarch64 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK-NOFP16,CHECK-GI
-; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK-FP16,CHECK-GI
-
-; CHECK-GI:       warning: Instruction selection used fallback path for testmhhs
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for testmhws
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for testmhxs
+; RUN: llc < %s -mtriple=aarch64 -global-isel | FileCheck %s --check-prefixes=CHECK-NOFP16
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel | FileCheck %s --check-prefixes=CHECK-FP16
 
 define i16 @testmhhs(half %x) {
 ; CHECK-NOFP16-LABEL: testmhhs:
diff --git a/llvm/test/CodeGen/AArch64/lround-conv.ll b/llvm/test/CodeGen/AArch64/lround-conv.ll
index 0bf82b538e70c..4b1782457cc10 100644
--- a/llvm/test/CodeGen/AArch64/lround-conv.ll
+++ b/llvm/test/CodeGen/AArch64/lround-conv.ll
@@ -1,9 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
 ; RUN: llc < %s -mtriple=aarch64 -mattr=+neon | FileCheck %s
-; RUN: llc < %s -mtriple=aarch64 -mattr=+neon -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-
-; CHECK-GI:       warning: Instruction selection used fallback path for testmswl
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for testmsll
+; RUN: llc < %s -mtriple=aarch64 -mattr=+neon -global-isel | FileCheck %s
 
 define i32 @testmsws(float %x) {
 ; CHECK-LABEL: testmsws:

From 59ed6dfe97b35a4dc88f69e3d830edf8caa99d10 Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker@arm.com>
Date: Tue, 18 Nov 2025 12:15:38 +0000
Subject: [PATCH 03/52] [LLVM][CodeGen][SVE] Use DUPM for constantfp splats.
 (#168391)

This helps cases where the immediate range of FDUP is not sufficient.
---
 .../Target/AArch64/AArch64ISelDAGToDAG.cpp    |  71 +++--
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |   2 +-
 llvm/lib/Target/AArch64/SVEInstrFormats.td    |  25 ++
 .../test/CodeGen/AArch64/sve-bf16-combines.ll |   8 +-
 llvm/test/CodeGen/AArch64/sve-fp-combine.ll   |  15 +-
 .../CodeGen/AArch64/sve-fp-reduce-fadda.ll    |  15 +-
 llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll   | 121 ++++----
 llvm/test/CodeGen/AArch64/sve-llrint.ll       | 202 ++++++------
 llvm/test/CodeGen/AArch64/sve-lrint.ll        | 202 ++++++------
 llvm/test/CodeGen/AArch64/sve-vector-splat.ll | 292 +++++++++++++++++-
 llvm/test/CodeGen/AArch64/sve-vselect-imm.ll  |  18 +-
 11 files changed, 626 insertions(+), 345 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index f1db05dda4e40..08466667c0fa5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -4403,43 +4403,46 @@ bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
 
 bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
                                               bool Invert) {
-  if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
-    uint64_t ImmVal = CNode->getZExtValue();
-    SDLoc DL(N);
-
-    if (Invert)
-      ImmVal = ~ImmVal;
+  uint64_t ImmVal;
+  if (auto CI = dyn_cast<ConstantSDNode>(N))
+    ImmVal = CI->getZExtValue();
+  else if (auto CFP = dyn_cast<ConstantFPSDNode>(N))
+    ImmVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+  else
+    return false;
 
-    // Shift mask depending on type size.
-    switch (VT.SimpleTy) {
-    case MVT::i8:
-      ImmVal &= 0xFF;
-      ImmVal |= ImmVal << 8;
-      ImmVal |= ImmVal << 16;
-      ImmVal |= ImmVal << 32;
-      break;
-    case MVT::i16:
-      ImmVal &= 0xFFFF;
-      ImmVal |= ImmVal << 16;
-      ImmVal |= ImmVal << 32;
-      break;
-    case MVT::i32:
-      ImmVal &= 0xFFFFFFFF;
-      ImmVal |= ImmVal << 32;
-      break;
-    case MVT::i64:
-      break;
-    default:
-      llvm_unreachable("Unexpected type");
-    }
+  if (Invert)
+    ImmVal = ~ImmVal;
 
-    uint64_t encoding;
-    if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) {
-      Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
-      return true;
-    }
+  // Replicate the element-sized immediate across all 64 bits.
+  switch (VT.SimpleTy) {
+  case MVT::i8:
+    ImmVal &= 0xFF;
+    ImmVal |= ImmVal << 8;
+    ImmVal |= ImmVal << 16;
+    ImmVal |= ImmVal << 32;
+    break;
+  case MVT::i16:
+    ImmVal &= 0xFFFF;
+    ImmVal |= ImmVal << 16;
+    ImmVal |= ImmVal << 32;
+    break;
+  case MVT::i32:
+    ImmVal &= 0xFFFFFFFF;
+    ImmVal |= ImmVal << 32;
+    break;
+  case MVT::i64:
+    break;
+  default:
+    llvm_unreachable("Unexpected type");
   }
-  return false;
+
+  uint64_t encoding;
+  if (!AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding))
+    return false;
+
+  Imm = CurDAG->getTargetConstant(encoding, SDLoc(N), MVT::i64);
+  return true;
 }
 
 // SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index c8c21c4822ffe..e99b3f8ff07e0 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -989,7 +989,7 @@ let Predicates = [HasSVE_or_SME] in {
             (DUP_ZR_D (MOVi64imm (bitcast_fpimm_to_i64 f64:$val)))>;
 
   // Duplicate FP immediate into all vector elements
-  let AddedComplexity = 2 in {
+  let AddedComplexity = 3 in {
     def : Pat<(nxv8f16 (splat_vector fpimm16:$imm8)),
               (FDUP_ZI_H fpimm16:$imm8)>;
     def : Pat<(nxv4f16 (splat_vector fpimm16:$imm8)),
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 1664f4ad0c8fa..1e771e1fb9403 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -347,6 +347,11 @@ def SVELogicalImm16Pat : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i16>",
 def SVELogicalImm32Pat : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i32>", []>;
 def SVELogicalImm64Pat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i64>", []>;
 
+def SVELogicalFPImm16Pat : ComplexPattern<f16, 1, "SelectSVELogicalImm<MVT::i16>", []>;
+def SVELogicalFPImm32Pat : ComplexPattern<f32, 1, "SelectSVELogicalImm<MVT::i32>", []>;
+def SVELogicalFPImm64Pat : ComplexPattern<f64, 1, "SelectSVELogicalImm<MVT::i64>", []>;
+def SVELogicalBFPImmPat : ComplexPattern<bf16, 1, "SelectSVELogicalImm<MVT::i16>", []>;
+
 def SVELogicalImm8NotPat  : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i8, true>", []>;
 def SVELogicalImm16NotPat : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i16, true>", []>;
 def SVELogicalImm32NotPat : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i32, true>", []>;
@@ -2160,6 +2165,26 @@ multiclass sve_int_dup_mask_imm<string asm> {
             (!cast<Instruction>(NAME) i64:$imm)>;
   def : Pat<(nxv2i64 (splat_vector (i64 (SVELogicalImm64Pat i64:$imm)))),
             (!cast<Instruction>(NAME) i64:$imm)>;
+
+  def : Pat<(nxv8f16 (splat_vector (f16 (SVELogicalFPImm16Pat i64:$imm)))),
+            (!cast<Instruction>(NAME) i64:$imm)>;
+  def : Pat<(nxv4f16 (splat_vector (f16 (SVELogicalFPImm16Pat i64:$imm)))),
+            (!cast<Instruction>(NAME) i64:$imm)>;
+  def : Pat<(nxv2f16 (splat_vector (f16 (SVELogicalFPImm16Pat i64:$imm)))),
+            (!cast<Instruction>(NAME) i64:$imm)>;
+  def : Pat<(nxv4f32 (splat_vector (f32 (SVELogicalFPImm32Pat i64:$imm)))),
+            (!cast<Instruction>(NAME) i64:$imm)>;
+  def : Pat<(nxv2f32 (splat_vector (f32 (SVELogicalFPImm32Pat i64:$imm)))),
+            (!cast<Instruction>(NAME) i64:$imm)>;
+  def : Pat<(nxv2f64 (splat_vector (f64 (SVELogicalFPImm64Pat i64:$imm)))),
+            (!cast<Instruction>(NAME) i64:$imm)>;
+
+  def : Pat<(nxv8bf16 (splat_vector (bf16 (SVELogicalBFPImmPat i64:$imm)))),
+            (!cast<Instruction>(NAME) i64:$imm)>;
+  def : Pat<(nxv4bf16 (splat_vector (bf16 (SVELogicalBFPImmPat i64:$imm)))),
+            (!cast<Instruction>(NAME) i64:$imm)>;
+  def : Pat<(nxv2bf16 (splat_vector (bf16 (SVELogicalBFPImmPat i64:$imm)))),
+            (!cast<Instruction>(NAME) i64:$imm)>;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll b/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll
index 16e8feb0dc5bb..fc3e018f2ec7a 100644
--- a/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll
+++ b/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll
@@ -632,7 +632,6 @@ define <vscale x 8 x bfloat> @fsub_sel_fmul_negzero_nxv8bf16(<vscale x 8 x bfloa
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    uunpkhi z3.s, z2.h
 ; SVE-NEXT:    uunpkhi z4.s, z1.h
-; SVE-NEXT:    mov w8, #32768 // =0x8000
 ; SVE-NEXT:    uunpklo z2.s, z2.h
 ; SVE-NEXT:    uunpklo z1.s, z1.h
 ; SVE-NEXT:    ptrue p1.s
@@ -643,9 +642,8 @@ define <vscale x 8 x bfloat> @fsub_sel_fmul_negzero_nxv8bf16(<vscale x 8 x bfloa
 ; SVE-NEXT:    fmul z3.s, z4.s, z3.s
 ; SVE-NEXT:    fmul z1.s, z1.s, z2.s
 ; SVE-NEXT:    bfcvt z2.h, p1/m, z3.s
-; SVE-NEXT:    fmov h3, w8
+; SVE-NEXT:    dupm z3.h, #0x8000
 ; SVE-NEXT:    bfcvt z1.h, p1/m, z1.s
-; SVE-NEXT:    mov z3.h, h3
 ; SVE-NEXT:    uzp1 z1.h, z1.h, z2.h
 ; SVE-NEXT:    sel z1.h, p0, z1.h, z3.h
 ; SVE-NEXT:    uunpkhi z3.s, z0.h
@@ -665,10 +663,8 @@ define <vscale x 8 x bfloat> @fsub_sel_fmul_negzero_nxv8bf16(<vscale x 8 x bfloa
 ;
 ; SVE-B16B16-LABEL: fsub_sel_fmul_negzero_nxv8bf16:
 ; SVE-B16B16:       // %bb.0:
-; SVE-B16B16-NEXT:    mov w8, #32768 // =0x8000
+; SVE-B16B16-NEXT:    dupm z3.h, #0x8000
 ; SVE-B16B16-NEXT:    bfmul z1.h, z1.h, z2.h
-; SVE-B16B16-NEXT:    fmov h3, w8
-; SVE-B16B16-NEXT:    mov z3.h, h3
 ; SVE-B16B16-NEXT:    sel z1.h, p0, z1.h, z3.h
 ; SVE-B16B16-NEXT:    bfsub z0.h, z0.h, z1.h
 ; SVE-B16B16-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/sve-fp-combine.ll b/llvm/test/CodeGen/AArch64/sve-fp-combine.ll
index 53aba04028d62..57389ad2fe9b2 100644
--- a/llvm/test/CodeGen/AArch64/sve-fp-combine.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fp-combine.ll
@@ -1134,10 +1134,9 @@ define <vscale x 2 x double> @fadd_sel_fmul_d_negzero(<vscale x 2 x double> %a,
 define <vscale x 8 x half> @fsub_sel_fmul_h_negzero(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: fsub_sel_fmul_h_negzero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #32768 // =0x8000
+; CHECK-NEXT:    dupm z3.h, #0x8000
 ; CHECK-NEXT:    fmul z1.h, z1.h, z2.h
-; CHECK-NEXT:    mov z2.h, w8
-; CHECK-NEXT:    sel z1.h, p0, z1.h, z2.h
+; CHECK-NEXT:    sel z1.h, p0, z1.h, z3.h
 ; CHECK-NEXT:    fsub z0.h, z0.h, z1.h
 ; CHECK-NEXT:    ret
   %fmul = fmul <vscale x 8 x half> %b, %c
@@ -1150,10 +1149,9 @@ define <vscale x 8 x half> @fsub_sel_fmul_h_negzero(<vscale x 8 x half> %a, <vsc
 define <vscale x 4 x float> @fsub_sel_fmul_s_negzero(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: fsub_sel_fmul_s_negzero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #-2147483648 // =0x80000000
+; CHECK-NEXT:    mov z3.s, #0x80000000
 ; CHECK-NEXT:    fmul z1.s, z1.s, z2.s
-; CHECK-NEXT:    mov z2.s, w8
-; CHECK-NEXT:    sel z1.s, p0, z1.s, z2.s
+; CHECK-NEXT:    sel z1.s, p0, z1.s, z3.s
 ; CHECK-NEXT:    fsub z0.s, z0.s, z1.s
 ; CHECK-NEXT:    ret
   %fmul = fmul <vscale x 4 x float> %b, %c
@@ -1166,10 +1164,9 @@ define <vscale x 4 x float> @fsub_sel_fmul_s_negzero(<vscale x 4 x float> %a, <v
 define <vscale x 2 x double> @fsub_sel_fmul_d_negzero(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: fsub_sel_fmul_d_negzero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-NEXT:    mov z3.d, #0x8000000000000000
 ; CHECK-NEXT:    fmul z1.d, z1.d, z2.d
-; CHECK-NEXT:    mov z2.d, x8
-; CHECK-NEXT:    sel z1.d, p0, z1.d, z2.d
+; CHECK-NEXT:    sel z1.d, p0, z1.d, z3.d
 ; CHECK-NEXT:    fsub z0.d, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %fmul = fmul <vscale x 2 x double> %b, %c
diff --git a/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll b/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll
index 8750867c56731..1223ae1c0cbdd 100644
--- a/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll
@@ -51,10 +51,9 @@ define half @fadda_nxv6f16(<vscale x 6 x half> %v, half %s) {
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    mov w8, #32768 // =0x8000
+; CHECK-NEXT:    dupm z2.h, #0x8000
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    str z0, [sp]
-; CHECK-NEXT:    mov z2.h, w8
 ; CHECK-NEXT:    fmov s0, s1
 ; CHECK-NEXT:    st1h { z2.d }, p0, [sp, #3, mul vl]
 ; CHECK-NEXT:    ptrue p0.h
@@ -77,12 +76,11 @@ define half @fadda_nxv10f16(<vscale x 10 x half> %v, half %s) {
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    // kill: def $h2 killed $h2 def $z2
-; CHECK-NEXT:    mov w8, #32768 // =0x8000
 ; CHECK-NEXT:    str z1, [sp]
+; CHECK-NEXT:    addvl x8, sp, #1
 ; CHECK-NEXT:    ptrue p1.d
 ; CHECK-NEXT:    fadda h2, p0, h2, z0.h
-; CHECK-NEXT:    mov z0.h, w8
-; CHECK-NEXT:    addvl x8, sp, #1
+; CHECK-NEXT:    dupm z0.h, #0x8000
 ; CHECK-NEXT:    st1h { z0.d }, p1, [sp, #1, mul vl]
 ; CHECK-NEXT:    ldr z1, [sp]
 ; CHECK-NEXT:    str z1, [sp, #1, mul vl]
@@ -105,11 +103,10 @@ define half @fadda_nxv12f16(<vscale x 12 x half> %v, half %s) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    // kill: def $h2 killed $h2 def $z2
-; CHECK-NEXT:    mov w8, #32768 // =0x8000
+; CHECK-NEXT:    uunpklo z1.s, z1.h
 ; CHECK-NEXT:    fadda h2, p0, h2, z0.h
-; CHECK-NEXT:    uunpklo z0.s, z1.h
-; CHECK-NEXT:    mov z1.h, w8
-; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
+; CHECK-NEXT:    dupm z0.h, #0x8000
+; CHECK-NEXT:    uzp1 z0.h, z1.h, z0.h
 ; CHECK-NEXT:    fadda h2, p0, h2, z0.h
 ; CHECK-NEXT:    fmov s0, s2
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll b/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll
index 4ae7ac7b292e9..897ade00320db 100644
--- a/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll
@@ -454,18 +454,17 @@ declare <vscale x 4 x i64> @llvm.fptosi.sat.nxv4f16.nxv4i64(<vscale x 4 x half>)
 define <vscale x 2 x i32> @test_signed_v2f16_v2i32(<vscale x 2 x half> %f) {
 ; CHECK-LABEL: test_signed_v2f16_v2i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
+; CHECK-NEXT:    mov z1.h, #-1025 // =0xfffffffffffffbff
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z1.h, w8
 ; CHECK-NEXT:    mov w8, #31743 // =0x7bff
-; CHECK-NEXT:    mov z2.h, w8
+; CHECK-NEXT:    mov z2.d, #0xffffffff80000000
 ; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT:    mov z1.d, #0xffffffff80000000
-; CHECK-NEXT:    fcmgt p2.h, p0/z, z0.h, z2.h
-; CHECK-NEXT:    mov z2.d, #0x7fffffff
+; CHECK-NEXT:    mov z1.h, w8
+; CHECK-NEXT:    fcvtzs z2.d, p1/m, z0.h
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    mov z1.d, #0x7fffffff
 ; CHECK-NEXT:    fcmuo p0.h, p0/z, z0.h, z0.h
-; CHECK-NEXT:    fcvtzs z1.d, p1/m, z0.h
-; CHECK-NEXT:    sel z0.d, p2, z2.d, z1.d
+; CHECK-NEXT:    sel z0.d, p1, z1.d, z2.d
 ; CHECK-NEXT:    mov z0.d, p0/m, #0 // =0x0
 ; CHECK-NEXT:    ret
     %x = call <vscale x 2 x i32> @llvm.fptosi.sat.nxv2f16.nxv2i32(<vscale x 2 x half> %f)
@@ -475,18 +474,17 @@ define <vscale x 2 x i32> @test_signed_v2f16_v2i32(<vscale x 2 x half> %f) {
 define <vscale x 4 x i32> @test_signed_v4f16_v4i32(<vscale x 4 x half> %f) {
 ; CHECK-LABEL: test_signed_v4f16_v4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
+; CHECK-NEXT:    mov z1.h, #-1025 // =0xfffffffffffffbff
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov z1.h, w8
 ; CHECK-NEXT:    mov w8, #31743 // =0x7bff
-; CHECK-NEXT:    mov z2.h, w8
+; CHECK-NEXT:    mov z2.s, #0x80000000
 ; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT:    mov z1.s, #0x80000000
-; CHECK-NEXT:    fcmgt p2.h, p0/z, z0.h, z2.h
-; CHECK-NEXT:    mov z2.s, #0x7fffffff
+; CHECK-NEXT:    mov z1.h, w8
+; CHECK-NEXT:    fcvtzs z2.s, p1/m, z0.h
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    mov z1.s, #0x7fffffff
 ; CHECK-NEXT:    fcmuo p0.h, p0/z, z0.h, z0.h
-; CHECK-NEXT:    fcvtzs z1.s, p1/m, z0.h
-; CHECK-NEXT:    sel z0.s, p2, z2.s, z1.s
+; CHECK-NEXT:    sel z0.s, p1, z1.s, z2.s
 ; CHECK-NEXT:    mov z0.s, p0/m, #0 // =0x0
 ; CHECK-NEXT:    ret
     %x = call <vscale x 4 x i32> @llvm.fptosi.sat.nxv4f16.nxv4i32(<vscale x 4 x half> %f)
@@ -496,26 +494,25 @@ define <vscale x 4 x i32> @test_signed_v4f16_v4i32(<vscale x 4 x half> %f) {
 define <vscale x 8 x i32> @test_signed_v8f16_v8i32(<vscale x 8 x half> %f) {
 ; CHECK-LABEL: test_signed_v8f16_v8i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
-; CHECK-NEXT:    uunpklo z1.s, z0.h
+; CHECK-NEXT:    mov z1.h, #-1025 // =0xfffffffffffffbff
+; CHECK-NEXT:    uunpklo z2.s, z0.h
+; CHECK-NEXT:    mov w8, #31743 // =0x7bff
 ; CHECK-NEXT:    uunpkhi z0.s, z0.h
-; CHECK-NEXT:    mov z2.h, w8
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov w8, #31743 // =0x7bff
 ; CHECK-NEXT:    mov z3.s, #0x80000000
 ; CHECK-NEXT:    mov z4.s, #0x80000000
 ; CHECK-NEXT:    mov z5.h, w8
-; CHECK-NEXT:    fcmge p1.h, p0/z, z1.h, z2.h
-; CHECK-NEXT:    fcmge p2.h, p0/z, z0.h, z2.h
-; CHECK-NEXT:    mov z2.s, #0x7fffffff
+; CHECK-NEXT:    fcmge p1.h, p0/z, z2.h, z1.h
+; CHECK-NEXT:    fcmge p2.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    mov z1.s, #0x7fffffff
 ; CHECK-NEXT:    fcmgt p3.h, p0/z, z0.h, z5.h
-; CHECK-NEXT:    fcvtzs z3.s, p1/m, z1.h
-; CHECK-NEXT:    fcmgt p1.h, p0/z, z1.h, z5.h
+; CHECK-NEXT:    fcvtzs z3.s, p1/m, z2.h
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z2.h, z5.h
 ; CHECK-NEXT:    fcvtzs z4.s, p2/m, z0.h
-; CHECK-NEXT:    fcmuo p2.h, p0/z, z1.h, z1.h
+; CHECK-NEXT:    fcmuo p2.h, p0/z, z2.h, z2.h
 ; CHECK-NEXT:    fcmuo p0.h, p0/z, z0.h, z0.h
-; CHECK-NEXT:    sel z0.s, p1, z2.s, z3.s
-; CHECK-NEXT:    sel z1.s, p3, z2.s, z4.s
+; CHECK-NEXT:    sel z0.s, p1, z1.s, z3.s
+; CHECK-NEXT:    sel z1.s, p3, z1.s, z4.s
 ; CHECK-NEXT:    mov z0.s, p2/m, #0 // =0x0
 ; CHECK-NEXT:    mov z1.s, p0/m, #0 // =0x0
 ; CHECK-NEXT:    ret
@@ -526,18 +523,17 @@ define <vscale x 8 x i32> @test_signed_v8f16_v8i32(<vscale x 8 x half> %f) {
 define <vscale x 4 x i16> @test_signed_v4f16_v4i16(<vscale x 4 x half> %f) {
 ; CHECK-LABEL: test_signed_v4f16_v4i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #63488 // =0xf800
+; CHECK-NEXT:    dupm z1.h, #0xf800
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov z2.s, #-32768 // =0xffffffffffff8000
-; CHECK-NEXT:    mov z1.h, w8
 ; CHECK-NEXT:    mov w8, #30719 // =0x77ff
+; CHECK-NEXT:    mov z2.h, w8
 ; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT:    mov z1.h, w8
-; CHECK-NEXT:    fcmgt p2.h, p0/z, z0.h, z1.h
-; CHECK-NEXT:    mov z1.s, #32767 // =0x7fff
-; CHECK-NEXT:    fcvtzs z2.s, p1/m, z0.h
+; CHECK-NEXT:    mov z1.s, #-32768 // =0xffffffffffff8000
+; CHECK-NEXT:    fcvtzs z1.s, p1/m, z0.h
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z0.h, z2.h
+; CHECK-NEXT:    mov z2.s, #32767 // =0x7fff
 ; CHECK-NEXT:    fcmuo p0.h, p0/z, z0.h, z0.h
-; CHECK-NEXT:    sel z0.s, p2, z1.s, z2.s
+; CHECK-NEXT:    sel z0.s, p1, z2.s, z1.s
 ; CHECK-NEXT:    mov z0.s, p0/m, #0 // =0x0
 ; CHECK-NEXT:    ret
     %x = call <vscale x 4 x i16> @llvm.fptosi.sat.nxv4f16.nxv4i16(<vscale x 4 x half> %f)
@@ -547,18 +543,17 @@ define <vscale x 4 x i16> @test_signed_v4f16_v4i16(<vscale x 4 x half> %f) {
 define <vscale x 8 x i16> @test_signed_v8f16_v8i16(<vscale x 8 x half> %f) {
 ; CHECK-LABEL: test_signed_v8f16_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #63488 // =0xf800
+; CHECK-NEXT:    dupm z1.h, #0xf800
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    mov z2.h, #-32768 // =0xffffffffffff8000
-; CHECK-NEXT:    mov z1.h, w8
 ; CHECK-NEXT:    mov w8, #30719 // =0x77ff
+; CHECK-NEXT:    mov z2.h, w8
 ; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT:    mov z1.h, w8
-; CHECK-NEXT:    fcmgt p2.h, p0/z, z0.h, z1.h
-; CHECK-NEXT:    mov z1.h, #32767 // =0x7fff
-; CHECK-NEXT:    fcvtzs z2.h, p1/m, z0.h
+; CHECK-NEXT:    mov z1.h, #-32768 // =0xffffffffffff8000
+; CHECK-NEXT:    fcvtzs z1.h, p1/m, z0.h
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z0.h, z2.h
+; CHECK-NEXT:    mov z2.h, #32767 // =0x7fff
 ; CHECK-NEXT:    fcmuo p0.h, p0/z, z0.h, z0.h
-; CHECK-NEXT:    sel z0.h, p2, z1.h, z2.h
+; CHECK-NEXT:    sel z0.h, p1, z2.h, z1.h
 ; CHECK-NEXT:    mov z0.h, p0/m, #0 // =0x0
 ; CHECK-NEXT:    ret
     %x = call <vscale x 8 x i16> @llvm.fptosi.sat.nxv8f16.nxv8i16(<vscale x 8 x half> %f)
@@ -568,18 +563,17 @@ define <vscale x 8 x i16> @test_signed_v8f16_v8i16(<vscale x 8 x half> %f) {
 define <vscale x 2 x i64> @test_signed_v2f16_v2i64(<vscale x 2 x half> %f) {
 ; CHECK-LABEL: test_signed_v2f16_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
+; CHECK-NEXT:    mov z1.h, #-1025 // =0xfffffffffffffbff
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z1.h, w8
 ; CHECK-NEXT:    mov w8, #31743 // =0x7bff
-; CHECK-NEXT:    mov z2.h, w8
+; CHECK-NEXT:    mov z2.d, #0x8000000000000000
 ; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT:    mov z1.d, #0x8000000000000000
-; CHECK-NEXT:    fcmgt p2.h, p0/z, z0.h, z2.h
-; CHECK-NEXT:    mov z2.d, #0x7fffffffffffffff
+; CHECK-NEXT:    mov z1.h, w8
+; CHECK-NEXT:    fcvtzs z2.d, p1/m, z0.h
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    mov z1.d, #0x7fffffffffffffff
 ; CHECK-NEXT:    fcmuo p0.h, p0/z, z0.h, z0.h
-; CHECK-NEXT:    fcvtzs z1.d, p1/m, z0.h
-; CHECK-NEXT:    sel z0.d, p2, z2.d, z1.d
+; CHECK-NEXT:    sel z0.d, p1, z1.d, z2.d
 ; CHECK-NEXT:    mov z0.d, p0/m, #0 // =0x0
 ; CHECK-NEXT:    ret
     %x = call <vscale x 2 x i64> @llvm.fptosi.sat.nxv2f16.nxv2i64(<vscale x 2 x half> %f)
@@ -589,26 +583,25 @@ define <vscale x 2 x i64> @test_signed_v2f16_v2i64(<vscale x 2 x half> %f) {
 define <vscale x 4 x i64> @test_signed_v4f16_v4i64(<vscale x 4 x half> %f) {
 ; CHECK-LABEL: test_signed_v4f16_v4i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
-; CHECK-NEXT:    uunpklo z1.d, z0.s
+; CHECK-NEXT:    mov z1.h, #-1025 // =0xfffffffffffffbff
+; CHECK-NEXT:    uunpklo z2.d, z0.s
+; CHECK-NEXT:    mov w8, #31743 // =0x7bff
 ; CHECK-NEXT:    uunpkhi z0.d, z0.s
-; CHECK-NEXT:    mov z2.h, w8
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov w8, #31743 // =0x7bff
 ; CHECK-NEXT:    mov z3.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z4.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z5.h, w8
-; CHECK-NEXT:    fcmge p1.h, p0/z, z1.h, z2.h
-; CHECK-NEXT:    fcmge p2.h, p0/z, z0.h, z2.h
-; CHECK-NEXT:    mov z2.d, #0x7fffffffffffffff
+; CHECK-NEXT:    fcmge p1.h, p0/z, z2.h, z1.h
+; CHECK-NEXT:    fcmge p2.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    mov z1.d, #0x7fffffffffffffff
 ; CHECK-NEXT:    fcmgt p3.h, p0/z, z0.h, z5.h
-; CHECK-NEXT:    fcvtzs z3.d, p1/m, z1.h
-; CHECK-NEXT:    fcmgt p1.h, p0/z, z1.h, z5.h
+; CHECK-NEXT:    fcvtzs z3.d, p1/m, z2.h
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z2.h, z5.h
 ; CHECK-NEXT:    fcvtzs z4.d, p2/m, z0.h
-; CHECK-NEXT:    fcmuo p2.h, p0/z, z1.h, z1.h
+; CHECK-NEXT:    fcmuo p2.h, p0/z, z2.h, z2.h
 ; CHECK-NEXT:    fcmuo p0.h, p0/z, z0.h, z0.h
-; CHECK-NEXT:    sel z0.d, p1, z2.d, z3.d
-; CHECK-NEXT:    sel z1.d, p3, z2.d, z4.d
+; CHECK-NEXT:    sel z0.d, p1, z1.d, z3.d
+; CHECK-NEXT:    sel z1.d, p3, z1.d, z4.d
 ; CHECK-NEXT:    mov z0.d, p2/m, #0 // =0x0
 ; CHECK-NEXT:    mov z1.d, p0/m, #0 // =0x0
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/sve-llrint.ll b/llvm/test/CodeGen/AArch64/sve-llrint.ll
index f964d70e0a05c..c2bb0c81ab405 100644
--- a/llvm/test/CodeGen/AArch64/sve-llrint.ll
+++ b/llvm/test/CodeGen/AArch64/sve-llrint.ll
@@ -5,9 +5,8 @@ define <vscale x 1 x i64> @llrint_v1i64_v1f16(<vscale x 1 x half> %x) {
 ; CHECK-LABEL: llrint_v1i64_v1f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
+; CHECK-NEXT:    mov z1.h, #-1025 // =0xfffffffffffffbff
 ; CHECK-NEXT:    mov z2.d, #0x8000000000000000
-; CHECK-NEXT:    mov z1.h, w8
 ; CHECK-NEXT:    mov w8, #31743 // =0x7bff
 ; CHECK-NEXT:    frintx z0.h, p0/m, z0.h
 ; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, z1.h
@@ -28,9 +27,8 @@ define <vscale x 2 x i64> @llrint_v1i64_v2f16(<vscale x 2 x half> %x) {
 ; CHECK-LABEL: llrint_v1i64_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
+; CHECK-NEXT:    mov z1.h, #-1025 // =0xfffffffffffffbff
 ; CHECK-NEXT:    mov z2.d, #0x8000000000000000
-; CHECK-NEXT:    mov z1.h, w8
 ; CHECK-NEXT:    mov w8, #31743 // =0x7bff
 ; CHECK-NEXT:    frintx z0.h, p0/m, z0.h
 ; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, z1.h
@@ -52,10 +50,9 @@ define <vscale x 4 x i64> @llrint_v4i64_v4f16(<vscale x 4 x half> %x) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    uunpklo z1.d, z0.s
 ; CHECK-NEXT:    uunpkhi z0.d, z0.s
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z2.h, w8
 ; CHECK-NEXT:    mov w8, #31743 // =0x7bff
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z2.h, #-1025 // =0xfffffffffffffbff
 ; CHECK-NEXT:    mov z3.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z4.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z5.d, #0x7fffffffffffffff
@@ -92,10 +89,9 @@ define <vscale x 8 x i64> @llrint_v8i64_v8f16(<vscale x 8 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    uunpklo z1.s, z0.h
 ; CHECK-NEXT:    uunpkhi z0.s, z0.h
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z4.h, w8
 ; CHECK-NEXT:    mov w8, #31743 // =0x7bff
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z4.h, #-1025 // =0xfffffffffffffbff
 ; CHECK-NEXT:    mov z5.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z6.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z7.d, #0x8000000000000000
@@ -162,12 +158,13 @@ define <vscale x 16 x i64> @llrint_v16i64_v16f16(<vscale x 16 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    uunpklo z2.s, z0.h
 ; CHECK-NEXT:    uunpkhi z3.s, z0.h
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
+; CHECK-NEXT:    mov w8, #31743 // =0x7bff
 ; CHECK-NEXT:    uunpklo z7.s, z1.h
 ; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z0.h, #-1025 // =0xfffffffffffffbff
 ; CHECK-NEXT:    uunpkhi z1.s, z1.h
-; CHECK-NEXT:    mov z0.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z5.d, #0x8000000000000000
+; CHECK-NEXT:    mov z29.h, w8
 ; CHECK-NEXT:    mov z31.d, #0x8000000000000000
 ; CHECK-NEXT:    uunpklo z4.d, z2.s
 ; CHECK-NEXT:    uunpklo z24.d, z3.s
@@ -175,10 +172,8 @@ define <vscale x 16 x i64> @llrint_v16i64_v16f16(<vscale x 16 x half> %x) {
 ; CHECK-NEXT:    uunpkhi z6.d, z2.s
 ; CHECK-NEXT:    uunpklo z26.d, z7.s
 ; CHECK-NEXT:    uunpkhi z7.d, z7.s
-; CHECK-NEXT:    mov z2.h, w8
-; CHECK-NEXT:    mov w8, #31743 // =0x7bff
+; CHECK-NEXT:    mov z2.d, #0x8000000000000000
 ; CHECK-NEXT:    uunpklo z30.d, z1.s
-; CHECK-NEXT:    mov z29.h, w8
 ; CHECK-NEXT:    mov z3.d, #0x8000000000000000
 ; CHECK-NEXT:    uunpkhi z1.d, z1.s
 ; CHECK-NEXT:    movprfx z27, z4
@@ -191,17 +186,17 @@ define <vscale x 16 x i64> @llrint_v16i64_v16f16(<vscale x 16 x half> %x) {
 ; CHECK-NEXT:    frintx z26.h, p0/m, z26.h
 ; CHECK-NEXT:    frintx z7.h, p0/m, z7.h
 ; CHECK-NEXT:    mov z6.d, #0x8000000000000000
-; CHECK-NEXT:    fcmge p1.h, p0/z, z27.h, z2.h
-; CHECK-NEXT:    fcmge p3.h, p0/z, z24.h, z2.h
-; CHECK-NEXT:    fcmge p4.h, p0/z, z25.h, z2.h
-; CHECK-NEXT:    fcmge p2.h, p0/z, z28.h, z2.h
-; CHECK-NEXT:    fcmge p5.h, p0/z, z26.h, z2.h
-; CHECK-NEXT:    fcvtzs z0.d, p1/m, z27.h
+; CHECK-NEXT:    fcmge p1.h, p0/z, z27.h, z0.h
+; CHECK-NEXT:    fcmge p3.h, p0/z, z24.h, z0.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z25.h, z0.h
+; CHECK-NEXT:    fcmge p2.h, p0/z, z28.h, z0.h
+; CHECK-NEXT:    fcmge p5.h, p0/z, z26.h, z0.h
+; CHECK-NEXT:    fcvtzs z2.d, p1/m, z27.h
 ; CHECK-NEXT:    fcvtzs z4.d, p3/m, z24.h
 ; CHECK-NEXT:    fcvtzs z5.d, p4/m, z25.h
 ; CHECK-NEXT:    fcmgt p3.h, p0/z, z27.h, z29.h
 ; CHECK-NEXT:    fcvtzs z3.d, p2/m, z28.h
-; CHECK-NEXT:    fcmge p4.h, p0/z, z7.h, z2.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z7.h, z0.h
 ; CHECK-NEXT:    fcvtzs z6.d, p5/m, z26.h
 ; CHECK-NEXT:    fcmuo p1.h, p0/z, z27.h, z27.h
 ; CHECK-NEXT:    movprfx z27, z30
@@ -212,7 +207,7 @@ define <vscale x 16 x i64> @llrint_v16i64_v16f16(<vscale x 16 x half> %x) {
 ; CHECK-NEXT:    fcmuo p2.h, p0/z, z28.h, z28.h
 ; CHECK-NEXT:    mov z28.d, #0x8000000000000000
 ; CHECK-NEXT:    fcvtzs z31.d, p4/m, z7.h
-; CHECK-NEXT:    fcmge p4.h, p0/z, z27.h, z2.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z27.h, z0.h
 ; CHECK-NEXT:    fcmgt p6.h, p0/z, z24.h, z29.h
 ; CHECK-NEXT:    fcmuo p7.h, p0/z, z24.h, z24.h
 ; CHECK-NEXT:    mov z24.d, #0x7fffffffffffffff
@@ -221,31 +216,31 @@ define <vscale x 16 x i64> @llrint_v16i64_v16f16(<vscale x 16 x half> %x) {
 ; CHECK-NEXT:    fcmuo p10.h, p0/z, z25.h, z25.h
 ; CHECK-NEXT:    mov z25.d, #0x8000000000000000
 ; CHECK-NEXT:    sel z1.d, p5, z24.d, z3.d
-; CHECK-NEXT:    mov z0.d, p3/m, z24.d
 ; CHECK-NEXT:    sel z3.d, p8, z24.d, z5.d
-; CHECK-NEXT:    fcmge p4.h, p0/z, z30.h, z2.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z30.h, z0.h
+; CHECK-NEXT:    sel z0.d, p3, z24.d, z2.d
 ; CHECK-NEXT:    sel z2.d, p6, z24.d, z4.d
-; CHECK-NEXT:    mov z0.d, p1/m, #0 // =0x0
 ; CHECK-NEXT:    mov z1.d, p2/m, #0 // =0x0
 ; CHECK-NEXT:    mov z3.d, p10/m, #0 // =0x0
 ; CHECK-NEXT:    ldr p10, [sp, #1, mul vl] // 2-byte Reload
+; CHECK-NEXT:    fcmgt p9.h, p0/z, z26.h, z29.h
 ; CHECK-NEXT:    mov z2.d, p7/m, #0 // =0x0
 ; CHECK-NEXT:    ldr p7, [sp, #4, mul vl] // 2-byte Reload
-; CHECK-NEXT:    fcmgt p9.h, p0/z, z26.h, z29.h
 ; CHECK-NEXT:    fcvtzs z25.d, p4/m, z30.h
+; CHECK-NEXT:    mov z0.d, p1/m, #0 // =0x0
 ; CHECK-NEXT:    fcmgt p5.h, p0/z, z7.h, z29.h
 ; CHECK-NEXT:    fcmgt p6.h, p0/z, z27.h, z29.h
-; CHECK-NEXT:    fcmgt p4.h, p0/z, z30.h, z29.h
 ; CHECK-NEXT:    sel z4.d, p9, z24.d, z6.d
+; CHECK-NEXT:    fcmgt p4.h, p0/z, z30.h, z29.h
 ; CHECK-NEXT:    fcmuo p8.h, p0/z, z7.h, z7.h
 ; CHECK-NEXT:    sel z5.d, p5, z24.d, z31.d
 ; CHECK-NEXT:    ldr p5, [sp, #6, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    sel z6.d, p6, z24.d, z28.d
 ; CHECK-NEXT:    ldr p6, [sp, #5, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    fcmuo p9.h, p0/z, z27.h, z27.h
+; CHECK-NEXT:    fcmuo p3.h, p0/z, z26.h, z26.h
 ; CHECK-NEXT:    sel z7.d, p4, z24.d, z25.d
 ; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Reload
-; CHECK-NEXT:    fcmuo p3.h, p0/z, z26.h, z26.h
 ; CHECK-NEXT:    mov z5.d, p8/m, #0 // =0x0
 ; CHECK-NEXT:    ldr p8, [sp, #3, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    fcmuo p0.h, p0/z, z30.h, z30.h
@@ -302,48 +297,47 @@ define <vscale x 32 x i64> @llrint_v32i64_v32f16(<vscale x 32 x half> %x) {
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x4e, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x48, 0x1e, 0x22, 0x40, 0x1c // $d14 @ cfa - 56 * VG - 16
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x4f, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x40, 0x1e, 0x22, 0x40, 0x1c // $d15 @ cfa - 64 * VG - 16
 ; CHECK-NEXT:    uunpklo z4.s, z0.h
-; CHECK-NEXT:    uunpkhi z5.s, z0.h
-; CHECK-NEXT:    mov w9, #64511 // =0xfbff
-; CHECK-NEXT:    uunpklo z6.s, z1.h
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    uunpkhi z28.s, z1.h
-; CHECK-NEXT:    mov z30.h, w9
+; CHECK-NEXT:    uunpkhi z0.s, z0.h
 ; CHECK-NEXT:    mov w9, #31743 // =0x7bff
+; CHECK-NEXT:    uunpklo z5.s, z1.h
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z28.h, #-1025 // =0xfffffffffffffbff
+; CHECK-NEXT:    uunpkhi z29.s, z1.h
+; CHECK-NEXT:    mov z7.d, #0x8000000000000000
 ; CHECK-NEXT:    uunpklo z13.s, z2.h
 ; CHECK-NEXT:    mov z9.d, #0x8000000000000000
 ; CHECK-NEXT:    uunpkhi z14.s, z2.h
 ; CHECK-NEXT:    uunpkhi z17.s, z3.h
-; CHECK-NEXT:    uunpklo z7.d, z4.s
+; CHECK-NEXT:    uunpklo z6.d, z4.s
 ; CHECK-NEXT:    uunpkhi z4.d, z4.s
-; CHECK-NEXT:    uunpklo z27.d, z5.s
-; CHECK-NEXT:    uunpklo z31.d, z6.s
-; CHECK-NEXT:    uunpkhi z8.d, z6.s
-; CHECK-NEXT:    uunpkhi z29.d, z5.s
-; CHECK-NEXT:    uunpkhi z11.d, z28.s
-; CHECK-NEXT:    uunpklo z10.d, z28.s
+; CHECK-NEXT:    uunpklo z27.d, z0.s
+; CHECK-NEXT:    uunpklo z31.d, z5.s
+; CHECK-NEXT:    uunpkhi z8.d, z5.s
+; CHECK-NEXT:    uunpkhi z30.d, z0.s
+; CHECK-NEXT:    uunpkhi z11.d, z29.s
+; CHECK-NEXT:    uunpklo z10.d, z29.s
 ; CHECK-NEXT:    uunpklo z15.s, z3.h
 ; CHECK-NEXT:    uunpklo z16.d, z14.s
 ; CHECK-NEXT:    uunpkhi z14.d, z14.s
 ; CHECK-NEXT:    mov z24.d, #0x8000000000000000
-; CHECK-NEXT:    movprfx z1, z7
-; CHECK-NEXT:    frintx z1.h, p0/m, z7.h
 ; CHECK-NEXT:    movprfx z5, z27
 ; CHECK-NEXT:    frintx z5.h, p0/m, z27.h
+; CHECK-NEXT:    movprfx z1, z6
+; CHECK-NEXT:    frintx z1.h, p0/m, z6.h
 ; CHECK-NEXT:    frintx z4.h, p0/m, z4.h
 ; CHECK-NEXT:    movprfx z12, z31
 ; CHECK-NEXT:    frintx z12.h, p0/m, z31.h
 ; CHECK-NEXT:    movprfx z27, z8
 ; CHECK-NEXT:    frintx z27.h, p0/m, z8.h
-; CHECK-NEXT:    movprfx z6, z29
-; CHECK-NEXT:    frintx z6.h, p0/m, z29.h
+; CHECK-NEXT:    movprfx z6, z30
+; CHECK-NEXT:    frintx z6.h, p0/m, z30.h
 ; CHECK-NEXT:    movprfx z31, z10
 ; CHECK-NEXT:    frintx z31.h, p0/m, z10.h
-; CHECK-NEXT:    mov z7.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z8.d, #0x8000000000000000
+; CHECK-NEXT:    frintx z11.h, p0/m, z11.h
 ; CHECK-NEXT:    movprfx z3, z16
 ; CHECK-NEXT:    frintx z3.h, p0/m, z16.h
-; CHECK-NEXT:    frintx z11.h, p0/m, z11.h
-; CHECK-NEXT:    mov z29.h, w9
+; CHECK-NEXT:    mov z30.h, w9
 ; CHECK-NEXT:    uunpklo z10.d, z13.s
 ; CHECK-NEXT:    uunpkhi z13.d, z13.s
 ; CHECK-NEXT:    uunpkhi z20.d, z15.s
@@ -354,124 +348,124 @@ define <vscale x 32 x i64> @llrint_v32i64_v32f16(<vscale x 32 x half> %x) {
 ; CHECK-NEXT:    uunpklo z15.d, z15.s
 ; CHECK-NEXT:    mov z2.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z21.d, #0x8000000000000000
+; CHECK-NEXT:    frintx z10.h, p0/m, z10.h
 ; CHECK-NEXT:    mov z26.d, #0x8000000000000000
-; CHECK-NEXT:    mov z28.d, #0x7fffffffffffffff
+; CHECK-NEXT:    mov z29.d, #0x7fffffffffffffff
 ; CHECK-NEXT:    movprfx z19, z13
 ; CHECK-NEXT:    frintx z19.h, p0/m, z13.h
 ; CHECK-NEXT:    movprfx z13, z14
 ; CHECK-NEXT:    frintx z13.h, p0/m, z14.h
-; CHECK-NEXT:    frintx z10.h, p0/m, z10.h
 ; CHECK-NEXT:    frintx z16.h, p0/m, z16.h
 ; CHECK-NEXT:    mov z22.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z23.d, #0x8000000000000000
-; CHECK-NEXT:    frintx z15.h, p0/m, z15.h
 ; CHECK-NEXT:    mov z14.d, #0x8000000000000000
-; CHECK-NEXT:    fcmge p4.h, p0/z, z4.h, z30.h
-; CHECK-NEXT:    fcmge p2.h, p0/z, z12.h, z30.h
-; CHECK-NEXT:    fcmgt p9.h, p0/z, z12.h, z29.h
+; CHECK-NEXT:    frintx z15.h, p0/m, z15.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z4.h, z28.h
+; CHECK-NEXT:    fcmge p2.h, p0/z, z12.h, z28.h
+; CHECK-NEXT:    fcmgt p9.h, p0/z, z12.h, z30.h
 ; CHECK-NEXT:    fcmuo p8.h, p0/z, z12.h, z12.h
 ; CHECK-NEXT:    fcvtzs z7.d, p4/m, z4.h
 ; CHECK-NEXT:    fcvtzs z8.d, p2/m, z12.h
 ; CHECK-NEXT:    mov z12.d, #0x8000000000000000
-; CHECK-NEXT:    fcmge p4.h, p0/z, z27.h, z30.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z27.h, z28.h
 ; CHECK-NEXT:    fcmuo p10.h, p0/z, z11.h, z11.h
-; CHECK-NEXT:    fcmge p3.h, p0/z, z5.h, z30.h
-; CHECK-NEXT:    mov z8.d, p9/m, z28.d
+; CHECK-NEXT:    fcmge p3.h, p0/z, z5.h, z28.h
+; CHECK-NEXT:    mov z8.d, p9/m, z29.d
 ; CHECK-NEXT:    fcvtzs z9.d, p4/m, z27.h
-; CHECK-NEXT:    fcmge p4.h, p0/z, z11.h, z30.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z11.h, z28.h
 ; CHECK-NEXT:    fcvtzs z24.d, p3/m, z5.h
 ; CHECK-NEXT:    mov z8.d, p8/m, #0 // =0x0
-; CHECK-NEXT:    fcmge p1.h, p0/z, z6.h, z30.h
-; CHECK-NEXT:    fcmge p5.h, p0/z, z1.h, z30.h
+; CHECK-NEXT:    fcmge p1.h, p0/z, z6.h, z28.h
+; CHECK-NEXT:    fcmge p5.h, p0/z, z1.h, z28.h
 ; CHECK-NEXT:    str z8, [x8, #4, mul vl]
 ; CHECK-NEXT:    fcvtzs z12.d, p4/m, z11.h
-; CHECK-NEXT:    fcmgt p4.h, p0/z, z11.h, z29.h
+; CHECK-NEXT:    fcmgt p4.h, p0/z, z11.h, z30.h
 ; CHECK-NEXT:    uunpkhi z11.d, z17.s
 ; CHECK-NEXT:    movprfx z17, z20
 ; CHECK-NEXT:    frintx z17.h, p0/m, z20.h
 ; CHECK-NEXT:    fcvtzs z25.d, p1/m, z6.h
 ; CHECK-NEXT:    mov z20.d, #0x8000000000000000
 ; CHECK-NEXT:    fcvtzs z0.d, p5/m, z1.h
-; CHECK-NEXT:    fcmge p6.h, p0/z, z10.h, z30.h
+; CHECK-NEXT:    fcmge p6.h, p0/z, z10.h, z28.h
 ; CHECK-NEXT:    frintx z11.h, p0/m, z11.h
-; CHECK-NEXT:    fcmge p3.h, p0/z, z31.h, z30.h
-; CHECK-NEXT:    fcmge p1.h, p0/z, z13.h, z30.h
+; CHECK-NEXT:    fcmge p3.h, p0/z, z31.h, z28.h
+; CHECK-NEXT:    fcmge p1.h, p0/z, z13.h, z28.h
 ; CHECK-NEXT:    fcvtzs z18.d, p6/m, z10.h
-; CHECK-NEXT:    fcmgt p11.h, p0/z, z10.h, z29.h
-; CHECK-NEXT:    fcmge p5.h, p0/z, z11.h, z30.h
+; CHECK-NEXT:    fcmgt p11.h, p0/z, z10.h, z30.h
+; CHECK-NEXT:    fcmge p5.h, p0/z, z11.h, z28.h
 ; CHECK-NEXT:    fcvtzs z2.d, p3/m, z31.h
 ; CHECK-NEXT:    fcvtzs z21.d, p1/m, z13.h
-; CHECK-NEXT:    fcmge p2.h, p0/z, z17.h, z30.h
-; CHECK-NEXT:    fcmge p3.h, p0/z, z16.h, z30.h
+; CHECK-NEXT:    fcmge p2.h, p0/z, z17.h, z28.h
+; CHECK-NEXT:    fcmge p3.h, p0/z, z16.h, z28.h
 ; CHECK-NEXT:    fcmuo p1.h, p0/z, z10.h, z10.h
-; CHECK-NEXT:    sel z10.d, p4, z28.d, z12.d
-; CHECK-NEXT:    sel z12.d, p11, z28.d, z18.d
+; CHECK-NEXT:    sel z10.d, p4, z29.d, z12.d
+; CHECK-NEXT:    sel z12.d, p11, z29.d, z18.d
 ; CHECK-NEXT:    fcvtzs z26.d, p5/m, z11.h
 ; CHECK-NEXT:    fcvtzs z22.d, p2/m, z17.h
-; CHECK-NEXT:    fcmgt p4.h, p0/z, z11.h, z29.h
+; CHECK-NEXT:    fcmgt p4.h, p0/z, z11.h, z30.h
 ; CHECK-NEXT:    fcvtzs z23.d, p3/m, z16.h
 ; CHECK-NEXT:    mov z10.d, p10/m, #0 // =0x0
 ; CHECK-NEXT:    mov z12.d, p1/m, #0 // =0x0
-; CHECK-NEXT:    fcmge p6.h, p0/z, z19.h, z30.h
+; CHECK-NEXT:    fcmge p6.h, p0/z, z19.h, z28.h
 ; CHECK-NEXT:    str z10, [x8, #7, mul vl]
-; CHECK-NEXT:    fcmge p7.h, p0/z, z3.h, z30.h
+; CHECK-NEXT:    fcmge p7.h, p0/z, z3.h, z28.h
 ; CHECK-NEXT:    str z12, [x8, #8, mul vl]
-; CHECK-NEXT:    mov z26.d, p4/m, z28.d
-; CHECK-NEXT:    fcmge p2.h, p0/z, z15.h, z30.h
-; CHECK-NEXT:    mov z30.d, #0x8000000000000000
+; CHECK-NEXT:    mov z26.d, p4/m, z29.d
+; CHECK-NEXT:    fcmge p2.h, p0/z, z15.h, z28.h
+; CHECK-NEXT:    mov z28.d, #0x8000000000000000
 ; CHECK-NEXT:    fcvtzs z14.d, p6/m, z19.h
-; CHECK-NEXT:    fcmgt p5.h, p0/z, z16.h, z29.h
-; CHECK-NEXT:    fcmgt p3.h, p0/z, z17.h, z29.h
+; CHECK-NEXT:    fcmgt p5.h, p0/z, z16.h, z30.h
+; CHECK-NEXT:    fcmgt p3.h, p0/z, z17.h, z30.h
 ; CHECK-NEXT:    fcvtzs z20.d, p7/m, z3.h
-; CHECK-NEXT:    fcvtzs z30.d, p2/m, z15.h
+; CHECK-NEXT:    fcvtzs z28.d, p2/m, z15.h
 ; CHECK-NEXT:    fcmuo p1.h, p0/z, z11.h, z11.h
 ; CHECK-NEXT:    fcmuo p2.h, p0/z, z16.h, z16.h
-; CHECK-NEXT:    sel z11.d, p5, z28.d, z23.d
-; CHECK-NEXT:    sel z16.d, p3, z28.d, z22.d
-; CHECK-NEXT:    fcmgt p4.h, p0/z, z19.h, z29.h
-; CHECK-NEXT:    fcmgt p3.h, p0/z, z15.h, z29.h
+; CHECK-NEXT:    sel z11.d, p5, z29.d, z23.d
+; CHECK-NEXT:    sel z16.d, p3, z29.d, z22.d
+; CHECK-NEXT:    fcmgt p4.h, p0/z, z19.h, z30.h
+; CHECK-NEXT:    fcmgt p3.h, p0/z, z15.h, z30.h
 ; CHECK-NEXT:    mov z26.d, p1/m, #0 // =0x0
 ; CHECK-NEXT:    mov z11.d, p2/m, #0 // =0x0
-; CHECK-NEXT:    fcmgt p1.h, p0/z, z13.h, z29.h
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z13.h, z30.h
 ; CHECK-NEXT:    fcmuo p6.h, p0/z, z17.h, z17.h
 ; CHECK-NEXT:    str z26, [x8, #15, mul vl]
-; CHECK-NEXT:    sel z26.d, p4, z28.d, z14.d
+; CHECK-NEXT:    sel z26.d, p4, z29.d, z14.d
 ; CHECK-NEXT:    str z11, [x8, #14, mul vl]
-; CHECK-NEXT:    mov z30.d, p3/m, z28.d
-; CHECK-NEXT:    fcmgt p2.h, p0/z, z3.h, z29.h
+; CHECK-NEXT:    mov z28.d, p3/m, z29.d
+; CHECK-NEXT:    fcmgt p2.h, p0/z, z3.h, z30.h
 ; CHECK-NEXT:    fcmuo p4.h, p0/z, z13.h, z13.h
 ; CHECK-NEXT:    fcmuo p3.h, p0/z, z3.h, z3.h
-; CHECK-NEXT:    sel z3.d, p1, z28.d, z21.d
+; CHECK-NEXT:    sel z3.d, p1, z29.d, z21.d
 ; CHECK-NEXT:    mov z16.d, p6/m, #0 // =0x0
-; CHECK-NEXT:    fcmgt p12.h, p0/z, z27.h, z29.h
-; CHECK-NEXT:    sel z11.d, p2, z28.d, z20.d
+; CHECK-NEXT:    fcmgt p12.h, p0/z, z27.h, z30.h
+; CHECK-NEXT:    sel z11.d, p2, z29.d, z20.d
 ; CHECK-NEXT:    str z16, [x8, #13, mul vl]
 ; CHECK-NEXT:    mov z3.d, p4/m, #0 // =0x0
 ; CHECK-NEXT:    fcmuo p6.h, p0/z, z15.h, z15.h
-; CHECK-NEXT:    fcmgt p1.h, p0/z, z4.h, z29.h
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z4.h, z30.h
 ; CHECK-NEXT:    mov z11.d, p3/m, #0 // =0x0
-; CHECK-NEXT:    mov z9.d, p12/m, z28.d
+; CHECK-NEXT:    mov z9.d, p12/m, z29.d
 ; CHECK-NEXT:    str z3, [x8, #11, mul vl]
 ; CHECK-NEXT:    fcmuo p5.h, p0/z, z19.h, z19.h
-; CHECK-NEXT:    fcmgt p2.h, p0/z, z5.h, z29.h
+; CHECK-NEXT:    fcmgt p2.h, p0/z, z5.h, z30.h
 ; CHECK-NEXT:    str z11, [x8, #10, mul vl]
-; CHECK-NEXT:    mov z30.d, p6/m, #0 // =0x0
-; CHECK-NEXT:    sel z3.d, p1, z28.d, z7.d
-; CHECK-NEXT:    fcmgt p4.h, p0/z, z6.h, z29.h
+; CHECK-NEXT:    mov z28.d, p6/m, #0 // =0x0
+; CHECK-NEXT:    sel z3.d, p1, z29.d, z7.d
+; CHECK-NEXT:    fcmgt p4.h, p0/z, z6.h, z30.h
 ; CHECK-NEXT:    fcmuo p3.h, p0/z, z27.h, z27.h
-; CHECK-NEXT:    str z30, [x8, #12, mul vl]
+; CHECK-NEXT:    str z28, [x8, #12, mul vl]
 ; CHECK-NEXT:    mov z26.d, p5/m, #0 // =0x0
-; CHECK-NEXT:    sel z7.d, p2, z28.d, z24.d
-; CHECK-NEXT:    fcmgt p6.h, p0/z, z31.h, z29.h
-; CHECK-NEXT:    fcmgt p1.h, p0/z, z1.h, z29.h
+; CHECK-NEXT:    sel z7.d, p2, z29.d, z24.d
+; CHECK-NEXT:    fcmgt p6.h, p0/z, z31.h, z30.h
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z1.h, z30.h
 ; CHECK-NEXT:    str z26, [x8, #9, mul vl]
-; CHECK-NEXT:    sel z24.d, p4, z28.d, z25.d
+; CHECK-NEXT:    sel z24.d, p4, z29.d, z25.d
 ; CHECK-NEXT:    mov z9.d, p3/m, #0 // =0x0
 ; CHECK-NEXT:    fcmuo p5.h, p0/z, z31.h, z31.h
 ; CHECK-NEXT:    fcmuo p2.h, p0/z, z6.h, z6.h
-; CHECK-NEXT:    mov z2.d, p6/m, z28.d
+; CHECK-NEXT:    mov z2.d, p6/m, z29.d
 ; CHECK-NEXT:    str z9, [x8, #5, mul vl]
-; CHECK-NEXT:    mov z0.d, p1/m, z28.d
+; CHECK-NEXT:    mov z0.d, p1/m, z29.d
 ; CHECK-NEXT:    fcmuo p3.h, p0/z, z5.h, z5.h
 ; CHECK-NEXT:    fcmuo p4.h, p0/z, z4.h, z4.h
 ; CHECK-NEXT:    mov z2.d, p5/m, #0 // =0x0
diff --git a/llvm/test/CodeGen/AArch64/sve-lrint.ll b/llvm/test/CodeGen/AArch64/sve-lrint.ll
index f517e7fe8dc16..f1224d30d53cc 100644
--- a/llvm/test/CodeGen/AArch64/sve-lrint.ll
+++ b/llvm/test/CodeGen/AArch64/sve-lrint.ll
@@ -6,9 +6,8 @@ define <vscale x 1 x iXLen> @lrint_v1f16(<vscale x 1 x half> %x) {
 ; CHECK-LABEL: lrint_v1f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
+; CHECK-NEXT:    mov z1.h, #-1025 // =0xfffffffffffffbff
 ; CHECK-NEXT:    mov z2.d, #0x8000000000000000
-; CHECK-NEXT:    mov z1.h, w8
 ; CHECK-NEXT:    mov w8, #31743 // =0x7bff
 ; CHECK-NEXT:    frintx z0.h, p0/m, z0.h
 ; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, z1.h
@@ -29,9 +28,8 @@ define <vscale x 2 x iXLen> @lrint_v2f16(<vscale x 2 x half> %x) {
 ; CHECK-LABEL: lrint_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
+; CHECK-NEXT:    mov z1.h, #-1025 // =0xfffffffffffffbff
 ; CHECK-NEXT:    mov z2.d, #0x8000000000000000
-; CHECK-NEXT:    mov z1.h, w8
 ; CHECK-NEXT:    mov w8, #31743 // =0x7bff
 ; CHECK-NEXT:    frintx z0.h, p0/m, z0.h
 ; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, z1.h
@@ -53,10 +51,9 @@ define <vscale x 4 x iXLen> @lrint_v4f16(<vscale x 4 x half> %x) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    uunpklo z1.d, z0.s
 ; CHECK-NEXT:    uunpkhi z0.d, z0.s
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z2.h, w8
 ; CHECK-NEXT:    mov w8, #31743 // =0x7bff
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z2.h, #-1025 // =0xfffffffffffffbff
 ; CHECK-NEXT:    mov z3.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z4.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z5.d, #0x7fffffffffffffff
@@ -93,10 +90,9 @@ define <vscale x 8 x iXLen> @lrint_v8f16(<vscale x 8 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    uunpklo z1.s, z0.h
 ; CHECK-NEXT:    uunpkhi z0.s, z0.h
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z4.h, w8
 ; CHECK-NEXT:    mov w8, #31743 // =0x7bff
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z4.h, #-1025 // =0xfffffffffffffbff
 ; CHECK-NEXT:    mov z5.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z6.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z7.d, #0x8000000000000000
@@ -163,12 +159,13 @@ define <vscale x 16 x iXLen> @lrint_v16f16(<vscale x 16 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    uunpklo z2.s, z0.h
 ; CHECK-NEXT:    uunpkhi z3.s, z0.h
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
+; CHECK-NEXT:    mov w8, #31743 // =0x7bff
 ; CHECK-NEXT:    uunpklo z7.s, z1.h
 ; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z0.h, #-1025 // =0xfffffffffffffbff
 ; CHECK-NEXT:    uunpkhi z1.s, z1.h
-; CHECK-NEXT:    mov z0.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z5.d, #0x8000000000000000
+; CHECK-NEXT:    mov z29.h, w8
 ; CHECK-NEXT:    mov z31.d, #0x8000000000000000
 ; CHECK-NEXT:    uunpklo z4.d, z2.s
 ; CHECK-NEXT:    uunpklo z24.d, z3.s
@@ -176,10 +173,8 @@ define <vscale x 16 x iXLen> @lrint_v16f16(<vscale x 16 x half> %x) {
 ; CHECK-NEXT:    uunpkhi z6.d, z2.s
 ; CHECK-NEXT:    uunpklo z26.d, z7.s
 ; CHECK-NEXT:    uunpkhi z7.d, z7.s
-; CHECK-NEXT:    mov z2.h, w8
-; CHECK-NEXT:    mov w8, #31743 // =0x7bff
+; CHECK-NEXT:    mov z2.d, #0x8000000000000000
 ; CHECK-NEXT:    uunpklo z30.d, z1.s
-; CHECK-NEXT:    mov z29.h, w8
 ; CHECK-NEXT:    mov z3.d, #0x8000000000000000
 ; CHECK-NEXT:    uunpkhi z1.d, z1.s
 ; CHECK-NEXT:    movprfx z27, z4
@@ -192,17 +187,17 @@ define <vscale x 16 x iXLen> @lrint_v16f16(<vscale x 16 x half> %x) {
 ; CHECK-NEXT:    frintx z26.h, p0/m, z26.h
 ; CHECK-NEXT:    frintx z7.h, p0/m, z7.h
 ; CHECK-NEXT:    mov z6.d, #0x8000000000000000
-; CHECK-NEXT:    fcmge p1.h, p0/z, z27.h, z2.h
-; CHECK-NEXT:    fcmge p3.h, p0/z, z24.h, z2.h
-; CHECK-NEXT:    fcmge p4.h, p0/z, z25.h, z2.h
-; CHECK-NEXT:    fcmge p2.h, p0/z, z28.h, z2.h
-; CHECK-NEXT:    fcmge p5.h, p0/z, z26.h, z2.h
-; CHECK-NEXT:    fcvtzs z0.d, p1/m, z27.h
+; CHECK-NEXT:    fcmge p1.h, p0/z, z27.h, z0.h
+; CHECK-NEXT:    fcmge p3.h, p0/z, z24.h, z0.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z25.h, z0.h
+; CHECK-NEXT:    fcmge p2.h, p0/z, z28.h, z0.h
+; CHECK-NEXT:    fcmge p5.h, p0/z, z26.h, z0.h
+; CHECK-NEXT:    fcvtzs z2.d, p1/m, z27.h
 ; CHECK-NEXT:    fcvtzs z4.d, p3/m, z24.h
 ; CHECK-NEXT:    fcvtzs z5.d, p4/m, z25.h
 ; CHECK-NEXT:    fcmgt p3.h, p0/z, z27.h, z29.h
 ; CHECK-NEXT:    fcvtzs z3.d, p2/m, z28.h
-; CHECK-NEXT:    fcmge p4.h, p0/z, z7.h, z2.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z7.h, z0.h
 ; CHECK-NEXT:    fcvtzs z6.d, p5/m, z26.h
 ; CHECK-NEXT:    fcmuo p1.h, p0/z, z27.h, z27.h
 ; CHECK-NEXT:    movprfx z27, z30
@@ -213,7 +208,7 @@ define <vscale x 16 x iXLen> @lrint_v16f16(<vscale x 16 x half> %x) {
 ; CHECK-NEXT:    fcmuo p2.h, p0/z, z28.h, z28.h
 ; CHECK-NEXT:    mov z28.d, #0x8000000000000000
 ; CHECK-NEXT:    fcvtzs z31.d, p4/m, z7.h
-; CHECK-NEXT:    fcmge p4.h, p0/z, z27.h, z2.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z27.h, z0.h
 ; CHECK-NEXT:    fcmgt p6.h, p0/z, z24.h, z29.h
 ; CHECK-NEXT:    fcmuo p7.h, p0/z, z24.h, z24.h
 ; CHECK-NEXT:    mov z24.d, #0x7fffffffffffffff
@@ -222,31 +217,31 @@ define <vscale x 16 x iXLen> @lrint_v16f16(<vscale x 16 x half> %x) {
 ; CHECK-NEXT:    fcmuo p10.h, p0/z, z25.h, z25.h
 ; CHECK-NEXT:    mov z25.d, #0x8000000000000000
 ; CHECK-NEXT:    sel z1.d, p5, z24.d, z3.d
-; CHECK-NEXT:    mov z0.d, p3/m, z24.d
 ; CHECK-NEXT:    sel z3.d, p8, z24.d, z5.d
-; CHECK-NEXT:    fcmge p4.h, p0/z, z30.h, z2.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z30.h, z0.h
+; CHECK-NEXT:    sel z0.d, p3, z24.d, z2.d
 ; CHECK-NEXT:    sel z2.d, p6, z24.d, z4.d
-; CHECK-NEXT:    mov z0.d, p1/m, #0 // =0x0
 ; CHECK-NEXT:    mov z1.d, p2/m, #0 // =0x0
 ; CHECK-NEXT:    mov z3.d, p10/m, #0 // =0x0
 ; CHECK-NEXT:    ldr p10, [sp, #1, mul vl] // 2-byte Reload
+; CHECK-NEXT:    fcmgt p9.h, p0/z, z26.h, z29.h
 ; CHECK-NEXT:    mov z2.d, p7/m, #0 // =0x0
 ; CHECK-NEXT:    ldr p7, [sp, #4, mul vl] // 2-byte Reload
-; CHECK-NEXT:    fcmgt p9.h, p0/z, z26.h, z29.h
 ; CHECK-NEXT:    fcvtzs z25.d, p4/m, z30.h
+; CHECK-NEXT:    mov z0.d, p1/m, #0 // =0x0
 ; CHECK-NEXT:    fcmgt p5.h, p0/z, z7.h, z29.h
 ; CHECK-NEXT:    fcmgt p6.h, p0/z, z27.h, z29.h
-; CHECK-NEXT:    fcmgt p4.h, p0/z, z30.h, z29.h
 ; CHECK-NEXT:    sel z4.d, p9, z24.d, z6.d
+; CHECK-NEXT:    fcmgt p4.h, p0/z, z30.h, z29.h
 ; CHECK-NEXT:    fcmuo p8.h, p0/z, z7.h, z7.h
 ; CHECK-NEXT:    sel z5.d, p5, z24.d, z31.d
 ; CHECK-NEXT:    ldr p5, [sp, #6, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    sel z6.d, p6, z24.d, z28.d
 ; CHECK-NEXT:    ldr p6, [sp, #5, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    fcmuo p9.h, p0/z, z27.h, z27.h
+; CHECK-NEXT:    fcmuo p3.h, p0/z, z26.h, z26.h
 ; CHECK-NEXT:    sel z7.d, p4, z24.d, z25.d
 ; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Reload
-; CHECK-NEXT:    fcmuo p3.h, p0/z, z26.h, z26.h
 ; CHECK-NEXT:    mov z5.d, p8/m, #0 // =0x0
 ; CHECK-NEXT:    ldr p8, [sp, #3, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    fcmuo p0.h, p0/z, z30.h, z30.h
@@ -303,48 +298,47 @@ define <vscale x 32 x iXLen> @lrint_v32f16(<vscale x 32 x half> %x) {
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x4e, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x48, 0x1e, 0x22, 0x40, 0x1c // $d14 @ cfa - 56 * VG - 16
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x4f, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x40, 0x1e, 0x22, 0x40, 0x1c // $d15 @ cfa - 64 * VG - 16
 ; CHECK-NEXT:    uunpklo z4.s, z0.h
-; CHECK-NEXT:    uunpkhi z5.s, z0.h
-; CHECK-NEXT:    mov w9, #64511 // =0xfbff
-; CHECK-NEXT:    uunpklo z6.s, z1.h
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    uunpkhi z28.s, z1.h
-; CHECK-NEXT:    mov z30.h, w9
+; CHECK-NEXT:    uunpkhi z0.s, z0.h
 ; CHECK-NEXT:    mov w9, #31743 // =0x7bff
+; CHECK-NEXT:    uunpklo z5.s, z1.h
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z28.h, #-1025 // =0xfffffffffffffbff
+; CHECK-NEXT:    uunpkhi z29.s, z1.h
+; CHECK-NEXT:    mov z7.d, #0x8000000000000000
 ; CHECK-NEXT:    uunpklo z13.s, z2.h
 ; CHECK-NEXT:    mov z9.d, #0x8000000000000000
 ; CHECK-NEXT:    uunpkhi z14.s, z2.h
 ; CHECK-NEXT:    uunpkhi z17.s, z3.h
-; CHECK-NEXT:    uunpklo z7.d, z4.s
+; CHECK-NEXT:    uunpklo z6.d, z4.s
 ; CHECK-NEXT:    uunpkhi z4.d, z4.s
-; CHECK-NEXT:    uunpklo z27.d, z5.s
-; CHECK-NEXT:    uunpklo z31.d, z6.s
-; CHECK-NEXT:    uunpkhi z8.d, z6.s
-; CHECK-NEXT:    uunpkhi z29.d, z5.s
-; CHECK-NEXT:    uunpkhi z11.d, z28.s
-; CHECK-NEXT:    uunpklo z10.d, z28.s
+; CHECK-NEXT:    uunpklo z27.d, z0.s
+; CHECK-NEXT:    uunpklo z31.d, z5.s
+; CHECK-NEXT:    uunpkhi z8.d, z5.s
+; CHECK-NEXT:    uunpkhi z30.d, z0.s
+; CHECK-NEXT:    uunpkhi z11.d, z29.s
+; CHECK-NEXT:    uunpklo z10.d, z29.s
 ; CHECK-NEXT:    uunpklo z15.s, z3.h
 ; CHECK-NEXT:    uunpklo z16.d, z14.s
 ; CHECK-NEXT:    uunpkhi z14.d, z14.s
 ; CHECK-NEXT:    mov z24.d, #0x8000000000000000
-; CHECK-NEXT:    movprfx z1, z7
-; CHECK-NEXT:    frintx z1.h, p0/m, z7.h
 ; CHECK-NEXT:    movprfx z5, z27
 ; CHECK-NEXT:    frintx z5.h, p0/m, z27.h
+; CHECK-NEXT:    movprfx z1, z6
+; CHECK-NEXT:    frintx z1.h, p0/m, z6.h
 ; CHECK-NEXT:    frintx z4.h, p0/m, z4.h
 ; CHECK-NEXT:    movprfx z12, z31
 ; CHECK-NEXT:    frintx z12.h, p0/m, z31.h
 ; CHECK-NEXT:    movprfx z27, z8
 ; CHECK-NEXT:    frintx z27.h, p0/m, z8.h
-; CHECK-NEXT:    movprfx z6, z29
-; CHECK-NEXT:    frintx z6.h, p0/m, z29.h
+; CHECK-NEXT:    movprfx z6, z30
+; CHECK-NEXT:    frintx z6.h, p0/m, z30.h
 ; CHECK-NEXT:    movprfx z31, z10
 ; CHECK-NEXT:    frintx z31.h, p0/m, z10.h
-; CHECK-NEXT:    mov z7.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z8.d, #0x8000000000000000
+; CHECK-NEXT:    frintx z11.h, p0/m, z11.h
 ; CHECK-NEXT:    movprfx z3, z16
 ; CHECK-NEXT:    frintx z3.h, p0/m, z16.h
-; CHECK-NEXT:    frintx z11.h, p0/m, z11.h
-; CHECK-NEXT:    mov z29.h, w9
+; CHECK-NEXT:    mov z30.h, w9
 ; CHECK-NEXT:    uunpklo z10.d, z13.s
 ; CHECK-NEXT:    uunpkhi z13.d, z13.s
 ; CHECK-NEXT:    uunpkhi z20.d, z15.s
@@ -355,124 +349,124 @@ define <vscale x 32 x iXLen> @lrint_v32f16(<vscale x 32 x half> %x) {
 ; CHECK-NEXT:    uunpklo z15.d, z15.s
 ; CHECK-NEXT:    mov z2.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z21.d, #0x8000000000000000
+; CHECK-NEXT:    frintx z10.h, p0/m, z10.h
 ; CHECK-NEXT:    mov z26.d, #0x8000000000000000
-; CHECK-NEXT:    mov z28.d, #0x7fffffffffffffff
+; CHECK-NEXT:    mov z29.d, #0x7fffffffffffffff
 ; CHECK-NEXT:    movprfx z19, z13
 ; CHECK-NEXT:    frintx z19.h, p0/m, z13.h
 ; CHECK-NEXT:    movprfx z13, z14
 ; CHECK-NEXT:    frintx z13.h, p0/m, z14.h
-; CHECK-NEXT:    frintx z10.h, p0/m, z10.h
 ; CHECK-NEXT:    frintx z16.h, p0/m, z16.h
 ; CHECK-NEXT:    mov z22.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z23.d, #0x8000000000000000
-; CHECK-NEXT:    frintx z15.h, p0/m, z15.h
 ; CHECK-NEXT:    mov z14.d, #0x8000000000000000
-; CHECK-NEXT:    fcmge p4.h, p0/z, z4.h, z30.h
-; CHECK-NEXT:    fcmge p2.h, p0/z, z12.h, z30.h
-; CHECK-NEXT:    fcmgt p9.h, p0/z, z12.h, z29.h
+; CHECK-NEXT:    frintx z15.h, p0/m, z15.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z4.h, z28.h
+; CHECK-NEXT:    fcmge p2.h, p0/z, z12.h, z28.h
+; CHECK-NEXT:    fcmgt p9.h, p0/z, z12.h, z30.h
 ; CHECK-NEXT:    fcmuo p8.h, p0/z, z12.h, z12.h
 ; CHECK-NEXT:    fcvtzs z7.d, p4/m, z4.h
 ; CHECK-NEXT:    fcvtzs z8.d, p2/m, z12.h
 ; CHECK-NEXT:    mov z12.d, #0x8000000000000000
-; CHECK-NEXT:    fcmge p4.h, p0/z, z27.h, z30.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z27.h, z28.h
 ; CHECK-NEXT:    fcmuo p10.h, p0/z, z11.h, z11.h
-; CHECK-NEXT:    fcmge p3.h, p0/z, z5.h, z30.h
-; CHECK-NEXT:    mov z8.d, p9/m, z28.d
+; CHECK-NEXT:    fcmge p3.h, p0/z, z5.h, z28.h
+; CHECK-NEXT:    mov z8.d, p9/m, z29.d
 ; CHECK-NEXT:    fcvtzs z9.d, p4/m, z27.h
-; CHECK-NEXT:    fcmge p4.h, p0/z, z11.h, z30.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z11.h, z28.h
 ; CHECK-NEXT:    fcvtzs z24.d, p3/m, z5.h
 ; CHECK-NEXT:    mov z8.d, p8/m, #0 // =0x0
-; CHECK-NEXT:    fcmge p1.h, p0/z, z6.h, z30.h
-; CHECK-NEXT:    fcmge p5.h, p0/z, z1.h, z30.h
+; CHECK-NEXT:    fcmge p1.h, p0/z, z6.h, z28.h
+; CHECK-NEXT:    fcmge p5.h, p0/z, z1.h, z28.h
 ; CHECK-NEXT:    str z8, [x8, #4, mul vl]
 ; CHECK-NEXT:    fcvtzs z12.d, p4/m, z11.h
-; CHECK-NEXT:    fcmgt p4.h, p0/z, z11.h, z29.h
+; CHECK-NEXT:    fcmgt p4.h, p0/z, z11.h, z30.h
 ; CHECK-NEXT:    uunpkhi z11.d, z17.s
 ; CHECK-NEXT:    movprfx z17, z20
 ; CHECK-NEXT:    frintx z17.h, p0/m, z20.h
 ; CHECK-NEXT:    fcvtzs z25.d, p1/m, z6.h
 ; CHECK-NEXT:    mov z20.d, #0x8000000000000000
 ; CHECK-NEXT:    fcvtzs z0.d, p5/m, z1.h
-; CHECK-NEXT:    fcmge p6.h, p0/z, z10.h, z30.h
+; CHECK-NEXT:    fcmge p6.h, p0/z, z10.h, z28.h
 ; CHECK-NEXT:    frintx z11.h, p0/m, z11.h
-; CHECK-NEXT:    fcmge p3.h, p0/z, z31.h, z30.h
-; CHECK-NEXT:    fcmge p1.h, p0/z, z13.h, z30.h
+; CHECK-NEXT:    fcmge p3.h, p0/z, z31.h, z28.h
+; CHECK-NEXT:    fcmge p1.h, p0/z, z13.h, z28.h
 ; CHECK-NEXT:    fcvtzs z18.d, p6/m, z10.h
-; CHECK-NEXT:    fcmgt p11.h, p0/z, z10.h, z29.h
-; CHECK-NEXT:    fcmge p5.h, p0/z, z11.h, z30.h
+; CHECK-NEXT:    fcmgt p11.h, p0/z, z10.h, z30.h
+; CHECK-NEXT:    fcmge p5.h, p0/z, z11.h, z28.h
 ; CHECK-NEXT:    fcvtzs z2.d, p3/m, z31.h
 ; CHECK-NEXT:    fcvtzs z21.d, p1/m, z13.h
-; CHECK-NEXT:    fcmge p2.h, p0/z, z17.h, z30.h
-; CHECK-NEXT:    fcmge p3.h, p0/z, z16.h, z30.h
+; CHECK-NEXT:    fcmge p2.h, p0/z, z17.h, z28.h
+; CHECK-NEXT:    fcmge p3.h, p0/z, z16.h, z28.h
 ; CHECK-NEXT:    fcmuo p1.h, p0/z, z10.h, z10.h
-; CHECK-NEXT:    sel z10.d, p4, z28.d, z12.d
-; CHECK-NEXT:    sel z12.d, p11, z28.d, z18.d
+; CHECK-NEXT:    sel z10.d, p4, z29.d, z12.d
+; CHECK-NEXT:    sel z12.d, p11, z29.d, z18.d
 ; CHECK-NEXT:    fcvtzs z26.d, p5/m, z11.h
 ; CHECK-NEXT:    fcvtzs z22.d, p2/m, z17.h
-; CHECK-NEXT:    fcmgt p4.h, p0/z, z11.h, z29.h
+; CHECK-NEXT:    fcmgt p4.h, p0/z, z11.h, z30.h
 ; CHECK-NEXT:    fcvtzs z23.d, p3/m, z16.h
 ; CHECK-NEXT:    mov z10.d, p10/m, #0 // =0x0
 ; CHECK-NEXT:    mov z12.d, p1/m, #0 // =0x0
-; CHECK-NEXT:    fcmge p6.h, p0/z, z19.h, z30.h
+; CHECK-NEXT:    fcmge p6.h, p0/z, z19.h, z28.h
 ; CHECK-NEXT:    str z10, [x8, #7, mul vl]
-; CHECK-NEXT:    fcmge p7.h, p0/z, z3.h, z30.h
+; CHECK-NEXT:    fcmge p7.h, p0/z, z3.h, z28.h
 ; CHECK-NEXT:    str z12, [x8, #8, mul vl]
-; CHECK-NEXT:    mov z26.d, p4/m, z28.d
-; CHECK-NEXT:    fcmge p2.h, p0/z, z15.h, z30.h
-; CHECK-NEXT:    mov z30.d, #0x8000000000000000
+; CHECK-NEXT:    mov z26.d, p4/m, z29.d
+; CHECK-NEXT:    fcmge p2.h, p0/z, z15.h, z28.h
+; CHECK-NEXT:    mov z28.d, #0x8000000000000000
 ; CHECK-NEXT:    fcvtzs z14.d, p6/m, z19.h
-; CHECK-NEXT:    fcmgt p5.h, p0/z, z16.h, z29.h
-; CHECK-NEXT:    fcmgt p3.h, p0/z, z17.h, z29.h
+; CHECK-NEXT:    fcmgt p5.h, p0/z, z16.h, z30.h
+; CHECK-NEXT:    fcmgt p3.h, p0/z, z17.h, z30.h
 ; CHECK-NEXT:    fcvtzs z20.d, p7/m, z3.h
-; CHECK-NEXT:    fcvtzs z30.d, p2/m, z15.h
+; CHECK-NEXT:    fcvtzs z28.d, p2/m, z15.h
 ; CHECK-NEXT:    fcmuo p1.h, p0/z, z11.h, z11.h
 ; CHECK-NEXT:    fcmuo p2.h, p0/z, z16.h, z16.h
-; CHECK-NEXT:    sel z11.d, p5, z28.d, z23.d
-; CHECK-NEXT:    sel z16.d, p3, z28.d, z22.d
-; CHECK-NEXT:    fcmgt p4.h, p0/z, z19.h, z29.h
-; CHECK-NEXT:    fcmgt p3.h, p0/z, z15.h, z29.h
+; CHECK-NEXT:    sel z11.d, p5, z29.d, z23.d
+; CHECK-NEXT:    sel z16.d, p3, z29.d, z22.d
+; CHECK-NEXT:    fcmgt p4.h, p0/z, z19.h, z30.h
+; CHECK-NEXT:    fcmgt p3.h, p0/z, z15.h, z30.h
 ; CHECK-NEXT:    mov z26.d, p1/m, #0 // =0x0
 ; CHECK-NEXT:    mov z11.d, p2/m, #0 // =0x0
-; CHECK-NEXT:    fcmgt p1.h, p0/z, z13.h, z29.h
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z13.h, z30.h
 ; CHECK-NEXT:    fcmuo p6.h, p0/z, z17.h, z17.h
 ; CHECK-NEXT:    str z26, [x8, #15, mul vl]
-; CHECK-NEXT:    sel z26.d, p4, z28.d, z14.d
+; CHECK-NEXT:    sel z26.d, p4, z29.d, z14.d
 ; CHECK-NEXT:    str z11, [x8, #14, mul vl]
-; CHECK-NEXT:    mov z30.d, p3/m, z28.d
-; CHECK-NEXT:    fcmgt p2.h, p0/z, z3.h, z29.h
+; CHECK-NEXT:    mov z28.d, p3/m, z29.d
+; CHECK-NEXT:    fcmgt p2.h, p0/z, z3.h, z30.h
 ; CHECK-NEXT:    fcmuo p4.h, p0/z, z13.h, z13.h
 ; CHECK-NEXT:    fcmuo p3.h, p0/z, z3.h, z3.h
-; CHECK-NEXT:    sel z3.d, p1, z28.d, z21.d
+; CHECK-NEXT:    sel z3.d, p1, z29.d, z21.d
 ; CHECK-NEXT:    mov z16.d, p6/m, #0 // =0x0
-; CHECK-NEXT:    fcmgt p12.h, p0/z, z27.h, z29.h
-; CHECK-NEXT:    sel z11.d, p2, z28.d, z20.d
+; CHECK-NEXT:    fcmgt p12.h, p0/z, z27.h, z30.h
+; CHECK-NEXT:    sel z11.d, p2, z29.d, z20.d
 ; CHECK-NEXT:    str z16, [x8, #13, mul vl]
 ; CHECK-NEXT:    mov z3.d, p4/m, #0 // =0x0
 ; CHECK-NEXT:    fcmuo p6.h, p0/z, z15.h, z15.h
-; CHECK-NEXT:    fcmgt p1.h, p0/z, z4.h, z29.h
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z4.h, z30.h
 ; CHECK-NEXT:    mov z11.d, p3/m, #0 // =0x0
-; CHECK-NEXT:    mov z9.d, p12/m, z28.d
+; CHECK-NEXT:    mov z9.d, p12/m, z29.d
 ; CHECK-NEXT:    str z3, [x8, #11, mul vl]
 ; CHECK-NEXT:    fcmuo p5.h, p0/z, z19.h, z19.h
-; CHECK-NEXT:    fcmgt p2.h, p0/z, z5.h, z29.h
+; CHECK-NEXT:    fcmgt p2.h, p0/z, z5.h, z30.h
 ; CHECK-NEXT:    str z11, [x8, #10, mul vl]
-; CHECK-NEXT:    mov z30.d, p6/m, #0 // =0x0
-; CHECK-NEXT:    sel z3.d, p1, z28.d, z7.d
-; CHECK-NEXT:    fcmgt p4.h, p0/z, z6.h, z29.h
+; CHECK-NEXT:    mov z28.d, p6/m, #0 // =0x0
+; CHECK-NEXT:    sel z3.d, p1, z29.d, z7.d
+; CHECK-NEXT:    fcmgt p4.h, p0/z, z6.h, z30.h
 ; CHECK-NEXT:    fcmuo p3.h, p0/z, z27.h, z27.h
-; CHECK-NEXT:    str z30, [x8, #12, mul vl]
+; CHECK-NEXT:    str z28, [x8, #12, mul vl]
 ; CHECK-NEXT:    mov z26.d, p5/m, #0 // =0x0
-; CHECK-NEXT:    sel z7.d, p2, z28.d, z24.d
-; CHECK-NEXT:    fcmgt p6.h, p0/z, z31.h, z29.h
-; CHECK-NEXT:    fcmgt p1.h, p0/z, z1.h, z29.h
+; CHECK-NEXT:    sel z7.d, p2, z29.d, z24.d
+; CHECK-NEXT:    fcmgt p6.h, p0/z, z31.h, z30.h
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z1.h, z30.h
 ; CHECK-NEXT:    str z26, [x8, #9, mul vl]
-; CHECK-NEXT:    sel z24.d, p4, z28.d, z25.d
+; CHECK-NEXT:    sel z24.d, p4, z29.d, z25.d
 ; CHECK-NEXT:    mov z9.d, p3/m, #0 // =0x0
 ; CHECK-NEXT:    fcmuo p5.h, p0/z, z31.h, z31.h
 ; CHECK-NEXT:    fcmuo p2.h, p0/z, z6.h, z6.h
-; CHECK-NEXT:    mov z2.d, p6/m, z28.d
+; CHECK-NEXT:    mov z2.d, p6/m, z29.d
 ; CHECK-NEXT:    str z9, [x8, #5, mul vl]
-; CHECK-NEXT:    mov z0.d, p1/m, z28.d
+; CHECK-NEXT:    mov z0.d, p1/m, z29.d
 ; CHECK-NEXT:    fcmuo p3.h, p0/z, z5.h, z5.h
 ; CHECK-NEXT:    fcmuo p4.h, p0/z, z4.h, z4.h
 ; CHECK-NEXT:    mov z2.d, p5/m, #0 // =0x0
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
index 5cca5539048b5..1ceaa5ad27734 100644
--- a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
@@ -509,6 +509,294 @@ define <vscale x 2 x bfloat> @splat_nxv2bf16_imm() {
   ret <vscale x 2 x bfloat> splat(bfloat 1.0)
 }
 
+define <vscale x 2 x half> @splat_nzero_nxv2f16() {
+; CHECK-LABEL: splat_nzero_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0x8000
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x half> splat (half -0.0)
+}
+
+define <vscale x 4 x half> @splat_nzero_nxv4f16() {
+; CHECK-LABEL: splat_nzero_nxv4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0x8000
+; CHECK-NEXT:    ret
+  ret <vscale x 4 x half> splat (half -0.0)
+}
+
+define <vscale x 8 x half> @splat_nzero_nxv8f16() {
+; CHECK-LABEL: splat_nzero_nxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0x8000
+; CHECK-NEXT:    ret
+  ret <vscale x 8 x half> splat (half -0.0)
+}
+
+define <vscale x 2 x float> @splat_nzero_nxv2f32() {
+; CHECK-LABEL: splat_nzero_nxv2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.s, #0x80000000
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x float> splat (float -0.0)
+}
+
+define <vscale x 4 x float> @splat_nzero_nxv4f32() {
+; CHECK-LABEL: splat_nzero_nxv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.s, #0x80000000
+; CHECK-NEXT:    ret
+  ret <vscale x 4 x float> splat (float -0.0)
+}
+
+define <vscale x 2 x double> @splat_nzero_nxv2f64() {
+; CHECK-LABEL: splat_nzero_nxv2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, #0x8000000000000000
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x double> splat (double -0.0)
+}
+
+define <vscale x 2 x bfloat> @splat_nzero_nxv2bf16() {
+; CHECK-LABEL: splat_nzero_nxv2bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0x8000
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x bfloat> splat (bfloat -0.0)
+}
+
+define <vscale x 4 x bfloat> @splat_nzero_nxv4bf16() {
+; CHECK-LABEL: splat_nzero_nxv4bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0x8000
+; CHECK-NEXT:    ret
+  ret <vscale x 4 x bfloat> splat (bfloat -0.0)
+}
+
+define <vscale x 8 x bfloat> @splat_nzero_nxv8bf16() {
+; CHECK-LABEL: splat_nzero_nxv8bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0x8000
+; CHECK-NEXT:    ret
+  ret <vscale x 8 x bfloat> splat (bfloat -0.0)
+}
+
+define <vscale x 2 x half> @splat_pinf_nxv2f16() {
+; CHECK-LABEL: splat_pinf_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0x7c00
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x half> splat (half 0x7FF0000000000000)
+}
+
+define <vscale x 4 x half> @splat_pinf_nxv4f16() {
+; CHECK-LABEL: splat_pinf_nxv4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0x7c00
+; CHECK-NEXT:    ret
+  ret <vscale x 4 x half> splat (half 0x7FF0000000000000)
+}
+
+define <vscale x 8 x half> @splat_pinf_nxv8f16() {
+; CHECK-LABEL: splat_pinf_nxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0x7c00
+; CHECK-NEXT:    ret
+  ret <vscale x 8 x half> splat (half 0x7FF0000000000000)
+}
+
+define <vscale x 2 x float> @splat_pinf_nxv2f32() {
+; CHECK-LABEL: splat_pinf_nxv2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.s, #0x7f800000
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x float> splat (float 0x7FF0000000000000)
+}
+
+define <vscale x 4 x float> @splat_pinf_nxv4f32() {
+; CHECK-LABEL: splat_pinf_nxv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.s, #0x7f800000
+; CHECK-NEXT:    ret
+  ret <vscale x 4 x float> splat (float 0x7FF0000000000000)
+}
+
+define <vscale x 2 x double> @splat_pinf_nxv2f64() {
+; CHECK-LABEL: splat_pinf_nxv2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, #0x7ff0000000000000
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x double> splat (double 0x7FF0000000000000)
+}
+
+define <vscale x 2 x bfloat> @splat_pinf_nxv2bf16() {
+; CHECK-LABEL: splat_pinf_nxv2bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.h, #32640 // =0x7f80
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x bfloat> splat (bfloat 0x7FF0000000000000)
+}
+
+define <vscale x 4 x bfloat> @splat_pinf_nxv4bf16() {
+; CHECK-LABEL: splat_pinf_nxv4bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.h, #32640 // =0x7f80
+; CHECK-NEXT:    ret
+  ret <vscale x 4 x bfloat> splat (bfloat 0x7FF0000000000000)
+}
+
+define <vscale x 8 x bfloat> @splat_pinf_nxv8bf16() {
+; CHECK-LABEL: splat_pinf_nxv8bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.h, #32640 // =0x7f80
+; CHECK-NEXT:    ret
+  ret <vscale x 8 x bfloat> splat (bfloat 0x7FF0000000000000)
+}
+
+define <vscale x 2 x half> @splat_ninf_nxv2f16() {
+; CHECK-LABEL: splat_ninf_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0xfc00
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x half> splat (half 0xFFF0000000000000)
+}
+
+define <vscale x 4 x half> @splat_ninf_nxv4f16() {
+; CHECK-LABEL: splat_ninf_nxv4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0xfc00
+; CHECK-NEXT:    ret
+  ret <vscale x 4 x half> splat (half 0xFFF0000000000000)
+}
+
+define <vscale x 8 x half> @splat_ninf_nxv8f16() {
+; CHECK-LABEL: splat_ninf_nxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0xfc00
+; CHECK-NEXT:    ret
+  ret <vscale x 8 x half> splat (half 0xFFF0000000000000)
+}
+
+define <vscale x 2 x float> @splat_ninf_nxv2f32() {
+; CHECK-LABEL: splat_ninf_nxv2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.s, #0xff800000
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x float> splat (float 0xFFF0000000000000)
+}
+
+define <vscale x 4 x float> @splat_ninf_nxv4f32() {
+; CHECK-LABEL: splat_ninf_nxv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.s, #0xff800000
+; CHECK-NEXT:    ret
+  ret <vscale x 4 x float> splat (float 0xFFF0000000000000)
+}
+
+define <vscale x 2 x double> @splat_ninf_nxv2f64() {
+; CHECK-LABEL: splat_ninf_nxv2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, #0xfff0000000000000
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x double> splat (double 0xFFF0000000000000)
+}
+
+define <vscale x 2 x bfloat> @splat_ninf_nxv2bf16() {
+; CHECK-LABEL: splat_ninf_nxv2bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0xff80
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x bfloat> splat (bfloat 0xFFF0000000000000)
+}
+
+define <vscale x 4 x bfloat> @splat_ninf_nxv4bf16() {
+; CHECK-LABEL: splat_ninf_nxv4bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0xff80
+; CHECK-NEXT:    ret
+  ret <vscale x 4 x bfloat> splat (bfloat 0xFFF0000000000000)
+}
+
+define <vscale x 8 x bfloat> @splat_ninf_nxv8bf16() {
+; CHECK-LABEL: splat_ninf_nxv8bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0xff80
+; CHECK-NEXT:    ret
+  ret <vscale x 8 x bfloat> splat (bfloat 0xFFF0000000000000)
+}
+
+define <vscale x 2 x half> @splat_nan_nxv2f16() {
+; CHECK-LABEL: splat_nan_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0x7e00
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x half> splat (half 0x7FF8000000000000)
+}
+
+define <vscale x 4 x half> @splat_nan_nxv4f16() {
+; CHECK-LABEL: splat_nan_nxv4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0x7e00
+; CHECK-NEXT:    ret
+  ret <vscale x 4 x half> splat (half 0x7FF8000000000000)
+}
+
+define <vscale x 8 x half> @splat_nan_nxv8f16() {
+; CHECK-LABEL: splat_nan_nxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0x7e00
+; CHECK-NEXT:    ret
+  ret <vscale x 8 x half> splat (half 0x7FF8000000000000)
+}
+
+define <vscale x 2 x float> @splat_nan_nxv2f32() {
+; CHECK-LABEL: splat_nan_nxv2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.s, #0x7fc00000
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x float> splat (float 0x7FF8000000000000)
+}
+
+define <vscale x 4 x float> @splat_nan_nxv4f32() {
+; CHECK-LABEL: splat_nan_nxv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.s, #0x7fc00000
+; CHECK-NEXT:    ret
+  ret <vscale x 4 x float> splat (float 0x7FF8000000000000)
+}
+
+define <vscale x 2 x double> @splat_nan_nxv2f64() {
+; CHECK-LABEL: splat_nan_nxv2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, #0x7ff8000000000000
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x double> splat (double 0x7FF8000000000000)
+}
+
+define <vscale x 2 x bfloat> @splat_nan_nxv2bf16() {
+; CHECK-LABEL: splat_nan_nxv2bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.h, #32704 // =0x7fc0
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x bfloat> splat (bfloat 0x7FF8000000000000)
+}
+
+define <vscale x 4 x bfloat> @splat_nan_nxv4bf16() {
+; CHECK-LABEL: splat_nan_nxv4bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.h, #32704 // =0x7fc0
+; CHECK-NEXT:    ret
+  ret <vscale x 4 x bfloat> splat (bfloat 0x7FF8000000000000)
+}
+
+define <vscale x 8 x bfloat> @splat_nan_nxv8bf16() {
+; CHECK-LABEL: splat_nan_nxv8bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.h, #32704 // =0x7fc0
+; CHECK-NEXT:    ret
+  ret <vscale x 8 x bfloat> splat (bfloat 0x7FF8000000000000)
+}
+
 define <vscale x 4 x i32> @splat_nxv4i32_fold(<vscale x 4 x i32> %x) {
 ; CHECK-LABEL: splat_nxv4i32_fold:
 ; CHECK:       // %bb.0:
@@ -581,8 +869,8 @@ define <vscale x 2 x double> @splat_nxv2f64_imm_out_of_range() {
 ; CHECK-LABEL: splat_nxv2f64_imm_out_of_range:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    adrp x8, .LCPI60_0
-; CHECK-NEXT:    add x8, x8, :lo12:.LCPI60_0
+; CHECK-NEXT:    adrp x8, .LCPI96_0
+; CHECK-NEXT:    add x8, x8, :lo12:.LCPI96_0
 ; CHECK-NEXT:    ld1rd { z0.d }, p0/z, [x8]
 ; CHECK-NEXT:    ret
   ret <vscale x 2 x double> splat(double 3.33)
diff --git a/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll b/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll
index 6b5b3d6d436cb..b04029c273ae2 100644
--- a/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll
@@ -338,8 +338,7 @@ ret <vscale x 2 x double> %sel
 define <vscale x 8 x half> @sel_merge_nxv8f16_negative_zero(<vscale x 8 x i1> %p, <vscale x 8 x half> %in) {
 ; CHECK-LABEL: sel_merge_nxv8f16_negative_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #32768 // =0x8000
-; CHECK-NEXT:    mov z1.h, w8
+; CHECK-NEXT:    dupm z1.h, #0x8000
 ; CHECK-NEXT:    mov z0.h, p0/m, z1.h
 ; CHECK-NEXT:    ret
 %sel = select <vscale x 8 x i1> %p, <vscale x 8 x half> splat (half -0.0), <vscale x 8 x half> %in
@@ -349,8 +348,7 @@ ret <vscale x 8 x half> %sel
 define <vscale x 4 x half> @sel_merge_nx4f16_negative_zero(<vscale x 4 x i1> %p, <vscale x 4 x half> %in) {
 ; CHECK-LABEL: sel_merge_nx4f16_negative_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #32768 // =0x8000
-; CHECK-NEXT:    mov z1.h, w8
+; CHECK-NEXT:    dupm z1.h, #0x8000
 ; CHECK-NEXT:    mov z0.s, p0/m, z1.s
 ; CHECK-NEXT:    ret
 %sel = select <vscale x 4 x i1> %p, <vscale x 4 x half> splat (half -0.0), <vscale x 4 x half> %in
@@ -360,8 +358,7 @@ ret <vscale x 4 x half> %sel
 define <vscale x 2 x half> @sel_merge_nx2f16_negative_zero(<vscale x 2 x i1> %p, <vscale x 2 x half> %in) {
 ; CHECK-LABEL: sel_merge_nx2f16_negative_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #32768 // =0x8000
-; CHECK-NEXT:    mov z1.h, w8
+; CHECK-NEXT:    dupm z1.h, #0x8000
 ; CHECK-NEXT:    mov z0.d, p0/m, z1.d
 ; CHECK-NEXT:    ret
 %sel = select <vscale x 2 x i1> %p, <vscale x 2 x half> splat (half -0.0), <vscale x 2 x half> %in
@@ -371,8 +368,7 @@ ret <vscale x 2 x half> %sel
 define <vscale x 4 x float> @sel_merge_nx4f32_negative_zero(<vscale x 4 x i1> %p, <vscale x 4 x float> %in) {
 ; CHECK-LABEL: sel_merge_nx4f32_negative_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #-2147483648 // =0x80000000
-; CHECK-NEXT:    mov z1.s, w8
+; CHECK-NEXT:    mov z1.s, #0x80000000
 ; CHECK-NEXT:    mov z0.s, p0/m, z1.s
 ; CHECK-NEXT:    ret
 %sel = select <vscale x 4 x i1> %p, <vscale x 4 x float> splat (float -0.0), <vscale x 4 x float> %in
@@ -382,8 +378,7 @@ ret <vscale x 4 x float> %sel
 define <vscale x 2 x float> @sel_merge_nx2f32_negative_zero(<vscale x 2 x i1> %p, <vscale x 2 x float> %in) {
 ; CHECK-LABEL: sel_merge_nx2f32_negative_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #-2147483648 // =0x80000000
-; CHECK-NEXT:    mov z1.s, w8
+; CHECK-NEXT:    mov z1.s, #0x80000000
 ; CHECK-NEXT:    mov z0.d, p0/m, z1.d
 ; CHECK-NEXT:    ret
 %sel = select <vscale x 2 x i1> %p, <vscale x 2 x float> splat (float -0.0), <vscale x 2 x float> %in
@@ -393,8 +388,7 @@ ret <vscale x 2 x float> %sel
 define <vscale x 2 x double> @sel_merge_nx2f64_negative_zero(<vscale x 2 x i1> %p, <vscale x 2 x double> %in) {
 ; CHECK-LABEL: sel_merge_nx2f64_negative_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
-; CHECK-NEXT:    mov z1.d, x8
+; CHECK-NEXT:    mov z1.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z0.d, p0/m, z1.d
 ; CHECK-NEXT:    ret
 %sel = select <vscale x 2 x i1> %p, <vscale x 2 x double> splat (double -0.0), <vscale x 2 x double> %in

From 22a2cae5d6735a510b17859848b14f60d2e5cdfa Mon Sep 17 00:00:00 2001
From: Guillot Tony <tony.guillot@protonmail.com>
Date: Tue, 18 Nov 2025 13:36:51 +0100
Subject: [PATCH 04/52] [Clang] Fix cleanup attribute by delaying type checks
 after the type is deduced (#164440)

Previously, the handling of the `cleanup` attribute had some checks
based on the type, but we were deducing the type after handling the
attribute.
This PR fixes the way we are dealing with type checks for the `cleanup`
attribute by delaying these checks until after the type has been deduced.

It is also fixed in such a way that the solution can be adapted for other
attributes that do some type-based checks.
This is the list of C/C++ attributes that do type-based checks
and will need to be fixed in additional PRs:
- CUDAShared
- MutualExclusions
- PassObjectSize
- InitPriority
- Sentinel
- AcquireCapability
- RequiresCapability
- LocksExcluded
- AcquireHandle

NB: Some attributes could have been missed in my shallow search.

Fixes #129631
---
 clang/docs/ReleaseNotes.rst                   |  1 +
 clang/include/clang/Basic/Attr.td             | 12 +++++++
 clang/include/clang/Sema/CMakeLists.txt       |  5 +++
 clang/include/clang/Sema/Sema.h               |  8 +++++
 clang/lib/Sema/SemaDecl.cpp                   |  9 +++++
 clang/lib/Sema/SemaDeclAttr.cpp               | 35 +++++++++++++------
 .../lib/Sema/SemaTemplateInstantiateDecl.cpp  |  9 +++++
 clang/test/Sema/type-dependent-attrs.c        | 10 ++++++
 clang/test/SemaCXX/attr-cleanup.cpp           | 25 +++++++++++++
 clang/utils/TableGen/ClangAttrEmitter.cpp     | 20 +++++++++++
 clang/utils/TableGen/TableGen.cpp             |  7 ++++
 clang/utils/TableGen/TableGenBackends.h       |  2 ++
 llvm/docs/TableGen/BackEnds.rst               |  7 ++++
 13 files changed, 140 insertions(+), 10 deletions(-)
 create mode 100644 clang/test/Sema/type-dependent-attrs.c

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 7459127670cc3..c2da61e4d066a 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -500,6 +500,7 @@ Bug Fixes to Attribute Support
 - Fixes crashes or missing diagnostics with the `device_kernel` attribute. (#GH161905)
 - Fix handling of parameter indexes when an attribute is applied to a C++23 explicit object member function.
 - Fixed several false positives and false negatives in function effect (`nonblocking`) analysis. (#GH166078) (#GH166101) (#GH166110)
+- Fix ``cleanup`` attribute by delaying type checks until after the type is deduced. (#GH129631)
 
 Bug Fixes to C++ Support
 ^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 8dfe4bc08c48e..0097476bc0d8d 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -741,6 +741,17 @@ class Attr {
   // our existing general parsing we need to have a separate flag that
   // opts an attribute into strict parsing of attribute parameters
   bit StrictEnumParameters = 0;
+  // Set to true for attributes which have Sema checks which require the type
+  // to be deduced.
+  // When `IsTypeDependent` is set to true, you should add an `ActOn*Attr`
+  // function to `Sema.h`. The signature of the function must be:
+  // `void ActOn*Attr(Decl *, const Attr *);` where the `Decl *` is the
+  // declaration the attribute will be attached to; its type will have already
+  // been deduced, and the `Attr *` is the attribute being applied to that
+  // declaration. This function should handle all type-sensitive semantics for
+  // the attribute. This function will be automatically called by
+  // `Sema::CheckAttributesOnDeducedType()`.
+  bit IsTypeDependent = 0;
   // Lists language options, one of which is required to be true for the
   // attribute to be applicable. If empty, no language options are required.
   list<LangOpt> LangOpts = [];
@@ -1400,6 +1411,7 @@ def Cleanup : InheritableAttr {
   let Args = [DeclArgument<Function, "FunctionDecl">];
   let Subjects = SubjectList<[LocalVar]>;
   let Documentation = [CleanupDocs];
+  let IsTypeDependent = 1;
   // FIXME: DeclArgument should be reworked to also store the
   // Expr instead of adding attr specific hacks like the following.
   // See the discussion in https://github.com/llvm/llvm-project/pull/14023.
diff --git a/clang/include/clang/Sema/CMakeLists.txt b/clang/include/clang/Sema/CMakeLists.txt
index 9077e22c2307c..3f540ea596871 100644
--- a/clang/include/clang/Sema/CMakeLists.txt
+++ b/clang/include/clang/Sema/CMakeLists.txt
@@ -8,6 +8,11 @@ clang_tablegen(AttrParsedAttrKinds.inc -gen-clang-attr-parsed-attr-kinds
   SOURCE ../Basic/Attr.td
   TARGET ClangAttrParsedAttrKinds)
 
+clang_tablegen(AttrIsTypeDependent.inc -gen-clang-attr-is-type-dependent
+  -I ${CMAKE_CURRENT_SOURCE_DIR}/../../
+  SOURCE ../Basic/Attr.td
+  TARGET ClangAttrIsTypeDependent)
+
 clang_tablegen(AttrSpellingListIndex.inc -gen-clang-attr-spelling-index
   -I ${CMAKE_CURRENT_SOURCE_DIR}/../../
   SOURCE ../Basic/Attr.td
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 6ca182338d6af..fd2a2469142e4 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -4456,6 +4456,10 @@ class Sema final : public SemaBase {
       NamedDecl *New, Decl *Old,
       AvailabilityMergeKind AMK = AvailabilityMergeKind::Redeclaration);
 
+  /// CheckAttributesOnDeducedType - Calls Sema functions for attributes that
+  /// require the type to be deduced.
+  void CheckAttributesOnDeducedType(Decl *D);
+
   /// MergeTypedefNameDecl - We just parsed a typedef 'New' which has the
   /// same name and scope as a previous declaration 'Old'.  Figure out
   /// how to resolve this situation, merging decls or emitting
@@ -4760,6 +4764,8 @@ class Sema final : public SemaBase {
   // linkage or not.
   static bool mightHaveNonExternalLinkage(const DeclaratorDecl *FD);
 
+#include "clang/Sema/AttrIsTypeDependent.inc"
+
   ///@}
 
   //
@@ -15469,6 +15475,8 @@ class Sema final : public SemaBase {
   std::optional<FunctionEffectMode>
   ActOnEffectExpression(Expr *CondExpr, StringRef AttributeName);
 
+  void ActOnCleanupAttr(Decl *D, const Attr *A);
+
 private:
   /// The implementation of RequireCompleteType
   bool RequireCompleteTypeImpl(SourceLocation Loc, QualType T,
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 25b89d65847ad..b7aecadc86871 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -3355,6 +3355,11 @@ void Sema::mergeDeclAttributes(NamedDecl *New, Decl *Old,
   if (!foundAny) New->dropAttrs();
 }
 
+void Sema::CheckAttributesOnDeducedType(Decl *D) {
+  for (const Attr *A : D->attrs())
+    checkAttrIsTypeDependent(D, A);
+}
+
 // Returns the number of added attributes.
 template <class T>
 static unsigned propagateAttribute(ParmVarDecl *To, const ParmVarDecl *From,
@@ -13809,6 +13814,8 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, bool DirectInit) {
       return;
   }
 
+  this->CheckAttributesOnDeducedType(RealDecl);
+
   // dllimport cannot be used on variable definitions.
   if (VDecl->hasAttr<DLLImportAttr>() && !VDecl->isStaticDataMember()) {
     Diag(VDecl->getLocation(), diag::err_attribute_dllimport_data_definition);
@@ -14300,6 +14307,8 @@ void Sema::ActOnUninitializedDecl(Decl *RealDecl) {
         DeduceVariableDeclarationType(Var, false, nullptr))
       return;
 
+    this->CheckAttributesOnDeducedType(RealDecl);
+
     // C++11 [class.static.data]p3: A static data member can be declared with
     // the constexpr specifier; if so, its declaration shall specify
     // a brace-or-equal-initializer.
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index a9e7b44ac9d73..bda7aa32a9348 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -3511,16 +3511,6 @@ static void handleCleanupAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
     return;
   }
 
-  // We're currently more strict than GCC about what function types we accept.
-  // If this ever proves to be a problem it should be easy to fix.
-  QualType Ty = S.Context.getPointerType(cast<VarDecl>(D)->getType());
-  QualType ParamTy = FD->getParamDecl(0)->getType();
-  if (!S.IsAssignConvertCompatible(S.CheckAssignmentConstraints(
-          FD->getParamDecl(0)->getLocation(), ParamTy, Ty))) {
-    S.Diag(Loc, diag::err_attribute_cleanup_func_arg_incompatible_type)
-      << NI.getName() << ParamTy << Ty;
-    return;
-  }
   VarDecl *VD = cast<VarDecl>(D);
   // Create a reference to the variable declaration. This is a fake/dummy
   // reference.
@@ -8311,3 +8301,28 @@ void Sema::redelayDiagnostics(DelayedDiagnosticPool &pool) {
   assert(curPool && "re-emitting in undelayed context not supported");
   curPool->steal(pool);
 }
+
+void Sema::ActOnCleanupAttr(Decl *D, const Attr *A) {
+  VarDecl *VD = cast<VarDecl>(D);
+  if (VD->getType()->isDependentType())
+    return;
+
+  // Obtains the FunctionDecl that was found when handling the attribute
+  // earlier.
+  CleanupAttr *Attr = D->getAttr<CleanupAttr>();
+  FunctionDecl *FD = Attr->getFunctionDecl();
+  DeclarationNameInfo NI = FD->getNameInfo();
+
+  // We're currently more strict than GCC about what function types we accept.
+  // If this ever proves to be a problem it should be easy to fix.
+  QualType Ty = this->Context.getPointerType(VD->getType());
+  QualType ParamTy = FD->getParamDecl(0)->getType();
+  if (!this->IsAssignConvertCompatible(this->CheckAssignmentConstraints(
+          FD->getParamDecl(0)->getLocation(), ParamTy, Ty))) {
+    this->Diag(Attr->getArgLoc(),
+               diag::err_attribute_cleanup_func_arg_incompatible_type)
+        << NI.getName() << ParamTy << Ty;
+    D->dropAttr<CleanupAttr>();
+    return;
+  }
+}
diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index 1b6b559c1227b..3a4b2ccc74350 100644
--- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -1007,6 +1007,15 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs,
       continue;
     }
 
+    if (auto *A = dyn_cast<CleanupAttr>(TmplAttr)) {
+      if (!New->hasAttr<CleanupAttr>()) {
+        auto *NewAttr = A->clone(Context);
+        NewAttr->setArgLoc(A->getArgLoc());
+        New->addAttr(NewAttr);
+      }
+      continue;
+    }
+
     assert(!TmplAttr->isPackExpansion());
     if (TmplAttr->isLateParsed() && LateAttrs) {
       // Late parsed attributes must be instantiated and attached after the
diff --git a/clang/test/Sema/type-dependent-attrs.c b/clang/test/Sema/type-dependent-attrs.c
new file mode 100644
index 0000000000000..13068b3f94ad4
--- /dev/null
+++ b/clang/test/Sema/type-dependent-attrs.c
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -std=c23 -fsyntax-only -verify %s
+
+int open() { return 0; }
+void close(typeof(open()) *) {}
+
+void cleanup_attr() {
+  int fd_int [[gnu::cleanup(close)]] = open();
+  auto fd_auto [[gnu::cleanup(close)]] = open();
+  float fd_invalid [[gnu::cleanup(close)]] = open(); // expected-error {{'cleanup' function 'close' parameter has type 'typeof (open()) *' (aka 'int *') which is incompatible with type 'float *'}}
+}
diff --git a/clang/test/SemaCXX/attr-cleanup.cpp b/clang/test/SemaCXX/attr-cleanup.cpp
index 32d10683edebb..6048b4e92ec3f 100644
--- a/clang/test/SemaCXX/attr-cleanup.cpp
+++ b/clang/test/SemaCXX/attr-cleanup.cpp
@@ -27,3 +27,28 @@ namespace E {
     int v1 __attribute__((cleanup(c3))); // expected-error {{'c3' is not a single function}}
   }
 }
+
+namespace F {
+  int open() { return 0; }
+  void close(decltype(open()) *) {}
+
+  void test1() {
+    auto fd [[gnu::cleanup(close)]] = open();
+  }
+
+  template <typename Ty>
+  void test2() {
+    Ty fd [[gnu::cleanup(close)]] = open();
+  }
+
+  template <typename Ty>
+  void test3() {
+    Ty fd [[gnu::cleanup(close)]] = open(); // #TEST3_CLEANUP
+  }
+
+  int main() {
+    test2<int>();
+    test3<float>(); // expected-error@#TEST3_CLEANUP {{'cleanup' function 'close' parameter has type 'decltype(open()) *' (aka 'int *') which is incompatible with type 'float *'}} \
+                       expected-note {{in instantiation of function template specialization 'F::test3<float>' requested here}}
+  }
+}
diff --git a/clang/utils/TableGen/ClangAttrEmitter.cpp b/clang/utils/TableGen/ClangAttrEmitter.cpp
index e49dcb9b70b0f..bee9a01a3b01a 100644
--- a/clang/utils/TableGen/ClangAttrEmitter.cpp
+++ b/clang/utils/TableGen/ClangAttrEmitter.cpp
@@ -5045,6 +5045,26 @@ void EmitClangAttrParsedAttrKinds(const RecordKeeper &Records,
      << "}\n";
 }
 
+// Emits Sema calls for type dependent attributes
+void EmitClangAttrIsTypeDependent(const RecordKeeper &Records,
+                                  raw_ostream &OS) {
+  emitSourceFileHeader("Attribute is type dependent", OS, Records);
+
+  OS << "void checkAttrIsTypeDependent(Decl *D, const Attr *A) {\n";
+  OS << "  switch (A->getKind()) {\n";
+  OS << "  default:\n";
+  OS << "    break;\n";
+  for (const auto *A : Records.getAllDerivedDefinitions("Attr")) {
+    if (A->getValueAsBit("IsTypeDependent")) {
+      OS << "  case attr::" << A->getName() << ":\n";
+      OS << "    ActOn" << A->getName() << "Attr(D, A);\n";
+      OS << "    break;\n";
+    }
+  }
+  OS << "  }\n";
+  OS << "}\n";
+}
+
 // Emits the code to dump an attribute.
 void EmitClangAttrTextNodeDump(const RecordKeeper &Records, raw_ostream &OS) {
   emitSourceFileHeader("Attribute text node dumper", OS, Records);
diff --git a/clang/utils/TableGen/TableGen.cpp b/clang/utils/TableGen/TableGen.cpp
index 866040d503646..707ce617cb2d0 100644
--- a/clang/utils/TableGen/TableGen.cpp
+++ b/clang/utils/TableGen/TableGen.cpp
@@ -43,6 +43,7 @@ enum ActionType {
   GenClangAttrParsedAttrList,
   GenClangAttrParsedAttrImpl,
   GenClangAttrParsedAttrKinds,
+  GenClangAttrIsTypeDependent,
   GenClangAttrTextNodeDump,
   GenClangAttrNodeTraverse,
   GenClangBasicReader,
@@ -179,6 +180,9 @@ cl::opt<ActionType> Action(
         clEnumValN(GenClangAttrParsedAttrKinds,
                    "gen-clang-attr-parsed-attr-kinds",
                    "Generate a clang parsed attribute kinds"),
+        clEnumValN(GenClangAttrIsTypeDependent,
+                   "gen-clang-attr-is-type-dependent",
+                   "Generate clang is type dependent attribute code"),
         clEnumValN(GenClangAttrTextNodeDump, "gen-clang-attr-text-node-dump",
                    "Generate clang attribute text node dumper"),
         clEnumValN(GenClangAttrNodeTraverse, "gen-clang-attr-node-traverse",
@@ -423,6 +427,9 @@ bool ClangTableGenMain(raw_ostream &OS, const RecordKeeper &Records) {
   case GenClangAttrParsedAttrKinds:
     EmitClangAttrParsedAttrKinds(Records, OS);
     break;
+  case GenClangAttrIsTypeDependent:
+    EmitClangAttrIsTypeDependent(Records, OS);
+    break;
   case GenClangAttrTextNodeDump:
     EmitClangAttrTextNodeDump(Records, OS);
     break;
diff --git a/clang/utils/TableGen/TableGenBackends.h b/clang/utils/TableGen/TableGenBackends.h
index fa49dcd289bc2..058bda3ebd246 100644
--- a/clang/utils/TableGen/TableGenBackends.h
+++ b/clang/utils/TableGen/TableGenBackends.h
@@ -82,6 +82,8 @@ void EmitClangAttrParsedAttrImpl(const llvm::RecordKeeper &Records,
                                  llvm::raw_ostream &OS);
 void EmitClangAttrParsedAttrKinds(const llvm::RecordKeeper &Records,
                                   llvm::raw_ostream &OS);
+void EmitClangAttrIsTypeDependent(const llvm::RecordKeeper &Records,
+                                  llvm::raw_ostream &OS);
 void EmitClangAttrTextNodeDump(const llvm::RecordKeeper &Records,
                                llvm::raw_ostream &OS);
 void EmitClangAttrNodeTraverse(const llvm::RecordKeeper &Records,
diff --git a/llvm/docs/TableGen/BackEnds.rst b/llvm/docs/TableGen/BackEnds.rst
index 7f571378860b2..1e3cb8783df16 100644
--- a/llvm/docs/TableGen/BackEnds.rst
+++ b/llvm/docs/TableGen/BackEnds.rst
@@ -355,6 +355,13 @@ ClangAttrParsedAttrKinds
 ``AttributeList::getKind`` function, mapping a string (and syntax) to a parsed
 attribute ``AttributeList::Kind`` enumeration.
 
+ClangAttrIsTypeDependent
+------------------------
+
+**Purpose**: Creates ``AttrIsTypeDependent.inc``, which is used to implement the
+``Sema::CheckAttributesOnDeducedType`` function, mapping an attribute kind to a
+Sema function if it exists.
+
 ClangAttrDump
 -------------
 

From 0be4218d7b7080fec73fe13bc759439d49159c05 Mon Sep 17 00:00:00 2001
From: Ivan Kosarev <ivan.kosarev@amd.com>
Date: Tue, 18 Nov 2025 12:41:53 +0000
Subject: [PATCH 05/52] [CMake] Declare all parts of *GenRegisterInfo.inc as
 outputs. (#168405)

This tells the build system to check and regenerate the
*GenRegisterInfo*.inc files, should any of them be missing for
whatever reason.

A follow-up to
<https://github.com/llvm/llvm-project/pull/167700>.
---
 llvm/cmake/modules/TableGen.cmake           | 12 +++++++++++-
 llvm/utils/TableGen/RegisterInfoEmitter.cpp |  2 ++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/llvm/cmake/modules/TableGen.cmake b/llvm/cmake/modules/TableGen.cmake
index 9a2e73a1e3718..84c03cd6432ed 100644
--- a/llvm/cmake/modules/TableGen.cmake
+++ b/llvm/cmake/modules/TableGen.cmake
@@ -66,6 +66,16 @@ function(tablegen project ofn)
     list(APPEND LLVM_TABLEGEN_FLAGS "-omit-comments")
   endif()
 
+  set(EXTRA_OUTPUTS)
+  if("-gen-register-info" IN_LIST ARGN)
+    cmake_path(GET ofn STEM OUTPUT_BASENAME)
+    list(APPEND EXTRA_OUTPUTS
+         ${CMAKE_CURRENT_BINARY_DIR}/${OUTPUT_BASENAME}Enums.inc
+         ${CMAKE_CURRENT_BINARY_DIR}/${OUTPUT_BASENAME}Header.inc
+         ${CMAKE_CURRENT_BINARY_DIR}/${OUTPUT_BASENAME}MCDesc.inc
+         ${CMAKE_CURRENT_BINARY_DIR}/${OUTPUT_BASENAME}TargetDesc.inc)
+  endif()
+
   # MSVC can't support long string literals ("long" > 65534 bytes)[1], so if there's
   # a possibility of generated tables being consumed by MSVC, generate arrays of
   # char literals, instead. If we're cross-compiling, then conservatively assume
@@ -126,7 +136,7 @@ function(tablegen project ofn)
     set(LLVM_TABLEGEN_JOB_POOL "")
   endif()
 
-  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn}
+  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn} ${EXTRA_OUTPUTS}
     COMMAND ${tablegen_exe} ${ARG_UNPARSED_ARGUMENTS}
     ${tblgen_includes}
     ${LLVM_TABLEGEN_FLAGS}
diff --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
index ef7b13e8940f8..3486a7a7fb08c 100644
--- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
@@ -1878,6 +1878,8 @@ TableGenOutputFiles RegisterInfoEmitter::run(StringRef FilenamePrefix) {
   if (RegisterInfoDebug)
     debugDump(errs());
 
+  // The suffixes should be in sync with the tablegen function in
+  // llvm/cmake/modules/TableGen.cmake.
   return {Main,
           {{"Enums.inc", Enums},
            {"MCDesc.inc", MCDesc},

From 3c87119a910e95396b26c519fa90d63a59442267 Mon Sep 17 00:00:00 2001
From: Ivan Kosarev <ivan.kosarev@amd.com>
Date: Tue, 18 Nov 2025 12:43:10 +0000
Subject: [PATCH 06/52] [TableGen][NFCI] Change TableGenMain() to take
 function_ref. (#167888)

It was switched from a function pointer to std::function in

TableGen: Make 2nd arg MainFn of TableGenMain(argv0, MainFn) optional.
f675ec6165ab6add5e57cd43a2e9fa1a9bc21d81

but there's no mention of any particular reason for that.
---
 llvm/include/llvm/TableGen/Main.h      | 14 ++++++--------
 llvm/lib/TableGen/Main.cpp             |  6 ++----
 llvm/utils/TableGen/Basic/TableGen.cpp |  2 +-
 3 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/llvm/include/llvm/TableGen/Main.h b/llvm/include/llvm/TableGen/Main.h
index bafce3a463acc..daede9f5a46f0 100644
--- a/llvm/include/llvm/TableGen/Main.h
+++ b/llvm/include/llvm/TableGen/Main.h
@@ -14,7 +14,6 @@
 #define LLVM_TABLEGEN_MAIN_H
 
 #include "llvm/Support/CommandLine.h"
-#include <functional>
 #include <map>
 
 namespace llvm {
@@ -30,18 +29,17 @@ struct TableGenOutputFiles {
 };
 
 /// Returns true on error, false otherwise.
-using TableGenMainFn = bool(raw_ostream &OS, const RecordKeeper &Records);
+using TableGenMainFn =
+    function_ref<bool(raw_ostream &OS, const RecordKeeper &Records)>;
 
 /// Perform the action using Records, and store output in OutFiles.
 /// Returns true on error, false otherwise.
-using MultiFileTableGenMainFn = bool(TableGenOutputFiles &OutFiles,
-                                     const RecordKeeper &Records);
+using MultiFileTableGenMainFn = function_ref<bool(TableGenOutputFiles &OutFiles,
+                                                  const RecordKeeper &Records)>;
 
-int TableGenMain(const char *argv0,
-                 std::function<TableGenMainFn> MainFn = nullptr);
+int TableGenMain(const char *argv0, TableGenMainFn MainFn = nullptr);
 
-int TableGenMain(const char *argv0,
-                 std::function<MultiFileTableGenMainFn> MainFn = nullptr);
+int TableGenMain(const char *argv0, MultiFileTableGenMainFn MainFn = nullptr);
 
 /// Controls emitting large character arrays as strings or character arrays.
 /// Typically set to false when building with MSVC.
diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp
index 3330b70cdc2e1..939e9c6bf5d2f 100644
--- a/llvm/lib/TableGen/Main.cpp
+++ b/llvm/lib/TableGen/Main.cpp
@@ -127,8 +127,7 @@ static int WriteOutput(const TGParser &Parser, const char *argv0,
   return 0;
 }
 
-int llvm::TableGenMain(const char *argv0,
-                       std::function<MultiFileTableGenMainFn> MainFn) {
+int llvm::TableGenMain(const char *argv0, MultiFileTableGenMainFn MainFn) {
   RecordKeeper Records;
   TGTimer &Timer = Records.getTimer();
 
@@ -209,8 +208,7 @@ int llvm::TableGenMain(const char *argv0,
   return 0;
 }
 
-int llvm::TableGenMain(const char *argv0,
-                       std::function<TableGenMainFn> MainFn) {
+int llvm::TableGenMain(const char *argv0, TableGenMainFn MainFn) {
   return TableGenMain(argv0, [&MainFn](TableGenOutputFiles &OutFiles,
                                        const RecordKeeper &Records) {
     std::string S;
diff --git a/llvm/utils/TableGen/Basic/TableGen.cpp b/llvm/utils/TableGen/Basic/TableGen.cpp
index b79ae93dab4f7..a655cbbc16096 100644
--- a/llvm/utils/TableGen/Basic/TableGen.cpp
+++ b/llvm/utils/TableGen/Basic/TableGen.cpp
@@ -73,7 +73,7 @@ int tblgen_main(int argc, char **argv) {
   InitLLVM X(argc, argv);
   cl::ParseCommandLineOptions(argc, argv);
 
-  std::function<MultiFileTableGenMainFn> MainFn = nullptr;
+  MultiFileTableGenMainFn MainFn = nullptr;
   return TableGenMain(argv[0], MainFn);
 }
 

From 4c9020ded754707448f2d541c0b5d13a95725384 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stefan=20Gr=C3=A4nitz?= <stefan.graenitz@gmail.com>
Date: Tue, 18 Nov 2025 13:58:52 +0100
Subject: [PATCH 07/52] [ORC] Fix shlibs build: add Object to
 libLLVMOrcDebugging (#168343)

---
 llvm/lib/ExecutionEngine/Orc/Debugging/CMakeLists.txt           | 1 +
 llvm/lib/ExecutionEngine/Orc/Debugging/ELFDebugObjectPlugin.cpp | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/ExecutionEngine/Orc/Debugging/CMakeLists.txt b/llvm/lib/ExecutionEngine/Orc/Debugging/CMakeLists.txt
index ab287c7af60be..6be59b0890c44 100644
--- a/llvm/lib/ExecutionEngine/Orc/Debugging/CMakeLists.txt
+++ b/llvm/lib/ExecutionEngine/Orc/Debugging/CMakeLists.txt
@@ -22,6 +22,7 @@ add_llvm_component_library(LLVMOrcDebugging
   BinaryFormat
   DebugInfoDWARF
   JITLink
+  Object
   OrcJIT
   OrcShared
   Support
diff --git a/llvm/lib/ExecutionEngine/Orc/Debugging/ELFDebugObjectPlugin.cpp b/llvm/lib/ExecutionEngine/Orc/Debugging/ELFDebugObjectPlugin.cpp
index 9f556b0d07a8b..653645ff03f15 100644
--- a/llvm/lib/ExecutionEngine/Orc/Debugging/ELFDebugObjectPlugin.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Debugging/ELFDebugObjectPlugin.cpp
@@ -1,4 +1,4 @@
-//===------- ELFDebugObjectPlugin.cpp - JITLink debug objects ---------===//
+//===--------- ELFDebugObjectPlugin.cpp - JITLink debug objects -----------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.

From 52f4c360e382e6926dccb315d4402af6211e25f0 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Tue, 18 Nov 2025 13:13:35 +0000
Subject: [PATCH 08/52] [X86] combineTruncate - trunc(srl(load(p),amt)) ->
 load(p+amt/8) - ensure amt doesn't depend on original load chain (#168400)

Relax the fix for #165755 / #165850 - it doesn't matter if the amt depends on the original load's value, only whether it depends on any user of the load's chain
---
 llvm/lib/Target/X86/X86ISelLowering.cpp      |   7 +-
 llvm/test/CodeGen/X86/bittest-big-integer.ll | 272 +++++++------------
 2 files changed, 103 insertions(+), 176 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 621f1868d3311..864e5dc67682c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -54688,11 +54688,14 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
       KnownBits KnownAmt = DAG.computeKnownBits(ShAmt);
       // Check the shift amount is byte aligned.
       // Check the truncation doesn't use any shifted in (zero) top bits.
-      // Check the shift amount doesn't depend on the original load.
+      // Check the shift amount doesn't depend on the original load chain.
       if (KnownAmt.countMinTrailingZeros() >= 3 &&
           KnownAmt.getMaxValue().ule(SrcVT.getSizeInBits() -
                                      VT.getSizeInBits()) &&
-          !Ld->isPredecessorOf(ShAmt.getNode())) {
+          none_of(Ld->uses(), [&ShAmt](SDUse &Use) {
+            return Use.getResNo() == 1 &&
+                   Use.getUser()->isPredecessorOf(ShAmt.getNode());
+          })) {
         EVT PtrVT = Ld->getBasePtr().getValueType();
         SDValue PtrBitOfs = DAG.getZExtOrTrunc(ShAmt, DL, PtrVT);
         SDValue PtrByteOfs =
diff --git a/llvm/test/CodeGen/X86/bittest-big-integer.ll b/llvm/test/CodeGen/X86/bittest-big-integer.ll
index b85a20b9d6b6e..023fb5065b892 100644
--- a/llvm/test/CodeGen/X86/bittest-big-integer.ll
+++ b/llvm/test/CodeGen/X86/bittest-big-integer.ll
@@ -1877,85 +1877,56 @@ define i32 @blsr_u512(ptr %word) nounwind {
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pushq %r15
 ; SSE-NEXT:    pushq %r14
-; SSE-NEXT:    pushq %r12
 ; SSE-NEXT:    pushq %rbx
-; SSE-NEXT:    pushq %rax
-; SSE-NEXT:    movq 56(%rdi), %rcx
-; SSE-NEXT:    movq 48(%rdi), %rdx
-; SSE-NEXT:    movq 40(%rdi), %rsi
-; SSE-NEXT:    movq 32(%rdi), %r11
+; SSE-NEXT:    movq 48(%rdi), %r11
+; SSE-NEXT:    movq 40(%rdi), %r9
 ; SSE-NEXT:    movq 24(%rdi), %r8
-; SSE-NEXT:    movq 16(%rdi), %r9
-; SSE-NEXT:    movq (%rdi), %rax
-; SSE-NEXT:    movq 8(%rdi), %r10
-; SSE-NEXT:    rep bsfq %rax, %rbx
-; SSE-NEXT:    rep bsfq %r10, %r14
-; SSE-NEXT:    addq $64, %r14
-; SSE-NEXT:    testq %rax, %rax
-; SSE-NEXT:    cmovneq %rbx, %r14
-; SSE-NEXT:    rep bsfq %r9, %r15
-; SSE-NEXT:    rep bsfq %r8, %rbx
+; SSE-NEXT:    movq 16(%rdi), %rdx
+; SSE-NEXT:    movq (%rdi), %rcx
+; SSE-NEXT:    movq 8(%rdi), %rsi
+; SSE-NEXT:    rep bsfq %rcx, %rax
+; SSE-NEXT:    rep bsfq %rsi, %rbx
 ; SSE-NEXT:    addq $64, %rbx
-; SSE-NEXT:    testq %r9, %r9
-; SSE-NEXT:    cmovneq %r15, %rbx
-; SSE-NEXT:    subq $-128, %rbx
-; SSE-NEXT:    movq %rax, %r15
-; SSE-NEXT:    movq %rax, %r12
-; SSE-NEXT:    orq %r10, %r12
-; SSE-NEXT:    cmovneq %r14, %rbx
-; SSE-NEXT:    rep bsfq %r11, %r12
-; SSE-NEXT:    rep bsfq %rsi, %r14
-; SSE-NEXT:    addq $64, %r14
-; SSE-NEXT:    testq %r11, %r11
-; SSE-NEXT:    cmovneq %r12, %r14
-; SSE-NEXT:    xorps %xmm0, %xmm0
-; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
-; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
-; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
-; SSE-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
-; SSE-NEXT:    rep bsfq %rdx, %r12
+; SSE-NEXT:    testq %rcx, %rcx
+; SSE-NEXT:    cmovneq %rax, %rbx
+; SSE-NEXT:    rep bsfq %rdx, %rax
+; SSE-NEXT:    rep bsfq %r8, %r10
+; SSE-NEXT:    addq $64, %r10
+; SSE-NEXT:    testq %rdx, %rdx
+; SSE-NEXT:    cmovneq %rax, %r10
+; SSE-NEXT:    movq 32(%rdi), %r14
+; SSE-NEXT:    subq $-128, %r10
+; SSE-NEXT:    movq %rcx, %rax
+; SSE-NEXT:    orq %rsi, %rax
+; SSE-NEXT:    cmovneq %rbx, %r10
+; SSE-NEXT:    rep bsfq %r14, %rax
+; SSE-NEXT:    rep bsfq %r9, %rbx
+; SSE-NEXT:    addq $64, %rbx
+; SSE-NEXT:    testq %r14, %r14
+; SSE-NEXT:    cmovneq %rax, %rbx
+; SSE-NEXT:    rep bsfq %r11, %r15
 ; SSE-NEXT:    movl $64, %eax
-; SSE-NEXT:    rep bsfq %rcx, %rax
+; SSE-NEXT:    rep bsfq 56(%rdi), %rax
 ; SSE-NEXT:    addq $64, %rax
-; SSE-NEXT:    testq %rdx, %rdx
-; SSE-NEXT:    cmovneq %r12, %rax
+; SSE-NEXT:    testq %r11, %r11
+; SSE-NEXT:    cmovneq %r15, %rax
 ; SSE-NEXT:    subq $-128, %rax
-; SSE-NEXT:    movq %r11, -{{[0-9]+}}(%rsp)
-; SSE-NEXT:    orq %rsi, %r11
-; SSE-NEXT:    cmovneq %r14, %rax
-; SSE-NEXT:    addq $256, %rax # imm = 0x100
-; SSE-NEXT:    movq %r10, -{{[0-9]+}}(%rsp)
-; SSE-NEXT:    orq %r8, %r10
-; SSE-NEXT:    orq %r9, %r15
-; SSE-NEXT:    orq %r10, %r15
+; SSE-NEXT:    orq %r9, %r14
 ; SSE-NEXT:    cmovneq %rbx, %rax
-; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
-; SSE-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
-; SSE-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
-; SSE-NEXT:    movq %rsi, -{{[0-9]+}}(%rsp)
-; SSE-NEXT:    movq %r8, -{{[0-9]+}}(%rsp)
-; SSE-NEXT:    movq %r9, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    addq $256, %rax # imm = 0x100
+; SSE-NEXT:    orq %r8, %rsi
+; SSE-NEXT:    orq %rdx, %rcx
+; SSE-NEXT:    orq %rsi, %rcx
+; SSE-NEXT:    cmovneq %r10, %rax
+; SSE-NEXT:    movl $-2, %edx
+; SSE-NEXT:    movl %eax, %ecx
+; SSE-NEXT:    roll %cl, %edx
 ; SSE-NEXT:    movl %eax, %ecx
-; SSE-NEXT:    andl $32, %ecx
-; SSE-NEXT:    movl %eax, %edx
-; SSE-NEXT:    andl $480, %edx # imm = 0x1E0
-; SSE-NEXT:    shrl $3, %edx
-; SSE-NEXT:    movl %edx, %esi
-; SSE-NEXT:    andl $-8, %esi
-; SSE-NEXT:    movq -128(%rsp,%rsi), %r8
-; SSE-NEXT:    shrq %cl, %r8
-; SSE-NEXT:    movl -120(%rsp,%rsi), %esi
-; SSE-NEXT:    addl %esi, %esi
-; SSE-NEXT:    notl %ecx
-; SSE-NEXT:    # kill: def $cl killed $cl killed $ecx
-; SSE-NEXT:    shlq %cl, %rsi
-; SSE-NEXT:    orl %r8d, %esi
-; SSE-NEXT:    btrl %eax, %esi
-; SSE-NEXT:    movl %esi, (%rdi,%rdx)
+; SSE-NEXT:    shrl $3, %ecx
+; SSE-NEXT:    andl $60, %ecx
+; SSE-NEXT:    andl %edx, (%rdi,%rcx)
 ; SSE-NEXT:    # kill: def $eax killed $eax killed $rax
-; SSE-NEXT:    addq $8, %rsp
 ; SSE-NEXT:    popq %rbx
-; SSE-NEXT:    popq %r12
 ; SSE-NEXT:    popq %r14
 ; SSE-NEXT:    popq %r15
 ; SSE-NEXT:    retq
@@ -1964,133 +1935,86 @@ define i32 @blsr_u512(ptr %word) nounwind {
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    pushq %r15
 ; AVX2-NEXT:    pushq %r14
-; AVX2-NEXT:    pushq %r13
-; AVX2-NEXT:    pushq %r12
 ; AVX2-NEXT:    pushq %rbx
-; AVX2-NEXT:    movq 56(%rdi), %rcx
-; AVX2-NEXT:    movq 40(%rdi), %rdx
-; AVX2-NEXT:    movq 32(%rdi), %r11
-; AVX2-NEXT:    movq 24(%rdi), %rsi
-; AVX2-NEXT:    movq 16(%rdi), %r8
-; AVX2-NEXT:    movq (%rdi), %r9
-; AVX2-NEXT:    movq 8(%rdi), %r10
-; AVX2-NEXT:    xorl %ebx, %ebx
-; AVX2-NEXT:    tzcntq %r9, %rbx
-; AVX2-NEXT:    tzcntq %r10, %rax
-; AVX2-NEXT:    addq $64, %rax
-; AVX2-NEXT:    testq %r9, %r9
-; AVX2-NEXT:    cmovneq %rbx, %rax
-; AVX2-NEXT:    xorl %r14d, %r14d
-; AVX2-NEXT:    tzcntq %r8, %r14
+; AVX2-NEXT:    movq 40(%rdi), %r9
+; AVX2-NEXT:    movq 32(%rdi), %r10
+; AVX2-NEXT:    movq 24(%rdi), %r8
+; AVX2-NEXT:    movq 16(%rdi), %rdx
+; AVX2-NEXT:    movq (%rdi), %rcx
+; AVX2-NEXT:    movq 8(%rdi), %rsi
+; AVX2-NEXT:    tzcntq %rcx, %rax
 ; AVX2-NEXT:    xorl %ebx, %ebx
 ; AVX2-NEXT:    tzcntq %rsi, %rbx
 ; AVX2-NEXT:    addq $64, %rbx
-; AVX2-NEXT:    testq %r8, %r8
-; AVX2-NEXT:    cmovneq %r14, %rbx
-; AVX2-NEXT:    subq $-128, %rbx
-; AVX2-NEXT:    movq %r9, %r14
-; AVX2-NEXT:    movq %r9, %r15
-; AVX2-NEXT:    orq %r10, %r15
+; AVX2-NEXT:    testq %rcx, %rcx
 ; AVX2-NEXT:    cmovneq %rax, %rbx
 ; AVX2-NEXT:    xorl %eax, %eax
-; AVX2-NEXT:    tzcntq %r11, %rax
-; AVX2-NEXT:    xorl %r12d, %r12d
-; AVX2-NEXT:    tzcntq %rdx, %r12
-; AVX2-NEXT:    addq $64, %r12
-; AVX2-NEXT:    testq %r11, %r11
-; AVX2-NEXT:    cmovneq %rax, %r12
-; AVX2-NEXT:    movq 48(%rdi), %r15
-; AVX2-NEXT:    xorl %r13d, %r13d
-; AVX2-NEXT:    tzcntq %r15, %r13
+; AVX2-NEXT:    tzcntq %rdx, %rax
+; AVX2-NEXT:    tzcntq %r8, %r11
+; AVX2-NEXT:    addq $64, %r11
+; AVX2-NEXT:    testq %rdx, %rdx
+; AVX2-NEXT:    cmovneq %rax, %r11
+; AVX2-NEXT:    subq $-128, %r11
+; AVX2-NEXT:    movq %rcx, %rax
+; AVX2-NEXT:    orq %rsi, %rax
+; AVX2-NEXT:    cmovneq %rbx, %r11
 ; AVX2-NEXT:    xorl %eax, %eax
-; AVX2-NEXT:    tzcntq %rcx, %rax
+; AVX2-NEXT:    tzcntq %r10, %rax
+; AVX2-NEXT:    xorl %ebx, %ebx
+; AVX2-NEXT:    tzcntq %r9, %rbx
+; AVX2-NEXT:    addq $64, %rbx
+; AVX2-NEXT:    testq %r10, %r10
+; AVX2-NEXT:    cmovneq %rax, %rbx
+; AVX2-NEXT:    movq 48(%rdi), %r14
+; AVX2-NEXT:    xorl %r15d, %r15d
+; AVX2-NEXT:    tzcntq %r14, %r15
+; AVX2-NEXT:    xorl %eax, %eax
+; AVX2-NEXT:    tzcntq 56(%rdi), %rax
 ; AVX2-NEXT:    addq $64, %rax
-; AVX2-NEXT:    testq %r15, %r15
-; AVX2-NEXT:    cmovneq %r13, %rax
+; AVX2-NEXT:    testq %r14, %r14
+; AVX2-NEXT:    cmovneq %r15, %rax
 ; AVX2-NEXT:    subq $-128, %rax
-; AVX2-NEXT:    vxorps %xmm0, %xmm0, %xmm0
-; AVX2-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT:    movq %r11, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT:    orq %rdx, %r11
-; AVX2-NEXT:    cmovneq %r12, %rax
-; AVX2-NEXT:    addq $256, %rax # imm = 0x100
-; AVX2-NEXT:    movq %r10, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT:    orq %rsi, %r10
-; AVX2-NEXT:    orq %r8, %r14
-; AVX2-NEXT:    orq %r10, %r14
+; AVX2-NEXT:    orq %r9, %r10
 ; AVX2-NEXT:    cmovneq %rbx, %rax
-; AVX2-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT:    movq %r9, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT:    movq %r15, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT:    movq %rsi, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT:    movq %r8, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT:    addq $256, %rax # imm = 0x100
+; AVX2-NEXT:    orq %r8, %rsi
+; AVX2-NEXT:    orq %rdx, %rcx
+; AVX2-NEXT:    orq %rsi, %rcx
+; AVX2-NEXT:    cmovneq %r11, %rax
+; AVX2-NEXT:    movl $-2, %edx
+; AVX2-NEXT:    movl %eax, %ecx
+; AVX2-NEXT:    roll %cl, %edx
 ; AVX2-NEXT:    movl %eax, %ecx
-; AVX2-NEXT:    andl $32, %ecx
-; AVX2-NEXT:    movl %eax, %edx
-; AVX2-NEXT:    andl $480, %edx # imm = 0x1E0
-; AVX2-NEXT:    shrl $3, %edx
-; AVX2-NEXT:    movl %edx, %esi
-; AVX2-NEXT:    andl $-8, %esi
-; AVX2-NEXT:    shrxq %rcx, -128(%rsp,%rsi), %r8
-; AVX2-NEXT:    notl %ecx
-; AVX2-NEXT:    movl -120(%rsp,%rsi), %esi
-; AVX2-NEXT:    addl %esi, %esi
-; AVX2-NEXT:    shlxq %rcx, %rsi, %rcx
-; AVX2-NEXT:    orl %r8d, %ecx
-; AVX2-NEXT:    btrl %eax, %ecx
-; AVX2-NEXT:    movl %ecx, (%rdi,%rdx)
+; AVX2-NEXT:    shrl $3, %ecx
+; AVX2-NEXT:    andl $60, %ecx
+; AVX2-NEXT:    andl %edx, (%rdi,%rcx)
 ; AVX2-NEXT:    # kill: def $eax killed $eax killed $rax
 ; AVX2-NEXT:    popq %rbx
-; AVX2-NEXT:    popq %r12
-; AVX2-NEXT:    popq %r13
 ; AVX2-NEXT:    popq %r14
 ; AVX2-NEXT:    popq %r15
-; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: blsr_u512:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    pushq %rax
-; AVX512-NEXT:    vmovups (%rdi), %ymm0
-; AVX512-NEXT:    vmovups 32(%rdi), %ymm1
-; AVX512-NEXT:    vmovdqu64 (%rdi), %zmm2
-; AVX512-NEXT:    vpternlogd {{.*#+}} zmm3 = -1
-; AVX512-NEXT:    vpaddq %zmm3, %zmm2, %zmm3
-; AVX512-NEXT:    vpandnq %zmm3, %zmm2, %zmm3
-; AVX512-NEXT:    vplzcntq %zmm3, %zmm3
-; AVX512-NEXT:    vmovdqa64 {{.*#+}} zmm4 = [64,128,192,256,320,384,448,512]
-; AVX512-NEXT:    vpsubq %zmm3, %zmm4, %zmm3
-; AVX512-NEXT:    vptestmq %zmm2, %zmm2, %k1
-; AVX512-NEXT:    vpbroadcastq {{.*#+}} zmm2 = [512,512,512,512,512,512,512,512]
-; AVX512-NEXT:    vpcompressq %zmm3, %zmm2 {%k1}
-; AVX512-NEXT:    vmovq %xmm2, %rax
-; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX512-NEXT:    vmovdqu %ymm2, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT:    vmovdqu %ymm2, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT:    vmovups %ymm1, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT:    vmovdqu64 (%rdi), %zmm0
+; AVX512-NEXT:    vpternlogd {{.*#+}} zmm1 = -1
+; AVX512-NEXT:    vpaddq %zmm1, %zmm0, %zmm1
+; AVX512-NEXT:    vpandnq %zmm1, %zmm0, %zmm1
+; AVX512-NEXT:    vplzcntq %zmm1, %zmm1
+; AVX512-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [64,128,192,256,320,384,448,512]
+; AVX512-NEXT:    vpsubq %zmm1, %zmm2, %zmm1
+; AVX512-NEXT:    vptestmq %zmm0, %zmm0, %k1
+; AVX512-NEXT:    vpbroadcastq {{.*#+}} zmm0 = [512,512,512,512,512,512,512,512]
+; AVX512-NEXT:    vpcompressq %zmm1, %zmm0 {%k1}
+; AVX512-NEXT:    vmovq %xmm0, %rax
+; AVX512-NEXT:    movl $-2, %edx
+; AVX512-NEXT:    movl %eax, %ecx
+; AVX512-NEXT:    roll %cl, %edx
 ; AVX512-NEXT:    movl %eax, %ecx
-; AVX512-NEXT:    andl $32, %ecx
-; AVX512-NEXT:    movl %ecx, %edx
-; AVX512-NEXT:    notl %edx
-; AVX512-NEXT:    movl %eax, %esi
-; AVX512-NEXT:    shrl $3, %esi
-; AVX512-NEXT:    movl %esi, %r8d
-; AVX512-NEXT:    andl $56, %r8d
-; AVX512-NEXT:    movl -120(%rsp,%r8), %r9d
-; AVX512-NEXT:    addl %r9d, %r9d
-; AVX512-NEXT:    shlxq %rdx, %r9, %rdx
 ; AVX512-NEXT:    shrl $3, %ecx
-; AVX512-NEXT:    addq %rsp, %r8
-; AVX512-NEXT:    addq $-128, %r8
-; AVX512-NEXT:    orl (%rcx,%r8), %edx
-; AVX512-NEXT:    btrl %eax, %edx
-; AVX512-NEXT:    andl $60, %esi
-; AVX512-NEXT:    movl %edx, (%rdi,%rsi)
+; AVX512-NEXT:    andl $60, %ecx
+; AVX512-NEXT:    andl %edx, (%rdi,%rcx)
 ; AVX512-NEXT:    # kill: def $eax killed $eax killed $rax
-; AVX512-NEXT:    popq %rcx
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
   %ld = load i512, ptr %word

From 3d5d32c6058807008e579dd5ea2faced33a7943b Mon Sep 17 00:00:00 2001
From: Hassnaa Hamdi <hassnaa.hamdi@arm.com>
Date: Tue, 18 Nov 2025 13:15:47 +0000
Subject: [PATCH 09/52] [CGP]: Optimize mul.overflow. (#148343)

- Detect cases where LHS & RHS values will not cause overflow
(when the Hi halves are zero).
---
 llvm/include/llvm/CodeGen/TargetLowering.h    |   7 +
 llvm/lib/CodeGen/CodeGenPrepare.cpp           | 182 ++++++++++++
 .../Target/AArch64/AArch64ISelLowering.cpp    |   9 +
 llvm/lib/Target/AArch64/AArch64ISelLowering.h |   5 +
 llvm/test/CodeGen/AArch64/i128-math.ll        | 189 ++++++++-----
 .../CodeGen/AArch64/i128_with_overflow.ll     |  93 ++++---
 .../test/CodeGen/AArch64/mul-i128-overflow.ll | 261 ++++++++++++++++++
 .../umulo-128-legalisation-lowering.ll        |  97 ++++---
 8 files changed, 699 insertions(+), 144 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/mul-i128-overflow.ll

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index cec7d09f494d6..4c932c523e423 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3492,6 +3492,13 @@ class LLVM_ABI TargetLoweringBase {
     return MathUsed && (VT.isSimple() || !isOperationExpand(Opcode, VT));
   }
 
+  // Return true if the target wants to optimize the mul overflow intrinsic
+  // for the given \p VT.
+  virtual bool shouldOptimizeMulOverflowWithZeroHighBits(LLVMContext &Context,
+                                                         EVT VT) const {
+    return false;
+  }
+
   // Return true if it is profitable to use a scalar input to a BUILD_VECTOR
   // even if the vector itself has multiple uses.
   virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const {
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index b6dd174f9be80..587c1372b19cb 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -431,6 +431,8 @@ class CodeGenPrepare {
   bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Type *AccessTy,
                           unsigned AddrSpace);
   bool optimizeGatherScatterInst(Instruction *MemoryInst, Value *Ptr);
+  bool optimizeMulWithOverflow(Instruction *I, bool IsSigned,
+                               ModifyDT &ModifiedDT);
   bool optimizeInlineAsmInst(CallInst *CS);
   bool optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT);
   bool optimizeExt(Instruction *&I);
@@ -2797,6 +2799,10 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
         }
       }
       return false;
+    case Intrinsic::umul_with_overflow:
+      return optimizeMulWithOverflow(II, /*IsSigned=*/false, ModifiedDT);
+    case Intrinsic::smul_with_overflow:
+      return optimizeMulWithOverflow(II, /*IsSigned=*/true, ModifiedDT);
     }
 
     SmallVector<Value *, 2> PtrOps;
@@ -6391,6 +6397,182 @@ bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
   return true;
 }
 
+// This is a helper for CodeGenPrepare::optimizeMulWithOverflow.
+// Check the pattern we are interested in: the intrinsic has at most 2 uses,
+// and every use is an extractvalue instruction with a single index.
+static bool matchOverflowPattern(Instruction *&I, ExtractValueInst *&MulExtract,
+                                 ExtractValueInst *&OverflowExtract) {
+  // Bail out if there are more than 2 users:
+  if (I->hasNUsesOrMore(3))
+    return false;
+
+  for (User *U : I->users()) {
+    auto *Extract = dyn_cast<ExtractValueInst>(U);
+    if (!Extract || Extract->getNumIndices() != 1)
+      return false;
+
+    unsigned Index = Extract->getIndices()[0];
+    if (Index == 0)
+      MulExtract = Extract;
+    else if (Index == 1)
+      OverflowExtract = Extract;
+    else
+      return false;
+  }
+  return true;
+}
+
+// Rewrite the mul_with_overflow intrinsic by checking if both of the
+// operands' value ranges are within the legal type. If so, we can optimize the
+// multiplication algorithm. This code is supposed to be written during the step
+// of type legalization, but given that we need to reconstruct the IR which is
+// not doable there, we do it here.
+// The IR after the optimization will look like:
+// entry:
+//   if signed:
+//     ( (lhs_lo>>BW-1) ^ lhs_hi) || ( (rhs_lo>>BW-1) ^ rhs_hi) ? overflow,
+//     overflow_no
+//   else:
+//     (lhs_hi != 0) || (rhs_hi != 0) ? overflow, overflow_no
+// overflow_no:
+// overflow:
+// overflow.res:
+// \returns true if optimization was applied
+// TODO: This optimization can be further improved to optimize branching on
+// overflow where the 'overflow_no' BB can branch directly to the false
+// successor of overflow, but that would add additional complexity so we leave
+// it for future work.
+bool CodeGenPrepare::optimizeMulWithOverflow(Instruction *I, bool IsSigned,
+                                             ModifyDT &ModifiedDT) {
+  // Check if target supports this optimization.
+  if (!TLI->shouldOptimizeMulOverflowWithZeroHighBits(
+          I->getContext(),
+          TLI->getValueType(*DL, I->getType()->getContainedType(0))))
+    return false;
+
+  ExtractValueInst *MulExtract = nullptr, *OverflowExtract = nullptr;
+  if (!matchOverflowPattern(I, MulExtract, OverflowExtract))
+    return false;
+
+  // Keep track of the instruction to stop reoptimizing it again.
+  InsertedInsts.insert(I);
+
+  Value *LHS = I->getOperand(0);
+  Value *RHS = I->getOperand(1);
+  Type *Ty = LHS->getType();
+  unsigned VTHalfBitWidth = Ty->getScalarSizeInBits() / 2;
+  Type *LegalTy = Ty->getWithNewBitWidth(VTHalfBitWidth);
+
+  // New BBs:
+  BasicBlock *OverflowEntryBB =
+      I->getParent()->splitBasicBlock(I, "", /*Before*/ true);
+  OverflowEntryBB->takeName(I->getParent());
+  // Keep the 'br' instruction that is generated as a result of the split to be
+  // erased/replaced later.
+  Instruction *OldTerminator = OverflowEntryBB->getTerminator();
+  BasicBlock *NoOverflowBB =
+      BasicBlock::Create(I->getContext(), "overflow.no", I->getFunction());
+  NoOverflowBB->moveAfter(OverflowEntryBB);
+  BasicBlock *OverflowBB =
+      BasicBlock::Create(I->getContext(), "overflow", I->getFunction());
+  OverflowBB->moveAfter(NoOverflowBB);
+
+  // BB overflow.entry:
+  IRBuilder<> Builder(OverflowEntryBB);
+  // Extract low and high halves of LHS:
+  Value *LoLHS = Builder.CreateTrunc(LHS, LegalTy, "lo.lhs");
+  Value *HiLHS = Builder.CreateLShr(LHS, VTHalfBitWidth, "lhs.lsr");
+  HiLHS = Builder.CreateTrunc(HiLHS, LegalTy, "hi.lhs");
+
+  // Extract low and high halves of RHS:
+  Value *LoRHS = Builder.CreateTrunc(RHS, LegalTy, "lo.rhs");
+  Value *HiRHS = Builder.CreateLShr(RHS, VTHalfBitWidth, "rhs.lsr");
+  HiRHS = Builder.CreateTrunc(HiRHS, LegalTy, "hi.rhs");
+
+  Value *IsAnyBitTrue;
+  if (IsSigned) {
+    Value *SignLoLHS =
+        Builder.CreateAShr(LoLHS, VTHalfBitWidth - 1, "sign.lo.lhs");
+    Value *SignLoRHS =
+        Builder.CreateAShr(LoRHS, VTHalfBitWidth - 1, "sign.lo.rhs");
+    Value *XorLHS = Builder.CreateXor(HiLHS, SignLoLHS);
+    Value *XorRHS = Builder.CreateXor(HiRHS, SignLoRHS);
+    Value *Or = Builder.CreateOr(XorLHS, XorRHS, "or.lhs.rhs");
+    IsAnyBitTrue = Builder.CreateCmp(ICmpInst::ICMP_NE, Or,
+                                     ConstantInt::getNullValue(Or->getType()));
+  } else {
+    Value *CmpLHS = Builder.CreateCmp(ICmpInst::ICMP_NE, HiLHS,
+                                      ConstantInt::getNullValue(LegalTy));
+    Value *CmpRHS = Builder.CreateCmp(ICmpInst::ICMP_NE, HiRHS,
+                                      ConstantInt::getNullValue(LegalTy));
+    IsAnyBitTrue = Builder.CreateOr(CmpLHS, CmpRHS, "or.lhs.rhs");
+  }
+  Builder.CreateCondBr(IsAnyBitTrue, OverflowBB, NoOverflowBB);
+
+  // BB overflow.no:
+  Builder.SetInsertPoint(NoOverflowBB);
+  Value *ExtLoLHS, *ExtLoRHS;
+  if (IsSigned) {
+    ExtLoLHS = Builder.CreateSExt(LoLHS, Ty, "lo.lhs.ext");
+    ExtLoRHS = Builder.CreateSExt(LoRHS, Ty, "lo.rhs.ext");
+  } else {
+    ExtLoLHS = Builder.CreateZExt(LoLHS, Ty, "lo.lhs.ext");
+    ExtLoRHS = Builder.CreateZExt(LoRHS, Ty, "lo.rhs.ext");
+  }
+
+  Value *Mul = Builder.CreateMul(ExtLoLHS, ExtLoRHS, "mul.overflow.no");
+
+  // Create the 'overflow.res' BB to merge the results of
+  // the two paths:
+  BasicBlock *OverflowResBB = I->getParent();
+  OverflowResBB->setName("overflow.res");
+
+  // BB overflow.no: jump to overflow.res BB
+  Builder.CreateBr(OverflowResBB);
+  // Now we no longer need the old terminator in overflow.entry BB, erase it:
+  OldTerminator->eraseFromParent();
+
+  // BB overflow.res:
+  Builder.SetInsertPoint(OverflowResBB, OverflowResBB->getFirstInsertionPt());
+  // Create PHI nodes to merge results from overflow.no BB and overflow BB to
+  // replace the extract instructions.
+  PHINode *OverflowResPHI = Builder.CreatePHI(Ty, 2),
+          *OverflowFlagPHI =
+              Builder.CreatePHI(IntegerType::getInt1Ty(I->getContext()), 2);
+
+  // Add the incoming values from overflow.no BB and later from overflow BB.
+  OverflowResPHI->addIncoming(Mul, NoOverflowBB);
+  OverflowFlagPHI->addIncoming(ConstantInt::getFalse(I->getContext()),
+                               NoOverflowBB);
+
+  // Replace all users of MulExtract and OverflowExtract to use the PHI nodes.
+  if (MulExtract) {
+    MulExtract->replaceAllUsesWith(OverflowResPHI);
+    MulExtract->eraseFromParent();
+  }
+  if (OverflowExtract) {
+    OverflowExtract->replaceAllUsesWith(OverflowFlagPHI);
+    OverflowExtract->eraseFromParent();
+  }
+
+  // Remove the intrinsic from parent (overflow.res BB) as it will be part of
+  // overflow BB
+  I->removeFromParent();
+  // BB overflow:
+  I->insertInto(OverflowBB, OverflowBB->end());
+  Builder.SetInsertPoint(OverflowBB, OverflowBB->end());
+  Value *MulOverflow = Builder.CreateExtractValue(I, {0}, "mul.overflow");
+  Value *OverflowFlag = Builder.CreateExtractValue(I, {1}, "overflow.flag");
+  Builder.CreateBr(OverflowResBB);
+
+  // Add the extracted values to the PHI nodes in the overflow.res BB.
+  OverflowResPHI->addIncoming(MulOverflow, OverflowBB);
+  OverflowFlagPHI->addIncoming(OverflowFlag, OverflowBB);
+
+  ModifiedDT = ModifyDT::ModifyBBDT;
+  return true;
+}
+
 /// If there are any memory operands, use OptimizeMemoryInst to sink their
 /// address computing into the block when possible / profitable.
 bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 42567883b2594..d21e19b2ecd46 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -18851,6 +18851,15 @@ bool AArch64TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
   return (Index == 0 || Index == ResVT.getVectorMinNumElements());
 }
 
+bool AArch64TargetLowering::shouldOptimizeMulOverflowWithZeroHighBits(
+    LLVMContext &Context, EVT VT) const {
+  if (getTypeAction(Context, VT) != TypeExpandInteger)
+    return false;
+
+  EVT LegalTy = EVT::getIntegerVT(Context, VT.getSizeInBits() / 2);
+  return getTypeAction(Context, LegalTy) == TargetLowering::TypeLegal;
+}
+
 /// Turn vector tests of the signbit in the form of:
 ///   xor (sra X, elt_size(X)-1), -1
 /// into:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 70bfae717fb76..be198e54cbcbf 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -333,6 +333,11 @@ class AArch64TargetLowering : public TargetLowering {
     return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
   }
 
+  // Return true if the target wants to optimize the mul overflow intrinsic
+  // for the given \p VT.
+  bool shouldOptimizeMulOverflowWithZeroHighBits(LLVMContext &Context,
+                                                 EVT VT) const override;
+
   Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
                         AtomicOrdering Ord) const override;
   Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
diff --git a/llvm/test/CodeGen/AArch64/i128-math.ll b/llvm/test/CodeGen/AArch64/i128-math.ll
index 9e1c0c1b115ab..12ae241dda4bd 100644
--- a/llvm/test/CodeGen/AArch64/i128-math.ll
+++ b/llvm/test/CodeGen/AArch64/i128-math.ll
@@ -262,20 +262,28 @@ define i128 @u128_mul(i128 %x, i128 %y) {
 define { i128, i8 } @u128_checked_mul(i128 %x, i128 %y) {
 ; CHECK-LABEL: u128_checked_mul:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr x8, x1, x3
+; CHECK-NEXT:    cbz x8, .LBB17_2
+; CHECK-NEXT:  // %bb.1: // %overflow
 ; CHECK-NEXT:    mul x9, x3, x0
 ; CHECK-NEXT:    cmp x1, #0
 ; CHECK-NEXT:    ccmp x3, #0, #4, ne
-; CHECK-NEXT:    umulh x8, x1, x2
-; CHECK-NEXT:    umulh x10, x3, x0
+; CHECK-NEXT:    umulh x10, x1, x2
+; CHECK-NEXT:    umulh x8, x3, x0
 ; CHECK-NEXT:    madd x9, x1, x2, x9
-; CHECK-NEXT:    ccmp xzr, x8, #0, eq
-; CHECK-NEXT:    umulh x11, x0, x2
 ; CHECK-NEXT:    ccmp xzr, x10, #0, eq
+; CHECK-NEXT:    umulh x11, x0, x2
+; CHECK-NEXT:    ccmp xzr, x8, #0, eq
 ; CHECK-NEXT:    mul x0, x0, x2
 ; CHECK-NEXT:    cset w8, ne
 ; CHECK-NEXT:    adds x1, x11, x9
 ; CHECK-NEXT:    csinc w8, w8, wzr, lo
 ; CHECK-NEXT:    eor w2, w8, #0x1
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB17_2: // %overflow.no
+; CHECK-NEXT:    umulh x1, x0, x2
+; CHECK-NEXT:    mul x0, x0, x2
+; CHECK-NEXT:    eor w2, w8, #0x1
 ; CHECK-NEXT:    ret
   %1 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y)
   %2 = extractvalue { i128, i1 } %1, 0
@@ -290,19 +298,27 @@ define { i128, i8 } @u128_checked_mul(i128 %x, i128 %y) {
 define { i128, i8 } @u128_overflowing_mul(i128 %x, i128 %y) {
 ; CHECK-LABEL: u128_overflowing_mul:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr x8, x1, x3
+; CHECK-NEXT:    cbz x8, .LBB18_2
+; CHECK-NEXT:  // %bb.1: // %overflow
 ; CHECK-NEXT:    mul x9, x3, x0
 ; CHECK-NEXT:    cmp x1, #0
 ; CHECK-NEXT:    ccmp x3, #0, #4, ne
-; CHECK-NEXT:    umulh x8, x1, x2
-; CHECK-NEXT:    umulh x10, x3, x0
+; CHECK-NEXT:    umulh x10, x1, x2
+; CHECK-NEXT:    umulh x8, x3, x0
 ; CHECK-NEXT:    madd x9, x1, x2, x9
-; CHECK-NEXT:    ccmp xzr, x8, #0, eq
-; CHECK-NEXT:    umulh x11, x0, x2
 ; CHECK-NEXT:    ccmp xzr, x10, #0, eq
+; CHECK-NEXT:    umulh x11, x0, x2
+; CHECK-NEXT:    ccmp xzr, x8, #0, eq
 ; CHECK-NEXT:    mul x0, x0, x2
 ; CHECK-NEXT:    cset w8, ne
 ; CHECK-NEXT:    adds x1, x11, x9
 ; CHECK-NEXT:    csinc w2, w8, wzr, lo
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB18_2: // %overflow.no
+; CHECK-NEXT:    umulh x1, x0, x2
+; CHECK-NEXT:    mul x0, x0, x2
+; CHECK-NEXT:    mov w2, wzr
 ; CHECK-NEXT:    ret
   %1 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y)
   %2 = extractvalue { i128, i1 } %1, 0
@@ -316,19 +332,28 @@ define { i128, i8 } @u128_overflowing_mul(i128 %x, i128 %y) {
 define i128 @u128_saturating_mul(i128 %x, i128 %y) {
 ; CHECK-LABEL: u128_saturating_mul:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mul x9, x3, x0
+; CHECK-NEXT:    orr x8, x1, x3
+; CHECK-NEXT:    cbz x8, .LBB19_2
+; CHECK-NEXT:  // %bb.1: // %overflow
+; CHECK-NEXT:    mul x8, x3, x0
 ; CHECK-NEXT:    cmp x1, #0
 ; CHECK-NEXT:    ccmp x3, #0, #4, ne
-; CHECK-NEXT:    umulh x8, x1, x2
-; CHECK-NEXT:    umulh x10, x3, x0
-; CHECK-NEXT:    madd x9, x1, x2, x9
-; CHECK-NEXT:    ccmp xzr, x8, #0, eq
-; CHECK-NEXT:    umulh x11, x0, x2
+; CHECK-NEXT:    umulh x10, x1, x2
+; CHECK-NEXT:    umulh x9, x3, x0
+; CHECK-NEXT:    madd x11, x1, x2, x8
 ; CHECK-NEXT:    ccmp xzr, x10, #0, eq
+; CHECK-NEXT:    umulh x12, x0, x2
+; CHECK-NEXT:    ccmp xzr, x9, #0, eq
 ; CHECK-NEXT:    mul x8, x0, x2
 ; CHECK-NEXT:    cset w10, ne
-; CHECK-NEXT:    adds x9, x11, x9
+; CHECK-NEXT:    adds x9, x12, x11
 ; CHECK-NEXT:    csinc w10, w10, wzr, lo
+; CHECK-NEXT:    b .LBB19_3
+; CHECK-NEXT:  .LBB19_2: // %overflow.no
+; CHECK-NEXT:    umulh x9, x0, x2
+; CHECK-NEXT:    mov w10, wzr
+; CHECK-NEXT:    mul x8, x0, x2
+; CHECK-NEXT:  .LBB19_3: // %overflow.res
 ; CHECK-NEXT:    cmp w10, #0
 ; CHECK-NEXT:    csinv x0, x8, xzr, eq
 ; CHECK-NEXT:    csinv x1, x9, xzr, eq
@@ -355,6 +380,11 @@ define i128 @i128_mul(i128 %x, i128 %y) {
 define { i128, i8 } @i128_checked_mul(i128 %x, i128 %y) {
 ; CHECK-LABEL: i128_checked_mul:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    eor x8, x3, x2, asr #63
+; CHECK-NEXT:    eor x9, x1, x0, asr #63
+; CHECK-NEXT:    orr x8, x9, x8
+; CHECK-NEXT:    cbz x8, .LBB21_2
+; CHECK-NEXT:  // %bb.1: // %overflow
 ; CHECK-NEXT:    asr x9, x1, #63
 ; CHECK-NEXT:    umulh x10, x0, x2
 ; CHECK-NEXT:    asr x13, x3, #63
@@ -364,24 +394,30 @@ define { i128, i8 } @i128_checked_mul(i128 %x, i128 %y) {
 ; CHECK-NEXT:    adds x10, x11, x10
 ; CHECK-NEXT:    mul x14, x0, x3
 ; CHECK-NEXT:    umulh x12, x0, x3
-; CHECK-NEXT:    adc x9, x8, x9
+; CHECK-NEXT:    adc x8, x8, x9
+; CHECK-NEXT:    mov x9, x1
 ; CHECK-NEXT:    mul x13, x0, x13
-; CHECK-NEXT:    adds x8, x14, x10
-; CHECK-NEXT:    mul x15, x1, x3
-; CHECK-NEXT:    smulh x10, x1, x3
-; CHECK-NEXT:    mov x1, x8
-; CHECK-NEXT:    adc x11, x12, x13
-; CHECK-NEXT:    asr x12, x9, #63
-; CHECK-NEXT:    asr x13, x11, #63
-; CHECK-NEXT:    adds x9, x9, x11
 ; CHECK-NEXT:    asr x11, x8, #63
+; CHECK-NEXT:    mul x15, x1, x3
+; CHECK-NEXT:    adds x1, x14, x10
+; CHECK-NEXT:    smulh x9, x9, x3
+; CHECK-NEXT:    adc x10, x12, x13
+; CHECK-NEXT:    asr x12, x10, #63
+; CHECK-NEXT:    adds x8, x8, x10
+; CHECK-NEXT:    asr x10, x1, #63
 ; CHECK-NEXT:    mul x0, x0, x2
-; CHECK-NEXT:    adc x12, x12, x13
-; CHECK-NEXT:    adds x9, x15, x9
-; CHECK-NEXT:    adc x10, x10, x12
-; CHECK-NEXT:    cmp x9, x11
-; CHECK-NEXT:    ccmp x10, x11, #0, eq
-; CHECK-NEXT:    cset w2, eq
+; CHECK-NEXT:    adc x11, x11, x12
+; CHECK-NEXT:    adds x8, x15, x8
+; CHECK-NEXT:    adc x9, x9, x11
+; CHECK-NEXT:    cmp x8, x10
+; CHECK-NEXT:    ccmp x9, x10, #0, eq
+; CHECK-NEXT:    cset w8, ne
+; CHECK-NEXT:    eor w2, w8, #0x1
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB21_2: // %overflow.no
+; CHECK-NEXT:    smulh x1, x0, x2
+; CHECK-NEXT:    mul x0, x0, x2
+; CHECK-NEXT:    eor w2, w8, #0x1
 ; CHECK-NEXT:    ret
   %1 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y)
   %2 = extractvalue { i128, i1 } %1, 0
@@ -396,6 +432,11 @@ define { i128, i8 } @i128_checked_mul(i128 %x, i128 %y) {
 define { i128, i8 } @i128_overflowing_mul(i128 %x, i128 %y) {
 ; CHECK-LABEL: i128_overflowing_mul:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    eor x8, x3, x2, asr #63
+; CHECK-NEXT:    eor x9, x1, x0, asr #63
+; CHECK-NEXT:    orr x8, x9, x8
+; CHECK-NEXT:    cbz x8, .LBB22_2
+; CHECK-NEXT:  // %bb.1: // %overflow
 ; CHECK-NEXT:    asr x9, x1, #63
 ; CHECK-NEXT:    umulh x10, x0, x2
 ; CHECK-NEXT:    asr x13, x3, #63
@@ -405,24 +446,29 @@ define { i128, i8 } @i128_overflowing_mul(i128 %x, i128 %y) {
 ; CHECK-NEXT:    adds x10, x11, x10
 ; CHECK-NEXT:    mul x14, x0, x3
 ; CHECK-NEXT:    umulh x12, x0, x3
-; CHECK-NEXT:    adc x9, x8, x9
+; CHECK-NEXT:    adc x8, x8, x9
+; CHECK-NEXT:    mov x9, x1
 ; CHECK-NEXT:    mul x13, x0, x13
-; CHECK-NEXT:    adds x8, x14, x10
-; CHECK-NEXT:    mul x15, x1, x3
-; CHECK-NEXT:    smulh x10, x1, x3
-; CHECK-NEXT:    mov x1, x8
-; CHECK-NEXT:    adc x11, x12, x13
-; CHECK-NEXT:    asr x12, x9, #63
-; CHECK-NEXT:    asr x13, x11, #63
-; CHECK-NEXT:    adds x9, x9, x11
 ; CHECK-NEXT:    asr x11, x8, #63
+; CHECK-NEXT:    mul x15, x1, x3
+; CHECK-NEXT:    adds x1, x14, x10
+; CHECK-NEXT:    smulh x9, x9, x3
+; CHECK-NEXT:    adc x10, x12, x13
+; CHECK-NEXT:    asr x12, x10, #63
+; CHECK-NEXT:    adds x8, x8, x10
+; CHECK-NEXT:    asr x10, x1, #63
 ; CHECK-NEXT:    mul x0, x0, x2
-; CHECK-NEXT:    adc x12, x12, x13
-; CHECK-NEXT:    adds x9, x15, x9
-; CHECK-NEXT:    adc x10, x10, x12
-; CHECK-NEXT:    cmp x9, x11
-; CHECK-NEXT:    ccmp x10, x11, #0, eq
+; CHECK-NEXT:    adc x11, x11, x12
+; CHECK-NEXT:    adds x8, x15, x8
+; CHECK-NEXT:    adc x9, x9, x11
+; CHECK-NEXT:    cmp x8, x10
+; CHECK-NEXT:    ccmp x9, x10, #0, eq
 ; CHECK-NEXT:    cset w2, ne
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB22_2: // %overflow.no
+; CHECK-NEXT:    smulh x1, x0, x2
+; CHECK-NEXT:    mul x0, x0, x2
+; CHECK-NEXT:    mov w2, wzr
 ; CHECK-NEXT:    ret
   %1 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y)
   %2 = extractvalue { i128, i1 } %1, 0
@@ -436,6 +482,11 @@ define { i128, i8 } @i128_overflowing_mul(i128 %x, i128 %y) {
 define i128 @i128_saturating_mul(i128 %x, i128 %y) {
 ; CHECK-LABEL: i128_saturating_mul:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    eor x8, x3, x2, asr #63
+; CHECK-NEXT:    eor x9, x1, x0, asr #63
+; CHECK-NEXT:    orr x8, x9, x8
+; CHECK-NEXT:    cbz x8, .LBB23_2
+; CHECK-NEXT:  // %bb.1: // %overflow
 ; CHECK-NEXT:    asr x9, x1, #63
 ; CHECK-NEXT:    umulh x10, x0, x2
 ; CHECK-NEXT:    asr x13, x3, #63
@@ -445,29 +496,35 @@ define i128 @i128_saturating_mul(i128 %x, i128 %y) {
 ; CHECK-NEXT:    adds x10, x11, x10
 ; CHECK-NEXT:    mul x14, x0, x3
 ; CHECK-NEXT:    umulh x12, x0, x3
-; CHECK-NEXT:    adc x8, x8, x9
+; CHECK-NEXT:    adc x9, x8, x9
 ; CHECK-NEXT:    mul x13, x0, x13
-; CHECK-NEXT:    adds x9, x14, x10
-; CHECK-NEXT:    mul x11, x1, x3
-; CHECK-NEXT:    adc x10, x12, x13
-; CHECK-NEXT:    smulh x12, x1, x3
-; CHECK-NEXT:    asr x13, x8, #63
-; CHECK-NEXT:    asr x14, x10, #63
-; CHECK-NEXT:    adds x8, x8, x10
-; CHECK-NEXT:    adc x10, x13, x14
-; CHECK-NEXT:    adds x8, x11, x8
-; CHECK-NEXT:    asr x11, x9, #63
-; CHECK-NEXT:    mul x13, x0, x2
-; CHECK-NEXT:    adc x10, x12, x10
-; CHECK-NEXT:    eor x12, x3, x1
-; CHECK-NEXT:    eor x8, x8, x11
-; CHECK-NEXT:    eor x10, x10, x11
-; CHECK-NEXT:    asr x11, x12, #63
-; CHECK-NEXT:    orr x8, x8, x10
-; CHECK-NEXT:    eor x10, x11, #0x7fffffffffffffff
-; CHECK-NEXT:    cmp x8, #0
-; CHECK-NEXT:    csinv x0, x13, x11, eq
-; CHECK-NEXT:    csel x1, x10, x9, ne
+; CHECK-NEXT:    adds x8, x14, x10
+; CHECK-NEXT:    mul x15, x1, x3
+; CHECK-NEXT:    asr x14, x8, #63
+; CHECK-NEXT:    smulh x10, x1, x3
+; CHECK-NEXT:    adc x11, x12, x13
+; CHECK-NEXT:    asr x12, x9, #63
+; CHECK-NEXT:    asr x13, x11, #63
+; CHECK-NEXT:    adds x11, x9, x11
+; CHECK-NEXT:    mul x9, x0, x2
+; CHECK-NEXT:    adc x12, x12, x13
+; CHECK-NEXT:    adds x11, x15, x11
+; CHECK-NEXT:    adc x10, x10, x12
+; CHECK-NEXT:    cmp x11, x14
+; CHECK-NEXT:    ccmp x10, x14, #0, eq
+; CHECK-NEXT:    cset w10, ne
+; CHECK-NEXT:    b .LBB23_3
+; CHECK-NEXT:  .LBB23_2: // %overflow.no
+; CHECK-NEXT:    smulh x8, x0, x2
+; CHECK-NEXT:    mov w10, wzr
+; CHECK-NEXT:    mul x9, x0, x2
+; CHECK-NEXT:  .LBB23_3: // %overflow.res
+; CHECK-NEXT:    eor x11, x3, x1
+; CHECK-NEXT:    cmp w10, #0
+; CHECK-NEXT:    asr x11, x11, #63
+; CHECK-NEXT:    eor x12, x11, #0x7fffffffffffffff
+; CHECK-NEXT:    csinv x0, x9, x11, eq
+; CHECK-NEXT:    csel x1, x12, x8, ne
 ; CHECK-NEXT:    ret
   %1 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y)
   %2 = extractvalue { i128, i1 } %1, 0
diff --git a/llvm/test/CodeGen/AArch64/i128_with_overflow.ll b/llvm/test/CodeGen/AArch64/i128_with_overflow.ll
index 9924b7c63f763..3d90e094a5747 100644
--- a/llvm/test/CodeGen/AArch64/i128_with_overflow.ll
+++ b/llvm/test/CodeGen/AArch64/i128_with_overflow.ll
@@ -224,21 +224,29 @@ cleanup:
 define i128 @test_umul_i128(i128 noundef %x, i128 noundef %y) {
 ; CHECK-LABEL: test_umul_i128:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    orr x8, x1, x3
+; CHECK-NEXT:    cbz x8, .LBB4_2
+; CHECK-NEXT:  // %bb.1: // %overflow
 ; CHECK-NEXT:    mul x9, x3, x0
 ; CHECK-NEXT:    cmp x1, #0
 ; CHECK-NEXT:    ccmp x3, #0, #4, ne
-; CHECK-NEXT:    umulh x8, x1, x2
-; CHECK-NEXT:    umulh x10, x3, x0
+; CHECK-NEXT:    umulh x10, x1, x2
+; CHECK-NEXT:    umulh x8, x3, x0
 ; CHECK-NEXT:    madd x9, x1, x2, x9
-; CHECK-NEXT:    ccmp xzr, x8, #0, eq
-; CHECK-NEXT:    umulh x11, x0, x2
 ; CHECK-NEXT:    ccmp xzr, x10, #0, eq
+; CHECK-NEXT:    umulh x11, x0, x2
+; CHECK-NEXT:    ccmp xzr, x8, #0, eq
+; CHECK-NEXT:    mul x0, x0, x2
 ; CHECK-NEXT:    cset w8, ne
 ; CHECK-NEXT:    adds x1, x11, x9
 ; CHECK-NEXT:    csinc w8, w8, wzr, lo
-; CHECK-NEXT:    cmp w8, #1
-; CHECK-NEXT:    b.ne .LBB4_2
-; CHECK-NEXT:  // %bb.1: // %if.then
+; CHECK-NEXT:    cbnz w8, .LBB4_3
+; CHECK-NEXT:    b .LBB4_4
+; CHECK-NEXT:  .LBB4_2: // %overflow.no
+; CHECK-NEXT:    umulh x1, x0, x2
+; CHECK-NEXT:    mul x0, x0, x2
+; CHECK-NEXT:    cbz w8, .LBB4_4
+; CHECK-NEXT:  .LBB4_3: // %if.then
 ; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset w30, -16
@@ -247,9 +255,7 @@ define i128 @test_umul_i128(i128 noundef %x, i128 noundef %y) {
 ; CHECK-NEXT:    sxtw x0, w0
 ; CHECK-NEXT:    asr x1, x0, #63
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB4_2: // %if.end
-; CHECK-NEXT:    mul x0, x0, x2
+; CHECK-NEXT:  .LBB4_4: // %cleanup
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y)
@@ -273,34 +279,40 @@ cleanup:
 define i128 @test_smul_i128(i128 noundef %x, i128 noundef %y) {
 ; CHECK-LABEL: test_smul_i128:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    asr x10, x1, #63
-; CHECK-NEXT:    umulh x11, x0, x2
-; CHECK-NEXT:    asr x14, x3, #63
-; CHECK-NEXT:    mov x8, x1
-; CHECK-NEXT:    mul x12, x1, x2
-; CHECK-NEXT:    umulh x9, x1, x2
-; CHECK-NEXT:    mul x10, x10, x2
-; CHECK-NEXT:    adds x11, x12, x11
-; CHECK-NEXT:    mul x15, x0, x3
-; CHECK-NEXT:    umulh x13, x0, x3
-; CHECK-NEXT:    adc x9, x9, x10
-; CHECK-NEXT:    mul x14, x0, x14
-; CHECK-NEXT:    mul x16, x1, x3
-; CHECK-NEXT:    adds x1, x15, x11
-; CHECK-NEXT:    asr x11, x9, #63
-; CHECK-NEXT:    smulh x8, x8, x3
-; CHECK-NEXT:    adc x10, x13, x14
-; CHECK-NEXT:    asr x12, x10, #63
-; CHECK-NEXT:    adds x9, x9, x10
-; CHECK-NEXT:    adc x10, x11, x12
-; CHECK-NEXT:    adds x9, x16, x9
-; CHECK-NEXT:    asr x11, x1, #63
-; CHECK-NEXT:    adc x8, x8, x10
-; CHECK-NEXT:    eor x8, x8, x11
-; CHECK-NEXT:    eor x9, x9, x11
+; CHECK-NEXT:    eor x8, x3, x2, asr #63
+; CHECK-NEXT:    eor x9, x1, x0, asr #63
 ; CHECK-NEXT:    orr x8, x9, x8
-; CHECK-NEXT:    cbz x8, .LBB5_2
-; CHECK-NEXT:  // %bb.1: // %if.then
+; CHECK-NEXT:    cbz x8, .LBB5_4
+; CHECK-NEXT:  // %bb.1: // %overflow
+; CHECK-NEXT:    asr x9, x1, #63
+; CHECK-NEXT:    umulh x10, x0, x2
+; CHECK-NEXT:    asr x13, x3, #63
+; CHECK-NEXT:    mul x11, x1, x2
+; CHECK-NEXT:    umulh x8, x1, x2
+; CHECK-NEXT:    mul x9, x9, x2
+; CHECK-NEXT:    adds x10, x11, x10
+; CHECK-NEXT:    mul x14, x0, x3
+; CHECK-NEXT:    umulh x12, x0, x3
+; CHECK-NEXT:    adc x8, x8, x9
+; CHECK-NEXT:    mov x9, x1
+; CHECK-NEXT:    mul x13, x0, x13
+; CHECK-NEXT:    asr x11, x8, #63
+; CHECK-NEXT:    mul x15, x1, x3
+; CHECK-NEXT:    adds x1, x14, x10
+; CHECK-NEXT:    smulh x9, x9, x3
+; CHECK-NEXT:    adc x10, x12, x13
+; CHECK-NEXT:    asr x12, x10, #63
+; CHECK-NEXT:    adds x8, x8, x10
+; CHECK-NEXT:    asr x10, x1, #63
+; CHECK-NEXT:    mul x0, x0, x2
+; CHECK-NEXT:    adc x11, x11, x12
+; CHECK-NEXT:    adds x8, x15, x8
+; CHECK-NEXT:    adc x9, x9, x11
+; CHECK-NEXT:    cmp x8, x10
+; CHECK-NEXT:    ccmp x9, x10, #0, eq
+; CHECK-NEXT:    cset w8, ne
+; CHECK-NEXT:    cbz w8, .LBB5_3
+; CHECK-NEXT:  .LBB5_2: // %if.then
 ; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset w30, -16
@@ -309,10 +321,13 @@ define i128 @test_smul_i128(i128 noundef %x, i128 noundef %y) {
 ; CHECK-NEXT:    sxtw x0, w0
 ; CHECK-NEXT:    asr x1, x0, #63
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:  .LBB5_3: // %cleanup
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB5_2: // %if.end
+; CHECK-NEXT:  .LBB5_4: // %overflow.no
+; CHECK-NEXT:    smulh x1, x0, x2
 ; CHECK-NEXT:    mul x0, x0, x2
-; CHECK-NEXT:    ret
+; CHECK-NEXT:    cbnz w8, .LBB5_2
+; CHECK-NEXT:    b .LBB5_3
 entry:
   %0 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y)
   %1 = extractvalue { i128, i1 } %0, 1
diff --git a/llvm/test/CodeGen/AArch64/mul-i128-overflow.ll b/llvm/test/CodeGen/AArch64/mul-i128-overflow.ll
new file mode 100644
index 0000000000000..7b60f81539aa8
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/mul-i128-overflow.ll
@@ -0,0 +1,261 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64 -o - %s | FileCheck %s
+
+
+declare i32 @error()
+
+define i128 @test1(i128 noundef %x, i128 noundef %y) {
+; CHECK-LABEL: test1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    eor x8, x3, x2, asr #63
+; CHECK-NEXT:    eor x9, x1, x0, asr #63
+; CHECK-NEXT:    orr x8, x9, x8
+; CHECK-NEXT:    cbz x8, .LBB0_4
+; CHECK-NEXT:  // %bb.1: // %overflow
+; CHECK-NEXT:    asr x9, x1, #63
+; CHECK-NEXT:    umulh x10, x0, x2
+; CHECK-NEXT:    asr x13, x3, #63
+; CHECK-NEXT:    mul x11, x1, x2
+; CHECK-NEXT:    umulh x8, x1, x2
+; CHECK-NEXT:    mul x9, x9, x2
+; CHECK-NEXT:    adds x10, x11, x10
+; CHECK-NEXT:    mul x14, x0, x3
+; CHECK-NEXT:    umulh x12, x0, x3
+; CHECK-NEXT:    adc x8, x8, x9
+; CHECK-NEXT:    mov x9, x1
+; CHECK-NEXT:    mul x13, x0, x13
+; CHECK-NEXT:    asr x11, x8, #63
+; CHECK-NEXT:    mul x15, x1, x3
+; CHECK-NEXT:    adds x1, x14, x10
+; CHECK-NEXT:    smulh x9, x9, x3
+; CHECK-NEXT:    adc x10, x12, x13
+; CHECK-NEXT:    asr x12, x10, #63
+; CHECK-NEXT:    adds x8, x8, x10
+; CHECK-NEXT:    asr x10, x1, #63
+; CHECK-NEXT:    mul x0, x0, x2
+; CHECK-NEXT:    adc x11, x11, x12
+; CHECK-NEXT:    adds x8, x15, x8
+; CHECK-NEXT:    adc x9, x9, x11
+; CHECK-NEXT:    cmp x8, x10
+; CHECK-NEXT:    ccmp x9, x10, #0, eq
+; CHECK-NEXT:    cset w8, ne
+; CHECK-NEXT:    cbz w8, .LBB0_3
+; CHECK-NEXT:  .LBB0_2: // %if.then
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl error
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sxtw x0, w0
+; CHECK-NEXT:    asr x1, x0, #63
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:  .LBB0_3: // %cleanup
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB0_4: // %overflow.no
+; CHECK-NEXT:    smulh x1, x0, x2
+; CHECK-NEXT:    mul x0, x0, x2
+; CHECK-NEXT:    cbnz w8, .LBB0_2
+; CHECK-NEXT:    b .LBB0_3
+entry:
+  %0 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y)
+  %1 = extractvalue { i128, i1 } %0, 1
+  br i1 %1, label %if.then, label %if.end
+
+if.then:
+  %call = tail call i32 @error()
+  %conv1 = sext i32 %call to i128
+  br label %cleanup
+
+if.end:
+  %2 = extractvalue { i128, i1 } %0, 0
+  br label %cleanup
+
+cleanup:
+  %retval.0 = phi i128 [ %conv1, %if.then ], [ %2, %if.end ]
+  ret i128 %retval.0
+}
+
+define i128 @test2(i128 noundef %x, i128 noundef %y, ptr %out) {
+; CHECK-LABEL: test2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    eor x8, x3, x2, asr #63
+; CHECK-NEXT:    eor x9, x1, x0, asr #63
+; CHECK-NEXT:    orr x8, x9, x8
+; CHECK-NEXT:    cbz x8, .LBB1_4
+; CHECK-NEXT:  // %bb.1: // %overflow
+; CHECK-NEXT:    asr x9, x1, #63
+; CHECK-NEXT:    umulh x10, x0, x2
+; CHECK-NEXT:    asr x13, x3, #63
+; CHECK-NEXT:    mul x11, x1, x2
+; CHECK-NEXT:    umulh x8, x1, x2
+; CHECK-NEXT:    mul x9, x9, x2
+; CHECK-NEXT:    adds x10, x11, x10
+; CHECK-NEXT:    mul x14, x0, x3
+; CHECK-NEXT:    umulh x12, x0, x3
+; CHECK-NEXT:    adc x8, x8, x9
+; CHECK-NEXT:    mov x9, x1
+; CHECK-NEXT:    mul x13, x0, x13
+; CHECK-NEXT:    asr x11, x8, #63
+; CHECK-NEXT:    mul x15, x1, x3
+; CHECK-NEXT:    adds x1, x14, x10
+; CHECK-NEXT:    smulh x9, x9, x3
+; CHECK-NEXT:    adc x10, x12, x13
+; CHECK-NEXT:    asr x12, x10, #63
+; CHECK-NEXT:    adds x8, x8, x10
+; CHECK-NEXT:    asr x10, x1, #63
+; CHECK-NEXT:    mul x0, x0, x2
+; CHECK-NEXT:    adc x11, x11, x12
+; CHECK-NEXT:    adds x8, x15, x8
+; CHECK-NEXT:    adc x9, x9, x11
+; CHECK-NEXT:    cmp x8, x10
+; CHECK-NEXT:    ccmp x9, x10, #0, eq
+; CHECK-NEXT:    cset w8, ne
+; CHECK-NEXT:    stp x0, x1, [x4]
+; CHECK-NEXT:    cbz w8, .LBB1_3
+; CHECK-NEXT:  .LBB1_2: // %if.then
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl error
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sxtw x0, w0
+; CHECK-NEXT:    asr x1, x0, #63
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:  .LBB1_3: // %cleanup
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB1_4: // %overflow.no
+; CHECK-NEXT:    smulh x1, x0, x2
+; CHECK-NEXT:    mul x0, x0, x2
+; CHECK-NEXT:    stp x0, x1, [x4]
+; CHECK-NEXT:    cbnz w8, .LBB1_2
+; CHECK-NEXT:    b .LBB1_3
+entry:
+  %0 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y)
+  %1 = extractvalue { i128, i1 } %0, 0
+  store i128 %1, ptr %out
+  %2 = extractvalue { i128, i1 } %0, 1
+  br i1 %2, label %if.then, label %cleanup
+
+if.then:
+  %call = tail call i32 @error()
+  %conv1 = sext i32 %call to i128
+  br label %cleanup
+
+cleanup:
+  %retval.0 = phi i128 [ %conv1, %if.then ], [ %1, %entry ]
+  ret i128 %retval.0
+}
+
+define i128 @test3(i128 noundef %x, i128 noundef %y, ptr %out) {
+; CHECK-LABEL: test3:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    orr x8, x1, x3
+; CHECK-NEXT:    cbz x8, .LBB2_3
+; CHECK-NEXT:  // %bb.1: // %overflow
+; CHECK-NEXT:    mul x8, x3, x0
+; CHECK-NEXT:    cmp x1, #0
+; CHECK-NEXT:    ccmp x3, #0, #4, ne
+; CHECK-NEXT:    umulh x10, x1, x2
+; CHECK-NEXT:    umulh x9, x3, x0
+; CHECK-NEXT:    madd x11, x1, x2, x8
+; CHECK-NEXT:    ccmp xzr, x10, #0, eq
+; CHECK-NEXT:    umulh x12, x0, x2
+; CHECK-NEXT:    ccmp xzr, x9, #0, eq
+; CHECK-NEXT:    mul x8, x0, x2
+; CHECK-NEXT:    cset w10, ne
+; CHECK-NEXT:    adds x9, x12, x11
+; CHECK-NEXT:    csinc w10, w10, wzr, lo
+; CHECK-NEXT:    stp x8, x9, [x4]
+; CHECK-NEXT:    cbnz w10, .LBB2_4
+; CHECK-NEXT:  .LBB2_2:
+; CHECK-NEXT:    mov x1, xzr
+; CHECK-NEXT:    mov w0, #1 // =0x1
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB2_3: // %overflow.no
+; CHECK-NEXT:    umulh x9, x0, x2
+; CHECK-NEXT:    mov w10, wzr
+; CHECK-NEXT:    mul x8, x0, x2
+; CHECK-NEXT:    stp x8, x9, [x4]
+; CHECK-NEXT:    cbz w10, .LBB2_2
+; CHECK-NEXT:  .LBB2_4: // %if.then
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl error
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sxtw x0, w0
+; CHECK-NEXT:    asr x1, x0, #63
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y)
+  %1 = extractvalue { i128, i1 } %0, 0
+  store i128 %1, ptr %out
+  %2 = extractvalue { i128, i1 } %0, 1
+  br i1 %2, label %if.then, label %cleanup
+
+if.then:
+  %call = tail call i32 @error()
+  %conv1 = sext i32 %call to i128
+  br label %cleanup
+
+cleanup:
+  %retval.0 = phi i128 [ %conv1, %if.then ], [ 1, %entry ]
+  ret i128 %retval.0
+}
+
+define i128 @test4(i128 noundef %x, i128 noundef %y, i128 %out) {
+; CHECK-LABEL: test4:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    orr x8, x1, x3
+; CHECK-NEXT:    cbz x8, .LBB3_2
+; CHECK-NEXT:  // %bb.1: // %overflow
+; CHECK-NEXT:    mul x8, x3, x0
+; CHECK-NEXT:    cmp x1, #0
+; CHECK-NEXT:    ccmp x3, #0, #4, ne
+; CHECK-NEXT:    umulh x10, x1, x2
+; CHECK-NEXT:    umulh x9, x3, x0
+; CHECK-NEXT:    madd x11, x1, x2, x8
+; CHECK-NEXT:    ccmp xzr, x10, #0, eq
+; CHECK-NEXT:    umulh x12, x0, x2
+; CHECK-NEXT:    ccmp xzr, x9, #0, eq
+; CHECK-NEXT:    mul x8, x0, x2
+; CHECK-NEXT:    cset w10, ne
+; CHECK-NEXT:    adds x9, x12, x11
+; CHECK-NEXT:    csinc w10, w10, wzr, lo
+; CHECK-NEXT:    b .LBB3_3
+; CHECK-NEXT:  .LBB3_2: // %overflow.no
+; CHECK-NEXT:    umulh x9, x0, x2
+; CHECK-NEXT:    mov w10, wzr
+; CHECK-NEXT:    mul x8, x0, x2
+; CHECK-NEXT:  .LBB3_3: // %overflow.res
+; CHECK-NEXT:    adds x0, x8, x4
+; CHECK-NEXT:    adc x1, x9, x5
+; CHECK-NEXT:    cbz w10, .LBB3_5
+; CHECK-NEXT:  // %bb.4: // %if.then
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl error
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sxtw x0, w0
+; CHECK-NEXT:    asr x1, x0, #63
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:  .LBB3_5: // %cleanup
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y)
+  %1 = extractvalue { i128, i1 } %0, 0
+  %res = add i128 %1, %out
+  %2 = extractvalue { i128, i1 } %0, 1
+  br i1 %2, label %if.then, label %cleanup
+
+if.then:
+  %call = tail call i32 @error()
+  %conv1 = sext i32 %call to i128
+  br label %cleanup
+
+cleanup:
+  %retval.0 = phi i128 [ %conv1, %if.then ], [ %res, %entry ]
+  ret i128 %retval.0
+}
diff --git a/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll
index edfd80b4f2706..ace0c83e63c7c 100644
--- a/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll
@@ -4,20 +4,28 @@
 define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
 ; AARCH-LABEL: muloti_test:
 ; AARCH:       // %bb.0: // %start
+; AARCH-NEXT:    orr x8, x1, x3
+; AARCH-NEXT:    cbz x8, .LBB0_2
+; AARCH-NEXT:  // %bb.1: // %overflow
 ; AARCH-NEXT:    mul x9, x3, x0
 ; AARCH-NEXT:    cmp x1, #0
 ; AARCH-NEXT:    ccmp x3, #0, #4, ne
-; AARCH-NEXT:    umulh x8, x1, x2
-; AARCH-NEXT:    umulh x10, x3, x0
+; AARCH-NEXT:    umulh x10, x1, x2
+; AARCH-NEXT:    umulh x8, x3, x0
 ; AARCH-NEXT:    madd x9, x1, x2, x9
-; AARCH-NEXT:    ccmp xzr, x8, #0, eq
-; AARCH-NEXT:    umulh x11, x0, x2
 ; AARCH-NEXT:    ccmp xzr, x10, #0, eq
+; AARCH-NEXT:    umulh x11, x0, x2
+; AARCH-NEXT:    ccmp xzr, x8, #0, eq
 ; AARCH-NEXT:    mul x0, x0, x2
 ; AARCH-NEXT:    cset w8, ne
 ; AARCH-NEXT:    adds x1, x11, x9
 ; AARCH-NEXT:    csinc w2, w8, wzr, lo
 ; AARCH-NEXT:    ret
+; AARCH-NEXT:  .LBB0_2: // %overflow.no
+; AARCH-NEXT:    umulh x1, x0, x2
+; AARCH-NEXT:    mul x0, x0, x2
+; AARCH-NEXT:    mov w2, wzr
+; AARCH-NEXT:    ret
 start:
   %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2
   %1 = extractvalue { i128, i1 } %0, 0
@@ -35,45 +43,56 @@ start:
 define i128 @__muloti4(i128 %0, i128 %1, ptr nocapture nonnull writeonly align 4 %2) #2 {
 ; AARCH-LABEL: __muloti4:
 ; AARCH:       // %bb.0: // %Entry
-; AARCH-NEXT:    asr x11, x1, #63
-; AARCH-NEXT:    asr x9, x3, #63
-; AARCH-NEXT:    umulh x12, x0, x2
-; AARCH-NEXT:    mov x8, x1
+; AARCH-NEXT:    eor x8, x3, x2, asr #63
+; AARCH-NEXT:    eor x9, x1, x0, asr #63
 ; AARCH-NEXT:    str wzr, [x4]
-; AARCH-NEXT:    mul x13, x1, x2
-; AARCH-NEXT:    umulh x10, x1, x2
-; AARCH-NEXT:    mul x11, x11, x2
-; AARCH-NEXT:    adds x12, x13, x12
-; AARCH-NEXT:    mul x15, x0, x3
-; AARCH-NEXT:    umulh x14, x0, x3
-; AARCH-NEXT:    adc x10, x10, x11
-; AARCH-NEXT:    mul x9, x0, x9
-; AARCH-NEXT:    mul x16, x1, x3
-; AARCH-NEXT:    adds x1, x15, x12
-; AARCH-NEXT:    asr x12, x10, #63
-; AARCH-NEXT:    smulh x11, x8, x3
-; AARCH-NEXT:    adc x9, x14, x9
-; AARCH-NEXT:    asr x13, x9, #63
-; AARCH-NEXT:    adds x9, x10, x9
-; AARCH-NEXT:    asr x10, x1, #63
+; AARCH-NEXT:    orr x8, x9, x8
+; AARCH-NEXT:    cbz x8, .LBB1_2
+; AARCH-NEXT:  // %bb.1: // %overflow
+; AARCH-NEXT:    asr x9, x1, #63
+; AARCH-NEXT:    umulh x10, x0, x2
+; AARCH-NEXT:    asr x13, x3, #63
+; AARCH-NEXT:    mul x11, x1, x2
+; AARCH-NEXT:    umulh x8, x1, x2
+; AARCH-NEXT:    mul x9, x9, x2
+; AARCH-NEXT:    adds x10, x11, x10
+; AARCH-NEXT:    mul x14, x0, x3
+; AARCH-NEXT:    umulh x12, x0, x3
+; AARCH-NEXT:    adc x9, x8, x9
+; AARCH-NEXT:    mul x13, x0, x13
+; AARCH-NEXT:    adds x8, x14, x10
+; AARCH-NEXT:    mul x15, x1, x3
+; AARCH-NEXT:    smulh x10, x1, x3
+; AARCH-NEXT:    adc x11, x12, x13
+; AARCH-NEXT:    asr x12, x9, #63
+; AARCH-NEXT:    asr x13, x11, #63
+; AARCH-NEXT:    adds x9, x9, x11
+; AARCH-NEXT:    asr x11, x8, #63
 ; AARCH-NEXT:    mul x0, x0, x2
 ; AARCH-NEXT:    adc x12, x12, x13
-; AARCH-NEXT:    adds x9, x16, x9
-; AARCH-NEXT:    adc x11, x11, x12
-; AARCH-NEXT:    cmp x9, x10
-; AARCH-NEXT:    ccmp x11, x10, #0, eq
+; AARCH-NEXT:    adds x9, x15, x9
+; AARCH-NEXT:    adc x10, x10, x12
+; AARCH-NEXT:    cmp x9, x11
+; AARCH-NEXT:    ccmp x10, x11, #0, eq
 ; AARCH-NEXT:    cset w9, ne
-; AARCH-NEXT:    tbz x8, #63, .LBB1_2
-; AARCH-NEXT:  // %bb.1: // %Entry
-; AARCH-NEXT:    eor x8, x3, #0x8000000000000000
-; AARCH-NEXT:    orr x8, x2, x8
-; AARCH-NEXT:    cbz x8, .LBB1_3
-; AARCH-NEXT:  .LBB1_2: // %Else2
-; AARCH-NEXT:    cbz w9, .LBB1_4
-; AARCH-NEXT:  .LBB1_3: // %Then7
-; AARCH-NEXT:    mov w8, #1 // =0x1
-; AARCH-NEXT:    str w8, [x4]
-; AARCH-NEXT:  .LBB1_4: // %Block9
+; AARCH-NEXT:    tbnz x1, #63, .LBB1_3
+; AARCH-NEXT:    b .LBB1_4
+; AARCH-NEXT:  .LBB1_2: // %overflow.no
+; AARCH-NEXT:    smulh x8, x0, x2
+; AARCH-NEXT:    mov w9, wzr
+; AARCH-NEXT:    mul x0, x0, x2
+; AARCH-NEXT:    tbz x1, #63, .LBB1_4
+; AARCH-NEXT:  .LBB1_3: // %overflow.res
+; AARCH-NEXT:    eor x10, x3, #0x8000000000000000
+; AARCH-NEXT:    orr x10, x2, x10
+; AARCH-NEXT:    cbz x10, .LBB1_5
+; AARCH-NEXT:  .LBB1_4: // %Else2
+; AARCH-NEXT:    cbz w9, .LBB1_6
+; AARCH-NEXT:  .LBB1_5: // %Then7
+; AARCH-NEXT:    mov w9, #1 // =0x1
+; AARCH-NEXT:    str w9, [x4]
+; AARCH-NEXT:  .LBB1_6: // %Block9
+; AARCH-NEXT:    mov x1, x8
 ; AARCH-NEXT:    ret
 Entry:
   store i32 0, ptr %2, align 4

From c61c5d29334c7ff044ba46bff17e1f3d57e230a3 Mon Sep 17 00:00:00 2001
From: Luke Hutton <luke.hutton@arm.com>
Date: Tue, 18 Nov 2025 13:54:21 +0000
Subject: [PATCH 10/52] [mlir][tosa] Add a pass to narrow i64 to i32 (#165581)

This pass aims to narrow i64 types on TOSA operations to i32. It can be
useful for legalizations from various frameworks. It comes with the
following options:
- "aggressive-rewrite" - This option is typically able to narrow more
values, but may impact numerical behaviour if not used carefully.
- "convert-function-boundaries" - If enabled, parameters/results
to/from a function may be narrowed. Otherwise, casts are inserted to
preserve the I/O of the function.

Currently the non-aggressive mode is very limited, targeting an argmax
-> cast sequence that has been observed during legalization as well as
some data layout operations that can always narrow. Support for more
operations will be added in the future.

Co-authored-by: Vitalii Shutov <vitalii.shutov@arm.com>
Co-authored-by: Shubham <shubham@arm.com>
Co-authored-by: Declan Flavin <declan.flavin@arm.com>

Signed-off-by: Luke Hutton <luke.hutton@arm.com>
Co-authored-by: Vitalii Shutov <vitalii.shutov@arm.com>
Co-authored-by: Shubham <shubham@arm.com>
Co-authored-by: Declan Flavin <declan.flavin@arm.com>
---
 .../mlir/Dialect/Tosa/Transforms/Passes.td    |  23 ++
 .../Dialect/Tosa/Transforms/CMakeLists.txt    |   1 +
 .../Tosa/Transforms/TosaNarrowI64ToI32.cpp    | 310 ++++++++++++++++++
 .../tosa-narrow-i64-to-i32-aggressive.mlir    |  81 +++++
 .../Dialect/Tosa/tosa-narrow-i64-to-i32.mlir  | 162 +++++++++
 5 files changed, 577 insertions(+)
 create mode 100644 mlir/lib/Dialect/Tosa/Transforms/TosaNarrowI64ToI32.cpp
 create mode 100644 mlir/test/Dialect/Tosa/tosa-narrow-i64-to-i32-aggressive.mlir
 create mode 100644 mlir/test/Dialect/Tosa/tosa-narrow-i64-to-i32.mlir

diff --git a/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.td b/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.td
index 14b00b04ccc18..420e58192b8fd 100644
--- a/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.td
@@ -166,4 +166,27 @@ def TosaAttachTarget : Pass<"tosa-attach-target", "ModuleOp"> {
   ];
 }
 
+def TosaNarrowI64ToI32Pass : Pass<"tosa-narrow-i64-to-i32", "func::FuncOp"> {
+  let summary = "Narrow I64 TOSA operations to I32";
+  let description = [{
+    This pass narrows TOSA operations with 64-bit integer tensor types to
+    32-bit integer tensor types. This can be useful for backends that do not
+    support the EXT-INT64 extension of TOSA.
+  }];
+  // Adjacent string literals are joined verbatim: keep the trailing spaces.
+  let options = [
+    Option<"aggressiveRewrite", "aggressive-rewrite", "bool", "false",
+      "If enabled, all TOSA operations are rewritten, regardless of whether the narrowing "
+      "is safe. This option may lead to data loss if not used carefully.">,
+    Option<"convertFunctionBoundaries", "convert-function-boundaries", "bool", "false",
+      "If enabled, the pass will convert function I/O types as well. Otherwise casts will "
+      "be inserted at the I/O boundaries.">
+  ];
+
+  let dependentDialects = [
+    "func::FuncDialect",
+    "tosa::TosaDialect",
+  ];
+}
+
 #endif // MLIR_DIALECT_TOSA_TRANSFORMS_PASSES
diff --git a/mlir/lib/Dialect/Tosa/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Tosa/Transforms/CMakeLists.txt
index 41b338d6e7189..987ce4ed870c9 100644
--- a/mlir/lib/Dialect/Tosa/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Tosa/Transforms/CMakeLists.txt
@@ -12,6 +12,7 @@ add_mlir_dialect_library(MLIRTosaTransforms
   TosaTypeConverters.cpp
   TosaProfileCompliance.cpp
   TosaValidation.cpp
+  TosaNarrowI64ToI32.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Tosa/Transforms
diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaNarrowI64ToI32.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaNarrowI64ToI32.cpp
new file mode 100644
index 0000000000000..ddaf7d8a5e033
--- /dev/null
+++ b/mlir/lib/Dialect/Tosa/Transforms/TosaNarrowI64ToI32.cpp
@@ -0,0 +1,310 @@
+//===- TosaNarrowI64ToI32.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass narrows TOSA operations with 64-bit integer tensor types to
+// 32-bit integer tensor types. This can be useful for backends that do not
+// support the EXT-INT64 extension of TOSA. The pass has two options:
+//
+// - aggressive-rewrite - If enabled, all TOSA operations are rewritten,
+//     regardless of whether the narrowing is safe. This option may lead to
+//     data loss if not used carefully.
+// - convert-function-boundaries - If enabled, the pass will convert function
+//     I/O types as well. Otherwise casts will be inserted at the I/O
+//     boundaries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Tosa/Transforms/Passes.h"
+
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/Func/Transforms/FuncConversions.h"
+#include "mlir/IR/Verifier.h"
+#include "mlir/Pass/Pass.h"
+
+namespace mlir {
+namespace tosa {
+#define GEN_PASS_DEF_TOSANARROWI64TOI32PASS
+#include "mlir/Dialect/Tosa/Transforms/Passes.h.inc"
+} // namespace tosa
+} // namespace mlir
+
+using namespace mlir;
+using namespace mlir::tosa;
+
+namespace {
+
+LogicalResult convertGenericOp(Operation *op, ValueRange operands,
+                               ConversionPatternRewriter &rewriter,
+                               const TypeConverter *typeConverter) {
+  // Recreates `op` under the same name from the converted `operands`, with its
+  // result types, attributes and regions run through `typeConverter`.
+  SmallVector<Type, 4> newResults;
+  if (failed(typeConverter->convertTypes(op->getResultTypes(), newResults)))
+    return failure();
+  OperationState state(op->getLoc(), op->getName().getStringRef(), operands,
+                       newResults, {}, op->getSuccessors());
+
+  for (const NamedAttribute &namedAttribute : op->getAttrs()) {
+    const Attribute attribute = namedAttribute.getValue();
+    // An empty optional means no attribute conversion applied: fail the match.
+    if (const auto intAttr = dyn_cast<IntegerAttr>(attribute)) {
+      const std::optional<Attribute> convertedAttribute =
+          typeConverter->convertTypeAttribute(intAttr.getType(), attribute);
+      if (!convertedAttribute)
+        return rewriter.notifyMatchFailure(
+            op, "Failed to convert integer attribute.");
+      state.addAttribute(namedAttribute.getName(), convertedAttribute.value());
+      continue;
+    }
+
+    if (const auto typeAttr = dyn_cast<TypeAttr>(attribute)) {
+      const std::optional<Attribute> convertedAttribute =
+          typeConverter->convertTypeAttribute(typeAttr.getValue(), attribute);
+      if (!convertedAttribute)
+        return rewriter.notifyMatchFailure(op,
+                                           "Failed to convert type attribute.");
+      state.addAttribute(namedAttribute.getName(), convertedAttribute.value());
+      continue;
+    }
+
+    if (const auto denseElementsAttr = dyn_cast<DenseElementsAttr>(attribute)) {
+      const Type type = denseElementsAttr.getType();
+      const std::optional<Attribute> convertedAttribute =
+          typeConverter->convertTypeAttribute(type, denseElementsAttr);
+      if (!convertedAttribute)
+        return rewriter.notifyMatchFailure(
+            op, "Failed to convert dense elements attribute.");
+      state.addAttribute(namedAttribute.getName(), convertedAttribute.value());
+      continue;
+    }
+    // Every other attribute kind is copied over unchanged.
+    state.addAttribute(namedAttribute.getName(), attribute);
+  }
+  // Move each region into the new op state, then convert the region's types.
+  for (Region &region : op->getRegions()) {
+    Region *newRegion = state.addRegion();
+    rewriter.inlineRegionBefore(region, *newRegion, newRegion->begin());
+    if (failed(rewriter.convertRegionTypes(newRegion, *typeConverter)))
+      return failure();
+  }
+
+  Operation *newOp = rewriter.create(state);
+  rewriter.replaceOp(op, newOp->getResults());
+  return success();
+}
+
+// ===========================
+// Aggressive rewrite patterns
+// ===========================
+
+class ConvertGenericOp : public ConversionPattern {
+public:
+  ConvertGenericOp(TypeConverter &typeConverter, MLIRContext *context)
+      : ConversionPattern(typeConverter, MatchAnyOpTypeTag{}, 0, context) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    // Registered for any op, but only TOSA ops are actually rewritten.
+    if (isa<tosa::TosaOp>(op))
+      return convertGenericOp(op, operands, rewriter, typeConverter);
+    return rewriter.notifyMatchFailure(
+        op,
+        "Support for operations other than TOSA has not been implemented.");
+  }
+};
+
+// ===============================
+// Bounds checked rewrite patterns
+// ===============================
+
+class ConvertArgMaxOpWithBoundsChecking
+    : public OpConversionPattern<tosa::ArgMaxOp> {
+  using OpConversionPattern::OpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(tosa::ArgMaxOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
+    // Output type can be narrowed based on the size of the axis dimension,
+    // which must belong to a ranked input, be static and be below INT32_MAX.
+    const int32_t axis = op.getAxis();
+    const auto inputType = dyn_cast<ShapedType>(adaptor.getInput().getType());
+    if (!inputType || !inputType.hasRank() || !inputType.isStaticDim(axis))
+      return rewriter.notifyMatchFailure(
+          op, "Requires a static axis dimension for bounds checking.");
+    const int64_t axisDim = inputType.getDimSize(axis);
+    if (axisDim >= std::numeric_limits<int32_t>::max())
+      return rewriter.notifyMatchFailure(
+          op, "Axis dimension is too large to narrow safely.");
+    const Type resultType = op.getOutput().getType();
+    const Type newResultType = typeConverter->convertType(resultType);
+    rewriter.replaceOpWithNewOp<tosa::ArgMaxOp>(op, newResultType,
+                                                adaptor.getInput(), axis);
+    return success();
+  }
+};
+
+class ConvertCastOpWithBoundsChecking
+    : public OpConversionPattern<tosa::CastOp> {
+  using OpConversionPattern::OpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(tosa::CastOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
+    const Value input = adaptor.getInput();
+    const auto srcType = dyn_cast<ShapedType>(input.getType());
+    const auto dstType = dyn_cast<ShapedType>(op.getResult().getType());
+    if (!srcType || !dstType)
+      return failure();
+    // Compares the adaptor (already converted) input element width with the
+    // original result element width; an integer cast that shrinks is refused.
+    const auto srcIntType = dyn_cast<IntegerType>(srcType.getElementType());
+    const auto dstIntType = dyn_cast<IntegerType>(dstType.getElementType());
+    if (srcIntType && dstIntType &&
+        srcIntType.getWidth() > dstIntType.getWidth())
+      return rewriter.notifyMatchFailure(
+          op, "Narrowing cast may lead to data loss.");
+
+    rewriter.replaceOpWithNewOp<tosa::CastOp>(
+        op, typeConverter->convertType(dstType), input);
+    return success();
+  }
+};
+
+template <typename OpTy>
+class ConvertTypedOp : public OpConversionPattern<OpTy> {
+  using OpConversionPattern<OpTy>::OpConversionPattern;
+  // Forwards a single op type to convertGenericOp without extra checks.
+  LogicalResult
+  matchAndRewrite(OpTy op, typename OpTy::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
+    const TypeConverter *converter = this->getTypeConverter();
+    return convertGenericOp(op, adaptor.getOperands(), rewriter, converter);
+  }
+};
+
+struct TosaNarrowI64ToI32
+    : public tosa::impl::TosaNarrowI64ToI32PassBase<TosaNarrowI64ToI32> {
+public:
+  explicit TosaNarrowI64ToI32() = default;
+  explicit TosaNarrowI64ToI32(const TosaNarrowI64ToI32PassOptions &options)
+      : TosaNarrowI64ToI32() {
+    this->aggressiveRewrite = options.aggressiveRewrite;
+    this->convertFunctionBoundaries = options.convertFunctionBoundaries;
+  }
+  // Builds the type converter, legality target and patterns, then converts.
+  void runOnOperation() override {
+    MLIRContext *context = &getContext();
+    // Types: identity fallback; i64 and ranked tensors of i64 become i32.
+    TypeConverter typeConverter;
+    typeConverter.addConversion([](Type type) -> Type { return type; });
+    typeConverter.addConversion([](IntegerType type) -> Type {
+      if (!type.isInteger(64))
+        return type;
+      return IntegerType::get(type.getContext(), 32);
+    });
+    typeConverter.addConversion(
+        [&typeConverter](RankedTensorType type) -> Type {
+          const Type elementType = type.getElementType();
+          if (!elementType.isInteger(64))
+            return type;
+          return RankedTensorType::get(type.getShape(),
+                                       typeConverter.convertType(elementType));
+        });
+    // Source and target materializations wrap a single input in a tosa.cast.
+    const auto materializeCast = [](OpBuilder &builder, Type resultType,
+                                    ValueRange inputs, Location loc) -> Value {
+      if (inputs.size() != 1)
+        return Value();
+      return tosa::CastOp::create(builder, loc, resultType, inputs.front());
+    };
+    typeConverter.addSourceMaterialization(materializeCast);
+    typeConverter.addTargetMaterialization(materializeCast);
+
+    typeConverter.addTypeAttributeConversion(
+        [](IntegerType type, IntegerAttr attribute) -> Attribute {
+          // Only 64-bit attributes are narrowed; truncSSat(32) is not valid
+          // on narrower values, so those keep their original type and value.
+          if (!type.isInteger(64))
+            return attribute;
+          return IntegerAttr::get(IntegerType::get(type.getContext(), 32),
+                                  attribute.getValue().truncSSat(32));
+        });
+    typeConverter.addTypeAttributeConversion(
+        [&typeConverter](ShapedType type,
+                         DenseIntElementsAttr attr) -> Attribute {
+          const ShapedType newType =
+              cast<ShapedType>(typeConverter.convertType(type));
+          const auto oldElementType = cast<IntegerType>(type.getElementType());
+          const auto newElementType =
+              cast<IntegerType>(newType.getElementType());
+          if (oldElementType.getWidth() == newElementType.getWidth())
+            return attr;
+          // Truncate every element to the new width with signed saturation.
+          return attr.mapValues(newElementType, [&](const APInt &v) {
+            return v.truncSSat(newElementType.getWidth());
+          });
+        });
+
+    ConversionTarget target(*context);
+    target.addDynamicallyLegalDialect<tosa::TosaDialect>(
+        [&typeConverter](Operation *op) {
+          return typeConverter.isLegal(op->getResultTypes()) &&
+                 typeConverter.isLegal(op->getOperandTypes());
+        });
+    if (convertFunctionBoundaries) {
+      target.addDynamicallyLegalOp<func::FuncOp>(
+          [&typeConverter](func::FuncOp op) {
+            return typeConverter.isSignatureLegal(op.getFunctionType()) &&
+                   typeConverter.isLegal(&op.getBody());
+          });
+      target.addDynamicallyLegalOp<func::ReturnOp>([](func::ReturnOp op) {
+        const FunctionType funcType =
+            op->getParentOfType<func::FuncOp>().getFunctionType();
+        return llvm::equal(op.getOperandTypes(), funcType.getResults());
+      });
+    } else {
+      target.addDynamicallyLegalOp<func::FuncOp>(
+          [](func::FuncOp op) { return true; });
+      target.addDynamicallyLegalOp<func::ReturnOp>(
+          [](func::ReturnOp op) { return true; });
+    }
+
+    RewritePatternSet patterns(context);
+    if (convertFunctionBoundaries) {
+      populateFunctionOpInterfaceTypeConversionPattern<func::FuncOp>(
+          patterns, typeConverter);
+      populateReturnOpTypeConversionPattern(patterns, typeConverter);
+    }
+    if (aggressiveRewrite) {
+      patterns.add<ConvertGenericOp>(typeConverter, context);
+    } else {
+      // Tensor
+      patterns.add<ConvertArgMaxOpWithBoundsChecking>(typeConverter, context);
+      // Data layout
+      patterns.add<ConvertTypedOp<tosa::ConcatOp>>(typeConverter, context);
+      patterns.add<ConvertTypedOp<tosa::PadOp>>(typeConverter, context);
+      patterns.add<ConvertTypedOp<tosa::ReshapeOp>>(typeConverter, context);
+      patterns.add<ConvertTypedOp<tosa::ReverseOp>>(typeConverter, context);
+      patterns.add<ConvertTypedOp<tosa::SliceOp>>(typeConverter, context);
+      patterns.add<ConvertTypedOp<tosa::TileOp>>(typeConverter, context);
+      patterns.add<ConvertTypedOp<tosa::TransposeOp>>(typeConverter, context);
+      patterns.add<ConvertTypedOp<tosa::IdentityOp>>(typeConverter, context);
+      // Type conversion
+      patterns.add<ConvertCastOpWithBoundsChecking>(typeConverter, context);
+      // Controlflow
+      patterns.add<ConvertTypedOp<tosa::IfOp>>(typeConverter, context);
+      patterns.add<ConvertTypedOp<tosa::WhileOp>>(typeConverter, context);
+    }
+
+    if (failed(
+            applyFullConversion(getOperation(), target, std::move(patterns))))
+      signalPassFailure();
+  }
+};
+
+} // namespace
diff --git a/mlir/test/Dialect/Tosa/tosa-narrow-i64-to-i32-aggressive.mlir b/mlir/test/Dialect/Tosa/tosa-narrow-i64-to-i32-aggressive.mlir
new file mode 100644
index 0000000000000..1a36177a37033
--- /dev/null
+++ b/mlir/test/Dialect/Tosa/tosa-narrow-i64-to-i32-aggressive.mlir
@@ -0,0 +1,81 @@
+// RUN: mlir-opt -split-input-file -verify-diagnostics -tosa-narrow-i64-to-i32="aggressive-rewrite=1" %s | FileCheck %s --allow-unused-prefixes --check-prefixes=COMMON,DEFAULT
+// RUN: mlir-opt -split-input-file -verify-diagnostics -tosa-narrow-i64-to-i32="aggressive-rewrite=1 convert-function-boundaries=1" %s | FileCheck %s --allow-unused-prefixes --check-prefixes=COMMON,FUNCBOUND
+
+// COMMON-LABEL: test_i64_argmax_large_axis_dim
+func.func @test_i64_argmax_large_axis_dim(%arg0: tensor<1x513x513x2147483650xi8>) -> tensor<1x513x513xi64> {
+  // DEFAULT: tosa.argmax %arg0 {axis = 3 : i32} : (tensor<1x513x513x2147483650xi8>) -> tensor<1x513x513xi32>
+  %0 = tosa.argmax %arg0 {axis = 3 : i32} : (tensor<1x513x513x2147483650xi8>) -> tensor<1x513x513xi64>
+  return %0 : tensor<1x513x513xi64>
+}
+
+// -----
+
+// COMMON-LABEL: test_convert_input_parameters
+// DEFAULT: %[[IN:.*]]: tensor<1x513x513x3xi64>
+// FUNCBOUND: %[[IN:.*]]: tensor<1x513x513x3xi32>
+func.func @test_convert_input_parameters(%arg0: tensor<1x513x513x3xi64>) -> tensor<1x513x513x3xf32> {
+  // DEFAULT: %[[FUNC_BOUND_CAST:.*]] = tosa.cast %[[IN]] : (tensor<1x513x513x3xi64>) -> tensor<1x513x513x3xi32>
+  // DEFAULT: %[[CAST1:.*]] = tosa.cast %[[FUNC_BOUND_CAST]] : (tensor<1x513x513x3xi32>) -> tensor<1x513x513x3xi32>
+  // FUNCBOUND: %[[CAST1:.*]] = tosa.cast %[[IN]] : (tensor<1x513x513x3xi32>) -> tensor<1x513x513x3xi32>
+  %0 = tosa.cast %arg0 : (tensor<1x513x513x3xi64>) -> tensor<1x513x513x3xi32>
+
+  // COMMON: %[[CAST2:.*]] = tosa.cast %[[CAST1]] : (tensor<1x513x513x3xi32>) -> tensor<1x513x513x3xf32>
+  %1 = tosa.cast %0 : (tensor<1x513x513x3xi32>) -> tensor<1x513x513x3xf32>
+  return %1 : tensor<1x513x513x3xf32>
+}
+
+// -----
+
+// COMMON-LABEL: test_add
+// DEFAULT: %[[IN0:.*]]: tensor<13x21x1xi64>, %[[IN1:.*]]: tensor<13x21x3xi64>
+// FUNCBOUND: %[[IN0:.*]]: tensor<13x21x1xi32>, %[[IN1:.*]]: tensor<13x21x3xi32>
+func.func @test_add(%arg0: tensor<13x21x1xi64>, %arg1: tensor<13x21x3xi64>) -> tensor<13x21x3xi64> {
+  // DEFAULT-DAG: %[[FUNC_BOUND_CAST0:.*]] = tosa.cast %[[IN0]] : (tensor<13x21x1xi64>) -> tensor<13x21x1xi32>
+  // DEFAULT-DAG: %[[FUNC_BOUND_CAST1:.*]] = tosa.cast %[[IN1]] : (tensor<13x21x3xi64>) -> tensor<13x21x3xi32>
+  // DEFAULT: %[[ADD:.*]] = tosa.add %[[FUNC_BOUND_CAST0]], %[[FUNC_BOUND_CAST1]] : (tensor<13x21x1xi32>, tensor<13x21x3xi32>) -> tensor<13x21x3xi32>
+  // DEFAULT: %[[CAST:.*]] = tosa.cast %[[ADD]] : (tensor<13x21x3xi32>) -> tensor<13x21x3xi64>
+  // DEFAULT: return %[[CAST]] : tensor<13x21x3xi64>
+  // FUNCBOUND: %[[ADD:.*]] = tosa.add %[[IN0]], %[[IN1]] : (tensor<13x21x1xi32>, tensor<13x21x3xi32>) -> tensor<13x21x3xi32>
+  // FUNCBOUND: return %[[ADD]] : tensor<13x21x3xi32>
+  %0 = tosa.add %arg0, %arg1 : (tensor<13x21x1xi64>, tensor<13x21x3xi64>) -> tensor<13x21x3xi64>
+  return %0 : tensor<13x21x3xi64>
+}
+
+// -----
+
+// COMMON-LABEL: test_regions
+// DEFAULT: %[[IN0:.*]]: tensor<i64>, %[[IN1:.*]]: tensor<i64>
+func.func @test_regions(%arg0: tensor<i64>, %arg1: tensor<i64>, %arg2: tensor<i1>) -> tensor<i64> {
+  // DEFAULT-DAG: %[[CAST0:.*]] = tosa.cast %[[IN0]] : (tensor<i64>) -> tensor<i32>
+  // DEFAULT-DAG: %[[CAST1:.*]] = tosa.cast %[[IN1]] : (tensor<i64>) -> tensor<i32>
+  // COMMON: %[[IF_RESULT:.*]] = tosa.cond_if
+  %0 = tosa.cond_if %arg2 : tensor<i1> -> (tensor<i64>) {
+    // DEFAULT: %[[ADD:.*]] = tosa.add %[[CAST0]], %[[CAST1]] : (tensor<i32>, tensor<i32>) -> tensor<i32>
+    // FUNCBOUND: %[[ADD:.*]] = tosa.add %[[IN0]], %[[IN1]] : (tensor<i32>, tensor<i32>) -> tensor<i32>
+    %1 = tosa.add %arg0, %arg1 : (tensor<i64>, tensor<i64>) -> tensor<i64>
+    // COMMON: tosa.yield %[[ADD]] : tensor<i32>
+    tosa.yield %1 : tensor<i64>
+  } else {
+    // DEFAULT: %[[SUB:.*]] = tosa.sub %[[CAST0]], %[[CAST1]] : (tensor<i32>, tensor<i32>) -> tensor<i32>
+    // FUNCBOUND: %[[SUB:.*]] = tosa.sub %[[IN0]], %[[IN1]] : (tensor<i32>, tensor<i32>) -> tensor<i32>
+    %1 = tosa.sub %arg0, %arg1 : (tensor<i64>, tensor<i64>) -> tensor<i64>
+    // COMMON: tosa.yield %[[SUB]] : tensor<i32>
+    tosa.yield %1 : tensor<i64>
+  }
+  // DEFAULT: %[[OUT:.*]] = tosa.cast %[[IF_RESULT]] : (tensor<i32>) -> tensor<i64>
+  // DEFAULT: return %[[OUT]] : tensor<i64>
+  // FUNCBOUND: return %[[IF_RESULT]] : tensor<i32>
+  return %0 : tensor<i64>
+}
+
+// -----
+
+// COMMON-LABEL: test_const
+func.func @test_const() -> tensor<2xi64> {
+  // COMMON: %[[CONST:.*]] = "tosa.const"() <{values = dense<[1, 2]> : tensor<2xi32>}> : () -> tensor<2xi32>
+  %0 = "tosa.const"() <{values = dense<[1, 2]> : tensor<2xi64>}> : () -> tensor<2xi64>
+  // DEFAULT: %[[OUT:.*]] = tosa.cast %[[CONST]] : (tensor<2xi32>) -> tensor<2xi64>
+  // DEFAULT: return %[[OUT]] : tensor<2xi64>
+  // FUNCBOUND: return %[[CONST]] : tensor<2xi32>
+  return %0 : tensor<2xi64>
+}
diff --git a/mlir/test/Dialect/Tosa/tosa-narrow-i64-to-i32.mlir b/mlir/test/Dialect/Tosa/tosa-narrow-i64-to-i32.mlir
new file mode 100644
index 0000000000000..a14483fcdd7b0
--- /dev/null
+++ b/mlir/test/Dialect/Tosa/tosa-narrow-i64-to-i32.mlir
@@ -0,0 +1,162 @@
+// RUN: mlir-opt -split-input-file -verify-diagnostics -tosa-narrow-i64-to-i32="convert-function-boundaries=0" %s | FileCheck %s --allow-unused-prefixes --check-prefixes=COMMON,DEFAULT
+// RUN: mlir-opt -split-input-file -verify-diagnostics -tosa-narrow-i64-to-i32="convert-function-boundaries=1" %s | FileCheck %s --allow-unused-prefixes --check-prefixes=COMMON,FUNCBOUND
+
+// -----
+
+// CHECK-LABEL: test_i64_argmax
+func.func @test_i64_argmax(%arg0: tensor<1x513x513x19xi8>) -> tensor<1x513x513xi64> {
+  // COMMON: %[[ARGMAX:.*]] = tosa.argmax %arg0 {axis = 3 : i32} : (tensor<1x513x513x19xi8>) -> tensor<1x513x513xi32>
+  %0 = tosa.argmax %arg0 {axis = 3 : i32} : (tensor<1x513x513x19xi8>) -> tensor<1x513x513xi64>
+
+  // DEFAULT: %[[CAST:.*]] = tosa.cast %[[ARGMAX]] : (tensor<1x513x513xi32>) -> tensor<1x513x513xi64>
+  // FUNCBOUND: return %[[ARGMAX]] : tensor<1x513x513xi32>
+  return %0 : tensor<1x513x513xi64>
+}
+
+// -----
+
+// CHECK-LABEL: test_i64_argmax_cast
+func.func @test_i64_argmax_cast(%arg0: tensor<1x513x513x19xi8>) -> tensor<1x513x513xf32> {
+  // COMMON: %[[ARGMAX:.*]] = tosa.argmax %arg0 {axis = 3 : i32} : (tensor<1x513x513x19xi8>) -> tensor<1x513x513xi32>
+  %0 = tosa.argmax %arg0 {axis = 3 : i32} : (tensor<1x513x513x19xi8>) -> tensor<1x513x513xi64>
+  // COMMON: tosa.cast %[[ARGMAX]] : (tensor<1x513x513xi32>) -> tensor<1x513x513xf32>
+  %1 = tosa.cast %0 : (tensor<1x513x513xi64>) -> tensor<1x513x513xf32>
+  return %1 : tensor<1x513x513xf32>
+}
+
+// -----
+
+// CHECK-LABEL: test_i64_argmax_large_axis_dim
+func.func @test_i64_argmax_large_axis_dim(%arg0: tensor<1x513x513x2147483650xi8>) -> tensor<1x513x513xi64> {
+  // expected-error @+1 {{failed to legalize operation 'tosa.argmax'}}
+  %0 = tosa.argmax %arg0 {axis = 3 : i32} : (tensor<1x513x513x2147483650xi8>) -> tensor<1x513x513xi64>
+  return %0 : tensor<1x513x513xi64>
+}
+
+// -----
+
+// CHECK-LABEL: test_add
+func.func @test_add(%arg0: tensor<13x21x1xi64>, %arg1: tensor<13x21x3xi64>) -> tensor<13x21x3xi64> {
+  // expected-error @+1 {{failed to legalize operation 'tosa.add'}}
+  %0 = tosa.add %arg0, %arg1 : (tensor<13x21x1xi64>, tensor<13x21x3xi64>) -> tensor<13x21x3xi64>
+  return %0 : tensor<13x21x3xi64>
+}
+
+// -----
+
+// CHECK-LABEL: test_regions
+func.func @test_regions(%arg0: tensor<1x2xi32>, %arg1: tensor<1xi32>, %arg2: tensor<i1>) -> tensor<1xi32> {
+  // COMMON: %[[IF_RESULT:.*]] = tosa.cond_if %arg2 : tensor<i1> -> tensor<1xi32>
+  %0 = tosa.cond_if %arg2 : tensor<i1> -> tensor<1xi32> {
+    // COMMON: %[[ARGMAX:.*]] = tosa.argmax %arg0 {axis = 1 : i32} : (tensor<1x2xi32>) -> tensor<1xi32>
+    %1 = tosa.argmax %arg0 {axis = 1 : i32} : (tensor<1x2xi32>) -> tensor<1xi64>
+    // COMMON: %[[CAST:.*]] = tosa.cast %[[ARGMAX]] : (tensor<1xi32>) -> tensor<1xi32>
+    %2 = tosa.cast %1 : (tensor<1xi64>) -> tensor<1xi32>
+    // COMMON: tosa.yield %[[CAST]] : tensor<1xi32>
+    tosa.yield %2 : tensor<1xi32>
+  } else {
+    tosa.yield %arg1 : tensor<1xi32>
+  }
+  // COMMON: return %[[IF_RESULT]] : tensor<1xi32>
+  return %0 : tensor<1xi32>
+}
+
+// -----
+
+// CHECK-LABEL: test_concat
+func.func @test_concat(%arg0: tensor<13x21x3xi64>, %arg1: tensor<13x21x3xi64>) -> tensor<26x21x3xi64> {
+  // COMMON: tosa.concat %{{.*}}, %{{.*}} {axis = 0 : i32} : (tensor<13x21x3xi32>, tensor<13x21x3xi32>) -> tensor<26x21x3xi32>
+  %0 = tosa.concat %arg0, %arg1 {axis = 0 : i32} : (tensor<13x21x3xi64>, tensor<13x21x3xi64>) -> tensor<26x21x3xi64>
+  return %0 : tensor<26x21x3xi64>
+}
+
+// -----
+
+// CHECK-LABEL: test_pad
+func.func @test_pad(%arg0: tensor<13x21x3xi64>, %arg1: tensor<1xi64>) -> tensor<15x23x5xi64> {
+  %padding = tosa.const_shape {values = dense<1> : tensor<6xindex>} : () -> !tosa.shape<6>
+  // COMMON: tosa.pad %{{.*}}, %{{.*}}, %{{.*}} : (tensor<13x21x3xi32>, !tosa.shape<6>, tensor<1xi32>) -> tensor<15x23x5xi32>
+  %1 = tosa.pad %arg0, %padding, %arg1 : (tensor<13x21x3xi64>, !tosa.shape<6>, tensor<1xi64>) -> tensor<15x23x5xi64>
+  return %1 : tensor<15x23x5xi64>
+}
+
+// -----
+
+// CHECK-LABEL: test_reshape
+func.func @test_reshape(%arg0: tensor<13x21x3xi64>) -> tensor<1x819xi64> {
+  %1 = tosa.const_shape {values = dense<[1, 819]> : tensor<2xindex>} : () -> !tosa.shape<2>
+  // COMMON: tosa.reshape %{{.*}}, %{{.*}} : (tensor<13x21x3xi32>, !tosa.shape<2>) -> tensor<1x819xi32>
+  %0 = tosa.reshape %arg0, %1 : (tensor<13x21x3xi64>, !tosa.shape<2>) -> tensor<1x819xi64>
+  return %0 : tensor<1x819xi64>
+}
+
+// -----
+
+// CHECK-LABEL: test_reverse
+func.func @test_reverse(%arg0: tensor<13x21x3xi64>) -> tensor<13x21x3xi64> {
+  // COMMON: tosa.reverse %{{.*}} {axis = 0 : i32} : (tensor<13x21x3xi32>) -> tensor<13x21x3xi32>
+  %0 = tosa.reverse %arg0 {axis = 0 : i32} : (tensor<13x21x3xi64>) -> tensor<13x21x3xi64>
+  return %0 : tensor<13x21x3xi64>
+}
+
+// -----
+
+// CHECK-LABEL: test_slice
+func.func @test_slice(%arg0: tensor<13x21x3xi64>) -> tensor<4x11x1xi64> {
+  %0 = tosa.const_shape {values = dense<[4, 11, 1]> : tensor<3xindex>} : () -> !tosa.shape<3>
+  %1 = tosa.const_shape {values = dense<[6, 8, 0]> : tensor<3xindex>} : () -> !tosa.shape<3>
+  // COMMON: tosa.slice %{{.*}}, %{{.*}}, %{{.*}} : (tensor<13x21x3xi32>, !tosa.shape<3>, !tosa.shape<3>) -> tensor<4x11x1xi32>
+  %2 = tosa.slice %arg0, %0, %1 : (tensor<13x21x3xi64>, !tosa.shape<3>, !tosa.shape<3>) -> tensor<4x11x1xi64>
+  return %2 : tensor<4x11x1xi64>
+}
+
+// -----
+
+// CHECK-LABEL: test_tile
+func.func @test_tile(%arg0: tensor<13x21x3xi64>) -> tensor<39x21x6xi64> {
+  %cst = tosa.const_shape { values = dense<[3, 1, 2]> : tensor<3xindex> } : () -> !tosa.shape<3>
+  // COMMON: tosa.tile %{{.*}}, %{{.*}} : (tensor<13x21x3xi32>, !tosa.shape<3>) -> tensor<39x21x6xi32>
+  %0 = tosa.tile %arg0, %cst: (tensor<13x21x3xi64>, !tosa.shape<3>) -> tensor<39x21x6xi64>
+  return %0 : tensor<39x21x6xi64>
+}
+
+// -----
+
+// CHECK-LABEL: test_transpose
+func.func @test_transpose(%arg0: tensor<13x21x3xi64>) -> tensor<3x13x21xi64> {
+  // COMMON: tosa.transpose %{{.*}} {perms = array<i32: 2, 0, 1>} : (tensor<13x21x3xi32>) -> tensor<3x13x21xi32>
+  %1 = tosa.transpose %arg0 {perms = array<i32: 2, 0, 1>} : (tensor<13x21x3xi64>) -> tensor<3x13x21xi64>
+  return %1 : tensor<3x13x21xi64>
+}
+
+// -----
+
+// CHECK-LABEL: test_transition_to_i64
+func.func @test_transition_to_i64(%arg0: tensor<1xi32>) -> tensor<1xi64> {
+  // COMMON: %[[CAST:.*]] = tosa.cast %arg0 : (tensor<1xi32>) -> tensor<1xi32>
+  %0 = tosa.cast %arg0 : (tensor<1xi32>) -> tensor<1xi64>
+  // COMMON: %[[IDENTITY1:.*]] = tosa.identity %[[CAST]] : (tensor<1xi32>) -> tensor<1xi32>
+  %1 = tosa.identity %0 : (tensor<1xi64>) -> tensor<1xi64>
+  // COMMON: %[[IDENTITY2:.*]] = tosa.identity %[[IDENTITY1]] : (tensor<1xi32>) -> tensor<1xi32>
+  %2 = tosa.identity %1 : (tensor<1xi64>) -> tensor<1xi64>
+  // DEFAULT: %[[OUT_CAST:.*]] = tosa.cast %[[IDENTITY2]] : (tensor<1xi32>) -> tensor<1xi64>
+  // DEFAULT: return %[[OUT_CAST]] : tensor<1xi64>
+  // FUNCBOUND: return %[[IDENTITY2]] : tensor<1xi32>
+  return %2 : tensor<1xi64>
+}
+
+// -----
+
+// CHECK-LABEL: test_transition_from_i64
+func.func @test_transition_from_i64(%arg0: tensor<1xi64>) -> tensor<1xi32> {
+  // DEFAULT: %[[CAST:.*]] = tosa.cast %arg0 : (tensor<1xi64>) -> tensor<1xi32>
+  // DEFAULT: %[[IDENTITY1:.*]] = tosa.identity %[[CAST]] : (tensor<1xi32>) -> tensor<1xi32>
+  // FUNCBOUND: %[[IDENTITY1:.*]] = tosa.identity %arg0 : (tensor<1xi32>) -> tensor<1xi32>
+  %0 = tosa.identity %arg0 : (tensor<1xi64>) -> tensor<1xi64>
+  // COMMON: %[[IDENTITY2:.*]] = tosa.identity %[[IDENTITY1]] : (tensor<1xi32>) -> tensor<1xi32>
+  %1 = tosa.identity %0 : (tensor<1xi64>) -> tensor<1xi64>
+  // COMMON: %[[OUT_CAST:.*]] = tosa.cast %[[IDENTITY2]] : (tensor<1xi32>) -> tensor<1xi32>
+  %2 = tosa.cast %1 : (tensor<1xi64>) -> tensor<1xi32>
+  // COMMON: return %[[OUT_CAST]] : tensor<1xi32>
+  return %2 : tensor<1xi32>
+}

From c771159ab54ae9185c651216614715c1d28f1a74 Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman@google.com>
Date: Tue, 18 Nov 2025 05:58:22 -0800
Subject: [PATCH 11/52] [RTSan] Fix tests under Internal Shell (#168470)

This patch fixes the only RTSan test that was broken by enabling lit's
internal shell on Darwin. This patch rewrites the test to prefix env
variables with `env` and to avoid the use of subshells.
---
 compiler-rt/test/rtsan/Darwin/dlopen.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/compiler-rt/test/rtsan/Darwin/dlopen.cpp b/compiler-rt/test/rtsan/Darwin/dlopen.cpp
index 1aabe5cb6e580..435a4353b7026 100644
--- a/compiler-rt/test/rtsan/Darwin/dlopen.cpp
+++ b/compiler-rt/test/rtsan/Darwin/dlopen.cpp
@@ -8,18 +8,19 @@
 // RUN: %clangxx -fsanitize=realtime %s -o %t.so -shared -DSHARED_LIB
 // RUN: %clangxx %s -o %t
 
-// RUN: RTSAN_DYLIB_PATH=`%clangxx -fsanitize=realtime %s -### 2>&1 \
+// RUN: %clangxx -fsanitize=realtime %s -### 2>&1 \
 // RUN:   | grep "libclang_rt.rtsan_osx_dynamic.dylib" \
-// RUN:   | sed -e 's/.*"\(.*libclang_rt.rtsan_osx_dynamic.dylib\)".*/\1/'`
+// RUN:   | sed -e 's/.*"\(.*libclang_rt.rtsan_osx_dynamic.dylib\)".*/\1/' \
+// RUN:   | tr -d '\n' > %t.rtsan_dylib_path
 
 // Launching a non-instrumented binary that dlopen's an instrumented library should fail.
 // RUN: not %run %t %t.so 2>&1 | FileCheck %s --check-prefix=CHECK-FAIL
 // Launching a non-instrumented binary with an explicit DYLD_INSERT_LIBRARIES should work.
-// RUN: DYLD_INSERT_LIBRARIES=$RTSAN_DYLIB_PATH %run %t %t.so 2>&1 | FileCheck %s
+// RUN: env DYLD_INSERT_LIBRARIES="%{readfile:%t.rtsan_dylib_path}" %run %t %t.so 2>&1 | FileCheck %s
 
 // Launching an instrumented binary with the DYLD_INSERT_LIBRARIES env variable has no error
 // RUN: %clangxx -fsanitize=realtime %s -o %t
-// RUN: DYLD_INSERT_LIBRARIES=$RTSAN_DYLIB_PATH %run %t %t.so 2>&1 | FileCheck %s --check-prefix=CHECK-INSTRUMENTED
+// RUN: env DYLD_INSERT_LIBRARIES="%{readfile:%t.rtsan_dylib_path}" %run %t %t.so 2>&1 | FileCheck %s --check-prefix=CHECK-INSTRUMENTED
 
 #include <dlfcn.h>
 #include <stdio.h>

From e9f74dff138c9d31bb582efe097f326253368834 Mon Sep 17 00:00:00 2001
From: Alan Li <me@alanli.org>
Date: Tue, 18 Nov 2025 09:12:08 -0500
Subject: [PATCH 12/52] [BAZEL] Fix BAZEL build issue (#168539)

---
 utils/bazel/llvm-project-overlay/clang/BUILD.bazel | 1 +
 1 file changed, 1 insertion(+)

diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
index deb56dc0957e9..790709bdef05c 100644
--- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
@@ -1025,6 +1025,7 @@ cc_library(
 gentbl_cc_library(
     name = "sema_attr_gen",
     tbl_outs = {
+        "include/clang/Sema/AttrIsTypeDependent.inc": ["-gen-clang-attr-is-type-dependent"],
         "include/clang/Sema/AttrParsedAttrImpl.inc": ["-gen-clang-attr-parsed-attr-impl"],
         "include/clang/Sema/AttrParsedAttrKinds.inc": ["-gen-clang-attr-parsed-attr-kinds"],
         "include/clang/Sema/AttrSpellingListIndex.inc": ["-gen-clang-attr-spelling-index"],

From 38891bacaef474e10b87356545b10d2d1ed8fb2d Mon Sep 17 00:00:00 2001
From: Michael Liao <michael.hliao@gmail.com>
Date: Tue, 18 Nov 2025 09:17:11 -0500
Subject: [PATCH 13/52] [mlir][tosa] Fix shared build

---
 mlir/lib/Dialect/Tosa/Transforms/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mlir/lib/Dialect/Tosa/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Tosa/Transforms/CMakeLists.txt
index 987ce4ed870c9..76e9ddd5b2304 100644
--- a/mlir/lib/Dialect/Tosa/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Tosa/Transforms/CMakeLists.txt
@@ -22,6 +22,7 @@ add_mlir_dialect_library(MLIRTosaTransforms
 
   LINK_LIBS PUBLIC
   MLIRFuncDialect
+  MLIRFuncTransformOps
   MLIRPass
   MLIRTosaDialect
   MLIRTransformUtils

From 65c4a534bd55ed56962fb99c36f464b3f1c9732f Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler@amd.com>
Date: Tue, 18 Nov 2025 15:22:49 +0100
Subject: [PATCH 14/52] [OpenMP] Implement omp_get_uid_from_device() /
 omp_get_device_from_uid() (#164392)

Use the implementation in libomptarget. If libomptarget is not
available, always return the UID / device number of the host / the
initial device.
---
 offload/include/OpenMP/omp.h               |  7 ++
 offload/include/omptarget.h                |  2 +
 offload/libomptarget/OpenMP/API.cpp        | 58 ++++++++++++++++
 offload/libomptarget/exports               |  2 +
 offload/test/api/omp_device_uid.c          | 76 +++++++++++++++++++++
 openmp/device/include/DeviceTypes.h        |  3 +
 openmp/device/include/Interface.h          |  4 ++
 openmp/device/src/State.cpp                |  6 ++
 openmp/runtime/src/dllexports              |  2 +
 openmp/runtime/src/include/omp.h.var       |  5 ++
 openmp/runtime/src/include/omp_lib.F90.var | 14 ++++
 openmp/runtime/src/include/omp_lib.h.var   | 19 ++++++
 openmp/runtime/src/kmp_ftn_entry.h         | 29 +++++++-
 openmp/runtime/src/kmp_ftn_os.h            |  8 +++
 openmp/runtime/test/api/omp_device_uid.c   | 77 ++++++++++++++++++++++
 15 files changed, 310 insertions(+), 2 deletions(-)
 create mode 100644 offload/test/api/omp_device_uid.c
 create mode 100644 openmp/runtime/test/api/omp_device_uid.c

diff --git a/offload/include/OpenMP/omp.h b/offload/include/OpenMP/omp.h
index 768ca46a9bed0..d92c7e450c677 100644
--- a/offload/include/OpenMP/omp.h
+++ b/offload/include/OpenMP/omp.h
@@ -30,6 +30,13 @@
 
 extern "C" {
 
+/// Definitions
+///{
+
+#define omp_invalid_device -2
+
+///}
+
 /// Type declarations
 ///{
 
diff --git a/offload/include/omptarget.h b/offload/include/omptarget.h
index fbb4a06accf84..00910704a979a 100644
--- a/offload/include/omptarget.h
+++ b/offload/include/omptarget.h
@@ -270,6 +270,8 @@ extern "C" {
 void ompx_dump_mapping_tables(void);
 int omp_get_num_devices(void);
 int omp_get_device_num(void);
+int omp_get_device_from_uid(const char *DeviceUid);
+const char *omp_get_uid_from_device(int DeviceNum);
 int omp_get_initial_device(void);
 void *omp_target_alloc(size_t Size, int DeviceNum);
 void omp_target_free(void *DevicePtr, int DeviceNum);
diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp
index dd83a3ccd08e6..6e85e5764449c 100644
--- a/offload/libomptarget/OpenMP/API.cpp
+++ b/offload/libomptarget/OpenMP/API.cpp
@@ -40,6 +40,8 @@ EXTERN void ompx_dump_mapping_tables() {
 using namespace llvm::omp::target::ompt;
 #endif
 
+using GenericDeviceTy = llvm::omp::target::plugin::GenericDeviceTy;
+
 void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind,
                           const char *Name);
 void targetFreeExplicit(void *DevicePtr, int DeviceNum, int Kind,
@@ -68,6 +70,62 @@ EXTERN int omp_get_device_num(void) {
   return HostDevice;
 }
 
+static inline bool is_initial_device_uid(const char *DeviceUid) {
+  return strcmp(DeviceUid, GenericPluginTy::getHostDeviceUid()) == 0;
+}
+
+EXTERN int omp_get_device_from_uid(const char *DeviceUid) {
+  TIMESCOPE();
+  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
+
+  if (!DeviceUid) {
+    DP("Call to omp_get_device_from_uid returning omp_invalid_device\n");
+    return omp_invalid_device;
+  }
+  if (is_initial_device_uid(DeviceUid)) {
+    DP("Call to omp_get_device_from_uid returning initial device number %d\n",
+       omp_get_initial_device());
+    return omp_get_initial_device();
+  }
+
+  int DeviceNum = omp_invalid_device;
+
+  auto ExclusiveDevicesAccessor = PM->getExclusiveDevicesAccessor();
+  for (const DeviceTy &Device : PM->devices(ExclusiveDevicesAccessor)) {
+    const char *Uid = Device.RTL->getDevice(Device.RTLDeviceID).getDeviceUid();
+    if (Uid && strcmp(DeviceUid, Uid) == 0) {
+      DeviceNum = Device.DeviceID;
+      break;
+    }
+  }
+
+  DP("Call to omp_get_device_from_uid returning %d\n", DeviceNum);
+  return DeviceNum;
+}
+
+EXTERN const char *omp_get_uid_from_device(int DeviceNum) {
+  TIMESCOPE();
+  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
+  // The omp_invalid_device sentinel has no UID; report that as nullptr.
+  if (DeviceNum == omp_invalid_device) {
+    DP("Call to omp_get_uid_from_device returning nullptr\n");
+    return nullptr;
+  }
+  if (DeviceNum == omp_get_initial_device()) {
+    DP("Call to omp_get_uid_from_device returning initial device UID\n");
+    return GenericPluginTy::getHostDeviceUid();
+  }
+  // Any other device number is resolved through the plugin manager.
+  auto DeviceOrErr = PM->getDevice(DeviceNum);
+  if (!DeviceOrErr)
+    FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str());
+  // omp_get_device_from_uid treats this UID as nullable; keep the %s log safe.
+  const char *Uid =
+      DeviceOrErr->RTL->getDevice(DeviceOrErr->RTLDeviceID).getDeviceUid();
+  DP("Call to omp_get_uid_from_device returning %s\n", Uid ? Uid : "(null)");
+  return Uid;
+}
+
 EXTERN int omp_get_initial_device(void) {
   TIMESCOPE();
   OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports
index 910a5b6c827a7..2ebc23e3cf60a 100644
--- a/offload/libomptarget/exports
+++ b/offload/libomptarget/exports
@@ -40,6 +40,8 @@ VERS1.0 {
     omp_get_mapped_ptr;
     omp_get_num_devices;
     omp_get_device_num;
+    omp_get_device_from_uid;
+    omp_get_uid_from_device;
     omp_get_initial_device;
     omp_target_alloc;
     omp_target_free;
diff --git a/offload/test/api/omp_device_uid.c b/offload/test/api/omp_device_uid.c
new file mode 100644
index 0000000000000..2a41d8d04ef8a
--- /dev/null
+++ b/offload/test/api/omp_device_uid.c
@@ -0,0 +1,76 @@
+// RUN: %libomptarget-compile-run-and-check-generic
+
+#include <omp.h>
+#include <stdio.h>
+#include <string.h>
+
+int test_omp_device_uid(int device_num) {
+  const char *device_uid = omp_get_uid_from_device(device_num);
+  if (device_uid == NULL) {
+    printf("FAIL for device %d: omp_get_uid_from_device returned NULL\n",
+           device_num);
+    return 0;
+  }
+
+  int device_num_from_uid = omp_get_device_from_uid(device_uid);
+  if (device_num_from_uid != device_num) {
+    printf(
+        "FAIL for device %d: omp_get_device_from_uid returned %d (UID: %s)\n",
+        device_num, device_num_from_uid, device_uid);
+    return 0;
+  }
+
+  if (device_num == omp_get_initial_device())
+    return 1;
+
+  int success = 1;
+
+// Note that the following code may be executed on the host if the host is the
+// device
+#pragma omp target map(tofrom : success) device(device_num)
+  {
+    int device_num = omp_get_device_num();
+
+    // omp_get_uid_from_device() in the device runtime is a dummy function
+    // returning NULL
+    const char *device_uid = omp_get_uid_from_device(device_num);
+
+    // omp_get_device_from_uid() in the device runtime is a dummy function
+    // returning omp_invalid_device.
+    int device_num_from_uid = omp_get_device_from_uid(device_uid);
+
+    // Depending on whether we're executing on the device or the host, we either
+    // got NULL as the device UID or the correct device UID.  Consequently,
+    // omp_get_device_from_uid() either returned omp_invalid_device or the
+    // correct device number (aka omp_get_initial_device()).
+    if (device_uid ? device_num_from_uid != device_num
+                   : device_num_from_uid != omp_invalid_device) {
+      printf("FAIL for device %d (target): omp_get_device_from_uid returned %d "
+             "(UID: %s)\n",
+             device_num, device_num_from_uid, device_uid);
+      success = 0;
+    }
+  }
+
+  return success;
+}
+
+int main() {
+  int num_devices = omp_get_num_devices();
+  int num_failed = 0;
+  // (also test initial device aka num_devices)
+  for (int i = 0; i < num_devices + 1; i++) {
+    if (!test_omp_device_uid(i)) {
+      printf("FAIL for device %d\n", i);
+      num_failed++;
+    }
+  }
+  if (num_failed) {
+    printf("FAIL\n");
+    return 1;
+  }
+  printf("PASS\n");
+  return 0;
+}
+
+// CHECK: PASS
diff --git a/openmp/device/include/DeviceTypes.h b/openmp/device/include/DeviceTypes.h
index 2e5d92380f040..213ccfe58b4fb 100644
--- a/openmp/device/include/DeviceTypes.h
+++ b/openmp/device/include/DeviceTypes.h
@@ -21,6 +21,9 @@ template <typename T> using Constant = __gpu_constant T;
 template <typename T> using Local = __gpu_local T;
 template <typename T> using Global = __gpu_local T;
 
+// See definition in OpenMP (omp.h.var/omp_lib.(F90|h).var)
+#define omp_invalid_device -2
+
 enum omp_proc_bind_t {
   omp_proc_bind_false = 0,
   omp_proc_bind_true = 1,
diff --git a/openmp/device/include/Interface.h b/openmp/device/include/Interface.h
index c4bfaaa2404b4..71c3b1fc06d40 100644
--- a/openmp/device/include/Interface.h
+++ b/openmp/device/include/Interface.h
@@ -130,6 +130,10 @@ int omp_get_num_devices(void);
 
 int omp_get_device_num(void);
 
+int omp_get_device_from_uid(const char *DeviceUid);
+
+const char *omp_get_uid_from_device(int DeviceNum);
+
 int omp_get_num_teams(void);
 
 int omp_get_team_num();
diff --git a/openmp/device/src/State.cpp b/openmp/device/src/State.cpp
index 9f38cf26f8c6f..985e6b169137f 100644
--- a/openmp/device/src/State.cpp
+++ b/openmp/device/src/State.cpp
@@ -403,6 +403,12 @@ int omp_get_num_devices(void) { return config::getNumDevices(); }
 
 int omp_get_device_num(void) { return config::getDeviceNum(); }
 
+int omp_get_device_from_uid(const char *DeviceUid) {
+  return omp_invalid_device;
+}
+
+const char *omp_get_uid_from_device(int DeviceNum) { return nullptr; }
+
 int omp_get_num_teams(void) { return mapping::getNumberOfBlocksInKernel(); }
 
 int omp_get_team_num() { return mapping::getBlockIdInKernel(); }
diff --git a/openmp/runtime/src/dllexports b/openmp/runtime/src/dllexports
index 3983dae80c9f5..00becd1a657fd 100644
--- a/openmp/runtime/src/dllexports
+++ b/openmp/runtime/src/dllexports
@@ -544,6 +544,8 @@ kmp_set_disp_num_buffers                    890
     omp_get_devices_all_allocator           819
     omp_get_memspace_num_resources          820
     omp_get_submemspace                     821
+    omp_get_device_from_uid                 822
+    omp_get_uid_from_device                 823
     %ifndef stub
         __kmpc_set_default_allocator
         __kmpc_get_default_allocator
diff --git a/openmp/runtime/src/include/omp.h.var b/openmp/runtime/src/include/omp.h.var
index 74f385feb3ea5..e98df731ad888 100644
--- a/openmp/runtime/src/include/omp.h.var
+++ b/openmp/runtime/src/include/omp.h.var
@@ -536,6 +536,11 @@
 
     /* OpenMP 5.2 */
     extern int __KAI_KMPC_CONVENTION omp_in_explicit_task(void);
+    #define omp_invalid_device -2
+
+    /* OpenMP 6.0 */
+    extern int   __KAI_KMPC_CONVENTION  omp_get_device_from_uid(const char *DeviceUid);
+    extern const char *   __KAI_KMPC_CONVENTION  omp_get_uid_from_device(int DeviceNum);
 
     /* LLVM Extensions */
     extern void *llvm_omp_target_dynamic_shared_alloc(void);
diff --git a/openmp/runtime/src/include/omp_lib.F90.var b/openmp/runtime/src/include/omp_lib.F90.var
index 90d7e49ebf549..159b42ab5b5cc 100644
--- a/openmp/runtime/src/include/omp_lib.F90.var
+++ b/openmp/runtime/src/include/omp_lib.F90.var
@@ -215,6 +215,8 @@
 
         integer (kind=omp_interop_kind), parameter, public :: omp_interop_none = 0
 
+        integer (kind=omp_integer_kind), parameter, public :: omp_invalid_device = -2
+
         interface
 
 !         ***
@@ -417,6 +419,18 @@
             integer (kind=omp_integer_kind) omp_get_device_num
           end function omp_get_device_num
 
+          function omp_get_uid_from_device(device_num) bind(c)
+            use omp_lib_kinds
+            integer (kind=omp_integer_kind), value :: device_num
+            character (len=*) omp_get_uid_from_device
+          end function omp_get_uid_from_device
+
+          function omp_get_device_from_uid(device_uid) bind(c)
+            use omp_lib_kinds
+            character (len=*), value :: device_uid
+            integer (kind=omp_integer_kind) omp_get_device_from_uid
+          end function omp_get_device_from_uid
+
           function omp_pause_resource(kind, device_num) bind(c)
             use omp_lib_kinds
             integer (kind=omp_pause_resource_kind), value :: kind
diff --git a/openmp/runtime/src/include/omp_lib.h.var b/openmp/runtime/src/include/omp_lib.h.var
index a50bb018c7cc3..468eb03e99ef1 100644
--- a/openmp/runtime/src/include/omp_lib.h.var
+++ b/openmp/runtime/src/include/omp_lib.h.var
@@ -291,6 +291,9 @@
       integer(kind=omp_interop_kind)omp_interop_none
       parameter(omp_interop_none=0)
 
+      integer(kind=omp_integer_kind)omp_invalid_device
+      parameter(omp_invalid_device=-2)
+
       interface
 
 !       ***
@@ -486,6 +489,18 @@
           integer (kind=omp_integer_kind) omp_get_device_num
         end function omp_get_device_num
 
+        function omp_get_uid_from_device(device_num) bind(c)
+          import
+          integer (kind=omp_integer_kind), value :: device_num
+          character (len=*) omp_get_uid_from_device
+        end function omp_get_uid_from_device
+
+        function omp_get_device_from_uid(device_uid) bind(c)
+          import
+          character (len=*), value :: device_uid
+          integer (kind=omp_integer_kind) omp_get_device_from_uid
+        end function omp_get_device_from_uid
+
         function omp_pause_resource(kind, device_num) bind(c)
           import
           integer (kind=omp_pause_resource_kind), value :: kind
@@ -1159,6 +1174,8 @@
 !DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_initial_device
 !DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_devices
 !DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_device_num
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_uid_from_device
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_device_from_uid
 !DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_pause_resource
 !DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_pause_resource_all
 !DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_supported_active_levels
@@ -1242,6 +1259,8 @@
 !$omp declare target(omp_get_initial_device )
 !$omp declare target(omp_get_num_devices )
 !$omp declare target(omp_get_device_num )
+!$omp declare target(omp_get_uid_from_device )
+!$omp declare target(omp_get_device_from_uid )
 !$omp declare target(omp_pause_resource )
 !$omp declare target(omp_pause_resource_all )
 !$omp declare target(omp_get_supported_active_levels )
diff --git a/openmp/runtime/src/kmp_ftn_entry.h b/openmp/runtime/src/kmp_ftn_entry.h
index 2b0063eb23a0a..49c56d2b9a769 100644
--- a/openmp/runtime/src/kmp_ftn_entry.h
+++ b/openmp/runtime/src/kmp_ftn_entry.h
@@ -1543,13 +1543,38 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_MAX_TASK_PRIORITY)(void) {
 #endif
 }
 
-// This function will be defined in libomptarget. When libomptarget is not
-// loaded, we assume we are on the host and return KMP_HOST_DEVICE.
+// These functions will be defined in libomptarget. When libomptarget is not
+// loaded, we assume we are on the host.
 // Compiler/libomptarget will handle this if called inside target.
 int FTN_STDCALL FTN_GET_DEVICE_NUM(void) KMP_WEAK_ATTRIBUTE_EXTERNAL;
 int FTN_STDCALL FTN_GET_DEVICE_NUM(void) {
   return KMP_EXPAND_NAME(FTN_GET_INITIAL_DEVICE)();
 }
+const char *FTN_STDCALL FTN_GET_UID_FROM_DEVICE(int device_num)
+    KMP_WEAK_ATTRIBUTE_EXTERNAL;
+const char *FTN_STDCALL FTN_GET_UID_FROM_DEVICE(int device_num) {
+#if KMP_OS_DARWIN || KMP_OS_WASI || defined(KMP_STUB)
+  return nullptr;
+#else
+  const char *(*fptr)(int);
+  if ((*(void **)(&fptr) = KMP_DLSYM_NEXT("omp_get_uid_from_device")))
+    return (*fptr)(device_num);
+  // Returns the same string as used by libomptarget
+  return "HOST";
+#endif
+}
+int FTN_STDCALL FTN_GET_DEVICE_FROM_UID(const char *device_uid)
+    KMP_WEAK_ATTRIBUTE_EXTERNAL;
+int FTN_STDCALL FTN_GET_DEVICE_FROM_UID(const char *device_uid) {
+#if KMP_OS_DARWIN || KMP_OS_WASI || defined(KMP_STUB)
+  return omp_invalid_device;
+#else
+  int (*fptr)(const char *);
+  if ((*(void **)(&fptr) = KMP_DLSYM_NEXT("omp_get_device_from_uid")))
+    return (*fptr)(device_uid);
+  return KMP_EXPAND_NAME(FTN_GET_INITIAL_DEVICE)();
+#endif
+}
 
 // Compiler will ensure that this is only called from host in sequential region
 int FTN_STDCALL KMP_EXPAND_NAME(FTN_PAUSE_RESOURCE)(kmp_pause_status_t kind,
diff --git a/openmp/runtime/src/kmp_ftn_os.h b/openmp/runtime/src/kmp_ftn_os.h
index ae0ed067235e5..c439a058f22b4 100644
--- a/openmp/runtime/src/kmp_ftn_os.h
+++ b/openmp/runtime/src/kmp_ftn_os.h
@@ -140,6 +140,8 @@
 #define FTN_GET_MEMSPACE_NUM_RESOURCES omp_get_memspace_num_resources
 #define FTN_GET_SUBMEMSPACE omp_get_submemspace
 #define FTN_GET_DEVICE_NUM omp_get_device_num
+#define FTN_GET_UID_FROM_DEVICE omp_get_uid_from_device
+#define FTN_GET_DEVICE_FROM_UID omp_get_device_from_uid
 #define FTN_SET_AFFINITY_FORMAT omp_set_affinity_format
 #define FTN_GET_AFFINITY_FORMAT omp_get_affinity_format
 #define FTN_DISPLAY_AFFINITY omp_display_affinity
@@ -289,6 +291,8 @@
 #define FTN_ALLOC omp_alloc_
 #define FTN_FREE omp_free_
 #define FTN_GET_DEVICE_NUM omp_get_device_num_
+#define FTN_GET_UID_FROM_DEVICE omp_get_uid_from_device_
+#define FTN_GET_DEVICE_FROM_UID omp_get_device_from_uid_
 #define FTN_SET_AFFINITY_FORMAT omp_set_affinity_format_
 #define FTN_GET_AFFINITY_FORMAT omp_get_affinity_format_
 #define FTN_DISPLAY_AFFINITY omp_display_affinity_
@@ -436,6 +440,8 @@
 #define FTN_GET_MEMSPACE_NUM_RESOURCES OMP_GET_MEMSPACE_NUM_RESOURCES
 #define FTN_GET_SUBMEMSPACE OMP_GET_SUBMEMSPACE
 #define FTN_GET_DEVICE_NUM OMP_GET_DEVICE_NUM
+#define FTN_GET_UID_FROM_DEVICE OMP_GET_UID_FROM_DEVICE
+#define FTN_GET_DEVICE_FROM_UID OMP_GET_DEVICE_FROM_UID
 #define FTN_SET_AFFINITY_FORMAT OMP_SET_AFFINITY_FORMAT
 #define FTN_GET_AFFINITY_FORMAT OMP_GET_AFFINITY_FORMAT
 #define FTN_DISPLAY_AFFINITY OMP_DISPLAY_AFFINITY
@@ -585,6 +591,8 @@
 #define FTN_ALLOC OMP_ALLOC_
 #define FTN_FREE OMP_FREE_
 #define FTN_GET_DEVICE_NUM OMP_GET_DEVICE_NUM_
+#define FTN_GET_UID_FROM_DEVICE OMP_GET_UID_FROM_DEVICE_
+#define FTN_GET_DEVICE_FROM_UID OMP_GET_DEVICE_FROM_UID_
 #define FTN_SET_AFFINITY_FORMAT OMP_SET_AFFINITY_FORMAT_
 #define FTN_GET_AFFINITY_FORMAT OMP_GET_AFFINITY_FORMAT_
 #define FTN_DISPLAY_AFFINITY OMP_DISPLAY_AFFINITY_
diff --git a/openmp/runtime/test/api/omp_device_uid.c b/openmp/runtime/test/api/omp_device_uid.c
new file mode 100644
index 0000000000000..40a1cbb644c7b
--- /dev/null
+++ b/openmp/runtime/test/api/omp_device_uid.c
@@ -0,0 +1,77 @@
+// RUN: %libomp-compile-and-run 2>&1 | FileCheck %s
+// Linking fails for icc 18
+// UNSUPPORTED: icc-18
+
+#include <omp_testsuite.h>
+#include <string.h>
+
+int test_omp_device_uid(int device_num) {
+  const char *device_uid = omp_get_uid_from_device(device_num);
+  if (device_uid == NULL) {
+    printf("FAIL for device %d: omp_get_uid_from_device returned NULL\n",
+           device_num);
+    return 0;
+  }
+
+  int device_num_from_uid = omp_get_device_from_uid(device_uid);
+  if (device_num_from_uid != device_num) {
+    printf(
+        "FAIL for device %d: omp_get_device_from_uid returned %d (UID: %s)\n",
+        device_num, device_num_from_uid, device_uid);
+    return 0;
+  }
+
+  if (device_num == omp_get_initial_device())
+    return 1;
+
+  int success = 1;
+
+// Note that the following code may be executed on the host if the host is the
+// device
+#pragma omp target map(tofrom : success) device(device_num)
+  {
+    int device_num = omp_get_device_num();
+
+    // omp_get_uid_from_device() in the device runtime is a dummy function
+    // returning NULL
+    const char *device_uid = omp_get_uid_from_device(device_num);
+
+    // omp_get_device_from_uid() in the device runtime is a dummy function
+    // returning omp_invalid_device.
+    int device_num_from_uid = omp_get_device_from_uid(device_uid);
+
+    // Depending on whether we're executing on the device or the host, we either
+    // got NULL as the device UID or the correct device UID.  Consequently,
+    // omp_get_device_from_uid() either returned omp_invalid_device or the
+    // correct device number (aka omp_get_initial_device()).
+    if (device_uid ? device_num_from_uid != device_num
+                   : device_num_from_uid != omp_invalid_device) {
+      printf("FAIL for device %d (target): omp_get_device_from_uid returned %d "
+             "(UID: %s)\n",
+             device_num, device_num_from_uid, device_uid);
+      success = 0;
+    }
+  }
+
+  return success;
+}
+
+int main() {
+  int num_devices = omp_get_num_devices();
+  int num_failed = 0;
+  // (also test initial device aka num_devices)
+  for (int i = 0; i < num_devices + 1; i++) {
+    if (!test_omp_device_uid(i)) {
+      printf("FAIL for device %d\n", i);
+      num_failed++;
+    }
+  }
+  if (num_failed) {
+    printf("FAIL\n");
+    return 1;
+  }
+  printf("PASS\n");
+  return 0;
+}
+
+// CHECK: PASS

From 6fc2bc1ccc0d8f08ed794c792ec6ef145ee4ea1f Mon Sep 17 00:00:00 2001
From: Alan Li <me@alanli.org>
Date: Tue, 18 Nov 2025 09:25:30 -0500
Subject: [PATCH 15/52] [BAZEL] Fix OrcDebugging dep (#168540)

---
 utils/bazel/llvm-project-overlay/llvm/BUILD.bazel | 1 +
 1 file changed, 1 insertion(+)

diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
index 635f77215b38f..ddad2f4f7611d 100644
--- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
@@ -4100,6 +4100,7 @@ cc_library(
         ":DebugInfo",
         ":DebugInfoDWARF",
         ":JITLink",
+        ":Object",
         ":OrcJIT",
         ":OrcShared",
         ":Support",

From f2b5d04f2968727270a9d9368c6a4222bbebf12c Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker@arm.com>
Date: Tue, 18 Nov 2025 14:33:43 +0000
Subject: [PATCH 16/52] [LLVM][InstSimplify] Add folds for SVE integer
 reduction intrinsics. (#167519)

[andv, eorv, orv, s/uaddv, s/umaxv, s/uminv]
sve_reduce_##(none, ?) -> op's neutral value
sve_reduce_##(any, neutral) -> op's neutral value

[andv, orv, s/umaxv, s/uminv]
sve_reduce_##(all, splat(X)) -> X

[eorv]
sve_reduce_##(all, splat(X)) -> 0
---
 llvm/include/llvm/IR/Constant.h               |   3 +
 llvm/lib/Analysis/InstructionSimplify.cpp     |  68 ++
 llvm/lib/IR/Constants.cpp                     |  17 +
 .../AArch64/aarch64-sve-reductions.ll         | 912 ++++++++++++++++++
 .../InstSimplify/AArch64/lit.local.cfg        |   2 +
 5 files changed, 1002 insertions(+)
 create mode 100644 llvm/test/Transforms/InstSimplify/AArch64/aarch64-sve-reductions.ll
 create mode 100644 llvm/test/Transforms/InstSimplify/AArch64/lit.local.cfg

diff --git a/llvm/include/llvm/IR/Constant.h b/llvm/include/llvm/IR/Constant.h
index 0be1fc172ebd4..e8ce453559ed7 100644
--- a/llvm/include/llvm/IR/Constant.h
+++ b/llvm/include/llvm/IR/Constant.h
@@ -79,6 +79,9 @@ class Constant : public User {
   /// Return true if the value is the smallest signed value.
   LLVM_ABI bool isMinSignedValue() const;
 
+  /// Return true if the value is the largest signed value.
+  LLVM_ABI bool isMaxSignedValue() const;
+
   /// Return true if this is a finite and non-zero floating-point scalar
   /// constant or a fixed width vector constant with all finite and non-zero
   /// elements.
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 6f44713bd22cd..8968f6b934d77 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -41,6 +41,7 @@
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicsAArch64.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Statepoint.h"
@@ -6676,6 +6677,62 @@ static MinMaxOptResult OptimizeConstMinMax(const Constant *RHSConst,
   return MinMaxOptResult::CannotOptimize;
 }
 
+static Value *simplifySVEIntReduction(Intrinsic::ID IID, Type *ReturnType,
+                                      Value *Op0, Value *Op1) {
+  Constant *C0 = dyn_cast<Constant>(Op0); // Governing predicate, if constant.
+  Constant *C1 = dyn_cast<Constant>(Op1); // Reduced vector, if constant.
+  unsigned Width = ReturnType->getPrimitiveSizeInBits();
+
+  // All-false predicate, or a splat of the op's neutral value ==> neutral.
+  switch (IID) {
+  case Intrinsic::aarch64_sve_eorv:
+  case Intrinsic::aarch64_sve_orv:
+  case Intrinsic::aarch64_sve_saddv:
+  case Intrinsic::aarch64_sve_uaddv:
+  case Intrinsic::aarch64_sve_umaxv:
+    if ((C0 && C0->isNullValue()) || (C1 && C1->isNullValue()))
+      return ConstantInt::get(ReturnType, 0);
+    break;
+  case Intrinsic::aarch64_sve_andv:
+  case Intrinsic::aarch64_sve_uminv:
+    if ((C0 && C0->isNullValue()) || (C1 && C1->isAllOnesValue()))
+      return ConstantInt::get(ReturnType, APInt::getMaxValue(Width));
+    break;
+  case Intrinsic::aarch64_sve_smaxv:
+    if ((C0 && C0->isNullValue()) || (C1 && C1->isMinSignedValue()))
+      return ConstantInt::get(ReturnType, APInt::getSignedMinValue(Width));
+    break;
+  case Intrinsic::aarch64_sve_sminv:
+    if ((C0 && C0->isNullValue()) || (C1 && C1->isMaxSignedValue()))
+      return ConstantInt::get(ReturnType, APInt::getSignedMaxValue(Width));
+    break;
+  }
+
+  switch (IID) {
+  case Intrinsic::aarch64_sve_andv:
+  case Intrinsic::aarch64_sve_orv:
+  case Intrinsic::aarch64_sve_smaxv:
+  case Intrinsic::aarch64_sve_sminv:
+  case Intrinsic::aarch64_sve_umaxv:
+  case Intrinsic::aarch64_sve_uminv:
+    // sve_reduce_##(all, splat(X)) ==> X (these ops are idempotent).
+    if (C0 && C0->isAllOnesValue()) {
+      if (Value *SplatVal = getSplatValue(Op1)) {
+        assert(SplatVal->getType() == ReturnType && "Unexpected result type!");
+        return SplatVal;
+      }
+    }
+    break;
+  case Intrinsic::aarch64_sve_eorv:
+    // sve_reduce_xor(all, splat(X)) ==> 0; only valid when Op1 is a splat.
+    if (C0 && C0->isAllOnesValue() && getSplatValue(Op1))
+      return ConstantInt::get(ReturnType, 0);
+    break;
+  }
+
+  return nullptr; // No simplification applies.
+}
+
 Value *llvm::simplifyBinaryIntrinsic(Intrinsic::ID IID, Type *ReturnType,
                                      Value *Op0, Value *Op1,
                                      const SimplifyQuery &Q,
@@ -7037,6 +7094,17 @@ Value *llvm::simplifyBinaryIntrinsic(Intrinsic::ID IID, Type *ReturnType,
 
     break;
   }
+
+  case Intrinsic::aarch64_sve_andv:
+  case Intrinsic::aarch64_sve_eorv:
+  case Intrinsic::aarch64_sve_orv:
+  case Intrinsic::aarch64_sve_saddv:
+  case Intrinsic::aarch64_sve_smaxv:
+  case Intrinsic::aarch64_sve_sminv:
+  case Intrinsic::aarch64_sve_uaddv:
+  case Intrinsic::aarch64_sve_umaxv:
+  case Intrinsic::aarch64_sve_uminv:
+    return simplifySVEIntReduction(IID, ReturnType, Op0, Op1);
   default:
     break;
   }
diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp
index cbce8bd736102..a3aa5e9571657 100644
--- a/llvm/lib/IR/Constants.cpp
+++ b/llvm/lib/IR/Constants.cpp
@@ -183,6 +183,23 @@ bool Constant::isMinSignedValue() const {
   return false;
 }
 
+bool Constant::isMaxSignedValue() const {
+  // Scalar integer: true iff it is the maximum signed value for its width.
+  if (const ConstantInt *CI = dyn_cast<ConstantInt>(this))
+    return CI->isMaxValue(/*isSigned=*/true);
+
+  // FP constant: test its raw bit pattern against the signed-max integer.
+  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
+    return CFP->getValueAPF().bitcastToAPInt().isMaxSignedValue();
+
+  // Vector: true iff it is a splat whose element is itself signed-max.
+  if (getType()->isVectorTy())
+    if (const auto *SplatVal = getSplatValue())
+      return SplatVal->isMaxSignedValue();
+
+  return false;
+}
+
 bool Constant::isNotMinSignedValue() const {
   // Check for INT_MIN integers
   if (const ConstantInt *CI = dyn_cast<ConstantInt>(this))
diff --git a/llvm/test/Transforms/InstSimplify/AArch64/aarch64-sve-reductions.ll b/llvm/test/Transforms/InstSimplify/AArch64/aarch64-sve-reductions.ll
new file mode 100644
index 0000000000000..a54d6044d04b1
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/AArch64/aarch64-sve-reductions.ll
@@ -0,0 +1,912 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -passes=instsimplify < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+;
+; ANDV
+;
+
+define i8 @andv_i8_no_active(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: define i8 @andv_i8_no_active(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    ret i8 -1
+;
+  %out = call i8 @llvm.aarch64.sve.andv.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a)
+  ret i8 %out
+}
+
+define i8 @andv_i8_splat_neutral_val(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define i8 @andv_i8_splat_neutral_val(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 -1
+;
+  %out = call i8 @llvm.aarch64.sve.andv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat(i8 -1))
+  ret i8 %out
+}
+
+define i8 @andv_i8_splat_non_neutral_val(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define i8 @andv_i8_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i8 @llvm.aarch64.sve.andv.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> zeroinitializer)
+; CHECK-NEXT:    ret i8 [[OUT]]
+;
+  %out = call i8 @llvm.aarch64.sve.andv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer)
+  ret i8 %out
+}
+
+define i8 @andv_i8_all_active_splat(i8 %a) #0 {
+; CHECK-LABEL: define i8 @andv_i8_all_active_splat(
+; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 [[A]]
+;
+  %a.insert = insertelement <vscale x 16 x i8> poison, i8 %a, i8 0
+  %a.splat = shufflevector <vscale x 16 x i8> %a.insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %out = call i8 @llvm.aarch64.sve.andv.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> %a.splat)
+  ret i8 %out
+}
+
+define i16 @andv_i16_splat_neutral_val(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define i16 @andv_i16_splat_neutral_val(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i16 -1
+;
+  %out = call i16 @llvm.aarch64.sve.andv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat(i16 -1))
+  ret i16 %out
+}
+
+define i16 @andv_i16_splat_non_neutral_val(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define i16 @andv_i16_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i16 @llvm.aarch64.sve.andv.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> zeroinitializer)
+; CHECK-NEXT:    ret i16 [[OUT]]
+;
+  %out = call i16 @llvm.aarch64.sve.andv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> zeroinitializer)
+  ret i16 %out
+}
+
+define i32 @andv_i32_splat_neutral_val(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define i32 @andv_i32_splat_neutral_val(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i32 -1
+;
+  %out = call i32 @llvm.aarch64.sve.andv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat(i32 -1))
+  ret i32 %out
+}
+
+define i32 @andv_i32_splat_non_neutral_val(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define i32 @andv_i32_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i32 @llvm.aarch64.sve.andv.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %out = call i32 @llvm.aarch64.sve.andv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer)
+  ret i32 %out
+}
+
+define i64 @andv_i64_splat_neutral_val(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @andv_i64_splat_neutral_val(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i64 -1
+;
+  %out = call i64 @llvm.aarch64.sve.andv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat(i64 -1))
+  ret i64 %out
+}
+
+define i64 @andv_i64_splat_non_neutral_val(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @andv_i64_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i64 @llvm.aarch64.sve.andv.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT:    ret i64 [[OUT]]
+;
+  %out = call i64 @llvm.aarch64.sve.andv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> zeroinitializer)
+  ret i64 %out
+}
+
+;
+; EORV
+;
+
+define i8 @eorv_i8_no_active(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: define i8 @eorv_i8_no_active(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 0
+;
+  %out = call i8 @llvm.aarch64.sve.eorv.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a)
+  ret i8 %out
+}
+
+define i8 @eorv_i8_splat_neutral_val(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define i8 @eorv_i8_splat_neutral_val(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 0
+;
+  %out = call i8 @llvm.aarch64.sve.eorv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer)
+  ret i8 %out
+}
+
+define i8 @eorv_i8_splat_non_neutral_val(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define i8 @eorv_i8_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i8 @llvm.aarch64.sve.eorv.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 1))
+; CHECK-NEXT:    ret i8 [[OUT]]
+;
+  %out = call i8 @llvm.aarch64.sve.eorv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat(i8 1))
+  ret i8 %out
+}
+
+define i8 @eorv_i8_all_active_splat(i8 %a) #0 {
+; CHECK-LABEL: define i8 @eorv_i8_all_active_splat(
+; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 0
+;
+  %a.insert = insertelement <vscale x 16 x i8> poison, i8 %a, i8 0
+  %a.splat = shufflevector <vscale x 16 x i8> %a.insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %out = call i8 @llvm.aarch64.sve.eorv.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> %a.splat)
+  ret i8 %out
+}
+
+define i16 @eorv_i16_splat_neutral_val(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define i16 @eorv_i16_splat_neutral_val(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i16 0
+;
+  %out = call i16 @llvm.aarch64.sve.eorv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> zeroinitializer)
+  ret i16 %out
+}
+
+define i16 @eorv_i16_splat_non_neutral_val(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define i16 @eorv_i16_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i16 @llvm.aarch64.sve.eorv.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> splat (i16 1))
+; CHECK-NEXT:    ret i16 [[OUT]]
+;
+  %out = call i16 @llvm.aarch64.sve.eorv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat(i16 1))
+  ret i16 %out
+}
+
+define i32 @eorv_i32_splat_neutral_val(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define i32 @eorv_i32_splat_neutral_val(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i32 0
+;
+  %out = call i32 @llvm.aarch64.sve.eorv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer)
+  ret i32 %out
+}
+
+define i32 @eorv_i32_splat_non_neutral_val(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define i32 @eorv_i32_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i32 @llvm.aarch64.sve.eorv.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 1))
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %out = call i32 @llvm.aarch64.sve.eorv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat(i32 1))
+  ret i32 %out
+}
+
+define i64 @eorv_i64_splat_neutral_val(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @eorv_i64_splat_neutral_val(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i64 0
+;
+  %out = call i64 @llvm.aarch64.sve.eorv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> zeroinitializer)
+  ret i64 %out
+}
+
+define i64 @eorv_i64_splat_non_neutral_val(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @eorv_i64_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i64 @llvm.aarch64.sve.eorv.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 1))
+; CHECK-NEXT:    ret i64 [[OUT]]
+;
+  %out = call i64 @llvm.aarch64.sve.eorv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat(i64 1))
+  ret i64 %out
+}
+
+;
+; ORV
+;
+
+define i8 @orv_i8_no_active(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: define i8 @orv_i8_no_active(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 0
+;
+  %out = call i8 @llvm.aarch64.sve.orv.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a)
+  ret i8 %out
+}
+
+define i8 @orv_i8_splat_neutral_val(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define i8 @orv_i8_splat_neutral_val(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 0
+;
+  %out = call i8 @llvm.aarch64.sve.orv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer)
+  ret i8 %out
+}
+
+define i8 @orv_i8_splat_non_neutral_val(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define i8 @orv_i8_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i8 @llvm.aarch64.sve.orv.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 1))
+; CHECK-NEXT:    ret i8 [[OUT]]
+;
+  %out = call i8 @llvm.aarch64.sve.orv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat(i8 1))
+  ret i8 %out
+}
+
+define i8 @orv_i8_all_active_splat(i8 %a) #0 {
+; CHECK-LABEL: define i8 @orv_i8_all_active_splat(
+; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 [[A]]
+;
+  %a.insert = insertelement <vscale x 16 x i8> poison, i8 %a, i8 0
+  %a.splat = shufflevector <vscale x 16 x i8> %a.insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %out = call i8 @llvm.aarch64.sve.orv.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> %a.splat)
+  ret i8 %out
+}
+
+define i16 @orv_i16_splat_neutral_val(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define i16 @orv_i16_splat_neutral_val(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i16 0
+;
+  %out = call i16 @llvm.aarch64.sve.orv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> zeroinitializer)
+  ret i16 %out
+}
+
+define i16 @orv_i16_splat_non_neutral_val(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define i16 @orv_i16_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i16 @llvm.aarch64.sve.orv.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> splat (i16 1))
+; CHECK-NEXT:    ret i16 [[OUT]]
+;
+  %out = call i16 @llvm.aarch64.sve.orv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat(i16 1))
+  ret i16 %out
+}
+
+define i32 @orv_i32_splat_neutral_val(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define i32 @orv_i32_splat_neutral_val(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i32 0
+;
+  %out = call i32 @llvm.aarch64.sve.orv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer)
+  ret i32 %out
+}
+
+define i32 @orv_i32_splat_non_neutral_val(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define i32 @orv_i32_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i32 @llvm.aarch64.sve.orv.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 1))
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %out = call i32 @llvm.aarch64.sve.orv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat(i32 1))
+  ret i32 %out
+}
+
+define i64 @orv_i64_splat_neutral_val(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @orv_i64_splat_neutral_val(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i64 0
+;
+  %out = call i64 @llvm.aarch64.sve.orv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> zeroinitializer)
+  ret i64 %out
+}
+
+define i64 @orv_i64_splat_non_neutral_val(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @orv_i64_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i64 @llvm.aarch64.sve.orv.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 1))
+; CHECK-NEXT:    ret i64 [[OUT]]
+;
+  %out = call i64 @llvm.aarch64.sve.orv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat(i64 1))
+  ret i64 %out
+}
+
+;
+; SADDV
+;
+
+define i64 @saddv_i8_no_active(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: define i64 @saddv_i8_no_active(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i64 0
+;
+  %out = call i64 @llvm.aarch64.sve.saddv.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a)
+  ret i64 %out
+}
+
+define i64 @saddv_i8_splat_neutral_val(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @saddv_i8_splat_neutral_val(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i64 0
+;
+  %out = call i64 @llvm.aarch64.sve.saddv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer)
+  ret i64 %out
+}
+
+define i64 @saddv_i8_splat_non_neutral_val(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @saddv_i8_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i64 @llvm.aarch64.sve.saddv.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 1))
+; CHECK-NEXT:    ret i64 [[OUT]]
+;
+  %out = call i64 @llvm.aarch64.sve.saddv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat(i8 1))
+  ret i64 %out
+}
+
+define i64 @saddv_i8_all_active_splat(i8 %a) #0 {
+; CHECK-LABEL: define i64 @saddv_i8_all_active_splat(
+; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[A_INSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[A]], i8 0
+; CHECK-NEXT:    [[A_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[A_INSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+; CHECK-NEXT:    [[OUT:%.*]] = call i64 @llvm.aarch64.sve.saddv.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A_SPLAT]])
+; CHECK-NEXT:    ret i64 [[OUT]]
+;
+  %a.insert = insertelement <vscale x 16 x i8> poison, i8 %a, i8 0
+  %a.splat = shufflevector <vscale x 16 x i8> %a.insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %out = call i64 @llvm.aarch64.sve.saddv.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> %a.splat)
+  ret i64 %out
+}
+
+define i64 @saddv_i16_splat_neutral_val(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @saddv_i16_splat_neutral_val(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i64 0
+;
+  %out = call i64 @llvm.aarch64.sve.saddv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> zeroinitializer)
+  ret i64 %out
+}
+
+define i64 @saddv_i16_splat_non_neutral_val(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @saddv_i16_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i64 @llvm.aarch64.sve.saddv.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> splat (i16 1))
+; CHECK-NEXT:    ret i64 [[OUT]]
+;
+  %out = call i64 @llvm.aarch64.sve.saddv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat(i16 1))
+  ret i64 %out
+}
+
+define i64 @saddv_i32_splat_neutral_val(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @saddv_i32_splat_neutral_val(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i64 0
+;
+  %out = call i64 @llvm.aarch64.sve.saddv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer)
+  ret i64 %out
+}
+
+define i64 @saddv_i32_splat_non_neutral_val(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @saddv_i32_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i64 @llvm.aarch64.sve.saddv.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 1))
+; CHECK-NEXT:    ret i64 [[OUT]]
+;
+  %out = call i64 @llvm.aarch64.sve.saddv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat(i32 1))
+  ret i64 %out
+}
+
+define i64 @saddv_i64_splat_neutral_val(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @saddv_i64_splat_neutral_val(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i64 0
+;
+  %out = call i64 @llvm.aarch64.sve.saddv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> zeroinitializer)
+  ret i64 %out
+}
+
+define i64 @saddv_i64_splat_non_neutral_val(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @saddv_i64_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i64 @llvm.aarch64.sve.saddv.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 1))
+; CHECK-NEXT:    ret i64 [[OUT]]
+;
+  %out = call i64 @llvm.aarch64.sve.saddv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat(i64 1))
+  ret i64 %out
+}
+
+;
+; SMAXV
+;
+
+define i8 @smaxv_i8_no_active(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: define i8 @smaxv_i8_no_active(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 -128
+;
+  %out = call i8 @llvm.aarch64.sve.smaxv.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a)
+  ret i8 %out
+}
+
+define i8 @smaxv_i8_splat_neutral_val(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define i8 @smaxv_i8_splat_neutral_val(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 -128
+;
+  %out = call i8 @llvm.aarch64.sve.smaxv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat(i8 -128))
+  ret i8 %out
+}
+
+define i8 @smaxv_i8_splat_non_neutral_val(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define i8 @smaxv_i8_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i8 @llvm.aarch64.sve.smaxv.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> zeroinitializer)
+; CHECK-NEXT:    ret i8 [[OUT]]
+;
+  %out = call i8 @llvm.aarch64.sve.smaxv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer)
+  ret i8 %out
+}
+
+define i8 @smaxv_i8_all_active_splat(i8 %a) #0 {
+; CHECK-LABEL: define i8 @smaxv_i8_all_active_splat(
+; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 [[A]]
+;
+  %a.insert = insertelement <vscale x 16 x i8> poison, i8 %a, i8 0
+  %a.splat = shufflevector <vscale x 16 x i8> %a.insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %out = call i8 @llvm.aarch64.sve.smaxv.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> %a.splat)
+  ret i8 %out
+}
+
+define i16 @smaxv_i16_splat_neutral_val(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define i16 @smaxv_i16_splat_neutral_val(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i16 -32768
+;
+  %out = call i16 @llvm.aarch64.sve.smaxv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat(i16 -32768))
+  ret i16 %out
+}
+
+define i16 @smaxv_i16_splat_non_neutral_val(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define i16 @smaxv_i16_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i16 @llvm.aarch64.sve.smaxv.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> zeroinitializer)
+; CHECK-NEXT:    ret i16 [[OUT]]
+;
+  %out = call i16 @llvm.aarch64.sve.smaxv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> zeroinitializer)
+  ret i16 %out
+}
+
+define i32 @smaxv_i32_splat_neutral_val(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define i32 @smaxv_i32_splat_neutral_val(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i32 -2147483648
+;
+  %out = call i32 @llvm.aarch64.sve.smaxv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat(i32 -2147483648))
+  ret i32 %out
+}
+
+define i32 @smaxv_i32_splat_non_neutral_val(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define i32 @smaxv_i32_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i32 @llvm.aarch64.sve.smaxv.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %out = call i32 @llvm.aarch64.sve.smaxv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer)
+  ret i32 %out
+}
+
+define i64 @smaxv_i64_splat_neutral_val(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @smaxv_i64_splat_neutral_val(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i64 -9223372036854775808
+;
+  %out = call i64 @llvm.aarch64.sve.smaxv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat(i64 -9223372036854775808))
+  ret i64 %out
+}
+
+define i64 @smaxv_i64_splat_non_neutral_val(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @smaxv_i64_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i64 @llvm.aarch64.sve.smaxv.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT:    ret i64 [[OUT]]
+;
+  %out = call i64 @llvm.aarch64.sve.smaxv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> zeroinitializer)
+  ret i64 %out
+}
+
+;
+; SMINV
+;
+
+define i8 @sminv_i8_no_active(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: define i8 @sminv_i8_no_active(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 127
+;
+  %out = call i8 @llvm.aarch64.sve.sminv.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a)
+  ret i8 %out
+}
+
+define i8 @sminv_i8_splat_neutral_val(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define i8 @sminv_i8_splat_neutral_val(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 127
+;
+  %out = call i8 @llvm.aarch64.sve.sminv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat(i8 127))
+  ret i8 %out
+}
+
+define i8 @sminv_i8_splat_non_neutral_val(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define i8 @sminv_i8_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i8 @llvm.aarch64.sve.sminv.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> zeroinitializer)
+; CHECK-NEXT:    ret i8 [[OUT]]
+;
+  %out = call i8 @llvm.aarch64.sve.sminv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer)
+  ret i8 %out
+}
+
+define i8 @sminv_i8_all_active_splat(i8 %a) #0 {
+; CHECK-LABEL: define i8 @sminv_i8_all_active_splat(
+; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 [[A]]
+;
+  %a.insert = insertelement <vscale x 16 x i8> poison, i8 %a, i8 0
+  %a.splat = shufflevector <vscale x 16 x i8> %a.insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %out = call i8 @llvm.aarch64.sve.sminv.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> %a.splat)
+  ret i8 %out
+}
+
+define i16 @sminv_i16_splat_neutral_val(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define i16 @sminv_i16_splat_neutral_val(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i16 32767
+;
+  %out = call i16 @llvm.aarch64.sve.sminv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat(i16 32767))
+  ret i16 %out
+}
+
+define i16 @sminv_i16_splat_non_neutral_val(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define i16 @sminv_i16_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i16 @llvm.aarch64.sve.sminv.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> zeroinitializer)
+; CHECK-NEXT:    ret i16 [[OUT]]
+;
+  %out = call i16 @llvm.aarch64.sve.sminv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> zeroinitializer)
+  ret i16 %out
+}
+
+define i32 @sminv_i32_splat_neutral_val(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define i32 @sminv_i32_splat_neutral_val(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i32 2147483647
+;
+  %out = call i32 @llvm.aarch64.sve.sminv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat(i32 2147483647))
+  ret i32 %out
+}
+
+define i32 @sminv_i32_splat_non_neutral_val(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define i32 @sminv_i32_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i32 @llvm.aarch64.sve.sminv.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %out = call i32 @llvm.aarch64.sve.sminv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer)
+  ret i32 %out
+}
+
+define i64 @sminv_i64_splat_neutral_val(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @sminv_i64_splat_neutral_val(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i64 9223372036854775807
+;
+  %out = call i64 @llvm.aarch64.sve.sminv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat(i64 9223372036854775807))
+  ret i64 %out
+}
+
+define i64 @sminv_i64_splat_non_neutral_val(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @sminv_i64_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i64 @llvm.aarch64.sve.sminv.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT:    ret i64 [[OUT]]
+;
+  %out = call i64 @llvm.aarch64.sve.sminv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> zeroinitializer)
+  ret i64 %out
+}
+
+;
+; UADDV
+;
+
+define i64 @uaddv_i8_no_active(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: define i64 @uaddv_i8_no_active(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i64 0
+;
+  %out = call i64 @llvm.aarch64.sve.uaddv.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a)
+  ret i64 %out
+}
+
+define i64 @uaddv_i8_splat_neutral_val(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @uaddv_i8_splat_neutral_val(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i64 0
+;
+  %out = call i64 @llvm.aarch64.sve.uaddv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer)
+  ret i64 %out
+}
+
+define i64 @uaddv_i8_splat_non_neutral_val(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @uaddv_i8_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i64 @llvm.aarch64.sve.uaddv.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 1))
+; CHECK-NEXT:    ret i64 [[OUT]]
+;
+  %out = call i64 @llvm.aarch64.sve.uaddv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat(i8 1))
+  ret i64 %out
+}
+
+define i64 @uaddv_i8_all_active_splat(i8 %a) #0 {
+; CHECK-LABEL: define i64 @uaddv_i8_all_active_splat(
+; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[A_INSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[A]], i8 0
+; CHECK-NEXT:    [[A_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[A_INSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+; CHECK-NEXT:    [[OUT:%.*]] = call i64 @llvm.aarch64.sve.uaddv.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A_SPLAT]])
+; CHECK-NEXT:    ret i64 [[OUT]]
+;
+  %a.insert = insertelement <vscale x 16 x i8> poison, i8 %a, i8 0
+  %a.splat = shufflevector <vscale x 16 x i8> %a.insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %out = call i64 @llvm.aarch64.sve.uaddv.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> %a.splat)
+  ret i64 %out
+}
+
+define i64 @uaddv_i16_splat_neutral_val(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @uaddv_i16_splat_neutral_val(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i64 0
+;
+  %out = call i64 @llvm.aarch64.sve.uaddv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> zeroinitializer)
+  ret i64 %out
+}
+
+define i64 @uaddv_i16_splat_non_neutral_val(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @uaddv_i16_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i64 @llvm.aarch64.sve.uaddv.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> splat (i16 1))
+; CHECK-NEXT:    ret i64 [[OUT]]
+;
+  %out = call i64 @llvm.aarch64.sve.uaddv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat(i16 1))
+  ret i64 %out
+}
+
+define i64 @uaddv_i32_splat_neutral_val(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @uaddv_i32_splat_neutral_val(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i64 0
+;
+  %out = call i64 @llvm.aarch64.sve.uaddv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer)
+  ret i64 %out
+}
+
+define i64 @uaddv_i32_splat_non_neutral_val(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @uaddv_i32_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i64 @llvm.aarch64.sve.uaddv.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 1))
+; CHECK-NEXT:    ret i64 [[OUT]]
+;
+  %out = call i64 @llvm.aarch64.sve.uaddv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat(i32 1))
+  ret i64 %out
+}
+
+define i64 @uaddv_i64_splat_neutral_val(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @uaddv_i64_splat_neutral_val(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i64 0
+;
+  %out = call i64 @llvm.aarch64.sve.uaddv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> zeroinitializer)
+  ret i64 %out
+}
+
+define i64 @uaddv_i64_splat_non_neutral_val(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @uaddv_i64_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i64 @llvm.aarch64.sve.uaddv.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 1))
+; CHECK-NEXT:    ret i64 [[OUT]]
+;
+  %out = call i64 @llvm.aarch64.sve.uaddv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat(i64 1))
+  ret i64 %out
+}
+
+;
+; UMAXV
+;
+
+define i8 @umaxv_i8_no_active(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: define i8 @umaxv_i8_no_active(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 0
+;
+  %out = call i8 @llvm.aarch64.sve.umaxv.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a)
+  ret i8 %out
+}
+
+define i8 @umaxv_i8_splat_neutral_val(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define i8 @umaxv_i8_splat_neutral_val(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 0
+;
+  %out = call i8 @llvm.aarch64.sve.umaxv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer)
+  ret i8 %out
+}
+
+define i8 @umaxv_i8_splat_non_neutral_val(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define i8 @umaxv_i8_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i8 @llvm.aarch64.sve.umaxv.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 1))
+; CHECK-NEXT:    ret i8 [[OUT]]
+;
+  %out = call i8 @llvm.aarch64.sve.umaxv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat(i8 1))
+  ret i8 %out
+}
+
+define i8 @umaxv_i8_all_active_splat(i8 %a) #0 {
+; CHECK-LABEL: define i8 @umaxv_i8_all_active_splat(
+; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 [[A]]
+;
+  %a.insert = insertelement <vscale x 16 x i8> poison, i8 %a, i8 0
+  %a.splat = shufflevector <vscale x 16 x i8> %a.insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %out = call i8 @llvm.aarch64.sve.umaxv.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> %a.splat)
+  ret i8 %out
+}
+
+define i16 @umaxv_i16_splat_neutral_val(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define i16 @umaxv_i16_splat_neutral_val(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i16 0
+;
+  %out = call i16 @llvm.aarch64.sve.umaxv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> zeroinitializer)
+  ret i16 %out
+}
+
+define i16 @umaxv_i16_splat_non_neutral_val(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define i16 @umaxv_i16_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i16 @llvm.aarch64.sve.umaxv.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> splat (i16 1))
+; CHECK-NEXT:    ret i16 [[OUT]]
+;
+  %out = call i16 @llvm.aarch64.sve.umaxv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat(i16 1))
+  ret i16 %out
+}
+
+define i32 @umaxv_i32_splat_neutral_val(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define i32 @umaxv_i32_splat_neutral_val(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i32 0
+;
+  %out = call i32 @llvm.aarch64.sve.umaxv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer)
+  ret i32 %out
+}
+
+define i32 @umaxv_i32_splat_non_neutral_val(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define i32 @umaxv_i32_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i32 @llvm.aarch64.sve.umaxv.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 1))
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %out = call i32 @llvm.aarch64.sve.umaxv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat(i32 1))
+  ret i32 %out
+}
+
+define i64 @umaxv_i64_splat_neutral_val(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @umaxv_i64_splat_neutral_val(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i64 0
+;
+  %out = call i64 @llvm.aarch64.sve.umaxv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> zeroinitializer)
+  ret i64 %out
+}
+
+define i64 @umaxv_i64_splat_non_neutral_val(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @umaxv_i64_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i64 @llvm.aarch64.sve.umaxv.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 1))
+; CHECK-NEXT:    ret i64 [[OUT]]
+;
+  %out = call i64 @llvm.aarch64.sve.umaxv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat(i64 1))
+  ret i64 %out
+}
+
+;
+; UMINV
+;
+
+define i8 @uminv_i8_no_active(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: define i8 @uminv_i8_no_active(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 -1
+;
+  %out = call i8 @llvm.aarch64.sve.uminv.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a)
+  ret i8 %out
+}
+
+define i8 @uminv_i8_splat_neutral_val(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define i8 @uminv_i8_splat_neutral_val(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 -1
+;
+  %out = call i8 @llvm.aarch64.sve.uminv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat(i8 -1))
+  ret i8 %out
+}
+
+define i8 @uminv_i8_splat_non_neutral_val(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define i8 @uminv_i8_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i8 @llvm.aarch64.sve.uminv.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> zeroinitializer)
+; CHECK-NEXT:    ret i8 [[OUT]]
+;
+  %out = call i8 @llvm.aarch64.sve.uminv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer)
+  ret i8 %out
+}
+
+define i8 @uminv_i8_all_active_splat(i8 %a) #0 {
+; CHECK-LABEL: define i8 @uminv_i8_all_active_splat(
+; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 [[A]]
+;
+  %a.insert = insertelement <vscale x 16 x i8> poison, i8 %a, i8 0
+  %a.splat = shufflevector <vscale x 16 x i8> %a.insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %out = call i8 @llvm.aarch64.sve.uminv.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> %a.splat)
+  ret i8 %out
+}
+
+define i16 @uminv_i16_splat_neutral_val(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define i16 @uminv_i16_splat_neutral_val(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i16 -1
+;
+  %out = call i16 @llvm.aarch64.sve.uminv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat(i16 -1))
+  ret i16 %out
+}
+
+define i16 @uminv_i16_splat_non_neutral_val(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define i16 @uminv_i16_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i16 @llvm.aarch64.sve.uminv.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> zeroinitializer)
+; CHECK-NEXT:    ret i16 [[OUT]]
+;
+  %out = call i16 @llvm.aarch64.sve.uminv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> zeroinitializer)
+  ret i16 %out
+}
+
+define i32 @uminv_i32_splat_neutral_val(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define i32 @uminv_i32_splat_neutral_val(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i32 -1
+;
+  %out = call i32 @llvm.aarch64.sve.uminv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat(i32 -1))
+  ret i32 %out
+}
+
+define i32 @uminv_i32_splat_non_neutral_val(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define i32 @uminv_i32_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i32 @llvm.aarch64.sve.uminv.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %out = call i32 @llvm.aarch64.sve.uminv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer)
+  ret i32 %out
+}
+
+define i64 @uminv_i64_splat_neutral_val(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @uminv_i64_splat_neutral_val(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i64 -1
+;
+  %out = call i64 @llvm.aarch64.sve.uminv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat(i64 -1))
+  ret i64 %out
+}
+
+define i64 @uminv_i64_splat_non_neutral_val(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @uminv_i64_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i64 @llvm.aarch64.sve.uminv.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT:    ret i64 [[OUT]]
+;
+  %out = call i64 @llvm.aarch64.sve.uminv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> zeroinitializer)
+  ret i64 %out
+}
+
+attributes #0 = { "target-features"="+sve" }
diff --git a/llvm/test/Transforms/InstSimplify/AArch64/lit.local.cfg b/llvm/test/Transforms/InstSimplify/AArch64/lit.local.cfg
new file mode 100644
index 0000000000000..10d4a0e953ed4
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/AArch64/lit.local.cfg
@@ -0,0 +1,2 @@
+if not "AArch64" in config.root.targets:
+    config.unsupported = True

From 75792d60778b34f20cd350d717baeb4ec6fadbcf Mon Sep 17 00:00:00 2001
From: Nikolas Klauser <nikolasklauser@berlin.de>
Date: Tue, 18 Nov 2025 16:05:53 +0100
Subject: [PATCH 17/52] [libc++] Fix header deprecations (#163356)

Currently, no diagnostic is shown when a deprecated header is included,
because the deprecation warning originates inside a system header and is
therefore suppressed. This patch fixes that by using `#warning` instead,
which also simplifies the implementation of the deprecation warnings.
---
 libcxx/include/__config                            |  6 ++++++
 libcxx/include/ccomplex                            | 14 +++-----------
 libcxx/include/ciso646                             |  9 +++------
 libcxx/include/cstdalign                           | 13 +++----------
 libcxx/include/cstdbool                            | 13 +++----------
 libcxx/include/ctgmath                             | 13 ++-----------
 libcxx/test/libcxx/transitive_includes.gen.py      |  2 +-
 .../std/depr/depr.cpp.headers/ccomplex.verify.cpp  |  8 +-------
 .../std/depr/depr.cpp.headers/ciso646.verify.cpp   |  3 ++-
 .../std/depr/depr.cpp.headers/cstdalign.verify.cpp |  8 +-------
 .../std/depr/depr.cpp.headers/cstdbool.verify.cpp  |  8 +-------
 .../std/depr/depr.cpp.headers/ctgmath.verify.cpp   |  8 +-------
 .../tuple.apply/make_from_tuple.verify.cpp         |  7 +------
 libcxx/utils/libcxx/test/format.py                 |  2 +-
 14 files changed, 29 insertions(+), 85 deletions(-)

diff --git a/libcxx/include/__config b/libcxx/include/__config
index 8f461599ffd5b..d79ace0cbb896 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -546,6 +546,12 @@ typedef __char32_t char32_t;
 #    define _LIBCPP_DEPRECATED_(m)
 #  endif
 
+#  if defined(__DEPRECATED) && __DEPRECATED && !defined(_LIBCPP_DISABLE_DEPRECATION_WARNINGS)
+#    define _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS 1
+#  else
+#    define _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS 0
+#  endif
+
 #  if !defined(_LIBCPP_CXX03_LANG)
 #    define _LIBCPP_DEPRECATED_IN_CXX11 _LIBCPP_DEPRECATED
 #  else
diff --git a/libcxx/include/ccomplex b/libcxx/include/ccomplex
index ee7e088aac54d..c1cb039f83a5e 100644
--- a/libcxx/include/ccomplex
+++ b/libcxx/include/ccomplex
@@ -26,18 +26,10 @@
 #    pragma GCC system_header
 #  endif
 
-#  if _LIBCPP_STD_VER >= 20
-
-using __standard_header_ccomplex
-    _LIBCPP_DEPRECATED_("removed in C++20. Include <complex> instead.") _LIBCPP_NODEBUG = void;
-using __use_standard_header_ccomplex _LIBCPP_NODEBUG                                    = __standard_header_ccomplex;
-
-#  elif _LIBCPP_STD_VER >= 17
-
-using __standard_header_ccomplex _LIBCPP_DEPRECATED_("Include <complex> instead.") _LIBCPP_NODEBUG = void;
-using __use_standard_header_ccomplex _LIBCPP_NODEBUG = __standard_header_ccomplex;
-
+#  if _LIBCPP_STD_VER >= 17 && !__building_module(std) && _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS
+#    warning <ccomplex> is deprecated in C++17 and removed in C++20. Include <complex> instead.
 #  endif
+
 #endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
 
 #endif // _LIBCPP_CCOMPLEX
diff --git a/libcxx/include/ciso646 b/libcxx/include/ciso646
index 34164362dc10d..d9eae41291024 100644
--- a/libcxx/include/ciso646
+++ b/libcxx/include/ciso646
@@ -24,13 +24,10 @@
 #    pragma GCC system_header
 #  endif
 
-#  if _LIBCPP_STD_VER >= 20
-
-using __standard_header_ciso646
-    _LIBCPP_DEPRECATED_("removed in C++20. Include <version> instead.") _LIBCPP_NODEBUG = void;
-using __use_standard_header_ciso646 _LIBCPP_NODEBUG                                     = __standard_header_ciso646;
-
+#  if _LIBCPP_STD_VER >= 20 && !__building_module(std) && _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS
+#    warning <ciso646> is removed in C++20. Include <version> instead.
 #  endif
+
 #endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
 
 #endif // _LIBCPP_CISO646
diff --git a/libcxx/include/cstdalign b/libcxx/include/cstdalign
index 7f8dd1e1fbaf8..7aa8cc81ad14c 100644
--- a/libcxx/include/cstdalign
+++ b/libcxx/include/cstdalign
@@ -43,17 +43,10 @@ Macros:
 #  undef __alignof_is_defined
 #  define __alignof_is_defined 1
 
-#  if _LIBCPP_STD_VER >= 20
-
-using __standard_header_cstdalign _LIBCPP_DEPRECATED_("removed in C++20.") _LIBCPP_NODEBUG = void;
-using __use_standard_header_cstdalign _LIBCPP_NODEBUG = __standard_header_cstdalign;
-
-#  elif _LIBCPP_STD_VER >= 17
-
-using __standard_header_cstdalign _LIBCPP_DEPRECATED _LIBCPP_NODEBUG = void;
-using __use_standard_header_cstdalign _LIBCPP_NODEBUG                = __standard_header_cstdalign;
-
+#  if _LIBCPP_STD_VER >= 17 && !__building_module(std) && _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS
+#    warning <cstdalign> is deprecated in C++17 and removed in C++20.
 #  endif
+
 #endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
 
 #endif // _LIBCPP_CSTDALIGN
diff --git a/libcxx/include/cstdbool b/libcxx/include/cstdbool
index a432d5f08b9ae..805a287bd7627 100644
--- a/libcxx/include/cstdbool
+++ b/libcxx/include/cstdbool
@@ -31,17 +31,10 @@ Macros:
 #  undef __bool_true_false_are_defined
 #  define __bool_true_false_are_defined 1
 
-#  if _LIBCPP_STD_VER >= 20
-
-using __standard_header_cstdbool _LIBCPP_DEPRECATED_("removed in C++20.") _LIBCPP_NODEBUG = void;
-using __use_standard_header_cstdbool _LIBCPP_NODEBUG                                      = __standard_header_cstdbool;
-
-#  elif _LIBCPP_STD_VER >= 17
-
-using __standard_header_cstdbool _LIBCPP_DEPRECATED _LIBCPP_NODEBUG = void;
-using __use_standard_header_cstdbool _LIBCPP_NODEBUG                = __standard_header_cstdbool;
-
+#  if _LIBCPP_STD_VER >= 17 && !__building_module(std) && _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS
+#    warning <cstdbool> is deprecated in C++17 and removed in C++20.
 #  endif
+
 #endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
 
 #endif // _LIBCPP_CSTDBOOL
diff --git a/libcxx/include/ctgmath b/libcxx/include/ctgmath
index db0786f1e2c46..13b7a96e4d8fc 100644
--- a/libcxx/include/ctgmath
+++ b/libcxx/include/ctgmath
@@ -28,17 +28,8 @@
 #    pragma GCC system_header
 #  endif
 
-#  if _LIBCPP_STD_VER >= 20
-
-using __standard_header_ctgmath
-    _LIBCPP_DEPRECATED_("removed in C++20. Include <cmath> and <complex> instead.") _LIBCPP_NODEBUG = void;
-using __use_standard_header_ctgmath _LIBCPP_NODEBUG = __standard_header_ctgmath;
-
-#  elif _LIBCPP_STD_VER >= 17
-
-using __standard_header_ctgmath _LIBCPP_DEPRECATED_("Include <cmath> and <complex> instead.") _LIBCPP_NODEBUG = void;
-using __use_standard_header_ctgmath _LIBCPP_NODEBUG = __standard_header_ctgmath;
-
+#  if _LIBCPP_STD_VER >= 17 && !__building_module(std) && _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS
+#    warning <ctgmath> is deprecated in C++17 and removed in C++20. Include <cmath> and <complex> instead.
 #  endif
 
 #endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
diff --git a/libcxx/test/libcxx/transitive_includes.gen.py b/libcxx/test/libcxx/transitive_includes.gen.py
index 6ed35af7e275e..2b643e1f2ad48 100644
--- a/libcxx/test/libcxx/transitive_includes.gen.py
+++ b/libcxx/test/libcxx/transitive_includes.gen.py
@@ -89,7 +89,7 @@
 // UNSUPPORTED: LIBCXX-FREEBSD-FIXME
 
 // RUN: mkdir %t
-// RUN: %{{cxx}} %s %{{flags}} %{{compile_flags}} --trace-includes -fshow-skipped-includes --preprocess > /dev/null 2> %t/trace-includes.txt
+// RUN: %{{cxx}} %s %{{flags}} %{{compile_flags}} -Wno-deprecated --trace-includes -fshow-skipped-includes --preprocess > /dev/null 2> %t/trace-includes.txt
 // RUN: %{{python}} %{{libcxx-dir}}/test/libcxx/transitive_includes/to_csv.py %t/trace-includes.txt > %t/actual_transitive_includes.csv
 // RUN: cat %{{libcxx-dir}}/test/libcxx/transitive_includes/%{{cxx_std}}.csv | awk '/^{escaped_header} / {{ print }}' > %t/expected_transitive_includes.csv
 // RUN: diff -w %t/expected_transitive_includes.csv %t/actual_transitive_includes.csv
diff --git a/libcxx/test/std/depr/depr.cpp.headers/ccomplex.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/ccomplex.verify.cpp
index 0eaf82ce5cef0..8df89d0ba9206 100644
--- a/libcxx/test/std/depr/depr.cpp.headers/ccomplex.verify.cpp
+++ b/libcxx/test/std/depr/depr.cpp.headers/ccomplex.verify.cpp
@@ -14,12 +14,6 @@
 // UNSUPPORTED: c++03, c++11, c++14
 // UNSUPPORTED: clang-modules-build
 
-#include "test_macros.h"
-
 #include <ccomplex>
 
-#if TEST_STD_VER >= 20
-// expected-warning@ccomplex:* {{'__standard_header_ccomplex' is deprecated: removed in C++20. Include <complex> instead.}}
-#else
-// expected-warning@ccomplex:* {{'__standard_header_ccomplex' is deprecated: Include <complex> instead.}}
-#endif
+// expected-warning@ccomplex:* {{<ccomplex> is deprecated in C++17 and removed in C++20. Include <complex> instead.}}
diff --git a/libcxx/test/std/depr/depr.cpp.headers/ciso646.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/ciso646.verify.cpp
index 04acd10081548..32b57033331c8 100644
--- a/libcxx/test/std/depr/depr.cpp.headers/ciso646.verify.cpp
+++ b/libcxx/test/std/depr/depr.cpp.headers/ciso646.verify.cpp
@@ -15,4 +15,5 @@
 // UNSUPPORTED: clang-modules-build
 
 #include <ciso646>
-// expected-warning@ciso646:* {{'__standard_header_ciso646' is deprecated: removed in C++20. Include <version> instead.}}
+
+// expected-warning@ciso646:* {{<ciso646> is removed in C++20. Include <version> instead.}}
diff --git a/libcxx/test/std/depr/depr.cpp.headers/cstdalign.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/cstdalign.verify.cpp
index dc9f1af55b3f1..23a7709a9d658 100644
--- a/libcxx/test/std/depr/depr.cpp.headers/cstdalign.verify.cpp
+++ b/libcxx/test/std/depr/depr.cpp.headers/cstdalign.verify.cpp
@@ -14,12 +14,6 @@
 // UNSUPPORTED: c++03, c++11, c++14
 // UNSUPPORTED: clang-modules-build
 
-#include "test_macros.h"
-
 #include <cstdalign>
 
-#if TEST_STD_VER >= 20
-// expected-warning@cstdalign:* {{'__standard_header_cstdalign' is deprecated: removed in C++20.}}
-#else
-// expected-warning@cstdalign:* {{'__standard_header_cstdalign' is deprecated}}
-#endif
+// expected-warning@cstdalign:* {{<cstdalign> is deprecated in C++17 and removed in C++20.}}
diff --git a/libcxx/test/std/depr/depr.cpp.headers/cstdbool.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/cstdbool.verify.cpp
index eddefe14d35ea..c2c0f03c52d3c 100644
--- a/libcxx/test/std/depr/depr.cpp.headers/cstdbool.verify.cpp
+++ b/libcxx/test/std/depr/depr.cpp.headers/cstdbool.verify.cpp
@@ -14,12 +14,6 @@
 // UNSUPPORTED: c++03, c++11, c++14
 // UNSUPPORTED: clang-modules-build
 
-#include "test_macros.h"
-
 #include <cstdbool>
 
-#if TEST_STD_VER >= 20
-// expected-warning@cstdbool:* {{'__standard_header_cstdbool' is deprecated: removed in C++20.}}
-#else
-// expected-warning@cstdbool:* {{'__standard_header_cstdbool' is deprecated}}
-#endif
+// expected-warning@cstdbool:* {{<cstdbool> is deprecated in C++17 and removed in C++20.}}
diff --git a/libcxx/test/std/depr/depr.cpp.headers/ctgmath.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/ctgmath.verify.cpp
index 097ab1643d15a..4f5564915443d 100644
--- a/libcxx/test/std/depr/depr.cpp.headers/ctgmath.verify.cpp
+++ b/libcxx/test/std/depr/depr.cpp.headers/ctgmath.verify.cpp
@@ -14,12 +14,6 @@
 // UNSUPPORTED: c++03, c++11, c++14
 // UNSUPPORTED: clang-modules-build
 
-#include "test_macros.h"
-
 #include <ctgmath>
 
-#if TEST_STD_VER >= 20
-// expected-warning@ctgmath:* {{'__standard_header_ctgmath' is deprecated: removed in C++20. Include <cmath> and <complex> instead.}}
-#else
-// expected-warning@ctgmath:* {{'__standard_header_ctgmath' is deprecated: Include <cmath> and <complex> instead.}}
-#endif
+// expected-warning@ctgmath:* {{<ctgmath> is deprecated in C++17 and removed in C++20. Include <cmath> and <complex> instead.}}
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/make_from_tuple.verify.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/make_from_tuple.verify.cpp
index 12d778408d5ec..e58e760a5ce81 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/make_from_tuple.verify.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/make_from_tuple.verify.cpp
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-// REQUIRES: std-at-least-c++23
+// REQUIRES: std-at-least-c++26
 
 // <tuple>
 
@@ -21,11 +21,6 @@
 void test() {
   // expected-error@*:* {{static assertion failed}}
 
-  // Turns to an error since C++26 (Disallow Binding a Returned Glvalue to a Temporary https://wg21.link/P2748R5).
-#if TEST_STD_VER >= 26
   // expected-error@tuple:* {{returning reference to local temporary object}}
-#else
-  // expected-warning@tuple:* {{returning reference to local temporary object}}
-#endif
   std::ignore = std::make_from_tuple<const int&>(std::tuple<char>{});
 }
diff --git a/libcxx/utils/libcxx/test/format.py b/libcxx/utils/libcxx/test/format.py
index 975209c273f8c..76e9115295b99 100644
--- a/libcxx/utils/libcxx/test/format.py
+++ b/libcxx/utils/libcxx/test/format.py
@@ -99,7 +99,7 @@ def parseScript(test, preamble):
     substitutions.append(
         (
             "%{verify}",
-            "%{cxx} %s %{flags} %{compile_flags} -fsyntax-only -Wno-error -Xclang -verify -Xclang -verify-ignore-unexpected=note -ferror-limit=0",
+            "%{cxx} %s %{flags} %{compile_flags} -U_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER -fsyntax-only -Wno-error -Xclang -verify -Xclang -verify-ignore-unexpected=note -ferror-limit=0",
         )
     )
     substitutions.append(("%{run}", "%{exec} %t.exe"))

From 9a0fd22da1013281d6269f19facc5d5c1be58904 Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler@amd.com>
Date: Tue, 18 Nov 2025 16:10:42 +0100
Subject: [PATCH 18/52] Revert "[OpenMP] Implement omp_get_uid_from_device() /
 omp_get_device_from_uid()" (#168547)

Reverts llvm/llvm-project#164392 due to Fortran issues.
---
 offload/include/OpenMP/omp.h               |  7 --
 offload/include/omptarget.h                |  2 -
 offload/libomptarget/OpenMP/API.cpp        | 58 ----------------
 offload/libomptarget/exports               |  2 -
 offload/test/api/omp_device_uid.c          | 76 ---------------------
 openmp/device/include/DeviceTypes.h        |  3 -
 openmp/device/include/Interface.h          |  4 --
 openmp/device/src/State.cpp                |  6 --
 openmp/runtime/src/dllexports              |  2 -
 openmp/runtime/src/include/omp.h.var       |  5 --
 openmp/runtime/src/include/omp_lib.F90.var | 14 ----
 openmp/runtime/src/include/omp_lib.h.var   | 19 ------
 openmp/runtime/src/kmp_ftn_entry.h         | 29 +-------
 openmp/runtime/src/kmp_ftn_os.h            |  8 ---
 openmp/runtime/test/api/omp_device_uid.c   | 77 ----------------------
 15 files changed, 2 insertions(+), 310 deletions(-)
 delete mode 100644 offload/test/api/omp_device_uid.c
 delete mode 100644 openmp/runtime/test/api/omp_device_uid.c

diff --git a/offload/include/OpenMP/omp.h b/offload/include/OpenMP/omp.h
index d92c7e450c677..768ca46a9bed0 100644
--- a/offload/include/OpenMP/omp.h
+++ b/offload/include/OpenMP/omp.h
@@ -30,13 +30,6 @@
 
 extern "C" {
 
-/// Definitions
-///{
-
-#define omp_invalid_device -2
-
-///}
-
 /// Type declarations
 ///{
 
diff --git a/offload/include/omptarget.h b/offload/include/omptarget.h
index 00910704a979a..fbb4a06accf84 100644
--- a/offload/include/omptarget.h
+++ b/offload/include/omptarget.h
@@ -270,8 +270,6 @@ extern "C" {
 void ompx_dump_mapping_tables(void);
 int omp_get_num_devices(void);
 int omp_get_device_num(void);
-int omp_get_device_from_uid(const char *DeviceUid);
-const char *omp_get_uid_from_device(int DeviceNum);
 int omp_get_initial_device(void);
 void *omp_target_alloc(size_t Size, int DeviceNum);
 void omp_target_free(void *DevicePtr, int DeviceNum);
diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp
index 6e85e5764449c..dd83a3ccd08e6 100644
--- a/offload/libomptarget/OpenMP/API.cpp
+++ b/offload/libomptarget/OpenMP/API.cpp
@@ -40,8 +40,6 @@ EXTERN void ompx_dump_mapping_tables() {
 using namespace llvm::omp::target::ompt;
 #endif
 
-using GenericDeviceTy = llvm::omp::target::plugin::GenericDeviceTy;
-
 void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind,
                           const char *Name);
 void targetFreeExplicit(void *DevicePtr, int DeviceNum, int Kind,
@@ -70,62 +68,6 @@ EXTERN int omp_get_device_num(void) {
   return HostDevice;
 }
 
-static inline bool is_initial_device_uid(const char *DeviceUid) {
-  return strcmp(DeviceUid, GenericPluginTy::getHostDeviceUid()) == 0;
-}
-
-EXTERN int omp_get_device_from_uid(const char *DeviceUid) {
-  TIMESCOPE();
-  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
-
-  if (!DeviceUid) {
-    DP("Call to omp_get_device_from_uid returning omp_invalid_device\n");
-    return omp_invalid_device;
-  }
-  if (is_initial_device_uid(DeviceUid)) {
-    DP("Call to omp_get_device_from_uid returning initial device number %d\n",
-       omp_get_initial_device());
-    return omp_get_initial_device();
-  }
-
-  int DeviceNum = omp_invalid_device;
-
-  auto ExclusiveDevicesAccessor = PM->getExclusiveDevicesAccessor();
-  for (const DeviceTy &Device : PM->devices(ExclusiveDevicesAccessor)) {
-    const char *Uid = Device.RTL->getDevice(Device.RTLDeviceID).getDeviceUid();
-    if (Uid && strcmp(DeviceUid, Uid) == 0) {
-      DeviceNum = Device.DeviceID;
-      break;
-    }
-  }
-
-  DP("Call to omp_get_device_from_uid returning %d\n", DeviceNum);
-  return DeviceNum;
-}
-
-EXTERN const char *omp_get_uid_from_device(int DeviceNum) {
-  TIMESCOPE();
-  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
-
-  if (DeviceNum == omp_invalid_device) {
-    DP("Call to omp_get_uid_from_device returning nullptr\n");
-    return nullptr;
-  }
-  if (DeviceNum == omp_get_initial_device()) {
-    DP("Call to omp_get_uid_from_device returning initial device UID\n");
-    return GenericPluginTy::getHostDeviceUid();
-  }
-
-  auto DeviceOrErr = PM->getDevice(DeviceNum);
-  if (!DeviceOrErr)
-    FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str());
-
-  const char *Uid =
-      DeviceOrErr->RTL->getDevice(DeviceOrErr->RTLDeviceID).getDeviceUid();
-  DP("Call to omp_get_uid_from_device returning %s\n", Uid);
-  return Uid;
-}
-
 EXTERN int omp_get_initial_device(void) {
   TIMESCOPE();
   OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports
index 2ebc23e3cf60a..910a5b6c827a7 100644
--- a/offload/libomptarget/exports
+++ b/offload/libomptarget/exports
@@ -40,8 +40,6 @@ VERS1.0 {
     omp_get_mapped_ptr;
     omp_get_num_devices;
     omp_get_device_num;
-    omp_get_device_from_uid;
-    omp_get_uid_from_device;
     omp_get_initial_device;
     omp_target_alloc;
     omp_target_free;
diff --git a/offload/test/api/omp_device_uid.c b/offload/test/api/omp_device_uid.c
deleted file mode 100644
index 2a41d8d04ef8a..0000000000000
--- a/offload/test/api/omp_device_uid.c
+++ /dev/null
@@ -1,76 +0,0 @@
-// RUN: %libomptarget-compile-run-and-check-generic
-
-#include <omp.h>
-#include <stdio.h>
-#include <string.h>
-
-int test_omp_device_uid(int device_num) {
-  const char *device_uid = omp_get_uid_from_device(device_num);
-  if (device_uid == NULL) {
-    printf("FAIL for device %d: omp_get_uid_from_device returned NULL\n",
-           device_num);
-    return 0;
-  }
-
-  int device_num_from_uid = omp_get_device_from_uid(device_uid);
-  if (device_num_from_uid != device_num) {
-    printf(
-        "FAIL for device %d: omp_get_device_from_uid returned %d (UID: %s)\n",
-        device_num, device_num_from_uid, device_uid);
-    return 0;
-  }
-
-  if (device_num == omp_get_initial_device())
-    return 1;
-
-  int success = 1;
-
-// Note that the following code may be executed on the host if the host is the
-// device
-#pragma omp target map(tofrom : success) device(device_num)
-  {
-    int device_num = omp_get_device_num();
-
-    // omp_get_uid_from_device() in the device runtime is a dummy function
-    // returning NULL
-    const char *device_uid = omp_get_uid_from_device(device_num);
-
-    // omp_get_device_from_uid() in the device runtime is a dummy function
-    // returning omp_invalid_device.
-    int device_num_from_uid = omp_get_device_from_uid(device_uid);
-
-    // Depending on whether we're executing on the device or the host, we either
-    // got NULL as the device UID or the correct device UID.  Consequently,
-    // omp_get_device_from_uid() either returned omp_invalid_device or the
-    // correct device number (aka omp_get_initial_device()).
-    if (device_uid ? device_num_from_uid != device_num
-                   : device_num_from_uid != omp_invalid_device) {
-      printf("FAIL for device %d (target): omp_get_device_from_uid returned %d "
-             "(UID: %s)\n",
-             device_num, device_num_from_uid, device_uid);
-      success = 0;
-    }
-  }
-
-  return success;
-}
-
-int main() {
-  int num_devices = omp_get_num_devices();
-  int num_failed = 0;
-  // (also test initial device aka num_devices)
-  for (int i = 0; i < num_devices + 1; i++) {
-    if (!test_omp_device_uid(i)) {
-      printf("FAIL for device %d\n", i);
-      num_failed++;
-    }
-  }
-  if (num_failed) {
-    printf("FAIL\n");
-    return 1;
-  }
-  printf("PASS\n");
-  return 0;
-}
-
-// CHECK: PASS
diff --git a/openmp/device/include/DeviceTypes.h b/openmp/device/include/DeviceTypes.h
index 213ccfe58b4fb..2e5d92380f040 100644
--- a/openmp/device/include/DeviceTypes.h
+++ b/openmp/device/include/DeviceTypes.h
@@ -21,9 +21,6 @@ template <typename T> using Constant = __gpu_constant T;
 template <typename T> using Local = __gpu_local T;
 template <typename T> using Global = __gpu_local T;
 
-// See definition in OpenMP (omp.h.var/omp_lib.(F90|h).var)
-#define omp_invalid_device -2
-
 enum omp_proc_bind_t {
   omp_proc_bind_false = 0,
   omp_proc_bind_true = 1,
diff --git a/openmp/device/include/Interface.h b/openmp/device/include/Interface.h
index 71c3b1fc06d40..c4bfaaa2404b4 100644
--- a/openmp/device/include/Interface.h
+++ b/openmp/device/include/Interface.h
@@ -130,10 +130,6 @@ int omp_get_num_devices(void);
 
 int omp_get_device_num(void);
 
-int omp_get_device_from_uid(const char *DeviceUid);
-
-const char *omp_get_uid_from_device(int DeviceNum);
-
 int omp_get_num_teams(void);
 
 int omp_get_team_num();
diff --git a/openmp/device/src/State.cpp b/openmp/device/src/State.cpp
index 985e6b169137f..9f38cf26f8c6f 100644
--- a/openmp/device/src/State.cpp
+++ b/openmp/device/src/State.cpp
@@ -403,12 +403,6 @@ int omp_get_num_devices(void) { return config::getNumDevices(); }
 
 int omp_get_device_num(void) { return config::getDeviceNum(); }
 
-int omp_get_device_from_uid(const char *DeviceUid) {
-  return omp_invalid_device;
-}
-
-const char *omp_get_uid_from_device(int DeviceNum) { return nullptr; }
-
 int omp_get_num_teams(void) { return mapping::getNumberOfBlocksInKernel(); }
 
 int omp_get_team_num() { return mapping::getBlockIdInKernel(); }
diff --git a/openmp/runtime/src/dllexports b/openmp/runtime/src/dllexports
index 00becd1a657fd..3983dae80c9f5 100644
--- a/openmp/runtime/src/dllexports
+++ b/openmp/runtime/src/dllexports
@@ -544,8 +544,6 @@ kmp_set_disp_num_buffers                    890
     omp_get_devices_all_allocator           819
     omp_get_memspace_num_resources          820
     omp_get_submemspace                     821
-    omp_get_device_from_uid                 822
-    omp_get_uid_from_device                 823
     %ifndef stub
         __kmpc_set_default_allocator
         __kmpc_get_default_allocator
diff --git a/openmp/runtime/src/include/omp.h.var b/openmp/runtime/src/include/omp.h.var
index e98df731ad888..74f385feb3ea5 100644
--- a/openmp/runtime/src/include/omp.h.var
+++ b/openmp/runtime/src/include/omp.h.var
@@ -536,11 +536,6 @@
 
     /* OpenMP 5.2 */
     extern int __KAI_KMPC_CONVENTION omp_in_explicit_task(void);
-    #define omp_invalid_device -2
-
-    /* OpenMP 6.0 */
-    extern int   __KAI_KMPC_CONVENTION  omp_get_device_from_uid(const char *DeviceUid);
-    extern const char *   __KAI_KMPC_CONVENTION  omp_get_uid_from_device(int DeviceNum);
 
     /* LLVM Extensions */
     extern void *llvm_omp_target_dynamic_shared_alloc(void);
diff --git a/openmp/runtime/src/include/omp_lib.F90.var b/openmp/runtime/src/include/omp_lib.F90.var
index 159b42ab5b5cc..90d7e49ebf549 100644
--- a/openmp/runtime/src/include/omp_lib.F90.var
+++ b/openmp/runtime/src/include/omp_lib.F90.var
@@ -215,8 +215,6 @@
 
         integer (kind=omp_interop_kind), parameter, public :: omp_interop_none = 0
 
-        integer (kind=omp_integer_kind), parameter, public :: omp_invalid_device = -2
-
         interface
 
 !         ***
@@ -419,18 +417,6 @@
             integer (kind=omp_integer_kind) omp_get_device_num
           end function omp_get_device_num
 
-          function omp_get_uid_from_device(device_num) bind(c)
-            use omp_lib_kinds
-            integer (kind=omp_integer_kind), value :: device_num
-            character (len=*) omp_get_uid_from_device
-          end function omp_get_uid_from_device
-
-          function omp_get_device_from_uid(device_uid) bind(c)
-            use omp_lib_kinds
-            character (len=*), value :: device_uid
-            integer (kind=omp_integer_kind) omp_get_device_from_uid
-          end function omp_get_device_from_uid
-
           function omp_pause_resource(kind, device_num) bind(c)
             use omp_lib_kinds
             integer (kind=omp_pause_resource_kind), value :: kind
diff --git a/openmp/runtime/src/include/omp_lib.h.var b/openmp/runtime/src/include/omp_lib.h.var
index 468eb03e99ef1..a50bb018c7cc3 100644
--- a/openmp/runtime/src/include/omp_lib.h.var
+++ b/openmp/runtime/src/include/omp_lib.h.var
@@ -291,9 +291,6 @@
       integer(kind=omp_interop_kind)omp_interop_none
       parameter(omp_interop_none=0)
 
-      integer(kind=omp_integer_kind)omp_invalid_device
-      parameter(omp_invalid_device=-2)
-
       interface
 
 !       ***
@@ -489,18 +486,6 @@
           integer (kind=omp_integer_kind) omp_get_device_num
         end function omp_get_device_num
 
-        function omp_get_uid_from_device(device_num) bind(c)
-          import
-          integer (kind=omp_integer_kind), value :: device_num
-          character (len=*) omp_get_uid_from_device
-        end function omp_get_uid_from_device
-
-        function omp_get_device_from_uid(device_uid) bind(c)
-          import
-          character (len=*), value :: device_uid
-          integer (kind=omp_integer_kind) omp_get_device_from_uid
-        end function omp_get_device_from_uid
-
         function omp_pause_resource(kind, device_num) bind(c)
           import
           integer (kind=omp_pause_resource_kind), value :: kind
@@ -1174,8 +1159,6 @@
 !DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_initial_device
 !DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_devices
 !DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_device_num
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_uid_from_device
-!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_device_from_uid
 !DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_pause_resource
 !DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_pause_resource_all
 !DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_supported_active_levels
@@ -1259,8 +1242,6 @@
 !$omp declare target(omp_get_initial_device )
 !$omp declare target(omp_get_num_devices )
 !$omp declare target(omp_get_device_num )
-!$omp declare target(omp_get_uid_from_device )
-!$omp declare target(omp_get_device_from_uid )
 !$omp declare target(omp_pause_resource )
 !$omp declare target(omp_pause_resource_all )
 !$omp declare target(omp_get_supported_active_levels )
diff --git a/openmp/runtime/src/kmp_ftn_entry.h b/openmp/runtime/src/kmp_ftn_entry.h
index 49c56d2b9a769..2b0063eb23a0a 100644
--- a/openmp/runtime/src/kmp_ftn_entry.h
+++ b/openmp/runtime/src/kmp_ftn_entry.h
@@ -1543,38 +1543,13 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_MAX_TASK_PRIORITY)(void) {
 #endif
 }
 
-// These functions will be defined in libomptarget. When libomptarget is not
-// loaded, we assume we are on the host.
+// This function will be defined in libomptarget. When libomptarget is not
+// loaded, we assume we are on the host and return KMP_HOST_DEVICE.
 // Compiler/libomptarget will handle this if called inside target.
 int FTN_STDCALL FTN_GET_DEVICE_NUM(void) KMP_WEAK_ATTRIBUTE_EXTERNAL;
 int FTN_STDCALL FTN_GET_DEVICE_NUM(void) {
   return KMP_EXPAND_NAME(FTN_GET_INITIAL_DEVICE)();
 }
-const char *FTN_STDCALL FTN_GET_UID_FROM_DEVICE(int device_num)
-    KMP_WEAK_ATTRIBUTE_EXTERNAL;
-const char *FTN_STDCALL FTN_GET_UID_FROM_DEVICE(int device_num) {
-#if KMP_OS_DARWIN || KMP_OS_WASI || defined(KMP_STUB)
-  return nullptr;
-#else
-  const char *(*fptr)(int);
-  if ((*(void **)(&fptr) = KMP_DLSYM_NEXT("omp_get_uid_from_device")))
-    return (*fptr)(device_num);
-  // Returns the same string as used by libomptarget
-  return "HOST";
-#endif
-}
-int FTN_STDCALL FTN_GET_DEVICE_FROM_UID(const char *device_uid)
-    KMP_WEAK_ATTRIBUTE_EXTERNAL;
-int FTN_STDCALL FTN_GET_DEVICE_FROM_UID(const char *device_uid) {
-#if KMP_OS_DARWIN || KMP_OS_WASI || defined(KMP_STUB)
-  return omp_invalid_device;
-#else
-  int (*fptr)(const char *);
-  if ((*(void **)(&fptr) = KMP_DLSYM_NEXT("omp_get_device_from_uid")))
-    return (*fptr)(device_uid);
-  return KMP_EXPAND_NAME(FTN_GET_INITIAL_DEVICE)();
-#endif
-}
 
 // Compiler will ensure that this is only called from host in sequential region
 int FTN_STDCALL KMP_EXPAND_NAME(FTN_PAUSE_RESOURCE)(kmp_pause_status_t kind,
diff --git a/openmp/runtime/src/kmp_ftn_os.h b/openmp/runtime/src/kmp_ftn_os.h
index c439a058f22b4..ae0ed067235e5 100644
--- a/openmp/runtime/src/kmp_ftn_os.h
+++ b/openmp/runtime/src/kmp_ftn_os.h
@@ -140,8 +140,6 @@
 #define FTN_GET_MEMSPACE_NUM_RESOURCES omp_get_memspace_num_resources
 #define FTN_GET_SUBMEMSPACE omp_get_submemspace
 #define FTN_GET_DEVICE_NUM omp_get_device_num
-#define FTN_GET_UID_FROM_DEVICE omp_get_uid_from_device
-#define FTN_GET_DEVICE_FROM_UID omp_get_device_from_uid
 #define FTN_SET_AFFINITY_FORMAT omp_set_affinity_format
 #define FTN_GET_AFFINITY_FORMAT omp_get_affinity_format
 #define FTN_DISPLAY_AFFINITY omp_display_affinity
@@ -291,8 +289,6 @@
 #define FTN_ALLOC omp_alloc_
 #define FTN_FREE omp_free_
 #define FTN_GET_DEVICE_NUM omp_get_device_num_
-#define FTN_GET_UID_FROM_DEVICE omp_get_uid_from_device_
-#define FTN_GET_DEVICE_FROM_UID omp_get_device_from_uid_
 #define FTN_SET_AFFINITY_FORMAT omp_set_affinity_format_
 #define FTN_GET_AFFINITY_FORMAT omp_get_affinity_format_
 #define FTN_DISPLAY_AFFINITY omp_display_affinity_
@@ -440,8 +436,6 @@
 #define FTN_GET_MEMSPACE_NUM_RESOURCES OMP_GET_MEMSPACE_NUM_RESOURCES
 #define FTN_GET_SUBMEMSPACE OMP_GET_SUBMEMSPACE
 #define FTN_GET_DEVICE_NUM OMP_GET_DEVICE_NUM
-#define FTN_GET_UID_FROM_DEVICE OMP_GET_UID_FROM_DEVICE
-#define FTN_GET_DEVICE_FROM_UID OMP_GET_DEVICE_FROM_UID
 #define FTN_SET_AFFINITY_FORMAT OMP_SET_AFFINITY_FORMAT
 #define FTN_GET_AFFINITY_FORMAT OMP_GET_AFFINITY_FORMAT
 #define FTN_DISPLAY_AFFINITY OMP_DISPLAY_AFFINITY
@@ -591,8 +585,6 @@
 #define FTN_ALLOC OMP_ALLOC_
 #define FTN_FREE OMP_FREE_
 #define FTN_GET_DEVICE_NUM OMP_GET_DEVICE_NUM_
-#define FTN_GET_UID_FROM_DEVICE OMP_GET_UID_FROM_DEVICE_
-#define FTN_GET_DEVICE_FROM_UID OMP_GET_DEVICE_FROM_UID_
 #define FTN_SET_AFFINITY_FORMAT OMP_SET_AFFINITY_FORMAT_
 #define FTN_GET_AFFINITY_FORMAT OMP_GET_AFFINITY_FORMAT_
 #define FTN_DISPLAY_AFFINITY OMP_DISPLAY_AFFINITY_
diff --git a/openmp/runtime/test/api/omp_device_uid.c b/openmp/runtime/test/api/omp_device_uid.c
deleted file mode 100644
index 40a1cbb644c7b..0000000000000
--- a/openmp/runtime/test/api/omp_device_uid.c
+++ /dev/null
@@ -1,77 +0,0 @@
-// RUN: %libomp-compile-and-run 2>&1 | FileCheck %s
-// Linking fails for icc 18
-// UNSUPPORTED: icc-18
-
-#include <omp_testsuite.h>
-#include <string.h>
-
-int test_omp_device_uid(int device_num) {
-  const char *device_uid = omp_get_uid_from_device(device_num);
-  if (device_uid == NULL) {
-    printf("FAIL for device %d: omp_get_uid_from_device returned NULL\n",
-           device_num);
-    return 0;
-  }
-
-  int device_num_from_uid = omp_get_device_from_uid(device_uid);
-  if (device_num_from_uid != device_num) {
-    printf(
-        "FAIL for device %d: omp_get_device_from_uid returned %d (UID: %s)\n",
-        device_num, device_num_from_uid, device_uid);
-    return 0;
-  }
-
-  if (device_num == omp_get_initial_device())
-    return 1;
-
-  int success = 1;
-
-// Note that the following code may be executed on the host if the host is the
-// device
-#pragma omp target map(tofrom : success) device(device_num)
-  {
-    int device_num = omp_get_device_num();
-
-    // omp_get_uid_from_device() in the device runtime is a dummy function
-    // returning NULL
-    const char *device_uid = omp_get_uid_from_device(device_num);
-
-    // omp_get_device_from_uid() in the device runtime is a dummy function
-    // returning omp_invalid_device.
-    int device_num_from_uid = omp_get_device_from_uid(device_uid);
-
-    // Depending on whether we're executing on the device or the host, we either
-    // got NULL as the device UID or the correct device UID.  Consequently,
-    // omp_get_device_from_uid() either returned omp_invalid_device or the
-    // correct device number (aka omp_get_initial_device()).
-    if (device_uid ? device_num_from_uid != device_num
-                   : device_num_from_uid != omp_invalid_device) {
-      printf("FAIL for device %d (target): omp_get_device_from_uid returned %d "
-             "(UID: %s)\n",
-             device_num, device_num_from_uid, device_uid);
-      success = 0;
-    }
-  }
-
-  return success;
-}
-
-int main() {
-  int num_devices = omp_get_num_devices();
-  int num_failed = 0;
-  // (also test initial device aka num_devices)
-  for (int i = 0; i < num_devices + 1; i++) {
-    if (!test_omp_device_uid(i)) {
-      printf("FAIL for device %d\n", i);
-      num_failed++;
-    }
-  }
-  if (num_failed) {
-    printf("FAIL\n");
-    return 1;
-  }
-  printf("PASS\n");
-  return 0;
-}
-
-// CHECK: PASS

From 2befda2225a6c61d0308e536c19b066ab27bbf2a Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Tue, 18 Nov 2025 15:15:14 +0000
Subject: [PATCH 19/52] [VPlan] Populate and use VPIRFlags from initial
 VPInstruction. (#168450)

Update VPlan to populate VPIRFlags during VPInstruction construction and
use it when creating widened recipes, instead of constructing VPIRFlags
from the underlying IR instruction each time. The VPRecipeWithIRFlags
constructor taking an underlying instruction and setting the flags based
on it has been removed.

This centralizes initial VPIRFlags creation, ensures flags are
consistently available throughout VPlan transformations, and makes sure
we don't accidentally re-add flags from the underlying instruction that
were already dropped during transformations.

Follow-up to https://github.com/llvm/llvm-project/pull/167253, which did
the same for VPIRMetadata.

Should be NFC w.r.t. the generated IR.

PR: https://github.com/llvm/llvm-project/pull/168450
---
 .../Vectorize/LoopVectorizationPlanner.h      |  5 +-
 .../Transforms/Vectorize/LoopVectorize.cpp    | 35 ++++---
 llvm/lib/Transforms/Vectorize/VPlan.h         | 92 +++++++++----------
 .../Vectorize/VPlanConstruction.cpp           | 11 ++-
 .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 16 ++--
 .../Transforms/Vectorize/VPlanTransforms.cpp  | 50 +++++-----
 llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp |  6 +-
 .../vplan-printing-outer-loop.ll              | 14 +--
 .../Transforms/Vectorize/VPlanHCFGTest.cpp    | 10 +-
 .../Transforms/Vectorize/VPlanTest.cpp        | 15 ++-
 10 files changed, 134 insertions(+), 120 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index f533a47150a7b..741392247c0d6 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -152,11 +152,12 @@ class VPBuilder {
   /// its underlying Instruction.
   VPInstruction *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands,
                               Instruction *Inst = nullptr,
+                              const VPIRFlags &Flags = {},
                               const VPIRMetadata &MD = {},
                               DebugLoc DL = DebugLoc::getUnknown(),
                               const Twine &Name = "") {
     VPInstruction *NewVPInst = tryInsertInstruction(
-        new VPInstruction(Opcode, Operands, {}, MD, DL, Name));
+        new VPInstruction(Opcode, Operands, Flags, MD, DL, Name));
     NewVPInst->setUnderlyingValue(Inst);
     return NewVPInst;
   }
@@ -329,7 +330,7 @@ class VPBuilder {
     else if (Opcode == Instruction::ZExt)
       Flags = VPIRFlags::NonNegFlagsTy(false);
     return tryInsertInstruction(
-        new VPWidenCastRecipe(Opcode, Op, ResultTy, Flags));
+        new VPWidenCastRecipe(Opcode, Op, ResultTy, nullptr, Flags));
   }
 
   VPScalarIVStepsRecipe *
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 356d759b94799..c680b6fca84cd 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7750,7 +7750,7 @@ VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(VPInstruction *VPI,
                 },
                 Range);
   if (ShouldUseVectorIntrinsic)
-    return new VPWidenIntrinsicRecipe(*CI, ID, Ops, CI->getType(), *VPI,
+    return new VPWidenIntrinsicRecipe(*CI, ID, Ops, CI->getType(), *VPI, *VPI,
                                       VPI->getDebugLoc());
 
   Function *Variant = nullptr;
@@ -7804,7 +7804,8 @@ VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(VPInstruction *VPI,
     }
 
     Ops.push_back(VPI->getOperand(VPI->getNumOperands() - 1));
-    return new VPWidenCallRecipe(CI, Variant, Ops, VPI->getDebugLoc());
+    return new VPWidenCallRecipe(CI, Variant, Ops, *VPI, *VPI,
+                                 VPI->getDebugLoc());
   }
 
   return nullptr;
@@ -7842,7 +7843,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(VPInstruction *VPI) {
       auto *SafeRHS =
           Builder.createSelect(Mask, Ops[1], One, VPI->getDebugLoc());
       Ops[1] = SafeRHS;
-      return new VPWidenRecipe(*I, Ops, *VPI, VPI->getDebugLoc());
+      return new VPWidenRecipe(*I, Ops, *VPI, *VPI, VPI->getDebugLoc());
     }
     [[fallthrough]];
   }
@@ -7888,7 +7889,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(VPInstruction *VPI) {
       // For other binops, the legacy cost model only checks the second operand.
       NewOps[1] = GetConstantViaSCEV(NewOps[1]);
     }
-    return new VPWidenRecipe(*I, NewOps, *VPI, VPI->getDebugLoc());
+    return new VPWidenRecipe(*I, NewOps, *VPI, *VPI, VPI->getDebugLoc());
   }
   case Instruction::ExtractValue: {
     SmallVector<VPValue *> NewOps(VPI->operands());
@@ -7896,7 +7897,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(VPInstruction *VPI) {
     assert(EVI->getNumIndices() == 1 && "Expected one extractvalue index");
     unsigned Idx = EVI->getIndices()[0];
     NewOps.push_back(Plan.getConstantInt(32, Idx));
-    return new VPWidenRecipe(*I, NewOps, *VPI, VPI->getDebugLoc());
+    return new VPWidenRecipe(*I, NewOps, *VPI, *VPI, VPI->getDebugLoc());
   }
   };
 }
@@ -7981,7 +7982,8 @@ VPReplicateRecipe *VPRecipeBuilder::handleReplication(VPInstruction *VPI,
           (Range.Start.isScalable() && isa<IntrinsicInst>(I))) &&
          "Should not predicate a uniform recipe");
   auto *Recipe =
-      new VPReplicateRecipe(I, VPI->operands(), IsUniform, BlockInMask, *VPI);
+      new VPReplicateRecipe(I, VPI->operands(), IsUniform, BlockInMask, *VPI,
+                            *VPI, VPI->getDebugLoc());
   return Recipe;
 }
 
@@ -8231,17 +8233,19 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
     return nullptr;
 
   if (VPI->getOpcode() == Instruction::GetElementPtr)
-    return new VPWidenGEPRecipe(cast<GetElementPtrInst>(Instr), R->operands());
+    return new VPWidenGEPRecipe(cast<GetElementPtrInst>(Instr), R->operands(),
+                                *VPI, VPI->getDebugLoc());
 
   if (VPI->getOpcode() == Instruction::Select)
-    return new VPWidenSelectRecipe(*cast<SelectInst>(Instr), R->operands(),
-                                   *VPI);
+    return new VPWidenSelectRecipe(cast<SelectInst>(Instr), R->operands(), *VPI,
+                                   *VPI, VPI->getDebugLoc());
 
   if (Instruction::isCast(VPI->getOpcode())) {
-    auto *CastR = cast<VPInstructionWithType>(R);
     auto *CI = cast<CastInst>(Instr);
+    auto *CastR = cast<VPInstructionWithType>(VPI);
     return new VPWidenCastRecipe(CI->getOpcode(), VPI->getOperand(0),
-                                 CastR->getResultType(), *CI, *VPI);
+                                 CastR->getResultType(), CI, *VPI, *VPI,
+                                 VPI->getDebugLoc());
   }
 
   return tryToWiden(VPI);
@@ -8269,8 +8273,8 @@ VPRecipeBuilder::tryToCreatePartialReduction(VPInstruction *Reduction,
     SmallVector<VPValue *, 2> Ops;
     Ops.push_back(Plan.getOrAddLiveIn(Zero));
     Ops.push_back(BinOp);
-    BinOp = new VPWidenRecipe(*ReductionI, Ops, VPIRMetadata(),
-                              ReductionI->getDebugLoc());
+    BinOp = new VPWidenRecipe(*ReductionI, Ops, VPIRFlags(*ReductionI),
+                              VPIRMetadata(), ReductionI->getDebugLoc());
     Builder.insert(BinOp->getDefiningRecipe());
     ReductionOpcode = Instruction::Add;
   }
@@ -8454,9 +8458,10 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
           Legal->isInvariantAddressOfReduction(SI->getPointerOperand())) {
         // Only create recipe for the final invariant store of the reduction.
         if (Legal->isInvariantStoreOfReduction(SI)) {
+          auto *VPI = cast<VPInstruction>(SingleDef);
           auto *Recipe = new VPReplicateRecipe(
-              SI, R.operands(), true /* IsUniform */, nullptr /*Mask*/,
-              *cast<VPInstruction>(SingleDef));
+              SI, R.operands(), true /* IsUniform */, nullptr /*Mask*/, *VPI,
+              *VPI, VPI->getDebugLoc());
           Recipe->insertBefore(*MiddleVPBB, MBIP);
         }
         R.eraseFromParent();
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index fc29ab0c84093..fedbcfb6bd32a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -882,14 +882,6 @@ class VPIRFlags {
 /// A pure-virtual common base class for recipes defining a single VPValue and
 /// using IR flags.
 struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags {
-  VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
-                      DebugLoc DL = DebugLoc::getUnknown())
-      : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags() {}
-
-  VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
-                      Instruction &I)
-      : VPSingleDefRecipe(SC, Operands, &I, I.getDebugLoc()), VPIRFlags(I) {}
-
   VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
                       const VPIRFlags &Flags,
                       DebugLoc DL = DebugLoc::getUnknown())
@@ -1474,9 +1466,12 @@ class LLVM_ABI_FOR_TEST VPWidenRecipe : public VPRecipeWithIRFlags,
         VPIRMetadata(Metadata), Opcode(Opcode) {}
 
   VPWidenRecipe(Instruction &I, ArrayRef<VPValue *> Operands,
-                const VPIRMetadata &Metadata, DebugLoc DL)
-      : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I),
-        VPIRMetadata(Metadata), Opcode(I.getOpcode()) {}
+                const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
+                DebugLoc DL = {})
+      : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, Flags, DL),
+        VPIRMetadata(Metadata), Opcode(I.getOpcode()) {
+    setUnderlyingValue(&I);
+  }
 
   ~VPWidenRecipe() override = default;
 
@@ -1517,30 +1512,22 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
 
 public:
   VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
-                    CastInst &UI, const VPIRMetadata &Metadata)
-      : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI),
-        VPIRMetadata(Metadata), Opcode(Opcode), ResultTy(ResultTy) {
-    assert(UI.getOpcode() == Opcode &&
-           "opcode of underlying cast doesn't match");
-  }
-  VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
-                    const VPIRFlags &Flags = {},
+                    CastInst *CI = nullptr, const VPIRFlags &Flags = {},
                     const VPIRMetadata &Metadata = {},
                     DebugLoc DL = DebugLoc::getUnknown())
       : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, Flags, DL),
         VPIRMetadata(Metadata), Opcode(Opcode), ResultTy(ResultTy) {
     assert(flagsValidForOpcode(Opcode) &&
            "Set flags not supported for the provided opcode");
+    setUnderlyingValue(CI);
   }
 
   ~VPWidenCastRecipe() override = default;
 
   VPWidenCastRecipe *clone() override {
-    auto *New = new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy, *this,
-                                      *this, getDebugLoc());
-    if (auto *UV = getUnderlyingValue())
-      New->setUnderlyingValue(UV);
-    return New;
+    return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
+                                 cast_or_null<CastInst>(getUnderlyingValue()),
+                                 *this, *this, getDebugLoc());
   }
 
   VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
@@ -1585,13 +1572,17 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
 public:
   VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID,
                          ArrayRef<VPValue *> CallArguments, Type *Ty,
+                         const VPIRFlags &Flags = {},
                          const VPIRMetadata &MD = {},
                          DebugLoc DL = DebugLoc::getUnknown())
-      : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, CI),
+      : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, Flags,
+                            DL),
         VPIRMetadata(MD), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
         MayReadFromMemory(CI.mayReadFromMemory()),
         MayWriteToMemory(CI.mayWriteToMemory()),
-        MayHaveSideEffects(CI.mayHaveSideEffects()) {}
+        MayHaveSideEffects(CI.mayHaveSideEffects()) {
+    setUnderlyingValue(&CI);
+  }
 
   VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID,
                          ArrayRef<VPValue *> CallArguments, Type *Ty,
@@ -1617,7 +1608,7 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
   VPWidenIntrinsicRecipe *clone() override {
     if (Value *CI = getUnderlyingValue())
       return new VPWidenIntrinsicRecipe(*cast<CallInst>(CI), VectorIntrinsicID,
-                                        operands(), ResultTy, *this,
+                                        operands(), ResultTy, *this, *this,
                                         getDebugLoc());
     return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(), ResultTy,
                                       *this, *this, getDebugLoc());
@@ -1671,10 +1662,11 @@ class LLVM_ABI_FOR_TEST VPWidenCallRecipe : public VPRecipeWithIRFlags,
 public:
   VPWidenCallRecipe(Value *UV, Function *Variant,
                     ArrayRef<VPValue *> CallArguments,
-                    DebugLoc DL = DebugLoc::getUnknown())
-      : VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments,
-                            *cast<Instruction>(UV)),
-        VPIRMetadata(*cast<Instruction>(UV)), Variant(Variant) {
+                    const VPIRFlags &Flags = {},
+                    const VPIRMetadata &Metadata = {}, DebugLoc DL = {})
+      : VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments, Flags, DL),
+        VPIRMetadata(Metadata), Variant(Variant) {
+    setUnderlyingValue(UV);
     assert(
         isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
         "last operand must be the called function");
@@ -1684,7 +1676,7 @@ class LLVM_ABI_FOR_TEST VPWidenCallRecipe : public VPRecipeWithIRFlags,
 
   VPWidenCallRecipe *clone() override {
     return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(),
-                                 getDebugLoc());
+                                 *this, *this, getDebugLoc());
   }
 
   VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
@@ -1761,16 +1753,19 @@ class VPHistogramRecipe : public VPRecipeBase {
 /// instruction.
 struct LLVM_ABI_FOR_TEST VPWidenSelectRecipe : public VPRecipeWithIRFlags,
                                                public VPIRMetadata {
-  VPWidenSelectRecipe(SelectInst &I, ArrayRef<VPValue *> Operands,
-                      const VPIRMetadata &MD = {})
-      : VPRecipeWithIRFlags(VPDef::VPWidenSelectSC, Operands, I),
-        VPIRMetadata(MD) {}
+  VPWidenSelectRecipe(SelectInst *SI, ArrayRef<VPValue *> Operands,
+                      const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {},
+                      DebugLoc DL = {})
+      : VPRecipeWithIRFlags(VPDef::VPWidenSelectSC, Operands, Flags, DL),
+        VPIRMetadata(MD) {
+    setUnderlyingValue(SI);
+  }
 
   ~VPWidenSelectRecipe() override = default;
 
   VPWidenSelectRecipe *clone() override {
-    return new VPWidenSelectRecipe(*cast<SelectInst>(getUnderlyingInstr()),
-                                   operands(), *this);
+    return new VPWidenSelectRecipe(cast<SelectInst>(getUnderlyingInstr()),
+                                   operands(), *this, *this, getDebugLoc());
   }
 
   VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)
@@ -1822,9 +1817,12 @@ class LLVM_ABI_FOR_TEST VPWidenGEPRecipe : public VPRecipeWithIRFlags {
   }
 
 public:
-  VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef<VPValue *> Operands)
-      : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP),
+  VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef<VPValue *> Operands,
+                   const VPIRFlags &Flags = {},
+                   DebugLoc DL = DebugLoc::getUnknown())
+      : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, Flags, DL),
         SourceElementTy(GEP->getSourceElementType()) {
+    setUnderlyingValue(GEP);
     SmallVector<std::pair<unsigned, MDNode *>> Metadata;
     (void)Metadata;
     getMetadataToPropagate(GEP, Metadata);
@@ -1835,7 +1833,7 @@ class LLVM_ABI_FOR_TEST VPWidenGEPRecipe : public VPRecipeWithIRFlags {
 
   VPWidenGEPRecipe *clone() override {
     return new VPWidenGEPRecipe(cast<GetElementPtrInst>(getUnderlyingInstr()),
-                                operands());
+                                operands(), *this, getDebugLoc());
   }
 
   VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)
@@ -2929,10 +2927,12 @@ class LLVM_ABI_FOR_TEST VPReplicateRecipe : public VPRecipeWithIRFlags,
 public:
   VPReplicateRecipe(Instruction *I, ArrayRef<VPValue *> Operands,
                     bool IsSingleScalar, VPValue *Mask = nullptr,
-                    VPIRMetadata Metadata = {})
-      : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I),
+                    const VPIRFlags &Flags = {}, VPIRMetadata Metadata = {},
+                    DebugLoc DL = DebugLoc::getUnknown())
+      : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, Flags, DL),
         VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar),
         IsPredicated(Mask) {
+    setUnderlyingValue(I);
     if (Mask)
       addOperand(Mask);
   }
@@ -2940,9 +2940,9 @@ class LLVM_ABI_FOR_TEST VPReplicateRecipe : public VPRecipeWithIRFlags,
   ~VPReplicateRecipe() override = default;
 
   VPReplicateRecipe *clone() override {
-    auto *Copy =
-        new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsSingleScalar,
-                              isPredicated() ? getMask() : nullptr, *this);
+    auto *Copy = new VPReplicateRecipe(
+        getUnderlyingInstr(), operands(), IsSingleScalar,
+        isPredicated() ? getMask() : nullptr, *this, *this, getDebugLoc());
     Copy->transferFlags(*this);
     return Copy;
   }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 612202d049774..dbbde1cafa9f2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -190,7 +190,7 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
       // recipes.
       if (Br->isConditional()) {
         VPValue *Cond = getOrCreateVPOperand(Br->getCondition());
-        VPIRBuilder.createNaryOp(VPInstruction::BranchOnCond, {Cond}, Inst,
+        VPIRBuilder.createNaryOp(VPInstruction::BranchOnCond, {Cond}, Inst, {},
                                  VPIRMetadata(*Inst), Inst->getDebugLoc());
       }
 
@@ -205,7 +205,7 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
       SmallVector<VPValue *> Ops = {getOrCreateVPOperand(SI->getCondition())};
       for (auto Case : SI->cases())
         Ops.push_back(getOrCreateVPOperand(Case.getCaseValue()));
-      VPIRBuilder.createNaryOp(Instruction::Switch, Ops, Inst,
+      VPIRBuilder.createNaryOp(Instruction::Switch, Ops, Inst, {},
                                VPIRMetadata(*Inst), Inst->getDebugLoc());
       continue;
     }
@@ -255,13 +255,14 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
       if (auto *CI = dyn_cast<CastInst>(Inst)) {
         NewR = VPIRBuilder.createScalarCast(CI->getOpcode(), VPOperands[0],
                                             CI->getType(), CI->getDebugLoc(),
-                                            {}, MD);
+                                            VPIRFlags(*CI), MD);
         NewR->setUnderlyingValue(CI);
       } else {
         // Build VPInstruction for any arbitrary Instruction without specific
         // representation in VPlan.
-        NewR = VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst, MD,
-                                        Inst->getDebugLoc());
+        NewR =
+            VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst,
+                                     VPIRFlags(*Inst), MD, Inst->getDebugLoc());
       }
     }
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index fca6554ad77c6..ef36e29aaa5c4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2056,24 +2056,26 @@ bool VPIRFlags::flagsValidForOpcode(unsigned Opcode) const {
   switch (OpType) {
   case OperationType::OverflowingBinOp:
     return Opcode == Instruction::Add || Opcode == Instruction::Sub ||
-           Opcode == Instruction::Mul ||
+           Opcode == Instruction::Mul || Opcode == Instruction::Shl ||
            Opcode == VPInstruction::VPInstruction::CanonicalIVIncrementForPart;
   case OperationType::Trunc:
     return Opcode == Instruction::Trunc;
   case OperationType::DisjointOp:
     return Opcode == Instruction::Or;
   case OperationType::PossiblyExactOp:
-    return Opcode == Instruction::AShr;
+    return Opcode == Instruction::AShr || Opcode == Instruction::LShr ||
+           Opcode == Instruction::UDiv || Opcode == Instruction::SDiv;
   case OperationType::GEPOp:
     return Opcode == Instruction::GetElementPtr ||
            Opcode == VPInstruction::PtrAdd ||
            Opcode == VPInstruction::WidePtrAdd;
   case OperationType::FPMathOp:
-    return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
-           Opcode == Instruction::FSub || Opcode == Instruction::FNeg ||
-           Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
-           Opcode == Instruction::FPExt || Opcode == Instruction::FPTrunc ||
-           Opcode == Instruction::FCmp || Opcode == Instruction::Select ||
+    return Opcode == Instruction::Call || Opcode == Instruction::FAdd ||
+           Opcode == Instruction::FMul || Opcode == Instruction::FSub ||
+           Opcode == Instruction::FNeg || Opcode == Instruction::FDiv ||
+           Opcode == Instruction::FRem || Opcode == Instruction::FPExt ||
+           Opcode == Instruction::FPTrunc || Opcode == Instruction::FCmp ||
+           Opcode == Instruction::Select ||
            Opcode == VPInstruction::WideIVStep ||
            Opcode == VPInstruction::ReductionStartVector ||
            Opcode == VPInstruction::ComputeReductionResult;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 26563242de283..25557f1d5d651 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -104,24 +104,26 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
               nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/, *VPI,
               Ingredient.getDebugLoc());
         } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
-          NewRecipe = new VPWidenGEPRecipe(GEP, Ingredient.operands());
+          NewRecipe = new VPWidenGEPRecipe(GEP, Ingredient.operands(), *VPI,
+                                           Ingredient.getDebugLoc());
         } else if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
           Intrinsic::ID VectorID = getVectorIntrinsicIDForCall(CI, &TLI);
           if (VectorID == Intrinsic::not_intrinsic)
             return false;
           NewRecipe = new VPWidenIntrinsicRecipe(
               *CI, getVectorIntrinsicIDForCall(CI, &TLI),
-              drop_end(Ingredient.operands()), CI->getType(), *VPI,
-              CI->getDebugLoc());
+              drop_end(Ingredient.operands()), CI->getType(), VPIRFlags(*CI),
+              *VPI, CI->getDebugLoc());
         } else if (SelectInst *SI = dyn_cast<SelectInst>(Inst)) {
-          NewRecipe = new VPWidenSelectRecipe(*SI, Ingredient.operands(), *VPI);
+          NewRecipe = new VPWidenSelectRecipe(SI, Ingredient.operands(), *VPI,
+                                              *VPI, Ingredient.getDebugLoc());
         } else if (auto *CI = dyn_cast<CastInst>(Inst)) {
-          NewRecipe =
-              new VPWidenCastRecipe(CI->getOpcode(), Ingredient.getOperand(0),
-                                    CI->getType(), *CI, *VPI);
+          NewRecipe = new VPWidenCastRecipe(
+              CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI,
+              VPIRFlags(*CI), VPIRMetadata(*CI));
         } else {
           NewRecipe = new VPWidenRecipe(*Inst, Ingredient.operands(), *VPI,
-                                        Ingredient.getDebugLoc());
+                                        *VPI, Ingredient.getDebugLoc());
         }
       }
 
@@ -226,7 +228,8 @@ static bool sinkScalarOperands(VPlan &Plan) {
         // then cloning should be sufficient here.
         Instruction *I = SinkCandidate->getUnderlyingInstr();
         Clone = new VPReplicateRecipe(I, SinkCandidate->operands(), true,
-                                      nullptr /*Mask*/, *SinkCandidateRepR);
+                                      nullptr /*Mask*/, *SinkCandidateRepR,
+                                      *SinkCandidateRepR);
         // TODO: add ".cloned" suffix to name of Clone's VPValue.
       } else {
         Clone = SinkCandidate->clone();
@@ -385,7 +388,8 @@ static VPRegionBlock *createReplicateRegion(VPReplicateRecipe *PredRecipe,
   // mask but in the replicate region.
   auto *RecipeWithoutMask = new VPReplicateRecipe(
       PredRecipe->getUnderlyingInstr(), drop_end(PredRecipe->operands()),
-      PredRecipe->isSingleScalar(), nullptr /*Mask*/, *PredRecipe);
+      PredRecipe->isSingleScalar(), nullptr /*Mask*/, *PredRecipe, *PredRecipe,
+      PredRecipe->getDebugLoc());
   auto *Pred =
       Plan.createVPBasicBlock(Twine(RegionName) + ".if", RecipeWithoutMask);
 
@@ -691,7 +695,7 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) {
     // analysis.
     auto Users = collectUsersRecursively(PhiR);
     for (VPUser *U : reverse(Users)) {
-      auto *Def = dyn_cast<VPSingleDefRecipe>(U);
+      auto *Def = dyn_cast<VPRecipeWithIRFlags>(U);
       auto *RepR = dyn_cast<VPReplicateRecipe>(U);
       // Skip recipes that shouldn't be narrowed.
       if (!Def || !isa<VPReplicateRecipe, VPWidenRecipe>(Def) ||
@@ -704,7 +708,8 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) {
         continue;
 
       auto *Clone = new VPReplicateRecipe(Def->getUnderlyingInstr(),
-                                          Def->operands(), /*IsUniform*/ true);
+                                          Def->operands(), /*IsUniform*/ true,
+                                          /*Mask*/ nullptr, /*Flags*/ *Def);
       Clone->insertAfter(Def);
       Def->replaceAllUsesWith(Clone);
     }
@@ -1423,12 +1428,13 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
       if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
         continue;
 
-      auto *RepOrWidenR = cast<VPSingleDefRecipe>(&R);
+      auto *RepOrWidenR = cast<VPRecipeWithIRFlags>(&R);
       if (RepR && isa<StoreInst>(RepR->getUnderlyingInstr()) &&
           vputils::isSingleScalar(RepR->getOperand(1))) {
         auto *Clone = new VPReplicateRecipe(
             RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
-            true /*IsSingleScalar*/, nullptr /*Mask*/, *RepR /*Metadata*/);
+            true /*IsSingleScalar*/, nullptr /*Mask*/, *RepR /*Flags*/,
+            *RepR /*Metadata*/, RepR->getDebugLoc());
         Clone->insertBefore(RepOrWidenR);
         unsigned ExtractOpc =
             vputils::isUniformAcrossVFsAndUFs(RepR->getOperand(1))
@@ -1469,9 +1475,9 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
           }))
         continue;
 
-      auto *Clone = new VPReplicateRecipe(RepOrWidenR->getUnderlyingInstr(),
-                                          RepOrWidenR->operands(),
-                                          true /*IsSingleScalar*/);
+      auto *Clone = new VPReplicateRecipe(
+          RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
+          true /*IsSingleScalar*/, nullptr, *RepOrWidenR);
       Clone->insertBefore(RepOrWidenR);
       RepOrWidenR->replaceAllUsesWith(Clone);
       if (isDeadRecipe(*RepOrWidenR))
@@ -3824,15 +3830,15 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
         Ext0->getOpcode() == Ext1->getOpcode() &&
         IsMulAccValidAndClampRange(Mul, Ext0, Ext1, Ext) && Mul->hasOneUse()) {
       auto *NewExt0 = new VPWidenCastRecipe(
-          Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(), *Ext0,
-          *Ext0, Ext0->getDebugLoc());
+          Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(), nullptr,
+          *Ext0, *Ext0, Ext0->getDebugLoc());
       NewExt0->insertBefore(Ext0);
 
       VPWidenCastRecipe *NewExt1 = NewExt0;
       if (Ext0 != Ext1) {
         NewExt1 = new VPWidenCastRecipe(Ext1->getOpcode(), Ext1->getOperand(0),
-                                        Ext->getResultType(), *Ext1, *Ext1,
-                                        Ext1->getDebugLoc());
+                                        Ext->getResultType(), nullptr, *Ext1,
+                                        *Ext1, Ext1->getDebugLoc());
         NewExt1->insertBefore(Ext1);
       }
       Mul->setOperand(0, NewExt0);
@@ -4353,7 +4359,7 @@ narrowInterleaveGroupOp(VPValue *V, SmallPtrSetImpl<VPValue *> &NarrowedOps) {
   // process one original iteration.
   auto *N = new VPReplicateRecipe(&WideLoad->getIngredient(), {PtrOp},
                                   /*IsUniform*/ true,
-                                  /*Mask*/ nullptr, *WideLoad);
+                                  /*Mask*/ nullptr, {}, *WideLoad);
   N->insertBefore(WideLoad);
   NarrowedOps.insert(N);
   return N;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index d4b8b72beb942..d76d2ed5f1c76 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -518,9 +518,9 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
     // TODO: have cloning of replicate recipes also provide the desired result
     // coupled with setting its operands to NewOps (deriving IsSingleScalar and
     // Mask from the operands?)
-    New =
-        new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps,
-                              /*IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR);
+    New = new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps,
+                                /*IsSingleScalar=*/true, /*Mask=*/nullptr,
+                                *RepR, *RepR, RepR->getDebugLoc());
   } else {
     assert(isa<VPInstruction>(DefR) &&
            "DefR must be a VPReplicateRecipe or VPInstruction");
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
index 20676f3702294..10c265519952b 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
@@ -14,23 +14,23 @@ define void @foo(i64 %n) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT: outer.header:
 ; CHECK-NEXT:   EMIT-SCALAR ir<%outer.iv> = phi [ ir<%outer.iv.next>, outer.latch ], [ ir<0>, ir-bb<entry> ]
-; CHECK-NEXT:   EMIT ir<%gep.1> = getelementptr ir<@arr2>, ir<0>, ir<%outer.iv>
+; CHECK-NEXT:   EMIT ir<%gep.1> = getelementptr inbounds ir<@arr2>, ir<0>, ir<%outer.iv>
 ; CHECK-NEXT:   EMIT store ir<%outer.iv>, ir<%gep.1>
-; CHECK-NEXT:   EMIT ir<%add> = add ir<%outer.iv>, ir<%n>
+; CHECK-NEXT:   EMIT ir<%add> = add nsw ir<%outer.iv>, ir<%n>
 ; CHECK-NEXT: Successor(s): inner
 ; CHECK-EMPTY:
 ; CHECK-NEXT: inner:
 ; CHECK-NEXT:   EMIT-SCALAR ir<%inner.iv> = phi [ ir<%inner.iv.next>, inner ], [ ir<0>, outer.header ]
-; CHECK-NEXT:   EMIT ir<%gep.2> = getelementptr ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv>
+; CHECK-NEXT:   EMIT ir<%gep.2> = getelementptr inbounds ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv>
 ; CHECK-NEXT:   EMIT store ir<%add>, ir<%gep.2>
-; CHECK-NEXT:   EMIT ir<%inner.iv.next> = add ir<%inner.iv>, ir<1>
-; CHECK-NEXT:   EMIT ir<%inner.ec> = icmp ir<%inner.iv.next>, ir<8>
+; CHECK-NEXT:   EMIT ir<%inner.iv.next> = add nuw nsw ir<%inner.iv>, ir<1>
+; CHECK-NEXT:   EMIT ir<%inner.ec> = icmp eq ir<%inner.iv.next>, ir<8>
 ; CHECK-NEXT:   EMIT branch-on-cond ir<%inner.ec>
 ; CHECK-NEXT: Successor(s): outer.latch, inner
 ; CHECK-EMPTY:
 ; CHECK-NEXT: outer.latch:
-; CHECK-NEXT:   EMIT ir<%outer.iv.next> = add ir<%outer.iv>, ir<1>
-; CHECK-NEXT:   EMIT ir<%outer.ec> = icmp ir<%outer.iv.next>, ir<8>
+; CHECK-NEXT:   EMIT ir<%outer.iv.next> = add nuw nsw ir<%outer.iv>, ir<1>
+; CHECK-NEXT:   EMIT ir<%outer.ec> = icmp eq ir<%outer.iv.next>, ir<8>
 ; CHECK-NEXT:   EMIT branch-on-cond ir<%outer.ec>
 ; CHECK-NEXT: Successor(s): ir-bb<exit>, outer.header
 ; CHECK-EMPTY:
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp
index b99d656c5c50f..5742df2aa3c53 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp
@@ -139,12 +139,12 @@ compound=true
       "vector.body:\l" +
       "  EMIT vp\<%2\> = CANONICAL-INDUCTION ir\<0\>, vp\<%index.next\>\l" +
       "  EMIT-SCALAR ir\<%indvars.iv\> = phi [ ir\<0\>, vector.ph ], [ ir\<%indvars.iv.next\>, vector.body ]\l" +
-      "  EMIT ir\<%arr.idx\> = getelementptr ir\<%A\>, ir\<%indvars.iv\>\l" +
+      "  EMIT ir\<%arr.idx\> = getelementptr inbounds ir\<%A\>, ir\<%indvars.iv\>\l" +
       "  EMIT ir\<%l1\> = load ir\<%arr.idx\>\l" +
       "  EMIT ir\<%res\> = add ir\<%l1\>, ir\<10\>\l" +
       "  EMIT store ir\<%res\>, ir\<%arr.idx\>\l" +
       "  EMIT ir\<%indvars.iv.next\> = add ir\<%indvars.iv\>, ir\<1\>\l" +
-      "  EMIT ir\<%exitcond\> = icmp ir\<%indvars.iv.next\>, ir\<%N\>\l" +
+      "  EMIT ir\<%exitcond\> = icmp ne ir\<%indvars.iv.next\>, ir\<%N\>\l" +
       "  EMIT vp\<%3\> = not ir\<%exitcond\>\l" +
       "  EMIT vp\<%index.next\> = add nuw vp\<%2\>, vp\<%0\>\l" +
       "  EMIT branch-on-count vp\<%index.next\>, vp\<%1\>\l" +
@@ -305,9 +305,9 @@ compound=true
       "vector.body:\l" +
       "  EMIT vp\<%2\> = CANONICAL-INDUCTION ir\<0\>, vp\<%index.next\>\l" +
       "  EMIT-SCALAR ir\<%iv\> = phi [ ir\<0\>, vector.ph ], [ ir\<%iv.next\>, loop.latch ]\l" +
-      "  EMIT ir\<%arr.idx\> = getelementptr ir\<%A\>, ir\<%iv\>\l" +
+      "  EMIT ir\<%arr.idx\> = getelementptr inbounds ir\<%A\>, ir\<%iv\>\l" +
       "  EMIT ir\<%l1\> = load ir\<%arr.idx\>\l" +
-      "  EMIT ir\<%c\> = icmp ir\<%l1\>, ir\<0\>\l" +
+      "  EMIT ir\<%c\> = icmp eq ir\<%l1\>, ir\<0\>\l" +
       "Successor(s): loop.latch\l"
     ]
     N4 -> N6 [ label=""]
@@ -316,7 +316,7 @@ compound=true
       "  EMIT ir\<%res\> = add ir\<%l1\>, ir\<10\>\l" +
       "  EMIT store ir\<%res\>, ir\<%arr.idx\>\l" +
       "  EMIT ir\<%iv.next\> = add ir\<%iv\>, ir\<1\>\l" +
-      "  EMIT ir\<%exitcond\> = icmp ir\<%iv.next\>, ir\<%N\>\l" +
+      "  EMIT ir\<%exitcond\> = icmp ne ir\<%iv.next\>, ir\<%N\>\l" +
       "  EMIT vp\<%3\> = not ir\<%exitcond\>\l" +
       "  EMIT vp\<%index.next\> = add nuw vp\<%2\>, vp\<%0\>\l" +
       "  EMIT branch-on-count vp\<%index.next\>, vp\<%1\>\l" +
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
index 3842ba235ead3..63776b78a2088 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
@@ -1009,7 +1009,7 @@ TEST_F(VPRecipeTest, CastVPWidenRecipeToVPUser) {
   SmallVector<VPValue *, 2> Args;
   Args.push_back(Op1);
   Args.push_back(Op2);
-  VPWidenRecipe WidenR(*AI, Args, VPIRMetadata(), DebugLoc());
+  VPWidenRecipe WidenR(*AI, Args);
 
   checkVPRecipeCastImpl<VPWidenRecipe, VPUser, VPIRMetadata>(&WidenR);
   delete AI;
@@ -1053,7 +1053,7 @@ TEST_F(VPRecipeTest, CastVPWidenSelectRecipeToVPUserAndVPDef) {
   Args.push_back(Op1);
   Args.push_back(Op2);
   Args.push_back(Op3);
-  VPWidenSelectRecipe WidenSelectR(*SelectI,
+  VPWidenSelectRecipe WidenSelectR(SelectI,
                                    make_range(Args.begin(), Args.end()));
 
   checkVPRecipeCastImpl<VPWidenSelectRecipe, VPUser, VPIRMetadata>(
@@ -1093,7 +1093,7 @@ TEST_F(VPRecipeTest, CastVPWidenCastRecipeToVPUser) {
   IntegerType *Int64 = IntegerType::get(C, 64);
   auto *Cast = CastInst::CreateZExtOrBitCast(PoisonValue::get(Int32), Int64);
   VPValue *Op1 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1));
-  VPWidenCastRecipe Recipe(Instruction::ZExt, Op1, Int64, *Cast, {});
+  VPWidenCastRecipe Recipe(Instruction::ZExt, Op1, Int64, Cast);
 
   checkVPRecipeCastImpl<VPWidenCastRecipe, VPUser, VPIRMetadata>(&Recipe);
   delete Cast;
@@ -1264,7 +1264,7 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) {
     SmallVector<VPValue *, 2> Args;
     Args.push_back(Op1);
     Args.push_back(Op2);
-    VPWidenRecipe Recipe(*AI, Args, VPIRMetadata(), DebugLoc());
+    VPWidenRecipe Recipe(*AI, Args);
     EXPECT_FALSE(Recipe.mayHaveSideEffects());
     EXPECT_FALSE(Recipe.mayReadFromMemory());
     EXPECT_FALSE(Recipe.mayWriteToMemory());
@@ -1283,7 +1283,7 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) {
     Args.push_back(Op1);
     Args.push_back(Op2);
     Args.push_back(Op3);
-    VPWidenSelectRecipe Recipe(*SelectI, make_range(Args.begin(), Args.end()));
+    VPWidenSelectRecipe Recipe(SelectI, make_range(Args.begin(), Args.end()));
     EXPECT_FALSE(Recipe.mayHaveSideEffects());
     EXPECT_FALSE(Recipe.mayReadFromMemory());
     EXPECT_FALSE(Recipe.mayWriteToMemory());
@@ -1412,7 +1412,7 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) {
     Args.push_back(Op1);
     Args.push_back(Op2);
     Args.push_back(CalledFn);
-    VPWidenCallRecipe Recipe(Call, TheFn, Args);
+    VPWidenCallRecipe Recipe(Call, TheFn, Args, VPIRFlags(), VPIRMetadata());
     EXPECT_FALSE(Recipe.mayHaveSideEffects());
     EXPECT_FALSE(Recipe.mayReadFromMemory());
     EXPECT_FALSE(Recipe.mayWriteToMemory());
@@ -1468,8 +1468,7 @@ TEST_F(VPRecipeTest, dumpRecipeInPlan) {
   VPValue *ExtVPV2 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2));
   Args.push_back(ExtVPV1);
   Args.push_back(ExtVPV2);
-  VPWidenRecipe *WidenR =
-      new VPWidenRecipe(*AI, Args, VPIRMetadata(), DebugLoc());
+  VPWidenRecipe *WidenR = new VPWidenRecipe(*AI, Args);
   VPBB1->appendRecipe(WidenR);
 
   {

From 1e18b4885bb44cfe7b03990274ab9de9d94935e0 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu@google.com>
Date: Tue, 18 Nov 2025 07:16:41 -0800
Subject: [PATCH 20/52] [DWARFCFIChecker] Remove an unused local variable (NFC)
 (#168487)

Note that getCurrentUnwindRow does not change any state.

Identified with unused-local-non-trivial-variable.
---
 llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp b/llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp
index bca820fa807c8..4acc064dbc212 100644
--- a/llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp
+++ b/llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp
@@ -64,7 +64,6 @@ dwarf::CFIProgram DWARFCFIState::convert(MCCFIInstruction Directive) {
       /* CodeAlignmentFactor */ 1, /* DataAlignmentFactor */ 1,
       Context->getTargetTriple().getArch());
 
-  auto MaybeCurrentRow = getCurrentUnwindRow();
   switch (Directive.getOperation()) {
   case MCCFIInstruction::OpSameValue:
     CFIP.addInstruction(dwarf::DW_CFA_same_value, Directive.getRegister());

From 4749cc407114f1e2da591491aacd0a8d3afb54e0 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu@google.com>
Date: Tue, 18 Nov 2025 07:16:50 -0800
Subject: [PATCH 21/52] [Bitcode] Use a range-based for loop (NFC) (#168489)

Identified with modernize-loop-convert.
---
 llvm/lib/Bitcode/Writer/ValueEnumerator.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
index f497c574ee75d..36d0d35d024cc 100644
--- a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -616,9 +616,8 @@ void ValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) {
 /// EnumerateValueSymbolTable - Insert all of the values in the specified symbol
 /// table into the values table.
 void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) {
-  for (ValueSymbolTable::const_iterator VI = VST.begin(), VE = VST.end();
-       VI != VE; ++VI)
-    EnumerateValue(VI->getValue());
+  for (const auto &VI : VST)
+    EnumerateValue(VI.getValue());
 }
 
 /// Insert all of the values referenced by named metadata in the specified

From 00ef94805a8c3ced416f8854b80452eb7d0bac2a Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu@google.com>
Date: Tue, 18 Nov 2025 07:16:58 -0800
Subject: [PATCH 22/52] [AMDGPU] Remove const on a return type. (#168490)

While I am at it, this patch switches to the constructor that takes
a container instead of a pair of begin/end.

Identified with readability-const-return-type.
---
 llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
index 9af812960542c..b7078825928be 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
@@ -314,9 +314,7 @@ class SplitGraph {
 #endif
 
   bool empty() const { return Nodes.empty(); }
-  const iterator_range<nodes_iterator> nodes() const {
-    return {Nodes.begin(), Nodes.end()};
-  }
+  iterator_range<nodes_iterator> nodes() const { return Nodes; }
   const Node &getNode(unsigned ID) const { return *Nodes[ID]; }
 
   unsigned getNumNodes() const { return Nodes.size(); }

From cc0c899765db2c9a2ec16ff11824a8c1055174bb Mon Sep 17 00:00:00 2001
From: Michael Liao <michael.hliao@gmail.com>
Date: Tue, 18 Nov 2025 10:16:15 -0500
Subject: [PATCH 23/52] [clang][CIR] Temporarily fix CIR codegen test on call.
 NFC

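- MemoryEffectsAttr in the MLIR LLVM dialect is out of sync with LLVM
  itself, so the expected LLVM attribute string in the test is updated to
  list target_mem0 and target_mem1.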
---
 clang/test/CIR/CodeGen/call.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/CIR/CodeGen/call.c b/clang/test/CIR/CodeGen/call.c
index d780e37f3d153..99ae4506b1f16 100644
--- a/clang/test/CIR/CodeGen/call.c
+++ b/clang/test/CIR/CodeGen/call.c
@@ -130,7 +130,7 @@ int f12(void) {
 // OGCG:         %{{.+}} = call i32 @f10(i32 noundef 1) #[[ATTR0:.+]]
 // OGCG-NEXT:    %{{.+}} = call i32 @f11(i32 noundef 2) #[[ATTR1:.+]]
 
-// LLVM: attributes #[[ATTR0]] = { nounwind willreturn memory(read, errnomem: none) }
+// LLVM: attributes #[[ATTR0]] = { nounwind willreturn memory(read, errnomem: none, target_mem0: none, target_mem1: none) }
 // LLVM: attributes #[[ATTR1]] = { nounwind willreturn memory(none) }
 
 // OGCG: attributes #[[ATTR0]] = { nounwind willreturn memory(read) }

From 906f17566c3ad30696d5b51016acaa52e4c88ecc Mon Sep 17 00:00:00 2001
From: Jessica Clarke <jrtc27@jrtc27.com>
Date: Tue, 18 Nov 2025 15:23:18 +0000
Subject: [PATCH 24/52] [ELF][AArch64] Fix copy/paste error in llvm_unreachable
 message

Fixes: e1979aed0a15 ("Implement gd to ie relaxation for aarch64.")
---
 lld/ELF/Arch/AArch64.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp
index 2a97df4785ecb..b0dc797292511 100644
--- a/lld/ELF/Arch/AArch64.cpp
+++ b/lld/ELF/Arch/AArch64.cpp
@@ -762,7 +762,7 @@ void AArch64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
     relocateNoSym(loc, R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC, val);
     break;
   default:
-    llvm_unreachable("unsupported relocation for TLS GD to LE relaxation");
+    llvm_unreachable("unsupported relocation for TLS GD to IE relaxation");
   }
 }
 

From 2ede6afff07ad26419f22e00967120dbfc9e5617 Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman@google.com>
Date: Tue, 18 Nov 2025 07:25:48 -0800
Subject: [PATCH 25/52] [TSan] Make tests work with internal shell

This patch makes all tsan tests work with the internal shell on Darwin. The
tests relied on features the internal shell does not support, mainly subshells
(backtick command substitution) and setting environment variables without
`env`. This patch also changes one of the dynamiclib substitutions so that it
no longer uses a subshell.

Reviewers: ndrewh, DanBlackwell, fmayer, vitalybuka

Reviewed By: DanBlackwell

Pull Request: https://github.com/llvm/llvm-project/pull/168544
---
 compiler-rt/test/lit.common.cfg.py                       | 5 ++++-
 compiler-rt/test/tsan/Darwin/dlopen.cpp                  | 7 ++++---
 .../test/tsan/Darwin/external-ignore-noninstrumented.cpp | 6 ++++--
 compiler-rt/test/tsan/Darwin/external.cpp                | 9 ++++++---
 compiler-rt/test/tsan/Darwin/malloc-stack-logging.cpp    | 2 +-
 5 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/compiler-rt/test/lit.common.cfg.py b/compiler-rt/test/lit.common.cfg.py
index 3f7dd8e402b78..ea22fb0babc46 100644
--- a/compiler-rt/test/lit.common.cfg.py
+++ b/compiler-rt/test/lit.common.cfg.py
@@ -875,7 +875,7 @@ def is_windows_lto_supported():
         config.substitutions.append(
             (
                 "%ld_flags_rpath_so" + postfix,
-                "-install_name @rpath/`basename %dynamiclib{}`".format(postfix),
+                "-install_name @rpath/%base_dynamiclib{}".format(postfix),
             )
         )
     elif config.target_os in ("FreeBSD", "NetBSD", "OpenBSD"):
@@ -908,6 +908,9 @@ def is_windows_lto_supported():
     config.substitutions.append(
         ("%dynamiclib" + postfix, "%t.dir/%xdynamiclib_filename" + postfix)
     )
+    config.substitutions.append(
+        ("%base_dynamiclib" + postfix, "%xdynamiclib_filename" + postfix)
+    )
     config.substitutions.append(
         (
             "%xdynamiclib_filename" + postfix,
diff --git a/compiler-rt/test/tsan/Darwin/dlopen.cpp b/compiler-rt/test/tsan/Darwin/dlopen.cpp
index 3d12b815f9c25..2ab052f1c0c26 100644
--- a/compiler-rt/test/tsan/Darwin/dlopen.cpp
+++ b/compiler-rt/test/tsan/Darwin/dlopen.cpp
@@ -9,14 +9,15 @@
 // RUN: %clangxx_tsan %s -o %t.so -shared -DSHARED_LIB
 // RUN: %clangxx_tsan -fno-sanitize=thread %s -o %t
 
-// RUN: TSAN_DYLIB_PATH=`%clangxx_tsan %s -### 2>&1 \
+// RUN: %clangxx_tsan %s -### 2>&1 \
 // RUN:   | grep "libclang_rt.tsan_osx_dynamic.dylib" \
-// RUN:   | sed -e 's/.*"\(.*libclang_rt.tsan_osx_dynamic.dylib\)".*/\1/'`
+// RUN:   | sed -e 's/.*"\(.*libclang_rt.tsan_osx_dynamic.dylib\)".*/\1/' \
+// RUN:   | tr -d '\n' > %t.tsan_dylib_path
 
 // Launching a non-instrumented binary that dlopen's an instrumented library should fail.
 // RUN: not %run %t %t.so 2>&1 | FileCheck %s --check-prefix=CHECK-FAIL
 // Launching a non-instrumented binary with an explicit DYLD_INSERT_LIBRARIES should work.
-// RUN: DYLD_INSERT_LIBRARIES=$TSAN_DYLIB_PATH %run %t %t.so 2>&1 | FileCheck %s
+// RUN: env DYLD_INSERT_LIBRARIES="%{readfile:%t.tsan_dylib_path}" %run %t %t.so 2>&1 | FileCheck %s
 
 #include <dlfcn.h>
 #include <pthread.h>
diff --git a/compiler-rt/test/tsan/Darwin/external-ignore-noninstrumented.cpp b/compiler-rt/test/tsan/Darwin/external-ignore-noninstrumented.cpp
index 916b0b893fc0d..cfa46e0f0a213 100644
--- a/compiler-rt/test/tsan/Darwin/external-ignore-noninstrumented.cpp
+++ b/compiler-rt/test/tsan/Darwin/external-ignore-noninstrumented.cpp
@@ -1,8 +1,10 @@
+// RUN: basename %t-lib.dylib | tr -d '\n' > %t.basename
 // RUN: %clangxx_tsan -shared %p/external-lib.cpp -fno-sanitize=thread -DUSE_TSAN_CALLBACKS \
-// RUN:   -o %t-lib.dylib -install_name @rpath/`basename %t-lib.dylib`
+// RUN:   -o %t-lib.dylib -install_name @rpath/%{readfile:%t.basename}
 
+// RUN: basename %t-module.dylib | tr -d '\n' > %t.basename
 // RUN: %clangxx_tsan -shared %p/external-noninstrumented-module.cpp %t-lib.dylib -fno-sanitize=thread \
-// RUN:   -o %t-module.dylib -install_name @rpath/`basename %t-module.dylib`
+// RUN:   -o %t-module.dylib -install_name @rpath/%{readfile:%t.basename}
 
 // RUN: %clangxx_tsan %s %t-module.dylib -o %t
 // RUN: %run %t 2>&1 | FileCheck %s
diff --git a/compiler-rt/test/tsan/Darwin/external.cpp b/compiler-rt/test/tsan/Darwin/external.cpp
index bf189eb1d6b5b..52fae36f0e1f4 100644
--- a/compiler-rt/test/tsan/Darwin/external.cpp
+++ b/compiler-rt/test/tsan/Darwin/external.cpp
@@ -1,14 +1,17 @@
+// RUN: basename %t-lib-instrumented.dylib | tr -d '\n' > %t.basename
 // RUN: %clangxx_tsan %p/external-lib.cpp -shared \
 // RUN:                               -o %t-lib-instrumented.dylib \
-// RUN:   -install_name @rpath/`basename %t-lib-instrumented.dylib`
+// RUN:   -install_name @rpath/%{readfile:%t.basename}
 
+// RUN: basename %t-lib-noninstrumented.dylib | tr -d '\n' > %t.basename
 // RUN: %clangxx_tsan %p/external-lib.cpp -shared -fno-sanitize=thread \
 // RUN:                               -o %t-lib-noninstrumented.dylib \
-// RUN:   -install_name @rpath/`basename %t-lib-noninstrumented.dylib`
+// RUN:   -install_name @rpath/%{readfile:%t.basename}
 
+// RUN: basename %t-lib-noninstrumented-callbacks.dylib | tr -d '\n' > %t.basename
 // RUN: %clangxx_tsan %p/external-lib.cpp -shared -fno-sanitize=thread -DUSE_TSAN_CALLBACKS \
 // RUN:                               -o %t-lib-noninstrumented-callbacks.dylib \
-// RUN:   -install_name @rpath/`basename %t-lib-noninstrumented-callbacks.dylib`
+// RUN:   -install_name @rpath/%{readfile:%t.basename}
 
 // RUN: %clangxx_tsan %s %t-lib-instrumented.dylib -o %t-lib-instrumented
 // RUN: %clangxx_tsan %s %t-lib-noninstrumented.dylib -o %t-lib-noninstrumented
diff --git a/compiler-rt/test/tsan/Darwin/malloc-stack-logging.cpp b/compiler-rt/test/tsan/Darwin/malloc-stack-logging.cpp
index 8d9c2122d0e6c..0a96e346f8012 100644
--- a/compiler-rt/test/tsan/Darwin/malloc-stack-logging.cpp
+++ b/compiler-rt/test/tsan/Darwin/malloc-stack-logging.cpp
@@ -4,7 +4,7 @@
 // use syscalls directly) to make sure other interceptors aren't called.
 
 // RUN: %clangxx_tsan -O1 %s -o %t
-// RUN: MallocStackLogging=1 %run %t 2>&1 | FileCheck %s
+// RUN: env MallocStackLogging=1 %run %t 2>&1 | FileCheck %s
 #include <pthread.h>
 #include <stdlib.h>
 #include <stdio.h>

From 40645ed4ed7ce853d9cc76bcc4aeabb6a83a0f2c Mon Sep 17 00:00:00 2001
From: Discookie <viktor.cseh@ericsson.com>
Date: Tue, 18 Nov 2025 15:26:20 +0000
Subject: [PATCH 26/52] [clang-tidy] Add a fully custom message to
 `bugprone-unsafe-functions` (#162443)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In some cases, such as when recommending the compiler option
_FORTIFY_SOURCE, the current custom message format is clunky. Now, when
the reason starts with `>`, the replacement string is omitted, so only
the Reason is shown.

`^function$,,has a custom message;` - function 'function' has a custom
message; it should not be used
`^function$,,>has a custom message and no replacement suggestion;` -
function 'function' has a custom message and no replacement suggestion

---------

Co-authored-by: Donát Nagy <donat.nagy@ericsson.com>
---
 .../bugprone/UnsafeFunctionsCheck.cpp         | 10 +++-
 clang-tools-extra/docs/ReleaseNotes.rst       | 12 ++++
 .../checks/bugprone/unsafe-functions.rst      | 55 ++++++++++++++-----
 .../bugprone/unsafe-functions-custom.c        |  6 +-
 4 files changed, 63 insertions(+), 20 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/bugprone/UnsafeFunctionsCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/UnsafeFunctionsCheck.cpp
index 5524c4b484be1..67d0931003c54 100644
--- a/clang-tools-extra/clang-tidy/bugprone/UnsafeFunctionsCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/UnsafeFunctionsCheck.cpp
@@ -301,14 +301,20 @@ void UnsafeFunctionsCheck::check(const MatchFinder::MatchResult &Result) {
   if (Custom) {
     for (const auto &Entry : CustomFunctions) {
       if (Entry.Pattern.match(*FuncDecl)) {
-        const StringRef Reason =
+        StringRef Reason =
             Entry.Reason.empty() ? "is marked as unsafe" : Entry.Reason.c_str();
 
-        if (Entry.Replacement.empty()) {
+        // Omit the replacement, when a fully-custom reason is given.
+        if (Reason.consume_front(">")) {
+          diag(SourceExpr->getExprLoc(), "function %0 %1")
+              << FuncDecl << Reason.trim() << SourceExpr->getSourceRange();
+          // Do not recommend a replacement when it is not present.
+        } else if (Entry.Replacement.empty()) {
           diag(SourceExpr->getExprLoc(),
                "function %0 %1; it should not be used")
               << FuncDecl << Reason << Entry.Replacement
               << SourceExpr->getSourceRange();
+          // Otherwise, emit the replacement.
         } else {
           diag(SourceExpr->getExprLoc(),
                "function %0 %1; '%2' should be used instead")
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index b982216297919..743397e3ec6ce 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -69,6 +69,13 @@ Potentially Breaking Changes
   - `CharTypdefsToIgnore` to `CharTypedefsToIgnore` in
     :doc:`bugprone-signed-char-misuse
     <clang-tidy/checks/bugprone/signed-char-misuse>`
+  
+- Modified the custom message format of :doc:`bugprone-unsafe-functions
+  <clang-tidy/checks/bugprone/unsafe-functions>` by assigning a special meaning
+  to the character ``>`` at the start of the value of the option
+  ``CustomFunctions``. If the option value starts with ``>``, then the
+  replacement suggestion part of the message (which would be included by
+  default) is omitted. (This does not change the warning locations.)
 
 - :program:`clang-tidy` now displays warnings from all non-system headers by
   default. Previously, users had to explicitly opt-in to header warnings using
@@ -387,6 +394,11 @@ Changes in existing checks
   <clang-tidy/checks/bugprone/unhandled-self-assignment>` check by adding
   an additional matcher that generalizes the copy-and-swap idiom pattern
   detection.
+  
+- Improved :doc:`bugprone-unsafe-functions
+  <clang-tidy/checks/bugprone/unsafe-functions>` check by hiding the default
+  suffix when the reason starts with the character `>` in the `CustomFunctions`
+  option.
 
 - Improved :doc:`cppcoreguidelines-avoid-non-const-global-variables
   <clang-tidy/checks/cppcoreguidelines/avoid-non-const-global-variables>` check
diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/unsafe-functions.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/unsafe-functions.rst
index f1fec13739271..cb7ea415c54b2 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/unsafe-functions.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/unsafe-functions.rst
@@ -96,37 +96,62 @@ to be checked. The format is the following, without newlines:
 The functions are matched using POSIX extended regular expressions.
 *(Note: The regular expressions do not support negative* ``(?!)`` *matches.)*
 
-The `reason` is optional and is used to provide additional information
-about the reasoning behind the replacement. The default reason is
-`is marked as unsafe`.
+The ``reason`` is optional and is used to provide additional information about the
+reasoning behind the replacement. The default reason is ``is marked as unsafe``.
 
-If `replacement` is empty, the text `it should not be used` will be shown
-instead of the suggestion for a replacement.
+If ``replacement`` is empty, the default text ``it should not be used`` will be
+shown instead of the suggestion for a replacement.
 
-As an example, the configuration `^original$, replacement, is deprecated;`
-will produce the following diagnostic message.
+If the ``reason`` starts with the character ``>``, the reason becomes fully custom.
+The default suffix is disabled even if a ``replacement`` is present, and only the
+reason message is shown after the matched function, to allow better control over
+the suggestions. (The starting ``>`` and whitespace directly after it are
+trimmed from the message.)
+
+As an example, the following configuration matches only the function ``original``
+in the default namespace. A similar diagnostic can also be printed using a fully
+custom reason.
 
 .. code:: c
 
+   // bugprone-unsafe-functions.CustomFunctions:
+   //   ^original$, replacement, is deprecated;
+   // Using the fully custom message syntax:
+   //   ^suspicious$,,> should be avoided if possible.
    original(); // warning: function 'original' is deprecated; 'replacement' should be used instead.
+   suspicious(); // warning: function 'suspicious' should be avoided if possible.
    ::std::original(); // no-warning
    original_function(); // no-warning
 
-If the regular expression contains the character `:`, it is matched against the
-qualified name (i.e. ``std::original``), otherwise the regex is matched against the unqualified name (``original``).
-If the regular expression starts with `::` (or `^::`), it is matched against the
-fully qualified name (``::std::original``).
+If the regular expression contains the character ``:``, it is matched against the
+qualified name (i.e. ``std::original``), otherwise the regex is matched against
+the unqualified name (``original``). If the regular expression starts with ``::``
+(or ``^::``), it is matched against the fully qualified name
+(``::std::original``).
+
+One of the use cases for fully custom messages is suggesting compiler options
+and warning flags:
+
+.. code:: c
+
+   // bugprone-unsafe-functions.CustomFunctions:
+   //   ^memcpy$,,>is recommended to have compiler hardening using '_FORTIFY_SOURCE';
+   //   ^printf$,,>is recommended to have the '-Werror=format-security' compiler warning flag;
+
+   memcpy(dest, src, 999'999); // warning: function 'memcpy' is recommended to have compiler hardening using '_FORTIFY_SOURCE'
+   printf(raw_str); // warning: function 'printf' is recommended to have the '-Werror=format-security' compiler warning flag
 
 .. note::
 
-   Fully qualified names can contain template parameters on certain C++ classes, but not on C++ functions.
-   Type aliases are resolved before matching.
+   Fully qualified names can contain template parameters on certain C++ classes,
+   but not on C++ functions. Type aliases are resolved before matching.
 
    As an example, the member function ``open`` in the class ``std::ifstream``
    has a fully qualified name of ``::std::basic_ifstream<char>::open``.
 
-   The example could also be matched with the regex ``::std::basic_ifstream<[^>]*>::open``, which matches all potential
-   template parameters, but does not match nested template classes.
+   The example could also be matched with the regex
+   ``::std::basic_ifstream<[^>]*>::open``, which matches all potential template
+   parameters, but does not match nested template classes.
 
 Options
 -------
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/unsafe-functions-custom.c b/clang-tools-extra/test/clang-tidy/checkers/bugprone/unsafe-functions-custom.c
index 7fd71ec2f2e7b..7eaf015f06aa2 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/unsafe-functions-custom.c
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/unsafe-functions-custom.c
@@ -1,5 +1,5 @@
 // RUN: %check_clang_tidy -check-suffix=NON-STRICT-REGEX %s bugprone-unsafe-functions %t --\
-// RUN:   -config="{CheckOptions: {bugprone-unsafe-functions.CustomFunctions: '::name_match,replacement,is a qualname match;^::prefix_match,,is matched on qualname prefix'}}"
+// RUN:   -config="{CheckOptions: {bugprone-unsafe-functions.CustomFunctions: \"::name_match,,>is a qualname match, but with a fully 'custom' message;^::prefix_match,,is matched on qualname prefix\"}}"
 // RUN: %check_clang_tidy -check-suffix=STRICT-REGEX     %s bugprone-unsafe-functions %t --\
 // RUN:   -config="{CheckOptions: {bugprone-unsafe-functions.CustomFunctions: '^name_match$,replacement,is matched on function name only;^::prefix_match$,,is a full qualname match'}}"
 
@@ -11,14 +11,14 @@ void prefix_match_regex();
 
 void f1() {
   name_match();
-  // CHECK-MESSAGES-NON-STRICT-REGEX: :[[@LINE-1]]:3: warning: function 'name_match' is a qualname match; 'replacement' should be used instead
+  // CHECK-MESSAGES-NON-STRICT-REGEX: :[[@LINE-1]]:3: warning: function 'name_match' is a qualname match, but with a fully 'custom' message
   // CHECK-MESSAGES-STRICT-REGEX: :[[@LINE-2]]:3: warning: function 'name_match' is matched on function name only; 'replacement' should be used instead
   prefix_match();
   // CHECK-MESSAGES-NON-STRICT-REGEX: :[[@LINE-1]]:3: warning: function 'prefix_match' is matched on qualname prefix; it should not be used
   // CHECK-MESSAGES-STRICT-REGEX: :[[@LINE-2]]:3: warning: function 'prefix_match' is a full qualname match; it should not be used
 
   name_match_regex();
-  // CHECK-MESSAGES-NON-STRICT-REGEX: :[[@LINE-1]]:3: warning: function 'name_match_regex' is a qualname match; 'replacement' should be used instead
+  // CHECK-MESSAGES-NON-STRICT-REGEX: :[[@LINE-1]]:3: warning: function 'name_match_regex' is a qualname match, but with a fully 'custom' message
   // no-warning STRICT-REGEX
 
   prefix_match_regex();

From 1fcfd5c67bbabe5f134ef4268c2a890f2b0cfa0f Mon Sep 17 00:00:00 2001
From: Erick Ochoa Lopez <erick.ochoalopez@amd.com>
Date: Tue, 18 Nov 2025 10:35:05 -0500
Subject: [PATCH 27/52] [mlir][amdgpu] Sink op creation in scaled conversion
 intrinsics (NFC) (#168542)

Where possible:

* notifyMatchFailure calls happen first
* then op.emitOpError
* finally assertions / op creation.

---------

Co-authored-by: Jakub Kuderski <kubakuderski@gmail.com>
---
 .../Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp   | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
index edc6565f44f00..b9a5e7d7f6eac 100644
--- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
+++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
@@ -1738,15 +1738,11 @@ LogicalResult ScaledExtPacked816OpLowering::matchAndRewrite(
   auto sourceType = cast<VectorType>(op.getSource().getType());
   auto srcElemType = cast<FloatType>(sourceType.getElementType());
   unsigned bitWidth = srcElemType.getWidth();
-  int32_t scaleSel =
-      getScaleSel(blockSize, bitWidth, firstScaleLane, firstScaleByte);
 
   auto targetType = cast<VectorType>(op.getResult().getType());
   auto destElemType = cast<FloatType>(targetType.getElementType());
-  IntegerType i32 = rewriter.getI32Type();
-  Value castedScale =
-      LLVM::BitcastOp::create(rewriter, loc, i32, adaptor.getScale());
 
+  IntegerType i32 = rewriter.getI32Type();
   Value source = adaptor.getSource();
   Type llvmResultType = typeConverter->convertType(op.getResult().getType());
   Type packedType = nullptr;
@@ -1767,15 +1763,19 @@ LogicalResult ScaledExtPacked816OpLowering::matchAndRewrite(
     return rewriter.notifyMatchFailure(op, "type conversion failed");
   }
 
-  Value castedSource =
-      LLVM::BitcastOp::create(rewriter, loc, packedType, source);
-
   std::optional<StringRef> maybeIntrinsic =
       scaledExtPacked816ToIntrinsic(srcElemType, destElemType);
   if (!maybeIntrinsic.has_value())
     return op.emitOpError(
         "no intrinsic matching packed scaled conversion on the given chipset");
 
+  int32_t scaleSel =
+      getScaleSel(blockSize, bitWidth, firstScaleLane, firstScaleByte);
+  Value castedScale =
+      LLVM::BitcastOp::create(rewriter, loc, i32, adaptor.getScale());
+  Value castedSource =
+      LLVM::BitcastOp::create(rewriter, loc, packedType, source);
+
   OperationState loweredOp(loc, *maybeIntrinsic);
   loweredOp.addTypes({llvmResultType});
   loweredOp.addOperands({castedSource, castedScale});

From ed60cd2563ca6ee474f76487857dd5fd56b83925 Mon Sep 17 00:00:00 2001
From: Alexander Johnston <alexander.javen.johnston@gmail.com>
Date: Tue, 18 Nov 2025 15:41:07 +0000
Subject: [PATCH 28/52] [HLSL] Implement ddx/ddy_coarse intrinsics (#164831)

Closes https://github.com/llvm/llvm-project/issues/99097
Closes https://github.com/llvm/llvm-project/issues/99100

As ddx and ddy are near identical implementations I've combined them in
this PR. This aims to unblock
https://github.com/llvm/llvm-project/pull/161378

---------

Co-authored-by: Alexander Johnston <alexander.johnston@amd.com>
---
 clang/include/clang/Basic/Builtins.td         | 12 +++
 clang/lib/CodeGen/CGHLSLBuiltins.cpp          | 18 ++++
 clang/lib/CodeGen/CGHLSLRuntime.h             |  2 +
 .../lib/Headers/hlsl/hlsl_alias_intrinsics.h  | 68 +++++++++++++++
 clang/lib/Sema/SemaHLSL.cpp                   |  4 +-
 .../builtins/ddx-coarse-builtin.hlsl          | 26 ++++++
 .../test/CodeGenHLSL/builtins/ddx-coarse.hlsl | 86 +++++++++++++++++++
 .../builtins/ddy-coarse-builtin.hlsl          | 26 ++++++
 .../test/CodeGenHLSL/builtins/ddy-coarse.hlsl | 86 +++++++++++++++++++
 .../SemaHLSL/BuiltIns/ddx-coarse-errors.hlsl  | 22 +++++
 .../SemaHLSL/BuiltIns/ddy-coarse-errors.hlsl  | 22 +++++
 llvm/include/llvm/IR/IntrinsicsDirectX.td     |  2 +
 llvm/include/llvm/IR/IntrinsicsSPIRV.td       |  2 +
 llvm/lib/Target/DirectX/DXIL.td               | 18 ++++
 .../DirectX/DirectXTargetTransformInfo.cpp    |  2 +
 llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp | 15 ++--
 .../Target/SPIRV/SPIRVInstructionSelector.cpp | 73 +++++++++++++++-
 llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp |  9 +-
 .../test/CodeGen/DirectX/ddx_coarse-errors.ll | 15 ++++
 llvm/test/CodeGen/DirectX/ddx_coarse.ll       | 40 +++++++++
 .../test/CodeGen/DirectX/ddy_coarse-errors.ll | 15 ++++
 llvm/test/CodeGen/DirectX/ddy_coarse.ll       | 40 +++++++++
 .../SPIRV/hlsl-intrinsics/ddx_coarse.ll       | 47 ++++++++++
 .../SPIRV/hlsl-intrinsics/ddy_coarse.ll       | 47 ++++++++++
 .../CodeGen/SPIRV/opencl/ddx_coarse-error.ll  | 12 +++
 .../CodeGen/SPIRV/opencl/ddy_coarse-error.ll  | 12 +++
 26 files changed, 713 insertions(+), 8 deletions(-)
 create mode 100644 clang/test/CodeGenHLSL/builtins/ddx-coarse-builtin.hlsl
 create mode 100644 clang/test/CodeGenHLSL/builtins/ddx-coarse.hlsl
 create mode 100644 clang/test/CodeGenHLSL/builtins/ddy-coarse-builtin.hlsl
 create mode 100644 clang/test/CodeGenHLSL/builtins/ddy-coarse.hlsl
 create mode 100644 clang/test/SemaHLSL/BuiltIns/ddx-coarse-errors.hlsl
 create mode 100644 clang/test/SemaHLSL/BuiltIns/ddy-coarse-errors.hlsl
 create mode 100644 llvm/test/CodeGen/DirectX/ddx_coarse-errors.ll
 create mode 100644 llvm/test/CodeGen/DirectX/ddx_coarse.ll
 create mode 100644 llvm/test/CodeGen/DirectX/ddy_coarse-errors.ll
 create mode 100644 llvm/test/CodeGen/DirectX/ddy_coarse.ll
 create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/ddx_coarse.ll
 create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/ddy_coarse.ll
 create mode 100644 llvm/test/CodeGen/SPIRV/opencl/ddx_coarse-error.ll
 create mode 100644 llvm/test/CodeGen/SPIRV/opencl/ddy_coarse-error.ll

diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index dbf857afa08c8..47da17e5cfe83 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -5253,6 +5253,18 @@ def HLSLF16ToF32 : LangBuiltin<"HLSL_LANG"> {
   let Prototype = "void(...)";
 }
 
+def HLSLDdxCoarse : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_elementwise_ddx_coarse"];
+  let Attributes = [NoThrow, Const, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
+def HLSLDdyCoarse : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_elementwise_ddy_coarse"];
+  let Attributes = [NoThrow, Const, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
 // Builtins for XRay.
 def XRayCustomEvent : Builtin {
   let Spellings = ["__xray_customevent"];
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index b6928ce7d9c44..12d9a98915ce3 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -924,6 +924,24 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
     return EmitRuntimeCall(
         Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
   }
+  case Builtin::BI__builtin_hlsl_elementwise_ddx_coarse: {
+    Value *Op0 = EmitScalarExpr(E->getArg(0));
+    if (!E->getArg(0)->getType()->hasFloatingRepresentation())
+      llvm_unreachable("ddx_coarse operand must have a float representation");
+    Intrinsic::ID ID = CGM.getHLSLRuntime().getDdxCoarseIntrinsic();
+    return Builder.CreateIntrinsic(/*ReturnType=*/Op0->getType(), ID,
+                                   ArrayRef<Value *>{Op0}, nullptr,
+                                   "hlsl.ddx.coarse");
+  }
+  case Builtin::BI__builtin_hlsl_elementwise_ddy_coarse: {
+    Value *Op0 = EmitScalarExpr(E->getArg(0));
+    if (!E->getArg(0)->getType()->hasFloatingRepresentation())
+      llvm_unreachable("ddy_coarse operand must have a float representation");
+    Intrinsic::ID ID = CGM.getHLSLRuntime().getDdyCoarseIntrinsic();
+    return Builder.CreateIntrinsic(/*ReturnType=*/Op0->getType(), ID,
+                                   ArrayRef<Value *>{Op0}, nullptr,
+                                   "hlsl.ddy.coarse");
+  }
   case Builtin::BI__builtin_get_spirv_spec_constant_bool:
   case Builtin::BI__builtin_get_spirv_spec_constant_short:
   case Builtin::BI__builtin_get_spirv_spec_constant_ushort:
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index 48935584f28a2..e1200c62eccf1 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -163,6 +163,8 @@ class CGHLSLRuntime {
   GENERATE_HLSL_INTRINSIC_FUNCTION(GroupMemoryBarrierWithGroupSync,
                                    group_memory_barrier_with_group_sync)
   GENERATE_HLSL_INTRINSIC_FUNCTION(GetDimensionsX, resource_getdimensions_x)
+  GENERATE_HLSL_INTRINSIC_FUNCTION(DdxCoarse, ddx_coarse)
+  GENERATE_HLSL_INTRINSIC_FUNCTION(DdyCoarse, ddy_coarse)
 
   //===----------------------------------------------------------------------===//
   // End of reserved area for HLSL intrinsic getters.
diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
index 2e2703de18cb1..38b95ee90736a 100644
--- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
@@ -2946,5 +2946,73 @@ float4 radians(float4);
 _HLSL_BUILTIN_ALIAS(__builtin_hlsl_group_memory_barrier_with_group_sync)
 __attribute__((convergent)) void GroupMemoryBarrierWithGroupSync(void);
 
+//===----------------------------------------------------------------------===//
+// ddx_coarse builtin
+//===----------------------------------------------------------------------===//
+
+/// \fn T ddx_coarse(T value)
+/// \brief Computes a low precision partial derivative with respect to the
+/// screen-space x-coordinate.
+/// \param value The input value.
+///
+/// The return value is a floating point scalar or vector containing the low
+/// precision partial derivative of the input value.
+
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse)
+half ddx_coarse(half);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse)
+half2 ddx_coarse(half2);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse)
+half3 ddx_coarse(half3);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse)
+half4 ddx_coarse(half4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse)
+float ddx_coarse(float);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse)
+float2 ddx_coarse(float2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse)
+float3 ddx_coarse(float3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse)
+float4 ddx_coarse(float4);
+
+//===----------------------------------------------------------------------===//
+// ddy_coarse builtin
+//===----------------------------------------------------------------------===//
+
+/// \fn T ddy_coarse(T value)
+/// \brief Computes a low precision partial derivative with respect to the
+/// screen-space y-coordinate.
+/// \param value The input value.
+///
+/// The return value is a floating point scalar or vector containing the low
+/// precision partial derivative of the input value.
+
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse)
+half ddy_coarse(half);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse)
+half2 ddy_coarse(half2);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse)
+half3 ddy_coarse(half3);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse)
+half4 ddy_coarse(half4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse)
+float ddy_coarse(float);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse)
+float2 ddy_coarse(float2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse)
+float3 ddy_coarse(float3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse)
+float4 ddy_coarse(float4);
+
 } // namespace hlsl
 #endif //_HLSL_HLSL_ALIAS_INTRINSICS_H_
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 2b9b3abbd5360..5555916c2536f 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -3239,7 +3239,9 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
   case Builtin::BI__builtin_hlsl_elementwise_degrees:
   case Builtin::BI__builtin_hlsl_elementwise_radians:
   case Builtin::BI__builtin_hlsl_elementwise_rsqrt:
-  case Builtin::BI__builtin_hlsl_elementwise_frac: {
+  case Builtin::BI__builtin_hlsl_elementwise_frac:
+  case Builtin::BI__builtin_hlsl_elementwise_ddx_coarse:
+  case Builtin::BI__builtin_hlsl_elementwise_ddy_coarse: {
     if (SemaRef.checkArgCount(TheCall, 1))
       return true;
     if (CheckAllArgTypesAreCorrect(&SemaRef, TheCall,
diff --git a/clang/test/CodeGenHLSL/builtins/ddx-coarse-builtin.hlsl b/clang/test/CodeGenHLSL/builtins/ddx-coarse-builtin.hlsl
new file mode 100644
index 0000000000000..01216eefadba2
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/ddx-coarse-builtin.hlsl
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -finclude-default-header  -x hlsl  -triple dxil-pc-shadermodel6.3-library %s \
+// RUN:  -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \
+// RUN:  FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -finclude-default-header  -x hlsl  -triple spirv-pc-vulkan-compute  %s \
+// RUN:  -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \
+// RUN:  FileCheck %s --check-prefixes=CHECK-SPIRV
+
+// CHECK-LABEL: half @_Z19test_f16_ddx_coarseDh
+// CHECK: %hlsl.ddx.coarse = call {{.*}} half @llvm.dx.ddx.coarse.f16(half %{{.*}})
+// CHECK: ret half %hlsl.ddx.coarse
+// CHECK-LABEL-SPIRV: half @_Z19test_f16_ddx_coarseDh
+// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} half @llvm.spv.ddx.coarse.f16(half %{{.*}})
+// CHECK-SPIRV: ret half %hlsl.ddx.coarse
+half test_f16_ddx_coarse(half val) {
+    return __builtin_hlsl_elementwise_ddx_coarse(val);
+}
+
+// CHECK-LABEL: float @_Z19test_f32_ddx_coarsef
+// CHECK: %hlsl.ddx.coarse = call {{.*}} float @llvm.dx.ddx.coarse.f32(float %{{.*}})
+// CHECK: ret float %hlsl.ddx.coarse
+// CHECK-LABEL-SPIRV: float @_Z19test_f32_ddx_coarsef
+// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} float @llvm.spv.ddx.coarse.f32(float %{{.*}})
+// CHECK-SPIRV: ret float %hlsl.ddx.coarse
+float test_f32_ddx_coarse(float val) {
+    return __builtin_hlsl_elementwise_ddx_coarse(val);
+}
diff --git a/clang/test/CodeGenHLSL/builtins/ddx-coarse.hlsl b/clang/test/CodeGenHLSL/builtins/ddx-coarse.hlsl
new file mode 100644
index 0000000000000..c200d4715629e
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/ddx-coarse.hlsl
@@ -0,0 +1,86 @@
+// RUN: %clang_cc1 -finclude-default-header  -x hlsl  -triple dxil-pc-shadermodel6.3-library %s \
+// RUN:  -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \
+// RUN:  FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -finclude-default-header  -x hlsl  -triple spirv-pc-vulkan-compute  %s \
+// RUN:  -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \
+// RUN:  FileCheck %s --check-prefixes=CHECK-SPIRV
+
+// CHECK-LABEL: half @_Z19test_f16_ddx_coarseDh
+// CHECK: %hlsl.ddx.coarse = call {{.*}} half @llvm.dx.ddx.coarse.f16(half %{{.*}})
+// CHECK: ret half %hlsl.ddx.coarse
+// CHECK-LABEL-SPIRV: half @_Z19test_f16_ddx_coarseDh
+// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} half @llvm.spv.ddx.coarse.f16(half %{{.*}})
+// CHECK-SPIRV: ret half %hlsl.ddx.coarse
+half test_f16_ddx_coarse(half val) {
+    return ddx_coarse(val);
+}
+
+// CHECK-LABEL: <2 x half> @_Z20test_f16_ddx_coarse2Dv2_Dh
+// CHECK: %hlsl.ddx.coarse = call {{.*}} <2 x half> @llvm.dx.ddx.coarse.v2f16(<2 x half> %{{.*}})
+// CHECK: ret <2 x half> %hlsl.ddx.coarse
+// CHECK-LABEL-SPIRV: <2 x half> @_Z20test_f16_ddx_coarse2Dv2_Dh
+// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} <2 x half> @llvm.spv.ddx.coarse.v2f16(<2 x half> %{{.*}})
+// CHECK-SPIRV: ret <2 x half> %hlsl.ddx.coarse
+half2 test_f16_ddx_coarse2(half2 val) {
+    return ddx_coarse(val);
+}
+
+// CHECK-LABEL: <3 x half> @_Z20test_f16_ddx_coarse3Dv3_Dh
+// CHECK: %hlsl.ddx.coarse = call {{.*}} <3 x half> @llvm.dx.ddx.coarse.v3f16(<3 x half> %{{.*}})
+// CHECK: ret <3 x half> %hlsl.ddx.coarse
+// CHECK-LABEL-SPIRV: <3 x half> @_Z20test_f16_ddx_coarse3Dv3_Dh
+// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} <3 x half> @llvm.spv.ddx.coarse.v3f16(<3 x half> %{{.*}})
+// CHECK-SPIRV: ret <3 x half> %hlsl.ddx.coarse
+half3 test_f16_ddx_coarse3(half3 val) {
+    return ddx_coarse(val);
+}
+
+// CHECK-LABEL: <4 x half> @_Z20test_f16_ddx_coarse4Dv4_Dh
+// CHECK: %hlsl.ddx.coarse = call {{.*}} <4 x half> @llvm.dx.ddx.coarse.v4f16(<4 x half> %{{.*}})
+// CHECK: ret <4 x half> %hlsl.ddx.coarse
+// CHECK-LABEL-SPIRV: <4 x half> @_Z20test_f16_ddx_coarse4Dv4_Dh
+// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} <4 x half> @llvm.spv.ddx.coarse.v4f16(<4 x half> %{{.*}})
+// CHECK-SPIRV: ret <4 x half> %hlsl.ddx.coarse
+half4 test_f16_ddx_coarse4(half4 val) {
+    return ddx_coarse(val);
+}
+
+// CHECK-LABEL: float @_Z19test_f32_ddx_coarsef
+// CHECK: %hlsl.ddx.coarse = call {{.*}} float @llvm.dx.ddx.coarse.f32(float %{{.*}})
+// CHECK: ret float %hlsl.ddx.coarse
+// CHECK-LABEL-SPIRV: float @_Z19test_f32_ddx_coarsef
+// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} float @llvm.spv.ddx.coarse.f32(float %{{.*}})
+// CHECK-SPIRV: ret float %hlsl.ddx.coarse
+float test_f32_ddx_coarse(float val) {
+    return ddx_coarse(val);
+}
+
+// CHECK-LABEL: <2 x float> @_Z20test_f32_ddx_coarse2Dv2_f
+// CHECK: %hlsl.ddx.coarse = call {{.*}} <2 x float> @llvm.dx.ddx.coarse.v2f32(<2 x float> %{{.*}})
+// CHECK: ret <2 x float> %hlsl.ddx.coarse
+// CHECK-LABEL-SPIRV: <2 x float> @_Z20test_f32_ddx_coarse2Dv2_f
+// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} <2 x float> @llvm.spv.ddx.coarse.v2f32(<2 x float> %{{.*}})
+// CHECK-SPIRV: ret <2 x float> %hlsl.ddx.coarse
+float2 test_f32_ddx_coarse2(float2 val) {
+    return ddx_coarse(val);
+}
+
+// CHECK-LABEL: <3 x float> @_Z20test_f32_ddx_coarse3Dv3_f
+// CHECK: %hlsl.ddx.coarse = call {{.*}} <3 x float> @llvm.dx.ddx.coarse.v3f32(<3 x float> %{{.*}})
+// CHECK: ret <3 x float> %hlsl.ddx.coarse
+// CHECK-LABEL-SPIRV: <3 x float> @_Z20test_f32_ddx_coarse3Dv3_f
+// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} <3 x float> @llvm.spv.ddx.coarse.v3f32(<3 x float> %{{.*}})
+// CHECK-SPIRV: ret <3 x float> %hlsl.ddx.coarse
+float3 test_f32_ddx_coarse3(float3 val) {
+    return ddx_coarse(val);
+}
+
+// CHECK-LABEL: <4 x float> @_Z20test_f32_ddx_coarse4Dv4_f
+// CHECK: %hlsl.ddx.coarse = call {{.*}} <4 x float> @llvm.dx.ddx.coarse.v4f32(<4 x float> %{{.*}})
+// CHECK: ret <4 x float> %hlsl.ddx.coarse
+// CHECK-LABEL-SPIRV: <4 x float> @_Z20test_f32_ddx_coarse4Dv4_f
+// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} <4 x float> @llvm.spv.ddx.coarse.v4f32(<4 x float> %{{.*}})
+// CHECK-SPIRV: ret <4 x float> %hlsl.ddx.coarse
+float4 test_f32_ddx_coarse4(float4 val) {
+    return ddx_coarse(val);
+}
diff --git a/clang/test/CodeGenHLSL/builtins/ddy-coarse-builtin.hlsl b/clang/test/CodeGenHLSL/builtins/ddy-coarse-builtin.hlsl
new file mode 100644
index 0000000000000..2967deb75031f
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/ddy-coarse-builtin.hlsl
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -finclude-default-header  -x hlsl  -triple dxil-pc-shadermodel6.3-library %s \
+// RUN:  -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \
+// RUN:  FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -finclude-default-header  -x hlsl  -triple spirv-pc-vulkan-compute  %s \
+// RUN:  -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \
+// RUN:  FileCheck %s --check-prefixes=CHECK-SPIRV
+
+// CHECK-LABEL: half @_Z19test_f16_ddy_coarseDh
+// CHECK: %hlsl.ddy.coarse = call {{.*}} half @llvm.dx.ddy.coarse.f16(half %{{.*}})
+// CHECK: ret half %hlsl.ddy.coarse
+// CHECK-SPIRV-LABEL: half @_Z19test_f16_ddy_coarseDh
+// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} half @llvm.spv.ddy.coarse.f16(half %{{.*}})
+// CHECK-SPIRV: ret half %hlsl.ddy.coarse
+half test_f16_ddy_coarse(half val) {
+    return __builtin_hlsl_elementwise_ddy_coarse(val);
+}
+
+// CHECK-LABEL: float @_Z19test_f32_ddy_coarsef
+// CHECK: %hlsl.ddy.coarse = call {{.*}} float @llvm.dx.ddy.coarse.f32(float %{{.*}})
+// CHECK: ret float %hlsl.ddy.coarse
+// CHECK-SPIRV-LABEL: float @_Z19test_f32_ddy_coarsef
+// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} float @llvm.spv.ddy.coarse.f32(float %{{.*}})
+// CHECK-SPIRV: ret float %hlsl.ddy.coarse
+float test_f32_ddy_coarse(float val) {
+    return __builtin_hlsl_elementwise_ddy_coarse(val);
+}
diff --git a/clang/test/CodeGenHLSL/builtins/ddy-coarse.hlsl b/clang/test/CodeGenHLSL/builtins/ddy-coarse.hlsl
new file mode 100644
index 0000000000000..faa972a1be326
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/ddy-coarse.hlsl
@@ -0,0 +1,86 @@
+// RUN: %clang_cc1 -finclude-default-header  -x hlsl  -triple dxil-pc-shadermodel6.3-library %s \
+// RUN:  -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \
+// RUN:  FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -finclude-default-header  -x hlsl  -triple spirv-pc-vulkan-compute  %s \
+// RUN:  -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \
+// RUN:  FileCheck %s --check-prefixes=CHECK-SPIRV
+
+// CHECK-LABEL: half @_Z19test_f16_ddy_coarseDh
+// CHECK: %hlsl.ddy.coarse = call {{.*}} half @llvm.dx.ddy.coarse.f16(half %{{.*}})
+// CHECK: ret half %hlsl.ddy.coarse
+// CHECK-SPIRV-LABEL: half @_Z19test_f16_ddy_coarseDh
+// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} half @llvm.spv.ddy.coarse.f16(half %{{.*}})
+// CHECK-SPIRV: ret half %hlsl.ddy.coarse
+half test_f16_ddy_coarse(half val) {
+    return ddy_coarse(val);
+}
+
+// CHECK-LABEL: <2 x half> @_Z20test_f16_ddy_coarse2Dv2_Dh
+// CHECK: %hlsl.ddy.coarse = call {{.*}} <2 x half> @llvm.dx.ddy.coarse.v2f16(<2 x half> %{{.*}})
+// CHECK: ret <2 x half> %hlsl.ddy.coarse
+// CHECK-SPIRV-LABEL: <2 x half> @_Z20test_f16_ddy_coarse2Dv2_Dh
+// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} <2 x half> @llvm.spv.ddy.coarse.v2f16(<2 x half> %{{.*}})
+// CHECK-SPIRV: ret <2 x half> %hlsl.ddy.coarse
+half2 test_f16_ddy_coarse2(half2 val) {
+    return ddy_coarse(val);
+}
+
+// CHECK-LABEL: <3 x half> @_Z20test_f16_ddy_coarse3Dv3_Dh
+// CHECK: %hlsl.ddy.coarse = call {{.*}} <3 x half> @llvm.dx.ddy.coarse.v3f16(<3 x half> %{{.*}})
+// CHECK: ret <3 x half> %hlsl.ddy.coarse
+// CHECK-SPIRV-LABEL: <3 x half> @_Z20test_f16_ddy_coarse3Dv3_Dh
+// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} <3 x half> @llvm.spv.ddy.coarse.v3f16(<3 x half> %{{.*}})
+// CHECK-SPIRV: ret <3 x half> %hlsl.ddy.coarse
+half3 test_f16_ddy_coarse3(half3 val) {
+    return ddy_coarse(val);
+}
+
+// CHECK-LABEL: <4 x half> @_Z20test_f16_ddy_coarse4Dv4_Dh
+// CHECK: %hlsl.ddy.coarse = call {{.*}} <4 x half> @llvm.dx.ddy.coarse.v4f16(<4 x half> %{{.*}})
+// CHECK: ret <4 x half> %hlsl.ddy.coarse
+// CHECK-SPIRV-LABEL: <4 x half> @_Z20test_f16_ddy_coarse4Dv4_Dh
+// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} <4 x half> @llvm.spv.ddy.coarse.v4f16(<4 x half> %{{.*}})
+// CHECK-SPIRV: ret <4 x half> %hlsl.ddy.coarse
+half4 test_f16_ddy_coarse4(half4 val) {
+    return ddy_coarse(val);
+}
+
+// CHECK-LABEL: float @_Z19test_f32_ddy_coarsef
+// CHECK: %hlsl.ddy.coarse = call {{.*}} float @llvm.dx.ddy.coarse.f32(float %{{.*}})
+// CHECK: ret float %hlsl.ddy.coarse
+// CHECK-SPIRV-LABEL: float @_Z19test_f32_ddy_coarsef
+// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} float @llvm.spv.ddy.coarse.f32(float %{{.*}})
+// CHECK-SPIRV: ret float %hlsl.ddy.coarse
+float test_f32_ddy_coarse(float val) {
+    return ddy_coarse(val);
+}
+
+// CHECK-LABEL: <2 x float> @_Z20test_f32_ddy_coarse2Dv2_f
+// CHECK: %hlsl.ddy.coarse = call {{.*}} <2 x float> @llvm.dx.ddy.coarse.v2f32(<2 x float> %{{.*}})
+// CHECK: ret <2 x float> %hlsl.ddy.coarse
+// CHECK-SPIRV-LABEL: <2 x float> @_Z20test_f32_ddy_coarse2Dv2_f
+// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} <2 x float> @llvm.spv.ddy.coarse.v2f32(<2 x float> %{{.*}})
+// CHECK-SPIRV: ret <2 x float> %hlsl.ddy.coarse
+float2 test_f32_ddy_coarse2(float2 val) {
+    return ddy_coarse(val);
+}
+
+// CHECK-LABEL: <3 x float> @_Z20test_f32_ddy_coarse3Dv3_f
+// CHECK: %hlsl.ddy.coarse = call {{.*}} <3 x float> @llvm.dx.ddy.coarse.v3f32(<3 x float> %{{.*}})
+// CHECK: ret <3 x float> %hlsl.ddy.coarse
+// CHECK-SPIRV-LABEL: <3 x float> @_Z20test_f32_ddy_coarse3Dv3_f
+// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} <3 x float> @llvm.spv.ddy.coarse.v3f32(<3 x float> %{{.*}})
+// CHECK-SPIRV: ret <3 x float> %hlsl.ddy.coarse
+float3 test_f32_ddy_coarse3(float3 val) {
+    return ddy_coarse(val);
+}
+
+// CHECK-LABEL: <4 x float> @_Z20test_f32_ddy_coarse4Dv4_f
+// CHECK: %hlsl.ddy.coarse = call {{.*}} <4 x float> @llvm.dx.ddy.coarse.v4f32(<4 x float> %{{.*}})
+// CHECK: ret <4 x float> %hlsl.ddy.coarse
+// CHECK-SPIRV-LABEL: <4 x float> @_Z20test_f32_ddy_coarse4Dv4_f
+// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} <4 x float> @llvm.spv.ddy.coarse.v4f32(<4 x float> %{{.*}})
+// CHECK-SPIRV: ret <4 x float> %hlsl.ddy.coarse
+float4 test_f32_ddy_coarse4(float4 val) {
+    return ddy_coarse(val);
+}
diff --git a/clang/test/SemaHLSL/BuiltIns/ddx-coarse-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/ddx-coarse-errors.hlsl
new file mode 100644
index 0000000000000..ebad1cc6826d8
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/ddx-coarse-errors.hlsl
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -verify
+// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-library %s -fnative-half-type -verify
+
+float no_arg() {
+  return __builtin_hlsl_elementwise_ddx_coarse();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+}
+
+float too_many_args(float val) {
+  return __builtin_hlsl_elementwise_ddx_coarse(val, val);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+}
+
+float test_integer_scalar_input(int val) {
+  return __builtin_hlsl_elementwise_ddx_coarse(val);
+  // expected-error@-1 {{1st argument must be a scalar or vector of 16 or 32 bit floating-point types (was 'int')}}
+}
+
+double test_double_scalar_input(double val) {
+  return __builtin_hlsl_elementwise_ddx_coarse(val);
+  // expected-error@-1 {{1st argument must be a scalar or vector of 16 or 32 bit floating-point types (was 'double')}}
+}
diff --git a/clang/test/SemaHLSL/BuiltIns/ddy-coarse-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/ddy-coarse-errors.hlsl
new file mode 100644
index 0000000000000..9cc23665882c8
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/ddy-coarse-errors.hlsl
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -verify
+// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-library %s -fnative-half-type -verify
+
+float no_arg() {
+  return __builtin_hlsl_elementwise_ddy_coarse();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+}
+
+float too_many_args(float val) {
+  return __builtin_hlsl_elementwise_ddy_coarse(val, val);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+}
+
+float test_integer_scalar_input(int val) {
+  return __builtin_hlsl_elementwise_ddy_coarse(val);
+  // expected-error@-1 {{1st argument must be a scalar or vector of 16 or 32 bit floating-point types (was 'int')}}
+}
+
+double test_double_scalar_input(double val) {
+  return __builtin_hlsl_elementwise_ddy_coarse(val);
+  // expected-error@-1 {{1st argument must be a scalar or vector of 16 or 32 bit floating-point types (was 'double')}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index d7db935ee07f1..5a4cc776b26a5 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -170,6 +170,8 @@ def int_dx_splitdouble : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0>
     [LLVMScalarOrSameVectorWidth<0, llvm_double_ty>], [IntrNoMem]>;
 def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
 def int_dx_discard : DefaultAttrsIntrinsic<[], [llvm_i1_ty], []>;
+def int_dx_ddx_coarse : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+def int_dx_ddy_coarse : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
 def int_dx_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
 def int_dx_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
 def int_dx_firstbitlow : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index f39c6cda2c579..2f7c25550a0cc 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -134,6 +134,8 @@ def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]
   def int_spv_group_memory_barrier_with_group_sync
       : DefaultAttrsIntrinsic<[], [], [IntrConvergent]>;
   def int_spv_discard : DefaultAttrsIntrinsic<[], [], []>;
+  def int_spv_ddx_coarse : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+  def int_spv_ddy_coarse : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
   def int_spv_uclamp : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
   def int_spv_sclamp : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
   def int_spv_nclamp : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 67437f6969b27..8b2866260e9c9 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -930,6 +930,24 @@ def Discard : DXILOp<82, discard> {
   let stages = [Stages<DXIL1_0, [pixel]>];
 }
 
+def DerivCoarseX : DXILOp<83, unary> {
+  let Doc = "computes the rate of change per stamp in x direction";
+  let intrinsics = [IntrinSelect<int_dx_ddx_coarse>];
+  let arguments = [OverloadTy];
+  let result = OverloadTy;
+  let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
+  let stages = [Stages<DXIL1_0, [library, pixel]>];
+}
+
+def DerivCoarseY : DXILOp<84, unary> {
+  let Doc = "computes the rate of change per stamp in y direction";
+  let intrinsics = [IntrinSelect<int_dx_ddy_coarse>];
+  let arguments = [OverloadTy];
+  let result = OverloadTy;
+  let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
+  let stages = [Stages<DXIL1_0, [library, pixel]>];
+}
+
 def ThreadId : DXILOp<93, threadId> {
   let Doc = "Reads the thread ID";
   let intrinsics = [IntrinSelect<int_dx_thread_id>];
diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
index 6cacbf6564db2..a755dd522969d 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
@@ -64,6 +64,8 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable(
   case Intrinsic::dx_wave_reduce_usum:
   case Intrinsic::dx_imad:
   case Intrinsic::dx_umad:
+  case Intrinsic::dx_ddx_coarse:
+  case Intrinsic::dx_ddy_coarse:
     return true;
   default:
     return false;
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
index 47022b3f89a8b..76fd834fd7219 100644
--- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
@@ -1697,11 +1697,16 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVType(unsigned BitWidth,
   MachineIRBuilder MIRBuilder(DepMBB, DepMBB.getFirstNonPHI());
   const MachineInstr *NewMI =
       createOpType(MIRBuilder, [&](MachineIRBuilder &MIRBuilder) {
-        return BuildMI(MIRBuilder.getMBB(), *MIRBuilder.getInsertPt(),
-                       MIRBuilder.getDL(), TII.get(SPIRVOPcode))
-            .addDef(createTypeVReg(CurMF->getRegInfo()))
-            .addImm(BitWidth)
-            .addImm(0);
+        auto NewTypeMI = BuildMI(MIRBuilder.getMBB(), *MIRBuilder.getInsertPt(),
+                                 MIRBuilder.getDL(), TII.get(SPIRVOPcode))
+                             .addDef(createTypeVReg(CurMF->getRegInfo()))
+                             .addImm(BitWidth);
+        // Don't add Encoding to FP type
+        if (!Ty->isFloatTy()) {
+          return NewTypeMI.addImm(0);
+        } else {
+          return NewTypeMI;
+        }
       });
   add(Ty, false, NewMI);
   return finishCreatingSPIRVType(Ty, NewMI);
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index fc87288a4a212..0653b4eb9dfe2 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -328,6 +328,8 @@ class SPIRVInstructionSelector : public InstructionSelector {
                            MachineInstr &I) const;
   bool selectFrexp(Register ResVReg, const SPIRVType *ResType,
                    MachineInstr &I) const;
+  bool selectDpdCoarse(Register ResVReg, const SPIRVType *ResType,
+                       MachineInstr &I, const unsigned DPdOpCode) const;
   // Utilities
   std::pair<Register, bool>
   buildI32Constant(uint32_t Val, MachineInstr &I,
@@ -371,6 +373,7 @@ class SPIRVInstructionSelector : public InstructionSelector {
   bool loadHandleBeforePosition(Register &HandleReg, const SPIRVType *ResType,
                                 GIntrinsic &HandleDef, MachineInstr &Pos) const;
   void decorateUsesAsNonUniform(Register &NonUniformReg) const;
+  void errorIfInstrOutsideShader(MachineInstr &I) const;
 };
 
 bool sampledTypeIsSignedInteger(const llvm::Type *HandleType) {
@@ -3140,6 +3143,58 @@ bool SPIRVInstructionSelector::wrapIntoSpecConstantOp(
   return Result;
 }
 
+bool SPIRVInstructionSelector::selectDpdCoarse(Register ResVReg,
+                                               const SPIRVType *ResType,
+                                               MachineInstr &I,
+                                               const unsigned DPdOpCode) const {
+  // TODO: This should check specifically for Fragment Execution Model, but STI
+  // doesn't provide that information yet. See #167562
+  errorIfInstrOutsideShader(I);
+
+  // HLSL permits half operands, but SPIR-V derivatives require 32-bit floats,
+  // so any other width is converted to float with OpFConvert and back again.
+  Register SrcReg = I.getOperand(2).getReg();
+  SPIRVType *SrcType = GR.getSPIRVTypeForVReg(SrcReg);
+  unsigned BitWidth = std::min(GR.getScalarOrVectorBitWidth(SrcType),
+                               GR.getScalarOrVectorBitWidth(ResType));
+  if (BitWidth == 32)
+    return BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(DPdOpCode))
+        .addDef(ResVReg)
+        .addUse(GR.getSPIRVTypeID(ResType))
+        .addUse(SrcReg)
+        .constrainAllUses(TII, TRI, RBI);
+
+  MachineIRBuilder MIRBuilder(I);
+  unsigned ComponentCount = GR.getScalarOrVectorComponentCount(SrcType);
+  SPIRVType *F32ConvertTy = GR.getOrCreateSPIRVFloatType(32, I, TII);
+  if (ComponentCount != 1)
+    F32ConvertTy = GR.getOrCreateSPIRVVectorType(F32ConvertTy, ComponentCount,
+                                                 MIRBuilder, false);
+
+  const TargetRegisterClass *RegClass = GR.getRegClass(F32ConvertTy);
+  Register ConvertToVReg = MRI->createVirtualRegister(RegClass);
+  Register DpdOpVReg = MRI->createVirtualRegister(RegClass);
+
+  bool Result =
+      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpFConvert))
+          .addDef(ConvertToVReg)
+          .addUse(GR.getSPIRVTypeID(F32ConvertTy))
+          .addUse(SrcReg)
+          .constrainAllUses(TII, TRI, RBI);
+  Result &= BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(DPdOpCode))
+                .addDef(DpdOpVReg)
+                .addUse(GR.getSPIRVTypeID(F32ConvertTy))
+                .addUse(ConvertToVReg)
+                .constrainAllUses(TII, TRI, RBI);
+  Result &=
+      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpFConvert))
+          .addDef(ResVReg)
+          .addUse(GR.getSPIRVTypeID(ResType))
+          .addUse(DpdOpVReg)
+          .constrainAllUses(TII, TRI, RBI);
+  return Result;
+}
+
 bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
                                                const SPIRVType *ResType,
                                                MachineInstr &I) const {
@@ -3528,7 +3583,12 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
   case Intrinsic::spv_unpackhalf2x16: {
     return selectExtInst(ResVReg, ResType, I, GL::UnpackHalf2x16);
   }
-
+  case Intrinsic::spv_ddx_coarse: {
+    return selectDpdCoarse(ResVReg, ResType, I, SPIRV::OpDPdxCoarse);
+  }
+  case Intrinsic::spv_ddy_coarse: {
+    return selectDpdCoarse(ResVReg, ResType, I, SPIRV::OpDPdyCoarse);
+  }
   default: {
     std::string DiagMsg;
     raw_string_ostream OS(DiagMsg);
@@ -4694,6 +4754,17 @@ bool SPIRVInstructionSelector::loadHandleBeforePosition(
       .constrainAllUses(TII, TRI, RBI);
 }
 
+void SPIRVInstructionSelector::errorIfInstrOutsideShader(
+    MachineInstr &I) const {
+  if (STI.isShader()) // Nothing to report for shader subtargets.
+    return;
+  std::string DiagMsg;
+  raw_string_ostream OS(DiagMsg);
+  I.print(OS, true, false, false, false);
+  DiagMsg += " is only supported in shaders.\n";
+  report_fatal_error(DiagMsg.c_str(), false);
+}
+
 namespace llvm {
 InstructionSelector *
 createSPIRVInstructionSelector(const SPIRVTargetMachine &TM,
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
index b8cd9c1358f00..bd754d17694b8 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -934,7 +934,8 @@ void RequirementHandler::initAvailableCapabilitiesForVulkan(
                     Capability::UniformBufferArrayDynamicIndexing,
                     Capability::SampledImageArrayDynamicIndexing,
                     Capability::StorageBufferArrayDynamicIndexing,
-                    Capability::StorageImageArrayDynamicIndexing});
+                    Capability::StorageImageArrayDynamicIndexing,
+                    Capability::DerivativeControl});
 
   // Became core in Vulkan 1.2
   if (ST.isAtLeastSPIRVVer(VersionTuple(1, 5))) {
@@ -2148,6 +2149,12 @@ void addInstrRequirements(const MachineInstr &MI,
     }
     break;
   }
+  case SPIRV::OpDPdxCoarse:
+  case SPIRV::OpDPdyCoarse: {
+    Reqs.addCapability(SPIRV::Capability::DerivativeControl);
+    break;
+  }
+
   default:
     break;
   }
diff --git a/llvm/test/CodeGen/DirectX/ddx_coarse-errors.ll b/llvm/test/CodeGen/DirectX/ddx_coarse-errors.ll
new file mode 100644
index 0000000000000..0679eec31cec1
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/ddx_coarse-errors.ll
@@ -0,0 +1,15 @@
+; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s
+
+; DXIL operation ddx.coarse does not support double overload type
+; CHECK: in function ddx.coarse
+; CHECK-SAME: Cannot create DerivCoarseX operation: Invalid overload type
+
+; Function Attrs: noinline nounwind optnone
+define noundef double @ddx.coarse_double(double noundef %a) #0 {
+entry:
+  %a.addr = alloca double, align 8
+  store double %a, ptr %a.addr, align 8
+  %0 = load double, ptr %a.addr, align 8
+  %dx.ddx.coarse = call double @llvm.dx.ddx.coarse.f64(double %0)
+  ret double %dx.ddx.coarse
+}
diff --git a/llvm/test/CodeGen/DirectX/ddx_coarse.ll b/llvm/test/CodeGen/DirectX/ddx_coarse.ll
new file mode 100644
index 0000000000000..f6ea031273263
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/ddx_coarse.ll
@@ -0,0 +1,40 @@
+; RUN: opt -S  -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+
+; Make sure dxil operation function calls for ddx_coarse are generated for half/float and matching vectors
+
+define noundef half @deriv_coarse_x_half(half noundef %a) {
+; CHECK: call half @dx.op.unary.f16(i32 83, half %{{.*}})
+entry:
+  %dx.ddx.coarse = call half @llvm.dx.ddx.coarse.f16(half %a)
+  ret half %dx.ddx.coarse
+}
+
+define noundef float @deriv_coarse_x_float(float noundef %a) {
+; CHECK: call float @dx.op.unary.f32(i32 83, float %{{.*}})
+entry:
+  %dx.ddx.coarse = call float @llvm.dx.ddx.coarse.f32(float %a)
+  ret float %dx.ddx.coarse
+}
+
+define noundef <4 x float> @deriv_coarse_x_float4(<4 x float> noundef %a) {
+; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 83, float [[ee0]])
+; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 83, float [[ee1]])
+; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 83, float [[ee2]])
+; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 83, float [[ee3]])
+; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+; CHECK: ret <4 x float> %{{.*}}
+entry:
+  %dx.ddx.coarse = call <4 x float> @llvm.dx.ddx.coarse.v4f32(<4 x float> %a)
+  ret <4 x float> %dx.ddx.coarse
+}
+
+declare half @llvm.dx.ddx.coarse.f16(half)
+declare float @llvm.dx.ddx.coarse.f32(float)
+declare <4 x float> @llvm.dx.ddx.coarse.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/ddy_coarse-errors.ll b/llvm/test/CodeGen/DirectX/ddy_coarse-errors.ll
new file mode 100644
index 0000000000000..df8e3eb0f7e0b
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/ddy_coarse-errors.ll
@@ -0,0 +1,15 @@
+; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s
+
+; DXIL operation ddy.coarse does not support double overload type
+; CHECK: in function ddy.coarse
+; CHECK-SAME: Cannot create DerivCoarseY operation: Invalid overload type
+
+; Function Attrs: noinline nounwind optnone
+define noundef double @ddy.coarse_double(double noundef %a) #0 {
+entry:
+  %a.addr = alloca double, align 8
+  store double %a, ptr %a.addr, align 8
+  %0 = load double, ptr %a.addr, align 8
+  %dx.ddy.coarse = call double @llvm.dx.ddy.coarse.f64(double %0)
+  ret double %dx.ddy.coarse
+}
diff --git a/llvm/test/CodeGen/DirectX/ddy_coarse.ll b/llvm/test/CodeGen/DirectX/ddy_coarse.ll
new file mode 100644
index 0000000000000..e3337022e1b01
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/ddy_coarse.ll
@@ -0,0 +1,40 @@
+; RUN: opt -S  -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+
+; Make sure dxil operation function calls for ddy_coarse are generated for half/float and matching vectors
+
+define noundef half @deriv_coarse_y_half(half noundef %a) {
+; CHECK: call half @dx.op.unary.f16(i32 84, half %{{.*}})
+entry:
+  %dx.ddy.coarse = call half @llvm.dx.ddy.coarse.f16(half %a)
+  ret half %dx.ddy.coarse
+}
+
+define noundef float @deriv_coarse_y_float(float noundef %a) {
+; CHECK: call float @dx.op.unary.f32(i32 84, float %{{.*}})
+entry:
+  %dx.ddy.coarse = call float @llvm.dx.ddy.coarse.f32(float %a)
+  ret float %dx.ddy.coarse
+}
+
+define noundef <4 x float> @deriv_coarse_y_float4(<4 x float> noundef %a) {
+; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 84, float [[ee0]])
+; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 84, float [[ee1]])
+; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 84, float [[ee2]])
+; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 84, float [[ee3]])
+; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+; CHECK: ret <4 x float> %{{.*}}
+entry:
+  %dx.ddy.coarse = call <4 x float> @llvm.dx.ddy.coarse.v4f32(<4 x float> %a)
+  ret <4 x float> %dx.ddy.coarse
+}
+
+declare half @llvm.dx.ddy.coarse.f16(half)
+declare float @llvm.dx.ddy.coarse.f32(float)
+declare <4 x float> @llvm.dx.ddy.coarse.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/ddx_coarse.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/ddx_coarse.ll
new file mode 100644
index 0000000000000..478acb53701ea
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/ddx_coarse.ll
@@ -0,0 +1,47 @@
+; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-vulkan %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val --target-env spv1.4 %}
+
+; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32
+; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16
+
+; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4
+; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4
+
+define noundef float @ddx_coarse_float(float noundef %a) {
+entry:
+; CHECK: %[[#float_32_arg:]] = OpFunctionParameter %[[#float_32]]
+; CHECK: %[[#]] = OpDPdxCoarse %[[#float_32]] %[[#float_32_arg]]
+  %elt.ddx.coarse = call float @llvm.spv.ddx.coarse.f32(float %a)
+  ret float %elt.ddx.coarse
+}
+
+define noundef half @ddx_coarse_half(half noundef %a) {
+entry:
+; CHECK: %[[#float_16_arg:]] = OpFunctionParameter %[[#float_16]]
+; CHECK: %[[#converted:]] = OpFConvert %[[#float_32]] %[[#float_16_arg]]
+; CHECK: %[[#coarse:]] = OpDPdxCoarse %[[#float_32]] %[[#converted]]
+; CHECK: %[[#]] = OpFConvert %[[#float_16]] %[[#coarse]]
+  %elt.ddx.coarse = call half @llvm.spv.ddx.coarse.f16(half %a)
+  ret half %elt.ddx.coarse
+}
+
+define noundef <4 x float> @ddx_coarse_float_vector(<4 x float> noundef %a) {
+entry:
+; CHECK: %[[#vec4_float_32_arg:]] = OpFunctionParameter %[[#vec4_float_32]]
+; CHECK: %[[#]] = OpDPdxCoarse %[[#vec4_float_32]] %[[#vec4_float_32_arg]]
+  %elt.ddx.coarse = call <4 x float> @llvm.spv.ddx.coarse.v4f32(<4 x float> %a)
+  ret <4 x float> %elt.ddx.coarse
+}
+
+define noundef <4 x half> @ddx_coarse_half_vector(<4 x half> noundef %a) {
+entry:
+; CHECK: %[[#vec4_float_16_arg:]] = OpFunctionParameter %[[#vec4_float_16]]
+; CHECK: %[[#converted:]] = OpFConvert %[[#vec4_float_32]] %[[#vec4_float_16_arg]]
+; CHECK: %[[#coarse:]] = OpDPdxCoarse %[[#vec4_float_32]] %[[#converted]]
+; CHECK: %[[#]] = OpFConvert %[[#vec4_float_16]] %[[#coarse]]
+  %elt.ddx.coarse = call <4 x half> @llvm.spv.ddx.coarse.v4f16(<4 x half> %a)
+  ret <4 x half> %elt.ddx.coarse
+}
+
+declare float @llvm.spv.ddx.coarse.f32(float)
+declare half @llvm.spv.ddx.coarse.f16(half)
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/ddy_coarse.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/ddy_coarse.ll
new file mode 100644
index 0000000000000..8ad67cb644aa7
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/ddy_coarse.ll
@@ -0,0 +1,47 @@
+; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-vulkan %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val --target-env spv1.4 %}
+
+; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32
+; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16
+
+; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4
+; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4
+
+define noundef float @ddy_coarse_float(float noundef %a) {
+entry:
+; CHECK: %[[#float_32_arg:]] = OpFunctionParameter %[[#float_32]]
+; CHECK: %[[#]] = OpDPdyCoarse %[[#float_32]] %[[#float_32_arg]]
+  %elt.ddy.coarse = call float @llvm.spv.ddy.coarse.f32(float %a)
+  ret float %elt.ddy.coarse
+}
+
+define noundef half @ddy_coarse_half(half noundef %a) {
+entry:
+; CHECK: %[[#float_16_arg:]] = OpFunctionParameter %[[#float_16]]
+; CHECK: %[[#converted:]] = OpFConvert %[[#float_32]] %[[#float_16_arg]]
+; CHECK: %[[#coarse:]] = OpDPdyCoarse %[[#float_32]] %[[#converted]]
+; CHECK: %[[#]] = OpFConvert %[[#float_16]] %[[#coarse]]
+  %elt.ddy.coarse = call half @llvm.spv.ddy.coarse.f16(half %a)
+  ret half %elt.ddy.coarse
+}
+
+define noundef <4 x float> @ddy_coarse_float_vector(<4 x float> noundef %a) {
+entry:
+; CHECK: %[[#vec4_float_32_arg:]] = OpFunctionParameter %[[#vec4_float_32]]
+; CHECK: %[[#]] = OpDPdyCoarse %[[#vec4_float_32]] %[[#vec4_float_32_arg]]
+  %elt.ddy.coarse = call <4 x float> @llvm.spv.ddy.coarse.v4f32(<4 x float> %a)
+  ret <4 x float> %elt.ddy.coarse
+}
+
+define noundef <4 x half> @ddy_coarse_half_vector(<4 x half> noundef %a) {
+entry:
+; CHECK: %[[#vec4_float_16_arg:]] = OpFunctionParameter %[[#vec4_float_16]]
+; CHECK: %[[#converted:]] = OpFConvert %[[#vec4_float_32]] %[[#vec4_float_16_arg]]
+; CHECK: %[[#coarse:]] = OpDPdyCoarse %[[#vec4_float_32]] %[[#converted]]
+; CHECK: %[[#]] = OpFConvert %[[#vec4_float_16]] %[[#coarse]]
+  %elt.ddy.coarse = call <4 x half> @llvm.spv.ddy.coarse.v4f16(<4 x half> %a)
+  ret <4 x half> %elt.ddy.coarse
+}
+
+declare float @llvm.spv.ddy.coarse.f32(float)
+declare half @llvm.spv.ddy.coarse.f16(half)
diff --git a/llvm/test/CodeGen/SPIRV/opencl/ddx_coarse-error.ll b/llvm/test/CodeGen/SPIRV/opencl/ddx_coarse-error.ll
new file mode 100644
index 0000000000000..e93c1d1ba4d36
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/opencl/ddx_coarse-error.ll
@@ -0,0 +1,12 @@
+; RUN: not llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o /dev/null 2>&1 | FileCheck %s
+; RUN: not llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown %s -o /dev/null 2>&1 | FileCheck %s
+
+; CHECK: LLVM ERROR: %{{.*}} = G_INTRINSIC intrinsic(@llvm.spv.ddx.coarse), %{{.*}} is only supported in shaders.
+
+define noundef float @ddx_coarse(float noundef %a) {
+entry:
+  %spv.ddx.coarse = call float @llvm.spv.ddx.coarse.f32(float %a)
+  ret float %spv.ddx.coarse
+}
+
+declare float @llvm.spv.ddx.coarse.f32(float)
diff --git a/llvm/test/CodeGen/SPIRV/opencl/ddy_coarse-error.ll b/llvm/test/CodeGen/SPIRV/opencl/ddy_coarse-error.ll
new file mode 100644
index 0000000000000..aa71a395d8680
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/opencl/ddy_coarse-error.ll
@@ -0,0 +1,12 @@
+; RUN: not llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o /dev/null 2>&1 | FileCheck %s
+; RUN: not llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown %s -o /dev/null 2>&1 | FileCheck %s
+
+; CHECK: LLVM ERROR: %{{.*}} = G_INTRINSIC intrinsic(@llvm.spv.ddy.coarse), %{{.*}} is only supported in shaders.
+
+define noundef float @ddy_coarse(float noundef %a) {
+entry:
+  %spv.ddy.coarse = call float @llvm.spv.ddy.coarse.f32(float %a)
+  ret float %spv.ddy.coarse
+}
+
+declare float @llvm.spv.ddy.coarse.f32(float)

From 61c2cc9462d4cf4a1925975e34eed7122463ef16 Mon Sep 17 00:00:00 2001
From: Manuel Carrasco <Manuel.Carrasco@amd.com>
Date: Tue, 18 Nov 2025 15:48:04 +0000
Subject: [PATCH 29/52] [clang][clang-linker-wrapper] Use the correct triple
 for clang-offload-bundler and AMD SPIR-V. (#168521)

`clang-linker-wrapper` was passing the wrong target triple
(`hip-amdgcn-amd-amdhsa--amdgcnspirv`) to `clang-offload-bundler` for AMD
SPIR-V. This resulted in a binary that couldn't be executed if built using
the new driver.

The runtime couldn't recognise the triple, which triggered this error at
execution time:

```
No compatible code objects found for: gfx90a:sramecc+:xnack-,
```

With this PR, this is solved:

```
Creating ISA for: gfx90a:sramecc+:xnack- from spirv
```
---
 .../test/Driver/linker-wrapper-hip-amdgcnspirv.c | 16 ++++++++++++++++
 .../clang-linker-wrapper/ClangLinkerWrapper.cpp  |  7 +++++--
 2 files changed, 21 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/Driver/linker-wrapper-hip-amdgcnspirv.c

diff --git a/clang/test/Driver/linker-wrapper-hip-amdgcnspirv.c b/clang/test/Driver/linker-wrapper-hip-amdgcnspirv.c
new file mode 100644
index 0000000000000..429f7d3b9ee13
--- /dev/null
+++ b/clang/test/Driver/linker-wrapper-hip-amdgcnspirv.c
@@ -0,0 +1,16 @@
+// RUN: %clang -cc1 %s -triple "spirv64-amd-amdhsa" -emit-llvm-bc -o %t.bc
+// RUN: llvm-offload-binary -o %t.out "--image=file=%t.bc,triple=spirv64-amd-amdhsa,arch=amdgcnspirv,kind=hip"
+// RUN: clang-linker-wrapper \
+// RUN:     "--should-extract=amdgcnspirv" \
+// RUN:     "--host-triple=spirv64-amd-amdhsa" \
+// RUN:     "--linker-path=clang-offload-bundler" \
+// RUN:     "--emit-fatbin-only" \
+// RUN:     "-o" "%t.hipfb" \
+// RUN:     "%t.out" \
+// RUN:     --dry-run \
+// RUN: 2>&1 | FileCheck %s
+
+// clang-linker-wrapper was previously calling clang-offload-bundler with -targets=...,hip-amdgcn-amd-amdhsa--amdgcnspirv
+// This caused the runtime not to recognise the triple for the AMD SPIR-V code.
+
+// CHECK: {{".*clang-offload-bundler.*"}} {{.*}} -targets={{.*}},hip-spirv64-amd-amdhsa--amdgcnspirv
diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
index bd4b40192c9f2..4a4a43db6ef25 100644
--- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -439,8 +439,11 @@ fatbinary(ArrayRef<std::pair<StringRef, StringRef>> InputFiles,
         Args.MakeArgString(Twine("-compression-level=") + Arg->getValue()));
 
   SmallVector<StringRef> Targets = {"-targets=host-x86_64-unknown-linux-gnu"};
-  for (const auto &[File, Arch] : InputFiles)
-    Targets.push_back(Saver.save("hip-amdgcn-amd-amdhsa--" + Arch));
+  for (const auto &[File, Arch] : InputFiles) {
+    Targets.push_back(Saver.save(Arch == "amdgcnspirv"
+                                     ? "hip-spirv64-amd-amdhsa--" + Arch
+                                     : "hip-amdgcn-amd-amdhsa--" + Arch));
+  }
   CmdArgs.push_back(Saver.save(llvm::join(Targets, ",")));
 
 #ifdef _WIN32

From 4d093683ceab90a8df17f6887c5b21a27ed95ba6 Mon Sep 17 00:00:00 2001
From: Jordan Rupprecht <rupprecht@google.com>
Date: Tue, 18 Nov 2025 09:48:13 -0600
Subject: [PATCH 30/52] [bazel] Add MODULE.bazel (#164891)

This is a simple translation of the current WORKSPACE file.

* External repos are replaced with `bazel_dep()`. The versions have been
bumped to newer versions.
* `maybe()` doesn't seem to be available under bzlmod, so I just removed it.
* Existing repos where we define our own BUILD file in third_party_build
have *not* been replaced due to compatibility issues. For example,
`nanobind_bazel` could replace the `nanobind` config we have, but
switching to that caused some build errors.
* These existing repos are instead defined through a module extension
(`extensions.bzl`).

This should have no effect since `.bazelrc` defines `common
--enable_bzlmod=false --enable_workspace`

Tested locally: `bazel test --enable_bzlmod --noenable_workspace
--config=generic_clang @llvm-project//... //...`
---
 utils/bazel/MODULE.bazel      |  38 +++
 utils/bazel/MODULE.bazel.lock | 490 ++++++++++++++++++++++++++++++++++
 utils/bazel/extensions.bzl    | 127 +++++++++
 3 files changed, 655 insertions(+)
 create mode 100644 utils/bazel/MODULE.bazel
 create mode 100644 utils/bazel/MODULE.bazel.lock
 create mode 100644 utils/bazel/extensions.bzl

diff --git a/utils/bazel/MODULE.bazel b/utils/bazel/MODULE.bazel
new file mode 100644
index 0000000000000..d061487acf4d7
--- /dev/null
+++ b/utils/bazel/MODULE.bazel
@@ -0,0 +1,38 @@
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+"""bzlmod configuration for llvm-project"""
+module(name = "llvm-project-overlay")
+
+bazel_dep(name = "apple_support", version = "1.24.1", repo_name = "build_bazel_apple_support")
+bazel_dep(name = "bazel_skylib", version = "1.8.2")
+bazel_dep(name = "platforms", version = "1.0.0")
+bazel_dep(name = "rules_android", version = "0.6.6")
+bazel_dep(name = "rules_cc", version = "0.2.11")
+bazel_dep(name = "rules_foreign_cc", version = "0.15.1")
+bazel_dep(name = "rules_python", version = "1.6.3")
+bazel_dep(name = "rules_shell", version = "0.6.1")
+
+llvm_repos_extension = use_extension(":extensions.bzl", "llvm_repos_extension")
+
+use_repo(
+    llvm_repos_extension,
+    "llvm-raw",
+    "llvm_zlib",
+    "vulkan_headers",
+    "vulkan_sdk_setup",
+    "gmp",
+    "mpfr",
+    "mpc",
+    "pfm",
+    "llvm_zstd",
+    "pybind11",
+    "pyyaml",
+    "robin_map",
+    "nanobind",
+)
+
+llvm_configure = use_repo_rule("@llvm-raw//utils/bazel:configure.bzl", "llvm_configure")
+
+llvm_configure(name = "llvm-project")
diff --git a/utils/bazel/MODULE.bazel.lock b/utils/bazel/MODULE.bazel.lock
new file mode 100644
index 0000000000000..64de258401e91
--- /dev/null
+++ b/utils/bazel/MODULE.bazel.lock
@@ -0,0 +1,490 @@
+{
+  "lockFileVersion": 16,
+  "registryFileHashes": {
+    "https://bcr.bazel.build/bazel_registry.json": "8a28e4aff06ee60aed2a8c281907fb8bcbf3b753c91fb5a5c57da3215d5b3497",
+    "https://bcr.bazel.build/modules/abseil-cpp/20210324.2/MODULE.bazel": "7cd0312e064fde87c8d1cd79ba06c876bd23630c83466e9500321be55c96ace2",
+    "https://bcr.bazel.build/modules/abseil-cpp/20211102.0/MODULE.bazel": "70390338f7a5106231d20620712f7cccb659cd0e9d073d1991c038eb9fc57589",
+    "https://bcr.bazel.build/modules/abseil-cpp/20230125.1/MODULE.bazel": "89047429cb0207707b2dface14ba7f8df85273d484c2572755be4bab7ce9c3a0",
+    "https://bcr.bazel.build/modules/abseil-cpp/20230802.0.bcr.1/MODULE.bazel": "1c8cec495288dccd14fdae6e3f95f772c1c91857047a098fad772034264cc8cb",
+    "https://bcr.bazel.build/modules/abseil-cpp/20230802.0/MODULE.bazel": "d253ae36a8bd9ee3c5955384096ccb6baf16a1b1e93e858370da0a3b94f77c16",
+    "https://bcr.bazel.build/modules/abseil-cpp/20230802.1/MODULE.bazel": "fa92e2eb41a04df73cdabeec37107316f7e5272650f81d6cc096418fe647b915",
+    "https://bcr.bazel.build/modules/abseil-cpp/20240116.1/MODULE.bazel": "37bcdb4440fbb61df6a1c296ae01b327f19e9bb521f9b8e26ec854b6f97309ed",
+    "https://bcr.bazel.build/modules/abseil-cpp/20240116.2/MODULE.bazel": "73939767a4686cd9a520d16af5ab440071ed75cec1a876bf2fcfaf1f71987a16",
+    "https://bcr.bazel.build/modules/abseil-cpp/20250127.0/MODULE.bazel": "d1086e248cda6576862b4b3fe9ad76a214e08c189af5b42557a6e1888812c5d5",
+    "https://bcr.bazel.build/modules/abseil-cpp/20250127.0/source.json": "1b996859f840d8efc7c720efc61dcf2a84b1261cb3974cbbe9b6666ebf567775",
+    "https://bcr.bazel.build/modules/abseil-py/2.1.0/MODULE.bazel": "5ebe5bf853769c65707e5c28f216798f7a4b1042015e6a36e6d03094d94bec8a",
+    "https://bcr.bazel.build/modules/abseil-py/2.1.0/source.json": "0e8fc4f088ce07099c1cd6594c20c7ddbb48b4b3c0849b7d94ba94be88ff042b",
+    "https://bcr.bazel.build/modules/apple_support/1.11.1/MODULE.bazel": "1843d7cd8a58369a444fc6000e7304425fba600ff641592161d9f15b179fb896",
+    "https://bcr.bazel.build/modules/apple_support/1.15.1/MODULE.bazel": "a0556fefca0b1bb2de8567b8827518f94db6a6e7e7d632b4c48dc5f865bc7c85",
+    "https://bcr.bazel.build/modules/apple_support/1.24.1/MODULE.bazel": "f46e8ddad60aef170ee92b2f3d00ef66c147ceafea68b6877cb45bd91737f5f8",
+    "https://bcr.bazel.build/modules/apple_support/1.24.1/source.json": "cf725267cbacc5f028ef13bb77e7f2c2e0066923a4dab1025e4a0511b1ed258a",
+    "https://bcr.bazel.build/modules/bazel_features/1.1.0/MODULE.bazel": "cfd42ff3b815a5f39554d97182657f8c4b9719568eb7fded2b9135f084bf760b",
+    "https://bcr.bazel.build/modules/bazel_features/1.1.1/MODULE.bazel": "27b8c79ef57efe08efccbd9dd6ef70d61b4798320b8d3c134fd571f78963dbcd",
+    "https://bcr.bazel.build/modules/bazel_features/1.11.0/MODULE.bazel": "f9382337dd5a474c3b7d334c2f83e50b6eaedc284253334cf823044a26de03e8",
+    "https://bcr.bazel.build/modules/bazel_features/1.13.0/MODULE.bazel": "c14c33c7c3c730612bdbe14ebbb5e61936b6f11322ea95a6e91cd1ba962f94df",
+    "https://bcr.bazel.build/modules/bazel_features/1.15.0/MODULE.bazel": "d38ff6e517149dc509406aca0db3ad1efdd890a85e049585b7234d04238e2a4d",
+    "https://bcr.bazel.build/modules/bazel_features/1.17.0/MODULE.bazel": "039de32d21b816b47bd42c778e0454217e9c9caac4a3cf8e15c7231ee3ddee4d",
+    "https://bcr.bazel.build/modules/bazel_features/1.18.0/MODULE.bazel": "1be0ae2557ab3a72a57aeb31b29be347bcdc5d2b1eb1e70f39e3851a7e97041a",
+    "https://bcr.bazel.build/modules/bazel_features/1.19.0/MODULE.bazel": "59adcdf28230d220f0067b1f435b8537dd033bfff8db21335ef9217919c7fb58",
+    "https://bcr.bazel.build/modules/bazel_features/1.21.0/MODULE.bazel": "675642261665d8eea09989aa3b8afb5c37627f1be178382c320d1b46afba5e3b",
+    "https://bcr.bazel.build/modules/bazel_features/1.23.0/MODULE.bazel": "fd1ac84bc4e97a5a0816b7fd7d4d4f6d837b0047cf4cbd81652d616af3a6591a",
+    "https://bcr.bazel.build/modules/bazel_features/1.27.0/MODULE.bazel": "621eeee06c4458a9121d1f104efb80f39d34deff4984e778359c60eaf1a8cb65",
+    "https://bcr.bazel.build/modules/bazel_features/1.28.0/MODULE.bazel": "4b4200e6cbf8fa335b2c3f43e1d6ef3e240319c33d43d60cc0fbd4b87ece299d",
+    "https://bcr.bazel.build/modules/bazel_features/1.3.0/MODULE.bazel": "cdcafe83ec318cda34e02948e81d790aab8df7a929cec6f6969f13a489ccecd9",
+    "https://bcr.bazel.build/modules/bazel_features/1.30.0/MODULE.bazel": "a14b62d05969a293b80257e72e597c2da7f717e1e69fa8b339703ed6731bec87",
+    "https://bcr.bazel.build/modules/bazel_features/1.30.0/source.json": "b07e17f067fe4f69f90b03b36ef1e08fe0d1f3cac254c1241a1818773e3423bc",
+    "https://bcr.bazel.build/modules/bazel_features/1.4.1/MODULE.bazel": "e45b6bb2350aff3e442ae1111c555e27eac1d915e77775f6fdc4b351b758b5d7",
+    "https://bcr.bazel.build/modules/bazel_features/1.9.1/MODULE.bazel": "8f679097876a9b609ad1f60249c49d68bfab783dd9be012faf9d82547b14815a",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.0.3/MODULE.bazel": "bcb0fd896384802d1ad283b4e4eb4d718eebd8cb820b0a2c3a347fb971afd9d8",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.1.1/MODULE.bazel": "1add3e7d93ff2e6998f9e118022c84d163917d912f5afafb3058e3d2f1545b5e",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.2.0/MODULE.bazel": "44fe84260e454ed94ad326352a698422dbe372b21a1ac9f3eab76eb531223686",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.2.1/MODULE.bazel": "f35baf9da0efe45fa3da1696ae906eea3d615ad41e2e3def4aeb4e8bc0ef9a7a",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.3.0/MODULE.bazel": "20228b92868bf5cfc41bda7afc8a8ba2a543201851de39d990ec957b513579c5",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.4.1/MODULE.bazel": "a0dcb779424be33100dcae821e9e27e4f2901d9dfd5333efe5ac6a8d7ab75e1d",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.4.2/MODULE.bazel": "3bd40978e7a1fac911d5989e6b09d8f64921865a45822d8b09e815eaa726a651",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.5.0/MODULE.bazel": "32880f5e2945ce6a03d1fbd588e9198c0a959bb42297b2cfaf1685b7bc32e138",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.6.1/MODULE.bazel": "8fdee2dbaace6c252131c00e1de4b165dc65af02ea278476187765e1a617b917",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.7.0/MODULE.bazel": "0db596f4563de7938de764cc8deeabec291f55e8ec15299718b93c4423e9796d",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.7.1/MODULE.bazel": "3120d80c5861aa616222ec015332e5f8d3171e062e3e804a2a0253e1be26e59b",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.8.1/MODULE.bazel": "88ade7293becda963e0e3ea33e7d54d3425127e0a326e0d17da085a5f1f03ff6",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.8.2/MODULE.bazel": "69ad6927098316848b34a9142bcc975e018ba27f08c4ff403f50c1b6e646ca67",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.8.2/source.json": "34a3c8bcf233b835eb74be9d628899bb32999d3e0eadef1947a0a562a2b16ffb",
+    "https://bcr.bazel.build/modules/bazel_worker_api/0.0.1/MODULE.bazel": "02a13b77321773b2042e70ee5e4c5e099c8ddee4cf2da9cd420442c36938d4bd",
+    "https://bcr.bazel.build/modules/bazel_worker_api/0.0.4/MODULE.bazel": "460aa12d01231a80cce03c548287b433b321d205b0028ae596728c35e5ee442e",
+    "https://bcr.bazel.build/modules/bazel_worker_api/0.0.4/source.json": "d353c410d47a8b65d09fa98e83d57ebec257a2c2b9c6e42d6fda1cb25e5464a5",
+    "https://bcr.bazel.build/modules/bazel_worker_java/0.0.4/MODULE.bazel": "82494a01018bb7ef06d4a17ec4cd7a758721f10eb8b6c820a818e70d669500db",
+    "https://bcr.bazel.build/modules/bazel_worker_java/0.0.4/source.json": "a2d30458fd86cf022c2b6331e652526fa08e17573b2f5034a9dbcacdf9c2583c",
+    "https://bcr.bazel.build/modules/buildozer/7.1.2/MODULE.bazel": "2e8dd40ede9c454042645fd8d8d0cd1527966aa5c919de86661e62953cd73d84",
+    "https://bcr.bazel.build/modules/buildozer/7.1.2/source.json": "c9028a501d2db85793a6996205c8de120944f50a0d570438fcae0457a5f9d1f8",
+    "https://bcr.bazel.build/modules/gazelle/0.32.0/MODULE.bazel": "b499f58a5d0d3537f3cf5b76d8ada18242f64ec474d8391247438bf04f58c7b8",
+    "https://bcr.bazel.build/modules/gazelle/0.33.0/MODULE.bazel": "a13a0f279b462b784fb8dd52a4074526c4a2afe70e114c7d09066097a46b3350",
+    "https://bcr.bazel.build/modules/gazelle/0.34.0/MODULE.bazel": "abdd8ce4d70978933209db92e436deb3a8b737859e9354fb5fd11fb5c2004c8a",
+    "https://bcr.bazel.build/modules/gazelle/0.36.0/MODULE.bazel": "e375d5d6e9a6ca59b0cb38b0540bc9a05b6aa926d322f2de268ad267a2ee74c0",
+    "https://bcr.bazel.build/modules/gazelle/0.40.0/MODULE.bazel": "42ba5378ebe845fca43989a53186ab436d956db498acde790685fe0e8f9c6146",
+    "https://bcr.bazel.build/modules/gazelle/0.40.0/source.json": "1e5ef6e4d8b9b6836d93273c781e78ff829ea2e077afef7a57298040fa4f010a",
+    "https://bcr.bazel.build/modules/google_benchmark/1.8.2/MODULE.bazel": "a70cf1bba851000ba93b58ae2f6d76490a9feb74192e57ab8e8ff13c34ec50cb",
+    "https://bcr.bazel.build/modules/googletest/1.11.0/MODULE.bazel": "3a83f095183f66345ca86aa13c58b59f9f94a2f81999c093d4eeaa2d262d12f4",
+    "https://bcr.bazel.build/modules/googletest/1.14.0.bcr.1/MODULE.bazel": "22c31a561553727960057361aa33bf20fb2e98584bc4fec007906e27053f80c6",
+    "https://bcr.bazel.build/modules/googletest/1.14.0/MODULE.bazel": "cfbcbf3e6eac06ef9d85900f64424708cc08687d1b527f0ef65aa7517af8118f",
+    "https://bcr.bazel.build/modules/googletest/1.15.2/MODULE.bazel": "6de1edc1d26cafb0ea1a6ab3f4d4192d91a312fd2d360b63adaa213cd00b2108",
+    "https://bcr.bazel.build/modules/googletest/1.15.2/source.json": "dbdda654dcb3a0d7a8bc5d0ac5fc7e150b58c2a986025ae5bc634bb2cb61f470",
+    "https://bcr.bazel.build/modules/jsoncpp/1.9.5/MODULE.bazel": "31271aedc59e815656f5736f282bb7509a97c7ecb43e927ac1a37966e0578075",
+    "https://bcr.bazel.build/modules/jsoncpp/1.9.6/MODULE.bazel": "2f8d20d3b7d54143213c4dfc3d98225c42de7d666011528dc8fe91591e2e17b0",
+    "https://bcr.bazel.build/modules/jsoncpp/1.9.6/source.json": "a04756d367a2126c3541682864ecec52f92cdee80a35735a3cb249ce015ca000",
+    "https://bcr.bazel.build/modules/libpfm/4.11.0/MODULE.bazel": "45061ff025b301940f1e30d2c16bea596c25b176c8b6b3087e92615adbd52902",
+    "https://bcr.bazel.build/modules/nlohmann_json/3.6.1/MODULE.bazel": "6f7b417dcc794d9add9e556673ad25cb3ba835224290f4f848f8e2db1e1fca74",
+    "https://bcr.bazel.build/modules/nlohmann_json/3.6.1/source.json": "f448c6e8963fdfa7eb831457df83ad63d3d6355018f6574fb017e8169deb43a9",
+    "https://bcr.bazel.build/modules/platforms/0.0.10/MODULE.bazel": "8cb8efaf200bdeb2150d93e162c40f388529a25852b332cec879373771e48ed5",
+    "https://bcr.bazel.build/modules/platforms/0.0.11/MODULE.bazel": "0daefc49732e227caa8bfa834d65dc52e8cc18a2faf80df25e8caea151a9413f",
+    "https://bcr.bazel.build/modules/platforms/0.0.4/MODULE.bazel": "9b328e31ee156f53f3c416a64f8491f7eb731742655a47c9eec4703a71644aee",
+    "https://bcr.bazel.build/modules/platforms/0.0.5/MODULE.bazel": "5733b54ea419d5eaf7997054bb55f6a1d0b5ff8aedf0176fef9eea44f3acda37",
+    "https://bcr.bazel.build/modules/platforms/0.0.6/MODULE.bazel": "ad6eeef431dc52aefd2d77ed20a4b353f8ebf0f4ecdd26a807d2da5aa8cd0615",
+    "https://bcr.bazel.build/modules/platforms/0.0.7/MODULE.bazel": "72fd4a0ede9ee5c021f6a8dd92b503e089f46c227ba2813ff183b71616034814",
+    "https://bcr.bazel.build/modules/platforms/0.0.8/MODULE.bazel": "9f142c03e348f6d263719f5074b21ef3adf0b139ee4c5133e2aa35664da9eb2d",
+    "https://bcr.bazel.build/modules/platforms/0.0.9/MODULE.bazel": "4a87a60c927b56ddd67db50c89acaa62f4ce2a1d2149ccb63ffd871d5ce29ebc",
+    "https://bcr.bazel.build/modules/platforms/1.0.0/MODULE.bazel": "f05feb42b48f1b3c225e4ccf351f367be0371411a803198ec34a389fb22aa580",
+    "https://bcr.bazel.build/modules/platforms/1.0.0/source.json": "f4ff1fd412e0246fd38c82328eb209130ead81d62dcd5a9e40910f867f733d96",
+    "https://bcr.bazel.build/modules/protobuf/21.7/MODULE.bazel": "a5a29bb89544f9b97edce05642fac225a808b5b7be74038ea3640fae2f8e66a7",
+    "https://bcr.bazel.build/modules/protobuf/23.1/MODULE.bazel": "88b393b3eb4101d18129e5db51847cd40a5517a53e81216144a8c32dfeeca52a",
+    "https://bcr.bazel.build/modules/protobuf/24.4/MODULE.bazel": "7bc7ce5f2abf36b3b7b7c8218d3acdebb9426aeb35c2257c96445756f970eb12",
+    "https://bcr.bazel.build/modules/protobuf/27.0/MODULE.bazel": "7873b60be88844a0a1d8f80b9d5d20cfbd8495a689b8763e76c6372998d3f64c",
+    "https://bcr.bazel.build/modules/protobuf/27.1/MODULE.bazel": "703a7b614728bb06647f965264967a8ef1c39e09e8f167b3ca0bb1fd80449c0d",
+    "https://bcr.bazel.build/modules/protobuf/27.2/MODULE.bazel": "32450b50673882e4c8c3d10a83f3bc82161b213ed2f80d17e38bece8f165c295",
+    "https://bcr.bazel.build/modules/protobuf/29.0-rc2/MODULE.bazel": "6241d35983510143049943fc0d57937937122baf1b287862f9dc8590fc4c37df",
+    "https://bcr.bazel.build/modules/protobuf/29.0-rc3/MODULE.bazel": "33c2dfa286578573afc55a7acaea3cada4122b9631007c594bf0729f41c8de92",
+    "https://bcr.bazel.build/modules/protobuf/29.0/MODULE.bazel": "319dc8bf4c679ff87e71b1ccfb5a6e90a6dbc4693501d471f48662ac46d04e4e",
+    "https://bcr.bazel.build/modules/protobuf/3.19.0/MODULE.bazel": "6b5fbb433f760a99a22b18b6850ed5784ef0e9928a72668b66e4d7ccd47db9b0",
+    "https://bcr.bazel.build/modules/protobuf/3.19.2/MODULE.bazel": "532ffe5f2186b69fdde039efe6df13ba726ff338c6bc82275ad433013fa10573",
+    "https://bcr.bazel.build/modules/protobuf/3.19.6/MODULE.bazel": "9233edc5e1f2ee276a60de3eaa47ac4132302ef9643238f23128fea53ea12858",
+    "https://bcr.bazel.build/modules/protobuf/31.1/MODULE.bazel": "379a389bb330b7b8c1cdf331cc90bf3e13de5614799b3b52cdb7c6f389f6b38e",
+    "https://bcr.bazel.build/modules/protobuf/31.1/source.json": "25af5d0219da0c0fc4d1191a24ce438e6ca7f49d2e1a94f354efeba6ef10426f",
+    "https://bcr.bazel.build/modules/pybind11_bazel/2.11.1/MODULE.bazel": "88af1c246226d87e65be78ed49ecd1e6f5e98648558c14ce99176da041dc378e",
+    "https://bcr.bazel.build/modules/pybind11_bazel/2.12.0/MODULE.bazel": "e6f4c20442eaa7c90d7190d8dc539d0ab422f95c65a57cc59562170c58ae3d34",
+    "https://bcr.bazel.build/modules/pybind11_bazel/2.12.0/source.json": "6900fdc8a9e95866b8c0d4ad4aba4d4236317b5c1cd04c502df3f0d33afed680",
+    "https://bcr.bazel.build/modules/re2/2023-09-01/MODULE.bazel": "cb3d511531b16cfc78a225a9e2136007a48cf8a677e4264baeab57fe78a80206",
+    "https://bcr.bazel.build/modules/re2/2024-07-02.bcr.1/MODULE.bazel": "b4963dda9b31080be1905ef085ecd7dd6cd47c05c79b9cdf83ade83ab2ab271a",
+    "https://bcr.bazel.build/modules/re2/2024-07-02.bcr.1/source.json": "2ff292be6ef3340325ce8a045ecc326e92cbfab47c7cbab4bd85d28971b97ac4",
+    "https://bcr.bazel.build/modules/re2/2024-07-02/MODULE.bazel": "0eadc4395959969297cbcf31a249ff457f2f1d456228c67719480205aa306daa",
+    "https://bcr.bazel.build/modules/rules_android/0.1.1/MODULE.bazel": "48809ab0091b07ad0182defb787c4c5328bd3a278938415c00a7b69b50c4d3a8",
+    "https://bcr.bazel.build/modules/rules_android/0.6.6/MODULE.bazel": "b0fb569752aab65ab1a9db0a8f6cfaf5aa1754965e17e95dcf0e4d88e192a68d",
+    "https://bcr.bazel.build/modules/rules_android/0.6.6/source.json": "a9d8dc2d5a102dc03269a94acc886a4cab82cdcb9ccbc77b0f665d6d17a6ae09",
+    "https://bcr.bazel.build/modules/rules_apple/3.16.0/MODULE.bazel": "0d1caf0b8375942ce98ea944be754a18874041e4e0459401d925577624d3a54a",
+    "https://bcr.bazel.build/modules/rules_apple/3.16.0/source.json": "d8b5fe461272018cc07cfafce11fe369c7525330804c37eec5a82f84cd475366",
+    "https://bcr.bazel.build/modules/rules_cc/0.0.1/MODULE.bazel": "cb2aa0747f84c6c3a78dad4e2049c154f08ab9d166b1273835a8174940365647",
+    "https://bcr.bazel.build/modules/rules_cc/0.0.10/MODULE.bazel": "ec1705118f7eaedd6e118508d3d26deba2a4e76476ada7e0e3965211be012002",
+    "https://bcr.bazel.build/modules/rules_cc/0.0.13/MODULE.bazel": "0e8529ed7b323dad0775ff924d2ae5af7640b23553dfcd4d34344c7e7a867191",
+    "https://bcr.bazel.build/modules/rules_cc/0.0.14/MODULE.bazel": "5e343a3aac88b8d7af3b1b6d2093b55c347b8eefc2e7d1442f7a02dc8fea48ac",
+    "https://bcr.bazel.build/modules/rules_cc/0.0.15/MODULE.bazel": "6704c35f7b4a72502ee81f61bf88706b54f06b3cbe5558ac17e2e14666cd5dcc",
+    "https://bcr.bazel.build/modules/rules_cc/0.0.16/MODULE.bazel": "7661303b8fc1b4d7f532e54e9d6565771fea666fbdf839e0a86affcd02defe87",
+    "https://bcr.bazel.build/modules/rules_cc/0.0.17/MODULE.bazel": "2ae1d8f4238ec67d7185d8861cb0a2cdf4bc608697c331b95bf990e69b62e64a",
+    "https://bcr.bazel.build/modules/rules_cc/0.0.2/MODULE.bazel": "6915987c90970493ab97393024c156ea8fb9f3bea953b2f3ec05c34f19b5695c",
+    "https://bcr.bazel.build/modules/rules_cc/0.0.6/MODULE.bazel": "abf360251023dfe3efcef65ab9d56beefa8394d4176dd29529750e1c57eaa33f",
+    "https://bcr.bazel.build/modules/rules_cc/0.0.8/MODULE.bazel": "964c85c82cfeb6f3855e6a07054fdb159aced38e99a5eecf7bce9d53990afa3e",
+    "https://bcr.bazel.build/modules/rules_cc/0.0.9/MODULE.bazel": "836e76439f354b89afe6a911a7adf59a6b2518fafb174483ad78a2a2fde7b1c5",
+    "https://bcr.bazel.build/modules/rules_cc/0.1.1/MODULE.bazel": "2f0222a6f229f0bf44cd711dc13c858dad98c62d52bd51d8fc3a764a83125513",
+    "https://bcr.bazel.build/modules/rules_cc/0.2.11/MODULE.bazel": "e94f24f065bf2191dba2dace951814378b66a94bb3bcc48077492fe0508059b5",
+    "https://bcr.bazel.build/modules/rules_cc/0.2.11/source.json": "4d555dc20c9c135b21b2e403cf0ce8393fb65711b2305979ce053df4ee3e78de",
+    "https://bcr.bazel.build/modules/rules_cc/0.2.8/MODULE.bazel": "f1df20f0bf22c28192a794f29b501ee2018fa37a3862a1a2132ae2940a23a642",
+    "https://bcr.bazel.build/modules/rules_foreign_cc/0.15.1/MODULE.bazel": "c2c60d26c79fda484acb95cdbec46e89d6b28b4845cb277160ce1e0c8622bb88",
+    "https://bcr.bazel.build/modules/rules_foreign_cc/0.15.1/source.json": "a161811a63ba8a859086da3b7ff3ad04f2e9c255d7727b41087103fc0eb22f55",
+    "https://bcr.bazel.build/modules/rules_foreign_cc/0.9.0/MODULE.bazel": "c9e8c682bf75b0e7c704166d79b599f93b72cfca5ad7477df596947891feeef6",
+    "https://bcr.bazel.build/modules/rules_fuzzing/0.5.2/MODULE.bazel": "40c97d1144356f52905566c55811f13b299453a14ac7769dfba2ac38192337a8",
+    "https://bcr.bazel.build/modules/rules_go/0.41.0/MODULE.bazel": "55861d8e8bb0e62cbd2896f60ff303f62ffcb0eddb74ecb0e5c0cbe36fc292c8",
+    "https://bcr.bazel.build/modules/rules_go/0.42.0/MODULE.bazel": "8cfa875b9aa8c6fce2b2e5925e73c1388173ea3c32a0db4d2b4804b453c14270",
+    "https://bcr.bazel.build/modules/rules_go/0.46.0/MODULE.bazel": "3477df8bdcc49e698b9d25f734c4f3a9f5931ff34ee48a2c662be168f5f2d3fd",
+    "https://bcr.bazel.build/modules/rules_go/0.50.1/MODULE.bazel": "b91a308dc5782bb0a8021ad4330c81fea5bda77f96b9e4c117b9b9c8f6665ee0",
+    "https://bcr.bazel.build/modules/rules_go/0.51.0-rc2/MODULE.bazel": "edfc3a9cea7bedb0eaaff37b0d7817c1a4bf72b3c615580b0ffcee6c52690fd4",
+    "https://bcr.bazel.build/modules/rules_go/0.51.0-rc2/source.json": "6b5cd0b3da2bd0e6949580851db990a04af0a285f072b9a0f059424457cd8cc9",
+    "https://bcr.bazel.build/modules/rules_java/4.0.0/MODULE.bazel": "5a78a7ae82cd1a33cef56dc578c7d2a46ed0dca12643ee45edbb8417899e6f74",
+    "https://bcr.bazel.build/modules/rules_java/5.3.5/MODULE.bazel": "a4ec4f2db570171e3e5eb753276ee4b389bae16b96207e9d3230895c99644b86",
+    "https://bcr.bazel.build/modules/rules_java/6.0.0/MODULE.bazel": "8a43b7df601a7ec1af61d79345c17b31ea1fedc6711fd4abfd013ea612978e39",
+    "https://bcr.bazel.build/modules/rules_java/6.3.0/MODULE.bazel": "a97c7678c19f236a956ad260d59c86e10a463badb7eb2eda787490f4c969b963",
+    "https://bcr.bazel.build/modules/rules_java/6.4.0/MODULE.bazel": "e986a9fe25aeaa84ac17ca093ef13a4637f6107375f64667a15999f77db6c8f6",
+    "https://bcr.bazel.build/modules/rules_java/6.5.2/MODULE.bazel": "1d440d262d0e08453fa0c4d8f699ba81609ed0e9a9a0f02cd10b3e7942e61e31",
+    "https://bcr.bazel.build/modules/rules_java/7.1.0/MODULE.bazel": "30d9135a2b6561c761bd67bd4990da591e6bdc128790ce3e7afd6a3558b2fb64",
+    "https://bcr.bazel.build/modules/rules_java/7.10.0/MODULE.bazel": "530c3beb3067e870561739f1144329a21c851ff771cd752a49e06e3dc9c2e71a",
+    "https://bcr.bazel.build/modules/rules_java/7.12.2/MODULE.bazel": "579c505165ee757a4280ef83cda0150eea193eed3bef50b1004ba88b99da6de6",
+    "https://bcr.bazel.build/modules/rules_java/7.2.0/MODULE.bazel": "06c0334c9be61e6cef2c8c84a7800cef502063269a5af25ceb100b192453d4ab",
+    "https://bcr.bazel.build/modules/rules_java/7.3.2/MODULE.bazel": "50dece891cfdf1741ea230d001aa9c14398062f2b7c066470accace78e412bc2",
+    "https://bcr.bazel.build/modules/rules_java/7.4.0/MODULE.bazel": "a592852f8a3dd539e82ee6542013bf2cadfc4c6946be8941e189d224500a8934",
+    "https://bcr.bazel.build/modules/rules_java/7.6.1/MODULE.bazel": "2f14b7e8a1aa2f67ae92bc69d1ec0fa8d9f827c4e17ff5e5f02e91caa3b2d0fe",
+    "https://bcr.bazel.build/modules/rules_java/8.13.0/MODULE.bazel": "0444ebf737d144cf2bb2ccb368e7f1cce735264285f2a3711785827c1686625e",
+    "https://bcr.bazel.build/modules/rules_java/8.13.0/source.json": "4605c0f676b87dd9d1fabd4d743b71f04d97503bd1a79aad53f87399fb5396de",
+    "https://bcr.bazel.build/modules/rules_java/8.3.2/MODULE.bazel": "7336d5511ad5af0b8615fdc7477535a2e4e723a357b6713af439fe8cf0195017",
+    "https://bcr.bazel.build/modules/rules_java/8.5.1/MODULE.bazel": "d8a9e38cc5228881f7055a6079f6f7821a073df3744d441978e7a43e20226939",
+    "https://bcr.bazel.build/modules/rules_java/8.6.0/MODULE.bazel": "9c064c434606d75a086f15ade5edb514308cccd1544c2b2a89bbac4310e41c71",
+    "https://bcr.bazel.build/modules/rules_java/8.6.1/MODULE.bazel": "f4808e2ab5b0197f094cabce9f4b006a27766beb6a9975931da07099560ca9c2",
+    "https://bcr.bazel.build/modules/rules_jvm_external/4.4.2/MODULE.bazel": "a56b85e418c83eb1839819f0b515c431010160383306d13ec21959ac412d2fe7",
+    "https://bcr.bazel.build/modules/rules_jvm_external/5.1/MODULE.bazel": "33f6f999e03183f7d088c9be518a63467dfd0be94a11d0055fe2d210f89aa909",
+    "https://bcr.bazel.build/modules/rules_jvm_external/5.2/MODULE.bazel": "d9351ba35217ad0de03816ef3ed63f89d411349353077348a45348b096615036",
+    "https://bcr.bazel.build/modules/rules_jvm_external/5.3/MODULE.bazel": "bf93870767689637164657731849fb887ad086739bd5d360d90007a581d5527d",
+    "https://bcr.bazel.build/modules/rules_jvm_external/6.1/MODULE.bazel": "75b5fec090dbd46cf9b7d8ea08cf84a0472d92ba3585b476f44c326eda8059c4",
+    "https://bcr.bazel.build/modules/rules_jvm_external/6.2/MODULE.bazel": "36a6e52487a855f33cb960724eb56547fa87e2c98a0474c3acad94339d7f8e99",
+    "https://bcr.bazel.build/modules/rules_jvm_external/6.3/MODULE.bazel": "c998e060b85f71e00de5ec552019347c8bca255062c990ac02d051bb80a38df0",
+    "https://bcr.bazel.build/modules/rules_jvm_external/6.6/MODULE.bazel": "153042249c7060536dc95b6bb9f9bb8063b8a0b0cb7acdb381bddbc2374aed55",
+    "https://bcr.bazel.build/modules/rules_jvm_external/6.7/MODULE.bazel": "e717beabc4d091ecb2c803c2d341b88590e9116b8bf7947915eeb33aab4f96dd",
+    "https://bcr.bazel.build/modules/rules_jvm_external/6.7/source.json": "5426f412d0a7fc6b611643376c7e4a82dec991491b9ce5cb1cfdd25fe2e92be4",
+    "https://bcr.bazel.build/modules/rules_kotlin/1.9.0/MODULE.bazel": "ef85697305025e5a61f395d4eaede272a5393cee479ace6686dba707de804d59",
+    "https://bcr.bazel.build/modules/rules_kotlin/1.9.5/MODULE.bazel": "043a16a572f610558ec2030db3ff0c9938574e7dd9f58bded1bb07c0192ef025",
+    "https://bcr.bazel.build/modules/rules_kotlin/1.9.6/MODULE.bazel": "d269a01a18ee74d0335450b10f62c9ed81f2321d7958a2934e44272fe82dcef3",
+    "https://bcr.bazel.build/modules/rules_kotlin/1.9.6/source.json": "2faa4794364282db7c06600b7e5e34867a564ae91bda7cae7c29c64e9466b7d5",
+    "https://bcr.bazel.build/modules/rules_license/0.0.3/MODULE.bazel": "627e9ab0247f7d1e05736b59dbb1b6871373de5ad31c3011880b4133cafd4bd0",
+    "https://bcr.bazel.build/modules/rules_license/0.0.7/MODULE.bazel": "088fbeb0b6a419005b89cf93fe62d9517c0a2b8bb56af3244af65ecfe37e7d5d",
+    "https://bcr.bazel.build/modules/rules_license/1.0.0/MODULE.bazel": "a7fda60eefdf3d8c827262ba499957e4df06f659330bbe6cdbdb975b768bb65c",
+    "https://bcr.bazel.build/modules/rules_license/1.0.0/source.json": "a52c89e54cc311196e478f8382df91c15f7a2bfdf4c6cd0e2675cc2ff0b56efb",
+    "https://bcr.bazel.build/modules/rules_pkg/0.7.0/MODULE.bazel": "df99f03fc7934a4737122518bb87e667e62d780b610910f0447665a7e2be62dc",
+    "https://bcr.bazel.build/modules/rules_pkg/1.0.1/MODULE.bazel": "5b1df97dbc29623bccdf2b0dcd0f5cb08e2f2c9050aab1092fd39a41e82686ff",
+    "https://bcr.bazel.build/modules/rules_pkg/1.0.1/source.json": "bd82e5d7b9ce2d31e380dd9f50c111d678c3bdaca190cb76b0e1c71b05e1ba8a",
+    "https://bcr.bazel.build/modules/rules_proto/4.0.0/MODULE.bazel": "a7a7b6ce9bee418c1a760b3d84f83a299ad6952f9903c67f19e4edd964894e06",
+    "https://bcr.bazel.build/modules/rules_proto/5.3.0-21.7/MODULE.bazel": "e8dff86b0971688790ae75528fe1813f71809b5afd57facb44dad9e8eca631b7",
+    "https://bcr.bazel.build/modules/rules_proto/6.0.0-rc1/MODULE.bazel": "1e5b502e2e1a9e825eef74476a5a1ee524a92297085015a052510b09a1a09483",
+    "https://bcr.bazel.build/modules/rules_proto/6.0.0/MODULE.bazel": "b531d7f09f58dce456cd61b4579ce8c86b38544da75184eadaf0a7cb7966453f",
+    "https://bcr.bazel.build/modules/rules_proto/6.0.2/MODULE.bazel": "ce916b775a62b90b61888052a416ccdda405212b6aaeb39522f7dc53431a5e73",
+    "https://bcr.bazel.build/modules/rules_proto/7.0.2/MODULE.bazel": "bf81793bd6d2ad89a37a40693e56c61b0ee30f7a7fdbaf3eabbf5f39de47dea2",
+    "https://bcr.bazel.build/modules/rules_proto/7.0.2/source.json": "1e5e7260ae32ef4f2b52fd1d0de8d03b606a44c91b694d2f1afb1d3b28a48ce1",
+    "https://bcr.bazel.build/modules/rules_python/0.10.2/MODULE.bazel": "cc82bc96f2997baa545ab3ce73f196d040ffb8756fd2d66125a530031cd90e5f",
+    "https://bcr.bazel.build/modules/rules_python/0.23.1/MODULE.bazel": "49ffccf0511cb8414de28321f5fcf2a31312b47c40cc21577144b7447f2bf300",
+    "https://bcr.bazel.build/modules/rules_python/0.25.0/MODULE.bazel": "72f1506841c920a1afec76975b35312410eea3aa7b63267436bfb1dd91d2d382",
+    "https://bcr.bazel.build/modules/rules_python/0.28.0/MODULE.bazel": "cba2573d870babc976664a912539b320cbaa7114cd3e8f053c720171cde331ed",
+    "https://bcr.bazel.build/modules/rules_python/0.31.0/MODULE.bazel": "93a43dc47ee570e6ec9f5779b2e64c1476a6ce921c48cc9a1678a91dd5f8fd58",
+    "https://bcr.bazel.build/modules/rules_python/0.33.2/MODULE.bazel": "3e036c4ad8d804a4dad897d333d8dce200d943df4827cb849840055be8d2e937",
+    "https://bcr.bazel.build/modules/rules_python/0.37.1/MODULE.bazel": "3faeb2d9fa0a81f8980643ee33f212308f4d93eea4b9ce6f36d0b742e71e9500",
+    "https://bcr.bazel.build/modules/rules_python/0.37.2/MODULE.bazel": "b5ffde91410745750b6c13be1c5dc4555ef5bc50562af4a89fd77807fdde626a",
+    "https://bcr.bazel.build/modules/rules_python/0.4.0/MODULE.bazel": "9208ee05fd48bf09ac60ed269791cf17fb343db56c8226a720fbb1cdf467166c",
+    "https://bcr.bazel.build/modules/rules_python/0.40.0/MODULE.bazel": "9d1a3cd88ed7d8e39583d9ffe56ae8a244f67783ae89b60caafc9f5cf318ada7",
+    "https://bcr.bazel.build/modules/rules_python/1.0.0/MODULE.bazel": "898a3d999c22caa585eb062b600f88654bf92efb204fa346fb55f6f8edffca43",
+    "https://bcr.bazel.build/modules/rules_python/1.2.0/MODULE.bazel": "5aeeb48b2a6c19d668b48adf2b8a2b209a6310c230db0ce77450f148a89846e4",
+    "https://bcr.bazel.build/modules/rules_python/1.6.3/MODULE.bazel": "a7b80c42cb3de5ee2a5fa1abc119684593704fcd2fec83165ebe615dec76574f",
+    "https://bcr.bazel.build/modules/rules_python/1.6.3/source.json": "f0be74977e5604a6526c8a416cda22985093ff7d5d380d41722d7e44015cc419",
+    "https://bcr.bazel.build/modules/rules_robolectric/4.14.1.2/MODULE.bazel": "d44fec647d0aeb67b9f3b980cf68ba634976f3ae7ccd6c07d790b59b87a4f251",
+    "https://bcr.bazel.build/modules/rules_robolectric/4.14.1.2/source.json": "37c10335f2361c337c5c1f34ed36d2da70534c23088062b33a8bdaab68aa9dea",
+    "https://bcr.bazel.build/modules/rules_shell/0.1.2/MODULE.bazel": "66e4ca3ce084b04af0b9ff05ff14cab4e5df7503973818bb91cbc6cda08d32fc",
+    "https://bcr.bazel.build/modules/rules_shell/0.2.0/MODULE.bazel": "fda8a652ab3c7d8fee214de05e7a9916d8b28082234e8d2c0094505c5268ed3c",
+    "https://bcr.bazel.build/modules/rules_shell/0.3.0/MODULE.bazel": "de4402cd12f4cc8fda2354fce179fdb068c0b9ca1ec2d2b17b3e21b24c1a937b",
+    "https://bcr.bazel.build/modules/rules_shell/0.6.1/MODULE.bazel": "72e76b0eea4e81611ef5452aa82b3da34caca0c8b7b5c0c9584338aa93bae26b",
+    "https://bcr.bazel.build/modules/rules_shell/0.6.1/source.json": "20ec05cd5e592055e214b2da8ccb283c7f2a421ea0dc2acbf1aa792e11c03d0c",
+    "https://bcr.bazel.build/modules/rules_swift/1.16.0/MODULE.bazel": "4a09f199545a60d09895e8281362b1ff3bb08bbde69c6fc87aff5b92fcc916ca",
+    "https://bcr.bazel.build/modules/rules_swift/2.1.1/MODULE.bazel": "494900a80f944fc7aa61500c2073d9729dff0b764f0e89b824eb746959bc1046",
+    "https://bcr.bazel.build/modules/rules_swift/2.1.1/source.json": "40fc69dfaac64deddbb75bd99cdac55f4427d9ca0afbe408576a65428427a186",
+    "https://bcr.bazel.build/modules/stardoc/0.5.1/MODULE.bazel": "1a05d92974d0c122f5ccf09291442580317cdd859f07a8655f1db9a60374f9f8",
+    "https://bcr.bazel.build/modules/stardoc/0.5.3/MODULE.bazel": "c7f6948dae6999bf0db32c1858ae345f112cacf98f174c7a8bb707e41b974f1c",
+    "https://bcr.bazel.build/modules/stardoc/0.5.6/MODULE.bazel": "c43dabc564990eeab55e25ed61c07a1aadafe9ece96a4efabb3f8bf9063b71ef",
+    "https://bcr.bazel.build/modules/stardoc/0.6.2/MODULE.bazel": "7060193196395f5dd668eda046ccbeacebfd98efc77fed418dbe2b82ffaa39fd",
+    "https://bcr.bazel.build/modules/stardoc/0.7.0/MODULE.bazel": "05e3d6d30c099b6770e97da986c53bd31844d7f13d41412480ea265ac9e8079c",
+    "https://bcr.bazel.build/modules/stardoc/0.7.1/MODULE.bazel": "3548faea4ee5dda5580f9af150e79d0f6aea934fc60c1cc50f4efdd9420759e7",
+    "https://bcr.bazel.build/modules/stardoc/0.7.2/MODULE.bazel": "fc152419aa2ea0f51c29583fab1e8c99ddefd5b3778421845606ee628629e0e5",
+    "https://bcr.bazel.build/modules/stardoc/0.7.2/source.json": "58b029e5e901d6802967754adf0a9056747e8176f017cfe3607c0851f4d42216",
+    "https://bcr.bazel.build/modules/swift_argument_parser/1.3.1.1/MODULE.bazel": "5e463fbfba7b1701d957555ed45097d7f984211330106ccd1352c6e0af0dcf91",
+    "https://bcr.bazel.build/modules/swift_argument_parser/1.3.1.1/source.json": "32bd87e5f4d7acc57c5b2ff7c325ae3061d5e242c0c4c214ae87e0f1c13e54cb",
+    "https://bcr.bazel.build/modules/upb/0.0.0-20220923-a547704/MODULE.bazel": "7298990c00040a0e2f121f6c32544bab27d4452f80d9ce51349b1a28f3005c43",
+    "https://bcr.bazel.build/modules/upb/0.0.0-20230516-61a97ef/MODULE.bazel": "c0df5e35ad55e264160417fd0875932ee3c9dda63d9fccace35ac62f45e1b6f9",
+    "https://bcr.bazel.build/modules/zlib/1.2.11/MODULE.bazel": "07b389abc85fdbca459b69e2ec656ae5622873af3f845e1c9d80fe179f3effa0",
+    "https://bcr.bazel.build/modules/zlib/1.2.12/MODULE.bazel": "3b1a8834ada2a883674be8cbd36ede1b6ec481477ada359cd2d3ddc562340b27",
+    "https://bcr.bazel.build/modules/zlib/1.3.1.bcr.3/MODULE.bazel": "af322bc08976524477c79d1e45e241b6efbeb918c497e8840b8ab116802dda79",
+    "https://bcr.bazel.build/modules/zlib/1.3.1.bcr.5/MODULE.bazel": "eec517b5bbe5492629466e11dae908d043364302283de25581e3eb944326c4ca",
+    "https://bcr.bazel.build/modules/zlib/1.3.1.bcr.5/source.json": "22bc55c47af97246cfc093d0acf683a7869377de362b5d1c552c2c2e16b7a806",
+    "https://bcr.bazel.build/modules/zlib/1.3.1/MODULE.bazel": "751c9940dcfe869f5f7274e1295422a34623555916eb98c174c1e945594bf198"
+  },
+  "selectedYankedVersions": {},
+  "moduleExtensions": {
+    "//:extensions.bzl%llvm_deps_extension": {
+      "general": {
+        "bzlTransitiveDigest": "LGeZ4Ibt22AGXloFt/bm3EsBB05m6aTG+WxfH8fJVB4=",
+        "usagesDigest": "dHBLC1g5cqg/flxcuZRJMp2heDoB4+0/NDd6MutLhGE=",
+        "recordedFileInputs": {},
+        "recordedDirentsInputs": {},
+        "envVariables": {},
+        "generatedRepoSpecs": {
+          "llvm-raw": {
+            "repoRuleId": "@@bazel_tools//tools/build_defs/repo:local.bzl%new_local_repository",
+            "attributes": {
+              "build_file_content": "# empty",
+              "path": "../../"
+            }
+          },
+          "llvm_zlib": {
+            "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive",
+            "attributes": {
+              "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:zlib-ng.BUILD",
+              "sha256": "e36bb346c00472a1f9ff2a0a4643e590a254be6379da7cddd9daeb9a7f296731",
+              "strip_prefix": "zlib-ng-2.0.7",
+              "urls": [
+                "https://github.com/zlib-ng/zlib-ng/archive/refs/tags/2.0.7.zip"
+              ]
+            }
+          },
+          "vulkan_headers": {
+            "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive",
+            "attributes": {
+              "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:vulkan_headers.BUILD",
+              "sha256": "19f491784ef0bc73caff877d11c96a48b946b5a1c805079d9006e3fbaa5c1895",
+              "strip_prefix": "Vulkan-Headers-9bd3f561bcee3f01d22912de10bb07ce4e23d378",
+              "urls": [
+                "https://github.com/KhronosGroup/Vulkan-Headers/archive/9bd3f561bcee3f01d22912de10bb07ce4e23d378.tar.gz"
+              ]
+            }
+          },
+          "vulkan_sdk_setup": {
+            "repoRuleId": "@@//:vulkan_sdk.bzl%vulkan_sdk_setup",
+            "attributes": {}
+          },
+          "gmp": {
+            "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive",
+            "attributes": {
+              "urls": [
+                "https://gmplib.org/download/gmp/gmp-6.2.1.tar.xz",
+                "https://ftp.gnu.org/gnu/gmp/gmp-6.2.1.tar.xz"
+              ],
+              "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:gmp.BUILD",
+              "sha256": "fd4829912cddd12f84181c3451cc752be224643e87fac497b69edddadc49b4f2",
+              "strip_prefix": "gmp-6.2.1"
+            }
+          },
+          "mpfr": {
+            "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive",
+            "attributes": {
+              "urls": [
+                "https://www.mpfr.org/mpfr-current/mpfr-4.2.2.tar.gz"
+              ],
+              "sha256": "826cbb24610bd193f36fde172233fb8c009f3f5c2ad99f644d0dea2e16a20e42",
+              "strip_prefix": "mpfr-4.2.2",
+              "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:mpfr.BUILD"
+            }
+          },
+          "mpc": {
+            "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive",
+            "attributes": {
+              "urls": [
+                "https://ftp.gnu.org/gnu/mpc/mpc-1.3.1.tar.gz"
+              ],
+              "sha256": "ab642492f5cf882b74aa0cb730cd410a81edcdbec895183ce930e706c1c759b8",
+              "strip_prefix": "mpc-1.3.1",
+              "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:mpc.BUILD"
+            }
+          },
+          "pfm": {
+            "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive",
+            "attributes": {
+              "urls": [
+                "https://versaweb.dl.sourceforge.net/project/perfmon2/libpfm4/libpfm-4.13.0.tar.gz"
+              ],
+              "sha256": "d18b97764c755528c1051d376e33545d0eb60c6ebf85680436813fa5b04cc3d1",
+              "strip_prefix": "libpfm-4.13.0",
+              "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:pfm.BUILD"
+            }
+          },
+          "llvm_zstd": {
+            "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive",
+            "attributes": {
+              "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:zstd.BUILD",
+              "sha256": "7c42d56fac126929a6a85dbc73ff1db2411d04f104fae9bdea51305663a83fd0",
+              "strip_prefix": "zstd-1.5.2",
+              "urls": [
+                "https://github.com/facebook/zstd/releases/download/v1.5.2/zstd-1.5.2.tar.gz"
+              ]
+            }
+          },
+          "pybind11": {
+            "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive",
+            "attributes": {
+              "url": "https://github.com/pybind/pybind11/archive/v2.10.3.zip",
+              "sha256": "201966a61dc826f1b1879a24a3317a1ec9214a918c8eb035be2f30c3e9cfbdcb",
+              "strip_prefix": "pybind11-2.10.3",
+              "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:pybind.BUILD"
+            }
+          },
+          "pyyaml": {
+            "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive",
+            "attributes": {
+              "url": "https://github.com/yaml/pyyaml/archive/refs/tags/5.1.zip",
+              "sha256": "f0a35d7f282a6d6b1a4f3f3965ef5c124e30ed27a0088efb97c0977268fd671f",
+              "strip_prefix": "pyyaml-5.1/lib3",
+              "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:pyyaml.BUILD"
+            }
+          },
+          "robin_map": {
+            "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive",
+            "attributes": {
+              "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:robin_map.BUILD",
+              "sha256": "a8424ad3b0affd4c57ed26f0f3d8a29604f0e1f2ef2089f497f614b1c94c7236",
+              "strip_prefix": "robin-map-1.3.0",
+              "url": "https://github.com/Tessil/robin-map/archive/refs/tags/v1.3.0.tar.gz"
+            }
+          },
+          "nanobind": {
+            "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive",
+            "attributes": {
+              "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:nanobind.BUILD",
+              "sha256": "8ce3667dce3e64fc06bfb9b778b6f48731482362fb89a43da156632266cd5a90",
+              "strip_prefix": "nanobind-2.9.2",
+              "url": "https://github.com/wjakob/nanobind/archive/refs/tags/v2.9.2.tar.gz"
+            }
+          }
+        },
+        "recordedRepoMappingEntries": [
+          [
+            "",
+            "bazel_tools",
+            "bazel_tools"
+          ]
+        ]
+      }
+    },
+    "@@rules_android+//rules/android_sdk_repository:rule.bzl%android_sdk_repository_extension": {
+      "general": {
+        "bzlTransitiveDigest": "NAy+0M15JNVEBb8Tny6t7j3lKqTnsAMjoBB6LJ+C370=",
+        "usagesDigest": "g9Ur6X6qhf9a8MmY9qXU/jFjkyk/aZVBegI0yVMF0z4=",
+        "recordedFileInputs": {},
+        "recordedDirentsInputs": {},
+        "envVariables": {},
+        "generatedRepoSpecs": {
+          "androidsdk": {
+            "repoRuleId": "@@rules_android+//rules/android_sdk_repository:rule.bzl%_android_sdk_repository",
+            "attributes": {}
+          }
+        },
+        "recordedRepoMappingEntries": []
+      }
+    },
+    "@@rules_kotlin+//src/main/starlark/core/repositories:bzlmod_setup.bzl%rules_kotlin_extensions": {
+      "general": {
+        "bzlTransitiveDigest": "sFhcgPbDQehmbD1EOXzX4H1q/CD5df8zwG4kp4jbvr8=",
+        "usagesDigest": "QI2z8ZUR+mqtbwsf2fLqYdJAkPOHdOV+tF2yVAUgRzw=",
+        "recordedFileInputs": {},
+        "recordedDirentsInputs": {},
+        "envVariables": {},
+        "generatedRepoSpecs": {
+          "com_github_jetbrains_kotlin_git": {
+            "repoRuleId": "@@rules_kotlin+//src/main/starlark/core/repositories:compiler.bzl%kotlin_compiler_git_repository",
+            "attributes": {
+              "urls": [
+                "https://github.com/JetBrains/kotlin/releases/download/v1.9.23/kotlin-compiler-1.9.23.zip"
+              ],
+              "sha256": "93137d3aab9afa9b27cb06a824c2324195c6b6f6179d8a8653f440f5bd58be88"
+            }
+          },
+          "com_github_jetbrains_kotlin": {
+            "repoRuleId": "@@rules_kotlin+//src/main/starlark/core/repositories:compiler.bzl%kotlin_capabilities_repository",
+            "attributes": {
+              "git_repository_name": "com_github_jetbrains_kotlin_git",
+              "compiler_version": "1.9.23"
+            }
+          },
+          "com_github_google_ksp": {
+            "repoRuleId": "@@rules_kotlin+//src/main/starlark/core/repositories:ksp.bzl%ksp_compiler_plugin_repository",
+            "attributes": {
+              "urls": [
+                "https://github.com/google/ksp/releases/download/1.9.23-1.0.20/artifacts.zip"
+              ],
+              "sha256": "ee0618755913ef7fd6511288a232e8fad24838b9af6ea73972a76e81053c8c2d",
+              "strip_version": "1.9.23-1.0.20"
+            }
+          },
+          "com_github_pinterest_ktlint": {
+            "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_file",
+            "attributes": {
+              "sha256": "01b2e0ef893383a50dbeb13970fe7fa3be36ca3e83259e01649945b09d736985",
+              "urls": [
+                "https://github.com/pinterest/ktlint/releases/download/1.3.0/ktlint"
+              ],
+              "executable": true
+            }
+          },
+          "rules_android": {
+            "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive",
+            "attributes": {
+              "sha256": "cd06d15dd8bb59926e4d65f9003bfc20f9da4b2519985c27e190cddc8b7a7806",
+              "strip_prefix": "rules_android-0.1.1",
+              "urls": [
+                "https://github.com/bazelbuild/rules_android/archive/v0.1.1.zip"
+              ]
+            }
+          }
+        },
+        "recordedRepoMappingEntries": [
+          [
+            "rules_kotlin+",
+            "bazel_tools",
+            "bazel_tools"
+          ]
+        ]
+      }
+    },
+    "@@rules_python+//python/uv:uv.bzl%uv": {
+      "general": {
+        "bzlTransitiveDigest": "477hS4MXeJ7LqPNLTqL+1ltraV5lqwOw3tEXWqnJRt8=",
+        "usagesDigest": "icnInV8HDGrRQf9x8RMfxWfBHgT3OgRlYovS/9POEJw=",
+        "recordedFileInputs": {},
+        "recordedDirentsInputs": {},
+        "envVariables": {},
+        "generatedRepoSpecs": {
+          "uv": {
+            "repoRuleId": "@@rules_python+//python/uv/private:uv_toolchains_repo.bzl%uv_toolchains_repo",
+            "attributes": {
+              "toolchain_type": "'@@rules_python+//python/uv:uv_toolchain_type'",
+              "toolchain_names": [
+                "none"
+              ],
+              "toolchain_implementations": {
+                "none": "'@@rules_python+//python:none'"
+              },
+              "toolchain_compatible_with": {
+                "none": [
+                  "@platforms//:incompatible"
+                ]
+              },
+              "toolchain_target_settings": {}
+            }
+          }
+        },
+        "recordedRepoMappingEntries": [
+          [
+            "rules_python+",
+            "bazel_tools",
+            "bazel_tools"
+          ]
+        ]
+      }
+    }
+  }
+}
diff --git a/utils/bazel/extensions.bzl b/utils/bazel/extensions.bzl
new file mode 100644
index 0000000000000..b0d5871b722a7
--- /dev/null
+++ b/utils/bazel/extensions.bzl
@@ -0,0 +1,127 @@
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+"""bzlmod extensions for llvm-project"""
+
+load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
+load("@bazel_tools//tools/build_defs/repo:local.bzl", "new_local_repository")
+load(":vulkan_sdk.bzl", "vulkan_sdk_setup")
+
+def _llvm_repos_extension_impl(module_ctx):
+    if any([m.is_root and m.name == "llvm-project-overlay" for m in module_ctx.modules]):
+        new_local_repository(
+            name = "llvm-raw",
+            build_file_content = "# empty",
+            path = "../../",
+        )
+
+    http_archive(
+        name = "llvm_zlib",
+        build_file = "@llvm-raw//utils/bazel/third_party_build:zlib-ng.BUILD",
+        sha256 = "e36bb346c00472a1f9ff2a0a4643e590a254be6379da7cddd9daeb9a7f296731",
+        strip_prefix = "zlib-ng-2.0.7",
+        urls = [
+            "https://github.com/zlib-ng/zlib-ng/archive/refs/tags/2.0.7.zip",
+        ],
+    )
+
+    http_archive(
+        name = "vulkan_headers",
+        build_file = "@llvm-raw//utils/bazel/third_party_build:vulkan_headers.BUILD",
+        sha256 = "19f491784ef0bc73caff877d11c96a48b946b5a1c805079d9006e3fbaa5c1895",
+        strip_prefix = "Vulkan-Headers-9bd3f561bcee3f01d22912de10bb07ce4e23d378",
+        urls = [
+            "https://github.com/KhronosGroup/Vulkan-Headers/archive/9bd3f561bcee3f01d22912de10bb07ce4e23d378.tar.gz",
+        ],
+    )
+
+    vulkan_sdk_setup(name = "vulkan_sdk_setup")
+
+    http_archive(
+        name = "gmp",
+        urls = [
+            "https://gmplib.org/download/gmp/gmp-6.2.1.tar.xz",
+            "https://ftp.gnu.org/gnu/gmp/gmp-6.2.1.tar.xz",
+        ],
+        build_file = "@llvm-raw//utils/bazel/third_party_build:gmp.BUILD",
+        sha256 = "fd4829912cddd12f84181c3451cc752be224643e87fac497b69edddadc49b4f2",
+        strip_prefix = "gmp-6.2.1",
+    )
+
+    http_archive(
+        name = "mpfr",
+        urls = [
+            "https://www.mpfr.org/mpfr-current/mpfr-4.2.2.tar.gz",
+        ],
+        sha256 = "826cbb24610bd193f36fde172233fb8c009f3f5c2ad99f644d0dea2e16a20e42",
+        strip_prefix = "mpfr-4.2.2",
+        build_file = "@llvm-raw//utils/bazel/third_party_build:mpfr.BUILD",
+    )
+
+    http_archive(
+        name = "mpc",
+        urls = [
+            "https://ftp.gnu.org/gnu/mpc/mpc-1.3.1.tar.gz",
+        ],
+        sha256 = "ab642492f5cf882b74aa0cb730cd410a81edcdbec895183ce930e706c1c759b8",
+        strip_prefix = "mpc-1.3.1",
+        build_file = "@llvm-raw//utils/bazel/third_party_build:mpc.BUILD",
+    )
+
+    http_archive(
+        name = "pfm",
+        urls = [
+            "https://versaweb.dl.sourceforge.net/project/perfmon2/libpfm4/libpfm-4.13.0.tar.gz",
+        ],
+        sha256 = "d18b97764c755528c1051d376e33545d0eb60c6ebf85680436813fa5b04cc3d1",
+        strip_prefix = "libpfm-4.13.0",
+        build_file = "@llvm-raw//utils/bazel/third_party_build:pfm.BUILD",
+    )
+
+    http_archive(
+        name = "llvm_zstd",
+        build_file = "@llvm-raw//utils/bazel/third_party_build:zstd.BUILD",
+        sha256 = "7c42d56fac126929a6a85dbc73ff1db2411d04f104fae9bdea51305663a83fd0",
+        strip_prefix = "zstd-1.5.2",
+        urls = [
+            "https://github.com/facebook/zstd/releases/download/v1.5.2/zstd-1.5.2.tar.gz",
+        ],
+    )
+
+    http_archive(
+        name = "pybind11",
+        url = "https://github.com/pybind/pybind11/archive/v2.10.3.zip",
+        sha256 = "201966a61dc826f1b1879a24a3317a1ec9214a918c8eb035be2f30c3e9cfbdcb",
+        strip_prefix = "pybind11-2.10.3",
+        build_file = "@llvm-raw//utils/bazel/third_party_build:pybind.BUILD",
+    )
+
+    http_archive(
+        name = "pyyaml",
+        url = "https://github.com/yaml/pyyaml/archive/refs/tags/5.1.zip",
+        sha256 = "f0a35d7f282a6d6b1a4f3f3965ef5c124e30ed27a0088efb97c0977268fd671f",
+        strip_prefix = "pyyaml-5.1/lib3",
+        build_file = "@llvm-raw//utils/bazel/third_party_build:pyyaml.BUILD",
+    )
+
+    # TODO: bump to robin-map-1.4.0
+    http_archive(
+        name = "robin_map",
+        build_file = "@llvm-raw//utils/bazel/third_party_build:robin_map.BUILD",
+        sha256 = "a8424ad3b0affd4c57ed26f0f3d8a29604f0e1f2ef2089f497f614b1c94c7236",
+        strip_prefix = "robin-map-1.3.0",
+        url = "https://github.com/Tessil/robin-map/archive/refs/tags/v1.3.0.tar.gz",
+    )
+
+    http_archive(
+        name = "nanobind",
+        build_file = "@llvm-raw//utils/bazel/third_party_build:nanobind.BUILD",
+        sha256 = "8ce3667dce3e64fc06bfb9b778b6f48731482362fb89a43da156632266cd5a90",
+        strip_prefix = "nanobind-2.9.2",
+        url = "https://github.com/wjakob/nanobind/archive/refs/tags/v2.9.2.tar.gz",
+    )
+
+llvm_repos_extension = module_extension(
+    implementation = _llvm_repos_extension_impl,
+)

From 47d9d735a7aef937256536af490876879c4b4731 Mon Sep 17 00:00:00 2001
From: Asher Mancinelli <ashermancinelli@gmail.com>
Date: Tue, 18 Nov 2025 07:55:11 -0800
Subject: [PATCH 31/52] [MLIR][Python] Add arg_attrs and res_attrs to gpu func
 (#168475)

I missed these attributes when I added the wrapper for GPUFuncOp in
fbdd98f74f0d.
---
 mlir/python/mlir/dialects/gpu/__init__.py |  6 ++++--
 mlir/test/python/dialects/gpu/dialect.py  | 12 +++++++-----
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/mlir/python/mlir/dialects/gpu/__init__.py b/mlir/python/mlir/dialects/gpu/__init__.py
index 2fbcbb059f87a..d15643ca700e4 100644
--- a/mlir/python/mlir/dialects/gpu/__init__.py
+++ b/mlir/python/mlir/dialects/gpu/__init__.py
@@ -49,13 +49,13 @@ class GPUFuncOp(GPUFuncOp):
 
     FUNCTION_TYPE_ATTR_NAME = "function_type"
     SYM_NAME_ATTR_NAME = "sym_name"
-    ARGUMENT_ATTR_NAME = "arg_attrs"
-    RESULT_ATTR_NAME = "res_attrs"
 
     def __init__(
         self,
         function_type: Union[FunctionType, TypeAttr],
         sym_name: Optional[Union[str, StringAttr]] = None,
+        arg_attrs: Optional[Sequence[dict]] = None,
+        res_attrs: Optional[Sequence[dict]] = None,
         kernel: Optional[bool] = None,
         workgroup_attrib_attrs: Optional[Sequence[dict]] = None,
         private_attrib_attrs: Optional[Sequence[dict]] = None,
@@ -88,6 +88,8 @@ def __init__(
         )
         super().__init__(
             function_type,
+            arg_attrs=arg_attrs,
+            res_attrs=res_attrs,
             workgroup_attrib_attrs=workgroup_attrib_attrs,
             private_attrib_attrs=private_attrib_attrs,
             loc=loc,
diff --git a/mlir/test/python/dialects/gpu/dialect.py b/mlir/test/python/dialects/gpu/dialect.py
index 3945c99c41091..1a009b7dfa30d 100644
--- a/mlir/test/python/dialects/gpu/dialect.py
+++ b/mlir/test/python/dialects/gpu/dialect.py
@@ -133,9 +133,10 @@ def builder(func: gpu.GPUFuncOp) -> None:
             ), func.known_grid_size
 
             func = gpu.GPUFuncOp(
-                func_type,
+                ir.FunctionType.get(inputs=[T.index()], results=[]),
                 sym_name="non_kernel_func",
                 body_builder=builder,
+                arg_attrs=[{"gpu.some_attribute": ir.StringAttr.get("foo")}],
             )
             assert not func.is_kernel
             assert func.known_block_size is None
@@ -154,10 +155,11 @@ def builder(func: gpu.GPUFuncOp) -> None:
     # CHECK:   %[[VAL_0:.*]] = gpu.global_id  x
     # CHECK:   gpu.return
     # CHECK: }
-    # CHECK: gpu.func @non_kernel_func() {
-    # CHECK:   %[[VAL_0:.*]] = gpu.global_id  x
-    # CHECK:   gpu.return
-    # CHECK: }
+    # CHECK:   gpu.func @non_kernel_func(
+    # CHECK-SAME:      %[[ARG0:.*]]: index {gpu.some_attribute = "foo"}) {
+    # CHECK:           %[[GLOBAL_ID_0:.*]] = gpu.global_id  x
+    # CHECK:           gpu.return
+    # CHECK:         }
 
 
 # CHECK-LABEL: testGPULaunchFuncOp

From 83d27f6c84d92b4450a62f4b650b9cfadc0dab0f Mon Sep 17 00:00:00 2001
From: Nabeel Omer <nabeel.omer@sony.com>
Date: Tue, 18 Nov 2025 15:55:54 +0000
Subject: [PATCH 32/52] [Clang][Driver] Create crash reproducers for IR inputs
 (#165572)

This patch makes Clang produce the crash reproducer shell script for IR
inputs as well.
---
 clang/lib/Driver/Driver.cpp          | 109 +++++++++++++++++++--------
 clang/test/Driver/crash-ir-repro.cpp |  15 ++++
 2 files changed, 91 insertions(+), 33 deletions(-)
 create mode 100644 clang/test/Driver/crash-ir-repro.cpp

diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 04fd68692d8d8..426fc796ffc20 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -70,6 +70,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSet.h"
@@ -103,6 +104,7 @@
 #include <memory>
 #include <optional>
 #include <set>
+#include <string>
 #include <utility>
 #if LLVM_ON_UNIX
 #include <unistd.h> // getpid
@@ -2050,12 +2052,17 @@ void Driver::generateCompilationDiagnostics(
   InputList Inputs;
   BuildInputs(C.getDefaultToolChain(), C.getArgs(), Inputs);
 
+  ArgStringList IRInputs;
   for (InputList::iterator it = Inputs.begin(), ie = Inputs.end(); it != ie;) {
     bool IgnoreInput = false;
 
-    // Ignore input from stdin or any inputs that cannot be preprocessed.
-    // Check type first as not all linker inputs have a value.
-    if (types::getPreprocessedType(it->first) == types::TY_INVALID) {
+    // Save IR inputs separately, ignore input from stdin or any other inputs
+    // that cannot be preprocessed. Check type first as not all linker inputs
+    // have a value.
+    if (types::isLLVMIR(it->first)) {
+      IRInputs.push_back(it->second->getValue());
+      IgnoreInput = true;
+    } else if (types::getPreprocessedType(it->first) == types::TY_INVALID) {
       IgnoreInput = true;
     } else if (!strcmp(it->second->getValue(), "-")) {
       Diag(clang::diag::note_drv_command_failed_diag_msg)
@@ -2072,7 +2079,7 @@ void Driver::generateCompilationDiagnostics(
     }
   }
 
-  if (Inputs.empty()) {
+  if (Inputs.empty() && IRInputs.empty()) {
     Diag(clang::diag::note_drv_command_failed_diag_msg)
         << "Error generating preprocessed source(s) - "
            "no preprocessable inputs.";
@@ -2095,46 +2102,82 @@ void Driver::generateCompilationDiagnostics(
     return;
   }
 
-  // Construct the list of abstract actions to perform for this compilation. On
-  // Darwin OSes this uses the driver-driver and builds universal actions.
-  const ToolChain &TC = C.getDefaultToolChain();
-  if (TC.getTriple().isOSBinFormatMachO())
-    BuildUniversalActions(C, TC, Inputs);
-  else
-    BuildActions(C, C.getArgs(), Inputs, C.getActions());
+  // If we only have IR inputs there's no need for preprocessing.
+  if (!Inputs.empty()) {
+    // Construct the list of abstract actions to perform for this compilation.
+    // On Darwin OSes this uses the driver-driver and builds universal actions.
+    const ToolChain &TC = C.getDefaultToolChain();
+    if (TC.getTriple().isOSBinFormatMachO())
+      BuildUniversalActions(C, TC, Inputs);
+    else
+      BuildActions(C, C.getArgs(), Inputs, C.getActions());
 
-  BuildJobs(C);
+    BuildJobs(C);
 
-  // If there were errors building the compilation, quit now.
-  if (Trap.hasErrorOccurred()) {
-    Diag(clang::diag::note_drv_command_failed_diag_msg)
-        << "Error generating preprocessed source(s).";
-    return;
-  }
+    // If there were errors building the compilation, quit now.
+    if (Trap.hasErrorOccurred()) {
+      Diag(clang::diag::note_drv_command_failed_diag_msg)
+          << "Error generating preprocessed source(s).";
+      return;
+    }
+    // Generate preprocessed output.
+    SmallVector<std::pair<int, const Command *>, 4> FailingCommands;
+    C.ExecuteJobs(C.getJobs(), FailingCommands);
 
-  // Generate preprocessed output.
-  SmallVector<std::pair<int, const Command *>, 4> FailingCommands;
-  C.ExecuteJobs(C.getJobs(), FailingCommands);
+    // If any of the preprocessing commands failed, clean up and exit.
+    if (!FailingCommands.empty()) {
+      Diag(clang::diag::note_drv_command_failed_diag_msg)
+          << "Error generating preprocessed source(s).";
+      return;
+    }
 
-  // If any of the preprocessing commands failed, clean up and exit.
-  if (!FailingCommands.empty()) {
-    Diag(clang::diag::note_drv_command_failed_diag_msg)
-        << "Error generating preprocessed source(s).";
-    return;
+    const ArgStringList &TempFiles = C.getTempFiles();
+    if (TempFiles.empty()) {
+      Diag(clang::diag::note_drv_command_failed_diag_msg)
+          << "Error generating preprocessed source(s).";
+      return;
+    }
   }
 
-  const ArgStringList &TempFiles = C.getTempFiles();
-  if (TempFiles.empty()) {
-    Diag(clang::diag::note_drv_command_failed_diag_msg)
-        << "Error generating preprocessed source(s).";
-    return;
+  // Copying filenames due to ownership.
+  const ArgStringList &Files = C.getTempFiles();
+  SmallVector<std::string> TempFiles(Files.begin(), Files.end());
+
+  // We'd like to copy the IR input file into our own temp file
+  // because the build system might try to clean up after itself.
+  for (auto const *Input : IRInputs) {
+    int FD;
+    llvm::SmallVector<char, 64> Path;
+
+    StringRef extension = llvm::sys::path::extension(Input);
+    if (!extension.empty())
+      extension = extension.drop_front();
+
+    std::error_code EC = llvm::sys::fs::createTemporaryFile(
+        llvm::sys::path::stem(Input), extension, FD, Path);
+    if (EC) {
+      Diag(clang::diag::note_drv_command_failed_diag_msg)
+          << "Error generating run script: " << "Failed copying IR input files"
+          << " " << EC.message();
+      return;
+    }
+
+    EC = llvm::sys::fs::copy_file(Input, FD);
+    if (EC) {
+      Diag(clang::diag::note_drv_command_failed_diag_msg)
+          << "Error generating run script: " << "Failed copying IR input files"
+          << " " << EC.message();
+      return;
+    }
+
+    TempFiles.push_back(std::string(Path.begin(), Path.end()));
   }
 
   Diag(clang::diag::note_drv_command_failed_diag_msg) << BugReporMsg;
 
   SmallString<128> VFS;
   SmallString<128> ReproCrashFilename;
-  for (const char *TempFile : TempFiles) {
+  for (std::string &TempFile : TempFiles) {
     Diag(clang::diag::note_drv_command_failed_diag_msg) << TempFile;
     if (Report)
       Report->TemporaryFiles.push_back(TempFile);
@@ -2151,7 +2194,7 @@ void Driver::generateCompilationDiagnostics(
   }
 
   for (const char *TempFile : SavedTemps)
-    C.addTempFile(TempFile);
+    TempFiles.push_back(TempFile);
 
   // Assume associated files are based off of the first temporary file.
   CrashReportInfo CrashInfo(TempFiles[0], VFS);
diff --git a/clang/test/Driver/crash-ir-repro.cpp b/clang/test/Driver/crash-ir-repro.cpp
new file mode 100644
index 0000000000000..1f31a5ca1bb34
--- /dev/null
+++ b/clang/test/Driver/crash-ir-repro.cpp
@@ -0,0 +1,15 @@
+// RUN: %clang -S -emit-llvm -o %t.ll %s
+// RUN: not %clang -S -DCRASH %s %t.ll 2>&1 | FileCheck %s
+
+// CHECK: Preprocessed source(s) and associated run script(s) are located at:
+// CHECK-NEXT: clang: note: diagnostic msg: {{.*}}.cpp
+// CHECK-NEXT: clang: note: diagnostic msg: {{.*}}.ll
+// CHECK-NEXT: clang: note: diagnostic msg: {{.*}}.sh
+
+#ifdef CRASH
+#pragma clang __debug parser_crash
+#endif
+
+int main() {
+  return 0;
+}

From a1e47cefa913d53e55d924a6326697f3fe5d1206 Mon Sep 17 00:00:00 2001
From: Emil Tsalapatis <aimilios.tsalapatis@gmail.com>
Date: Tue, 18 Nov 2025 10:58:09 -0500
Subject: [PATCH 33/52] [llvm][AddressSanitizer] option for specifying the
 address space of the shadow map (#167772)

The AddressSanitizer transform currently defaults to placing the shadow
map in address space 0, but it is desirable for some targets (namely
BPF) to select a different address space for the map. Add a hidden
`asan-shadow-addr-space` option (default 0) for specifying the address
space of pointers to the shadow map.
---
 llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 7c364f86fb0e8..49f03fa93f0e0 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -248,6 +248,11 @@ static cl::opt<bool>
                          "platforms that support this"),
                 cl::Hidden, cl::init(true));
 
+static cl::opt<int>
+    ClShadowAddrSpace("asan-shadow-addr-space",
+                      cl::desc("Address space for pointers to the shadow map"),
+                      cl::Hidden, cl::init(0));
+
 static cl::opt<bool> ClWithIfuncSuppressRemat(
     "asan-with-ifunc-suppress-remat",
     cl::desc("Suppress rematerialization of dynamic shadow address by passing "
@@ -1942,7 +1947,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
 
   Type *ShadowTy =
       IntegerType::get(*C, std::max(8U, TypeStoreSize >> Mapping.Scale));
-  Type *ShadowPtrTy = PointerType::get(*C, 0);
+  Type *ShadowPtrTy = PointerType::get(*C, ClShadowAddrSpace);
   Value *ShadowPtr = memToShadow(AddrLong, IRB);
   const uint64_t ShadowAlign =
       std::max<uint64_t>(Alignment.valueOrOne().value() >> Mapping.Scale, 1);

From 82a7832de27aad8f681773875b081013c2c0c9dd Mon Sep 17 00:00:00 2001
From: Emil Tsalapatis <aimilios.tsalapatis@gmail.com>
Date: Tue, 18 Nov 2025 10:58:56 -0500
Subject: [PATCH 34/52] [llvm][AddressSanitizer][BPF] add default shadow
 mapping offset for BPF target (#167768)

The AddressSanitizer transform does not have a BPF-specific default
offset registered for the shadow map. Make the default shadow map offset
for BPF the dynamic-shadow sentinel (kDynamicShadowSentinel), so that it
is set dynamically by the KASAN implementation.
---
 llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 49f03fa93f0e0..3a14ee5addc2f 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -508,6 +508,7 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize,
   bool IsAMDGPU = TargetTriple.isAMDGPU();
   bool IsHaiku = TargetTriple.isOSHaiku();
   bool IsWasm = TargetTriple.isWasm();
+  bool IsBPF = TargetTriple.isBPF();
 
   ShadowMapping Mapping;
 
@@ -584,6 +585,8 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize,
     else if (IsHaiku && IsX86_64)
       Mapping.Offset = (kSmallX86_64ShadowOffsetBase &
                         (kSmallX86_64ShadowOffsetAlignMask << Mapping.Scale));
+    else if (IsBPF)
+      Mapping.Offset = kDynamicShadowSentinel;
     else
       Mapping.Offset = kDefaultShadowOffset64;
   }

From 1347b23cd6510a4149665616433e8505bb6fc6bc Mon Sep 17 00:00:00 2001
From: Emil Tsalapatis <aimilios.tsalapatis@gmail.com>
Date: Tue, 18 Nov 2025 10:59:10 -0500
Subject: [PATCH 35/52] [clang][BPF] Turn on AddressSanitizer pass (#167766)

The BPF LLVM target currently doesn't support turning on the
AddressSanitizer pass, either for userspace ASAN or KASAN. Enable the
KASAN option for the BPF target in anticipation of a KASAN
implementation for BPF.
---
 clang/lib/Driver/ToolChain.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp
index 5ff7d83946137..77a2c73f0d446 100644
--- a/clang/lib/Driver/ToolChain.cpp
+++ b/clang/lib/Driver/ToolChain.cpp
@@ -1639,6 +1639,8 @@ SanitizerMask ToolChain::getSupportedSanitizers() const {
     Res |= SanitizerKind::ShadowCallStack;
   if (getTriple().isAArch64(64))
     Res |= SanitizerKind::MemTag;
+  if (getTriple().isBPF())
+    Res |= SanitizerKind::KernelAddress;
   return Res;
 }
 

From 7aeb2646bff41973f03d4a928ecd7be6ca4a019e Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman@google.com>
Date: Tue, 18 Nov 2025 08:22:25 -0800
Subject: [PATCH 36/52] [ASan] Make most tests run under internal shell on
 Darwin

This patch fixes most of the ASan tests that were failing on Darwin when
running under the internal shell. A few more interesting cases remain,
which I'll handle in a follow-up patch. The tests that still need to be
done:
```
TestCases/Darwin/duplicate_os_log_reports.cpp
TestCases/Darwin/dyld_insert_libraries_reexec.cpp
TestCases/Darwin/interface_symbols_darwin.cpp
```

Reviewers: thetruestblue, fhahn, vitalybuka, DanBlackwell, ndrewh

Reviewed By: DanBlackwell

Pull Request: https://github.com/llvm/llvm-project/pull/168545
---
 .../Darwin/atos-symbolizer-dyld-root-path.cpp |  3 ++-
 .../asan/TestCases/Darwin/atos-symbolizer.cpp |  3 ++-
 .../Darwin/dyld_insert_libraries_reexec.cpp   |  3 ++-
 .../Darwin/dyld_insert_libraries_remove.cpp   | 26 +++++++++----------
 .../asan/TestCases/Darwin/init_for_dlopen.cpp |  2 +-
 .../Darwin/malloc_zone-protected.cpp          |  3 +--
 .../Darwin/llvm_interface_symbols.cpp         |  3 ++-
 7 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/compiler-rt/test/asan/TestCases/Darwin/atos-symbolizer-dyld-root-path.cpp b/compiler-rt/test/asan/TestCases/Darwin/atos-symbolizer-dyld-root-path.cpp
index 664471b6987a8..4201d49df4d74 100644
--- a/compiler-rt/test/asan/TestCases/Darwin/atos-symbolizer-dyld-root-path.cpp
+++ b/compiler-rt/test/asan/TestCases/Darwin/atos-symbolizer-dyld-root-path.cpp
@@ -1,6 +1,7 @@
 // Check that when having a DYLD_ROOT_PATH set, the symbolizer still works.
 // RUN: %clangxx_asan -O0 %s -o %t
-// RUN: %env_asan_opts=verbosity=2 DYLD_ROOT_PATH="/" ASAN_SYMBOLIZER_PATH=$(which atos) \
+// RUN: which atos | tr -d '\n' > %t.symbolizer_path
+// RUN: %env_asan_opts=verbosity=2 DYLD_ROOT_PATH="/" ASAN_SYMBOLIZER_PATH=%{readfile:%t.symbolizer_path} \
 // RUN:   not %run %t 2>&1 | FileCheck %s
 //
 // Due to a bug in atos, this only works on x86_64.
diff --git a/compiler-rt/test/asan/TestCases/Darwin/atos-symbolizer.cpp b/compiler-rt/test/asan/TestCases/Darwin/atos-symbolizer.cpp
index bab4e4f3765c2..7487bd4cb40e6 100644
--- a/compiler-rt/test/asan/TestCases/Darwin/atos-symbolizer.cpp
+++ b/compiler-rt/test/asan/TestCases/Darwin/atos-symbolizer.cpp
@@ -1,7 +1,8 @@
 // Check that the `atos` symbolizer works.
 
 // RUN: %clangxx_asan -O0 %s -o %t
-// RUN: %env_asan_opts=verbosity=2 ASAN_SYMBOLIZER_PATH=$(which atos) not %run %t 2>&1 | FileCheck %s
+// RUN: which atos | tr -d '\n' > %t.symbolizer_path
+// RUN: %env_asan_opts=verbosity=2 ASAN_SYMBOLIZER_PATH=%{readfile:%t.symbolizer_path} not %run %t 2>&1 | FileCheck %s
 
 // Path returned by `which atos` is invalid on iOS.
 // UNSUPPORTED: ios, i386-darwin
diff --git a/compiler-rt/test/asan/TestCases/Darwin/dyld_insert_libraries_reexec.cpp b/compiler-rt/test/asan/TestCases/Darwin/dyld_insert_libraries_reexec.cpp
index 0fec18b89411a..145e162a21c0e 100644
--- a/compiler-rt/test/asan/TestCases/Darwin/dyld_insert_libraries_reexec.cpp
+++ b/compiler-rt/test/asan/TestCases/Darwin/dyld_insert_libraries_reexec.cpp
@@ -4,7 +4,8 @@
 // UNSUPPORTED: ios
 
 // RUN: rm -rf %t && mkdir -p %t
-// RUN: cp `%clang_asan -print-file-name=lib`/darwin/libclang_rt.asan_osx_dynamic.dylib \
+// RUN: %clang_asan -print-file-name=lib | tr -d '\n' > %t.lib_name
+// RUN: cp %{readfile:%t.lib_name}/darwin/libclang_rt.asan_osx_dynamic.dylib \
 // RUN:   %t/libclang_rt.asan_osx_dynamic.dylib
 // RUN: %clangxx_asan %s -o %t/a.out
 
diff --git a/compiler-rt/test/asan/TestCases/Darwin/dyld_insert_libraries_remove.cpp b/compiler-rt/test/asan/TestCases/Darwin/dyld_insert_libraries_remove.cpp
index 0672e064a1904..872848d075eaf 100644
--- a/compiler-rt/test/asan/TestCases/Darwin/dyld_insert_libraries_remove.cpp
+++ b/compiler-rt/test/asan/TestCases/Darwin/dyld_insert_libraries_remove.cpp
@@ -5,29 +5,27 @@
 // UNSUPPORTED: ios
 
 // RUN: rm -rf %t && mkdir -p %t
-// RUN: cp `%clang_asan -print-file-name=lib`/darwin/libclang_rt.asan_osx_dynamic.dylib \
+// RUN: %clang_asan -print-file-name=lib | tr -d '\n' > %t.lib_name
+// RUN: cp %{readfile:%t.lib_name}/darwin/libclang_rt.asan_osx_dynamic.dylib \
 // RUN:   %t/libclang_rt.asan_osx_dynamic.dylib
 
 // RUN: %clangxx_asan %s -o %t/a.out
 // RUN: %clangxx -DSHARED_LIB %s \
 // RUN:     -dynamiclib -o %t/dummy-so.dylib
 
-// RUN: ( cd %t && \
-// RUN:   DYLD_INSERT_LIBRARIES=@executable_path/libclang_rt.asan_osx_dynamic.dylib:dummy-so.dylib \
-// RUN:   %run ./a.out 2>&1 ) | FileCheck %s || exit 1
+// RUN: cd %t
+// RUN: env DYLD_INSERT_LIBRARIES=@executable_path/libclang_rt.asan_osx_dynamic.dylib:dummy-so.dylib \
+// RUN: %run ./a.out 2>&1 | FileCheck %s
 
-// RUN: ( cd %t && \
-// RUN:   DYLD_INSERT_LIBRARIES=libclang_rt.asan_osx_dynamic.dylib:dummy-so.dylib \
-// RUN:   %run ./a.out 2>&1 ) | FileCheck %s || exit 1
+// RUN: env DYLD_INSERT_LIBRARIES=libclang_rt.asan_osx_dynamic.dylib:dummy-so.dylib \
+// RUN: %run ./a.out 2>&1 | FileCheck %s
 
-// RUN: ( cd %t && \
-// RUN:   %env_asan_opts=strip_env=0 \
-// RUN:   DYLD_INSERT_LIBRARIES=libclang_rt.asan_osx_dynamic.dylib:dummy-so.dylib \
-// RUN:   %run ./a.out 2>&1 ) | FileCheck %s --check-prefix=CHECK-KEEP || exit 1
+// RUN: %env_asan_opts=strip_env=0 \
+// RUN: DYLD_INSERT_LIBRARIES=libclang_rt.asan_osx_dynamic.dylib:dummy-so.dylib \
+// RUN: %run ./a.out 2>&1 | FileCheck %s --check-prefix=CHECK-KEEP
 
-// RUN: ( cd %t && \
-// RUN:   DYLD_INSERT_LIBRARIES=%t/libclang_rt.asan_osx_dynamic.dylib:dummy-so.dylib \
-// RUN:   %run ./a.out 2>&1 ) | FileCheck %s || exit 1
+// RUN: env DYLD_INSERT_LIBRARIES=%t/libclang_rt.asan_osx_dynamic.dylib:dummy-so.dylib \
+// RUN: %run ./a.out 2>&1 | FileCheck %s
 
 #if !defined(SHARED_LIB)
 #include <stdio.h>
diff --git a/compiler-rt/test/asan/TestCases/Darwin/init_for_dlopen.cpp b/compiler-rt/test/asan/TestCases/Darwin/init_for_dlopen.cpp
index 3bf8e99703a08..9bb652cc79438 100644
--- a/compiler-rt/test/asan/TestCases/Darwin/init_for_dlopen.cpp
+++ b/compiler-rt/test/asan/TestCases/Darwin/init_for_dlopen.cpp
@@ -5,7 +5,7 @@
 // - By default the lit config sets this but we don't want this
 //   test to implicitly depend on this.
 // - It avoids requiring `--crash` to be passed to `not`.
-// RUN: APPLE_ASAN_INIT_FOR_DLOPEN=0 %env_asan_opts=abort_on_error=0 not \
+// RUN: %env_asan_opts=abort_on_error=0 APPLE_ASAN_INIT_FOR_DLOPEN=0 not \
 // RUN:   %run %t %shared_libasan 2>&1 | \
 // RUN:   FileCheck -check-prefix=CHECK-DL-OPEN-FAIL %s
 // RUN: env -u APPLE_ASAN_INIT_FOR_DLOPEN %env_asan_opts=abort_on_error=0 not \
diff --git a/compiler-rt/test/asan/TestCases/Darwin/malloc_zone-protected.cpp b/compiler-rt/test/asan/TestCases/Darwin/malloc_zone-protected.cpp
index 125b544724d3f..ac3c5898f271a 100644
--- a/compiler-rt/test/asan/TestCases/Darwin/malloc_zone-protected.cpp
+++ b/compiler-rt/test/asan/TestCases/Darwin/malloc_zone-protected.cpp
@@ -3,8 +3,7 @@
 #include <stdio.h>
 
 // RUN: %clangxx_asan %s -o %t
-// RUN: ASAN_OPTIONS="abort_on_error=1" not --crash %run %t 2>&1 | FileCheck %s
-
+// RUN: env ASAN_OPTIONS="abort_on_error=1" not --crash %run %t 2>&1 | FileCheck %s
 
 void *pwn(malloc_zone_t *unused_zone, size_t unused_size) {
   printf("PWNED\n");
diff --git a/compiler-rt/test/asan_abi/TestCases/Darwin/llvm_interface_symbols.cpp b/compiler-rt/test/asan_abi/TestCases/Darwin/llvm_interface_symbols.cpp
index 5da18aa971d43..ba7b5e5815bd6 100644
--- a/compiler-rt/test/asan_abi/TestCases/Darwin/llvm_interface_symbols.cpp
+++ b/compiler-rt/test/asan_abi/TestCases/Darwin/llvm_interface_symbols.cpp
@@ -24,7 +24,8 @@
 // RUN: diff %t.imports-sorted %t.exports-sorted
 
 // Ensure that there is no dynamic dylib linked.
-// RUN: otool -L %t | (! grep -q "dynamic.dylib")
+// RUN: otool -L %t > %t.libs
+// RUN: not grep -q "dynamic.dylib" < %t.libs
 
 // UNSUPPORTED: ios
 

From 38c1a58605e8347afd05e31360d3bfd5c4c19ced Mon Sep 17 00:00:00 2001
From: Tarun Prabhu <tarun@lanl.gov>
Date: Tue, 18 Nov 2025 09:22:43 -0700
Subject: [PATCH 37/52] [flang][NFC] Strip trailing whitespace from tests (6 of
 N)

Only the fortran source files in flang/test/Lower/PowerPC and some in
flang/test/Lower have been modified. The other files in the directory
will be cleaned up in subsequent commits.
---
 flang/test/Lower/PowerPC/ppc-vec-load-elem-order.f90 |  4 ++--
 flang/test/Lower/PowerPC/ppc-vec-sel.f90             |  2 +-
 .../test/Lower/PowerPC/ppc-vec-store-elem-order.f90  |  4 ++--
 flang/test/Lower/PowerPC/ppc-vec-store.f90           | 12 ++++++------
 flang/test/Lower/allocatable-assignment.f90          | 12 ++++++------
 flang/test/Lower/allocatable-globals.f90             |  2 +-
 flang/test/Lower/allocatable-polymorphic.f90         |  8 ++++----
 flang/test/Lower/allocated.f90                       |  1 -
 flang/test/Lower/array-elemental-calls-2.f90         |  2 +-
 flang/test/Lower/array-elemental-calls.f90           |  2 +-
 flang/test/Lower/array-expression-assumed-size.f90   |  8 ++++----
 flang/test/Lower/array-substring.f90                 |  2 +-
 flang/test/Lower/array-wide-char.f90                 |  2 +-
 flang/test/Lower/array.f90                           |  2 +-
 flang/test/Lower/forall-pointer-assignment.f90       |  2 +-
 flang/test/Lower/forall/forall-2.f90                 |  4 ++--
 flang/test/Lower/forall/forall-ranked.f90            |  2 +-
 flang/test/Lower/forall/forall-where-2.f90           | 10 +++++-----
 flang/test/Lower/forall/forall-where.f90             |  2 +-
 19 files changed, 41 insertions(+), 42 deletions(-)

diff --git a/flang/test/Lower/PowerPC/ppc-vec-load-elem-order.f90 b/flang/test/Lower/PowerPC/ppc-vec-load-elem-order.f90
index 355fd6c3a742a..b17c3f1bdc4e7 100644
--- a/flang/test/Lower/PowerPC/ppc-vec-load-elem-order.f90
+++ b/flang/test/Lower/PowerPC/ppc-vec-load-elem-order.f90
@@ -394,7 +394,7 @@ subroutine vec_xl_testi8a(arg1, arg2, res)
   vector(integer(1)) :: res
   res = vec_xl(arg1, arg2)
 
-  
+
 ! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = load <16 x i8>, ptr %[[addr]], align 1
@@ -481,7 +481,7 @@ subroutine vec_xl_be_testi8a(arg1, arg2, res)
   vector(integer(1)) :: res
   res = vec_xl_be(arg1, arg2)
 
-  
+
 ! LLVMIR: %4 = load i8, ptr %0, align 1
 ! LLVMIR: %5 = getelementptr i8, ptr %1, i8 %4
 ! LLVMIR: %6 = load <16 x i8>, ptr %5, align 1
diff --git a/flang/test/Lower/PowerPC/ppc-vec-sel.f90 b/flang/test/Lower/PowerPC/ppc-vec-sel.f90
index c3de8ba9c1444..93641d1461a99 100644
--- a/flang/test/Lower/PowerPC/ppc-vec-sel.f90
+++ b/flang/test/Lower/PowerPC/ppc-vec-sel.f90
@@ -136,7 +136,7 @@ subroutine vec_sel_testu8(arg1, arg2, arg3)
   vector(unsigned(8)) :: arg1, arg2, r
   vector(unsigned(8)) :: arg3
   r = vec_sel(arg1, arg2, arg3)
-  
+
 
 ! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
 ! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
diff --git a/flang/test/Lower/PowerPC/ppc-vec-store-elem-order.f90 b/flang/test/Lower/PowerPC/ppc-vec-store-elem-order.f90
index caf6d5463a833..947c8b1c7eb2c 100644
--- a/flang/test/Lower/PowerPC/ppc-vec-store-elem-order.f90
+++ b/flang/test/Lower/PowerPC/ppc-vec-store-elem-order.f90
@@ -14,7 +14,7 @@ subroutine vec_st_test(arg1, arg2, arg3)
 ! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %0, align 16
 ! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[arg2]]
-! LLVMIR: %[[bc:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32> 
+! LLVMIR: %[[bc:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
 ! LLVMIR: %[[shf:.*]] = shufflevector <4 x i32> %[[bc]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ! LLVMIR:  call void @llvm.ppc.altivec.stvx(<4 x i32> %[[shf]], ptr %[[addr]])
 end subroutine vec_st_test
@@ -28,7 +28,7 @@ subroutine vec_ste_test(arg1, arg2, arg3)
   integer(4) :: arg2
   real(4) :: arg3
   call vec_ste(arg1, arg2, arg3)
-  
+
 ! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %0, align 16
 ! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4
 ! LLVMIR: %[[addr]] = getelementptr i8, ptr %2, i32 %[[arg2]]
diff --git a/flang/test/Lower/PowerPC/ppc-vec-store.f90 b/flang/test/Lower/PowerPC/ppc-vec-store.f90
index c25cc8b07cf79..1c3ab9638f117 100644
--- a/flang/test/Lower/PowerPC/ppc-vec-store.f90
+++ b/flang/test/Lower/PowerPC/ppc-vec-store.f90
@@ -300,7 +300,7 @@ subroutine vec_xst_test_vr4i2r4(arg1, arg2, arg3)
   real(4) :: arg3
   call vec_xst(arg1, arg2, arg3)
 
-  
+
 ! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
 ! LLVMIR: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %{{.*}}, i16 %[[arg2]]
@@ -432,7 +432,7 @@ subroutine vec_xst_be_test_vi4i4vai4(arg1, arg2, arg3, i)
 ! LLVMIR: %[[iadd:.*]] = add nsw i64 %[[imul2]], 0
 ! LLVMIR: %[[gep1:.*]] = getelementptr <4 x i32>, ptr %2, i64 %[[iadd]]
 ! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
-! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4 
+! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4
 ! LLVMIR: %[[gep2:.*]] = getelementptr i8, ptr %[[gep1]], i32 %[[arg2]]
 ! LLVMIR: %[[src:.*]] = shufflevector <4 x i32> %[[arg1]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ! LLVMIR: store <4 x i32> %[[src]], ptr %[[gep2]], align 16
@@ -449,7 +449,7 @@ subroutine vec_xstd2_test_vr4i2r4(arg1, arg2, arg3)
   real(4) :: arg3
   call vec_xstd2(arg1, arg2, arg3)
 
-  
+
 ! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
 ! LLVMIR: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %{{.*}}, i16 %[[arg2]]
@@ -509,7 +509,7 @@ subroutine vec_xstd2_test_vi4i4vai4(arg1, arg2, arg3, i)
 ! LLVMIR: %[[iadd:.*]] = add nsw i64 %[[imul2]], 0
 ! LLVMIR: %[[gep1:.*]] = getelementptr <4 x i32>, ptr %2, i64 %[[iadd]]
 ! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
-! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4 
+! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4
 ! LLVMIR: %[[gep2:.*]] = getelementptr i8, ptr %[[gep1]], i32 %[[arg2]]
 ! LLVMIR: %[[src:.*]] = bitcast <4 x i32> %[[arg1]] to <2 x i64>
 ! LLVMIR: store <2 x i64> %[[src]], ptr %[[gep2]], align 16
@@ -526,7 +526,7 @@ subroutine vec_xstw4_test_vr4i2r4(arg1, arg2, arg3)
   real(4) :: arg3
   call vec_xstw4(arg1, arg2, arg3)
 
-  
+
 ! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
 ! LLVMIR: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %{{.*}}, i16 %[[arg2]]
@@ -584,7 +584,7 @@ subroutine vec_xstw4_test_vi4i4vai4(arg1, arg2, arg3, i)
 ! LLVMIR: %[[iadd:.*]] = add nsw i64 %[[imul2]], 0
 ! LLVMIR: %[[gep1:.*]] = getelementptr <4 x i32>, ptr %2, i64 %[[iadd]]
 ! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
-! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4 
+! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4
 ! LLVMIR: %[[gep2:.*]] = getelementptr i8, ptr %[[gep1]], i32 %[[arg2]]
 ! LLVMIR: store <4 x i32> %[[arg1]], ptr %[[gep2]], align 16
 end subroutine vec_xstw4_test_vi4i4vai4
diff --git a/flang/test/Lower/allocatable-assignment.f90 b/flang/test/Lower/allocatable-assignment.f90
index 3c220232104a5..b6b2f7b6c77b9 100644
--- a/flang/test/Lower/allocatable-assignment.f90
+++ b/flang/test/Lower/allocatable-assignment.f90
@@ -283,14 +283,14 @@ subroutine test_dyn_char(x, n, c)
 ! CHECK:           hlfir.assign %[[VAL_8]]#0 to %[[VAL_14]]#0 realloc keep_lhs_len : !fir.box<!fir.array<20x!fir.char<1,?>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,?>>>>>
 
 subroutine test_derived_with_init(x, y)
-  type t 
+  type t
     integer, allocatable :: a(:)
-  end type                                                                                     
-  type(t), allocatable :: x                                                                    
-  type(t) :: y                                                                                 
+  end type
+  type(t), allocatable :: x
+  type(t) :: y
   ! The allocatable component of `x` need to be initialized
   ! during the automatic allocation (setting its rank and allocation
-  ! status) before it is assigned with the component of `y` 
+  ! status) before it is assigned with the component of `y`
   x = y
 end subroutine
 ! CHECK-LABEL:   func.func @_QMalloc_assignPtest_derived_with_init(
@@ -357,7 +357,7 @@ end function elt
 !  real :: y(2, 3) = reshape([1,2,3,4,5,6], [2,3])
 !  real, allocatable :: x (:, :)
 !  allocate(x(2,2))
-!  call test_with_lbounds(x, y) 
+!  call test_with_lbounds(x, y)
 !  print *, x(10, 20)
 !  print *, x
 !end
diff --git a/flang/test/Lower/allocatable-globals.f90 b/flang/test/Lower/allocatable-globals.f90
index 9d386688f8881..8b7420ab32391 100644
--- a/flang/test/Lower/allocatable-globals.f90
+++ b/flang/test/Lower/allocatable-globals.f90
@@ -12,7 +12,7 @@
 module mod_allocatables
   character(10), allocatable :: c(:)
 end module
-  
+
 ! CHECK-LABEL: func @_QPtest_mod_allocatables()
 subroutine test_mod_allocatables()
   use mod_allocatables, only: c
diff --git a/flang/test/Lower/allocatable-polymorphic.f90 b/flang/test/Lower/allocatable-polymorphic.f90
index 27cdf2839767d..d528fd8e546ff 100644
--- a/flang/test/Lower/allocatable-polymorphic.f90
+++ b/flang/test/Lower/allocatable-polymorphic.f90
@@ -460,7 +460,7 @@ subroutine test_allocate_with_mold()
 ! CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X]](%{{.*}}) {uniq_name = "_QMpolyFtest_allocate_with_moldEx"} : (!fir.ref<!fir.array<10x!fir.type<_QMpolyTp2{p1:!fir.type<_QMpolyTp1{a:i32,b:i32}>,c:i32}>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10x!fir.type<_QMpolyTp2{p1:!fir.type<_QMpolyTp1{a:i32,b:i32}>,c:i32}>>>, !fir.ref<!fir.array<10x!fir.type<_QMpolyTp2{p1:!fir.type<_QMpolyTp1{a:i32,b:i32}>,c:i32}>>>)
 
 ! CHECK: %[[EMBOX_X:.*]] = fir.embox %[[X_DECL]]#0(%{{.*}}) : (!fir.ref<!fir.array<10x!fir.type<_QMpolyTp2{p1:!fir.type<_QMpolyTp1{a:i32,b:i32}>,c:i32}>>>, !fir.shape<1>) -> !fir.box<!fir.array<10x!fir.type<_QMpolyTp2{p1:!fir.type<_QMpolyTp1{a:i32,b:i32}>,c:i32}>>>
-! CHECK: %[[RANK:.*]] = arith.constant 1 : i32 
+! CHECK: %[[RANK:.*]] = arith.constant 1 : i32
 ! CHECK: %[[P_BOX_NONE:.*]] = fir.convert %[[P_DECL]]#0 : (!fir.ref<!fir.class<!fir.ptr<!fir.array<?x!fir.type<_QMpolyTp1{a:i32,b:i32}>>>>>) -> !fir.ref<!fir.box<none>>
 ! CHECK: %[[X_BOX_NONE:.*]] = fir.convert %[[EMBOX_X]] : (!fir.box<!fir.array<10x!fir.type<_QMpolyTp2{p1:!fir.type<_QMpolyTp1{a:i32,b:i32}>,c:i32}>>>) -> !fir.box<none>
 ! CHECK: fir.call @_FortranAPointerApplyMold(%[[P_BOX_NONE]], %[[X_BOX_NONE]], %[[RANK]]) {{.*}} : (!fir.ref<!fir.box<none>>, !fir.box<none>, i32) -> ()
@@ -614,10 +614,10 @@ program test_alloc
 ! LLVM: %[[TYPE_CODE:.*]] = load i8, ptr %[[TYPE_CODE_GEP]]
 ! LLVM-NEXT: %[[EXT_TYPE_CODE:.*]] = sext i8 %[[TYPE_CODE]] to i32
 ! LLVM: %{{.*}} = insertvalue { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } undef, i64 %[[ELEM_SIZE]], 1
-! LLVM: %[[TRUNC_TYPE_CODE:.*]] = trunc i32 %[[EXT_TYPE_CODE]] to i8 
+! LLVM: %[[TRUNC_TYPE_CODE:.*]] = trunc i32 %[[EXT_TYPE_CODE]] to i8
 ! LLVM: %{{.*}} = insertvalue { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %{{.*}}, i8 %[[TRUNC_TYPE_CODE]], 4
 ! LLVM: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %{{.*}}, ptr %[[TMP:.*]]
-! LLVM: call void %{{.*}}(ptr %{{.*}}) 
+! LLVM: call void %{{.*}}(ptr %{{.*}})
 
 ! LLVM: call void @llvm.memcpy.p0.p0.i32
 ! LLVM: %[[GEP_TDESC_C2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.*}}, i32 0, i32 7
@@ -628,7 +628,7 @@ program test_alloc
 ! LLVM: %[[TYPE_CODE:.*]] = load i8, ptr %[[TYPE_CODE_GEP]]
 ! LLVM-NEXT: %[[EXT_TYPE_CODE:.*]] = sext i8 %[[TYPE_CODE]] to i32
 ! LLVM: %{{.*}} = insertvalue { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } undef, i64 %[[ELEM_SIZE]], 1
-! LLVM: %[[TRUNC_TYPE_CODE:.*]] = trunc i32 %[[EXT_TYPE_CODE]] to i8 
+! LLVM: %[[TRUNC_TYPE_CODE:.*]] = trunc i32 %[[EXT_TYPE_CODE]] to i8
 ! LLVM: %{{.*}} = insertvalue { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %{{.*}}, i8 %[[TRUNC_TYPE_CODE]], 4
 ! LLVM: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %{{.*}}, ptr %{{.*}}
 ! LLVM: call void %{{.*}}(ptr %{{.*}})
diff --git a/flang/test/Lower/allocated.f90 b/flang/test/Lower/allocated.f90
index 6e8420fc7d79a..11e856fd67bad 100644
--- a/flang/test/Lower/allocated.f90
+++ b/flang/test/Lower/allocated.f90
@@ -15,4 +15,3 @@ subroutine allocated_test(scalar, array)
     ! CHECK: cmpi ne, %[[addrToInt1]], %c0{{.*}}
     print *, allocated(array)
   end subroutine
-  
\ No newline at end of file
diff --git a/flang/test/Lower/array-elemental-calls-2.f90 b/flang/test/Lower/array-elemental-calls-2.f90
index 2674b07dece17..60c9257a19822 100644
--- a/flang/test/Lower/array-elemental-calls-2.f90
+++ b/flang/test/Lower/array-elemental-calls-2.f90
@@ -172,7 +172,7 @@ subroutine check_parentheses_logical()
 subroutine check_parentheses_derived(a)
   type t
     integer :: i
-  end type  
+  end type
   interface
     integer elemental function elem_func_derived(x)
       import :: t
diff --git a/flang/test/Lower/array-elemental-calls.f90 b/flang/test/Lower/array-elemental-calls.f90
index 853807bcb3e6c..93d2979ec9383 100644
--- a/flang/test/Lower/array-elemental-calls.f90
+++ b/flang/test/Lower/array-elemental-calls.f90
@@ -57,7 +57,7 @@ elemental impure integer function impure_func(j)
       integer, intent(in) :: j
     end function
   end interface
-  
+
   i = 42 + pure_func(j)
   i = 42 + impure_func(j)
 end subroutine
diff --git a/flang/test/Lower/array-expression-assumed-size.f90 b/flang/test/Lower/array-expression-assumed-size.f90
index a498148d07fc7..b51dc00c20e28 100644
--- a/flang/test/Lower/array-expression-assumed-size.f90
+++ b/flang/test/Lower/array-expression-assumed-size.f90
@@ -16,8 +16,8 @@ end subroutine assumed_size_forall_test
 
 ! CHECK-LABEL: func @_QPassumed_size_test(
 ! CHECK-SAME:    %[[VAL_0:.*]]: !fir.ref<!fir.array<10x?xi32>>{{.*}}) {
-! CHECK:         %[[VAL_1A:.*]] = fir.convert %c10{{.*}} : (i64) -> index 
-! CHECK:         %[[VAL_1B:.*]] = arith.cmpi sgt, %[[VAL_1A]], %c0{{.*}} : index 
+! CHECK:         %[[VAL_1A:.*]] = fir.convert %c10{{.*}} : (i64) -> index
+! CHECK:         %[[VAL_1B:.*]] = arith.cmpi sgt, %[[VAL_1A]], %c0{{.*}} : index
 ! CHECK:         %[[VAL_1:.*]] = arith.select %[[VAL_1B]], %[[VAL_1A]], %c0{{.*}} : index
 ! CHECK:         %[[VAL_2:.*]] = fir.assumed_size_extent : index
 ! CHECK:         %[[VAL_3:.*]] = arith.constant 1 : index
@@ -79,8 +79,8 @@ end subroutine assumed_size_forall_test
 ! CHECK-LABEL: func @_QPassumed_size_forall_test(
 ! CHECK-SAME:       %[[VAL_0:.*]]: !fir.ref<!fir.array<10x?xi32>>{{.*}}) {
 ! CHECK:         %[[VAL_1:.*]] = fir.alloca i32 {adapt.valuebyref, bindc_name = "i"}
-! CHECK:         %[[VAL_2A:.*]] = fir.convert %c10{{.*}} : (i64) -> index 
-! CHECK:         %[[VAL_2B:.*]] = arith.cmpi sgt, %[[VAL_2A]], %c0{{.*}} : index 
+! CHECK:         %[[VAL_2A:.*]] = fir.convert %c10{{.*}} : (i64) -> index
+! CHECK:         %[[VAL_2B:.*]] = arith.cmpi sgt, %[[VAL_2A]], %c0{{.*}} : index
 ! CHECK:         %[[VAL_2:.*]] = arith.select %[[VAL_2B]], %[[VAL_2A]], %c0{{.*}} : index
 ! CHECK:         %[[VAL_3:.*]] = fir.assumed_size_extent : index
 ! CHECK:         %[[VAL_4:.*]] = arith.constant 2 : i32
diff --git a/flang/test/Lower/array-substring.f90 b/flang/test/Lower/array-substring.f90
index 7544fbb989627..0ede04f0bb2f8 100644
--- a/flang/test/Lower/array-substring.f90
+++ b/flang/test/Lower/array-substring.f90
@@ -46,5 +46,5 @@ function test(C)
   logical :: test(1)
   character*12  C(1)
 
-  test = C(1:1)(1:8) == (/'ABCDabcd'/) 
+  test = C(1:1)(1:8) == (/'ABCDabcd'/)
 end function test
diff --git a/flang/test/Lower/array-wide-char.f90 b/flang/test/Lower/array-wide-char.f90
index 8bad280d0f056..44fcd45519d85 100644
--- a/flang/test/Lower/array-wide-char.f90
+++ b/flang/test/Lower/array-wide-char.f90
@@ -2,7 +2,7 @@
 
 character(LEN=128, KIND=4), PARAMETER :: conarr(3) = &
      [ character(128,4) :: "now is the time", "for all good men to come", &
-     "to the aid of the country" ]       
+     "to the aid of the country" ]
 character(LEN=10, KIND=4) :: arr(3) = &
      [ character(10,4) :: "good buddy", "best buddy", " " ]
 call action_on_char4(conarr)
diff --git a/flang/test/Lower/array.f90 b/flang/test/Lower/array.f90
index 710175739b3a8..cd12d7f851e67 100644
--- a/flang/test/Lower/array.f90
+++ b/flang/test/Lower/array.f90
@@ -93,7 +93,7 @@ subroutine s(i,j,k,ii,jj,kk,a1,a2,a3,a4,a5,a6,a7)
   ! CHECK: fir.coordinate_of %[[a7]], %[[t7]] :
   ! CHECK-LABEL: EndIoStatement
   print *, a7(kk, jj, ii)
-  
+
 end subroutine s
 
 ! CHECK-LABEL: range
diff --git a/flang/test/Lower/forall-pointer-assignment.f90 b/flang/test/Lower/forall-pointer-assignment.f90
index d89fb3ed5cb57..62184a77addf5 100644
--- a/flang/test/Lower/forall-pointer-assignment.f90
+++ b/flang/test/Lower/forall-pointer-assignment.f90
@@ -1,4 +1,4 @@
-! Test lower of FORALL pointer assignment 
+! Test lower of FORALL pointer assignment
 ! RUN: bbc -emit-fir %s -o - | FileCheck %s
 
 
diff --git a/flang/test/Lower/forall/forall-2.f90 b/flang/test/Lower/forall/forall-2.f90
index cdafb4f3d49e7..c6a20f5859497 100644
--- a/flang/test/Lower/forall/forall-2.f90
+++ b/flang/test/Lower/forall/forall-2.f90
@@ -16,7 +16,7 @@ subroutine implied_iters_allocatable(thing, a1)
   end type t
   type(t) :: thing(:)
   integer :: i
-  
+
   forall (i=5:13)
   ! commenting out this test for the moment (hits assert)
   !  thing(i)%arr = a1
@@ -32,7 +32,7 @@ subroutine conflicting_allocatable(thing, lo, hi)
   end type t
   type(t) :: thing(:)
   integer :: i
-  
+
   forall (i = lo:hi)
   ! commenting out this test for the moment (hits assert)
   !  thing(i)%arr = thing(hi-i)%arr
diff --git a/flang/test/Lower/forall/forall-ranked.f90 b/flang/test/Lower/forall/forall-ranked.f90
index 9e56be926e78e..f508c67468212 100644
--- a/flang/test/Lower/forall/forall-ranked.f90
+++ b/flang/test/Lower/forall/forall-ranked.f90
@@ -68,7 +68,7 @@ end function f
      integer :: arr(11)
   end type t
   type(t) :: a(10,10)
-  
+
   forall (i=1:5)
      a(i,:)%arr(i+4) = f(i)
   end forall
diff --git a/flang/test/Lower/forall/forall-where-2.f90 b/flang/test/Lower/forall/forall-where-2.f90
index c075508bef561..85aab87559c3c 100644
--- a/flang/test/Lower/forall/forall-where-2.f90
+++ b/flang/test/Lower/forall/forall-where-2.f90
@@ -6,7 +6,7 @@
 ! Test a FORALL construct with a nested WHERE construct where the mask
 ! contains temporary array expressions.
 
-subroutine test_nested_forall_where_with_temp_in_mask(a,b)  
+subroutine test_nested_forall_where_with_temp_in_mask(a,b)
   interface
     function temp_foo(i, j)
       integer :: i, j
@@ -28,10 +28,10 @@ function temp_foo(i, j)
 
 ! CHECK:  func @_QPtest_nested_forall_where_with_temp_in_mask({{.*}}) {
 ! CHECK:   %[[tempResultBox:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = ".result"}
-           ! Where condition pre-evaluation 
+           ! Where condition pre-evaluation
 ! CHECK:   fir.do_loop {{.*}} {
 ! CHECK:      fir.do_loop {{.*}} {
-                ! Evaluation of mask for iteration (i,j) into ragged array temp 
+                ! Evaluation of mask for iteration (i,j) into ragged array temp
 ! CHECK:        %[[tempResult:.*]] = fir.call @_QPtemp_foo
 ! CHECK:        fir.save_result %[[tempResult]] to %[[tempResultBox]] : !fir.box<!fir.heap<!fir.array<?xf32>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
 ! CHECK:        fir.if {{.*}} {
@@ -52,7 +52,7 @@ function temp_foo(i, j)
 ! CHECK:      fir.do_loop {{.*}} {
                 ! Array assignment at iteration (i, j)
 ! CHECK:        fir.do_loop {{.*}} {
-! CHECK:          fir.if {{.*}} {  
+! CHECK:          fir.if {{.*}} {
 ! CHECK:            arith.divf
 ! CHECK:          } else {
 ! CHECK:          }
@@ -64,7 +64,7 @@ function temp_foo(i, j)
 ! CHECK:      fir.do_loop {{.*}} {
                 ! Array assignment at iteration (i, j)
 ! CHECK:        fir.do_loop {{.*}} {
-! CHECK:          fir.if {{.*}} {  
+! CHECK:          fir.if {{.*}} {
 ! CHECK:          } else {
 ! CHECK:            arith.negf
 ! CHECK:          }
diff --git a/flang/test/Lower/forall/forall-where.f90 b/flang/test/Lower/forall/forall-where.f90
index 54ff2bd4c3f16..3202edbaec808 100644
--- a/flang/test/Lower/forall/forall-where.f90
+++ b/flang/test/Lower/forall/forall-where.f90
@@ -6,7 +6,7 @@
 !    This has both an explicit and implicit iteration space. The WHERE construct
 !    makes the assignments conditional and the where mask evaluation must happen
 !    prior to evaluating the array assignment statement.
-subroutine test_nested_forall_where(a,b)  
+subroutine test_nested_forall_where(a,b)
   type t
      real data(100)
   end type t

From 67d5c14ad66f022d689cbcb0709df690938e5b6d Mon Sep 17 00:00:00 2001
From: Emil Tsalapatis <aimilios.tsalapatis@gmail.com>
Date: Tue, 18 Nov 2025 11:31:45 -0500
Subject: [PATCH 38/52] [llvm][AddressSanitizer] option for applying
 AddressSanitizer to specific address spaces (#167770)

For some backends, e.g., BPF, it is desirable to only sanitize memory
belonging to specific address spaces. More specifically, it is sometimes
desirable to only apply address sanitization for arena memory belonging
to address space 1. However, AddressSanitizer currently does not support
selectively sanitizing address spaces. Add a new option to select which
address spaces to apply AddressSanitizer to.

No functional change for existing targets (namely AMD GPU) that hardcode
which address spaces to sanitize.
---
 .../Instrumentation/AddressSanitizer.cpp      | 35 +++++++++++++++----
 1 file changed, 28 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 3a14ee5addc2f..c9f249a8733ac 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -20,6 +20,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
@@ -441,6 +442,15 @@ static cl::opt<AsanDtorKind> ClOverrideDestructorKind(
                           "Use global destructors")),
     cl::init(AsanDtorKind::Invalid), cl::Hidden);
 
+static SmallSet<unsigned, 8> SrcAddrSpaces;
+static cl::list<unsigned> ClAddrSpaces(
+    "asan-instrument-address-spaces",
+    cl::desc("Only instrument variables in the specified address spaces."),
+    cl::Hidden, cl::CommaSeparated, cl::ZeroOrMore,
+    cl::callback([](const unsigned &AddrSpace) {
+      SrcAddrSpaces.insert(AddrSpace);
+    }));
+
 // Debug flags.
 
 static cl::opt<int> ClDebug("asan-debug", cl::desc("debug"), cl::Hidden,
@@ -1363,11 +1373,25 @@ static bool GlobalWasGeneratedByCompiler(GlobalVariable *G) {
 static bool isUnsupportedAMDGPUAddrspace(Value *Addr) {
   Type *PtrTy = cast<PointerType>(Addr->getType()->getScalarType());
   unsigned int AddrSpace = PtrTy->getPointerAddressSpace();
+  // Globals in address space 1 and 4 are supported for AMDGPU.
   if (AddrSpace == 3 || AddrSpace == 5)
     return true;
   return false;
 }
 
+static bool isSupportedAddrspace(const Triple &TargetTriple, Value *Addr) {
+  Type *PtrTy = cast<PointerType>(Addr->getType()->getScalarType());
+  unsigned int AddrSpace = PtrTy->getPointerAddressSpace();
+
+  if (!SrcAddrSpaces.empty())
+    return SrcAddrSpaces.count(AddrSpace);
+
+  if (TargetTriple.isAMDGPU())
+    return !isUnsupportedAMDGPUAddrspace(Addr);
+
+  return AddrSpace == 0;
+}
+
 Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
   // Shadow >> scale
   Shadow = IRB.CreateLShr(Shadow, Mapping.Scale);
@@ -1431,10 +1455,9 @@ bool AddressSanitizer::isInterestingAlloca(const AllocaInst &AI) {
 }
 
 bool AddressSanitizer::ignoreAccess(Instruction *Inst, Value *Ptr) {
-  // Instrument accesses from different address spaces only for AMDGPU.
-  Type *PtrTy = cast<PointerType>(Ptr->getType()->getScalarType());
-  if (PtrTy->getPointerAddressSpace() != 0 &&
-      !(TargetTriple.isAMDGPU() && !isUnsupportedAMDGPUAddrspace(Ptr)))
+  // Check whether the target supports sanitizing the address space
+  // of the pointer.
+  if (!isSupportedAddrspace(TargetTriple, Ptr))
     return true;
 
   // Ignore swifterror addresses.
@@ -2097,9 +2120,7 @@ bool ModuleAddressSanitizer::shouldInstrumentGlobal(GlobalVariable *G) const {
     return false;
   if (!Ty->isSized()) return false;
   if (!G->hasInitializer()) return false;
-  // Globals in address space 1 and 4 are supported for AMDGPU.
-  if (G->getAddressSpace() &&
-      !(TargetTriple.isAMDGPU() && !isUnsupportedAMDGPUAddrspace(G)))
+  if (!isSupportedAddrspace(TargetTriple, G))
     return false;
   if (GlobalWasGeneratedByCompiler(G)) return false; // Our own globals.
   // Two problems with thread-locals:

From 1fb8e3d76e87a6c6f0d8fc7aa4e7ed75e3641fee Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Matej=20Ko=C5=A1=C3=ADk?= <m4tej.kosik@gmail.com>
Date: Tue, 18 Nov 2025 17:40:31 +0100
Subject: [PATCH 39/52] [lldb] Support integer registers with more than 64
 bits. (#166363)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This PR changes the LLDB codebase so that LLDB is able to print the
values of integer registers that are wider than 64 bits (even if the
number of bits is not equal to 128).

---------

Co-authored-by: Matej Košík <matej.kosik@codasip.com>
Co-authored-by: Jonas Devlieghere <jonas@devlieghere.com>
---
 lldb/include/lldb/Utility/RegisterValue.h    |  11 +-
 lldb/source/Utility/DataExtractor.cpp        |   4 -
 lldb/source/Utility/RegisterValue.cpp        |  48 ++-
 lldb/unittests/Utility/RegisterValueTest.cpp | 298 +++++++++++++++++--
 4 files changed, 308 insertions(+), 53 deletions(-)

diff --git a/lldb/include/lldb/Utility/RegisterValue.h b/lldb/include/lldb/Utility/RegisterValue.h
index 49aaf68be17fc..baf984cbcb052 100644
--- a/lldb/include/lldb/Utility/RegisterValue.h
+++ b/lldb/include/lldb/Utility/RegisterValue.h
@@ -46,7 +46,8 @@ class RegisterValue {
     eTypeUInt16,
     eTypeUInt32,
     eTypeUInt64,
-    eTypeUInt128,
+    eTypeUIntN, ///< This value is used when the (integer) register is larger
+                ///< than 64 bits.
     eTypeFloat,
     eTypeDouble,
     eTypeLongDouble,
@@ -69,7 +70,7 @@ class RegisterValue {
     m_scalar = inst;
   }
 
-  explicit RegisterValue(llvm::APInt inst) : m_type(eTypeUInt128) {
+  explicit RegisterValue(llvm::APInt inst) : m_type(eTypeUIntN) {
     m_scalar = llvm::APInt(std::move(inst));
   }
 
@@ -178,7 +179,7 @@ class RegisterValue {
   }
 
   void operator=(llvm::APInt uint) {
-    m_type = eTypeUInt128;
+    m_type = eTypeUIntN;
     m_scalar = llvm::APInt(std::move(uint));
   }
 
@@ -217,8 +218,8 @@ class RegisterValue {
     m_scalar = uint;
   }
 
-  void SetUInt128(llvm::APInt uint) {
-    m_type = eTypeUInt128;
+  void SetUIntN(llvm::APInt uint) {
+    m_type = eTypeUIntN;
     m_scalar = std::move(uint);
   }
 
diff --git a/lldb/source/Utility/DataExtractor.cpp b/lldb/source/Utility/DataExtractor.cpp
index e9be0cba81f0c..a9aea168acf41 100644
--- a/lldb/source/Utility/DataExtractor.cpp
+++ b/lldb/source/Utility/DataExtractor.cpp
@@ -662,10 +662,6 @@ size_t DataExtractor::ExtractBytes(offset_t offset, offset_t length,
   const uint8_t *src = PeekData(offset, length);
   if (src) {
     if (dst_byte_order != GetByteOrder()) {
-      // Validate that only a word- or register-sized dst is byte swapped
-      assert(length == 1 || length == 2 || length == 4 || length == 8 ||
-             length == 10 || length == 16 || length == 32);
-
       for (uint32_t i = 0; i < length; ++i)
         (static_cast<uint8_t *>(dst))[i] = src[length - i - 1];
     } else
diff --git a/lldb/source/Utility/RegisterValue.cpp b/lldb/source/Utility/RegisterValue.cpp
index 8b2af4e3d4f0e..c28c9e2d4d106 100644
--- a/lldb/source/Utility/RegisterValue.cpp
+++ b/lldb/source/Utility/RegisterValue.cpp
@@ -127,7 +127,7 @@ bool RegisterValue::GetScalarValue(Scalar &scalar) const {
   case eTypeUInt16:
   case eTypeUInt32:
   case eTypeUInt64:
-  case eTypeUInt128:
+  case eTypeUIntN:
   case eTypeFloat:
   case eTypeDouble:
   case eTypeLongDouble:
@@ -180,8 +180,6 @@ Status RegisterValue::SetValueFromData(const RegisterInfo &reg_info,
   if (src_len > reg_info.byte_size)
     src_len = reg_info.byte_size;
 
-  type128 int128;
-
   m_type = eTypeInvalid;
   switch (reg_info.encoding) {
   case eEncodingInvalid:
@@ -196,17 +194,15 @@ Status RegisterValue::SetValueFromData(const RegisterInfo &reg_info,
       SetUInt32(src.GetMaxU32(&src_offset, src_len));
     else if (reg_info.byte_size <= 8)
       SetUInt64(src.GetMaxU64(&src_offset, src_len));
-    else if (reg_info.byte_size <= 16) {
-      uint64_t data1 = src.GetU64(&src_offset);
-      uint64_t data2 = src.GetU64(&src_offset);
-      if (src.GetByteOrder() == eByteOrderLittle) {
-        int128.x[0] = data1;
-        int128.x[1] = data2;
-      } else {
-        int128.x[0] = data2;
-        int128.x[1] = data1;
-      }
-      SetUInt128(llvm::APInt(128, int128.x));
+    else {
+      std::vector<uint8_t> native_endian_src(src_len, 0);
+      src.ExtractBytes(src_offset, src_len,
+                       llvm::sys::IsLittleEndianHost ? eByteOrderLittle
+                                                     : eByteOrderBig,
+                       native_endian_src.data());
+      llvm::APInt uint = llvm::APInt::getZero(src_len * 8);
+      llvm::LoadIntFromMemory(uint, native_endian_src.data(), src_len);
+      SetUIntN(uint);
     }
     break;
   case eEncodingIEEE754:
@@ -442,7 +438,7 @@ bool RegisterValue::SignExtend(uint32_t sign_bitpos) {
   case eTypeUInt16:
   case eTypeUInt32:
   case eTypeUInt64:
-  case eTypeUInt128:
+  case eTypeUIntN:
     return m_scalar.SignExtend(sign_bitpos);
   case eTypeFloat:
   case eTypeDouble:
@@ -465,7 +461,7 @@ bool RegisterValue::CopyValue(const RegisterValue &rhs) {
   case eTypeUInt16:
   case eTypeUInt32:
   case eTypeUInt64:
-  case eTypeUInt128:
+  case eTypeUIntN:
   case eTypeFloat:
   case eTypeDouble:
   case eTypeLongDouble:
@@ -581,7 +577,7 @@ llvm::APInt RegisterValue::GetAsUInt128(const llvm::APInt &fail_value,
   case eTypeUInt16:
   case eTypeUInt32:
   case eTypeUInt64:
-  case eTypeUInt128:
+  case eTypeUIntN:
   case eTypeFloat:
   case eTypeDouble:
   case eTypeLongDouble:
@@ -616,7 +612,7 @@ float RegisterValue::GetAsFloat(float fail_value, bool *success_ptr) const {
     break;
   case eTypeUInt32:
   case eTypeUInt64:
-  case eTypeUInt128:
+  case eTypeUIntN:
   case eTypeFloat:
   case eTypeDouble:
   case eTypeLongDouble:
@@ -636,7 +632,7 @@ double RegisterValue::GetAsDouble(double fail_value, bool *success_ptr) const {
 
   case eTypeUInt32:
   case eTypeUInt64:
-  case eTypeUInt128:
+  case eTypeUIntN:
   case eTypeFloat:
   case eTypeDouble:
   case eTypeLongDouble:
@@ -657,7 +653,7 @@ long double RegisterValue::GetAsLongDouble(long double fail_value,
 
   case eTypeUInt32:
   case eTypeUInt64:
-  case eTypeUInt128:
+  case eTypeUIntN:
   case eTypeFloat:
   case eTypeDouble:
   case eTypeLongDouble:
@@ -676,7 +672,7 @@ const void *RegisterValue::GetBytes() const {
   case eTypeUInt16:
   case eTypeUInt32:
   case eTypeUInt64:
-  case eTypeUInt128:
+  case eTypeUIntN:
   case eTypeFloat:
   case eTypeDouble:
   case eTypeLongDouble:
@@ -698,7 +694,7 @@ uint32_t RegisterValue::GetByteSize() const {
     return 2;
   case eTypeUInt32:
   case eTypeUInt64:
-  case eTypeUInt128:
+  case eTypeUIntN:
   case eTypeFloat:
   case eTypeDouble:
   case eTypeLongDouble:
@@ -721,7 +717,7 @@ bool RegisterValue::SetUInt(uint64_t uint, uint32_t byte_size) {
   } else if (byte_size <= 8) {
     SetUInt64(uint);
   } else if (byte_size <= 16) {
-    SetUInt128(llvm::APInt(128, uint));
+    SetUIntN(llvm::APInt(128, uint));
   } else
     return false;
   return true;
@@ -749,7 +745,7 @@ bool RegisterValue::operator==(const RegisterValue &rhs) const {
     case eTypeUInt16:
     case eTypeUInt32:
     case eTypeUInt64:
-    case eTypeUInt128:
+    case eTypeUIntN:
     case eTypeFloat:
     case eTypeDouble:
     case eTypeLongDouble:
@@ -774,7 +770,7 @@ bool RegisterValue::ClearBit(uint32_t bit) {
   case eTypeUInt16:
   case eTypeUInt32:
   case eTypeUInt64:
-  case eTypeUInt128:
+  case eTypeUIntN:
     if (bit < (GetByteSize() * 8)) {
       return m_scalar.ClearBit(bit);
     }
@@ -814,7 +810,7 @@ bool RegisterValue::SetBit(uint32_t bit) {
   case eTypeUInt16:
   case eTypeUInt32:
   case eTypeUInt64:
-  case eTypeUInt128:
+  case eTypeUIntN:
     if (bit < (GetByteSize() * 8)) {
       return m_scalar.SetBit(bit);
     }
diff --git a/lldb/unittests/Utility/RegisterValueTest.cpp b/lldb/unittests/Utility/RegisterValueTest.cpp
index 6239dbe21634a..7b27e841cbec5 100644
--- a/lldb/unittests/Utility/RegisterValueTest.cpp
+++ b/lldb/unittests/Utility/RegisterValueTest.cpp
@@ -57,13 +57,12 @@ TEST(RegisterValueTest, GetScalarValue) {
                    APInt(128, 0x7766554433221100)));
 }
 
-static const Scalar etalon128(APInt(128, 0xffeeddccbbaa9988ull) << 64 |
-                              APInt(128, 0x7766554433221100ull));
-
-void TestSetValueFromData128(void *src, const lldb::ByteOrder endianness) {
-  RegisterInfo ri{"uint128_register",
+void TestSetValueFromData(const Scalar &etalon, void *src, size_t src_byte_size,
+                          const lldb::ByteOrder endianness,
+                          const RegisterValue::Type register_value_type) {
+  RegisterInfo ri{"test",
                   nullptr,
-                  16,
+                  static_cast<uint32_t>(src_byte_size),
                   0,
                   lldb::Encoding::eEncodingUint,
                   lldb::Format::eFormatDefault,
@@ -71,26 +70,289 @@ void TestSetValueFromData128(void *src, const lldb::ByteOrder endianness) {
                   nullptr,
                   nullptr,
                   nullptr};
-  DataExtractor src_extractor(src, 16, endianness, 8);
+  DataExtractor src_extractor(src, src_byte_size, endianness, 8);
   RegisterValue rv;
   EXPECT_TRUE(rv.SetValueFromData(ri, src_extractor, 0, false).Success());
   Scalar s;
   EXPECT_TRUE(rv.GetScalarValue(s));
-  EXPECT_EQ(s, etalon128);
+  EXPECT_EQ(rv.GetType(), register_value_type);
+  EXPECT_EQ(s, etalon);
+}
+
+static const Scalar etalon7(APInt(32, 0x0000007F));
+
+TEST(RegisterValueTest, SetValueFromData_7_le) {
+  uint8_t src[] = {0x7F};
+  TestSetValueFromData(etalon7, src, 1, lldb::ByteOrder::eByteOrderLittle,
+                       RegisterValue::eTypeUInt8);
+}
+
+TEST(RegisterValueTest, SetValueFromData_7_be) {
+  uint8_t src[] = {0x7F};
+  TestSetValueFromData(etalon7, src, 1, lldb::ByteOrder::eByteOrderBig,
+                       RegisterValue::eTypeUInt8);
+}
+
+static const Scalar etalon8(APInt(32, 0x000000FE));
+
+TEST(RegisterValueTest, SetValueFromData_8_le) {
+  uint8_t src[] = {0xFE};
+  TestSetValueFromData(etalon8, src, 1, lldb::ByteOrder::eByteOrderLittle,
+                       RegisterValue::eTypeUInt8);
+}
+
+TEST(RegisterValueTest, SetValueFromData_8_be) {
+  uint8_t src[] = {0xFE};
+  TestSetValueFromData(etalon8, src, 1, lldb::ByteOrder::eByteOrderBig,
+                       RegisterValue::eTypeUInt8);
+}
+
+static const Scalar etalon9(APInt(32, 0x000001FE));
+
+TEST(RegisterValueTest, SetValueFromData_9_le) {
+  uint8_t src[] = {0xFE, 0x01};
+  TestSetValueFromData(etalon9, src, 2, lldb::ByteOrder::eByteOrderLittle,
+                       RegisterValue::eTypeUInt16);
+}
+
+TEST(RegisterValueTest, SetValueFromData_9_be) {
+  uint8_t src[] = {0x01, 0xFE};
+  TestSetValueFromData(etalon9, src, 2, lldb::ByteOrder::eByteOrderBig,
+                       RegisterValue::eTypeUInt16);
+}
+
+static const Scalar etalon15(APInt(32, 0x00007FED));
+
+TEST(RegisterValueTest, SetValueFromData_15_le) {
+  uint8_t src[] = {0xED, 0x7F};
+  TestSetValueFromData(etalon15, src, 2, lldb::ByteOrder::eByteOrderLittle,
+                       RegisterValue::eTypeUInt16);
+}
+
+TEST(RegisterValueTest, SetValueFromData_15_be) {
+  uint8_t src[] = {0x7F, 0xED};
+  TestSetValueFromData(etalon15, src, 2, lldb::ByteOrder::eByteOrderBig,
+                       RegisterValue::eTypeUInt16);
+}
+
+static const Scalar etalon16(APInt(32, 0x0000FEDC));
+
+TEST(RegisterValueTest, SetValueFromData_16_le) {
+  uint8_t src[] = {0xDC, 0xFE};
+  TestSetValueFromData(etalon16, src, 2, lldb::ByteOrder::eByteOrderLittle,
+                       RegisterValue::eTypeUInt16);
+}
+
+TEST(RegisterValueTest, SetValueFromData_16_be) {
+  uint8_t src[] = {0xFE, 0xDC};
+  TestSetValueFromData(etalon16, src, 2, lldb::ByteOrder::eByteOrderBig,
+                       RegisterValue::eTypeUInt16);
+}
+
+static const Scalar etalon17(APInt(32, 0x0001FEDC));
+
+TEST(RegisterValueTest, SetValueFromData_17_le) {
+  uint8_t src[] = {0xDC, 0xFE, 0x01};
+  TestSetValueFromData(etalon17, src, 3, lldb::ByteOrder::eByteOrderLittle,
+                       RegisterValue::eTypeUInt32);
+}
+
+TEST(RegisterValueTest, SetValueFromData_17_be) {
+  uint8_t src[] = {0x01, 0xFE, 0xDC};
+  TestSetValueFromData(etalon17, src, 3, lldb::ByteOrder::eByteOrderBig,
+                       RegisterValue::eTypeUInt32);
+}
+
+static const Scalar etalon24(APInt(32, 0x00FEDCBA));
+
+TEST(RegisterValueTest, SetValueFromData_24_le) {
+  uint8_t src[] = {0xBA, 0xDC, 0xFE};
+  TestSetValueFromData(etalon24, src, 3, lldb::ByteOrder::eByteOrderLittle,
+                       RegisterValue::eTypeUInt32);
+}
+
+TEST(RegisterValueTest, SetValueFromData_24_be) {
+  uint8_t src[] = {0xFE, 0xDC, 0xBA};
+  TestSetValueFromData(etalon24, src, 3, lldb::ByteOrder::eByteOrderBig,
+                       RegisterValue::eTypeUInt32);
+}
+
+static const Scalar etalon31(APInt(32, 0x7EDCBA98));
+
+TEST(RegisterValueTest, SetValueFromData_31_le) {
+  uint8_t src[] = {0x98, 0xBA, 0xDC, 0x7E};
+  TestSetValueFromData(etalon31, src, 4, lldb::ByteOrder::eByteOrderLittle,
+                       RegisterValue::eTypeUInt32);
+}
+
+TEST(RegisterValueTest, SetValueFromData_31_be) {
+  uint8_t src[] = {0x7E, 0xDC, 0xBA, 0x98};
+  TestSetValueFromData(etalon31, src, 4, lldb::ByteOrder::eByteOrderBig,
+                       RegisterValue::eTypeUInt32);
+}
+
+static const Scalar etalon32(APInt(32, 0xFEDCBA98));
+
+TEST(RegisterValueTest, SetValueFromData_32_le) {
+  uint8_t src[] = {0x98, 0xBA, 0xDC, 0xFE};
+  TestSetValueFromData(etalon32, src, 4, lldb::ByteOrder::eByteOrderLittle,
+                       RegisterValue::eTypeUInt32);
 }
 
-// Test that the "RegisterValue::SetValueFromData" method works correctly
-// with 128-bit little-endian data that represents an integer.
+TEST(RegisterValueTest, SetValueFromData_32_be) {
+  uint8_t src[] = {0xFE, 0xDC, 0xBA, 0x98};
+  TestSetValueFromData(etalon32, src, 4, lldb::ByteOrder::eByteOrderBig,
+                       RegisterValue::eTypeUInt32);
+}
+
+static const Scalar etalon33(APInt(64, 0x00000001FEDCBA98));
+
+TEST(RegisterValueTest, SetValueFromData_33_le) {
+  uint8_t src[] = {0x98, 0xBA, 0xDC, 0xFE, 0x01};
+  TestSetValueFromData(etalon33, src, 5, lldb::ByteOrder::eByteOrderLittle,
+                       RegisterValue::eTypeUInt64);
+}
+
+TEST(RegisterValueTest, SetValueFromData_33_be) {
+  uint8_t src[] = {0x01, 0xFE, 0xDC, 0xBA, 0x98};
+  TestSetValueFromData(etalon33, src, 5, lldb::ByteOrder::eByteOrderBig,
+                       RegisterValue::eTypeUInt64);
+}
+
+static const Scalar etalon40(APInt(64, 0x000000FEDCBA9876));
+
+TEST(RegisterValueTest, SetValueFromData_40_le) {
+  uint8_t src[] = {0x76, 0x98, 0xBA, 0xDC, 0xFE};
+  TestSetValueFromData(etalon40, src, 5, lldb::ByteOrder::eByteOrderLittle,
+                       RegisterValue::eTypeUInt64);
+}
+
+TEST(RegisterValueTest, SetValueFromData_40_be) {
+  uint8_t src[] = {0xFE, 0xDC, 0xBA, 0x98, 0x76};
+  TestSetValueFromData(etalon40, src, 5, lldb::ByteOrder::eByteOrderBig,
+                       RegisterValue::eTypeUInt64);
+}
+
+static const Scalar etalon63(APInt(64, 0x7EDCBA9876543210));
+
+TEST(RegisterValueTest, SetValueFromData_63_le) {
+  uint8_t src[] = {0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC, 0x7E};
+  TestSetValueFromData(etalon63, src, 8, lldb::ByteOrder::eByteOrderLittle,
+                       RegisterValue::eTypeUInt64);
+}
+
+TEST(RegisterValueTest, SetValueFromData_63_be) {
+  uint8_t src[] = {0x7E, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10};
+  TestSetValueFromData(etalon63, src, 8, lldb::ByteOrder::eByteOrderBig,
+                       RegisterValue::eTypeUInt64);
+}
+
+static const Scalar etalon64(APInt(64, 0xFEDCBA9876543210));
+
+TEST(RegisterValueTest, SetValueFromData_64_le) {
+  uint8_t src[] = {0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC, 0xFE};
+  TestSetValueFromData(etalon64, src, 8, lldb::ByteOrder::eByteOrderLittle,
+                       RegisterValue::eTypeUInt64);
+}
+
+TEST(RegisterValueTest, SetValueFromData_64_be) {
+  uint8_t src[] = {0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10};
+  TestSetValueFromData(etalon64, src, 8, lldb::ByteOrder::eByteOrderBig,
+                       RegisterValue::eTypeUInt64);
+}
+
+static const Scalar etalon65(APInt(72, 0x0000000000000001ull) << 1 * 64 |
+                             APInt(72, 0x0706050403020100ull) << 0 * 64);
+
+TEST(RegisterValueTest, SetValueFromData_65_le) {
+  uint8_t src[] = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x01};
+  TestSetValueFromData(etalon65, src, 9, lldb::ByteOrder::eByteOrderLittle,
+                       RegisterValue::eTypeUIntN);
+}
+
+TEST(RegisterValueTest, SetValueFromData_65_be) {
+  uint8_t src[] = {0x01, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00};
+  TestSetValueFromData(etalon65, src, 9, lldb::ByteOrder::eByteOrderBig,
+                       RegisterValue::eTypeUIntN);
+}
+
+static const Scalar etalon127(APInt(128, 0x7f0e0d0c0b0a0908ull) << 1 * 64 |
+                              APInt(128, 0x0706050403020100ull) << 0 * 64);
+
+TEST(RegisterValueTest, SetValueFromData_127_le) {
+  uint8_t src[] = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+                   0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x7f};
+  TestSetValueFromData(etalon127, src, 16, lldb::ByteOrder::eByteOrderLittle,
+                       RegisterValue::eTypeUIntN);
+}
+
+TEST(RegisterValueTest, SetValueFromData_127_be) {
+  uint8_t src[] = {0x7f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08,
+                   0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00};
+  TestSetValueFromData(etalon127, src, 16, lldb::ByteOrder::eByteOrderBig,
+                       RegisterValue::eTypeUIntN);
+}
+
+static const Scalar etalon128(APInt(128, 0x0f0e0d0c0b0a0908ull) << 1 * 64 |
+                              APInt(128, 0x0706050403020100ull) << 0 * 64);
+
 TEST(RegisterValueTest, SetValueFromData_128_le) {
-  uint8_t src[] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
-                   0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff};
-  TestSetValueFromData128(src, lldb::ByteOrder::eByteOrderLittle);
+  uint8_t src[] = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+                   0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f};
+  TestSetValueFromData(etalon128, src, 16, lldb::ByteOrder::eByteOrderLittle,
+                       RegisterValue::eTypeUIntN);
 }
 
-// Test that the "RegisterValue::SetValueFromData" method works correctly
-// with 128-bit big-endian data that represents an integer.
 TEST(RegisterValueTest, SetValueFromData_128_be) {
-  uint8_t src[] = {0xff, 0xee, 0xdd, 0xcc, 0xbb, 0xaa, 0x99, 0x88,
-                   0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11, 0x00};
-  TestSetValueFromData128(src, lldb::ByteOrder::eByteOrderBig);
+  uint8_t src[] = {0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08,
+                   0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00};
+  TestSetValueFromData(etalon128, src, 16, lldb::ByteOrder::eByteOrderBig,
+                       RegisterValue::eTypeUIntN);
+}
+
+static const Scalar etalon256(APInt(256, 0x1f1e1d1c1b1a1918ull) << 3 * 64 |
+                              APInt(256, 0x1716151413121110ull) << 2 * 64 |
+                              APInt(256, 0x0f0e0d0c0b0a0908ull) << 1 * 64 |
+                              APInt(256, 0x0706050403020100ull) << 0 * 64);
+
+TEST(RegisterValueTest, SetValueFromData_256_le) {
+  uint8_t src[] = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+                   0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+                   0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+                   0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f};
+  TestSetValueFromData(etalon256, src, 32, lldb::ByteOrder::eByteOrderLittle,
+                       RegisterValue::eTypeUIntN);
+}
+
+TEST(RegisterValueTest, SetValueFromData_256_be) {
+  uint8_t src[] = {0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18,
+                   0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10,
+                   0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08,
+                   0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00};
+  TestSetValueFromData(etalon256, src, 32, lldb::ByteOrder::eByteOrderBig,
+                       RegisterValue::eTypeUIntN);
+}
+
+static const Scalar etalon257(APInt(512, 0x0000000000000001ull) << 4 * 64 |
+                              APInt(512, 0x1f1e1d1c1b1a1918ull) << 3 * 64 |
+                              APInt(512, 0x1716151413121110ull) << 2 * 64 |
+                              APInt(512, 0x0f0e0d0c0b0a0908ull) << 1 * 64 |
+                              APInt(512, 0x0706050403020100ull) << 0 * 64);
+
+TEST(RegisterValueTest, SetValueFromData_257_le) {
+  uint8_t src[] = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+                   0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11,
+                   0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a,
+                   0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x01};
+  TestSetValueFromData(etalon257, src, 33, lldb::ByteOrder::eByteOrderLittle,
+                       RegisterValue::eTypeUIntN);
+}
+
+TEST(RegisterValueTest, SetValueFromData_257_be) {
+  uint8_t src[] = {0x01, 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18,
+                   0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f,
+                   0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06,
+                   0x05, 0x04, 0x03, 0x02, 0x01, 0x00};
+  TestSetValueFromData(etalon257, src, 33, lldb::ByteOrder::eByteOrderBig,
+                       RegisterValue::eTypeUIntN);
 }

From 93a8ca8fc738379333242ee51d9e080fbb283c6e Mon Sep 17 00:00:00 2001
From: Nathan Corbyn <n_corbyn@apple.com>
Date: Tue, 18 Nov 2025 16:42:58 +0000
Subject: [PATCH 40/52] [AArch64][GISel] Don't crash in known-bits when copying
 from vectors to non-vectors (#168081)

Updates the demanded elements before recursing through copies in case
the type of the source register changes from a non-vector register to a
vector register.

Fixes #167842.
---
 .../CodeGen/GlobalISel/GISelValueTracking.cpp | 11 +++-
 .../GlobalISel/knownbits-copy-vector-crash.ll | 56 +++++++++++++++++++
 2 files changed, 65 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/knownbits-copy-vector-crash.ll

diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index c1fb8b6d78ff8..ecba323f8d6bf 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -247,6 +247,7 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
     for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
       const MachineOperand &Src = MI.getOperand(Idx);
       Register SrcReg = Src.getReg();
+      LLT SrcTy = MRI.getType(SrcReg);
       // Look through trivial copies and phis but don't look through trivial
       // copies or phis of the form `%1:(s32) = OP %0:gpr32`, known-bits
       // analysis is currently unable to determine the bit width of a
@@ -255,9 +256,15 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
       // We can't use NoSubRegister by name as it's defined by each target but
       // it's always defined to be 0 by tablegen.
       if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ &&
-          MRI.getType(SrcReg).isValid()) {
+          SrcTy.isValid()) {
+        // In case we're forwarding from a vector register to a non-vector
+        // register we need to update the demanded elements to reflect this
+        // before recursing.
+        APInt NowDemandedElts = SrcTy.isFixedVector() && !DstTy.isFixedVector()
+                                    ? APInt::getAllOnes(SrcTy.getNumElements())
+                                    : DemandedElts; // Known to be APInt(1, 1)
         // For COPYs we don't do anything, don't increase the depth.
-        computeKnownBitsImpl(SrcReg, Known2, DemandedElts,
+        computeKnownBitsImpl(SrcReg, Known2, NowDemandedElts,
                              Depth + (Opcode != TargetOpcode::COPY));
         Known2 = Known2.anyextOrTrunc(BitWidth);
         Known = Known.intersectWith(Known2);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-copy-vector-crash.ll b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-copy-vector-crash.ll
new file mode 100644
index 0000000000000..f15253682c336
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-copy-vector-crash.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -global-isel -o - %s | FileCheck %s
+
+target triple = "aarch64-unknown-unknown"
+
+; Check we don't crash here when computing known bits.
+
+define <4 x i32> @test(<8 x i16> %in, i1 %continue) {
+; CHECK-LABEL: test:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    mov w12, wzr
+; CHECK-NEXT:    mov x8, sp
+; CHECK-NEXT:    mov w9, #2 // =0x2
+; CHECK-NEXT:    mov w10, #0 // =0x0
+; CHECK-NEXT:  .LBB0_1: // %loop
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    mov w11, w12
+; CHECK-NEXT:    mov w12, w12
+; CHECK-NEXT:    str q0, [sp]
+; CHECK-NEXT:    and x12, x12, #0x7
+; CHECK-NEXT:    umull x12, w12, w9
+; CHECK-NEXT:    ldrb w12, [x8, x12]
+; CHECK-NEXT:    cmp w12, #0
+; CHECK-NEXT:    cset w12, eq
+; CHECK-NEXT:    fmov s1, w12
+; CHECK-NEXT:    mov v1.b[1], w10
+; CHECK-NEXT:    mov v1.b[2], w10
+; CHECK-NEXT:    mov v1.b[3], w10
+; CHECK-NEXT:    fmov w12, s1
+; CHECK-NEXT:    tbz w0, #0, .LBB0_1
+; CHECK-NEXT:  // %bb.2: // %exit
+; CHECK-NEXT:    fmov s0, w11
+; CHECK-NEXT:    mov v0.s[1], wzr
+; CHECK-NEXT:    mov v0.s[2], wzr
+; CHECK-NEXT:    mov v0.s[3], wzr
+; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    ret
+entry:
+  br label %loop
+
+exit:
+  %result = insertelement <4 x i32> zeroinitializer, i32 %index, i64 0
+  ret <4 x i32> %result
+
+loop:
+  %index = phi i32 [ 0, %entry ], [ %insert.bitcast, %loop ]
+  %extracted = extractelement <8 x i16> %in, i32 %index
+  %masked = and i16 %extracted, 255
+  %maskedIsZero = icmp eq i16 %masked, 0
+  %maskedIsZero.zext = zext i1 %maskedIsZero to i8
+  %insert = insertelement <4 x i8> zeroinitializer, i8 %maskedIsZero.zext, i64 0
+  %insert.bitcast = bitcast <4 x i8> %insert to i32
+  br i1 %continue, label %exit, label %loop
+}

From 2675dcd72d02ee1ac2472b7d2914bfe601ff33d4 Mon Sep 17 00:00:00 2001
From: Chad Smith <cssmith@meta.com>
Date: Tue, 18 Nov 2025 08:46:40 -0800
Subject: [PATCH 41/52] [lldb] update lldb-server platform help parsing
 (attempt 3) (#164904)

* original change #162730
* with Windows fix #164843
* remove timeout that was pointed out in the comment above
* Remove test that starts and listens on a socket to avoid timeout
issues
---
 ...s.test => TestGdbserverErrorMessages.test} |   0
 .../TestPlatformErrorMessages.test            |  25 ++
 .../Shell/lldb-server/TestPlatformHelp.test   |  40 +++
 lldb/tools/lldb-server/CMakeLists.txt         |   5 +
 lldb/tools/lldb-server/PlatformOptions.td     |  75 +++++
 lldb/tools/lldb-server/lldb-platform.cpp      | 265 +++++++++++-------
 6 files changed, 301 insertions(+), 109 deletions(-)
 rename lldb/test/Shell/lldb-server/{TestErrorMessages.test => TestGdbserverErrorMessages.test} (100%)
 create mode 100644 lldb/test/Shell/lldb-server/TestPlatformErrorMessages.test
 create mode 100644 lldb/test/Shell/lldb-server/TestPlatformHelp.test
 create mode 100644 lldb/tools/lldb-server/PlatformOptions.td

diff --git a/lldb/test/Shell/lldb-server/TestErrorMessages.test b/lldb/test/Shell/lldb-server/TestGdbserverErrorMessages.test
similarity index 100%
rename from lldb/test/Shell/lldb-server/TestErrorMessages.test
rename to lldb/test/Shell/lldb-server/TestGdbserverErrorMessages.test
diff --git a/lldb/test/Shell/lldb-server/TestPlatformErrorMessages.test b/lldb/test/Shell/lldb-server/TestPlatformErrorMessages.test
new file mode 100644
index 0000000000000..7d3b37aa5fc39
--- /dev/null
+++ b/lldb/test/Shell/lldb-server/TestPlatformErrorMessages.test
@@ -0,0 +1,25 @@
+RUN: %platformserver 2>&1 | FileCheck --check-prefixes=NO_LISTEN,ALL %s
+NO_LISTEN: error: either --listen or --child-platform-fd is required
+
+RUN: %lldb-server platform --listen 2>&1 | FileCheck --check-prefixes=LISTEN_MISSING,ALL %s
+LISTEN_MISSING: error: --listen: missing argument
+
+RUN: %lldb-server p --bogus 2>&1 | FileCheck --check-prefixes=BOGUS,ALL %s
+BOGUS: error: unknown argument '--bogus'
+
+RUN: %platformserver --gdbserver-port 2>&1 | FileCheck --check-prefixes=GDBPORT_MISSING,ALL %s
+GDBPORT_MISSING: error: --gdbserver-port: missing argument
+
+RUN: %platformserver --gdbserver-port notanumber --listen :1234 2>&1 | FileCheck --check-prefixes=GDBPORT_INVALID %s
+GDBPORT_INVALID: error: invalid --gdbserver-port value
+
+RUN: %platformserver --socket-file 2>&1 | FileCheck --check-prefixes=SOCKETFILE_MISSING,ALL %s
+SOCKETFILE_MISSING: error: --socket-file: missing argument
+
+RUN: %platformserver --log-file 2>&1 | FileCheck --check-prefixes=LOGFILE_MISSING,ALL %s
+LOGFILE_MISSING: error: --log-file: missing argument
+
+RUN: %platformserver --log-channels 2>&1 | FileCheck --check-prefixes=LOGCHANNELS_MISSING,ALL %s
+LOGCHANNELS_MISSING: error: --log-channels: missing argument
+
+ALL: Use 'lldb-server{{(\.exe)?}} {{p|platform}} --help' for a complete list of options.
diff --git a/lldb/test/Shell/lldb-server/TestPlatformHelp.test b/lldb/test/Shell/lldb-server/TestPlatformHelp.test
new file mode 100644
index 0000000000000..c5ced8a318100
--- /dev/null
+++ b/lldb/test/Shell/lldb-server/TestPlatformHelp.test
@@ -0,0 +1,40 @@
+RUN: %platformserver --help 2>&1 | FileCheck %s
+RUN: %platformserver -h 2>&1 | FileCheck %s
+RUN: %lldb-server p --help 2>&1 | FileCheck %s
+RUN: %lldb-server p -h 2>&1 | FileCheck %s
+RUN: %lldb-server platform --help 2>&1 | FileCheck %s
+RUN: %lldb-server platform -h 2>&1 | FileCheck %s
+
+CHECK: OVERVIEW: lldb-server{{(\.exe)?}} platform
+
+CHECK: USAGE: lldb-server{{(\.exe)?}} {{p|platform}} [options] --listen <[host]:port> {{\[}}[--] program args...]
+
+CHECK: CONNECTION OPTIONS:
+CHECK: --gdbserver-port <port>
+CHECK-SAME: Short form: -P
+CHECK: --listen <[host]:port>
+CHECK-SAME: Short form: -L
+CHECK: --socket-file <path>
+CHECK-SAME: Short form: -f
+
+CHECK: GENERAL OPTIONS:
+CHECK: --help
+CHECK: --log-channels <channel1 categories...:channel2 categories...>
+CHECK: Short form: -c
+CHECK: --log-file <file>
+CHECK-SAME: Short form: -l
+CHECK: --server
+
+CHECK: OPTIONS:
+CHECK: -- program args
+
+CHECK: DESCRIPTION
+CHECK: Acts as a platform server for remote debugging
+
+CHECK: EXAMPLES
+CHECK: # Listen on port 1234, exit after first connection
+CHECK: lldb-server{{(\.exe)?}} platform --listen tcp://0.0.0.0:1234
+CHECK: # Listen on port 5555, accept multiple connections
+CHECK: lldb-server{{(\.exe)?}} platform --server --listen tcp://localhost:5555
+CHECK: # Listen on Unix domain socket
+CHECK: lldb-server{{(\.exe)?}} platform --listen unix:///tmp/lldb-server.sock
diff --git a/lldb/tools/lldb-server/CMakeLists.txt b/lldb/tools/lldb-server/CMakeLists.txt
index 1d8dc72a3f872..fb55c64936121 100644
--- a/lldb/tools/lldb-server/CMakeLists.txt
+++ b/lldb/tools/lldb-server/CMakeLists.txt
@@ -2,6 +2,10 @@ set(LLVM_TARGET_DEFINITIONS LLGSOptions.td)
 tablegen(LLVM LLGSOptions.inc -gen-opt-parser-defs)
 add_public_tablegen_target(LLGSOptionsTableGen)
 
+set(LLVM_TARGET_DEFINITIONS PlatformOptions.td)
+tablegen(LLVM PlatformOptions.inc -gen-opt-parser-defs)
+add_public_tablegen_target(PlatformOptionsTableGen)
+
 set(LLDB_PLUGINS)
 
 if(CMAKE_SYSTEM_NAME MATCHES "Linux|Android")
@@ -67,6 +71,7 @@ add_lldb_tool(lldb-server
 
 add_dependencies(lldb-server
   LLGSOptionsTableGen
+  PlatformOptionsTableGen
   ${tablegen_deps}
 )
 target_include_directories(lldb-server PRIVATE "${LLDB_SOURCE_DIR}/source")
diff --git a/lldb/tools/lldb-server/PlatformOptions.td b/lldb/tools/lldb-server/PlatformOptions.td
new file mode 100644
index 0000000000000..eedd1d8c35343
--- /dev/null
+++ b/lldb/tools/lldb-server/PlatformOptions.td
@@ -0,0 +1,75 @@
+include "llvm/Option/OptParser.td"
+
+class F<string name>: Flag<["--", "-"], name>;
+class R<list<string> prefixes, string name>
+  : Option<prefixes, name, KIND_REMAINING_ARGS>;
+
+multiclass SJ<string name, string help> {
+  def NAME: Separate<["--", "-"], name>,
+    HelpText<help>;
+  def NAME # _eq: Joined<["--", "-"], name # "=">,
+    Alias<!cast<Separate>(NAME)>;
+}
+
+def grp_connect : OptionGroup<"connection">, HelpText<"CONNECTION OPTIONS">;
+
+defm listen: SJ<"listen", "Host and port to listen on. Format: [host]:port or protocol://[host]:port (e.g., tcp://localhost:1234, unix:///path/to/socket). Short form: -L">,
+  MetaVarName<"<[host]:port>">,
+  Group<grp_connect>;
+def: Separate<["-"], "L">, Alias<listen>,
+  Group<grp_connect>;
+
+defm socket_file: SJ<"socket-file", "Write listening socket information (port number for TCP or path for Unix domain sockets) to the specified file. Short form: -f">,
+  MetaVarName<"<path>">,
+  Group<grp_connect>;
+def: Separate<["-"], "f">, Alias<socket_file>,
+  Group<grp_connect>;
+
+defm gdbserver_port: SJ<"gdbserver-port", "Port to use for spawned gdbserver instances. If 0 or unspecified, a port will be chosen automatically. Short form: -P">,
+  MetaVarName<"<port>">,
+  Group<grp_connect>;
+def: Separate<["-"], "P">, Alias<gdbserver_port>,
+  Group<grp_connect>;
+
+defm child_platform_fd: SJ<"child-platform-fd", "File descriptor for communication with parent platform process (internal use only).">,
+  MetaVarName<"<fd>">,
+  Group<grp_connect>,
+  Flags<[HelpHidden]>;
+
+def grp_general : OptionGroup<"general options">, HelpText<"GENERAL OPTIONS">;
+
+def server: F<"server">,
+  HelpText<"Run in server mode, accepting multiple client connections sequentially. Without this flag, the server exits after handling the first connection.">,
+  Group<grp_general>;
+
+defm log_channels: SJ<"log-channels", "Channels to log. A colon-separated list of entries. Each entry starts with a channel followed by a space-separated list of categories. Common channels: lldb, gdb-remote, platform, process. Short form: -c">,
+  MetaVarName<"<channel1 categories...:channel2 categories...>">,
+  Group<grp_general>;
+def: Separate<["-"], "c">, Alias<log_channels>,
+  Group<grp_general>;
+
+defm log_file: SJ<"log-file", "Destination file to log to. If empty, log to stderr. Short form: -l">,
+  MetaVarName<"<file>">,
+  Group<grp_general>;
+def: Separate<["-"], "l">, Alias<log_file>,
+  Group<grp_general>;
+
+def debug: F<"debug">,
+  HelpText<"(Unused, kept for backward compatibility)">,
+  Group<grp_general>,
+  Flags<[HelpHidden]>;
+
+def verbose: F<"verbose">,
+  HelpText<"(Unused, kept for backward compatibility)">,
+  Group<grp_general>,
+  Flags<[HelpHidden]>;
+
+def help: F<"help">, 
+  HelpText<"Display this help message and exit.">,
+  Group<grp_general>;
+def: Flag<["-"], "h">, Alias<help>,
+  Group<grp_general>;
+
+def REM : R<["--"], "">, 
+  HelpText<"Arguments to pass to launched gdbserver instances.">,
+  MetaVarName<"program args">;
diff --git a/lldb/tools/lldb-server/lldb-platform.cpp b/lldb/tools/lldb-server/lldb-platform.cpp
index 0bd928507ba89..59b1eb419bc2b 100644
--- a/lldb/tools/lldb-server/lldb-platform.cpp
+++ b/lldb/tools/lldb-server/lldb-platform.cpp
@@ -21,6 +21,9 @@
 #include <fstream>
 #include <optional>
 
+#include "llvm/Option/ArgList.h"
+#include "llvm/Option/OptTable.h"
+#include "llvm/Option/Option.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/ScopedPrinter.h"
 #include "llvm/Support/WithColor.h"
@@ -56,22 +59,69 @@ using namespace llvm;
 // of target CPUs. For now, let's just use 100.
 static const int backlog = 100;
 static const int socket_error = -1;
-static int g_debug = 0;
-static int g_verbose = 0;
-static int g_server = 0;
-
-// option descriptors for getopt_long_only()
-static struct option g_long_options[] = {
-    {"debug", no_argument, &g_debug, 1},
-    {"verbose", no_argument, &g_verbose, 1},
-    {"log-file", required_argument, nullptr, 'l'},
-    {"log-channels", required_argument, nullptr, 'c'},
-    {"listen", required_argument, nullptr, 'L'},
-    {"gdbserver-port", required_argument, nullptr, 'P'},
-    {"socket-file", required_argument, nullptr, 'f'},
-    {"server", no_argument, &g_server, 1},
-    {"child-platform-fd", required_argument, nullptr, 2},
-    {nullptr, 0, nullptr, 0}};
+
+namespace {
+using namespace llvm::opt;
+
+enum ID {
+  OPT_INVALID = 0, // This is not an option ID.
+#define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__),
+#include "PlatformOptions.inc"
+#undef OPTION
+};
+
+#define OPTTABLE_STR_TABLE_CODE
+#include "PlatformOptions.inc"
+#undef OPTTABLE_STR_TABLE_CODE
+
+#define OPTTABLE_PREFIXES_TABLE_CODE
+#include "PlatformOptions.inc"
+#undef OPTTABLE_PREFIXES_TABLE_CODE
+
+static constexpr opt::OptTable::Info InfoTable[] = {
+#define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__),
+#include "PlatformOptions.inc"
+#undef OPTION
+};
+
+class PlatformOptTable : public opt::GenericOptTable {
+public:
+  PlatformOptTable()
+      : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {}
+
+  void PrintHelp(llvm::StringRef Name) {
+    std::string Usage =
+        (Name + " [options] --listen <[host]:port> [[--] program args...]")
+            .str();
+
+    std::string Title = "lldb-server platform";
+
+    OptTable::printHelp(llvm::outs(), Usage.c_str(), Title.c_str());
+
+    llvm::outs() << R"(
+DESCRIPTION
+  Acts as a platform server for remote debugging. When LLDB clients connect,
+  the platform server handles platform operations (file transfers, process
+  launching) and spawns debug server instances (lldb-server gdbserver) to
+  handle actual debugging sessions.
+
+  By default, the server exits after handling one connection. Use --server
+  to keep running and accept multiple connections sequentially.
+
+EXAMPLES
+  # Listen on port 1234, exit after first connection
+  lldb-server platform --listen tcp://0.0.0.0:1234
+
+  # Listen on port 5555, accept multiple connections
+  lldb-server platform --server --listen tcp://localhost:5555
+
+  # Listen on Unix domain socket
+  lldb-server platform --listen unix:///tmp/lldb-server.sock
+
+)";
+  }
+};
+} // namespace
 
 #if defined(__APPLE__)
 #define LOW_PORT (IPPORT_RESERVED)
@@ -97,12 +147,11 @@ static void signal_handler(int signo) {
 }
 #endif
 
-static void display_usage(const char *progname, const char *subcommand) {
-  fprintf(stderr, "Usage:\n  %s %s [--log-file log-file-name] [--log-channels "
-                  "log-channel-list] [--port-file port-file-path] --server "
-                  "--listen port\n",
-          progname, subcommand);
-  exit(0);
+static void display_usage(PlatformOptTable &Opts, const char *progname,
+                          const char *subcommand) {
+  std::string Name =
+      (llvm::sys::path::filename(progname) + " " + subcommand).str();
+  Opts.PrintHelp(Name);
 }
 
 static Status parse_listen_host_port(Socket::SocketProtocol &protocol,
@@ -261,7 +310,8 @@ static Status spawn_process(const char *progname, const FileSpec &prog,
                             const Socket *conn_socket, uint16_t gdb_port,
                             const lldb_private::Args &args,
                             const std::string &log_file,
-                            const StringRef log_channels, MainLoop &main_loop) {
+                            const StringRef log_channels, MainLoop &main_loop,
+                            bool multi_client) {
   Status error;
   SharedSocket shared_socket(conn_socket, error);
   if (error.Fail())
@@ -297,9 +347,12 @@ static Status spawn_process(const char *progname, const FileSpec &prog,
 
   launch_info.SetLaunchInSeparateProcessGroup(false);
 
-  if (g_server)
+  // Set up process monitor callback based on whether we're in server mode.
+  if (multi_client)
+    // In server mode: empty callback (don't terminate when child exits).
     launch_info.SetMonitorProcessCallback([](lldb::pid_t, int, int) {});
   else
+    // In single-client mode: terminate main loop when child exits.
     launch_info.SetMonitorProcessCallback([&main_loop](lldb::pid_t, int, int) {
       main_loop.AddPendingCallback(
           [](MainLoopBase &loop) { loop.RequestTermination(); });
@@ -371,107 +424,101 @@ int main_platform(int argc, char *argv[]) {
   signal(SIGPIPE, SIG_IGN);
   signal(SIGHUP, signal_handler);
 #endif
-  int long_option_index = 0;
-  Status error;
-  std::string listen_host_port;
-  int ch;
 
-  std::string log_file;
-  StringRef
-      log_channels; // e.g. "lldb process threads:gdb-remote default:linux all"
+  // Special handling for 'help' as first argument.
+  if (argc > 0 && strcmp(argv[0], "help") == 0) {
+    PlatformOptTable Opts;
+    display_usage(Opts, progname, subcommand);
+    return EXIT_SUCCESS;
+  }
 
+  Status error;
   shared_fd_t fd = SharedSocket::kInvalidFD;
-
   uint16_t gdbserver_port = 0;
-
   FileSpec socket_file;
-  bool show_usage = false;
-  int option_error = 0;
 
-  std::string short_options(OptionParser::GetShortOptionString(g_long_options));
+  PlatformOptTable Opts;
+  BumpPtrAllocator Alloc;
+  StringSaver Saver(Alloc);
+  bool HasError = false;
 
-#if __GLIBC__
-  optind = 0;
-#else
-  optreset = 1;
-  optind = 1;
-#endif
+  opt::InputArgList Args =
+      Opts.parseArgs(argc, argv, OPT_UNKNOWN, Saver, [&](llvm::StringRef Msg) {
+        WithColor::error() << Msg << "\n";
+        HasError = true;
+      });
 
-  while ((ch = getopt_long_only(argc, argv, short_options.c_str(),
-                                g_long_options, &long_option_index)) != -1) {
-    switch (ch) {
-    case 0: // Any optional that auto set themselves will return 0
-      break;
+  std::string Name =
+      (llvm::sys::path::filename(progname) + " " + subcommand).str();
+  std::string HelpText =
+      "Use '" + Name + " --help' for a complete list of options.\n";
 
-    case 'L':
-      listen_host_port.append(optarg);
-      break;
+  if (HasError) {
+    llvm::errs() << HelpText;
+    return EXIT_FAILURE;
+  }
 
-    case 'l': // Set Log File
-      if (optarg && optarg[0])
-        log_file.assign(optarg);
-      break;
+  if (Args.hasArg(OPT_help)) {
+    display_usage(Opts, progname, subcommand);
+    return EXIT_SUCCESS;
+  }
 
-    case 'c': // Log Channels
-      if (optarg && optarg[0])
-        log_channels = StringRef(optarg);
-      break;
+  // Parse arguments.
+  std::string listen_host_port = Args.getLastArgValue(OPT_listen).str();
+  std::string log_file = Args.getLastArgValue(OPT_log_file).str();
+  StringRef log_channels = Args.getLastArgValue(OPT_log_channels);
+  bool multi_client = Args.hasArg(OPT_server);
+  [[maybe_unused]] bool debug = Args.hasArg(OPT_debug);
+  [[maybe_unused]] bool verbose = Args.hasArg(OPT_verbose);
+
+  if (Args.hasArg(OPT_socket_file)) {
+    socket_file.SetFile(Args.getLastArgValue(OPT_socket_file),
+                        FileSpec::Style::native);
+  }
 
-    case 'f': // Socket file
-      if (optarg && optarg[0])
-        socket_file.SetFile(optarg, FileSpec::Style::native);
-      break;
+  if (Args.hasArg(OPT_gdbserver_port)) {
+    if (!llvm::to_integer(Args.getLastArgValue(OPT_gdbserver_port),
+                          gdbserver_port)) {
+      WithColor::error() << "invalid --gdbserver-port value\n";
+      return EXIT_FAILURE;
+    }
+  }
 
-    case 'P':
-    case 'm':
-    case 'M': {
-      uint16_t portnum;
-      if (!llvm::to_integer(optarg, portnum)) {
-        WithColor::error() << "invalid port number string " << optarg << "\n";
-        option_error = 2;
-        break;
-      }
-      // Note the condition gdbserver_port > HIGH_PORT is valid in case of using
-      // --child-platform-fd. Check gdbserver_port later.
-      if (ch == 'P')
-        gdbserver_port = portnum;
-      else if (gdbserver_port == 0)
-        gdbserver_port = portnum;
-    } break;
-
-    case 2: {
-      uint64_t _fd;
-      if (!llvm::to_integer(optarg, _fd)) {
-        WithColor::error() << "invalid fd " << optarg << "\n";
-        option_error = 6;
-      } else
-        fd = (shared_fd_t)_fd;
-    } break;
-
-    case 'h': /* fall-through is intentional */
-    case '?':
-      show_usage = true;
-      break;
+  if (Args.hasArg(OPT_child_platform_fd)) {
+    uint64_t _fd;
+    if (!llvm::to_integer(Args.getLastArgValue(OPT_child_platform_fd), _fd)) {
+      WithColor::error() << "invalid --child-platform-fd value\n";
+      return EXIT_FAILURE;
     }
+    fd = (shared_fd_t)_fd;
   }
 
   if (!LLDBServerUtilities::SetupLogging(log_file, log_channels, 0))
     return -1;
 
   // Print usage and exit if no listening port is specified.
-  if (listen_host_port.empty() && fd == SharedSocket::kInvalidFD)
-    show_usage = true;
+  if (listen_host_port.empty() && fd == SharedSocket::kInvalidFD) {
+    WithColor::error() << "either --listen or --child-platform-fd is required\n"
+                       << HelpText;
+    return EXIT_FAILURE;
+  }
 
-  if (show_usage || option_error) {
-    display_usage(progname, subcommand);
-    exit(option_error);
+  // Get remaining arguments for inferior.
+  std::vector<llvm::StringRef> Inputs;
+  for (opt::Arg *Arg : Args.filtered(OPT_INPUT))
+    Inputs.push_back(Arg->getValue());
+  if (opt::Arg *Arg = Args.getLastArg(OPT_REM)) {
+    for (const char *Val : Arg->getValues())
+      Inputs.push_back(Val);
   }
 
-  // Skip any options we consumed with getopt_long_only.
-  argc -= optind;
-  argv += optind;
   lldb_private::Args inferior_arguments;
-  inferior_arguments.SetArguments(argc, const_cast<const char **>(argv));
+  if (!Inputs.empty()) {
+    std::vector<const char *> args_ptrs;
+    for (const auto &Input : Inputs)
+      args_ptrs.push_back(Input.data());
+    inferior_arguments.SetArguments(args_ptrs.size(), args_ptrs.data());
+  }
 
   FileSpec debugserver_path = GetDebugserverPath();
   if (!debugserver_path) {
@@ -514,7 +561,7 @@ int main_platform(int argc, char *argv[]) {
     platform.SetConnection(
         std::make_unique<ConnectionFileDescriptor>(std::move(socket)));
     client_handle(platform, inferior_arguments);
-    return 0;
+    return EXIT_SUCCESS;
   }
 
   if (gdbserver_port != 0 &&
@@ -522,7 +569,7 @@ int main_platform(int argc, char *argv[]) {
     WithColor::error() << llvm::formatv("Port number {0} is not in the "
                                         "valid user port range of {1} - {2}\n",
                                         gdbserver_port, LOW_PORT, HIGH_PORT);
-    return 1;
+    return EXIT_FAILURE;
   }
 
   Socket::SocketProtocol protocol = Socket::ProtocolUnixDomain;
@@ -559,7 +606,7 @@ int main_platform(int argc, char *argv[]) {
     if (error.Fail()) {
       fprintf(stderr, "failed to write socket id to %s: %s\n",
               socket_file.GetPath().c_str(), error.AsCString());
-      return 1;
+      return EXIT_FAILURE;
     }
   }
 
@@ -577,22 +624,22 @@ int main_platform(int argc, char *argv[]) {
     llvm::Expected<std::vector<MainLoopBase::ReadHandleUP>> platform_handles =
         platform_sock->Accept(
             main_loop, [progname, gdbserver_port, &inferior_arguments, log_file,
-                        log_channels, &main_loop,
+                        log_channels, &main_loop, multi_client,
                         &platform_handles](std::unique_ptr<Socket> sock_up) {
               printf("Connection established.\n");
               Status error = spawn_process(
                   progname, HostInfo::GetProgramFileSpec(), sock_up.get(),
                   gdbserver_port, inferior_arguments, log_file, log_channels,
-                  main_loop);
+                  main_loop, multi_client);
               if (error.Fail()) {
                 Log *log = GetLog(LLDBLog::Platform);
                 LLDB_LOGF(log, "spawn_process failed: %s", error.AsCString());
                 WithColor::error()
                     << "spawn_process failed: " << error.AsCString() << "\n";
-                if (!g_server)
+                if (!multi_client)
                   main_loop.RequestTermination();
               }
-              if (!g_server)
+              if (!multi_client)
                 platform_handles->clear();
             });
     if (!platform_handles) {
@@ -616,5 +663,5 @@ int main_platform(int argc, char *argv[]) {
 
   fprintf(stderr, "lldb-server exiting...\n");
 
-  return 0;
+  return EXIT_SUCCESS;
 }

From c7d2ed43648ebd9076ee290928d7bc805906882d Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" <yaxun.liu@amd.com>
Date: Tue, 18 Nov 2025 11:50:19 -0500
Subject: [PATCH 42/52] Reland [Support][Jobserver][Tests] Simplify default
 executor init (#168165)

and make (#165264)

Truly restore Executor::getDefaultExecutor. The previous change missed
the std::unique_ptr, which is needed on a normal program exit: only with
it is the ThreadPoolExecutor destructor called on a normal program exit,
where it ensures the executor has been stopped and waits for worker
threads to finish. The wait is important as it prevents intermittent
crashes on Windows when the process is doing a full exit.
---
 llvm/lib/Support/Parallel.cpp            | 21 +-----
 llvm/unittests/Support/JobserverTest.cpp | 81 +++++++++++++++++++-----
 2 files changed, 68 insertions(+), 34 deletions(-)

diff --git a/llvm/lib/Support/Parallel.cpp b/llvm/lib/Support/Parallel.cpp
index 8e0c724accb36..ab220b8f2ceba 100644
--- a/llvm/lib/Support/Parallel.cpp
+++ b/llvm/lib/Support/Parallel.cpp
@@ -193,16 +193,7 @@ class ThreadPoolExecutor : public Executor {
   JobserverClient *TheJobserver = nullptr;
 };
 
-// A global raw pointer to the executor. Lifetime is managed by the
-// objects created within createExecutor().
-static Executor *TheExec = nullptr;
-static std::once_flag Flag;
-
-// This function will be called exactly once to create the executor.
-// It contains the necessary platform-specific logic. Since functions
-// called by std::call_once cannot return value, we have to set the
-// executor as a global variable.
-void createExecutor() {
+Executor *Executor::getDefaultExecutor() {
 #ifdef _WIN32
   // The ManagedStatic enables the ThreadPoolExecutor to be stopped via
   // llvm_shutdown() which allows a "clean" fast exit, e.g. via _exit(). This
@@ -226,22 +217,16 @@ void createExecutor() {
                        ThreadPoolExecutor::Deleter>
       ManagedExec;
   static std::unique_ptr<ThreadPoolExecutor> Exec(&(*ManagedExec));
-  TheExec = Exec.get();
+  return Exec.get();
 #else
   // ManagedStatic is not desired on other platforms. When `Exec` is destroyed
   // by llvm_shutdown(), worker threads will clean up and invoke TLS
   // destructors. This can lead to race conditions if other threads attempt to
   // access TLS objects that have already been destroyed.
   static ThreadPoolExecutor Exec(strategy);
-  TheExec = &Exec;
+  return &Exec;
 #endif
 }
-
-Executor *Executor::getDefaultExecutor() {
-  // Use std::call_once to lazily and safely initialize the executor.
-  std::call_once(Flag, createExecutor);
-  return TheExec;
-}
 } // namespace
 } // namespace detail
 
diff --git a/llvm/unittests/Support/JobserverTest.cpp b/llvm/unittests/Support/JobserverTest.cpp
index d27445897db0a..1917145704608 100644
--- a/llvm/unittests/Support/JobserverTest.cpp
+++ b/llvm/unittests/Support/JobserverTest.cpp
@@ -15,6 +15,7 @@
 #include "llvm/Config/llvm-config.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Parallel.h"
+#include "llvm/Support/Program.h"
 #include "llvm/Support/ThreadPool.h"
 #include "llvm/Support/raw_ostream.h"
 #include "gtest/gtest.h"
@@ -40,8 +41,14 @@
 
 using namespace llvm;
 
+// Provided by the unit test main to locate the current test binary.
+extern const char *TestMainArgv0;
+
 namespace {
 
+// Unique anchor whose address helps locate the current test binary.
+static int JobserverTestAnchor = 0;
+
 // RAII helper to set an environment variable for the duration of a test.
 class ScopedEnvironment {
   std::string Name;
@@ -382,51 +389,93 @@ TEST_F(JobserverStrategyTest, ThreadPoolConcurrencyIsLimited) {
   EXPECT_EQ(CompletedTasks, NumTasks);
 }
 
-TEST_F(JobserverStrategyTest, ParallelForIsLimited) {
+// Parent-side driver that spawns a fresh process to run the child test which
+// validates that parallelFor respects the jobserver limit when it is the first
+// user of the default executor in that process.
+TEST_F(JobserverStrategyTest, ParallelForIsLimited_Subprocess) {
+  // Mark child execution.
+  setenv("LLVM_JOBSERVER_TEST_CHILD", "1", 1);
+
+  // Find the current test binary and build args to run only the child test.
+  std::string Executable =
+      sys::fs::getMainExecutable(TestMainArgv0, &JobserverTestAnchor);
+  ASSERT_FALSE(Executable.empty()) << "Failed to get main executable path";
+  SmallVector<StringRef, 4> Args{Executable,
+                                 "--gtest_filter=JobserverStrategyTest."
+                                 "ParallelForIsLimited_SubprocessChild"};
+
+  std::string Error;
+  bool ExecFailed = false;
+  int RC = sys::ExecuteAndWait(Executable, Args, std::nullopt, {}, 0, 0, &Error,
+                               &ExecFailed);
+  unsetenv("LLVM_JOBSERVER_TEST_CHILD");
+  ASSERT_FALSE(ExecFailed) << Error;
+  ASSERT_EQ(RC, 0) << "Executable failed with exit code " << RC;
+}
+
+// Child-side test: create FIFO and make-proxy in this process, set the
+// jobserver strategy, and then run parallelFor.
+TEST_F(JobserverStrategyTest, ParallelForIsLimited_SubprocessChild) {
+  if (!getenv("LLVM_JOBSERVER_TEST_CHILD"))
+    GTEST_SKIP() << "Not running in child mode";
+
   // This test verifies that llvm::parallelFor respects the jobserver limit.
   const int NumExplicitJobs = 3;
   const int ConcurrencyLimit = NumExplicitJobs + 1; // +1 implicit
   const int NumTasks = 20;
 
-  LLVM_DEBUG(dbgs() << "Calling startMakeProxy with " << NumExplicitJobs
-                    << " jobs.\n");
   startMakeProxy(NumExplicitJobs);
-  LLVM_DEBUG(dbgs() << "MakeProxy is running.\n");
 
-  // Set the global strategy. parallelFor will use this.
+  // Set the global strategy before any default executor is created.
   parallel::strategy = jobserver_concurrency();
 
   std::atomic<int> ActiveTasks{0};
   std::atomic<int> MaxActiveTasks{0};
 
-  parallelFor(0, NumTasks, [&](int i) {
+  parallelFor(0, NumTasks, [&]([[maybe_unused]] int i) {
     int CurrentActive = ++ActiveTasks;
-    LLVM_DEBUG(dbgs() << "Task " << i << ": Active tasks: " << CurrentActive
-                      << "\n");
     int OldMax = MaxActiveTasks.load();
     while (CurrentActive > OldMax)
       MaxActiveTasks.compare_exchange_weak(OldMax, CurrentActive);
-
     std::this_thread::sleep_for(std::chrono::milliseconds(20));
     --ActiveTasks;
   });
 
-  LLVM_DEBUG(dbgs() << "ParallelFor finished. Max active tasks was "
-                    << MaxActiveTasks << ".\n");
   EXPECT_LE(MaxActiveTasks, ConcurrencyLimit);
 }
 
-TEST_F(JobserverStrategyTest, ParallelSortIsLimited) {
-  // This test serves as an integration test to ensure parallelSort completes
-  // correctly when running under the jobserver strategy. It doesn't directly
-  // measure concurrency but verifies correctness.
+// Parent-side driver for parallelSort child test.
+TEST_F(JobserverStrategyTest, ParallelSortIsLimited_Subprocess) {
+  setenv("LLVM_JOBSERVER_TEST_CHILD", "1", 1);
+
+  std::string Executable =
+      sys::fs::getMainExecutable(TestMainArgv0, &JobserverTestAnchor);
+  ASSERT_FALSE(Executable.empty()) << "Failed to get main executable path";
+  SmallVector<StringRef, 4> Args{Executable,
+                                 "--gtest_filter=JobserverStrategyTest."
+                                 "ParallelSortIsLimited_SubprocessChild"};
+
+  std::string Error;
+  bool ExecFailed = false;
+  int RC = sys::ExecuteAndWait(Executable, Args, std::nullopt, {}, 0, 0, &Error,
+                               &ExecFailed);
+  unsetenv("LLVM_JOBSERVER_TEST_CHILD");
+  ASSERT_FALSE(ExecFailed) << Error;
+  ASSERT_EQ(RC, 0) << "Executable failed with exit code " << RC;
+}
+
+// Child-side test: ensure parallelSort runs and completes correctly under the
+// jobserver strategy when it owns default executor initialization.
+TEST_F(JobserverStrategyTest, ParallelSortIsLimited_SubprocessChild) {
+  if (!getenv("LLVM_JOBSERVER_TEST_CHILD"))
+    GTEST_SKIP() << "Not running in child mode";
+
   const int NumExplicitJobs = 3;
   startMakeProxy(NumExplicitJobs);
 
   parallel::strategy = jobserver_concurrency();
 
   std::vector<int> V(1024);
-  // Fill with random data
   std::mt19937 randEngine;
   std::uniform_int_distribution<int> dist;
   for (int &i : V)

From 727ee7e2f169ec60797004dfb9b29ef7ea7cc47a Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra@codasip.com>
Date: Tue, 18 Nov 2025 16:54:12 +0000
Subject: [PATCH 43/52] [APInt] Introduce carry-less multiply primitives
 (#168527)

In line with a std proposal to introduce std::clmul, and in preparation
to introduce a clmul intrinsic, implement carry-less multiply primitives
for APIntOps, clmul[rh].

Ref: https://isocpp.org/files/papers/P3642R3.html
---
 llvm/include/llvm/ADT/APInt.h    | 21 ++++++++
 llvm/lib/Support/APInt.cpp       | 22 ++++++++-
 llvm/unittests/ADT/APIntTest.cpp | 83 ++++++++++++++++++++++++++++++++
 3 files changed, 125 insertions(+), 1 deletion(-)

diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h
index fdb3b84b73a1f..7e73cc1957c05 100644
--- a/llvm/include/llvm/ADT/APInt.h
+++ b/llvm/include/llvm/ADT/APInt.h
@@ -2440,6 +2440,27 @@ LLVM_ABI APInt fshl(const APInt &Hi, const APInt &Lo, const APInt &Shift);
 /// (4) fshr(i8 255, i8 0, i8 9)  = fshr(i8 255, i8 0, i8 1) // 9 % 8
 LLVM_ABI APInt fshr(const APInt &Hi, const APInt &Lo, const APInt &Shift);
 
+/// Perform a carry-less multiply, also known as XOR multiplication, and return
+/// the low bits. All arguments and the result have the same bitwidth.
+///
+/// Examples:
+/// (1) clmul(i4 1, i4 2)   = 2
+/// (2) clmul(i4 5, i4 6)   = 14
+/// (3) clmul(i4 -4, i4 2)  = -8
+/// (4) clmul(i4 -4, i4 -5) = 4
+LLVM_ABI APInt clmul(const APInt &LHS, const APInt &RHS);
+
+/// Perform a reversed carry-less multiply. Operand bitwidths must match.
+///
+/// clmulr(a, b) = bitreverse(clmul(bitreverse(a), bitreverse(b)))
+LLVM_ABI APInt clmulr(const APInt &LHS, const APInt &RHS);
+
+/// Perform a carry-less multiply, and return the high bits. All arguments and
+/// the result have the same bitwidth.
+///
+/// clmulh(a, b) = clmulr(a, b) >> 1
+LLVM_ABI APInt clmulh(const APInt &LHS, const APInt &RHS);
+
 } // namespace APIntOps
 
 // See friend declaration above. This additional declaration is required in
diff --git a/llvm/lib/Support/APInt.cpp b/llvm/lib/Support/APInt.cpp
index f6fd5f9ddd633..673cd867f0e45 100644
--- a/llvm/lib/Support/APInt.cpp
+++ b/llvm/lib/Support/APInt.cpp
@@ -15,10 +15,10 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/Sequence.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/bit.h"
-#include "llvm/Config/llvm-config.h"
 #include "llvm/Support/Alignment.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -3187,3 +3187,23 @@ APInt llvm::APIntOps::fshr(const APInt &Hi, const APInt &Lo,
     return Lo;
   return Hi.shl(Hi.getBitWidth() - ShiftAmt) | Lo.lshr(ShiftAmt);
 }
+
+APInt llvm::APIntOps::clmul(const APInt &LHS, const APInt &RHS) {
+  unsigned BW = LHS.getBitWidth();
+  assert(BW == RHS.getBitWidth() && "Operand mismatch");
+  APInt Result(BW, 0); // Running XOR of the selected partial products.
+  for (unsigned I : seq<unsigned>(BW))
+    if (RHS[I])
+      Result ^= LHS.shl(I); // Bit I of RHS contributes LHS << I, no carries.
+  return Result;
+}
+
+APInt llvm::APIntOps::clmulr(const APInt &LHS, const APInt &RHS) {
+  assert(LHS.getBitWidth() == RHS.getBitWidth() && "Operand mismatch");
+  return clmul(LHS.reverseBits(), RHS.reverseBits()).reverseBits();
+}
+
+APInt llvm::APIntOps::clmulh(const APInt &LHS, const APInt &RHS) {
+  assert(LHS.getBitWidth() == RHS.getBitWidth() && "Operand mismatch");
+  return clmulr(LHS, RHS).lshr(1); // High half is clmulr shifted down by one.
+}
diff --git a/llvm/unittests/ADT/APIntTest.cpp b/llvm/unittests/ADT/APIntTest.cpp
index ca9f9f17ee112..4cb537da72e87 100644
--- a/llvm/unittests/ADT/APIntTest.cpp
+++ b/llvm/unittests/ADT/APIntTest.cpp
@@ -3823,4 +3823,87 @@ TEST(APIntTest, Fshr) {
             -8193);
 }
 
+TEST(APIntTest, clmul) {
+  EXPECT_EQ(APIntOps::clmul(APInt(4, 1), APInt(4, 2)).getZExtValue(), 2U);
+  EXPECT_EQ(APIntOps::clmul(APInt(4, 5), APInt(4, 6)).getZExtValue(), 14U);
+  EXPECT_EQ(APIntOps::clmul(APInt(4, -4, /*isSigned=*/true),
+                            APInt(4, 2, /*isSigned=*/false))
+                .getSExtValue(),
+            -8); // 0b1100 clmul 0b0010 = 0b1000.
+  EXPECT_EQ(APIntOps::clmul(APInt(4, -4, /*isSigned=*/true),
+                            APInt(4, -5, /*isSigned=*/true))
+                .getSExtValue(),
+            4); // Low nibble of the full product 0b01110100.
+  EXPECT_EQ(APIntOps::clmul(APInt(8, 0), APInt(8, 255)).getZExtValue(), 0U);
+  EXPECT_EQ(APIntOps::clmul(APInt(8, 15), APInt(8, 15)).getZExtValue(), 85U);
+  EXPECT_EQ(APIntOps::clmul(APInt(8, 1), APInt(8, 2)).getZExtValue(), 2U);
+  EXPECT_EQ(APIntOps::clmul(APInt(64, 0, /*isSigned=*/true),
+                            APInt(64, 9223372036854775807, /*isSigned=*/true))
+                .getSExtValue(),
+            0);
+  EXPECT_EQ(APIntOps::clmul(APInt(64, 1, /*isSigned=*/true),
+                            APInt(64, 2, /*isSigned=*/true))
+                .getSExtValue(),
+            2);
+  EXPECT_EQ(APIntOps::clmul(APInt(16, -2, /*isSigned=*/true),
+                            APInt(16, -1, /*isSigned=*/true))
+                .getSExtValue(),
+            -21846); // 0xAAAA, the low half of the product 0x5555AAAA.
+}
+
+TEST(APIntTest, clmulr) {
+  EXPECT_EQ(APIntOps::clmulr(APInt(4, 1), APInt(4, 2)).getZExtValue(), 0U);
+  EXPECT_EQ(APIntOps::clmulr(APInt(4, 5), APInt(4, 6)).getZExtValue(), 3U);
+  EXPECT_EQ(APIntOps::clmulr(APInt(4, -4, /*isSigned=*/true),
+                             APInt(4, 2, /*isSigned=*/false))
+                .getSExtValue(),
+            3); // Bits [6:3] of the full product 0b00011000.
+  EXPECT_EQ(APIntOps::clmulr(APInt(4, -4, /*isSigned=*/true),
+                             APInt(4, -5, /*isSigned=*/true))
+                .getSExtValue(),
+            -2); // Bits [6:3] of the full product 0b01110100.
+  EXPECT_EQ(APIntOps::clmulr(APInt(8, 0), APInt(8, 255)).getZExtValue(), 0U);
+  EXPECT_EQ(APIntOps::clmulr(APInt(8, 15), APInt(8, 15)).getZExtValue(), 0U);
+  EXPECT_EQ(APIntOps::clmulr(APInt(8, 1), APInt(8, 2)).getZExtValue(), 0U);
+  EXPECT_EQ(APIntOps::clmulr(APInt(64, 0, /*isSigned=*/true),
+                             APInt(64, 9223372036854775807, /*isSigned=*/true))
+                .getSExtValue(),
+            0);
+  EXPECT_EQ(APIntOps::clmulr(APInt(64, 1, /*isSigned=*/true),
+                             APInt(64, 2, /*isSigned=*/true))
+                .getSExtValue(),
+            0);
+  EXPECT_EQ(APIntOps::clmulr(APInt(16, -2, /*isSigned=*/true),
+                             APInt(16, -1, /*isSigned=*/true))
+                .getSExtValue(),
+            -21845); // 0xAAAB, bits [30:15] of the product 0x5555AAAA.
+}
+
+TEST(APIntTest, clmulh) {
+  EXPECT_EQ(APIntOps::clmulh(APInt(4, 1), APInt(4, 2)).getZExtValue(), 0U);
+  EXPECT_EQ(APIntOps::clmulh(APInt(4, 5), APInt(4, 6)).getZExtValue(), 1U);
+  EXPECT_EQ(APIntOps::clmulh(APInt(4, -4, /*isSigned=*/true),
+                             APInt(4, 2, /*isSigned=*/false))
+                .getSExtValue(),
+            1); // High nibble of the full product 0b00011000.
+  EXPECT_EQ(APIntOps::clmulh(APInt(4, -4, /*isSigned=*/true),
+                             APInt(4, -5, /*isSigned=*/true))
+                .getSExtValue(),
+            7); // High nibble of the full product 0b01110100.
+  EXPECT_EQ(APIntOps::clmulh(APInt(8, 0), APInt(8, 255)).getZExtValue(), 0U);
+  EXPECT_EQ(APIntOps::clmulh(APInt(8, 15), APInt(8, 15)).getZExtValue(), 0U);
+  EXPECT_EQ(APIntOps::clmulh(APInt(8, 1), APInt(8, 2)).getZExtValue(), 0U);
+  EXPECT_EQ(APIntOps::clmulh(APInt(64, 0, /*isSigned=*/true),
+                             APInt(64, 9223372036854775807, /*isSigned=*/true))
+                .getSExtValue(),
+            0);
+  EXPECT_EQ(APIntOps::clmulh(APInt(64, 1, /*isSigned=*/true),
+                             APInt(64, 2, /*isSigned=*/true))
+                .getSExtValue(),
+            0);
+  EXPECT_EQ(APIntOps::clmulh(APInt(16, -2, /*isSigned=*/true),
+                             APInt(16, -1, /*isSigned=*/true))
+                .getSExtValue(),
+            21845); // 0x5555, the high half of the product 0x5555AAAA.
+}
 } // end anonymous namespace

From cb5812982d96e4c6a07ab77dfa969192d201bd20 Mon Sep 17 00:00:00 2001
From: vangthao95 <vang.thao@amd.com>
Date: Tue, 18 Nov 2025 09:00:57 -0800
Subject: [PATCH 44/52] [AMDGPU][GlobalISel] Add RegBankLegalize support for
 G_IS_FPCLASS (#167575)

---
 .../AMDGPU/AMDGPURegBankLegalizeRules.cpp     |   8 +
 .../CodeGen/AMDGPU/llvm.is.fpclass.f16.ll     | 169 +++++++---
 llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll   | 312 +++++++++++++-----
 3 files changed, 347 insertions(+), 142 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index b81a08de383d9..e36c57ad59bfd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -960,6 +960,14 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
       .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
       .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);
 
+  addRulesForGOpcs({G_IS_FPCLASS})
+      .Any({{DivS1, S16}, {{Vcc}, {Vgpr16}}})
+      .Any({{UniS1, S16}, {{UniInVcc}, {Vgpr16}}})
+      .Any({{DivS1, S32}, {{Vcc}, {Vgpr32}}})
+      .Any({{UniS1, S32}, {{UniInVcc}, {Vgpr32}}})
+      .Any({{DivS1, S64}, {{Vcc}, {Vgpr64}}})
+      .Any({{UniS1, S64}, {{UniInVcc}, {Vgpr64}}});
+
   using namespace Intrinsic;
 
   addRulesForIOpcs({amdgcn_s_getpc}).Any({{UniS64, _}, {{Sgpr64}, {None}}});
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
index dd2cffd7bd161..dd19ba17bb292 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
@@ -1,16 +1,19 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN:  llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx704 < %s  | FileCheck --check-prefixes=GFX7CHECK,GFX7SELDAG %s
-; RUN:  llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx704 < %s  | FileCheck --check-prefixes=GFX7CHECK,GFX7GLISEL %s
+; RUN:  llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx704 < %s  | FileCheck --check-prefixes=GFX7CHECK,GFX7GLISEL %s
 ; RUN:  llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx803 < %s  | FileCheck --check-prefixes=GFX8CHECK,GFX8SELDAG %s
-; RUN:  llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx803 < %s  | FileCheck --check-prefixes=GFX8CHECK,GFX8GLISEL %s
+; RUN:  llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx803 < %s  | FileCheck --check-prefixes=GFX8CHECK,GFX8GLISEL %s
 ; RUN:  llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 < %s  | FileCheck --check-prefixes=GFX9CHECK,GFX9SELDAG %s
-; RUN:  llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908 < %s  | FileCheck --check-prefixes=GFX9CHECK,GFX9GLISEL %s
+; RUN:  llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx908 < %s  | FileCheck --check-prefixes=GFX9CHECK,GFX9GLISEL %s
 ; RUN:  llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10SELDAG %s
-; RUN:  llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10GLISEL %s
+; RUN:  llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10GLISEL %s
 ; RUN:  llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11SELDAG,GFX11SELDAG-TRUE16 %s
 ; RUN:  llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11SELDAG,GFX11SELDAG-FAKE16 %s
-; RUN:  llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL,GFX11GLISEL-TRUE16 %s
-; RUN:  llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL,GFX11GLISEL-FAKE16 %s
+; RUN:  llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL,GFX11GLISEL-TRUE16 %s
+; RUN:  llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL,GFX11GLISEL-FAKE16 %s
+
+; FIXME: There are code size regressions in GlobalISel due to use of SGPRs and
+; moving those SGPRs into VGPRs.
 
 define amdgpu_kernel void @sgpr_isnan_f16(ptr addrspace(1) %out, half %x) {
 ; GFX7SELDAG-LABEL: sgpr_isnan_f16:
@@ -34,48 +37,98 @@ define amdgpu_kernel void @sgpr_isnan_f16(ptr addrspace(1) %out, half %x) {
 ; GFX7GLISEL-NEXT:    s_mov_b32 s2, -1
 ; GFX7GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7GLISEL-NEXT:    s_and_b32 s3, s3, 0x7fff
+; GFX7GLISEL-NEXT:    s_and_b32 s3, 0xffff, s3
 ; GFX7GLISEL-NEXT:    s_cmpk_gt_u32 s3, 0x7c00
-; GFX7GLISEL-NEXT:    s_cselect_b32 s3, 1, 0
-; GFX7GLISEL-NEXT:    s_bfe_i32 s3, s3, 0x10000
+; GFX7GLISEL-NEXT:    s_cselect_b32 s3, -1, 0
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v0, s3
 ; GFX7GLISEL-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX7GLISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX7GLISEL-NEXT:    s_endpgm
 ;
-; GFX8CHECK-LABEL: sgpr_isnan_f16:
-; GFX8CHECK:       ; %bb.0:
-; GFX8CHECK-NEXT:    s_load_dword s2, s[4:5], 0x2c
-; GFX8CHECK-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8CHECK-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8CHECK-NEXT:    v_cmp_class_f16_e64 s[2:3], s2, 3
-; GFX8CHECK-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[2:3]
-; GFX8CHECK-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8CHECK-NEXT:    flat_store_dword v[0:1], v2
-; GFX8CHECK-NEXT:    s_endpgm
-;
-; GFX9CHECK-LABEL: sgpr_isnan_f16:
-; GFX9CHECK:       ; %bb.0:
-; GFX9CHECK-NEXT:    s_load_dword s2, s[4:5], 0x2c
-; GFX9CHECK-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9CHECK-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9CHECK-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9CHECK-NEXT:    v_cmp_class_f16_e64 s[2:3], s2, 3
-; GFX9CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[2:3]
-; GFX9CHECK-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9CHECK-NEXT:    s_endpgm
-;
-; GFX10CHECK-LABEL: sgpr_isnan_f16:
-; GFX10CHECK:       ; %bb.0:
-; GFX10CHECK-NEXT:    s_clause 0x1
-; GFX10CHECK-NEXT:    s_load_dword s2, s[4:5], 0x2c
-; GFX10CHECK-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10CHECK-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10CHECK-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10CHECK-NEXT:    v_cmp_class_f16_e64 s2, s2, 3
-; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
-; GFX10CHECK-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX10CHECK-NEXT:    s_endpgm
+; GFX8SELDAG-LABEL: sgpr_isnan_f16:
+; GFX8SELDAG:       ; %bb.0:
+; GFX8SELDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GFX8SELDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8SELDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8SELDAG-NEXT:    v_cmp_class_f16_e64 s[2:3], s2, 3
+; GFX8SELDAG-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8SELDAG-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[2:3]
+; GFX8SELDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX8SELDAG-NEXT:    flat_store_dword v[0:1], v2
+; GFX8SELDAG-NEXT:    s_endpgm
+;
+; GFX8GLISEL-LABEL: sgpr_isnan_f16:
+; GFX8GLISEL:       ; %bb.0:
+; GFX8GLISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GFX8GLISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8GLISEL-NEXT:    v_cmp_class_f16_e64 s[2:3], s2, 3
+; GFX8GLISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX8GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
+; GFX8GLISEL-NEXT:    s_and_b32 s2, s2, 1
+; GFX8GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX8GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
+; GFX8GLISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8GLISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX8GLISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX8GLISEL-NEXT:    flat_store_dword v[0:1], v2
+; GFX8GLISEL-NEXT:    s_endpgm
+;
+; GFX9SELDAG-LABEL: sgpr_isnan_f16:
+; GFX9SELDAG:       ; %bb.0:
+; GFX9SELDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GFX9SELDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9SELDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9SELDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9SELDAG-NEXT:    v_cmp_class_f16_e64 s[2:3], s2, 3
+; GFX9SELDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[2:3]
+; GFX9SELDAG-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9SELDAG-NEXT:    s_endpgm
+;
+; GFX9GLISEL-LABEL: sgpr_isnan_f16:
+; GFX9GLISEL:       ; %bb.0:
+; GFX9GLISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GFX9GLISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9GLISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9GLISEL-NEXT:    v_cmp_class_f16_e64 s[2:3], s2, 3
+; GFX9GLISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX9GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
+; GFX9GLISEL-NEXT:    s_and_b32 s2, s2, 1
+; GFX9GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX9GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
+; GFX9GLISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9GLISEL-NEXT:    global_store_dword v1, v0, s[0:1]
+; GFX9GLISEL-NEXT:    s_endpgm
+;
+; GFX10SELDAG-LABEL: sgpr_isnan_f16:
+; GFX10SELDAG:       ; %bb.0:
+; GFX10SELDAG-NEXT:    s_clause 0x1
+; GFX10SELDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GFX10SELDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10SELDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10SELDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10SELDAG-NEXT:    v_cmp_class_f16_e64 s2, s2, 3
+; GFX10SELDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
+; GFX10SELDAG-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX10SELDAG-NEXT:    s_endpgm
+;
+; GFX10GLISEL-LABEL: sgpr_isnan_f16:
+; GFX10GLISEL:       ; %bb.0:
+; GFX10GLISEL-NEXT:    s_load_dword s0, s[4:5], 0x2c
+; GFX10GLISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10GLISEL-NEXT:    v_cmp_class_f16_e64 s2, s0, 3
+; GFX10GLISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX10GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
+; GFX10GLISEL-NEXT:    s_and_b32 s2, s2, 1
+; GFX10GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX10GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
+; GFX10GLISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX10GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10GLISEL-NEXT:    global_store_dword v1, v0, s[0:1]
+; GFX10GLISEL-NEXT:    s_endpgm
 ;
 ; GFX11SELDAG-TRUE16-LABEL: sgpr_isnan_f16:
 ; GFX11SELDAG-TRUE16:       ; %bb.0:
@@ -103,26 +156,36 @@ define amdgpu_kernel void @sgpr_isnan_f16(ptr addrspace(1) %out, half %x) {
 ;
 ; GFX11GLISEL-TRUE16-LABEL: sgpr_isnan_f16:
 ; GFX11GLISEL-TRUE16:       ; %bb.0:
-; GFX11GLISEL-TRUE16-NEXT:    s_clause 0x1
-; GFX11GLISEL-TRUE16-NEXT:    s_load_b32 s2, s[4:5], 0x2c
-; GFX11GLISEL-TRUE16-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11GLISEL-TRUE16-NEXT:    s_load_b32 s0, s[4:5], 0x2c
 ; GFX11GLISEL-TRUE16-NEXT:    v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 0
 ; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e32 vcc_lo, s2, v0.l
-; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e32 vcc_lo, s0, v0.l
+; GFX11GLISEL-TRUE16-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11GLISEL-TRUE16-NEXT:    s_cmp_lg_u32 vcc_lo, 0
+; GFX11GLISEL-TRUE16-NEXT:    s_cselect_b32 s2, 1, 0
+; GFX11GLISEL-TRUE16-NEXT:    s_and_b32 s2, s2, 1
+; GFX11GLISEL-TRUE16-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX11GLISEL-TRUE16-NEXT:    s_cselect_b32 s2, -1, 0
+; GFX11GLISEL-TRUE16-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11GLISEL-TRUE16-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX11GLISEL-TRUE16-NEXT:    s_endpgm
 ;
 ; GFX11GLISEL-FAKE16-LABEL: sgpr_isnan_f16:
 ; GFX11GLISEL-FAKE16:       ; %bb.0:
-; GFX11GLISEL-FAKE16-NEXT:    s_clause 0x1
-; GFX11GLISEL-FAKE16-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11GLISEL-FAKE16-NEXT:    s_load_b32 s0, s[4:5], 0x2c
+; GFX11GLISEL-FAKE16-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s2, s0, 3
 ; GFX11GLISEL-FAKE16-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11GLISEL-FAKE16-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11GLISEL-FAKE16-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX11GLISEL-FAKE16-NEXT:    s_cselect_b32 s2, 1, 0
+; GFX11GLISEL-FAKE16-NEXT:    s_and_b32 s2, s2, 1
+; GFX11GLISEL-FAKE16-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX11GLISEL-FAKE16-NEXT:    s_cselect_b32 s2, -1, 0
+; GFX11GLISEL-FAKE16-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s2, s2, 3
-; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
-; GFX11GLISEL-FAKE16-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11GLISEL-FAKE16-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX11GLISEL-FAKE16-NEXT:    s_endpgm
   %result = call i1 @llvm.is.fpclass.f16(half %x, i32 3)
   %sext = sext i1 %result to i32
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll
index 4f5432a202058..0a9fe10874c38 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll
@@ -1,14 +1,17 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN:  llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx704 < %s  | FileCheck --check-prefixes=GFX7CHECK,GFX7SELDAG %s
-; RUN:  llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx704 < %s  | FileCheck --check-prefixes=GFX7CHECK,GFX7GLISEL %s
+; RUN:  llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx704 < %s  | FileCheck --check-prefixes=GFX7CHECK,GFX7GLISEL %s
 ; RUN:  llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx803 < %s  | FileCheck --check-prefixes=GFX8CHECK,GFX8SELDAG %s
-; RUN:  llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx803 < %s  | FileCheck --check-prefixes=GFX8CHECK,GFX8GLISEL %s
-; RUN:  llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 < %s  | FileCheck --check-prefixes=GFX9CHECK %s
-; RUN:  llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908 < %s  | FileCheck --check-prefixes=GFX9CHECK %s
-; RUN:  llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK %s
-; RUN:  llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK %s
-; RUN:  llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefix=GFX11CHECK %s
-; RUN:  llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefix=GFX11CHECK %s
+; RUN:  llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx803 < %s  | FileCheck --check-prefixes=GFX8CHECK,GFX8GLISEL %s
+; RUN:  llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 < %s  | FileCheck --check-prefixes=GFX9CHECK,GFX9SELDAG %s
+; RUN:  llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx908 < %s  | FileCheck --check-prefixes=GFX9CHECK,GFX9GLISEL %s
+; RUN:  llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10SELDAG %s
+; RUN:  llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10GLISEL %s
+; RUN:  llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11SELDAG %s
+; RUN:  llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL %s
+
+; FIXME: There are code size regressions in GlobalISel due to use of SGPRs and
+; moving those SGPRs into VGPRs.
 
 define amdgpu_kernel void @sgpr_isnan_f32(ptr addrspace(1) %out, float %x) {
 ; GFX7SELDAG-LABEL: sgpr_isnan_f32:
@@ -30,58 +33,132 @@ define amdgpu_kernel void @sgpr_isnan_f32(ptr addrspace(1) %out, float %x) {
 ; GFX7GLISEL-NEXT:    s_mov_b32 s2, -1
 ; GFX7GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7GLISEL-NEXT:    v_cmp_class_f32_e64 s[4:5], s3, 3
-; GFX7GLISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[4:5]
+; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], s[4:5], s[4:5]
+; GFX7GLISEL-NEXT:    s_cselect_b32 s3, 1, 0
+; GFX7GLISEL-NEXT:    s_and_b32 s3, s3, 1
+; GFX7GLISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; GFX7GLISEL-NEXT:    s_cselect_b32 s3, -1, 0
+; GFX7GLISEL-NEXT:    v_mov_b32_e32 v0, s3
 ; GFX7GLISEL-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX7GLISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX7GLISEL-NEXT:    s_endpgm
 ;
-; GFX8CHECK-LABEL: sgpr_isnan_f32:
-; GFX8CHECK:       ; %bb.0:
-; GFX8CHECK-NEXT:    s_load_dword s2, s[4:5], 0x2c
-; GFX8CHECK-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8CHECK-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8CHECK-NEXT:    v_cmp_class_f32_e64 s[2:3], s2, 3
-; GFX8CHECK-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[2:3]
-; GFX8CHECK-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8CHECK-NEXT:    flat_store_dword v[0:1], v2
-; GFX8CHECK-NEXT:    s_endpgm
-;
-; GFX9CHECK-LABEL: sgpr_isnan_f32:
-; GFX9CHECK:       ; %bb.0:
-; GFX9CHECK-NEXT:    s_load_dword s2, s[4:5], 0x2c
-; GFX9CHECK-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9CHECK-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9CHECK-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9CHECK-NEXT:    v_cmp_class_f32_e64 s[2:3], s2, 3
-; GFX9CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[2:3]
-; GFX9CHECK-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9CHECK-NEXT:    s_endpgm
-;
-; GFX10CHECK-LABEL: sgpr_isnan_f32:
-; GFX10CHECK:       ; %bb.0:
-; GFX10CHECK-NEXT:    s_clause 0x1
-; GFX10CHECK-NEXT:    s_load_dword s2, s[4:5], 0x2c
-; GFX10CHECK-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10CHECK-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10CHECK-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10CHECK-NEXT:    v_cmp_class_f32_e64 s2, s2, 3
-; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
-; GFX10CHECK-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX10CHECK-NEXT:    s_endpgm
-;
-; GFX11CHECK-LABEL: sgpr_isnan_f32:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_clause 0x1
-; GFX11CHECK-NEXT:    s_load_b32 s2, s[4:5], 0x2c
-; GFX11CHECK-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11CHECK-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11CHECK-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f32_e64 s2, s2, 3
-; GFX11CHECK-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
-; GFX11CHECK-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11CHECK-NEXT:    s_endpgm
+; GFX8SELDAG-LABEL: sgpr_isnan_f32:
+; GFX8SELDAG:       ; %bb.0:
+; GFX8SELDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GFX8SELDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8SELDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8SELDAG-NEXT:    v_cmp_class_f32_e64 s[2:3], s2, 3
+; GFX8SELDAG-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8SELDAG-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[2:3]
+; GFX8SELDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX8SELDAG-NEXT:    flat_store_dword v[0:1], v2
+; GFX8SELDAG-NEXT:    s_endpgm
+;
+; GFX8GLISEL-LABEL: sgpr_isnan_f32:
+; GFX8GLISEL:       ; %bb.0:
+; GFX8GLISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GFX8GLISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8GLISEL-NEXT:    v_cmp_class_f32_e64 s[2:3], s2, 3
+; GFX8GLISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX8GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
+; GFX8GLISEL-NEXT:    s_and_b32 s2, s2, 1
+; GFX8GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX8GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
+; GFX8GLISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8GLISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX8GLISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX8GLISEL-NEXT:    flat_store_dword v[0:1], v2
+; GFX8GLISEL-NEXT:    s_endpgm
+;
+; GFX9SELDAG-LABEL: sgpr_isnan_f32:
+; GFX9SELDAG:       ; %bb.0:
+; GFX9SELDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GFX9SELDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9SELDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9SELDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9SELDAG-NEXT:    v_cmp_class_f32_e64 s[2:3], s2, 3
+; GFX9SELDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[2:3]
+; GFX9SELDAG-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9SELDAG-NEXT:    s_endpgm
+;
+; GFX9GLISEL-LABEL: sgpr_isnan_f32:
+; GFX9GLISEL:       ; %bb.0:
+; GFX9GLISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GFX9GLISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9GLISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9GLISEL-NEXT:    v_cmp_class_f32_e64 s[2:3], s2, 3
+; GFX9GLISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX9GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
+; GFX9GLISEL-NEXT:    s_and_b32 s2, s2, 1
+; GFX9GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX9GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
+; GFX9GLISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9GLISEL-NEXT:    global_store_dword v1, v0, s[0:1]
+; GFX9GLISEL-NEXT:    s_endpgm
+;
+; GFX10SELDAG-LABEL: sgpr_isnan_f32:
+; GFX10SELDAG:       ; %bb.0:
+; GFX10SELDAG-NEXT:    s_clause 0x1
+; GFX10SELDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GFX10SELDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10SELDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10SELDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10SELDAG-NEXT:    v_cmp_class_f32_e64 s2, s2, 3
+; GFX10SELDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
+; GFX10SELDAG-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX10SELDAG-NEXT:    s_endpgm
+;
+; GFX10GLISEL-LABEL: sgpr_isnan_f32:
+; GFX10GLISEL:       ; %bb.0:
+; GFX10GLISEL-NEXT:    s_load_dword s0, s[4:5], 0x2c
+; GFX10GLISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10GLISEL-NEXT:    v_cmp_class_f32_e64 s2, s0, 3
+; GFX10GLISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX10GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
+; GFX10GLISEL-NEXT:    s_and_b32 s2, s2, 1
+; GFX10GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX10GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
+; GFX10GLISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX10GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10GLISEL-NEXT:    global_store_dword v1, v0, s[0:1]
+; GFX10GLISEL-NEXT:    s_endpgm
+;
+; GFX11SELDAG-LABEL: sgpr_isnan_f32:
+; GFX11SELDAG:       ; %bb.0:
+; GFX11SELDAG-NEXT:    s_clause 0x1
+; GFX11SELDAG-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11SELDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11SELDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11SELDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11SELDAG-NEXT:    v_cmp_class_f32_e64 s2, s2, 3
+; GFX11SELDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11SELDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
+; GFX11SELDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11SELDAG-NEXT:    s_endpgm
+;
+; GFX11GLISEL-LABEL: sgpr_isnan_f32:
+; GFX11GLISEL:       ; %bb.0:
+; GFX11GLISEL-NEXT:    s_load_b32 s0, s[4:5], 0x2c
+; GFX11GLISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11GLISEL-NEXT:    v_cmp_class_f32_e64 s2, s0, 3
+; GFX11GLISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX11GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
+; GFX11GLISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11GLISEL-NEXT:    s_and_b32 s2, s2, 1
+; GFX11GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX11GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
+; GFX11GLISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11GLISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11GLISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GFX11GLISEL-NEXT:    s_endpgm
   %result = call i1 @llvm.is.fpclass.f32(float %x, i32 3)  ; nan
   %sext = sext i1 %result to i32
   store i32 %sext, ptr addrspace(1) %out, align 4
@@ -106,9 +183,14 @@ define amdgpu_kernel void @sgpr_isnan_f64(ptr addrspace(1) %out, double %x) {
 ; GFX7GLISEL:       ; %bb.0:
 ; GFX7GLISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
 ; GFX7GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX7GLISEL-NEXT:    v_cmp_class_f64_e64 s[2:3], s[2:3], 3
-; GFX7GLISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[2:3]
+; GFX7GLISEL-NEXT:    v_cmp_class_f64_e64 s[4:5], s[2:3], 3
 ; GFX7GLISEL-NEXT:    s_mov_b32 s2, -1
+; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], s[4:5], s[4:5]
+; GFX7GLISEL-NEXT:    s_cselect_b32 s3, 1, 0
+; GFX7GLISEL-NEXT:    s_and_b32 s3, s3, 1
+; GFX7GLISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; GFX7GLISEL-NEXT:    s_cselect_b32 s3, -1, 0
+; GFX7GLISEL-NEXT:    v_mov_b32_e32 v0, s3
 ; GFX7GLISEL-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX7GLISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX7GLISEL-NEXT:    s_endpgm
@@ -131,40 +213,92 @@ define amdgpu_kernel void @sgpr_isnan_f64(ptr addrspace(1) %out, double %x) {
 ; GFX8GLISEL-NEXT:    v_cmp_class_f64_e64 s[2:3], s[2:3], 3
 ; GFX8GLISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX8GLISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8GLISEL-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[2:3]
+; GFX8GLISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX8GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
+; GFX8GLISEL-NEXT:    s_and_b32 s2, s2, 1
+; GFX8GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX8GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
+; GFX8GLISEL-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX8GLISEL-NEXT:    flat_store_dword v[0:1], v2
 ; GFX8GLISEL-NEXT:    s_endpgm
 ;
-; GFX9CHECK-LABEL: sgpr_isnan_f64:
-; GFX9CHECK:       ; %bb.0:
-; GFX9CHECK-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9CHECK-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9CHECK-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9CHECK-NEXT:    v_cmp_class_f64_e64 s[2:3], s[2:3], 3
-; GFX9CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[2:3]
-; GFX9CHECK-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9CHECK-NEXT:    s_endpgm
-;
-; GFX10CHECK-LABEL: sgpr_isnan_f64:
-; GFX10CHECK:       ; %bb.0:
-; GFX10CHECK-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX10CHECK-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10CHECK-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10CHECK-NEXT:    v_cmp_class_f64_e64 s2, s[2:3], 3
-; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
-; GFX10CHECK-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX10CHECK-NEXT:    s_endpgm
-;
-; GFX11CHECK-LABEL: sgpr_isnan_f64:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11CHECK-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11CHECK-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f64_e64 s2, s[2:3], 3
-; GFX11CHECK-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
-; GFX11CHECK-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11CHECK-NEXT:    s_endpgm
+; GFX9SELDAG-LABEL: sgpr_isnan_f64:
+; GFX9SELDAG:       ; %bb.0:
+; GFX9SELDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9SELDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9SELDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9SELDAG-NEXT:    v_cmp_class_f64_e64 s[2:3], s[2:3], 3
+; GFX9SELDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[2:3]
+; GFX9SELDAG-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9SELDAG-NEXT:    s_endpgm
+;
+; GFX9GLISEL-LABEL: sgpr_isnan_f64:
+; GFX9GLISEL:       ; %bb.0:
+; GFX9GLISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9GLISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9GLISEL-NEXT:    v_cmp_class_f64_e64 s[2:3], s[2:3], 3
+; GFX9GLISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX9GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
+; GFX9GLISEL-NEXT:    s_and_b32 s2, s2, 1
+; GFX9GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX9GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
+; GFX9GLISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9GLISEL-NEXT:    global_store_dword v1, v0, s[0:1]
+; GFX9GLISEL-NEXT:    s_endpgm
+;
+; GFX10SELDAG-LABEL: sgpr_isnan_f64:
+; GFX10SELDAG:       ; %bb.0:
+; GFX10SELDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10SELDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10SELDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10SELDAG-NEXT:    v_cmp_class_f64_e64 s2, s[2:3], 3
+; GFX10SELDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
+; GFX10SELDAG-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX10SELDAG-NEXT:    s_endpgm
+;
+; GFX10GLISEL-LABEL: sgpr_isnan_f64:
+; GFX10GLISEL:       ; %bb.0:
+; GFX10GLISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10GLISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10GLISEL-NEXT:    v_cmp_class_f64_e64 s2, s[2:3], 3
+; GFX10GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX10GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
+; GFX10GLISEL-NEXT:    s_and_b32 s2, s2, 1
+; GFX10GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX10GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
+; GFX10GLISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX10GLISEL-NEXT:    global_store_dword v1, v0, s[0:1]
+; GFX10GLISEL-NEXT:    s_endpgm
+;
+; GFX11SELDAG-LABEL: sgpr_isnan_f64:
+; GFX11SELDAG:       ; %bb.0:
+; GFX11SELDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11SELDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11SELDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11SELDAG-NEXT:    v_cmp_class_f64_e64 s2, s[2:3], 3
+; GFX11SELDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11SELDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
+; GFX11SELDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11SELDAG-NEXT:    s_endpgm
+;
+; GFX11GLISEL-LABEL: sgpr_isnan_f64:
+; GFX11GLISEL:       ; %bb.0:
+; GFX11GLISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11GLISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11GLISEL-NEXT:    v_cmp_class_f64_e64 s2, s[2:3], 3
+; GFX11GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX11GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
+; GFX11GLISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11GLISEL-NEXT:    s_and_b32 s2, s2, 1
+; GFX11GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX11GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
+; GFX11GLISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11GLISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11GLISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GFX11GLISEL-NEXT:    s_endpgm
   %result = call i1 @llvm.is.fpclass.f64(double %x, i32 3)  ; nan
   %sext = sext i1 %result to i32
   store i32 %sext, ptr addrspace(1) %out, align 4

From 6d3971d97f362c02a0dd3f148b6e82f61810d025 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu@google.com>
Date: Tue, 18 Nov 2025 09:14:01 -0800
Subject: [PATCH 45/52] [AsmParser] Use a range-based for loop (NFC) (#168488)

Identified with modernize-loop-convert.
---
 llvm/lib/AsmParser/LLParser.cpp | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 921462e28a467..799234a0b491d 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -315,11 +315,10 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) {
       return error(NT.second.second,
                    "use of undefined type '%" + Twine(NT.first) + "'");
 
-  for (StringMap<std::pair<Type*, LocTy> >::iterator I =
-       NamedTypes.begin(), E = NamedTypes.end(); I != E; ++I)
-    if (I->second.second.isValid())
-      return error(I->second.second,
-                   "use of undefined type named '" + I->getKey() + "'");
+  for (const auto &[Name, TypeInfo] : NamedTypes)
+    if (TypeInfo.second.isValid())
+      return error(TypeInfo.second,
+                   "use of undefined type named '" + Name + "'");
 
   if (!ForwardRefComdats.empty())
     return error(ForwardRefComdats.begin()->second,

From 58cffea94a31e52d6492ce7103e04c6b073dee16 Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234@gmail.com>
Date: Tue, 18 Nov 2025 12:15:26 -0500
Subject: [PATCH 46/52] [InstCombine] Canonicalize signed saturated additions
 (#153053)

https://alive2.llvm.org/ce/z/YGT5SN
https://alive2.llvm.org/ce/z/PVDxCw
https://alive2.llvm.org/ce/z/8buR2N

This is tricky because the select being matched only clamps at INT_MAX.
With a positive addend the sum can only go up, so the signed_max boundary
is the only one that can ever be hit. This is important because the
intrinsic we use also saturates in the OTHER direction, i.e. it clamps to
INT_MIN if the sum overflows downwards.

The range check we match likewise only works for positive addends.

Because of this, the compare-against-constant patterns are only folded
when the addend is a strictly positive constant.
---
 .../InstCombine/InstCombineSelect.cpp         |  95 +++++-
 .../InstCombine/canonicalize-const-to-bop.ll  |   3 +-
 .../InstCombine/saturating-add-sub.ll         | 320 ++++++++++++++++++
 3 files changed, 412 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 9572f9d702e1b..e7dc366b13798 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1027,10 +1027,9 @@ static Value *canonicalizeSaturatedSubtract(const ICmpInst *ICI,
   return Result;
 }
 
-static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
-                                       InstCombiner::BuilderTy &Builder) {
-  if (!Cmp->hasOneUse())
-    return nullptr;
+static Value *
+canonicalizeSaturatedAddUnsigned(ICmpInst *Cmp, Value *TVal, Value *FVal,
+                                 InstCombiner::BuilderTy &Builder) {
 
   // Match unsigned saturated add with constant.
   Value *Cmp0 = Cmp->getOperand(0);
@@ -1130,6 +1129,112 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
   return nullptr;
 }
 
+/// Try to fold a select that clamps a signed add at the maximum signed value
+/// into a call to llvm.sadd.sat.
+///
+/// \p Cmp is the select condition and \p TVal / \p FVal are the select arms;
+/// one arm must be the maximum signed value. With INT_MAX canonicalized into
+/// the true arm, the recognized forms are:
+///   (X == INT_MAX) ? INT_MAX : (X + 1)            --> sadd.sat(X, 1)
+///   (X s> INT_MAX - C) ? INT_MAX : (X + C)        --> sadd.sat(X, C)
+///   (X s> INT_MAX - C - 1) ? INT_MAX : (X + C)    --> sadd.sat(X, C)
+///   (INT_MAX - X s</s<= Y) ? INT_MAX : (X + Y)    --> sadd.sat(X, Y)
+/// where C is a strictly positive constant and the subtraction is nsw.
+///
+/// \returns the created intrinsic call, or nullptr if nothing matched.
+static Value *canonicalizeSaturatedAddSigned(ICmpInst *Cmp, Value *TVal,
+                                             Value *FVal,
+                                             InstCombiner::BuilderTy &Builder) {
+  // Match saturated add with constant.
+  Value *Cmp0 = Cmp->getOperand(0);
+  Value *Cmp1 = Cmp->getOperand(1);
+  ICmpInst::Predicate Pred = Cmp->getPredicate();
+  Value *X;
+  const APInt *C;
+
+  // Canonicalize INT_MAX to true value of the select.
+  if (match(FVal, m_MaxSignedValue())) {
+    std::swap(TVal, FVal);
+    Pred = CmpInst::getInversePredicate(Pred);
+  }
+
+  if (!match(TVal, m_MaxSignedValue()))
+    return nullptr;
+
+  // sge maximum signed value is canonicalized to eq maximum signed value and
+  // requires special handling (a == INT_MAX) ? INT_MAX : a + 1 -> sadd.sat(a,
+  // 1)
+  if (Pred == ICmpInst::ICMP_EQ) {
+    if (match(FVal, m_Add(m_Specific(Cmp0), m_One())) && Cmp1 == TVal) {
+      return Builder.CreateBinaryIntrinsic(
+          Intrinsic::sadd_sat, Cmp0, ConstantInt::get(Cmp0->getType(), 1));
+    }
+    return nullptr;
+  }
+
+  // (X > Y) ? INT_MAX : (X + C) --> sadd.sat(X, C)
+  // (X >= Y) ? INT_MAX : (X + C) --> sadd.sat(X, C)
+  // where Y is INT_MAX - C or INT_MAX - C - 1, and C > 0
+  if ((Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) &&
+      match(FVal, m_Add(m_Specific(Cmp0), m_StrictlyPositive(C)))) {
+    APInt IntMax =
+        APInt::getSignedMaxValue(Cmp1->getType()->getScalarSizeInBits());
+
+    // For SGE, try to flip to SGT to normalize the comparison constant.
+    // Nothing so far guarantees that Cmp1 is a constant (e.g.
+    // `X s>= Y ? INT_MAX : X + C` with a variable Y reaches this point), so
+    // use dyn_cast rather than cast to avoid asserting on a non-constant.
+    if (Pred == ICmpInst::ICMP_SGE) {
+      if (auto *Cmp1C = dyn_cast<Constant>(Cmp1)) {
+        if (auto Flipped =
+                getFlippedStrictnessPredicateAndConstant(Pred, Cmp1C)) {
+          Pred = Flipped->first;
+          Cmp1 = Flipped->second;
+        }
+      }
+    }
+
+    // Check the pattern: X > INT_MAX - C or X > INT_MAX - C - 1
+    if (Pred == ICmpInst::ICMP_SGT &&
+        (match(Cmp1, m_SpecificIntAllowPoison(IntMax - *C)) ||
+         match(Cmp1, m_SpecificIntAllowPoison(IntMax - *C - 1))))
+      return Builder.CreateBinaryIntrinsic(
+          Intrinsic::sadd_sat, Cmp0, ConstantInt::get(Cmp0->getType(), *C));
+  }
+
+  // Canonicalize predicate to less-than or less-or-equal-than.
+  if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) {
+    std::swap(Cmp0, Cmp1);
+    Pred = CmpInst::getSwappedPredicate(Pred);
+  }
+
+  if (Pred != ICmpInst::ICMP_SLT && Pred != ICmpInst::ICMP_SLE)
+    return nullptr;
+
+  if (match(Cmp0, m_NSWSub(m_MaxSignedValue(), m_Value(X))) &&
+      match(FVal, m_c_Add(m_Specific(X), m_Specific(Cmp1)))) {
+    // (INT_MAX - X s< Y) ? INT_MAX : (X + Y) --> sadd.sat(X, Y)
+    // (INT_MAX - X s< Y) ? INT_MAX : (Y + X) --> sadd.sat(X, Y)
+    return Builder.CreateBinaryIntrinsic(Intrinsic::sadd_sat, X, Cmp1);
+  }
+
+  return nullptr;
+}
+
+static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
+                                       InstCombiner::BuilderTy &Builder) {
+  if (!Cmp->hasOneUse())
+    return nullptr;
+
+  if (Value *V = canonicalizeSaturatedAddUnsigned(Cmp, TVal, FVal, Builder))
+    return V;
+
+  if (Value *V = canonicalizeSaturatedAddSigned(Cmp, TVal, FVal, Builder))
+    return V;
+
+  return nullptr;
+}
+
 /// Try to match patterns with select and subtract as absolute difference.
 static Value *foldAbsDiff(ICmpInst *Cmp, Value *TVal, Value *FVal,
                           InstCombiner::BuilderTy &Builder) {
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-const-to-bop.ll b/llvm/test/Transforms/InstCombine/canonicalize-const-to-bop.ll
index b3093a92624ae..f0e40f4ede161 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-const-to-bop.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-const-to-bop.ll
@@ -123,8 +123,7 @@ define i8 @udiv_slt_exact(i8 %x) {
 define i8 @canonicalize_icmp_operands(i8 %x) {
 ; CHECK-LABEL: define i8 @canonicalize_icmp_operands(
 ; CHECK-SAME: i8 [[X:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.smin.i8(i8 [[X]], i8 119)
-; CHECK-NEXT:    [[S:%.*]] = add nsw i8 [[TMP1]], 8
+; CHECK-NEXT:    [[S:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X]], i8 8)
 ; CHECK-NEXT:    ret i8 [[S]]
 ;
   %add = add nsw i8 %x, 8
diff --git a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
index cfd679c0cc592..c0ad5818e448a 100644
--- a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
+++ b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
@@ -2351,3 +2351,323 @@ define i8 @fold_add_umax_to_usub_multiuse(i8 %a) {
 }
 
 declare void @usei8(i8)
+
+define i8 @sadd_sat_uge_int_max(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_uge_int_max(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[X:%.*]], 127
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[X]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %cmp = icmp sge i8 %x, 127
+  %add = add i8 %x, %y
+  %r = select i1 %cmp, i8 127, i8 %add
+  ret i8 %r
+}
+
+define i8 @sadd_sat_ugt_int_max(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_ugt_int_max(
+; CHECK-NEXT:    [[R:%.*]] = add i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %cmp = icmp sgt i8 %x, 127
+  %add = add i8 %x, %y
+  %r = select i1 %cmp, i8 127, i8 %add
+  ret i8 %r
+}
+
+define i8 @sadd_sat_eq_int_max(i8 %x) {
+; CHECK-LABEL: @sadd_sat_eq_int_max(
+; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X:%.*]], i8 1)
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %cmp = icmp eq i8 %x, 127
+  %add = add i8 %x, 1
+  %r = select i1 %cmp, i8 127, i8 %add
+  ret i8 %r
+}
+
+define i8 @sadd_sat_constant(i8 %x) {
+; CHECK-LABEL: @sadd_sat_constant(
+; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X:%.*]], i8 10)
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %cmp = icmp sge i8 %x, 118
+  %add = add i8 %x, 10
+  %r = select i1 %cmp, i8 127, i8 %add
+  ret i8 %r
+}
+
+define i8 @sadd_sat_negative_no_fold(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_negative_no_fold(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[X:%.*]], 127
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[X]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %cmp = icmp sge i8 %x, 127
+  %add = add i8 %x, %y
+  %r = select i1 %cmp, i8 127, i8 %add
+  ret i8 %r
+}
+
+define i8 @sadd_sat_wrong_predicate(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_wrong_predicate(
+; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i8 [[X:%.*]], 127
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[Y:%.*]], 127
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP_NOT]], i8 [[ADD]], i8 127
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %cmp = icmp slt i8 %x, 127
+  %add = add i8 %x, %y
+  %r = select i1 %cmp, i8 127, i8 %add
+  ret i8 %r
+}
+
+define i8 @sadd_sat_wrong_constant(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_wrong_constant(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 [[X:%.*]], 125
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[X]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %cmp = icmp sge i8 %x, 126
+  %add = add i8 %x, %y
+  %r = select i1 %cmp, i8 127, i8 %add
+  ret i8 %r
+}
+
+define <2 x i8> @sadd_sat_vector(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @sadd_sat_vector(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 127)
+; CHECK-NEXT:    [[ADD:%.*]] = add <2 x i8> [[X]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[CMP]], <2 x i8> splat (i8 127), <2 x i8> [[ADD]]
+; CHECK-NEXT:    ret <2 x i8> [[R]]
+;
+  %cmp = icmp sge <2 x i8> %x, <i8 127, i8 127>
+  %add = add <2 x i8> %x, %y
+  %r = select <2 x i1> %cmp, <2 x i8> <i8 127, i8 127>, <2 x i8> %add
+  ret <2 x i8> %r
+}
+
+define <2 x i8> @sadd_sat_vector_constant(<2 x i8> %x) {
+; CHECK-LABEL: @sadd_sat_vector_constant(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i8> @llvm.smin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> <i8 117, i8 107>)
+; CHECK-NEXT:    [[R:%.*]] = add <2 x i8> [[TMP1]], <i8 10, i8 20>
+; CHECK-NEXT:    ret <2 x i8> [[R]]
+;
+  %cmp = icmp sge <2 x i8> %x, <i8 118, i8 108>
+  %add = add <2 x i8> %x, <i8 10, i8 20>
+  %r = select <2 x i1> %cmp, <2 x i8> <i8 127, i8 127>, <2 x i8> %add
+  ret <2 x i8> %r
+}
+
+define i8 @sadd_sat_int_max_minus_x(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_int_max_minus_x(
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 127, [[X:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[SUB]], [[Y:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %sub = sub i8 127, %x
+  %cmp = icmp slt i8 %sub, %y
+  %add = add i8 %x, %y
+  %r = select i1 %cmp, i8 127, i8 %add
+  ret i8 %r
+}
+
+define i8 @sadd_sat_int_max_minus_x_commuted(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_int_max_minus_x_commuted(
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 127, [[X:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 [[Y:%.*]], [[SUB]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %sub = sub i8 127, %x
+  %cmp = icmp sgt i8 %y, %sub
+  %add = add i8 %x, %y
+  %r = select i1 %cmp, i8 127, i8 %add
+  ret i8 %r
+}
+
+define i8 @sadd_sat_int_max_minus_x_nonstrict(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_int_max_minus_x_nonstrict(
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 127, [[X:%.*]]
+; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp sgt i8 [[SUB]], [[Y:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP_NOT]], i8 [[ADD]], i8 127
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %sub = sub i8 127, %x
+  %cmp = icmp sle i8 %sub, %y
+  %add = add i8 %x, %y
+  %r = select i1 %cmp, i8 127, i8 %add
+  ret i8 %r
+}
+
+define i8 @sadd_sat_int_max_minus_x_commuted_nonstrict(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_int_max_minus_x_commuted_nonstrict(
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 127, [[X:%.*]]
+; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp slt i8 [[Y:%.*]], [[SUB]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP_NOT]], i8 [[ADD]], i8 127
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %sub = sub i8 127, %x
+  %cmp = icmp sge i8 %y, %sub
+  %add = add i8 %x, %y
+  %r = select i1 %cmp, i8 127, i8 %add
+  ret i8 %r
+}
+
+define i8 @sadd_sat_int_max_minus_x_wrong_constant(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_int_max_minus_x_wrong_constant(
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 126, [[X:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[SUB]], [[Y:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %sub = sub i8 126, %x
+  %cmp = icmp slt i8 %sub, %y
+  %add = add i8 %x, %y
+  %r = select i1 %cmp, i8 127, i8 %add
+  ret i8 %r
+}
+
+define i8 @sadd_sat_int_max_minus_x_wrong_predicate(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_int_max_minus_x_wrong_predicate(
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 127, [[X:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 [[SUB]], [[Y:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %sub = sub i8 127, %x
+  %cmp = icmp sgt i8 %sub, %y
+  %add = add i8 %x, %y
+  %r = select i1 %cmp, i8 127, i8 %add
+  ret i8 %r
+}
+
+define <2 x i8> @sadd_sat_int_max_minus_x_vector(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @sadd_sat_int_max_minus_x_vector(
+; CHECK-NEXT:    [[SUB:%.*]] = sub <2 x i8> splat (i8 127), [[X:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i8> [[SUB]], [[Y:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = add <2 x i8> [[X]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[CMP]], <2 x i8> splat (i8 127), <2 x i8> [[ADD]]
+; CHECK-NEXT:    ret <2 x i8> [[R]]
+;
+  %sub = sub <2 x i8> <i8 127, i8 127>, %x
+  %cmp = icmp slt <2 x i8> %sub, %y
+  %add = add <2 x i8> %x, %y
+  %r = select <2 x i1> %cmp, <2 x i8> <i8 127, i8 127>, <2 x i8> %add
+  ret <2 x i8> %r
+}
+
+define i8 @sadd_sat_commuted_select(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_commuted_select(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[X:%.*]], 127
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[Y:%.*]], 127
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP]], i8 [[ADD]], i8 127
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %cmp = icmp sge i8 %x, 127
+  %add = add i8 %x, %y
+  %r = select i1 %cmp, i8 %add, i8 127
+  ret i8 %r
+}
+
+define i8 @sadd_sat_commuted_add(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_commuted_add(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[X:%.*]], 127
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[Y:%.*]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %cmp = icmp sge i8 %x, 127
+  %add = add i8 %y, %x
+  %r = select i1 %cmp, i8 127, i8 %add
+  ret i8 %r
+}
+
+define i8 @sadd_sat_commuted_both(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_commuted_both(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[X:%.*]], 127
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[Y:%.*]], 127
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP]], i8 [[ADD]], i8 127
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %cmp = icmp sge i8 %x, 127
+  %add = add i8 %y, %x
+  %r = select i1 %cmp, i8 %add, i8 127
+  ret i8 %r
+}
+
+define i8 @sadd_sat_int_max_minus_x_nsw_slt(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_int_max_minus_x_nsw_slt(
+; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %sub = sub nsw i8 127, %x
+  %cmp = icmp slt i8 %sub, %y
+  %add = add i8 %x, %y
+  %r = select i1 %cmp, i8 127, i8 %add
+  ret i8 %r
+}
+
+define i8 @sadd_sat_int_max_minus_x_nsw_sge_commuted(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_int_max_minus_x_nsw_sge_commuted(
+; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %sub = sub nsw i8 127, %x
+  %cmp = icmp sge i8 %y, %sub
+  %add = add i8 %x, %y
+  %r = select i1 %cmp, i8 127, i8 %add
+  ret i8 %r
+}
+
+define i8 @sadd_sat_int_max_minus_x_no_nsw_neg(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_int_max_minus_x_no_nsw_neg(
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 127, [[X:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[SUB]], [[Y:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %sub = sub i8 127, %x
+  %cmp = icmp slt i8 %sub, %y
+  %add = add i8 %x, %y
+  %r = select i1 %cmp, i8 127, i8 %add
+  ret i8 %r
+}
+
+define i8 @neg_no_nsw(i8 %x, i8 %y) {
+; CHECK-LABEL: @neg_no_nsw(
+; CHECK-NEXT:    [[ADD:%.*]] = sub i8 127, [[Y:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 [[X:%.*]], [[ADD]]
+; CHECK-NEXT:    [[D:%.*]] = add i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[S:%.*]] = select i1 [[CMP]], i8 127, i8 [[D]]
+; CHECK-NEXT:    ret i8 [[S]]
+;
+  %add = sub i8 127, %y
+  %cmp = icmp sgt i8 %x, %add
+  %d = add i8 %x, %y
+  %s = select i1 %cmp, i8 127, i8 %d
+  ret i8 %s
+}
+
+define i8 @neg_neg_constant(i8 %x, i8 %y) {
+; CHECK-LABEL: @neg_neg_constant(
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.smin.i8(i8 [[X:%.*]], i8 -1)
+; CHECK-NEXT:    [[S:%.*]] = and i8 [[TMP1]], 127
+; CHECK-NEXT:    ret i8 [[S]]
+;
+  %cmp = icmp sgt i8 %x, -2
+  %d = add i8 %x, -128
+  %s = select i1 %cmp, i8 127, i8 %d
+  ret i8 %s
+}

From b53371210fcf1f23d1f87e5727fdf1e9aefa674f Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman@google.com>
Date: Tue, 18 Nov 2025 09:31:55 -0800
Subject: [PATCH 47/52] [CI] Only run normal check targets if requested
 (#168412)

When building just the runtimes (e.g., when a patch only touches
compiler-rt), we do not actually run any normal check targets. This ends
up causing a ninja invocation with an empty target list, which builds
more targets than necessary. Gate the ninja build for normal check-*
targets under an if statement to fix this.
---
 .ci/monolithic-linux.sh   | 8 +++++---
 .ci/monolithic-windows.sh | 8 +++++---
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/.ci/monolithic-linux.sh b/.ci/monolithic-linux.sh
index 4a8418d7baa8c..ca619aa7e98a1 100755
--- a/.ci/monolithic-linux.sh
+++ b/.ci/monolithic-linux.sh
@@ -64,9 +64,11 @@ cmake -S "${MONOREPO_ROOT}"/llvm -B "${BUILD_DIR}" \
 
 start-group "ninja"
 
-# Targets are not escaped as they are passed as separate arguments.
-ninja -C "${BUILD_DIR}" -k 0 ${targets} |& tee ninja.log
-cp ${BUILD_DIR}/.ninja_log ninja.ninja_log
+if [[ "${targets}" != "" ]]; then
+  # Targets are not escaped as they are passed as separate arguments.
+  ninja -C "${BUILD_DIR}" -k 0 ${targets} |& tee ninja.log
+  cp ${BUILD_DIR}/.ninja_log ninja.ninja_log
+fi
 
 if [[ "${runtime_targets}" != "" ]]; then
   start-group "ninja Runtimes"
diff --git a/.ci/monolithic-windows.sh b/.ci/monolithic-windows.sh
index 7b926b87f3623..99e7758ce8d79 100755
--- a/.ci/monolithic-windows.sh
+++ b/.ci/monolithic-windows.sh
@@ -51,9 +51,11 @@ cmake -S "${MONOREPO_ROOT}"/llvm -B "${BUILD_DIR}" \
 
 start-group "ninja"
 
-# Targets are not escaped as they are passed as separate arguments.
-ninja -C "${BUILD_DIR}" -k 0 ${targets} |& tee ninja.log
-cp ${BUILD_DIR}/.ninja_log ninja.ninja_log
+if [[ "${targets}" != "" ]]; then
+  # Targets are not escaped as they are passed as separate arguments.
+  ninja -C "${BUILD_DIR}" -k 0 ${targets} |& tee ninja.log
+  cp ${BUILD_DIR}/.ninja_log ninja.ninja_log
+fi
 
 if [[ "${runtimes_targets}" != "" ]]; then
   start-group "ninja runtimes"

From 94e9bfb80365de0c9c71303418b33ceb767f7cf9 Mon Sep 17 00:00:00 2001
From: Marius Kamp <msk@posteo.org>
Date: Tue, 18 Nov 2025 18:36:18 +0100
Subject: [PATCH 48/52] [AArch64] Reorder Comparison Trees to Facilitate CSE
 (#168064)

The AArch64 backend converts trees formed by conjunctions/disjunctions
of comparisons into sequences of `CCMP` instructions. The implementation
before this change checks whether a sub-tree must be processed first. If
not, it processes the operations in the order they occur in the DAG.

This may not be optimal if there is a corresponding `SUB` node for one
of the comparisons. In this case, we should process this comparison
first because we can then use the same instruction for the `SUB` node
and the comparison.

To achieve this, this commit comprises the following changes:

- Extend `canEmitConjunction` with a new output parameter `PreferFirst`,
  which reports to the caller whether the sub-tree should preferably be
  processed first.
- Set `PreferFirst` to `true` if we can find a corresponding `SUB` node
  in the DAG.
- If we can process a sub-tree with `PreferFirst = true` first (i.e., we
  do not violate any `MustBeFirst` constraint by doing so), we swap the
  sub-trees.
- The already existing code for performing the common subexpression
  elimination takes care to use only a single instruction for the
  comparison and the `SUB` node if possible.

Closes #149685.
---
 .../Target/AArch64/AArch64ISelLowering.cpp    |  41 ++++--
 llvm/test/CodeGen/AArch64/ccmp-cse.ll         | 139 ++++++++++++++++++
 2 files changed, 170 insertions(+), 10 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/ccmp-cse.ll

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index d21e19b2ecd46..8f41f230b5521 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3886,22 +3886,30 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
 /// \param MustBeFirst  Set to true if this subtree needs to be negated and we
 ///                     cannot do the negation naturally. We are required to
 ///                     emit the subtree first in this case.
+/// \param PreferFirst  Set to true if processing this subtree first may
+///                     result in more efficient code.
 /// \param WillNegate   Is true if are called when the result of this
 ///                     subexpression must be negated. This happens when the
 ///                     outer expression is an OR. We can use this fact to know
 ///                     that we have a double negation (or (or ...) ...) that
 ///                     can be implemented for free.
-static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
-                               bool &MustBeFirst, bool WillNegate,
+static bool canEmitConjunction(SelectionDAG &DAG, const SDValue Val,
+                               bool &CanNegate, bool &MustBeFirst,
+                               bool &PreferFirst, bool WillNegate,
                                unsigned Depth = 0) {
   if (!Val.hasOneUse())
     return false;
   unsigned Opcode = Val->getOpcode();
   if (Opcode == ISD::SETCC) {
-    if (Val->getOperand(0).getValueType() == MVT::f128)
+    EVT VT = Val->getOperand(0).getValueType();
+    if (VT == MVT::f128)
       return false;
     CanNegate = true;
     MustBeFirst = false;
+    // Designate this operation as a preferred first operation if the result
+    // of a SUB operation can be reused.
+    PreferFirst = DAG.doesNodeExist(ISD::SUB, DAG.getVTList(VT),
+                                    {Val->getOperand(0), Val->getOperand(1)});
     return true;
   }
   // Protect against exponential runtime and stack overflow.
@@ -3913,11 +3921,15 @@ static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
     SDValue O1 = Val->getOperand(1);
     bool CanNegateL;
     bool MustBeFirstL;
-    if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1))
+    bool PreferFirstL;
+    if (!canEmitConjunction(DAG, O0, CanNegateL, MustBeFirstL, PreferFirstL,
+                            IsOR, Depth + 1))
       return false;
     bool CanNegateR;
     bool MustBeFirstR;
-    if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1))
+    bool PreferFirstR;
+    if (!canEmitConjunction(DAG, O1, CanNegateR, MustBeFirstR, PreferFirstR,
+                            IsOR, Depth + 1))
       return false;
 
     if (MustBeFirstL && MustBeFirstR)
@@ -3940,6 +3952,7 @@ static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
       CanNegate = false;
       MustBeFirst = MustBeFirstL || MustBeFirstR;
     }
+    PreferFirst = PreferFirstL || PreferFirstR;
     return true;
   }
   return false;
@@ -4001,19 +4014,25 @@ static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
   SDValue LHS = Val->getOperand(0);
   bool CanNegateL;
   bool MustBeFirstL;
-  bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR);
+  bool PreferFirstL;
+  bool ValidL = canEmitConjunction(DAG, LHS, CanNegateL, MustBeFirstL,
+                                   PreferFirstL, IsOR);
   assert(ValidL && "Valid conjunction/disjunction tree");
   (void)ValidL;
 
   SDValue RHS = Val->getOperand(1);
   bool CanNegateR;
   bool MustBeFirstR;
-  bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR);
+  bool PreferFirstR;
+  bool ValidR = canEmitConjunction(DAG, RHS, CanNegateR, MustBeFirstR,
+                                   PreferFirstR, IsOR);
   assert(ValidR && "Valid conjunction/disjunction tree");
   (void)ValidR;
 
-  // Swap sub-tree that must come first to the right side.
-  if (MustBeFirstL) {
+  bool ShouldFirstL = PreferFirstL && !PreferFirstR && !MustBeFirstR;
+
+  // Swap sub-tree that must or should come first to the right side.
+  if (MustBeFirstL || ShouldFirstL) {
     assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
     std::swap(LHS, RHS);
     std::swap(CanNegateL, CanNegateR);
@@ -4069,7 +4088,9 @@ static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val,
                                AArch64CC::CondCode &OutCC) {
   bool DummyCanNegate;
   bool DummyMustBeFirst;
-  if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false))
+  bool DummyPreferFirst;
+  if (!canEmitConjunction(DAG, Val, DummyCanNegate, DummyMustBeFirst,
+                          DummyPreferFirst, false))
     return SDValue();
 
   return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL);
diff --git a/llvm/test/CodeGen/AArch64/ccmp-cse.ll b/llvm/test/CodeGen/AArch64/ccmp-cse.ll
new file mode 100644
index 0000000000000..657498172a04c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ccmp-cse.ll
@@ -0,0 +1,139 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s
+
+define i64 @test_single_or(i64 %unrelated, i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: test_single_or:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subs x8, x2, x1
+; CHECK-NEXT:    ccmp x2, x0, #2, hs
+; CHECK-NEXT:    csel x0, xzr, x8, hi
+; CHECK-NEXT:    ret
+  %cmp.match = icmp ult i64 %y, %x
+  %cmp.nomatch = icmp ugt i64 %y, %unrelated
+  %or.cond = or i1 %cmp.match, %cmp.nomatch
+  %sub.reuse = sub nuw i64 %y, %x
+  %res = select i1 %or.cond, i64 0, i64 %sub.reuse
+  ret i64 %res
+}
+
+define i64 @test_two_ors(i64 %unrelated, i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: test_two_ors:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subs x8, x2, x1
+; CHECK-NEXT:    ccmp x0, x1, #0, hs
+; CHECK-NEXT:    ccmp x2, x0, #2, hs
+; CHECK-NEXT:    csel x0, xzr, x8, hi
+; CHECK-NEXT:    ret
+  %cmp.match = icmp ult i64 %y, %x
+  %cmp.nomatch1 = icmp ult i64 %unrelated, %x
+  %cmp.nomatch2 = icmp ugt i64 %y, %unrelated
+  %or.nomatch = or i1 %cmp.nomatch1, %cmp.nomatch2
+  %or.cond = or i1 %cmp.match, %or.nomatch
+  %sub.reuse = sub nuw i64 %y, %x
+  %res = select i1 %or.cond, i64 0, i64 %sub.reuse
+  ret i64 %res
+}
+
+define i64 @test_two_ors_commuted(i64 %unrelated, i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: test_two_ors_commuted:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subs x8, x2, x1
+; CHECK-NEXT:    ccmp x0, x1, #0, hs
+; CHECK-NEXT:    ccmp x2, x0, #2, hs
+; CHECK-NEXT:    csel x0, xzr, x8, hi
+; CHECK-NEXT:    ret
+  %cmp.match = icmp ult i64 %y, %x
+  %cmp.nomatch1 = icmp ult i64 %unrelated, %x
+  %cmp.nomatch2 = icmp ugt i64 %y, %unrelated
+  %or.nomatch = or i1 %cmp.nomatch1, %cmp.nomatch2
+  %or.cond = or i1 %or.nomatch, %cmp.match
+  %sub.reuse = sub nuw i64 %y, %x
+  %res = select i1 %or.cond, i64 0, i64 %sub.reuse
+  ret i64 %res
+}
+
+define i64 @test_single_and(i64 %unrelated, i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: test_single_and:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subs x8, x2, x1
+; CHECK-NEXT:    ccmp x2, x0, #0, lo
+; CHECK-NEXT:    csel x0, xzr, x8, hi
+; CHECK-NEXT:    ret
+  %cmp.match = icmp ult i64 %y, %x
+  %cmp.nomatch = icmp ugt i64 %y, %unrelated
+  %and.cond = and i1 %cmp.match, %cmp.nomatch
+  %sub.reuse = sub nuw i64 %y, %x
+  %res = select i1 %and.cond, i64 0, i64 %sub.reuse
+  ret i64 %res
+}
+
+define i64 @test_single_or_sub_commuted(i64 %unrelated, i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: test_single_or_sub_commuted:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subs x8, x1, x2
+; CHECK-NEXT:    ccmp x2, x0, #2, ls
+; CHECK-NEXT:    csel x0, xzr, x8, hi
+; CHECK-NEXT:    ret
+  %cmp.match = icmp ult i64 %y, %x
+  %cmp.nomatch = icmp ugt i64 %y, %unrelated
+  %or.cond = or i1 %cmp.match, %cmp.nomatch
+  %sub.reuse = sub nuw i64 %x, %y
+  %res = select i1 %or.cond, i64 0, i64 %sub.reuse
+  ret i64 %res
+}
+
+; Negative test: The or sub-tree must be negated, so it has to come first; MustBeFirst overrides PreferFirst.
+define i64 @test_mustbefirst_overrides_preferfirst_negative(i64 %unrelated, i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: test_mustbefirst_overrides_preferfirst_negative:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmp x2, x0
+; CHECK-NEXT:    sub x8, x2, x1
+; CHECK-NEXT:    ccmp x0, x1, #0, ls
+; CHECK-NEXT:    ccmp x2, x1, #2, lo
+; CHECK-NEXT:    csel x0, xzr, x8, lo
+; CHECK-NEXT:    ret
+  %cmp.match = icmp ult i64 %y, %x
+  %cmp.nomatch1 = icmp ult i64 %unrelated, %x
+  %cmp.nomatch2 = icmp ugt i64 %y, %unrelated
+  %or.nomatch = or i1 %cmp.nomatch1, %cmp.nomatch2
+  %and.cond = and i1 %or.nomatch, %cmp.match
+  %sub.reuse = sub nuw i64 %y, %x
+  %res = select i1 %and.cond, i64 0, i64 %sub.reuse
+  ret i64 %res
+}
+
+; Negative test: There is no analogue of SUBS for floating point.
+define float @test_negative_float(float %unrelated, float %x, float %y) nounwind {
+; CHECK-LABEL: test_negative_float:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmp s2, s0
+; CHECK-NEXT:    fsub s0, s2, s1
+; CHECK-NEXT:    movi d3, #0000000000000000
+; CHECK-NEXT:    fccmp s2, s1, #8, le
+; CHECK-NEXT:    fcsel s0, s3, s0, mi
+; CHECK-NEXT:    ret
+  %cmp.nomatch1 = fcmp olt float %y, %x
+  %cmp.nomatch2 = fcmp ogt float %y, %unrelated
+  %or.cond = or i1 %cmp.nomatch1, %cmp.nomatch2
+  %sub.noreuse = fsub float %y, %x
+  %res = select i1 %or.cond, float 0.0, float %sub.noreuse
+  ret float %res
+}
+
+; Negative test: If both operands match a sub, do not reorder them.
+define i64 @test_prefer_right_negative(i64 %x, i64 %y, i64 %z) nounwind {
+; CHECK-LABEL: test_prefer_right_negative:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmp x2, x0
+; CHECK-NEXT:    ccmp x2, x1, #0, ls
+; CHECK-NEXT:    csel x8, x0, x1, lo
+; CHECK-NEXT:    sub x0, x2, x8
+; CHECK-NEXT:    ret
+  %cmp.match1 = icmp ult i64 %z, %y
+  %cmp.match2 = icmp ugt i64 %z, %x
+  %or.cond = or i1 %cmp.match1, %cmp.match2
+  %sub.reuse1 = sub nuw i64 %z, %y
+  %sub.reuse2 = sub nuw i64 %z, %x
+  %res = select i1 %or.cond, i64 %sub.reuse2, i64 %sub.reuse1
+  ret i64 %res
+}

From 3cf1f0c127bcc11185a5f8f6a295ce678827b923 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88@gmail.com>
Date: Tue, 18 Nov 2025 20:40:40 +0300
Subject: [PATCH 49/52] [ARM] Pattern match Low Overhead Loops pseudos (NFC)
 (#168209)

Pull Request: https://github.com/llvm/llvm-project/pull/168209
---
 llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 36 -------------------------
 llvm/lib/Target/ARM/ARMInstrThumb2.td   | 31 ++++++++++++++++-----
 2 files changed, 24 insertions(+), 43 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 847b7af5a9b11..26b5e5a22386e 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -3965,31 +3965,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
       return;
     // Other cases are autogenerated.
     break;
-  case ARMISD::WLSSETUP: {
-    SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopSetup, dl, MVT::i32,
-                                         N->getOperand(0));
-    ReplaceUses(N, New);
-    CurDAG->RemoveDeadNode(N);
-    return;
-  }
-  case ARMISD::WLS: {
-    SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other,
-                                         N->getOperand(1), N->getOperand(2),
-                                         N->getOperand(0));
-    ReplaceUses(N, New);
-    CurDAG->RemoveDeadNode(N);
-    return;
-  }
-  case ARMISD::LE: {
-    SDValue Ops[] = { N->getOperand(1),
-                      N->getOperand(2),
-                      N->getOperand(0) };
-    unsigned Opc = ARM::t2LoopEnd;
-    SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
-    ReplaceUses(N, New);
-    CurDAG->RemoveDeadNode(N);
-    return;
-  }
   case ARMISD::LDRD: {
     if (Subtarget->isThumb2())
       break; // TableGen handles isel in this case.
@@ -4043,17 +4018,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
     CurDAG->RemoveDeadNode(N);
     return;
   }
-  case ARMISD::LOOP_DEC: {
-    SDValue Ops[] = { N->getOperand(1),
-                      N->getOperand(2),
-                      N->getOperand(0) };
-    SDNode *Dec =
-      CurDAG->getMachineNode(ARM::t2LoopDec, dl,
-                             CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
-    ReplaceUses(N, Dec);
-    CurDAG->RemoveDeadNode(N);
-    return;
-  }
   case ARMISD::BRCOND: {
     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
     // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index c229c8e4491df..911d7ebfba141 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -5581,6 +5581,20 @@ class t2LOL<dag oops, dag iops, string asm, string ops>
   let Predicates = [IsThumb2, HasV8_1MMainline, HasLOB];
 }
 
+def arm_wlssetup
+    : SDNode<"ARMISD::WLSSETUP",
+             SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisSameAs<1, 0>]>>;
+
+def arm_wls : SDNode<"ARMISD::WLS",
+                     SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>,
+                     [SDNPHasChain]>;
+
+def arm_loop_dec : SDNode<"ARMISD::LOOP_DEC", SDTIntBinOp, [SDNPHasChain]>;
+
+def arm_le : SDNode<"ARMISD::LE",
+                    SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>,
+                    [SDNPHasChain]>;
+
 let isNotDuplicable = 1 in {
 def t2WLS : t2LOL<(outs GPRlr:$LR),
                   (ins rGPR:$Rn, wlslabel_u11:$label),
@@ -5651,15 +5665,17 @@ def t2DoLoopStartTP :
 // valid after reg alloc, as it should be lowered during MVETPAndVPTOptimisations
 // into a t2WhileLoopStartLR (or expanded).
 def t2WhileLoopSetup :
-  t2PseudoInst<(outs GPRlr:$lr), (ins rGPR:$tc), 4, IIC_Br, []>;
+    t2PseudoInst<(outs GPRlr:$lr), (ins rGPR:$tc), 4, IIC_Br,
+                 [(set i32:$lr, (arm_wlssetup i32:$tc))]>;
 
 // A pseudo to represent the decrement in a low overhead loop. A t2LoopDec and
 // t2LoopEnd together represent a LE instruction. Ideally these are converted
 // to a t2LoopEndDec which is lowered as a single instruction.
 let hasSideEffects = 0 in
 def t2LoopDec :
-  t2PseudoInst<(outs GPRlr:$Rm), (ins GPRlr:$Rn, imm0_7:$size),
-               4, IIC_Br, []>, Sched<[WriteBr]>;
+    t2PseudoInst<(outs GPRlr:$Rm), (ins GPRlr:$Rn, imm0_7:$size), 4, IIC_Br,
+                 [(set i32:$Rm, (arm_loop_dec i32:$Rn, timm:$size))]>,
+    Sched<[WriteBr]>;
 
 let isBranch = 1, isTerminator = 1, hasSideEffects = 1, Defs = [CPSR] in {
 // The branch in a t2WhileLoopSetup/t2WhileLoopStart pair, eventually turned
@@ -5667,8 +5683,8 @@ let isBranch = 1, isTerminator = 1, hasSideEffects = 1, Defs = [CPSR] in {
 def t2WhileLoopStart :
     t2PseudoInst<(outs),
                  (ins GPRlr:$tc, brtarget:$target),
-                 4, IIC_Br, []>,
-                 Sched<[WriteBr]>;
+                 4, IIC_Br, [(arm_wls i32:$tc, bb:$target)]>,
+    Sched<[WriteBr]>;
 
 // WhileLoopStartLR that sets up LR and branches on zero, equivalent to WLS. It
 // is lowered in the ARMLowOverheadLoops pass providing the branches are within
@@ -5690,8 +5706,9 @@ def t2WhileLoopStartTP :
 
 // t2LoopEnd - the branch half of a t2LoopDec/t2LoopEnd pair.
 def t2LoopEnd :
-  t2PseudoInst<(outs), (ins GPRlr:$tc, brtarget:$target),
-  8, IIC_Br, []>, Sched<[WriteBr]>;
+    t2PseudoInst<(outs), (ins GPRlr:$tc, brtarget:$target),
+                 8, IIC_Br, [(arm_le i32:$tc, bb:$target)]>,
+    Sched<[WriteBr]>;
 
 // The combination of a t2LoopDec and t2LoopEnd, performing both the LR
 // decrement and branch as a single instruction. Is lowered to a LE or

From 0b82415c59c57c40beb072a716675293e7007a65 Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler@amd.com>
Date: Tue, 18 Nov 2025 18:41:04 +0100
Subject: [PATCH 50/52] [AMDGPU] Consider FLAT instructions for VMEM hazard
 detection (#137170)

According to the RDNA2 ISA, "Flat instructions look at the per-workitem
address and determine for each work item if the target memory address is
in global, private or scratch memory." This means that FLAT instructions
must be considered for VMEM hazards even when they are not
segment-specific. In addition, LDS DMA instructions must be considered
for LDS hazard detection.

See also #137148
---
 llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp      | 10 ++++------
 llvm/test/CodeGen/AMDGPU/lds-branch-vmem-hazard.mir |  5 +++--
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 7a2f84a2f73eb..29d22f27a2d8e 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1502,9 +1502,8 @@ static bool shouldRunLdsBranchVmemWARHazardFixup(const MachineFunction &MF,
   bool HasVmem = false;
   for (auto &MBB : MF) {
     for (auto &MI : MBB) {
-      HasLds |= SIInstrInfo::isDS(MI);
-      HasVmem |= (SIInstrInfo::isVMEM(MI) && !SIInstrInfo::isFLAT(MI)) ||
-                 SIInstrInfo::isSegmentSpecificFLAT(MI);
+      HasLds |= SIInstrInfo::isDS(MI) || SIInstrInfo::isLDSDMA(MI);
+      HasVmem |= SIInstrInfo::isVMEM(MI);
       if (HasLds && HasVmem)
         return true;
     }
@@ -1526,10 +1525,9 @@ bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
   assert(!ST.hasExtendedWaitCounts());
 
   auto IsHazardInst = [](const MachineInstr &MI) {
-    if (SIInstrInfo::isDS(MI))
+    if (SIInstrInfo::isDS(MI) || SIInstrInfo::isLDSDMA(MI))
       return 1;
-    if ((SIInstrInfo::isVMEM(MI) && !SIInstrInfo::isFLAT(MI)) ||
-        SIInstrInfo::isSegmentSpecificFLAT(MI))
+    if (SIInstrInfo::isVMEM(MI))
       return 2;
     return 0;
   };
diff --git a/llvm/test/CodeGen/AMDGPU/lds-branch-vmem-hazard.mir b/llvm/test/CodeGen/AMDGPU/lds-branch-vmem-hazard.mir
index 86e657093b5b2..ab4077d8f5b68 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-branch-vmem-hazard.mir
+++ b/llvm/test/CodeGen/AMDGPU/lds-branch-vmem-hazard.mir
@@ -269,11 +269,12 @@ body:            |
     S_ENDPGM 0
 ...
 
-# GCN-LABEL: name: no_hazard_lds_branch_flat
+# GCN-LABEL: name: hazard_lds_branch_flat
 # GCN:      bb.1:
+# GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
 # GCN-NEXT: FLAT_LOAD_DWORD
 ---
-name:            no_hazard_lds_branch_flat
+name:            hazard_lds_branch_flat
 body:            |
   bb.0:
     successors: %bb.1

From c88ae6eb21201ee3c699a76ba424cbe42ae2e7b1 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek@amd.com>
Date: Tue, 18 Nov 2025 11:44:03 -0600
Subject: [PATCH 51/52] [flang][OpenMP] Move two utilities from Semantics to
 Parser, NFC (#168549)

Move `GetInnermostExecPart` and `IsStrictlyStructuredBlock` from
Semantics/openmp-utils.* to Parser/openmp-utils.*. These two only depend
on the AST contents and properties.
---
 flang/include/flang/Parser/openmp-utils.h    |  2 ++
 flang/include/flang/Semantics/openmp-utils.h |  2 --
 flang/lib/Parser/openmp-utils.cpp            | 28 ++++++++++++++++++++
 flang/lib/Semantics/check-omp-atomic.cpp     |  2 ++
 flang/lib/Semantics/openmp-utils.cpp         | 28 --------------------
 5 files changed, 32 insertions(+), 30 deletions(-)

diff --git a/flang/include/flang/Parser/openmp-utils.h b/flang/include/flang/Parser/openmp-utils.h
index 8fa4a84aff06d..36556f8dd7f4a 100644
--- a/flang/include/flang/Parser/openmp-utils.h
+++ b/flang/include/flang/Parser/openmp-utils.h
@@ -137,6 +137,8 @@ const T *GetFirstArgument(const OmpDirectiveSpecification &spec) {
 
 const BlockConstruct *GetFortranBlockConstruct(
     const ExecutionPartConstruct &epc);
+const Block &GetInnermostExecPart(const Block &block);
+bool IsStrictlyStructuredBlock(const Block &block);
 
 const OmpCombinerExpression *GetCombinerExpr(
     const OmpReductionSpecifier &rspec);
diff --git a/flang/include/flang/Semantics/openmp-utils.h b/flang/include/flang/Semantics/openmp-utils.h
index 14a4f0e93bda5..f5739ab16d643 100644
--- a/flang/include/flang/Semantics/openmp-utils.h
+++ b/flang/include/flang/Semantics/openmp-utils.h
@@ -97,8 +97,6 @@ const SomeExpr *HasStorageOverlap(
     const SomeExpr &base, llvm::ArrayRef<SomeExpr> exprs);
 bool IsAssignment(const parser::ActionStmt *x);
 bool IsPointerAssignment(const evaluate::Assignment &x);
-const parser::Block &GetInnermostExecPart(const parser::Block &block);
-bool IsStrictlyStructuredBlock(const parser::Block &block);
 } // namespace omp
 } // namespace Fortran::semantics
 
diff --git a/flang/lib/Parser/openmp-utils.cpp b/flang/lib/Parser/openmp-utils.cpp
index b9d3763cdd06d..2424828293c73 100644
--- a/flang/lib/Parser/openmp-utils.cpp
+++ b/flang/lib/Parser/openmp-utils.cpp
@@ -93,6 +93,34 @@ const BlockConstruct *GetFortranBlockConstruct(
   return nullptr;
 }
 
+/// A parser::Block is a list of executable constructs, whereas a
+/// parser::BlockConstruct is Fortran's BLOCK/ENDBLOCK construct.
+/// Peel off the outermost BlockConstructs and return a reference to the
+/// Block forming the executable part of the innermost peeled construct.
+/// Concretely: while the current Block holds exactly one entry and that
+/// entry is a BlockConstruct, descend into the Block it contains. If no
+/// descent is possible, the result is `block` itself.
+const Block &GetInnermostExecPart(const Block &block) {
+  const Block *current{&block};
+  while (current->size() == 1) {
+    // A lone entry that is not a BLOCK construct ends the descent.
+    const BlockConstruct *nested{GetFortranBlockConstruct(current->front())};
+    if (!nested) {
+      break;
+    }
+    current = &std::get<Block>(nested->t);
+  }
+  return *current;
+}
+
+/// Check whether `block` has exactly one entry and that entry is a
+/// Fortran BLOCK construct. Any other shape, including an empty list,
+/// yields false.
+bool IsStrictlyStructuredBlock(const Block &block) {
+  return block.size() == 1 &&
+      GetFortranBlockConstruct(block.front()) != nullptr;
+}
+
 const OmpCombinerExpression *GetCombinerExpr(
     const OmpReductionSpecifier &rspec) {
   return addr_if(std::get<std::optional<OmpCombinerExpression>>(rspec.t));
diff --git a/flang/lib/Semantics/check-omp-atomic.cpp b/flang/lib/Semantics/check-omp-atomic.cpp
index ec03e6fe2d920..b9e34ca6e74df 100644
--- a/flang/lib/Semantics/check-omp-atomic.cpp
+++ b/flang/lib/Semantics/check-omp-atomic.cpp
@@ -19,6 +19,7 @@
 #include "flang/Evaluate/rewrite.h"
 #include "flang/Evaluate/tools.h"
 #include "flang/Parser/char-block.h"
+#include "flang/Parser/openmp-utils.h"
 #include "flang/Parser/parse-tree.h"
 #include "flang/Semantics/openmp-utils.h"
 #include "flang/Semantics/symbol.h"
@@ -41,6 +42,7 @@
 
 namespace Fortran::semantics {
 
+using namespace Fortran::parser::omp;
 using namespace Fortran::semantics::omp;
 
 namespace operation = Fortran::evaluate::operation;
diff --git a/flang/lib/Semantics/openmp-utils.cpp b/flang/lib/Semantics/openmp-utils.cpp
index 4a40d6eec17bb..18a37d64a3b5a 100644
--- a/flang/lib/Semantics/openmp-utils.cpp
+++ b/flang/lib/Semantics/openmp-utils.cpp
@@ -496,32 +496,4 @@ bool IsPointerAssignment(const evaluate::Assignment &x) {
   return std::holds_alternative<evaluate::Assignment::BoundsSpec>(x.u) ||
       std::holds_alternative<evaluate::Assignment::BoundsRemapping>(x.u);
 }
-
-/// parser::Block is a list of executable constructs, parser::BlockConstruct
-/// is Fortran's BLOCK/ENDBLOCK construct.
-/// Strip the outermost BlockConstructs, return the reference to the Block
-/// in the executable part of the innermost of the stripped constructs.
-/// Specifically, if the given `block` has a single entry (it's a list), and
-/// the entry is a BlockConstruct, get the Block contained within. Repeat
-/// this step as many times as possible.
-const parser::Block &GetInnermostExecPart(const parser::Block &block) {
-  const parser::Block *iter{&block};
-  while (iter->size() == 1) {
-    const parser::ExecutionPartConstruct &ep{iter->front()};
-    if (auto *bc{GetFortranBlockConstruct(ep)}) {
-      iter = &std::get<parser::Block>(bc->t);
-    } else {
-      break;
-    }
-  }
-  return *iter;
-}
-
-bool IsStrictlyStructuredBlock(const parser::Block &block) {
-  if (block.size() == 1) {
-    return GetFortranBlockConstruct(block.front()) != nullptr;
-  } else {
-    return false;
-  }
-}
 } // namespace Fortran::semantics::omp

From bd8c94177537ba30c6a160afa6dd1b8b8fc1e813 Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman@google.com>
Date: Tue, 18 Nov 2025 17:47:18 +0000
Subject: [PATCH 52/52] Reapply "[Github] Update PR labeller to v6.0.1
 (#167246)"

This reverts commit b3d62645158cd6f463f2e1c878f6d63b9dc4b164.

The original update broke the workflow because the sync-labels flag was
set to a zero-length string to work around an issue. The underlying issue
has been fixed and the value is now required to be a boolean. We can just
drop the value because we want the default behavior anyway. This should
be the last remaining breaking change from v5 that we need to migrate.
---
 .github/new-prs-labeler.yml   | 1942 +++++++++++++++++++--------------
 .github/workflows/new-prs.yml |    4 +-
 2 files changed, 1131 insertions(+), 815 deletions(-)

diff --git a/.github/new-prs-labeler.yml b/.github/new-prs-labeler.yml
index efdc42d349195..bb0eef5842b0f 100644
--- a/.github/new-prs-labeler.yml
+++ b/.github/new-prs-labeler.yml
@@ -1,1131 +1,1449 @@
 BOLT:
-  - bolt/**/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - bolt/**/*
 
 ClangIR:
-  - clang/include/clang/CIR/**/*
-  - clang/lib/CIR/**/*
-  - clang/tools/cir-*/**/*
-  - clang/test/CIR/**/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/include/clang/CIR/**/*
+      - clang/lib/CIR/**/*
+      - clang/tools/cir-*/**/*
+      - clang/test/CIR/**/*
 
 clang:bytecode:
-  - clang/docs/ConstantInterpreter.rst
-  - clang/lib/AST/ByteCode/**/*
-  - clang/test/AST/ByteCode/**/*
-  - clang/unittests/AST/ByteCode/**/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/docs/ConstantInterpreter.rst
+      - clang/lib/AST/ByteCode/**/*
+      - clang/test/AST/ByteCode/**/*
+      - clang/unittests/AST/ByteCode/**/*
 
 clang:dataflow:
-  - clang/include/clang/Analysis/FlowSensitive/**/*
-  - clang/lib/Analysis/FlowSensitive/**/*
-  - clang/unittests/Analysis/FlowSensitive/**/*
-  - clang/docs/DataFlowAnalysisIntro.md
-  - clang/docs/DataFlowAnalysisIntroImages/**/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/include/clang/Analysis/FlowSensitive/**/*
+      - clang/lib/Analysis/FlowSensitive/**/*
+      - clang/unittests/Analysis/FlowSensitive/**/*
+      - clang/docs/DataFlowAnalysisIntro.md
+      - clang/docs/DataFlowAnalysisIntroImages/**/*
 
 clang:frontend:
-  - clang/lib/AST/**/*
-  - clang/include/clang/AST/**/*
-  - clang/lib/Basic/**/*
-  - clang/include/clang/Basic/**/*
-  - clang/lib/Interpreter/**/*
-  - clang/include/clang/Interpreter/**/*
-  - clang/lib/Lex/**/*
-  - clang/include/clang/Lex/**/*
-  - clang/lib/Parse/**/*
-  - clang/include/clang/Parse/**/*
-  - clang/lib/Sema/**/*
-  - clang/include/clang/Sema/**/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/lib/AST/**/*
+      - clang/include/clang/AST/**/*
+      - clang/lib/Basic/**/*
+      - clang/include/clang/Basic/**/*
+      - clang/lib/Interpreter/**/*
+      - clang/include/clang/Interpreter/**/*
+      - clang/lib/Lex/**/*
+      - clang/include/clang/Lex/**/*
+      - clang/lib/Parse/**/*
+      - clang/include/clang/Parse/**/*
+      - clang/lib/Sema/**/*
+      - clang/include/clang/Sema/**/*
 
 clang:headers:
-  - clang/lib/Headers/**/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/lib/Headers/**/*
 
 compiler-rt:
-  - compiler-rt/**/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - compiler-rt/**/*
 
 flang:
-  - flang/**/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - flang/**/*
 
 flang:frontend:
-  - flang/Parser/**/*
-  - flang/Evaluate/**/*
-  - flang/Semantics/**/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - flang/Parser/**/*
+      - flang/Evaluate/**/*
+      - flang/Semantics/**/*
 
 libclc:
-  - libclc/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - libclc/**
 
 HLSL:
-  - clang/*HLSL*/**/*
-  - clang/**/*HLSL*
-  - llvm/**/Frontend/HLSL/**/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/*HLSL*/**/*
+      - clang/**/*HLSL*
+      - llvm/**/Frontend/HLSL/**/*
 
 lld:
-  - lld/**/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - lld/**/*
 
 llvm-lit:
-  - llvm/utils/lit/**/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/utils/lit/**/*
 
 PGO:
-  - llvm/**/ProfileData/**/*
-  - llvm/**/SampleProfile*
-  - llvm/**/CodeGen/MIRSampleProfile*
-  - llvm/lib/Transforms/Instrumentation/CGProfile.cpp
-  - llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
-  - llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
-  - llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
-  - llvm/lib/Transforms/Instrumentation/PGO*
-  - llvm/lib/Transforms/Instrumentation/ValueProfile*
-  - llvm/test/Instrumentation/InstrProfiling/**/*
-  - llvm/test/Transforms/PGOProfile/**/*
-  - llvm/test/Transforms/SampleProfile/**/*
-  - llvm/**/llvm-profdata/**/*
-  - llvm/**/llvm-profgen/**/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/**/ProfileData/**/*
+      - llvm/**/SampleProfile*
+      - llvm/**/CodeGen/MIRSampleProfile*
+      - llvm/lib/Transforms/Instrumentation/CGProfile.cpp
+      - llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
+      - llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+      - llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+      - llvm/lib/Transforms/Instrumentation/PGO*
+      - llvm/lib/Transforms/Instrumentation/ValueProfile*
+      - llvm/test/Instrumentation/InstrProfiling/**/*
+      - llvm/test/Transforms/PGOProfile/**/*
+      - llvm/test/Transforms/SampleProfile/**/*
+      - llvm/**/llvm-profdata/**/*
+      - llvm/**/llvm-profgen/**/*
 
 vectorizers:
-  - llvm/lib/Transforms/Vectorize/**/*
-  - llvm/include/llvm/Transforms/Vectorize/**/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/lib/Transforms/Vectorize/**/*
+      - llvm/include/llvm/Transforms/Vectorize/**/*
 
 # IMPORTED FROM CODEOWNERS
 LTO:
-  - llvm/*/LTO/**
-  - llvm/*/Linker/**
-  - llvm/*/ThinLTO/**
-  - llvm/lib/Transforms/*/FunctionImport*
-  - llvm/tools/gold/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/*/LTO/**
+      - llvm/*/Linker/**
+      - llvm/*/ThinLTO/**
+      - llvm/lib/Transforms/*/FunctionImport*
+      - llvm/tools/gold/**
 
 clang:driver:
-  - clang/*/Driver/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/*/Driver/**
 
 compiler-rt:asan:
-  - compiler-rt/lib/asan/**
-  - compiler-rt/include/sanitizer/asan_interface.h
-  - compiler-rt/test/asan/**
-  - compiler-rt/lib/asan_abi/**
-  - compiler-rt/test/asan_abi/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - compiler-rt/lib/asan/**
+      - compiler-rt/include/sanitizer/asan_interface.h
+      - compiler-rt/test/asan/**
+      - compiler-rt/lib/asan_abi/**
+      - compiler-rt/test/asan_abi/**
 
 compiler-rt:builtins:
-  - compiler-rt/lib/builtins/**
-  - compiler-rt/test/builtins/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - compiler-rt/lib/builtins/**
+      - compiler-rt/test/builtins/**
 
 compiler-rt:cfi:
-  - compiler-rt/lib/cfi/**
-  - compiler-rt/test/cfi/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - compiler-rt/lib/cfi/**
+      - compiler-rt/test/cfi/**
 
 compiler-rt:fuzzer:
-  - compiler-rt/lib/fuzzer/**
-  - compiler-rt/include/fuzzer/**
-  - compiler-rt/test/fuzzer/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - compiler-rt/lib/fuzzer/**
+      - compiler-rt/include/fuzzer/**
+      - compiler-rt/test/fuzzer/**
 
 compiler-rt:hwasan:
-  - compiler-rt/lib/hwasan/**
-  - compiler-rt/include/sanitizer/hwasan_interface.h
-  - compiler-rt/test/hwasan/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - compiler-rt/lib/hwasan/**
+      - compiler-rt/include/sanitizer/hwasan_interface.h
+      - compiler-rt/test/hwasan/**
 
 compiler-rt:lsan:
-  - compiler-rt/lib/lsan/**
-  - compiler-rt/include/sanitizer/lsan_interface.h
-  - compiler-rt/test/lsan/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - compiler-rt/lib/lsan/**
+      - compiler-rt/include/sanitizer/lsan_interface.h
+      - compiler-rt/test/lsan/**
 
 compiler-rt:msan:
-  - compiler-rt/lib/msan/**
-  - compiler-rt/include/sanitizer/msan_interface.h
-  - compiler-rt/test/msan/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - compiler-rt/lib/msan/**
+      - compiler-rt/include/sanitizer/msan_interface.h
+      - compiler-rt/test/msan/**
 
 compiler-rt:sanitizer:
-  - llvm/lib/Transforms/Instrumentation/*Sanitizer*
-  - compiler-rt/lib/interception/**
-  - compiler-rt/lib/*san*/**
-  - compiler-rt/include/sanitizer/**
-  - compiler-rt/test/*san*/**
-  - compiler-rt/lib/fuzzer/**
-  - compiler-rt/include/fuzzer/**
-  - compiler-rt/test/fuzzer/**
-  - compiler-rt/lib/scudo/**
-  - compiler-rt/test/scudo/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/lib/Transforms/Instrumentation/*Sanitizer*
+      - compiler-rt/lib/interception/**
+      - compiler-rt/lib/*san*/**
+      - compiler-rt/include/sanitizer/**
+      - compiler-rt/test/*san*/**
+      - compiler-rt/lib/fuzzer/**
+      - compiler-rt/include/fuzzer/**
+      - compiler-rt/test/fuzzer/**
+      - compiler-rt/lib/scudo/**
+      - compiler-rt/test/scudo/**
 
 compiler-rt:scudo:
-  - compiler-rt/lib/scudo/**
-  - compiler-rt/test/scudo/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - compiler-rt/lib/scudo/**
+      - compiler-rt/test/scudo/**
 
 compiler-rt:tsan:
-  - compiler-rt/lib/tsan/**
-  - compiler-rt/include/sanitizer/tsan_interface.h
-  - compiler-rt/include/sanitizer/tsan_interface_atomic.h
-  - compiler-rt/test/tsan/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - compiler-rt/lib/tsan/**
+      - compiler-rt/include/sanitizer/tsan_interface.h
+      - compiler-rt/include/sanitizer/tsan_interface_atomic.h
+      - compiler-rt/test/tsan/**
 
 compiler-rt:ubsan:
-  - compiler-rt/lib/ubsan/**
-  - compiler-rt/include/sanitizer/ubsan_interface.h
-  - compiler-rt/test/ubsan/**
-  - compiler-rt/lib/ubsan_minimal/**
-  - compiler-rt/test/ubsan_minimal/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - compiler-rt/lib/ubsan/**
+      - compiler-rt/include/sanitizer/ubsan_interface.h
+      - compiler-rt/test/ubsan/**
+      - compiler-rt/lib/ubsan_minimal/**
+      - compiler-rt/test/ubsan_minimal/**
 
 xray:
-  - llvm/tools/llvm-xray/**
-  - compiler-rt/*/xray/**
-  - clang/include/clang/Basic/XRay*
-  - clang/lib/Basic/XRay*
-  - compiler-rt/*/xray/**
-  - llvm/include/llvm/XRay/**
-  - llvm/lib/XRay/**
-  - llvm/tools/llvm-xray/**
-  - llvm/unittests/XRay/**
-  - compiler-rt/*/xray/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/tools/llvm-xray/**
+      - compiler-rt/*/xray/**
+      - clang/include/clang/Basic/XRay*
+      - clang/lib/Basic/XRay*
+      - compiler-rt/*/xray/**
+      - llvm/include/llvm/XRay/**
+      - llvm/lib/XRay/**
+      - llvm/tools/llvm-xray/**
+      - llvm/unittests/XRay/**
+      - compiler-rt/*/xray/**
 
 clang:codegen:
-  - clang/lib/CodeGen/**
-  - clang/include/clang/CodeGen/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/lib/CodeGen/**
+      - clang/include/clang/CodeGen/**
 
 mlir:
-  - mlir/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**
 
 mlir:core:
-  - mlir/include/mlir/Support/**
-  - mlir/lib/Support/**
-  - mlir/include/mlir/Parser/**
-  - mlir/lib/Parser/**
-  - mlir/include/mlir/IR/**
-  - mlir/lib/IR/**
-  - mlir/include/mlir/Bytecode/**
-  - mlir/lib/Bytecode/**
-  - mlir/include/mlir/AsmParser/**
-  - mlir/lib/AsmParser/**
-  - mlir/include/mlir/Pass/**
-  - mlir/lib/Pass/**
-  - mlir/include/mlir/Tools/**
-  - mlir/lib/Tools/**
-  - mlir/include/mlir/Reducer/**
-  - mlir/lib/Reducer/**
-  - mlir/include/mlir/Transforms/**
-  - mlir/lib/Transforms/**
-  - mlir/include/mlir/Debug/**
-  - mlir/lib/Debug/**
-  - mlir/tools/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/include/mlir/Support/**
+      - mlir/lib/Support/**
+      - mlir/include/mlir/Parser/**
+      - mlir/lib/Parser/**
+      - mlir/include/mlir/IR/**
+      - mlir/lib/IR/**
+      - mlir/include/mlir/Bytecode/**
+      - mlir/lib/Bytecode/**
+      - mlir/include/mlir/AsmParser/**
+      - mlir/lib/AsmParser/**
+      - mlir/include/mlir/Pass/**
+      - mlir/lib/Pass/**
+      - mlir/include/mlir/Tools/**
+      - mlir/lib/Tools/**
+      - mlir/include/mlir/Reducer/**
+      - mlir/lib/Reducer/**
+      - mlir/include/mlir/Transforms/**
+      - mlir/lib/Transforms/**
+      - mlir/include/mlir/Debug/**
+      - mlir/lib/Debug/**
+      - mlir/tools/**
 
 mlir:ods:
-  - mlir/TableGen/**
-  - mlir/tblgen/**
-  - mlir/include/mlir/IR/*.td
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/TableGen/**
+      - mlir/tblgen/**
+      - mlir/include/mlir/IR/*.td
 
 mlir:bindings:
-  - mlir/Bindings/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/Bindings/**
 
 mlir:gpu:
-  - mlir/**/*GPU*/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/*GPU*/**
 
 mlir:amdgpu:
-  - mlir/**/AMDGPU/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/AMDGPU/**
 
 mlir:amx:
-  - mlir/**/AMX/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/AMX/**
 
 mlir:affine:
-  - mlir/**/Affine/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/Affine/**
 
 mlir:arith:
-  - mlir/**/Arith/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/Arith/**
 
 mlir:neon:
-  - mlir/**/ArmNeon/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/ArmNeon/**
 
 mlir:sme:
-  - mlir/**/ArmSME/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/ArmSME/**
 
 mlir:sve:
-  - mlir/**/ArmSVE/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/ArmSVE/**
 
 mlir:async:
-  - mlir/**/Async/**
-  - mlir/**/Async/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/Async/**
+      - mlir/**/Async/**
 
 mlir:bufferization:
-  - mlir/**/Bufferization/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/Bufferization/**
 
 mlir:complex:
-  - mlir/**/Complex/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/Complex/**
 
 mlir:cf:
-  - mlir/**/ControlFlow/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/ControlFlow/**
 
 mlir:dlti:
-  - mlir/**/DLTI/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/DLTI/**
 
 mlir:emitc:
-  - mlir/**/*EmitC*/**
-  - mlir/lib/Target/Cpp/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/*EmitC*/**
+      - mlir/lib/Target/Cpp/**
 
 mlir:func:
-  - mlir/**/Func/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/Func/**
 
 mlir:irdl:
-  - mlir/**/IRDL/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/IRDL/**
 
 mlir:index:
-  - mlir/**/Index/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/Index/**
 
 mlir:llvm:
-  - mlir/**/LLVM*
-  - mlir/**/LLVM*/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/LLVM*
+      - mlir/**/LLVM*/**
 
 mlir:linalg:
-  - mlir/**/*linalg/**
-  - mlir/**/*Linalg/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/*linalg/**
+      - mlir/**/*Linalg/**
 
 mlir:mlprogram:
-  - mlir/**/MLProgram/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/MLProgram/**
 
 mlir:math:
-  - mlir/**/Math/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/Math/**
 
 mlir:memref:
-  - mlir/**/MemRef/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/MemRef/**
 
 mlir:nvgpu:
-  - mlir/**/NVGPU/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/NVGPU/**
 
 mlir:openacc:
-  - mlir/**/*OpenACC*
-  - mlir/**/*OpenACC*/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/*OpenACC*
+      - mlir/**/*OpenACC*/**
 
 mlir:openmp:
-  - mlir/**/*OpenMP*
-  - mlir/**/*OpenMP*/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/*OpenMP*
+      - mlir/**/*OpenMP*/**
 
 mlir:pdl:
-  - mlir/**/PDL/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/PDL/**
 
 mlir:quant:
-  - mlir/**/Quant/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/Quant/**
 
 mlir:scf:
-  - mlir/**/SCF/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/SCF/**
 
 mlir:spirv:
-  - mlir/**/SPIRV/**
-  - mlir/**/SPIRVTo*/**
-  - mlir/**/*ToSPIRV/**
-  - mlir/tools/mlir-spirv-cpu-runner/**
-  - mlir/tools/mlir-vulkan-runner/**
-  - mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/SPIRV/**
+      - mlir/**/SPIRVTo*/**
+      - mlir/**/*ToSPIRV/**
+      - mlir/tools/mlir-spirv-cpu-runner/**
+      - mlir/tools/mlir-vulkan-runner/**
+      - mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp
 
 mlir:shape:
-  - mlir/**/Shape/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/Shape/**
 
 mlir:sparse:
-  - mlir/**/SparseTensor/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/SparseTensor/**
 
 mlir:tensor:
-  - mlir/**/Tensor/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/Tensor/**
 
 mlir:tosa:
-  - mlir/**/*Tosa*/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/*Tosa*/**
 
 mlir:ub:
-  - mlir/**/UB/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/UB/**
 
 mlir:vector:
-  - mlir/**/*Vector/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/*Vector/**
 
 mlir:execution-engine:
-  - mlir/**/ExecutionEngine/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/ExecutionEngine/**
 
 mlir:presburger:
-  - mlir/**/*Presburger*/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/*Presburger*/**
 
 mlir:python:
-  - mlir/python/**/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/python/**/*
 
 mlir:vectorops:
-  - mlir/**/Vector/**/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/Vector/**/*
 
 coroutines:
-  - clang/docs/DebuggingCoroutines.rst
-  - clang/lib/Sema/SemaCoroutine.cpp
-  - clang/lib/CodeGen/CGCoroutine.cpp
-  - clang/test/CodeGenCoroutines/**
-  - llvm/docs/Coroutines.rst
-  - llvm/include/llvm/Transforms/Coroutines/**
-  - llvm/lib/Transforms/Coroutines/**
-  - llvm/test/Transforms/Coroutines/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/docs/DebuggingCoroutines.rst
+      - clang/lib/Sema/SemaCoroutine.cpp
+      - clang/lib/CodeGen/CGCoroutine.cpp
+      - clang/test/CodeGenCoroutines/**
+      - llvm/docs/Coroutines.rst
+      - llvm/include/llvm/Transforms/Coroutines/**
+      - llvm/lib/Transforms/Coroutines/**
+      - llvm/test/Transforms/Coroutines/*
 
 clang:modules:
-  - clang/docs/StandardCPlusPlusModules.rst
-  - clang/include/clang/AST/AbstractBasicReader.h
-  - clang/include/clang/AST/AbstractBasicWriter.h
-  - clang/include/clang/AST/AbstractTypeReader.h
-  - clang/include/clang/AST/AbstractTypeWriter.h
-  - clang/include/clang/AST/PropertiesBase.td
-  - clang/include/clang/AST/ODRHash.h
-  - clang/include/clang/AST/TypeProperties.td
-  - clang/include/clang/Basic/Module.h
-  - clang/include/clang/Frontend/PrecompiledPreamble.h
-  - clang/include/clang/Lex/ModuleLoader.h
-  - clang/include/clang/Lex/ModuleMap.h
-  - clang/include/clang/Serialization/**
-  - clang/lib/AST/ODRHash.cpp
-  - clang/lib/AST/StmtProfile.cpp
-  - clang/lib/Basic/Module.cpp
-  - clang/lib/Frontend/ModuleDependencyCollector.cpp
-  - clang/lib/Frontend/PrecompiledPreamble.cpp
-  - clang/lib/Lex/ModuleMap.cpp
-  - clang/lib/Sema/SemaModule.cpp
-  - clang/lib/Serialization/**
-  - clang/test/CXX/module/**
-  - clang/test/Modules/**
-  - clang/unittests/Serialization/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/docs/StandardCPlusPlusModules.rst
+      - clang/include/clang/AST/AbstractBasicReader.h
+      - clang/include/clang/AST/AbstractBasicWriter.h
+      - clang/include/clang/AST/AbstractTypeReader.h
+      - clang/include/clang/AST/AbstractTypeWriter.h
+      - clang/include/clang/AST/PropertiesBase.td
+      - clang/include/clang/AST/ODRHash.h
+      - clang/include/clang/AST/TypeProperties.td
+      - clang/include/clang/Basic/Module.h
+      - clang/include/clang/Frontend/PrecompiledPreamble.h
+      - clang/include/clang/Lex/ModuleLoader.h
+      - clang/include/clang/Lex/ModuleMap.h
+      - clang/include/clang/Serialization/**
+      - clang/lib/AST/ODRHash.cpp
+      - clang/lib/AST/StmtProfile.cpp
+      - clang/lib/Basic/Module.cpp
+      - clang/lib/Frontend/ModuleDependencyCollector.cpp
+      - clang/lib/Frontend/PrecompiledPreamble.cpp
+      - clang/lib/Lex/ModuleMap.cpp
+      - clang/lib/Sema/SemaModule.cpp
+      - clang/lib/Serialization/**
+      - clang/test/CXX/module/**
+      - clang/test/Modules/**
+      - clang/unittests/Serialization/*
 
 clang-tidy:
-  - clang-tools-extra/clang-tidy/**
-  - clang-tools-extra/docs/clang-tidy/**
-  - clang-tools-extra/test/clang-tidy/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang-tools-extra/clang-tidy/**
+      - clang-tools-extra/docs/clang-tidy/**
+      - clang-tools-extra/test/clang-tidy/**
 
 clang-tools-extra:
-  - clang-tools-extra/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang-tools-extra/**
 
 tools:llvm-mca:
-  - llvm/tools/llvm-mca/**
-  - llvm/include/llvm/MCA/**
-  - llvm/lib/MCA/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/tools/llvm-mca/**
+      - llvm/include/llvm/MCA/**
+      - llvm/lib/MCA/**
 
 clang:
-  - any:
-    - clang/**
-    - '!clang/**/Format/**'
-    - '!clang/tools/clang-format/**'
+  - changed-files:
+    - all-globs-to-all-files:
+      - clang/**
+      - '!clang/**/Format/**'
+      - '!clang/tools/clang-format/**'
 
 testing-tools:
-  - llvm/include/llvm/FileCheck/**
-  - llvm/lib/FileCheck/**
-  - llvm/test/FileCheck/**
-  - llvm/unittests/FileCheck/**
-  - llvm/utils/lit/**
-  - llvm/utils/split-file/**
-  - llvm/utils/not/**
-  - llvm/utils/count/**
-  - llvm/utils/FileCheck/**
-  - llvm/docs/CommandGuide/FileCheck.rst
-  - llvm/docs/CommandGuide/lit.rst
-  - llvm/docs/TestingGuide.rst
-  - llvm/test/Other/FileCheck-space.txt
-  - llvm/utils/UpdateTestChecks/**
-  - llvm/utils/update*_test_checks.py
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/include/llvm/FileCheck/**
+      - llvm/lib/FileCheck/**
+      - llvm/test/FileCheck/**
+      - llvm/unittests/FileCheck/**
+      - llvm/utils/lit/**
+      - llvm/utils/split-file/**
+      - llvm/utils/not/**
+      - llvm/utils/count/**
+      - llvm/utils/FileCheck/**
+      - llvm/docs/CommandGuide/FileCheck.rst
+      - llvm/docs/CommandGuide/lit.rst
+      - llvm/docs/TestingGuide.rst
+      - llvm/test/Other/FileCheck-space.txt
+      - llvm/utils/UpdateTestChecks/**
+      - llvm/utils/update*_test_checks.py
 
 debuginfo:
-  - clang/lib/CodeGen/CGDebugInfo.*
-  - llvm/include/llvm/BinaryFormat/Dwarf.*
-  - llvm/include/llvm/CodeGen/*Debug*.*
-  - llvm/include/llvm/DebugInfo/**
-  - llvm/include/llvm/Debuginfod/**
-  - llvm/include/llvm/Frontend/Debug/**
-  - llvm/include/llvm/IR/Debug*.*
-  - llvm/include/llvm/Object/*Debug*.*
-  - llvm/include/llvm/ObjectYAML/*Debug*.*
-  - llvm/include/llvm/Transforms/Utils/*Debug*.*
-  - llvm/include/llvm-c/DebugInfo.h
-  - llvm/lib/BinaryFormat/Dwarf.cpp
-  - llvm/lib/CodeGen/AsmPrinter/*Debug*.*
-  - llvm/lib/CodeGen/AsmPrinter/Dwarf*.*
-  - llvm/lib/CodeGen/AsmPrinter/DIE*.*
-  - llvm/lib/CodeGen/LiveDebugValues/**
-  - llvm/lib/CodeGen/*Debug*.*
-  - llvm/lib/CodeGen/DwarfEHPrepare.cpp
-  - llvm/lib/DebugInfo/**
-  - llvm/lib/Debuginfod/**
-  - llvm/lib/DWARFLinkerParallel/**
-  - llvm/lib/IR/Debug*.cpp
-  - llvm/lib/MC/MCDwarf.cpp
-  - llvm/lib/Transforms/Utils/*Debug*.*
-  - llvm/test/DebugInfo/**
-  - llvm/test/tools/dsymutil/**
-  - llvm/test/tools/llvm-debuginfo-analyzer/**
-  - llvm/test/tools/llvm-debuginfod/**
-  - llvm/test/tools/llvm-debuginfod-find/**
-  - llvm/test/tools/llvm-dwarfdump/**
-  - llvm/test/tools/llvm-dwarfutil/**
-  - llvm/test/tools/llvm-dwp/**
-  - llvm/test/tools/llvm-gsymutil/**
-  - llvm/test/tools/llvm-pdbuti/**
-  - llvm/tools/dsymutil/**
-  - llvm/tools/llvm-debuginfo-analyzer/**
-  - llvm/tools/llvm-debuginfod/**
-  - llvm/tools/llvm-debuginfod-find/**
-  - llvm/tools/llvm-dwarfdump/**
-  - llvm/tools/llvm-dwarfutil/**
-  - llvm/tools/llvm-dwp/**
-  - llvm/tools/llvm-gsymutil/**
-  - llvm/tools/llvm-pdbutil/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/lib/CodeGen/CGDebugInfo.*
+      - llvm/include/llvm/BinaryFormat/Dwarf.*
+      - llvm/include/llvm/CodeGen/*Debug*.*
+      - llvm/include/llvm/DebugInfo/**
+      - llvm/include/llvm/Debuginfod/**
+      - llvm/include/llvm/Frontend/Debug/**
+      - llvm/include/llvm/IR/Debug*.*
+      - llvm/include/llvm/Object/*Debug*.*
+      - llvm/include/llvm/ObjectYAML/*Debug*.*
+      - llvm/include/llvm/Transforms/Utils/*Debug*.*
+      - llvm/include/llvm-c/DebugInfo.h
+      - llvm/lib/BinaryFormat/Dwarf.cpp
+      - llvm/lib/CodeGen/AsmPrinter/*Debug*.*
+      - llvm/lib/CodeGen/AsmPrinter/Dwarf*.*
+      - llvm/lib/CodeGen/AsmPrinter/DIE*.*
+      - llvm/lib/CodeGen/LiveDebugValues/**
+      - llvm/lib/CodeGen/*Debug*.*
+      - llvm/lib/CodeGen/DwarfEHPrepare.cpp
+      - llvm/lib/DebugInfo/**
+      - llvm/lib/Debuginfod/**
+      - llvm/lib/DWARFLinkerParallel/**
+      - llvm/lib/IR/Debug*.cpp
+      - llvm/lib/MC/MCDwarf.cpp
+      - llvm/lib/Transforms/Utils/*Debug*.*
+      - llvm/test/DebugInfo/**
+      - llvm/test/tools/dsymutil/**
+      - llvm/test/tools/llvm-debuginfo-analyzer/**
+      - llvm/test/tools/llvm-debuginfod/**
+      - llvm/test/tools/llvm-debuginfod-find/**
+      - llvm/test/tools/llvm-dwarfdump/**
+      - llvm/test/tools/llvm-dwarfutil/**
+      - llvm/test/tools/llvm-dwp/**
+      - llvm/test/tools/llvm-gsymutil/**
+      - llvm/test/tools/llvm-pdbutil/**
+      - llvm/tools/dsymutil/**
+      - llvm/tools/llvm-debuginfo-analyzer/**
+      - llvm/tools/llvm-debuginfod/**
+      - llvm/tools/llvm-debuginfod-find/**
+      - llvm/tools/llvm-dwarfdump/**
+      - llvm/tools/llvm-dwarfutil/**
+      - llvm/tools/llvm-dwp/**
+      - llvm/tools/llvm-gsymutil/**
+      - llvm/tools/llvm-pdbutil/**
 
 github:workflow:
-  - .github/workflows/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - .github/workflows/**
 
 cmake:
-  - cmake/**
-  - llvm/cmake/**
-  - runtimes/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - cmake/**
+      - llvm/cmake/**
+      - runtimes/**
 
 flang:driver:
-  - flang/tools/flang-driver/**
-  - flang/unittests/Frontend/**
-  - flang/lib/FrontendTool/**
-  - flang/lib/Frontend/**
-  - flang/include/flang/Frontend/**
-  - flang/include/flang/FrontendTool/**
-  - flang/test/Driver/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - flang/tools/flang-driver/**
+      - flang/unittests/Frontend/**
+      - flang/lib/FrontendTool/**
+      - flang/lib/Frontend/**
+      - flang/include/flang/Frontend/**
+      - flang/include/flang/FrontendTool/**
+      - flang/test/Driver/**
 
 backend:m68k:
-  - llvm/lib/Target/M68k/**
-  - clang/lib/Basic/Targets/M68k.*
-  - clang/lib/CodeGen/Targets/M68k.cpp
-  - llvm/test/CodeGen/M68k/**
-  - llvm/test/MC/Disassembler/M68k/**
-  - llvm/test/MC/M68k/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/lib/Target/M68k/**
+      - clang/lib/Basic/Targets/M68k.*
+      - clang/lib/CodeGen/Targets/M68k.cpp
+      - llvm/test/CodeGen/M68k/**
+      - llvm/test/MC/Disassembler/M68k/**
+      - llvm/test/MC/M68k/**
 
 libc++:
-  - libcxx/**
-  - .github/workflows/libcxx-*
+  - changed-files:
+    - any-glob-to-any-file:
+      - libcxx/**
+      - .github/workflows/libcxx-*
 
 libc++abi:
-  - libcxxabi/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - libcxxabi/**
 
 libunwind:
-  - libunwind/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - libunwind/**
 
 objectyaml:
-  - llvm/include/llvm/ObjectYAML/**
-  - llvm/lib/ObjectYAML/**
-  - llvm/test/tools/obj2yaml/**
-  - llvm/test/tools/yaml2obj/**
-  - llvm/tools/obj2yaml/**
-  - llvm/tools/yaml2obj/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/include/llvm/ObjectYAML/**
+      - llvm/lib/ObjectYAML/**
+      - llvm/test/tools/obj2yaml/**
+      - llvm/test/tools/yaml2obj/**
+      - llvm/tools/obj2yaml/**
+      - llvm/tools/yaml2obj/**
 
 clang:analysis:
-  - clang/include/clang/Analysis/**
-  - clang/lib/Analysis/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/include/clang/Analysis/**
+      - clang/lib/Analysis/**
 
 clang:static analyzer:
-  - clang/include/clang/StaticAnalyzer/**
-  - clang/lib/StaticAnalyzer/**
-  - clang/tools/scan-build/**
-  - clang/utils/analyzer/**
-  - clang/docs/analyzer/**
-  - clang/test/Analysis/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/include/clang/StaticAnalyzer/**
+      - clang/lib/StaticAnalyzer/**
+      - clang/tools/scan-build/**
+      - clang/utils/analyzer/**
+      - clang/docs/analyzer/**
+      - clang/test/Analysis/**
 
 pgo:
-  - llvm/lib/Transforms/Instrumentation/CGProfile.cpp
-  - llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
-  - llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
-  - llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
-  - llvm/lib/Transforms/Instrumentation/PGO*
-  - llvm/lib/Transforms/Instrumentation/ValueProfile*
-  - llvm/test/Instrumentation/InstrProfiling/**
-  - llvm/test/Transforms/PGOProfile/**
-  - compiler-rt/lib/profile/**
-  - compiler-rt/lib/memprof/**
-  - compiler-rt/test/profile/**
-  - compiler-rt/test/memprof/**
-  - llvm/tools/llvm-profdata/**
-  - llvm/tools/llvm-profgen/**
-  - llvm/test/tools/llvm-profdata/**
-  - llvm/test/tools/llvm-profgen/**
-  - llvm/unittests/ProfileData/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/lib/Transforms/Instrumentation/CGProfile.cpp
+      - llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
+      - llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+      - llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+      - llvm/lib/Transforms/Instrumentation/PGO*
+      - llvm/lib/Transforms/Instrumentation/ValueProfile*
+      - llvm/test/Instrumentation/InstrProfiling/**
+      - llvm/test/Transforms/PGOProfile/**
+      - compiler-rt/lib/profile/**
+      - compiler-rt/lib/memprof/**
+      - compiler-rt/test/profile/**
+      - compiler-rt/test/memprof/**
+      - llvm/tools/llvm-profdata/**
+      - llvm/tools/llvm-profgen/**
+      - llvm/test/tools/llvm-profdata/**
+      - llvm/test/tools/llvm-profgen/**
+      - llvm/unittests/ProfileData/*
 
 openacc:
-  - flang/**/OpenACC/**
-  - flang/include/flang/Lower/OpenACC.h
-  - flang/docs/OpenACC.md
-  - flang/lib/Parser/openacc-parsers.cpp
-  - flang/lib/Lower/OpenACC.cpp
-  - llvm/**/Frontend/OpenACC/**
-  - llvm/unittests/Frontend/OpenACCTest.cpp
-  - mlir/test/Target/LLVMIR/openacc-llvm.mlir
-  - mlir/**/*OpenACC/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - flang/**/OpenACC/**
+      - flang/include/flang/Lower/OpenACC.h
+      - flang/docs/OpenACC.md
+      - flang/lib/Parser/openacc-parsers.cpp
+      - flang/lib/Lower/OpenACC.cpp
+      - llvm/**/Frontend/OpenACC/**
+      - llvm/unittests/Frontend/OpenACCTest.cpp
+      - mlir/test/Target/LLVMIR/openacc-llvm.mlir
+      - mlir/**/*OpenACC/**
 
 flang:runtime:
-  - flang/runtime/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - flang/runtime/**
 
 flang:parser:
-  - flang/**/Parser/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - flang/**/Parser/**
 
 flang:semantics:
-  - flang/**/Evaluate/**
-  - flang/**/Semantics/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - flang/**/Evaluate/**
+      - flang/**/Semantics/**
 
 flang:fir-hlfir:
-  - flang/**/Lower/**
-  - flang/**/Optimizer/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - flang/**/Lower/**
+      - flang/**/Optimizer/**
 
 flang:codegen:
-  - flang/**/CodeGen/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - flang/**/CodeGen/**
 
 llvm:codegen:
-  - llvm/lib/CodeGen/*
-  - llvm/lib/CodeGen/MIRParser/*
-  - llvm/lib/CodeGen/LiveDebugValues/*
-  - llvm/lib/CodeGen/AsmPrinter/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/lib/CodeGen/*
+      - llvm/lib/CodeGen/MIRParser/*
+      - llvm/lib/CodeGen/LiveDebugValues/*
+      - llvm/lib/CodeGen/AsmPrinter/*
 
 llvm:globalisel:
-  - llvm/**/GlobalISel/**
-  - llvm/utils/TableGen/GlobalISel*
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/**/GlobalISel/**
+      - llvm/utils/TableGen/GlobalISel*
 
 function-specialization:
-  - llvm/include/llvm/Transforms/Utils/SCCPSolver.h
-  - llvm/lib/Transforms/Utils/SCCPSolver.cpp
-  - llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
-  - llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
-  - llvm/test/Transforms/FunctionSpecialization/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/include/llvm/Transforms/Utils/SCCPSolver.h
+      - llvm/lib/Transforms/Utils/SCCPSolver.cpp
+      - llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+      - llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+      - llvm/test/Transforms/FunctionSpecialization/*
 
 libc:
-  - libc/**
-  - utils/bazel/llvm-project-overlay/libc/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - libc/**
+      - utils/bazel/llvm-project-overlay/libc/**
 
 clang-format:
-  - clang/**/Format/**
-  - clang/tools/clang-format/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/**/Format/**
+      - clang/tools/clang-format/**
 
 flang:openmp:
-  - flang/test/**/OpenMP/**
-  - flang/lib/Lower/OpenMP.cpp
-  - flang/lib/Semantics/resolve-directives.cpp
-  - flang/lib/Semantics/check-omp-structure.cpp
-  - flang/lib/Optimizer/Transforms/OMP*
-  - flang/test/Fir/convert-to-llvm-openmp-and-fir.fir
-  - flang/test/Lower/OpenMP/**
-  - flang/test/Transforms/omp*
-  - mlir/**/*OpenMP*
-  - mlir/test/Target/LLVMIR/openmp*
-  - llvm/lib/Frontend/OpenMP/**
-  - llvm/include/llvm/Frontend/OpenMP/**
-  - llvm/unittests/Frontend/OpenMP*
+  - changed-files:
+    - any-glob-to-any-file:
+      - flang/test/**/OpenMP/**
+      - flang/lib/Lower/OpenMP.cpp
+      - flang/lib/Semantics/resolve-directives.cpp
+      - flang/lib/Semantics/check-omp-structure.cpp
+      - flang/lib/Optimizer/Transforms/OMP*
+      - flang/test/Fir/convert-to-llvm-openmp-and-fir.fir
+      - flang/test/Lower/OpenMP/**
+      - flang/test/Transforms/omp*
+      - mlir/**/*OpenMP*
+      - mlir/test/Target/LLVMIR/openmp*
+      - llvm/lib/Frontend/OpenMP/**
+      - llvm/include/llvm/Frontend/OpenMP/**
+      - llvm/unittests/Frontend/OpenMP*
 
 llvm:ir:
-  - llvm/lib/IR/**
-  - llvm/include/llvm/IR/**
-  - llvm/docs/LangRef.rst
-  - llvm/unittests/IR/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/lib/IR/**
+      - llvm/include/llvm/IR/**
+      - llvm/docs/LangRef.rst
+      - llvm/unittests/IR/**
 
 llvm:SandboxIR:
-  - llvm/lib/SandboxIR/**
-  - llvm/include/llvm/SandboxIR/**
-  - llvm/docs/SandboxIR.md
-  - llvm/unittests/SandboxIR/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/lib/SandboxIR/**
+      - llvm/include/llvm/SandboxIR/**
+      - llvm/docs/SandboxIR.md
+      - llvm/unittests/SandboxIR/**
 
 llvm:analysis:
-  - llvm/lib/Analysis/**
-  - llvm/include/llvm/Analysis/**
-  - llvm/test/Analysis/**
-  - llvm/unittests/Analysis/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/lib/Analysis/**
+      - llvm/include/llvm/Analysis/**
+      - llvm/test/Analysis/**
+      - llvm/unittests/Analysis/**
 
 llvm:adt:
-  - llvm/**/ADT/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/**/ADT/*
 
 llvm:support:
-  - llvm/**/Support/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/**/Support/**
 
 # Skip llvm/test/MC and llvm/unittests/MC, which includes target-specific directories.
 llvm:mc:
-  - llvm/include/llvm/MC/**
-  - llvm/lib/MC/**
-  - llvm/tools/llvm-mc/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/include/llvm/MC/**
+      - llvm/lib/MC/**
+      - llvm/tools/llvm-mc/**
 
 llvm:transforms:
-  - llvm/lib/Transforms/**
-  - llvm/include/llvm/Transforms/**
-  - llvm/test/Transforms/**
-  - llvm/unittests/Transforms/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/lib/Transforms/**
+      - llvm/include/llvm/Transforms/**
+      - llvm/test/Transforms/**
+      - llvm/unittests/Transforms/**
 
 llvm:instcombine:
-  - llvm/lib/Analysis/InstructionSimplify.cpp
-  - llvm/lib/Transforms/InstCombine/**
-  - llvm/include/llvm/Transforms/InstCombine/
-  - llvm/include/llvm/Analysis/InstructionSimplify.h
-  - llvm/test/Transforms/InstCombine/**
-  - llvm/test/Transforms/InstSimplify/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/lib/Analysis/InstructionSimplify.cpp
+      - llvm/lib/Transforms/InstCombine/**
+      - llvm/include/llvm/Transforms/InstCombine/**
+      - llvm/include/llvm/Analysis/InstructionSimplify.h
+      - llvm/test/Transforms/InstCombine/**
+      - llvm/test/Transforms/InstSimplify/**
 
 llvm:vectorcombine:
-  - llvm/lib/Transforms/Vectorize/VectorCombine.cpp
-  - llvm/test/Transforms/VectorCombine/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+      - llvm/test/Transforms/VectorCombine/**
 
 clangd:
-  - clang-tools-extra/clangd/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang-tools-extra/clangd/**
 
 hlsl:
-  - clang/test/ParserHLSL/**
-  - clang/test/SemaHLSL/**
-  - clang/test/AST/HLSL/**
-  - clang/test/CodeGenHLSL/**
-  - clang/cmake/caches/HLSL.cmake
-  - clang/include/clang/Basic/HLSL*.h
-  - clang/include/clang/Sema/HLSL*.h
-  - clang/docs/HLSL/**
-  - clang/lib/Driver/ToolChains/HLSL*
-  - clang/lib/Parse/ParseHLSL.cpp
-  - clang/lib/Sema/HLSLExternalSemaSource.cpp
-  - clang/lib/Sema/SemaHLSL.cpp
-  - clang/lib/CodeGen/CGHLSLRuntime.*
-  - clang/lib/CodeGen/CGHLSLBuiltins.cpp
-  - llvm/include/llvm/Frontend/HLSL/**
-  - llvm/lib/Frontend/HLSL/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/test/ParserHLSL/**
+      - clang/test/SemaHLSL/**
+      - clang/test/AST/HLSL/**
+      - clang/test/CodeGenHLSL/**
+      - clang/cmake/caches/HLSL.cmake
+      - clang/include/clang/Basic/HLSL*.h
+      - clang/include/clang/Sema/HLSL*.h
+      - clang/docs/HLSL/**
+      - clang/lib/Driver/ToolChains/HLSL*
+      - clang/lib/Parse/ParseHLSL.cpp
+      - clang/lib/Sema/HLSLExternalSemaSource.cpp
+      - clang/lib/Sema/SemaHLSL.cpp
+      - clang/lib/CodeGen/CGHLSLRuntime.*
+      - clang/lib/CodeGen/CGHLSLBuiltins.cpp
+      - llvm/include/llvm/Frontend/HLSL/**
+      - llvm/lib/Frontend/HLSL/**
 
 llvm:SelectionDAG:
-  - llvm/include/llvm/CodeGen/SelectionDAG*.h
-  - llvm/include/llvm/CodeGen/SDNodeProperties.td
-  - llvm/include/llvm/Target/TargetSelectionDAG.td
-  - llvm/lib/CodeGen/SelectionDAG/**
-  - llvm/utils/TableGen/CodeGenDAG*
-  - llvm/utils/TableGen/DAGISel*
-  - llvm/include/llvm/CodeGen/DAGCombine.h
-  - llvm/include/llvm/CodeGen/ISDOpcodes.h
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/include/llvm/CodeGen/SelectionDAG*.h
+      - llvm/include/llvm/CodeGen/SDNodeProperties.td
+      - llvm/include/llvm/Target/TargetSelectionDAG.td
+      - llvm/lib/CodeGen/SelectionDAG/**
+      - llvm/utils/TableGen/CodeGenDAG*
+      - llvm/utils/TableGen/DAGISel*
+      - llvm/include/llvm/CodeGen/DAGCombine.h
+      - llvm/include/llvm/CodeGen/ISDOpcodes.h
 
 backend:DirectX:
-  - '**/*DirectX*'
-  - '**/*DXIL*'
-  - '**/*dxil*'
-  - '**/*DirectX*/**'
-  - '**/*DXIL*/**'
-  - '**/*dxil*/**'
-  - '**/*DXContainer*'
-  - '**/*DXContainer*/**'
-  - clang/lib/Sema/SemaDirectX.cpp
-  - clang/include/clang/Sema/SemaDirectX.h
-  - clang/include/clang/Basic/BuiltinsDirectX.td
-  - clang/lib/CodeGen/TargetBuiltins/DirectX.cpp
-  - clang/test/CodeGenDirectX/**
-  - clang/test/SemaDirectX/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - '**/*DirectX*'
+      - '**/*DXIL*'
+      - '**/*dxil*'
+      - '**/*DirectX*/**'
+      - '**/*DXIL*/**'
+      - '**/*dxil*/**'
+      - '**/*DXContainer*'
+      - '**/*DXContainer*/**'
+      - clang/lib/Sema/SemaDirectX.cpp
+      - clang/include/clang/Sema/SemaDirectX.h
+      - clang/include/clang/Basic/BuiltinsDirectX.td
+      - clang/lib/CodeGen/TargetBuiltins/DirectX.cpp
+      - clang/test/CodeGenDirectX/**
+      - clang/test/SemaDirectX/**
 
 backend:SPIR-V:
-  - clang/lib/Driver/ToolChains/SPIRV.*
-  - clang/lib/Sema/SemaSPIRV.cpp
-  - clang/include/clang/Sema/SemaSPIRV.h
-  - clang/include/clang/Basic/BuiltinsSPIRV.td
-  - clang/test/CodeGenSPIRV/**
-  - clang/test/SemaSPIRV/**
-  - llvm/lib/Target/SPIRV/**
-  - llvm/test/CodeGen/SPIRV/**
-  - llvm/test/Frontend/HLSL/**
-  - llvm/docs/SPIRVUsage.rst
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/lib/Driver/ToolChains/SPIRV.*
+      - clang/lib/Sema/SemaSPIRV.cpp
+      - clang/include/clang/Sema/SemaSPIRV.h
+      - clang/include/clang/Basic/BuiltinsSPIRV.td
+      - clang/test/CodeGenSPIRV/**
+      - clang/test/SemaSPIRV/**
+      - llvm/lib/Target/SPIRV/**
+      - llvm/test/CodeGen/SPIRV/**
+      - llvm/test/Frontend/HLSL/**
+      - llvm/docs/SPIRVUsage.rst
 
 mlgo:
-  - llvm/lib/Analysis/ML*
-  - llvm/include/llvm/Analysis/ML*
-  - llvm/lib/Analysis/*Runner.cpp
-  - llvm/include/llvm/Analysis/*Runner.h
-  - llvm/unittests/Analysis/ML*
-  - llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp
-  - llvm/lib/Analysis/TrainingLogger.cpp
-  - llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h
-  - llvm/include/llvm/Analysis/Utils/TrainingLogger.h
-  - llvm/test/Analysis/FunctionPropertiesAnalysis/*
-  - llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp
-  - llvm/test/Transforms/inline/ML/**
-  - llvm/lib/CodeGen/ML*
-  - llvm/unittests/CodeGen/ML*
-  - llvm/test/CodeGen/MLRegAlloc/**
-  - llvm/utils/mlgo-utils/**
-  - llvm/docs/MLGO.rst
-  - llvm/include/llvm/Analysis/IR2Vec.h
-  - llvm/lib/Analysis/IR2Vec.cpp
-  - llvm/lib/Analysis/models/**
-  - llvm/include/llvm/CodeGen/MIR2Vec.h
-  - llvm/lib/CodeGen/MIR2Vec.cpp
-  - llvm/test/Analysis/IR2Vec/**
-  - llvm/test/CodeGen/MIR2Vec/**
-  - llvm/unittests/Analysis/IR2VecTest.cpp
-  - llvm/unittests/CodeGen/MIR2VecTest.cpp
-  - llvm/tools/llvm-ir2vec/**
-  - llvm/docs/CommandGuide/llvm-ir2vec.rst
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/lib/Analysis/ML*
+      - llvm/include/llvm/Analysis/ML*
+      - llvm/lib/Analysis/*Runner.cpp
+      - llvm/include/llvm/Analysis/*Runner.h
+      - llvm/unittests/Analysis/ML*
+      - llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp
+      - llvm/lib/Analysis/TrainingLogger.cpp
+      - llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h
+      - llvm/include/llvm/Analysis/Utils/TrainingLogger.h
+      - llvm/test/Analysis/FunctionPropertiesAnalysis/*
+      - llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp
+      - llvm/test/Transforms/inline/ML/**
+      - llvm/lib/CodeGen/ML*
+      - llvm/unittests/CodeGen/ML*
+      - llvm/test/CodeGen/MLRegAlloc/**
+      - llvm/utils/mlgo-utils/**
+      - llvm/docs/MLGO.rst
+      - llvm/include/llvm/Analysis/IR2Vec.h
+      - llvm/lib/Analysis/IR2Vec.cpp
+      - llvm/lib/Analysis/models/**
+      - llvm/include/llvm/CodeGen/MIR2Vec.h
+      - llvm/lib/CodeGen/MIR2Vec.cpp
+      - llvm/test/Analysis/IR2Vec/**
+      - llvm/test/CodeGen/MIR2Vec/**
+      - llvm/unittests/Analysis/IR2VecTest.cpp
+      - llvm/unittests/CodeGen/MIR2VecTest.cpp
+      - llvm/tools/llvm-ir2vec/**
+      - llvm/docs/CommandGuide/llvm-ir2vec.rst
 
 tools:llvm-exegesis:
-  - llvm/tools/llvm-exegesis/**
-  - llvm/test/tools/llvm-exegesis/**
-  - llvm/unittests/tools/llvm-exegesis/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/tools/llvm-exegesis/**
+      - llvm/test/tools/llvm-exegesis/**
+      - llvm/unittests/tools/llvm-exegesis/**
 
 tools:llvm-reduce:
-  - llvm/tools/llvm-reduce/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/tools/llvm-reduce/**
 
 platform:windows:
-  - lld/COFF/**
-  - clang/lib/Driver/MSVC.cpp
-  - clang/lib/Driver/MinGW.cpp
-  - llvm/lib/DebugInfo/CodeView/**
-  - llvm/lib/DebugInfo/PDB/**
-  - llvm/lib/WindowsDriver/**
-  - llvm/lib/Support/Windows/**
-  - llvm/lib/BinaryFormat/COFF.cpp
+  - changed-files:
+    - any-glob-to-any-file:
+      - lld/COFF/**
+      - clang/lib/Driver/MSVC.cpp
+      - clang/lib/Driver/MinGW.cpp
+      - llvm/lib/DebugInfo/CodeView/**
+      - llvm/lib/DebugInfo/PDB/**
+      - llvm/lib/WindowsDriver/**
+      - llvm/lib/Support/Windows/**
+      - llvm/lib/BinaryFormat/COFF.cpp
 
 llvm:regalloc:
-  - llvm/**/CodeGen/CalcSpillWeights*
-  - llvm/**/CodeGen/InlineSpiller*
-  - llvm/**/CodeGen/InterferenceCache*
-  - llvm/**/CodeGen/LiveInterval*
-  - llvm/**/CodeGen/LiveRange*
-  - llvm/**/CodeGen/LiveReg*
-  - llvm/**/CodeGen/LiveVariables*
-  - llvm/**/CodeGen/MachineCopyPropagation*
-  - llvm/**/CodeGen/PHIElimination*
-  - llvm/**/CodeGen/ProcessImplicitDefs.cpp
-  - llvm/**/CodeGen/Register*
-  - llvm/**/CodeGen/RegUsage*
-  - llvm/**/CodeGen/RenameIndependentSubregs.cpp
-  - llvm/**/CodeGen/SlotIndexes.h
-  - llvm/**/CodeGen/SpillPlacement*
-  - llvm/**/CodeGen/SplitKit*
-  - llvm/**/CodeGen/VirtRegMap.h
-  - llvm/include/PBQP/**
-  - llvm/include/PBQPRAConstraint.h
-  - llvm/include/llvm/CodeGen/Spiller.h
-  - llvm/**/*RegAlloc
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/**/CodeGen/CalcSpillWeights*
+      - llvm/**/CodeGen/InlineSpiller*
+      - llvm/**/CodeGen/InterferenceCache*
+      - llvm/**/CodeGen/LiveInterval*
+      - llvm/**/CodeGen/LiveRange*
+      - llvm/**/CodeGen/LiveReg*
+      - llvm/**/CodeGen/LiveVariables*
+      - llvm/**/CodeGen/MachineCopyPropagation*
+      - llvm/**/CodeGen/PHIElimination*
+      - llvm/**/CodeGen/ProcessImplicitDefs.cpp
+      - llvm/**/CodeGen/Register*
+      - llvm/**/CodeGen/RegUsage*
+      - llvm/**/CodeGen/RenameIndependentSubregs.cpp
+      - llvm/**/CodeGen/SlotIndexes.h
+      - llvm/**/CodeGen/SpillPlacement*
+      - llvm/**/CodeGen/SplitKit*
+      - llvm/**/CodeGen/VirtRegMap.h
+      - llvm/include/PBQP/**
+      - llvm/include/PBQPRAConstraint.h
+      - llvm/include/llvm/CodeGen/Spiller.h
+      - llvm/**/*RegAlloc*
 
 lldb:
-  - lldb/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - lldb/**
 
 lldb-dap:
-  - lldb/tools/lldb-dap/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - lldb/tools/lldb-dap/**
 
 backend:AMDGPU:
-  - '**/*amdgpu*'
-  - '**/*AMDGPU*'
-  - '**/*amdgpu*/**'
-  - '**/*AMDGPU*/**'
+  - changed-files:
+    - any-glob-to-any-file:
+      - '**/*amdgpu*'
+      - '**/*AMDGPU*'
+      - '**/*amdgpu*/**'
+      - '**/*AMDGPU*/**'
 
 backend:NVPTX:
-  - 'llvm/**/*nvvm*'
-  - 'llvm/**/*NVVM*'
-  - 'llvm/**/*nvptx*'
-  - 'llvm/**/*NVPTX*'
-  - 'llvm/**/*nvvm*/**'
-  - 'llvm/**/*NVVM*/**'
-  - 'llvm/**/*nvptx*/**'
-  - 'llvm/**/*NVPTX*/**'
+  - changed-files:
+    - any-glob-to-any-file:
+      - 'llvm/**/*nvvm*'
+      - 'llvm/**/*NVVM*'
+      - 'llvm/**/*nvptx*'
+      - 'llvm/**/*NVPTX*'
+      - 'llvm/**/*nvvm*/**'
+      - 'llvm/**/*NVVM*/**'
+      - 'llvm/**/*nvptx*/**'
+      - 'llvm/**/*NVPTX*/**'
 
 backend:MIPS:
-  - '**/*mips*'
-  - '**/*Mips*'
-  - '**/*mips*/**'
-  - '**/*Mips*/**'
+  - changed-files:
+    - any-glob-to-any-file:
+      - '**/*mips*'
+      - '**/*Mips*'
+      - '**/*mips*/**'
+      - '**/*Mips*/**'
 
 backend:RISC-V:
-  - '**/*riscv*'
-  - '**/*RISCV*'
-  - '**/*riscv*/**'
-  - '**/*RISCV*/**'
+  - changed-files:
+    - any-glob-to-any-file:
+      - '**/*riscv*'
+      - '**/*RISCV*'
+      - '**/*riscv*/**'
+      - '**/*RISCV*/**'
 
 backend:Xtensa:
-  - '**/*xtensa*'
-  - '**/*Xtensa*'
-  - '**/*xtensa*/**'
-  - '**/*Xtensa*/**'
+  - changed-files:
+    - any-glob-to-any-file:
+      - '**/*xtensa*'
+      - '**/*Xtensa*'
+      - '**/*xtensa*/**'
+      - '**/*Xtensa*/**'
 
 lld:coff:
-  - lld/**/COFF/**
-  - lld/Common/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - lld/**/COFF/**
+      - lld/Common/**
 
 lld:elf:
-  - lld/**/ELF/**
-  - lld/Common/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - lld/**/ELF/**
+      - lld/Common/**
 
 lld:macho:
-  - lld/**/MachO/**
-  - lld/Common/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - lld/**/MachO/**
+      - lld/Common/**
 
 lld:wasm:
-  - lld/**/wasm/**
-  - lld/Common/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - lld/**/wasm/**
+      - lld/Common/**
 
 backend:ARC:
-  - llvm/lib/Target/ARC/**
-  - clang/lib/Basic/Targets/ARC.h
-  - clang/lib/Basic/Targets/ARC.cpp
-  - clang/lib/CodeGen/Targets/ARC.cpp
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/lib/Target/ARC/**
+      - clang/lib/Basic/Targets/ARC.h
+      - clang/lib/Basic/Targets/ARC.cpp
+      - clang/lib/CodeGen/Targets/ARC.cpp
 
 backend:ARM:
-  - llvm/include/llvm/IR/IntrinsicsARM.td
-  - llvm/test/MC/ARM/**
-  - llvm/lib/Target/ARM/**
-  - llvm/test/CodeGen/ARM/**
-  - clang/lib/Basic/Targets/ARM*
-  - clang/lib/Driver/ToolChains/Arch/ARM.*
-  - clang/lib/CodeGen/Targets/ARM.cpp
-  - clang/include/clang/Basic/BuiltinsARM*
-  - llvm/test/MC/DisasemblerARM/**
-  - clang/include/clang/Sema/SemaARM.h
-  - clang/lib/Sema/SemaARM.cpp
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/include/llvm/IR/IntrinsicsARM.td
+      - llvm/test/MC/ARM/**
+      - llvm/lib/Target/ARM/**
+      - llvm/test/CodeGen/ARM/**
+      - clang/lib/Basic/Targets/ARM*
+      - clang/lib/Driver/ToolChains/Arch/ARM.*
+      - clang/lib/CodeGen/Targets/ARM.cpp
+      - clang/include/clang/Basic/BuiltinsARM*
+      - llvm/test/MC/Disassembler/ARM/**
+      - clang/include/clang/Sema/SemaARM.h
+      - clang/lib/Sema/SemaARM.cpp
 
 backend:AArch64:
-  - llvm/include/llvm/IR/IntrinsicsAArch64.td
-  - llvm/test/MC/AArch64/**
-  - llvm/lib/Target/AArch64/**
-  - llvm/test/CodeGen/AArch64/**
-  - clang/lib/Basic/Targets/AArch64*
-  - clang/lib/Driver/ToolChains/Arch/AArch64.*
-  - clang/lib/CodeGen/Targets/AArch64.cpp
-  - clang/include/clang/Basic/BuiltinsAArch64*
-  - llvm/test/MC/Disassembler/AArch64/**
-  - clang/include/clang/Sema/SemaARM.h
-  - clang/lib/Sema/SemaARM.cpp
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/include/llvm/IR/IntrinsicsAArch64.td
+      - llvm/test/MC/AArch64/**
+      - llvm/lib/Target/AArch64/**
+      - llvm/test/CodeGen/AArch64/**
+      - clang/lib/Basic/Targets/AArch64*
+      - clang/lib/Driver/ToolChains/Arch/AArch64.*
+      - clang/lib/CodeGen/Targets/AArch64.cpp
+      - clang/include/clang/Basic/BuiltinsAArch64*
+      - llvm/test/MC/Disassembler/AArch64/**
+      - clang/include/clang/Sema/SemaARM.h
+      - clang/lib/Sema/SemaARM.cpp
 
 backend:CSKY:
-  - llvm/lib/Target/CSKY/**
-  - llvm/include/llvm/TargetParser/CSKYTargetParser.def
-  - llvm/include/llvm/TargetParser/CSKYTargetParser.h
-  - llvm/include/llvm/BinaryFormat/ELFRelocs/CSKY.def
-  - llvm/lib/TargetParser/CSKYTargetParser.cpp
-  - llvm/lib/Support/CSKYAttributes.cpp
-  - llvm/lib/Support/CSKYAttributeParser.cpp
-  - clang/lib/Basic/Targets/CSKY.h
-  - clang/lib/Basic/Targets/CSKY.cpp
-  - clang/lib/CodeGen/Targets/CSKY.cpp
-  - clang/lib/Driver/ToolChains/CSKY*
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/lib/Target/CSKY/**
+      - llvm/include/llvm/TargetParser/CSKYTargetParser.def
+      - llvm/include/llvm/TargetParser/CSKYTargetParser.h
+      - llvm/include/llvm/BinaryFormat/ELFRelocs/CSKY.def
+      - llvm/lib/TargetParser/CSKYTargetParser.cpp
+      - llvm/lib/Support/CSKYAttributes.cpp
+      - llvm/lib/Support/CSKYAttributeParser.cpp
+      - clang/lib/Basic/Targets/CSKY.h
+      - clang/lib/Basic/Targets/CSKY.cpp
+      - clang/lib/CodeGen/Targets/CSKY.cpp
+      - clang/lib/Driver/ToolChains/CSKY*
 
 backend:Hexagon:
-  - clang/include/clang/Basic/BuiltinsHexagon*.def
-  - clang/include/clang/Sema/SemaHexagon.h
-  - clang/lib/Basic/Targets/Hexagon.*
-  - clang/lib/CodeGen/Targets/Hexagon.cpp
-  - clang/lib/Driver/ToolChains/Hexagon.*
-  - clang/lib/Sema/SemaHexagon.cpp
-  - lld/ELF/Arch/Hexagon.cpp
-  - lldb/source/Plugins/ABI/Hexagon/**
-  - lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/**
-  - llvm/include/llvm/BinaryFormat/ELFRelocs/Hexagon.def
-  - llvm/include/llvm/IR/IntrinsicsHexagon*
-  - llvm/include/llvm/Support/Hexagon*
-  - llvm/lib/Support/Hexagon*
-  - llvm/lib/Target/Hexagon/**
-  - llvm/test/CodeGen/Hexagon/**
-  - llvm/test/CodeGen/*/Hexagon/**
-  - llvm/test/DebugInfo/*/Hexagon/**
-  - llvm/test/Transforms/*/Hexagon
-  - llvm/test/MC/Disassembler/Hexagon/**
-  - llvm/test/MC/Hexagon/**
-  - llvm/test/tools/llvm-objdump/ELF/Hexagon/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/include/clang/Basic/BuiltinsHexagon*.def
+      - clang/include/clang/Sema/SemaHexagon.h
+      - clang/lib/Basic/Targets/Hexagon.*
+      - clang/lib/CodeGen/Targets/Hexagon.cpp
+      - clang/lib/Driver/ToolChains/Hexagon.*
+      - clang/lib/Sema/SemaHexagon.cpp
+      - lld/ELF/Arch/Hexagon.cpp
+      - lldb/source/Plugins/ABI/Hexagon/**
+      - lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/**
+      - llvm/include/llvm/BinaryFormat/ELFRelocs/Hexagon.def
+      - llvm/include/llvm/IR/IntrinsicsHexagon*
+      - llvm/include/llvm/Support/Hexagon*
+      - llvm/lib/Support/Hexagon*
+      - llvm/lib/Target/Hexagon/**
+      - llvm/test/CodeGen/Hexagon/**
+      - llvm/test/CodeGen/*/Hexagon/**
+      - llvm/test/DebugInfo/*/Hexagon/**
+      - llvm/test/Transforms/*/Hexagon/**
+      - llvm/test/MC/Disassembler/Hexagon/**
+      - llvm/test/MC/Hexagon/**
+      - llvm/test/tools/llvm-objdump/ELF/Hexagon/**
 
 backend:Lanai:
-  - llvm/lib/Target/Lanai/**
-  - clang/lib/Basic/Targets/Lanai.h
-  - clang/lib/Basic/Targets/Lanai.cpp
-  - clang/lib/CodeGen/Targets/Lanai.cpp
-  - clang/lib/Driver/ToolChains/Lanai*
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/lib/Target/Lanai/**
+      - clang/lib/Basic/Targets/Lanai.h
+      - clang/lib/Basic/Targets/Lanai.cpp
+      - clang/lib/CodeGen/Targets/Lanai.cpp
+      - clang/lib/Driver/ToolChains/Lanai*
 
 backend:loongarch:
-  - llvm/include/llvm/IR/IntrinsicsLoongArch.td
-  - llvm/test/MC/LoongArch/**
-  - llvm/lib/Target/LoongArch/**
-  - llvm/test/CodeGen/LoongArch/**
-  - clang/lib/Basic/Targets/LoongArch*
-  - clang/lib/Driver/ToolChains/Arch/LoongArch.*
-  - clang/lib/CodeGen/Targets/LoongArch.cpp
-  - clang/include/clang/Basic/BuiltinsLoongArch*
-  - clang/include/clang/Sema/SemaLoongArch.h
-  - clang/lib/Sema/SemaLoongArch.cpp
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/include/llvm/IR/IntrinsicsLoongArch.td
+      - llvm/test/MC/LoongArch/**
+      - llvm/lib/Target/LoongArch/**
+      - llvm/test/CodeGen/LoongArch/**
+      - clang/lib/Basic/Targets/LoongArch*
+      - clang/lib/Driver/ToolChains/Arch/LoongArch.*
+      - clang/lib/CodeGen/Targets/LoongArch.cpp
+      - clang/include/clang/Basic/BuiltinsLoongArch*
+      - clang/include/clang/Sema/SemaLoongArch.h
+      - clang/lib/Sema/SemaLoongArch.cpp
 
 backend:MSP430:
-  - llvm/include/llvm/IR/IntrinsicsMSP430.td
-  - llvm/test/MC/MSP430/**
-  - llvm/lib/Target/MSP430/**
-  - llvm/test/CodeGen/MSP430/**
-  - clang/lib/Basic/Targets/MSP430*
-  - clang/lib/Driver/ToolChains/Arch/MSP430.*
-  - clang/lib/CodeGen/Targets/MSP430.cpp
-  - clang/include/clang/Basic/BuiltinsMSP430*
-  - llvm/test/MC/Disassembler/MSP430/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/include/llvm/IR/IntrinsicsMSP430.td
+      - llvm/test/MC/MSP430/**
+      - llvm/lib/Target/MSP430/**
+      - llvm/test/CodeGen/MSP430/**
+      - clang/lib/Basic/Targets/MSP430*
+      - clang/lib/Driver/ToolChains/Arch/MSP430.*
+      - clang/lib/CodeGen/Targets/MSP430.cpp
+      - clang/include/clang/Basic/BuiltinsMSP430*
+      - llvm/test/MC/Disassembler/MSP430/**
 
 backend:Sparc:
-  - llvm/include/llvm/IR/IntrinsicsSparc.td
-  - llvm/test/MC/Sparc/**
-  - llvm/lib/Target/Sparc/**
-  - llvm/test/CodeGen/Sparc/**
-  - clang/lib/Basic/Targets/Sparc*
-  - clang/lib/Driver/ToolChains/Arch/Sparc.*
-  - clang/lib/CodeGen/Targets/Sparc.cpp
-  - clang/include/clang/Basic/BuiltinsSparc*
-  - llvm/test/MC/Disassembler/Sparc/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/include/llvm/IR/IntrinsicsSparc.td
+      - llvm/test/MC/Sparc/**
+      - llvm/lib/Target/Sparc/**
+      - llvm/test/CodeGen/Sparc/**
+      - clang/lib/Basic/Targets/Sparc*
+      - clang/lib/Driver/ToolChains/Arch/Sparc.*
+      - clang/lib/CodeGen/Targets/Sparc.cpp
+      - clang/include/clang/Basic/BuiltinsSparc*
+      - llvm/test/MC/Disassembler/Sparc/**
 
 backend:WebAssembly:
-  - llvm/lib/Target/WebAssembly/**
-  - llvm/test/CodeGen/WebAssembly/**
-  - clang/lib/Basic/Targets/WebAssembly*
-  - clang/include/clang/Basic/BuiltinsWebAssembly.def
-  - clang/include/clang/Basic/WebAssemblyReferenceTypes.def
-  - clang/lib/CodeGen/Targets/WebAssembly*
-  - llvm/include/llvm/IR/IntinsicsWebAssembly.td
-  - llvm/include/llvm/Object/Wasm*
-  - llvm/lib/CodeGen/AsmPrinter/Wasm*
-  - llvm/lib/CodeGen/Wasm*
-  - llvm/lib/MC/MCParser/Wasm*
-  - llvm/lib/MC/Wasm*
-  - llvm/lib/ObjCopy/wasm/**
-  - llvm/lib/Object/Wasm*
-  - clang/lib/Driver/Toolchains/WebAssembly*
-  - clang/lib/Headers/wasm_simd128.h
-  - clang/test/CodeGen/WebAssembly/**
-  - clang/test/SemaCXX/*wasm*
-  - clang/test/Sema/*wasm*
-  - llvm/include/llvm/BinaryFormat/Wasm.h
-  - llvm/unittests/Target/WebAssembly/**
-  - llvm/test/DebugInfo/WebAssembly/**
-  - llvm/test/MC/WebAssembly/**
-  - clang/include/clang/Sema/SemaWasm.h
-  - clang/lib/Sema/SemaLoongWasm.cpp
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/lib/Target/WebAssembly/**
+      - llvm/test/CodeGen/WebAssembly/**
+      - clang/lib/Basic/Targets/WebAssembly*
+      - clang/include/clang/Basic/BuiltinsWebAssembly.def
+      - clang/include/clang/Basic/WebAssemblyReferenceTypes.def
+      - clang/lib/CodeGen/Targets/WebAssembly*
+      - llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+      - llvm/include/llvm/Object/Wasm*
+      - llvm/lib/CodeGen/AsmPrinter/Wasm*
+      - llvm/lib/CodeGen/Wasm*
+      - llvm/lib/MC/MCParser/Wasm*
+      - llvm/lib/MC/Wasm*
+      - llvm/lib/ObjCopy/wasm/**
+      - llvm/lib/Object/Wasm*
+      - clang/lib/Driver/ToolChains/WebAssembly*
+      - clang/lib/Headers/wasm_simd128.h
+      - clang/test/CodeGen/WebAssembly/**
+      - clang/test/SemaCXX/*wasm*
+      - clang/test/Sema/*wasm*
+      - llvm/include/llvm/BinaryFormat/Wasm.h
+      - llvm/unittests/Target/WebAssembly/**
+      - llvm/test/DebugInfo/WebAssembly/**
+      - llvm/test/MC/WebAssembly/**
+      - clang/include/clang/Sema/SemaWasm.h
+      - clang/lib/Sema/SemaWasm.cpp
 
 backend:X86:
-  - llvm/include/llvm/IR/IntrinsicsX86.td
-  - llvm/lib/Target/X86/**
-  - llvm/test/CodeGen/X86/**
-  - llvm/test/MC/X86/**
-  - llvm/test/MC/Disassembler/X86/**
-  - llvm/test/Analysis/CostModel/X86/**
-  - llvm/test/tools/llvm-mca/X86/**
-  - clang/lib/Basic/Targets/X86/**
-  - clang/lib/Driver/ToolChains/Arch/X86.*
-  - clang/lib/CodeGen/Targets/X86.*
-  - clang/lib/Headers/**
-  - clang/test/CodeGen/X86/**
-  - clang/include/clang/Basic/BuiltinsX86*
-  - llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
-  - llvm/include/llvm/TargetParser/X86*
-  - llvm/lib/TargetParser/X86*
-  - llvm/utils/TableGen/X86*
-  - clang/include/clang/Sema/SemaX86.h
-  - clang/lib/Sema/SemaX86.cpp
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/include/llvm/IR/IntrinsicsX86.td
+      - llvm/lib/Target/X86/**
+      - llvm/test/CodeGen/X86/**
+      - llvm/test/MC/X86/**
+      - llvm/test/MC/Disassembler/X86/**
+      - llvm/test/Analysis/CostModel/X86/**
+      - llvm/test/tools/llvm-mca/X86/**
+      - clang/lib/Basic/Targets/X86/**
+      - clang/lib/Driver/ToolChains/Arch/X86.*
+      - clang/lib/CodeGen/Targets/X86.*
+      - clang/lib/Headers/**
+      - clang/test/CodeGen/X86/**
+      - clang/include/clang/Basic/BuiltinsX86*
+      - llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
+      - llvm/include/llvm/TargetParser/X86*
+      - llvm/lib/TargetParser/X86*
+      - llvm/utils/TableGen/X86*
+      - clang/include/clang/Sema/SemaX86.h
+      - clang/lib/Sema/SemaX86.cpp
 
 backend:PowerPC:
-  - llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC*
-  - llvm/include/llvm/BinaryFormat/XCOFF.h
-  - llvm/include/llvm/IR/IntrinsicsPowerPC.td
-  - llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
-  - llvm/lib/Target/PowerPC/**
-  - llvm/test/Analysis/**/PowerPC/**
-  - llvm/test/CodeGen/PowerPC/**
-  - llvm/test/CodeGen/MIR/PowerPC/**
-  - llvm/test/DebugInfo/XCOFF/**
-  - llvm/test/DebugInfo/PowerPC/**
-  - llvm/test/LTO/PowerPC/**
-  - llvm/test/MC/Disassembler/PowerPC/**
-  - llvm/test/MC/PowerPC/**
-  - llvm/test/MC/XCOFF/**
-  - llvm/test/Transforms/**/PowerPC/**
-  - clang/include/clang/Basic/BuiltinsPPC.*
-  - clang/lib/Basic/Targets/PPC.*
-  - clang/lib/CodeGen/Targets/PPC.cpp
-  - clang/lib/Driver/ToolChains/PPC*
-  - clang/lib/Driver/ToolChains/AIX*
-  - clang/lib/Driver/ToolChains/Arch/PPC.*
-  - clang/test/CodeGen/PowerPC/**
-  - clang/include/clang/Sema/SemaPPC.h
-  - clang/lib/Sema/SemaPPC.cpp
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC*
+      - llvm/include/llvm/BinaryFormat/XCOFF.h
+      - llvm/include/llvm/IR/IntrinsicsPowerPC.td
+      - llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
+      - llvm/lib/Target/PowerPC/**
+      - llvm/test/Analysis/**/PowerPC/**
+      - llvm/test/CodeGen/PowerPC/**
+      - llvm/test/CodeGen/MIR/PowerPC/**
+      - llvm/test/DebugInfo/XCOFF/**
+      - llvm/test/DebugInfo/PowerPC/**
+      - llvm/test/LTO/PowerPC/**
+      - llvm/test/MC/Disassembler/PowerPC/**
+      - llvm/test/MC/PowerPC/**
+      - llvm/test/MC/XCOFF/**
+      - llvm/test/Transforms/**/PowerPC/**
+      - clang/include/clang/Basic/BuiltinsPPC.*
+      - clang/lib/Basic/Targets/PPC.*
+      - clang/lib/CodeGen/Targets/PPC.cpp
+      - clang/lib/Driver/ToolChains/PPC*
+      - clang/lib/Driver/ToolChains/AIX*
+      - clang/lib/Driver/ToolChains/Arch/PPC.*
+      - clang/test/CodeGen/PowerPC/**
+      - clang/include/clang/Sema/SemaPPC.h
+      - clang/lib/Sema/SemaPPC.cpp
 
 backend:SystemZ:
-  - llvm/include/llvm/BinaryFormat/ELFRelocs/SystemZ*
-  - llvm/include/llvm/BinaryFormat/GOFF.h
-  - llvm/include/llvm/IR/IntrinsicsSystemZ.td
-  - llvm/lib/Target/SystemZ/**
-  - llvm/test/Analysis/**/SystemZ/**
-  - llvm/test/CodeGen/SystemZ/**
-  - llvm/test/DebugInfo/SystemZ/**
-  - llvm/test/ExecutionEngine/**/SystemZ/**
-  - llvm/test/MC/Disassembler/SystemZ/**
-  - llvm/test/MC/GOFF/**
-  - llvm/test/MC/SystemZ/**
-  - llvm/test/Transforms/**/SystemZ/**
-  - clang/include/clang/Basic/BuiltinsSystemZ.*
-  - clang/lib/Basic/Targets/SystemZ.*
-  - clang/lib/CodeGen/Targets/SystemZ.cpp
-  - clang/lib/Driver/ToolChains/ZOS*
-  - clang/lib/Driver/ToolChains/Arch/SystemZ.*
-  - clang/test/CodeGen/SystemZ/**
-  - clang/include/clang/Sema/SemaSystemZ.h
-  - clang/lib/Sema/SemaSystemZ.cpp
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/include/llvm/BinaryFormat/ELFRelocs/SystemZ*
+      - llvm/include/llvm/BinaryFormat/GOFF.h
+      - llvm/include/llvm/IR/IntrinsicsSystemZ.td
+      - llvm/lib/Target/SystemZ/**
+      - llvm/test/Analysis/**/SystemZ/**
+      - llvm/test/CodeGen/SystemZ/**
+      - llvm/test/DebugInfo/SystemZ/**
+      - llvm/test/ExecutionEngine/**/SystemZ/**
+      - llvm/test/MC/Disassembler/SystemZ/**
+      - llvm/test/MC/GOFF/**
+      - llvm/test/MC/SystemZ/**
+      - llvm/test/Transforms/**/SystemZ/**
+      - clang/include/clang/Basic/BuiltinsSystemZ.*
+      - clang/lib/Basic/Targets/SystemZ.*
+      - clang/lib/CodeGen/Targets/SystemZ.cpp
+      - clang/lib/Driver/ToolChains/ZOS*
+      - clang/lib/Driver/ToolChains/Arch/SystemZ.*
+      - clang/test/CodeGen/SystemZ/**
+      - clang/include/clang/Sema/SemaSystemZ.h
+      - clang/lib/Sema/SemaSystemZ.cpp
 
 third-party:unittests:
-  - third-party/unittests/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - third-party/unittests/**
 
 third-party:benchmark:
-  - third-party/benchmark/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - third-party/benchmark/**
 
 llvm:binary-utilities:
-  - llvm/docs/CommandGuide/llvm-*
-  - llvm/include/llvm/BinaryFormat/**
-  - llvm/include/llvm/DebugInfo/Symbolize/**
-  - llvm/include/llvm/ObjCopy/**
-  - llvm/include/llvm/Object/**
-  - llvm/lib/BinaryFormat/**
-  - llvm/lib/DebugInfo/Symbolize/**
-  - llvm/lib/ObjCopy/**
-  - llvm/lib/Object/**
-  - llvm/test/Object/**
-  - llvm/test/tools/llvm-ar/**
-  - llvm/test/tools/llvm-cxxfilt/**
-  - llvm/test/tools/llvm-nm/**
-  - llvm/test/tools/llvm-objcopy/**
-  - llvm/test/tools/llvm-objdump/**
-  - llvm/test/tools/llvm-readobj/**
-  - llvm/test/tools/llvm-size/**
-  - llvm/test/tools/llvm-strings/**
-  - llvm/test/tools/llvm-symbolizer/**
-  - llvm/tools/llvm-ar/**
-  - llvm/tools/llvm-cxxfilt/**
-  - llvm/tools/llvm-nm/**
-  - llvm/tools/llvm-objcopy/**
-  - llvm/tools/llvm-objdump/**
-  - llvm/tools/llvm-readobj/**
-  - llvm/tools/llvm-size/**
-  - llvm/tools/llvm-strings/**
-  - llvm/tools/llvm-symbolizer/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/docs/CommandGuide/llvm-*
+      - llvm/include/llvm/BinaryFormat/**
+      - llvm/include/llvm/DebugInfo/Symbolize/**
+      - llvm/include/llvm/ObjCopy/**
+      - llvm/include/llvm/Object/**
+      - llvm/lib/BinaryFormat/**
+      - llvm/lib/DebugInfo/Symbolize/**
+      - llvm/lib/ObjCopy/**
+      - llvm/lib/Object/**
+      - llvm/test/Object/**
+      - llvm/test/tools/llvm-ar/**
+      - llvm/test/tools/llvm-cxxfilt/**
+      - llvm/test/tools/llvm-nm/**
+      - llvm/test/tools/llvm-objcopy/**
+      - llvm/test/tools/llvm-objdump/**
+      - llvm/test/tools/llvm-readobj/**
+      - llvm/test/tools/llvm-size/**
+      - llvm/test/tools/llvm-strings/**
+      - llvm/test/tools/llvm-symbolizer/**
+      - llvm/tools/llvm-ar/**
+      - llvm/tools/llvm-cxxfilt/**
+      - llvm/tools/llvm-nm/**
+      - llvm/tools/llvm-objcopy/**
+      - llvm/tools/llvm-objdump/**
+      - llvm/tools/llvm-readobj/**
+      - llvm/tools/llvm-size/**
+      - llvm/tools/llvm-strings/**
+      - llvm/tools/llvm-symbolizer/**
 
 clang:openmp:
-  - clang/include/clang/Basic/OpenMP*
-  - clang/include/clang/AST/OpenMPClause.h
-  - clang/include/clang/AST/DeclOpenMP.h
-  - clang/include/clang/AST/ExprOpenMP.h
-  - clang/include/clang/AST/StmtOpenMP.h
-  - clang/lib/AST/DeclOpenMP.cpp
-  - clang/lib/AST/OpenMPClause.cpp
-  - clang/lib/AST/StmtOpenMP.cpp
-  - clang/lib/Headers/openmp_wrappers/**
-  - clang/lib/Parse/ParseOpenMP.cpp
-  - clang/lib/Basic/OpenMPKinds.cpp
-  - clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
-  - clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
-  - clang/lib/CodeGen/CgStmtOpenMP.cpp
-  - clang/lib/CodeGen/CGOpenMP*
-  - clang/lib/Sema/SemaOpenMP.cpp
-  - clang/test/OpenMP/**
-  - clang/test/AST/ast-dump-openmp-*
-  - llvm/lib/Frontend/OpenMP/**
-  - llvm/lib/Transforms/IPO/OpenMPOpt.cpp
-  - llvm/include/llvm/Frontend/OpenMP/**
-  - llvm/include/llvm/Transforms/IPO/OpenMPOpt.h
-  - llvm/unittests/Frontend/OpenMP*
-  - llvm/test/Transforms/OpenMP/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/include/clang/Basic/OpenMP*
+      - clang/include/clang/AST/OpenMPClause.h
+      - clang/include/clang/AST/DeclOpenMP.h
+      - clang/include/clang/AST/ExprOpenMP.h
+      - clang/include/clang/AST/StmtOpenMP.h
+      - clang/lib/AST/DeclOpenMP.cpp
+      - clang/lib/AST/OpenMPClause.cpp
+      - clang/lib/AST/StmtOpenMP.cpp
+      - clang/lib/Headers/openmp_wrappers/**
+      - clang/lib/Parse/ParseOpenMP.cpp
+      - clang/lib/Basic/OpenMPKinds.cpp
+      - clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+      - clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
+      - clang/lib/CodeGen/CGStmtOpenMP.cpp
+      - clang/lib/CodeGen/CGOpenMP*
+      - clang/lib/Sema/SemaOpenMP.cpp
+      - clang/test/OpenMP/**
+      - clang/test/AST/ast-dump-openmp-*
+      - llvm/lib/Frontend/OpenMP/**
+      - llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+      - llvm/include/llvm/Frontend/OpenMP/**
+      - llvm/include/llvm/Transforms/IPO/OpenMPOpt.h
+      - llvm/unittests/Frontend/OpenMP*
+      - llvm/test/Transforms/OpenMP/**
 
 clang:temporal-safety:
-  - clang/include/clang/Analysis/Analyses/LifetimeSafety/**
-  - clang/lib/Analysis/LifetimeSafety/**
-  - clang/unittests/Analysis/LifetimeSafety*
-  - clang/test/Sema/*lifetime-safety*
-  - clang/test/Sema/*lifetime-analysis*
-  - clang/test/Analysis/LifetimeSafety/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/include/clang/Analysis/Analyses/LifetimeSafety/**
+      - clang/lib/Analysis/LifetimeSafety/**
+      - clang/unittests/Analysis/LifetimeSafety*
+      - clang/test/Sema/*lifetime-safety*
+      - clang/test/Sema/*lifetime-analysis*
+      - clang/test/Analysis/LifetimeSafety/**
 
 clang:as-a-library:
-  - clang/tools/libclang/**
-  - clang/bindings/**
-  - clang/include/clang-c/**
-  - clang/test/LibClang/**
-  - clang/unittest/libclang/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/tools/libclang/**
+      - clang/bindings/**
+      - clang/include/clang-c/**
+      - clang/test/LibClang/**
+      - clang/unittests/libclang/**
 
 openmp:libomp:
-  - any: ['openmp/**', '!openmp/libomptarget/**']
+  - changed-files:
+    - any-glob-to-any-file:
+      - 'openmp/**'
 
 openmp:libomptarget:
-  - any: ['openmp/**', '!openmp/runtime/**']
+  - changed-files:
+    - all-globs-to-any-file:
+      - openmp/**
+      - '!openmp/runtime/**'
 
 bazel:
-  - utils/bazel/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - utils/bazel/**
 
 offload:
-  - offload/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - offload/**
 
 tablegen:
-  - llvm/include/TableGen/**
-  - llvm/lib/TableGen/**
-  - llvm/utils/TableGen/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/include/TableGen/**
+      - llvm/lib/TableGen/**
+      - llvm/utils/TableGen/**
 
 infrastructure:
-  - .ci/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - .ci/**
diff --git a/.github/workflows/new-prs.yml b/.github/workflows/new-prs.yml
index e1f2e754c1a3d..0d97e436d39c4 100644
--- a/.github/workflows/new-prs.yml
+++ b/.github/workflows/new-prs.yml
@@ -67,9 +67,7 @@ jobs:
       github.event.pull_request.draft == false &&
       github.event.pull_request.commits < 10
     steps:
-      - uses: actions/labeler@ac9175f8a1f3625fd0d4fb234536d26811351594 # v4.3.0
+      - uses: actions/labeler@634933edcd8ababfe52f92936142cc22ac488b1b # v6.0.1
         with:
           configuration-path: .github/new-prs-labeler.yml
-          # workaround for https://github.com/actions/labeler/issues/112
-          sync-labels: ''
           repo-token: ${{ secrets.ISSUE_SUBSCRIBER_TOKEN }}