diff --git a/.ci/monolithic-linux.sh b/.ci/monolithic-linux.sh
index 4a8418d7baa8c..ca619aa7e98a1 100755
--- a/.ci/monolithic-linux.sh
+++ b/.ci/monolithic-linux.sh
@@ -64,9 +64,11 @@ cmake -S "${MONOREPO_ROOT}"/llvm -B "${BUILD_DIR}" \
 
 start-group "ninja"
 
-# Targets are not escaped as they are passed as separate arguments.
-ninja -C "${BUILD_DIR}" -k 0 ${targets} |& tee ninja.log
-cp ${BUILD_DIR}/.ninja_log ninja.ninja_log
+if [[ "${targets}" != "" ]]; then
+  # ${targets} is intentionally unquoted so it word-splits into separate ninja arguments.
+  ninja -C "${BUILD_DIR}" -k 0 ${targets} |& tee ninja.log
+  cp "${BUILD_DIR}"/.ninja_log ninja.ninja_log
+fi
 
 if [[ "${runtime_targets}" != "" ]]; then
   start-group "ninja Runtimes"
diff --git a/.ci/monolithic-windows.sh b/.ci/monolithic-windows.sh
index 7b926b87f3623..99e7758ce8d79 100755
--- a/.ci/monolithic-windows.sh
+++ b/.ci/monolithic-windows.sh
@@ -51,9 +51,11 @@ cmake -S "${MONOREPO_ROOT}"/llvm -B "${BUILD_DIR}" \
 
 start-group "ninja"
 
-# Targets are not escaped as they are passed as separate arguments.
-ninja -C "${BUILD_DIR}" -k 0 ${targets} |& tee ninja.log
-cp ${BUILD_DIR}/.ninja_log ninja.ninja_log
+if [[ "${targets}" != "" ]]; then
+  # ${targets} is intentionally unquoted so it word-splits into separate ninja arguments.
+  ninja -C "${BUILD_DIR}" -k 0 ${targets} |& tee ninja.log
+  cp "${BUILD_DIR}"/.ninja_log ninja.ninja_log
+fi
 
 if [[ "${runtimes_targets}" != "" ]]; then
   start-group "ninja runtimes"
diff --git a/.github/new-prs-labeler.yml b/.github/new-prs-labeler.yml
index efdc42d349195..bb0eef5842b0f 100644
--- a/.github/new-prs-labeler.yml
+++ b/.github/new-prs-labeler.yml
@@ -1,1131 +1,1449 @@
 BOLT:
-  - bolt/**/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - bolt/**/*
 
 ClangIR:
-  - clang/include/clang/CIR/**/*
-  - clang/lib/CIR/**/*
-  - clang/tools/cir-*/**/*
-  - clang/test/CIR/**/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/include/clang/CIR/**/*
+      - clang/lib/CIR/**/*
+      - clang/tools/cir-*/**/*
+      - clang/test/CIR/**/*
 
 clang:bytecode:
-  - clang/docs/ConstantInterpreter.rst
-  - clang/lib/AST/ByteCode/**/*
-  - clang/test/AST/ByteCode/**/*
-  - clang/unittests/AST/ByteCode/**/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/docs/ConstantInterpreter.rst
+      - clang/lib/AST/ByteCode/**/*
+      - clang/test/AST/ByteCode/**/*
+      - clang/unittests/AST/ByteCode/**/*
 
 clang:dataflow:
-  - clang/include/clang/Analysis/FlowSensitive/**/*
-  - clang/lib/Analysis/FlowSensitive/**/*
-  - clang/unittests/Analysis/FlowSensitive/**/*
-  - clang/docs/DataFlowAnalysisIntro.md
-  - clang/docs/DataFlowAnalysisIntroImages/**/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/include/clang/Analysis/FlowSensitive/**/*
+      - clang/lib/Analysis/FlowSensitive/**/*
+      - clang/unittests/Analysis/FlowSensitive/**/*
+      - clang/docs/DataFlowAnalysisIntro.md
+      - clang/docs/DataFlowAnalysisIntroImages/**/*
 
 clang:frontend:
-  - clang/lib/AST/**/*
-  - clang/include/clang/AST/**/*
-  - clang/lib/Basic/**/*
-  - clang/include/clang/Basic/**/*
-  - clang/lib/Interpreter/**/*
-  - clang/include/clang/Interpreter/**/*
-  - clang/lib/Lex/**/*
-  - clang/include/clang/Lex/**/*
-  - clang/lib/Parse/**/*
-  - clang/include/clang/Parse/**/*
-  - clang/lib/Sema/**/*
-  - clang/include/clang/Sema/**/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/lib/AST/**/*
+      - clang/include/clang/AST/**/*
+      - clang/lib/Basic/**/*
+      - clang/include/clang/Basic/**/*
+      - clang/lib/Interpreter/**/*
+      - clang/include/clang/Interpreter/**/*
+      - clang/lib/Lex/**/*
+      - clang/include/clang/Lex/**/*
+      - clang/lib/Parse/**/*
+      - clang/include/clang/Parse/**/*
+      - clang/lib/Sema/**/*
+      - clang/include/clang/Sema/**/*
 
 clang:headers:
-  - clang/lib/Headers/**/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/lib/Headers/**/*
 
 compiler-rt:
-  - compiler-rt/**/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - compiler-rt/**/*
 
 flang:
-  - flang/**/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - flang/**/*
 
 flang:frontend:
-  - flang/Parser/**/*
-  - flang/Evaluate/**/*
-  - flang/Semantics/**/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - flang/Parser/**/*
+      - flang/Evaluate/**/*
+      - flang/Semantics/**/*
 
 libclc:
-  - libclc/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - libclc/**
 
 HLSL:
-  - clang/*HLSL*/**/*
-  - clang/**/*HLSL*
-  - llvm/**/Frontend/HLSL/**/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/*HLSL*/**/*
+      - clang/**/*HLSL*
+      - llvm/**/Frontend/HLSL/**/*
 
 lld:
-  - lld/**/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - lld/**/*
 
 llvm-lit:
-  - llvm/utils/lit/**/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/utils/lit/**/*
 
 PGO:
-  - llvm/**/ProfileData/**/*
-  - llvm/**/SampleProfile*
-  - llvm/**/CodeGen/MIRSampleProfile*
-  - llvm/lib/Transforms/Instrumentation/CGProfile.cpp
-  - llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
-  - llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
-  - llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
-  - llvm/lib/Transforms/Instrumentation/PGO*
-  - llvm/lib/Transforms/Instrumentation/ValueProfile*
-  - llvm/test/Instrumentation/InstrProfiling/**/*
-  - llvm/test/Transforms/PGOProfile/**/*
-  - llvm/test/Transforms/SampleProfile/**/*
-  - llvm/**/llvm-profdata/**/*
-  - llvm/**/llvm-profgen/**/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/**/ProfileData/**/*
+      - llvm/**/SampleProfile*
+      - llvm/**/CodeGen/MIRSampleProfile*
+      - llvm/lib/Transforms/Instrumentation/CGProfile.cpp
+      - llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
+      - llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+      - llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+      - llvm/lib/Transforms/Instrumentation/PGO*
+      - llvm/lib/Transforms/Instrumentation/ValueProfile*
+      - llvm/test/Instrumentation/InstrProfiling/**/*
+      - llvm/test/Transforms/PGOProfile/**/*
+      - llvm/test/Transforms/SampleProfile/**/*
+      - llvm/**/llvm-profdata/**/*
+      - llvm/**/llvm-profgen/**/*
 
 vectorizers:
-  - llvm/lib/Transforms/Vectorize/**/*
-  - llvm/include/llvm/Transforms/Vectorize/**/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/lib/Transforms/Vectorize/**/*
+      - llvm/include/llvm/Transforms/Vectorize/**/*
 
 # IMPORTED FROM CODEOWNERS
 LTO:
-  - llvm/*/LTO/**
-  - llvm/*/Linker/**
-  - llvm/*/ThinLTO/**
-  - llvm/lib/Transforms/*/FunctionImport*
-  - llvm/tools/gold/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/*/LTO/**
+      - llvm/*/Linker/**
+      - llvm/*/ThinLTO/**
+      - llvm/lib/Transforms/*/FunctionImport*
+      - llvm/tools/gold/**
 
 clang:driver:
-  - clang/*/Driver/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/*/Driver/**
 
 compiler-rt:asan:
-  - compiler-rt/lib/asan/**
-  - compiler-rt/include/sanitizer/asan_interface.h
-  - compiler-rt/test/asan/**
-  - compiler-rt/lib/asan_abi/**
-  - compiler-rt/test/asan_abi/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - compiler-rt/lib/asan/**
+      - compiler-rt/include/sanitizer/asan_interface.h
+      - compiler-rt/test/asan/**
+      - compiler-rt/lib/asan_abi/**
+      - compiler-rt/test/asan_abi/**
 
 compiler-rt:builtins:
-  - compiler-rt/lib/builtins/**
-  - compiler-rt/test/builtins/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - compiler-rt/lib/builtins/**
+      - compiler-rt/test/builtins/**
 
 compiler-rt:cfi:
-  - compiler-rt/lib/cfi/**
-  - compiler-rt/test/cfi/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - compiler-rt/lib/cfi/**
+      - compiler-rt/test/cfi/**
 
 compiler-rt:fuzzer:
-  - compiler-rt/lib/fuzzer/**
-  - compiler-rt/include/fuzzer/**
-  - compiler-rt/test/fuzzer/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - compiler-rt/lib/fuzzer/**
+      - compiler-rt/include/fuzzer/**
+      - compiler-rt/test/fuzzer/**
 
 compiler-rt:hwasan:
-  - compiler-rt/lib/hwasan/**
-  - compiler-rt/include/sanitizer/hwasan_interface.h
-  - compiler-rt/test/hwasan/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - compiler-rt/lib/hwasan/**
+      - compiler-rt/include/sanitizer/hwasan_interface.h
+      - compiler-rt/test/hwasan/**
 
 compiler-rt:lsan:
-  - compiler-rt/lib/lsan/**
-  - compiler-rt/include/sanitizer/lsan_interface.h
-  - compiler-rt/test/lsan/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - compiler-rt/lib/lsan/**
+      - compiler-rt/include/sanitizer/lsan_interface.h
+      - compiler-rt/test/lsan/**
 
 compiler-rt:msan:
-  - compiler-rt/lib/msan/**
-  - compiler-rt/include/sanitizer/msan_interface.h
-  - compiler-rt/test/msan/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - compiler-rt/lib/msan/**
+      - compiler-rt/include/sanitizer/msan_interface.h
+      - compiler-rt/test/msan/**
 
 compiler-rt:sanitizer:
-  - llvm/lib/Transforms/Instrumentation/*Sanitizer*
-  - compiler-rt/lib/interception/**
-  - compiler-rt/lib/*san*/**
-  - compiler-rt/include/sanitizer/**
-  - compiler-rt/test/*san*/**
-  - compiler-rt/lib/fuzzer/**
-  - compiler-rt/include/fuzzer/**
-  - compiler-rt/test/fuzzer/**
-  - compiler-rt/lib/scudo/**
-  - compiler-rt/test/scudo/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/lib/Transforms/Instrumentation/*Sanitizer*
+      - compiler-rt/lib/interception/**
+      - compiler-rt/lib/*san*/**
+      - compiler-rt/include/sanitizer/**
+      - compiler-rt/test/*san*/**
+      - compiler-rt/lib/fuzzer/**
+      - compiler-rt/include/fuzzer/**
+      - compiler-rt/test/fuzzer/**
+      - compiler-rt/lib/scudo/**
+      - compiler-rt/test/scudo/**
 
 compiler-rt:scudo:
-  - compiler-rt/lib/scudo/**
-  - compiler-rt/test/scudo/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - compiler-rt/lib/scudo/**
+      - compiler-rt/test/scudo/**
 
 compiler-rt:tsan:
-  - compiler-rt/lib/tsan/**
-  - compiler-rt/include/sanitizer/tsan_interface.h
-  - compiler-rt/include/sanitizer/tsan_interface_atomic.h
-  - compiler-rt/test/tsan/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - compiler-rt/lib/tsan/**
+      - compiler-rt/include/sanitizer/tsan_interface.h
+      - compiler-rt/include/sanitizer/tsan_interface_atomic.h
+      - compiler-rt/test/tsan/**
 
 compiler-rt:ubsan:
-  - compiler-rt/lib/ubsan/**
-  - compiler-rt/include/sanitizer/ubsan_interface.h
-  - compiler-rt/test/ubsan/**
-  - compiler-rt/lib/ubsan_minimal/**
-  - compiler-rt/test/ubsan_minimal/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - compiler-rt/lib/ubsan/**
+      - compiler-rt/include/sanitizer/ubsan_interface.h
+      - compiler-rt/test/ubsan/**
+      - compiler-rt/lib/ubsan_minimal/**
+      - compiler-rt/test/ubsan_minimal/**
 
 xray:
-  - llvm/tools/llvm-xray/**
-  - compiler-rt/*/xray/**
-  - clang/include/clang/Basic/XRay*
-  - clang/lib/Basic/XRay*
-  - compiler-rt/*/xray/**
-  - llvm/include/llvm/XRay/**
-  - llvm/lib/XRay/**
-  - llvm/tools/llvm-xray/**
-  - llvm/unittests/XRay/**
-  - compiler-rt/*/xray/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/tools/llvm-xray/**
+      - compiler-rt/*/xray/**
+      - clang/include/clang/Basic/XRay*
+      - clang/lib/Basic/XRay*
+      - compiler-rt/*/xray/**
+      - llvm/include/llvm/XRay/**
+      - llvm/lib/XRay/**
+      - llvm/tools/llvm-xray/**
+      - llvm/unittests/XRay/**
+      - compiler-rt/*/xray/**
 
 clang:codegen:
-  - clang/lib/CodeGen/**
-  - clang/include/clang/CodeGen/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/lib/CodeGen/**
+      - clang/include/clang/CodeGen/**
 
 mlir:
-  - mlir/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**
 
 mlir:core:
-  - mlir/include/mlir/Support/**
-  - mlir/lib/Support/**
-  - mlir/include/mlir/Parser/**
-  - mlir/lib/Parser/**
-  - mlir/include/mlir/IR/**
-  - mlir/lib/IR/**
-  - mlir/include/mlir/Bytecode/**
-  - mlir/lib/Bytecode/**
-  - mlir/include/mlir/AsmParser/**
-  - mlir/lib/AsmParser/**
-  - mlir/include/mlir/Pass/**
-  - mlir/lib/Pass/**
-  - mlir/include/mlir/Tools/**
-  - mlir/lib/Tools/**
-  - mlir/include/mlir/Reducer/**
-  - mlir/lib/Reducer/**
-  - mlir/include/mlir/Transforms/**
-  - mlir/lib/Transforms/**
-  - mlir/include/mlir/Debug/**
-  - mlir/lib/Debug/**
-  - mlir/tools/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/include/mlir/Support/**
+      - mlir/lib/Support/**
+      - mlir/include/mlir/Parser/**
+      - mlir/lib/Parser/**
+      - mlir/include/mlir/IR/**
+      - mlir/lib/IR/**
+      - mlir/include/mlir/Bytecode/**
+      - mlir/lib/Bytecode/**
+      - mlir/include/mlir/AsmParser/**
+      - mlir/lib/AsmParser/**
+      - mlir/include/mlir/Pass/**
+      - mlir/lib/Pass/**
+      - mlir/include/mlir/Tools/**
+      - mlir/lib/Tools/**
+      - mlir/include/mlir/Reducer/**
+      - mlir/lib/Reducer/**
+      - mlir/include/mlir/Transforms/**
+      - mlir/lib/Transforms/**
+      - mlir/include/mlir/Debug/**
+      - mlir/lib/Debug/**
+      - mlir/tools/**
 
 mlir:ods:
-  - mlir/TableGen/**
-  - mlir/tblgen/**
-  - mlir/include/mlir/IR/*.td
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/TableGen/**
+      - mlir/tblgen/**
+      - mlir/include/mlir/IR/*.td
 
 mlir:bindings:
-  - mlir/Bindings/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/Bindings/**
 
 mlir:gpu:
-  - mlir/**/*GPU*/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/*GPU*/**
 
 mlir:amdgpu:
-  - mlir/**/AMDGPU/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/AMDGPU/**
 
 mlir:amx:
-  - mlir/**/AMX/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/AMX/**
 
 mlir:affine:
-  - mlir/**/Affine/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/Affine/**
 
 mlir:arith:
-  - mlir/**/Arith/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/Arith/**
 
 mlir:neon:
-  - mlir/**/ArmNeon/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/ArmNeon/**
 
 mlir:sme:
-  - mlir/**/ArmSME/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/ArmSME/**
 
 mlir:sve:
-  - mlir/**/ArmSVE/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/ArmSVE/**
 
 mlir:async:
-  - mlir/**/Async/**
-  - mlir/**/Async/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/Async/**
+      - mlir/**/Async/**
 
 mlir:bufferization:
-  - mlir/**/Bufferization/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/Bufferization/**
 
 mlir:complex:
-  - mlir/**/Complex/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/Complex/**
 
 mlir:cf:
-  - mlir/**/ControlFlow/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/ControlFlow/**
 
 mlir:dlti:
-  - mlir/**/DLTI/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/DLTI/**
 
 mlir:emitc:
-  - mlir/**/*EmitC*/**
-  - mlir/lib/Target/Cpp/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/*EmitC*/**
+      - mlir/lib/Target/Cpp/**
 
 mlir:func:
-  - mlir/**/Func/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/Func/**
 
 mlir:irdl:
-  - mlir/**/IRDL/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/IRDL/**
 
 mlir:index:
-  - mlir/**/Index/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/Index/**
 
 mlir:llvm:
-  - mlir/**/LLVM*
-  - mlir/**/LLVM*/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/LLVM*
+      - mlir/**/LLVM*/**
 
 mlir:linalg:
-  - mlir/**/*linalg/**
-  - mlir/**/*Linalg/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/*linalg/**
+      - mlir/**/*Linalg/**
 
 mlir:mlprogram:
-  - mlir/**/MLProgram/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/MLProgram/**
 
 mlir:math:
-  - mlir/**/Math/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/Math/**
 
 mlir:memref:
-  - mlir/**/MemRef/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/MemRef/**
 
 mlir:nvgpu:
-  - mlir/**/NVGPU/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/NVGPU/**
 
 mlir:openacc:
-  - mlir/**/*OpenACC*
-  - mlir/**/*OpenACC*/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/*OpenACC*
+      - mlir/**/*OpenACC*/**
 
 mlir:openmp:
-  - mlir/**/*OpenMP*
-  - mlir/**/*OpenMP*/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/*OpenMP*
+      - mlir/**/*OpenMP*/**
 
 mlir:pdl:
-  - mlir/**/PDL/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/PDL/**
 
 mlir:quant:
-  - mlir/**/Quant/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/Quant/**
 
 mlir:scf:
-  - mlir/**/SCF/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/SCF/**
 
 mlir:spirv:
-  - mlir/**/SPIRV/**
-  - mlir/**/SPIRVTo*/**
-  - mlir/**/*ToSPIRV/**
-  - mlir/tools/mlir-spirv-cpu-runner/**
-  - mlir/tools/mlir-vulkan-runner/**
-  - mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/SPIRV/**
+      - mlir/**/SPIRVTo*/**
+      - mlir/**/*ToSPIRV/**
+      - mlir/tools/mlir-spirv-cpu-runner/**
+      - mlir/tools/mlir-vulkan-runner/**
+      - mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp
 
 mlir:shape:
-  - mlir/**/Shape/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/Shape/**
 
 mlir:sparse:
-  - mlir/**/SparseTensor/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/SparseTensor/**
 
 mlir:tensor:
-  - mlir/**/Tensor/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/Tensor/**
 
 mlir:tosa:
-  - mlir/**/*Tosa*/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/*Tosa*/**
 
 mlir:ub:
-  - mlir/**/UB/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/UB/**
 
 mlir:vector:
-  - mlir/**/*Vector/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/*Vector/**
 
 mlir:execution-engine:
-  - mlir/**/ExecutionEngine/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/ExecutionEngine/**
 
 mlir:presburger:
-  - mlir/**/*Presburger*/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/*Presburger*/**
 
 mlir:python:
-  - mlir/python/**/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/python/**/*
 
 mlir:vectorops:
-  - mlir/**/Vector/**/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - mlir/**/Vector/**/*
 
 coroutines:
-  - clang/docs/DebuggingCoroutines.rst
-  - clang/lib/Sema/SemaCoroutine.cpp
-  - clang/lib/CodeGen/CGCoroutine.cpp
-  - clang/test/CodeGenCoroutines/**
-  - llvm/docs/Coroutines.rst
-  - llvm/include/llvm/Transforms/Coroutines/**
-  - llvm/lib/Transforms/Coroutines/**
-  - llvm/test/Transforms/Coroutines/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/docs/DebuggingCoroutines.rst
+      - clang/lib/Sema/SemaCoroutine.cpp
+      - clang/lib/CodeGen/CGCoroutine.cpp
+      - clang/test/CodeGenCoroutines/**
+      - llvm/docs/Coroutines.rst
+      - llvm/include/llvm/Transforms/Coroutines/**
+      - llvm/lib/Transforms/Coroutines/**
+      - llvm/test/Transforms/Coroutines/*
 
 clang:modules:
-  - clang/docs/StandardCPlusPlusModules.rst
-  - clang/include/clang/AST/AbstractBasicReader.h
-  - clang/include/clang/AST/AbstractBasicWriter.h
-  - clang/include/clang/AST/AbstractTypeReader.h
-  - clang/include/clang/AST/AbstractTypeWriter.h
-  - clang/include/clang/AST/PropertiesBase.td
-  - clang/include/clang/AST/ODRHash.h
-  - clang/include/clang/AST/TypeProperties.td
-  - clang/include/clang/Basic/Module.h
-  - clang/include/clang/Frontend/PrecompiledPreamble.h
-  - clang/include/clang/Lex/ModuleLoader.h
-  - clang/include/clang/Lex/ModuleMap.h
-  - clang/include/clang/Serialization/**
-  - clang/lib/AST/ODRHash.cpp
-  - clang/lib/AST/StmtProfile.cpp
-  - clang/lib/Basic/Module.cpp
-  - clang/lib/Frontend/ModuleDependencyCollector.cpp
-  - clang/lib/Frontend/PrecompiledPreamble.cpp
-  - clang/lib/Lex/ModuleMap.cpp
-  - clang/lib/Sema/SemaModule.cpp
-  - clang/lib/Serialization/**
-  - clang/test/CXX/module/**
-  - clang/test/Modules/**
-  - clang/unittests/Serialization/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/docs/StandardCPlusPlusModules.rst
+      - clang/include/clang/AST/AbstractBasicReader.h
+      - clang/include/clang/AST/AbstractBasicWriter.h
+      - clang/include/clang/AST/AbstractTypeReader.h
+      - clang/include/clang/AST/AbstractTypeWriter.h
+      - clang/include/clang/AST/PropertiesBase.td
+      - clang/include/clang/AST/ODRHash.h
+      - clang/include/clang/AST/TypeProperties.td
+      - clang/include/clang/Basic/Module.h
+      - clang/include/clang/Frontend/PrecompiledPreamble.h
+      - clang/include/clang/Lex/ModuleLoader.h
+      - clang/include/clang/Lex/ModuleMap.h
+      - clang/include/clang/Serialization/**
+      - clang/lib/AST/ODRHash.cpp
+      - clang/lib/AST/StmtProfile.cpp
+      - clang/lib/Basic/Module.cpp
+      - clang/lib/Frontend/ModuleDependencyCollector.cpp
+      - clang/lib/Frontend/PrecompiledPreamble.cpp
+      - clang/lib/Lex/ModuleMap.cpp
+      - clang/lib/Sema/SemaModule.cpp
+      - clang/lib/Serialization/**
+      - clang/test/CXX/module/**
+      - clang/test/Modules/**
+      - clang/unittests/Serialization/*
 
 clang-tidy:
-  - clang-tools-extra/clang-tidy/**
-  - clang-tools-extra/docs/clang-tidy/**
-  - clang-tools-extra/test/clang-tidy/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang-tools-extra/clang-tidy/**
+      - clang-tools-extra/docs/clang-tidy/**
+      - clang-tools-extra/test/clang-tidy/**
 
 clang-tools-extra:
-  - clang-tools-extra/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang-tools-extra/**
 
 tools:llvm-mca:
-  - llvm/tools/llvm-mca/**
-  - llvm/include/llvm/MCA/**
-  - llvm/lib/MCA/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/tools/llvm-mca/**
+      - llvm/include/llvm/MCA/**
+      - llvm/lib/MCA/**
 
 clang:
-  - any:
-    - clang/**
-    - '!clang/**/Format/**'
-    - '!clang/tools/clang-format/**'
+  - changed-files:
+    - all-globs-to-any-file:
+      - clang/**
+      - '!clang/**/Format/**'
+      - '!clang/tools/clang-format/**'
 
 testing-tools:
-  - llvm/include/llvm/FileCheck/**
-  - llvm/lib/FileCheck/**
-  - llvm/test/FileCheck/**
-  - llvm/unittests/FileCheck/**
-  - llvm/utils/lit/**
-  - llvm/utils/split-file/**
-  - llvm/utils/not/**
-  - llvm/utils/count/**
-  - llvm/utils/FileCheck/**
-  - llvm/docs/CommandGuide/FileCheck.rst
-  - llvm/docs/CommandGuide/lit.rst
-  - llvm/docs/TestingGuide.rst
-  - llvm/test/Other/FileCheck-space.txt
-  - llvm/utils/UpdateTestChecks/**
-  - llvm/utils/update*_test_checks.py
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/include/llvm/FileCheck/**
+      - llvm/lib/FileCheck/**
+      - llvm/test/FileCheck/**
+      - llvm/unittests/FileCheck/**
+      - llvm/utils/lit/**
+      - llvm/utils/split-file/**
+      - llvm/utils/not/**
+      - llvm/utils/count/**
+      - llvm/utils/FileCheck/**
+      - llvm/docs/CommandGuide/FileCheck.rst
+      - llvm/docs/CommandGuide/lit.rst
+      - llvm/docs/TestingGuide.rst
+      - llvm/test/Other/FileCheck-space.txt
+      - llvm/utils/UpdateTestChecks/**
+      - llvm/utils/update*_test_checks.py
 
 debuginfo:
-  - clang/lib/CodeGen/CGDebugInfo.*
-  - llvm/include/llvm/BinaryFormat/Dwarf.*
-  - llvm/include/llvm/CodeGen/*Debug*.*
-  - llvm/include/llvm/DebugInfo/**
-  - llvm/include/llvm/Debuginfod/**
-  - llvm/include/llvm/Frontend/Debug/**
-  - llvm/include/llvm/IR/Debug*.*
-  - llvm/include/llvm/Object/*Debug*.*
-  - llvm/include/llvm/ObjectYAML/*Debug*.*
-  - llvm/include/llvm/Transforms/Utils/*Debug*.*
-  - llvm/include/llvm-c/DebugInfo.h
-  - llvm/lib/BinaryFormat/Dwarf.cpp
-  - llvm/lib/CodeGen/AsmPrinter/*Debug*.*
-  - llvm/lib/CodeGen/AsmPrinter/Dwarf*.*
-  - llvm/lib/CodeGen/AsmPrinter/DIE*.*
-  - llvm/lib/CodeGen/LiveDebugValues/**
-  - llvm/lib/CodeGen/*Debug*.*
-  - llvm/lib/CodeGen/DwarfEHPrepare.cpp
-  - llvm/lib/DebugInfo/**
-  - llvm/lib/Debuginfod/**
-  - llvm/lib/DWARFLinkerParallel/**
-  - llvm/lib/IR/Debug*.cpp
-  - llvm/lib/MC/MCDwarf.cpp
-  - llvm/lib/Transforms/Utils/*Debug*.*
-  - llvm/test/DebugInfo/**
-  - llvm/test/tools/dsymutil/**
-  - llvm/test/tools/llvm-debuginfo-analyzer/**
-  - llvm/test/tools/llvm-debuginfod/**
-  - llvm/test/tools/llvm-debuginfod-find/**
-  - llvm/test/tools/llvm-dwarfdump/**
-  - llvm/test/tools/llvm-dwarfutil/**
-  - llvm/test/tools/llvm-dwp/**
-  - llvm/test/tools/llvm-gsymutil/**
-  - llvm/test/tools/llvm-pdbuti/**
-  - llvm/tools/dsymutil/**
-  - llvm/tools/llvm-debuginfo-analyzer/**
-  - llvm/tools/llvm-debuginfod/**
-  - llvm/tools/llvm-debuginfod-find/**
-  - llvm/tools/llvm-dwarfdump/**
-  - llvm/tools/llvm-dwarfutil/**
-  - llvm/tools/llvm-dwp/**
-  - llvm/tools/llvm-gsymutil/**
-  - llvm/tools/llvm-pdbutil/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/lib/CodeGen/CGDebugInfo.*
+      - llvm/include/llvm/BinaryFormat/Dwarf.*
+      - llvm/include/llvm/CodeGen/*Debug*.*
+      - llvm/include/llvm/DebugInfo/**
+      - llvm/include/llvm/Debuginfod/**
+      - llvm/include/llvm/Frontend/Debug/**
+      - llvm/include/llvm/IR/Debug*.*
+      - llvm/include/llvm/Object/*Debug*.*
+      - llvm/include/llvm/ObjectYAML/*Debug*.*
+      - llvm/include/llvm/Transforms/Utils/*Debug*.*
+      - llvm/include/llvm-c/DebugInfo.h
+      - llvm/lib/BinaryFormat/Dwarf.cpp
+      - llvm/lib/CodeGen/AsmPrinter/*Debug*.*
+      - llvm/lib/CodeGen/AsmPrinter/Dwarf*.*
+      - llvm/lib/CodeGen/AsmPrinter/DIE*.*
+      - llvm/lib/CodeGen/LiveDebugValues/**
+      - llvm/lib/CodeGen/*Debug*.*
+      - llvm/lib/CodeGen/DwarfEHPrepare.cpp
+      - llvm/lib/DebugInfo/**
+      - llvm/lib/Debuginfod/**
+      - llvm/lib/DWARFLinkerParallel/**
+      - llvm/lib/IR/Debug*.cpp
+      - llvm/lib/MC/MCDwarf.cpp
+      - llvm/lib/Transforms/Utils/*Debug*.*
+      - llvm/test/DebugInfo/**
+      - llvm/test/tools/dsymutil/**
+      - llvm/test/tools/llvm-debuginfo-analyzer/**
+      - llvm/test/tools/llvm-debuginfod/**
+      - llvm/test/tools/llvm-debuginfod-find/**
+      - llvm/test/tools/llvm-dwarfdump/**
+      - llvm/test/tools/llvm-dwarfutil/**
+      - llvm/test/tools/llvm-dwp/**
+      - llvm/test/tools/llvm-gsymutil/**
+      - llvm/test/tools/llvm-pdbutil/**
+      - llvm/tools/dsymutil/**
+      - llvm/tools/llvm-debuginfo-analyzer/**
+      - llvm/tools/llvm-debuginfod/**
+      - llvm/tools/llvm-debuginfod-find/**
+      - llvm/tools/llvm-dwarfdump/**
+      - llvm/tools/llvm-dwarfutil/**
+      - llvm/tools/llvm-dwp/**
+      - llvm/tools/llvm-gsymutil/**
+      - llvm/tools/llvm-pdbutil/**
 
 github:workflow:
-  - .github/workflows/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - .github/workflows/**
 
 cmake:
-  - cmake/**
-  - llvm/cmake/**
-  - runtimes/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - cmake/**
+      - llvm/cmake/**
+      - runtimes/**
 
 flang:driver:
-  - flang/tools/flang-driver/**
-  - flang/unittests/Frontend/**
-  - flang/lib/FrontendTool/**
-  - flang/lib/Frontend/**
-  - flang/include/flang/Frontend/**
-  - flang/include/flang/FrontendTool/**
-  - flang/test/Driver/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - flang/tools/flang-driver/**
+      - flang/unittests/Frontend/**
+      - flang/lib/FrontendTool/**
+      - flang/lib/Frontend/**
+      - flang/include/flang/Frontend/**
+      - flang/include/flang/FrontendTool/**
+      - flang/test/Driver/**
 
 backend:m68k:
-  - llvm/lib/Target/M68k/**
-  - clang/lib/Basic/Targets/M68k.*
-  - clang/lib/CodeGen/Targets/M68k.cpp
-  - llvm/test/CodeGen/M68k/**
-  - llvm/test/MC/Disassembler/M68k/**
-  - llvm/test/MC/M68k/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/lib/Target/M68k/**
+      - clang/lib/Basic/Targets/M68k.*
+      - clang/lib/CodeGen/Targets/M68k.cpp
+      - llvm/test/CodeGen/M68k/**
+      - llvm/test/MC/Disassembler/M68k/**
+      - llvm/test/MC/M68k/**
 
 libc++:
-  - libcxx/**
-  - .github/workflows/libcxx-*
+  - changed-files:
+    - any-glob-to-any-file:
+      - libcxx/**
+      - .github/workflows/libcxx-*
 
 libc++abi:
-  - libcxxabi/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - libcxxabi/**
 
 libunwind:
-  - libunwind/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - libunwind/**
 
 objectyaml:
-  - llvm/include/llvm/ObjectYAML/**
-  - llvm/lib/ObjectYAML/**
-  - llvm/test/tools/obj2yaml/**
-  - llvm/test/tools/yaml2obj/**
-  - llvm/tools/obj2yaml/**
-  - llvm/tools/yaml2obj/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/include/llvm/ObjectYAML/**
+      - llvm/lib/ObjectYAML/**
+      - llvm/test/tools/obj2yaml/**
+      - llvm/test/tools/yaml2obj/**
+      - llvm/tools/obj2yaml/**
+      - llvm/tools/yaml2obj/**
 
 clang:analysis:
-  - clang/include/clang/Analysis/**
-  - clang/lib/Analysis/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/include/clang/Analysis/**
+      - clang/lib/Analysis/**
 
 clang:static analyzer:
-  - clang/include/clang/StaticAnalyzer/**
-  - clang/lib/StaticAnalyzer/**
-  - clang/tools/scan-build/**
-  - clang/utils/analyzer/**
-  - clang/docs/analyzer/**
-  - clang/test/Analysis/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/include/clang/StaticAnalyzer/**
+      - clang/lib/StaticAnalyzer/**
+      - clang/tools/scan-build/**
+      - clang/utils/analyzer/**
+      - clang/docs/analyzer/**
+      - clang/test/Analysis/**
 
 pgo:
-  - llvm/lib/Transforms/Instrumentation/CGProfile.cpp
-  - llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
-  - llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
-  - llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
-  - llvm/lib/Transforms/Instrumentation/PGO*
-  - llvm/lib/Transforms/Instrumentation/ValueProfile*
-  - llvm/test/Instrumentation/InstrProfiling/**
-  - llvm/test/Transforms/PGOProfile/**
-  - compiler-rt/lib/profile/**
-  - compiler-rt/lib/memprof/**
-  - compiler-rt/test/profile/**
-  - compiler-rt/test/memprof/**
-  - llvm/tools/llvm-profdata/**
-  - llvm/tools/llvm-profgen/**
-  - llvm/test/tools/llvm-profdata/**
-  - llvm/test/tools/llvm-profgen/**
-  - llvm/unittests/ProfileData/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/lib/Transforms/Instrumentation/CGProfile.cpp
+      - llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
+      - llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+      - llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+      - llvm/lib/Transforms/Instrumentation/PGO*
+      - llvm/lib/Transforms/Instrumentation/ValueProfile*
+      - llvm/test/Instrumentation/InstrProfiling/**
+      - llvm/test/Transforms/PGOProfile/**
+      - compiler-rt/lib/profile/**
+      - compiler-rt/lib/memprof/**
+      - compiler-rt/test/profile/**
+      - compiler-rt/test/memprof/**
+      - llvm/tools/llvm-profdata/**
+      - llvm/tools/llvm-profgen/**
+      - llvm/test/tools/llvm-profdata/**
+      - llvm/test/tools/llvm-profgen/**
+      - llvm/unittests/ProfileData/*
 
 openacc:
-  - flang/**/OpenACC/**
-  - flang/include/flang/Lower/OpenACC.h
-  - flang/docs/OpenACC.md
-  - flang/lib/Parser/openacc-parsers.cpp
-  - flang/lib/Lower/OpenACC.cpp
-  - llvm/**/Frontend/OpenACC/**
-  - llvm/unittests/Frontend/OpenACCTest.cpp
-  - mlir/test/Target/LLVMIR/openacc-llvm.mlir
-  - mlir/**/*OpenACC/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - flang/**/OpenACC/**
+      - flang/include/flang/Lower/OpenACC.h
+      - flang/docs/OpenACC.md
+      - flang/lib/Parser/openacc-parsers.cpp
+      - flang/lib/Lower/OpenACC.cpp
+      - llvm/**/Frontend/OpenACC/**
+      - llvm/unittests/Frontend/OpenACCTest.cpp
+      - mlir/test/Target/LLVMIR/openacc-llvm.mlir
+      - mlir/**/*OpenACC/**
 
 flang:runtime:
-  - flang/runtime/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - flang/runtime/**
 
 flang:parser:
-  - flang/**/Parser/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - flang/**/Parser/**
 
 flang:semantics:
-  - flang/**/Evaluate/**
-  - flang/**/Semantics/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - flang/**/Evaluate/**
+      - flang/**/Semantics/**
 
 flang:fir-hlfir:
-  - flang/**/Lower/**
-  - flang/**/Optimizer/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - flang/**/Lower/**
+      - flang/**/Optimizer/**
 
 flang:codegen:
-  - flang/**/CodeGen/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - flang/**/CodeGen/**
 
 llvm:codegen:
-  - llvm/lib/CodeGen/*
-  - llvm/lib/CodeGen/MIRParser/*
-  - llvm/lib/CodeGen/LiveDebugValues/*
-  - llvm/lib/CodeGen/AsmPrinter/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/lib/CodeGen/*
+      - llvm/lib/CodeGen/MIRParser/*
+      - llvm/lib/CodeGen/LiveDebugValues/*
+      - llvm/lib/CodeGen/AsmPrinter/*
 
 llvm:globalisel:
-  - llvm/**/GlobalISel/**
-  - llvm/utils/TableGen/GlobalISel*
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/**/GlobalISel/**
+      - llvm/utils/TableGen/GlobalISel*
 
 function-specialization:
-  - llvm/include/llvm/Transforms/Utils/SCCPSolver.h
-  - llvm/lib/Transforms/Utils/SCCPSolver.cpp
-  - llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
-  - llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
-  - llvm/test/Transforms/FunctionSpecialization/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/include/llvm/Transforms/Utils/SCCPSolver.h
+      - llvm/lib/Transforms/Utils/SCCPSolver.cpp
+      - llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+      - llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+      - llvm/test/Transforms/FunctionSpecialization/*
 
 libc:
-  - libc/**
-  - utils/bazel/llvm-project-overlay/libc/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - libc/**
+      - utils/bazel/llvm-project-overlay/libc/**
 
 clang-format:
-  - clang/**/Format/**
-  - clang/tools/clang-format/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/**/Format/**
+      - clang/tools/clang-format/**
 
 flang:openmp:
-  - flang/test/**/OpenMP/**
-  - flang/lib/Lower/OpenMP.cpp
-  - flang/lib/Semantics/resolve-directives.cpp
-  - flang/lib/Semantics/check-omp-structure.cpp
-  - flang/lib/Optimizer/Transforms/OMP*
-  - flang/test/Fir/convert-to-llvm-openmp-and-fir.fir
-  - flang/test/Lower/OpenMP/**
-  - flang/test/Transforms/omp*
-  - mlir/**/*OpenMP*
-  - mlir/test/Target/LLVMIR/openmp*
-  - llvm/lib/Frontend/OpenMP/**
-  - llvm/include/llvm/Frontend/OpenMP/**
-  - llvm/unittests/Frontend/OpenMP*
+  - changed-files:
+    - any-glob-to-any-file:
+      - flang/test/**/OpenMP/**
+      - flang/lib/Lower/OpenMP.cpp
+      - flang/lib/Semantics/resolve-directives.cpp
+      - flang/lib/Semantics/check-omp-structure.cpp
+      - flang/lib/Optimizer/Transforms/OMP*
+      - flang/test/Fir/convert-to-llvm-openmp-and-fir.fir
+      - flang/test/Lower/OpenMP/**
+      - flang/test/Transforms/omp*
+      - mlir/**/*OpenMP*
+      - mlir/test/Target/LLVMIR/openmp*
+      - llvm/lib/Frontend/OpenMP/**
+      - llvm/include/llvm/Frontend/OpenMP/**
+      - llvm/unittests/Frontend/OpenMP*
 
 llvm:ir:
-  - llvm/lib/IR/**
-  - llvm/include/llvm/IR/**
-  - llvm/docs/LangRef.rst
-  - llvm/unittests/IR/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/lib/IR/**
+      - llvm/include/llvm/IR/**
+      - llvm/docs/LangRef.rst
+      - llvm/unittests/IR/**
 
 llvm:SandboxIR:
-  - llvm/lib/SandboxIR/**
-  - llvm/include/llvm/SandboxIR/**
-  - llvm/docs/SandboxIR.md
-  - llvm/unittests/SandboxIR/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/lib/SandboxIR/**
+      - llvm/include/llvm/SandboxIR/**
+      - llvm/docs/SandboxIR.md
+      - llvm/unittests/SandboxIR/**
 
 llvm:analysis:
-  - llvm/lib/Analysis/**
-  - llvm/include/llvm/Analysis/**
-  - llvm/test/Analysis/**
-  - llvm/unittests/Analysis/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/lib/Analysis/**
+      - llvm/include/llvm/Analysis/**
+      - llvm/test/Analysis/**
+      - llvm/unittests/Analysis/**
 
 llvm:adt:
-  - llvm/**/ADT/*
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/**/ADT/*
 
 llvm:support:
-  - llvm/**/Support/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/**/Support/**
 
 # Skip llvm/test/MC and llvm/unittests/MC, which includes target-specific directories.
 llvm:mc:
-  - llvm/include/llvm/MC/**
-  - llvm/lib/MC/**
-  - llvm/tools/llvm-mc/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/include/llvm/MC/**
+      - llvm/lib/MC/**
+      - llvm/tools/llvm-mc/**
 
 llvm:transforms:
-  - llvm/lib/Transforms/**
-  - llvm/include/llvm/Transforms/**
-  - llvm/test/Transforms/**
-  - llvm/unittests/Transforms/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/lib/Transforms/**
+      - llvm/include/llvm/Transforms/**
+      - llvm/test/Transforms/**
+      - llvm/unittests/Transforms/**
 
 llvm:instcombine:
-  - llvm/lib/Analysis/InstructionSimplify.cpp
-  - llvm/lib/Transforms/InstCombine/**
-  - llvm/include/llvm/Transforms/InstCombine/
-  - llvm/include/llvm/Analysis/InstructionSimplify.h
-  - llvm/test/Transforms/InstCombine/**
-  - llvm/test/Transforms/InstSimplify/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/lib/Analysis/InstructionSimplify.cpp
+      - llvm/lib/Transforms/InstCombine/**
+      - llvm/include/llvm/Transforms/InstCombine/**
+      - llvm/include/llvm/Analysis/InstructionSimplify.h
+      - llvm/test/Transforms/InstCombine/**
+      - llvm/test/Transforms/InstSimplify/**
 
 llvm:vectorcombine:
-  - llvm/lib/Transforms/Vectorize/VectorCombine.cpp
-  - llvm/test/Transforms/VectorCombine/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+      - llvm/test/Transforms/VectorCombine/**
 
 clangd:
-  - clang-tools-extra/clangd/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang-tools-extra/clangd/**
 
 hlsl:
-  - clang/test/ParserHLSL/**
-  - clang/test/SemaHLSL/**
-  - clang/test/AST/HLSL/**
-  - clang/test/CodeGenHLSL/**
-  - clang/cmake/caches/HLSL.cmake
-  - clang/include/clang/Basic/HLSL*.h
-  - clang/include/clang/Sema/HLSL*.h
-  - clang/docs/HLSL/**
-  - clang/lib/Driver/ToolChains/HLSL*
-  - clang/lib/Parse/ParseHLSL.cpp
-  - clang/lib/Sema/HLSLExternalSemaSource.cpp
-  - clang/lib/Sema/SemaHLSL.cpp
-  - clang/lib/CodeGen/CGHLSLRuntime.*
-  - clang/lib/CodeGen/CGHLSLBuiltins.cpp
-  - llvm/include/llvm/Frontend/HLSL/**
-  - llvm/lib/Frontend/HLSL/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/test/ParserHLSL/**
+      - clang/test/SemaHLSL/**
+      - clang/test/AST/HLSL/**
+      - clang/test/CodeGenHLSL/**
+      - clang/cmake/caches/HLSL.cmake
+      - clang/include/clang/Basic/HLSL*.h
+      - clang/include/clang/Sema/HLSL*.h
+      - clang/docs/HLSL/**
+      - clang/lib/Driver/ToolChains/HLSL*
+      - clang/lib/Parse/ParseHLSL.cpp
+      - clang/lib/Sema/HLSLExternalSemaSource.cpp
+      - clang/lib/Sema/SemaHLSL.cpp
+      - clang/lib/CodeGen/CGHLSLRuntime.*
+      - clang/lib/CodeGen/CGHLSLBuiltins.cpp
+      - llvm/include/llvm/Frontend/HLSL/**
+      - llvm/lib/Frontend/HLSL/**
 
 llvm:SelectionDAG:
-  - llvm/include/llvm/CodeGen/SelectionDAG*.h
-  - llvm/include/llvm/CodeGen/SDNodeProperties.td
-  - llvm/include/llvm/Target/TargetSelectionDAG.td
-  - llvm/lib/CodeGen/SelectionDAG/**
-  - llvm/utils/TableGen/CodeGenDAG*
-  - llvm/utils/TableGen/DAGISel*
-  - llvm/include/llvm/CodeGen/DAGCombine.h
-  - llvm/include/llvm/CodeGen/ISDOpcodes.h
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/include/llvm/CodeGen/SelectionDAG*.h
+      - llvm/include/llvm/CodeGen/SDNodeProperties.td
+      - llvm/include/llvm/Target/TargetSelectionDAG.td
+      - llvm/lib/CodeGen/SelectionDAG/**
+      - llvm/utils/TableGen/CodeGenDAG*
+      - llvm/utils/TableGen/DAGISel*
+      - llvm/include/llvm/CodeGen/DAGCombine.h
+      - llvm/include/llvm/CodeGen/ISDOpcodes.h
 
 backend:DirectX:
-  - '**/*DirectX*'
-  - '**/*DXIL*'
-  - '**/*dxil*'
-  - '**/*DirectX*/**'
-  - '**/*DXIL*/**'
-  - '**/*dxil*/**'
-  - '**/*DXContainer*'
-  - '**/*DXContainer*/**'
-  - clang/lib/Sema/SemaDirectX.cpp
-  - clang/include/clang/Sema/SemaDirectX.h
-  - clang/include/clang/Basic/BuiltinsDirectX.td
-  - clang/lib/CodeGen/TargetBuiltins/DirectX.cpp
-  - clang/test/CodeGenDirectX/**
-  - clang/test/SemaDirectX/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - '**/*DirectX*'
+      - '**/*DXIL*'
+      - '**/*dxil*'
+      - '**/*DirectX*/**'
+      - '**/*DXIL*/**'
+      - '**/*dxil*/**'
+      - '**/*DXContainer*'
+      - '**/*DXContainer*/**'
+      - clang/lib/Sema/SemaDirectX.cpp
+      - clang/include/clang/Sema/SemaDirectX.h
+      - clang/include/clang/Basic/BuiltinsDirectX.td
+      - clang/lib/CodeGen/TargetBuiltins/DirectX.cpp
+      - clang/test/CodeGenDirectX/**
+      - clang/test/SemaDirectX/**
 
 backend:SPIR-V:
-  - clang/lib/Driver/ToolChains/SPIRV.*
-  - clang/lib/Sema/SemaSPIRV.cpp
-  - clang/include/clang/Sema/SemaSPIRV.h
-  - clang/include/clang/Basic/BuiltinsSPIRV.td
-  - clang/test/CodeGenSPIRV/**
-  - clang/test/SemaSPIRV/**
-  - llvm/lib/Target/SPIRV/**
-  - llvm/test/CodeGen/SPIRV/**
-  - llvm/test/Frontend/HLSL/**
-  - llvm/docs/SPIRVUsage.rst
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/lib/Driver/ToolChains/SPIRV.*
+      - clang/lib/Sema/SemaSPIRV.cpp
+      - clang/include/clang/Sema/SemaSPIRV.h
+      - clang/include/clang/Basic/BuiltinsSPIRV.td
+      - clang/test/CodeGenSPIRV/**
+      - clang/test/SemaSPIRV/**
+      - llvm/lib/Target/SPIRV/**
+      - llvm/test/CodeGen/SPIRV/**
+      - llvm/test/Frontend/HLSL/**
+      - llvm/docs/SPIRVUsage.rst
 
 mlgo:
-  - llvm/lib/Analysis/ML*
-  - llvm/include/llvm/Analysis/ML*
-  - llvm/lib/Analysis/*Runner.cpp
-  - llvm/include/llvm/Analysis/*Runner.h
-  - llvm/unittests/Analysis/ML*
-  - llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp
-  - llvm/lib/Analysis/TrainingLogger.cpp
-  - llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h
-  - llvm/include/llvm/Analysis/Utils/TrainingLogger.h
-  - llvm/test/Analysis/FunctionPropertiesAnalysis/*
-  - llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp
-  - llvm/test/Transforms/inline/ML/**
-  - llvm/lib/CodeGen/ML*
-  - llvm/unittests/CodeGen/ML*
-  - llvm/test/CodeGen/MLRegAlloc/**
-  - llvm/utils/mlgo-utils/**
-  - llvm/docs/MLGO.rst
-  - llvm/include/llvm/Analysis/IR2Vec.h
-  - llvm/lib/Analysis/IR2Vec.cpp
-  - llvm/lib/Analysis/models/**
-  - llvm/include/llvm/CodeGen/MIR2Vec.h
-  - llvm/lib/CodeGen/MIR2Vec.cpp
-  - llvm/test/Analysis/IR2Vec/**
-  - llvm/test/CodeGen/MIR2Vec/**
-  - llvm/unittests/Analysis/IR2VecTest.cpp
-  - llvm/unittests/CodeGen/MIR2VecTest.cpp
-  - llvm/tools/llvm-ir2vec/**
-  - llvm/docs/CommandGuide/llvm-ir2vec.rst
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/lib/Analysis/ML*
+      - llvm/include/llvm/Analysis/ML*
+      - llvm/lib/Analysis/*Runner.cpp
+      - llvm/include/llvm/Analysis/*Runner.h
+      - llvm/unittests/Analysis/ML*
+      - llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp
+      - llvm/lib/Analysis/TrainingLogger.cpp
+      - llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h
+      - llvm/include/llvm/Analysis/Utils/TrainingLogger.h
+      - llvm/test/Analysis/FunctionPropertiesAnalysis/*
+      - llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp
+      - llvm/test/Transforms/inline/ML/**
+      - llvm/lib/CodeGen/ML*
+      - llvm/unittests/CodeGen/ML*
+      - llvm/test/CodeGen/MLRegAlloc/**
+      - llvm/utils/mlgo-utils/**
+      - llvm/docs/MLGO.rst
+      - llvm/include/llvm/Analysis/IR2Vec.h
+      - llvm/lib/Analysis/IR2Vec.cpp
+      - llvm/lib/Analysis/models/**
+      - llvm/include/llvm/CodeGen/MIR2Vec.h
+      - llvm/lib/CodeGen/MIR2Vec.cpp
+      - llvm/test/Analysis/IR2Vec/**
+      - llvm/test/CodeGen/MIR2Vec/**
+      - llvm/unittests/Analysis/IR2VecTest.cpp
+      - llvm/unittests/CodeGen/MIR2VecTest.cpp
+      - llvm/tools/llvm-ir2vec/**
+      - llvm/docs/CommandGuide/llvm-ir2vec.rst
 
 tools:llvm-exegesis:
-  - llvm/tools/llvm-exegesis/**
-  - llvm/test/tools/llvm-exegesis/**
-  - llvm/unittests/tools/llvm-exegesis/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/tools/llvm-exegesis/**
+      - llvm/test/tools/llvm-exegesis/**
+      - llvm/unittests/tools/llvm-exegesis/**
 
 tools:llvm-reduce:
-  - llvm/tools/llvm-reduce/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/tools/llvm-reduce/**
 
 platform:windows:
-  - lld/COFF/**
-  - clang/lib/Driver/MSVC.cpp
-  - clang/lib/Driver/MinGW.cpp
-  - llvm/lib/DebugInfo/CodeView/**
-  - llvm/lib/DebugInfo/PDB/**
-  - llvm/lib/WindowsDriver/**
-  - llvm/lib/Support/Windows/**
-  - llvm/lib/BinaryFormat/COFF.cpp
+  - changed-files:
+    - any-glob-to-any-file:
+      - lld/COFF/**
+      - clang/lib/Driver/MSVC.cpp
+      - clang/lib/Driver/MinGW.cpp
+      - llvm/lib/DebugInfo/CodeView/**
+      - llvm/lib/DebugInfo/PDB/**
+      - llvm/lib/WindowsDriver/**
+      - llvm/lib/Support/Windows/**
+      - llvm/lib/BinaryFormat/COFF.cpp
 
 llvm:regalloc:
-  - llvm/**/CodeGen/CalcSpillWeights*
-  - llvm/**/CodeGen/InlineSpiller*
-  - llvm/**/CodeGen/InterferenceCache*
-  - llvm/**/CodeGen/LiveInterval*
-  - llvm/**/CodeGen/LiveRange*
-  - llvm/**/CodeGen/LiveReg*
-  - llvm/**/CodeGen/LiveVariables*
-  - llvm/**/CodeGen/MachineCopyPropagation*
-  - llvm/**/CodeGen/PHIElimination*
-  - llvm/**/CodeGen/ProcessImplicitDefs.cpp
-  - llvm/**/CodeGen/Register*
-  - llvm/**/CodeGen/RegUsage*
-  - llvm/**/CodeGen/RenameIndependentSubregs.cpp
-  - llvm/**/CodeGen/SlotIndexes.h
-  - llvm/**/CodeGen/SpillPlacement*
-  - llvm/**/CodeGen/SplitKit*
-  - llvm/**/CodeGen/VirtRegMap.h
-  - llvm/include/PBQP/**
-  - llvm/include/PBQPRAConstraint.h
-  - llvm/include/llvm/CodeGen/Spiller.h
-  - llvm/**/*RegAlloc
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/**/CodeGen/CalcSpillWeights*
+      - llvm/**/CodeGen/InlineSpiller*
+      - llvm/**/CodeGen/InterferenceCache*
+      - llvm/**/CodeGen/LiveInterval*
+      - llvm/**/CodeGen/LiveRange*
+      - llvm/**/CodeGen/LiveReg*
+      - llvm/**/CodeGen/LiveVariables*
+      - llvm/**/CodeGen/MachineCopyPropagation*
+      - llvm/**/CodeGen/PHIElimination*
+      - llvm/**/CodeGen/ProcessImplicitDefs.cpp
+      - llvm/**/CodeGen/Register*
+      - llvm/**/CodeGen/RegUsage*
+      - llvm/**/CodeGen/RenameIndependentSubregs.cpp
+      - llvm/**/CodeGen/SlotIndexes.h
+      - llvm/**/CodeGen/SpillPlacement*
+      - llvm/**/CodeGen/SplitKit*
+      - llvm/**/CodeGen/VirtRegMap.h
+      - llvm/include/PBQP/**
+      - llvm/include/PBQPRAConstraint.h
+      - llvm/include/llvm/CodeGen/Spiller.h
+      - llvm/**/*RegAlloc*
 
 lldb:
-  - lldb/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - lldb/**
 
 lldb-dap:
-  - lldb/tools/lldb-dap/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - lldb/tools/lldb-dap/**
 
 backend:AMDGPU:
-  - '**/*amdgpu*'
-  - '**/*AMDGPU*'
-  - '**/*amdgpu*/**'
-  - '**/*AMDGPU*/**'
+  - changed-files:
+    - any-glob-to-any-file:
+      - '**/*amdgpu*'
+      - '**/*AMDGPU*'
+      - '**/*amdgpu*/**'
+      - '**/*AMDGPU*/**'
 
 backend:NVPTX:
-  - 'llvm/**/*nvvm*'
-  - 'llvm/**/*NVVM*'
-  - 'llvm/**/*nvptx*'
-  - 'llvm/**/*NVPTX*'
-  - 'llvm/**/*nvvm*/**'
-  - 'llvm/**/*NVVM*/**'
-  - 'llvm/**/*nvptx*/**'
-  - 'llvm/**/*NVPTX*/**'
+  - changed-files:
+    - any-glob-to-any-file:
+      - 'llvm/**/*nvvm*'
+      - 'llvm/**/*NVVM*'
+      - 'llvm/**/*nvptx*'
+      - 'llvm/**/*NVPTX*'
+      - 'llvm/**/*nvvm*/**'
+      - 'llvm/**/*NVVM*/**'
+      - 'llvm/**/*nvptx*/**'
+      - 'llvm/**/*NVPTX*/**'
 
 backend:MIPS:
-  - '**/*mips*'
-  - '**/*Mips*'
-  - '**/*mips*/**'
-  - '**/*Mips*/**'
+  - changed-files:
+    - any-glob-to-any-file:
+      - '**/*mips*'
+      - '**/*Mips*'
+      - '**/*mips*/**'
+      - '**/*Mips*/**'
 
 backend:RISC-V:
-  - '**/*riscv*'
-  - '**/*RISCV*'
-  - '**/*riscv*/**'
-  - '**/*RISCV*/**'
+  - changed-files:
+    - any-glob-to-any-file:
+      - '**/*riscv*'
+      - '**/*RISCV*'
+      - '**/*riscv*/**'
+      - '**/*RISCV*/**'
 
 backend:Xtensa:
-  - '**/*xtensa*'
-  - '**/*Xtensa*'
-  - '**/*xtensa*/**'
-  - '**/*Xtensa*/**'
+  - changed-files:
+    - any-glob-to-any-file:
+      - '**/*xtensa*'
+      - '**/*Xtensa*'
+      - '**/*xtensa*/**'
+      - '**/*Xtensa*/**'
 
 lld:coff:
-  - lld/**/COFF/**
-  - lld/Common/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - lld/**/COFF/**
+      - lld/Common/**
 
 lld:elf:
-  - lld/**/ELF/**
-  - lld/Common/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - lld/**/ELF/**
+      - lld/Common/**
 
 lld:macho:
-  - lld/**/MachO/**
-  - lld/Common/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - lld/**/MachO/**
+      - lld/Common/**
 
 lld:wasm:
-  - lld/**/wasm/**
-  - lld/Common/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - lld/**/wasm/**
+      - lld/Common/**
 
 backend:ARC:
-  - llvm/lib/Target/ARC/**
-  - clang/lib/Basic/Targets/ARC.h
-  - clang/lib/Basic/Targets/ARC.cpp
-  - clang/lib/CodeGen/Targets/ARC.cpp
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/lib/Target/ARC/**
+      - clang/lib/Basic/Targets/ARC.h
+      - clang/lib/Basic/Targets/ARC.cpp
+      - clang/lib/CodeGen/Targets/ARC.cpp
 
 backend:ARM:
-  - llvm/include/llvm/IR/IntrinsicsARM.td
-  - llvm/test/MC/ARM/**
-  - llvm/lib/Target/ARM/**
-  - llvm/test/CodeGen/ARM/**
-  - clang/lib/Basic/Targets/ARM*
-  - clang/lib/Driver/ToolChains/Arch/ARM.*
-  - clang/lib/CodeGen/Targets/ARM.cpp
-  - clang/include/clang/Basic/BuiltinsARM*
-  - llvm/test/MC/DisasemblerARM/**
-  - clang/include/clang/Sema/SemaARM.h
-  - clang/lib/Sema/SemaARM.cpp
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/include/llvm/IR/IntrinsicsARM.td
+      - llvm/test/MC/ARM/**
+      - llvm/lib/Target/ARM/**
+      - llvm/test/CodeGen/ARM/**
+      - clang/lib/Basic/Targets/ARM*
+      - clang/lib/Driver/ToolChains/Arch/ARM.*
+      - clang/lib/CodeGen/Targets/ARM.cpp
+      - clang/include/clang/Basic/BuiltinsARM*
+      - llvm/test/MC/Disassembler/ARM/**
+      - clang/include/clang/Sema/SemaARM.h
+      - clang/lib/Sema/SemaARM.cpp
 
 backend:AArch64:
-  - llvm/include/llvm/IR/IntrinsicsAArch64.td
-  - llvm/test/MC/AArch64/**
-  - llvm/lib/Target/AArch64/**
-  - llvm/test/CodeGen/AArch64/**
-  - clang/lib/Basic/Targets/AArch64*
-  - clang/lib/Driver/ToolChains/Arch/AArch64.*
-  - clang/lib/CodeGen/Targets/AArch64.cpp
-  - clang/include/clang/Basic/BuiltinsAArch64*
-  - llvm/test/MC/Disassembler/AArch64/**
-  - clang/include/clang/Sema/SemaARM.h
-  - clang/lib/Sema/SemaARM.cpp
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/include/llvm/IR/IntrinsicsAArch64.td
+      - llvm/test/MC/AArch64/**
+      - llvm/lib/Target/AArch64/**
+      - llvm/test/CodeGen/AArch64/**
+      - clang/lib/Basic/Targets/AArch64*
+      - clang/lib/Driver/ToolChains/Arch/AArch64.*
+      - clang/lib/CodeGen/Targets/AArch64.cpp
+      - clang/include/clang/Basic/BuiltinsAArch64*
+      - llvm/test/MC/Disassembler/AArch64/**
+      - clang/include/clang/Sema/SemaARM.h
+      - clang/lib/Sema/SemaARM.cpp
 
 backend:CSKY:
-  - llvm/lib/Target/CSKY/**
-  - llvm/include/llvm/TargetParser/CSKYTargetParser.def
-  - llvm/include/llvm/TargetParser/CSKYTargetParser.h
-  - llvm/include/llvm/BinaryFormat/ELFRelocs/CSKY.def
-  - llvm/lib/TargetParser/CSKYTargetParser.cpp
-  - llvm/lib/Support/CSKYAttributes.cpp
-  - llvm/lib/Support/CSKYAttributeParser.cpp
-  - clang/lib/Basic/Targets/CSKY.h
-  - clang/lib/Basic/Targets/CSKY.cpp
-  - clang/lib/CodeGen/Targets/CSKY.cpp
-  - clang/lib/Driver/ToolChains/CSKY*
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/lib/Target/CSKY/**
+      - llvm/include/llvm/TargetParser/CSKYTargetParser.def
+      - llvm/include/llvm/TargetParser/CSKYTargetParser.h
+      - llvm/include/llvm/BinaryFormat/ELFRelocs/CSKY.def
+      - llvm/lib/TargetParser/CSKYTargetParser.cpp
+      - llvm/lib/Support/CSKYAttributes.cpp
+      - llvm/lib/Support/CSKYAttributeParser.cpp
+      - clang/lib/Basic/Targets/CSKY.h
+      - clang/lib/Basic/Targets/CSKY.cpp
+      - clang/lib/CodeGen/Targets/CSKY.cpp
+      - clang/lib/Driver/ToolChains/CSKY*
 
 backend:Hexagon:
-  - clang/include/clang/Basic/BuiltinsHexagon*.def
-  - clang/include/clang/Sema/SemaHexagon.h
-  - clang/lib/Basic/Targets/Hexagon.*
-  - clang/lib/CodeGen/Targets/Hexagon.cpp
-  - clang/lib/Driver/ToolChains/Hexagon.*
-  - clang/lib/Sema/SemaHexagon.cpp
-  - lld/ELF/Arch/Hexagon.cpp
-  - lldb/source/Plugins/ABI/Hexagon/**
-  - lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/**
-  - llvm/include/llvm/BinaryFormat/ELFRelocs/Hexagon.def
-  - llvm/include/llvm/IR/IntrinsicsHexagon*
-  - llvm/include/llvm/Support/Hexagon*
-  - llvm/lib/Support/Hexagon*
-  - llvm/lib/Target/Hexagon/**
-  - llvm/test/CodeGen/Hexagon/**
-  - llvm/test/CodeGen/*/Hexagon/**
-  - llvm/test/DebugInfo/*/Hexagon/**
-  - llvm/test/Transforms/*/Hexagon
-  - llvm/test/MC/Disassembler/Hexagon/**
-  - llvm/test/MC/Hexagon/**
-  - llvm/test/tools/llvm-objdump/ELF/Hexagon/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/include/clang/Basic/BuiltinsHexagon*.def
+      - clang/include/clang/Sema/SemaHexagon.h
+      - clang/lib/Basic/Targets/Hexagon.*
+      - clang/lib/CodeGen/Targets/Hexagon.cpp
+      - clang/lib/Driver/ToolChains/Hexagon.*
+      - clang/lib/Sema/SemaHexagon.cpp
+      - lld/ELF/Arch/Hexagon.cpp
+      - lldb/source/Plugins/ABI/Hexagon/**
+      - lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/**
+      - llvm/include/llvm/BinaryFormat/ELFRelocs/Hexagon.def
+      - llvm/include/llvm/IR/IntrinsicsHexagon*
+      - llvm/include/llvm/Support/Hexagon*
+      - llvm/lib/Support/Hexagon*
+      - llvm/lib/Target/Hexagon/**
+      - llvm/test/CodeGen/Hexagon/**
+      - llvm/test/CodeGen/*/Hexagon/**
+      - llvm/test/DebugInfo/*/Hexagon/**
+      - llvm/test/Transforms/*/Hexagon/**
+      - llvm/test/MC/Disassembler/Hexagon/**
+      - llvm/test/MC/Hexagon/**
+      - llvm/test/tools/llvm-objdump/ELF/Hexagon/**
 
 backend:Lanai:
-  - llvm/lib/Target/Lanai/**
-  - clang/lib/Basic/Targets/Lanai.h
-  - clang/lib/Basic/Targets/Lanai.cpp
-  - clang/lib/CodeGen/Targets/Lanai.cpp
-  - clang/lib/Driver/ToolChains/Lanai*
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/lib/Target/Lanai/**
+      - clang/lib/Basic/Targets/Lanai.h
+      - clang/lib/Basic/Targets/Lanai.cpp
+      - clang/lib/CodeGen/Targets/Lanai.cpp
+      - clang/lib/Driver/ToolChains/Lanai*
 
 backend:loongarch:
-  - llvm/include/llvm/IR/IntrinsicsLoongArch.td
-  - llvm/test/MC/LoongArch/**
-  - llvm/lib/Target/LoongArch/**
-  - llvm/test/CodeGen/LoongArch/**
-  - clang/lib/Basic/Targets/LoongArch*
-  - clang/lib/Driver/ToolChains/Arch/LoongArch.*
-  - clang/lib/CodeGen/Targets/LoongArch.cpp
-  - clang/include/clang/Basic/BuiltinsLoongArch*
-  - clang/include/clang/Sema/SemaLoongArch.h
-  - clang/lib/Sema/SemaLoongArch.cpp
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/include/llvm/IR/IntrinsicsLoongArch.td
+      - llvm/test/MC/LoongArch/**
+      - llvm/lib/Target/LoongArch/**
+      - llvm/test/CodeGen/LoongArch/**
+      - clang/lib/Basic/Targets/LoongArch*
+      - clang/lib/Driver/ToolChains/Arch/LoongArch.*
+      - clang/lib/CodeGen/Targets/LoongArch.cpp
+      - clang/include/clang/Basic/BuiltinsLoongArch*
+      - clang/include/clang/Sema/SemaLoongArch.h
+      - clang/lib/Sema/SemaLoongArch.cpp
 
 backend:MSP430:
-  - llvm/include/llvm/IR/IntrinsicsMSP430.td
-  - llvm/test/MC/MSP430/**
-  - llvm/lib/Target/MSP430/**
-  - llvm/test/CodeGen/MSP430/**
-  - clang/lib/Basic/Targets/MSP430*
-  - clang/lib/Driver/ToolChains/Arch/MSP430.*
-  - clang/lib/CodeGen/Targets/MSP430.cpp
-  - clang/include/clang/Basic/BuiltinsMSP430*
-  - llvm/test/MC/Disassembler/MSP430/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/include/llvm/IR/IntrinsicsMSP430.td
+      - llvm/test/MC/MSP430/**
+      - llvm/lib/Target/MSP430/**
+      - llvm/test/CodeGen/MSP430/**
+      - clang/lib/Basic/Targets/MSP430*
+      - clang/lib/Driver/ToolChains/Arch/MSP430.*
+      - clang/lib/CodeGen/Targets/MSP430.cpp
+      - clang/include/clang/Basic/BuiltinsMSP430*
+      - llvm/test/MC/Disassembler/MSP430/**
 
 backend:Sparc:
-  - llvm/include/llvm/IR/IntrinsicsSparc.td
-  - llvm/test/MC/Sparc/**
-  - llvm/lib/Target/Sparc/**
-  - llvm/test/CodeGen/Sparc/**
-  - clang/lib/Basic/Targets/Sparc*
-  - clang/lib/Driver/ToolChains/Arch/Sparc.*
-  - clang/lib/CodeGen/Targets/Sparc.cpp
-  - clang/include/clang/Basic/BuiltinsSparc*
-  - llvm/test/MC/Disassembler/Sparc/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/include/llvm/IR/IntrinsicsSparc.td
+      - llvm/test/MC/Sparc/**
+      - llvm/lib/Target/Sparc/**
+      - llvm/test/CodeGen/Sparc/**
+      - clang/lib/Basic/Targets/Sparc*
+      - clang/lib/Driver/ToolChains/Arch/Sparc.*
+      - clang/lib/CodeGen/Targets/Sparc.cpp
+      - clang/include/clang/Basic/BuiltinsSparc*
+      - llvm/test/MC/Disassembler/Sparc/**
 
 backend:WebAssembly:
-  - llvm/lib/Target/WebAssembly/**
-  - llvm/test/CodeGen/WebAssembly/**
-  - clang/lib/Basic/Targets/WebAssembly*
-  - clang/include/clang/Basic/BuiltinsWebAssembly.def
-  - clang/include/clang/Basic/WebAssemblyReferenceTypes.def
-  - clang/lib/CodeGen/Targets/WebAssembly*
-  - llvm/include/llvm/IR/IntinsicsWebAssembly.td
-  - llvm/include/llvm/Object/Wasm*
-  - llvm/lib/CodeGen/AsmPrinter/Wasm*
-  - llvm/lib/CodeGen/Wasm*
-  - llvm/lib/MC/MCParser/Wasm*
-  - llvm/lib/MC/Wasm*
-  - llvm/lib/ObjCopy/wasm/**
-  - llvm/lib/Object/Wasm*
-  - clang/lib/Driver/Toolchains/WebAssembly*
-  - clang/lib/Headers/wasm_simd128.h
-  - clang/test/CodeGen/WebAssembly/**
-  - clang/test/SemaCXX/*wasm*
-  - clang/test/Sema/*wasm*
-  - llvm/include/llvm/BinaryFormat/Wasm.h
-  - llvm/unittests/Target/WebAssembly/**
-  - llvm/test/DebugInfo/WebAssembly/**
-  - llvm/test/MC/WebAssembly/**
-  - clang/include/clang/Sema/SemaWasm.h
-  - clang/lib/Sema/SemaLoongWasm.cpp
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/lib/Target/WebAssembly/**
+      - llvm/test/CodeGen/WebAssembly/**
+      - clang/lib/Basic/Targets/WebAssembly*
+      - clang/include/clang/Basic/BuiltinsWebAssembly.def
+      - clang/include/clang/Basic/WebAssemblyReferenceTypes.def
+      - clang/lib/CodeGen/Targets/WebAssembly*
+      - llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+      - llvm/include/llvm/Object/Wasm*
+      - llvm/lib/CodeGen/AsmPrinter/Wasm*
+      - llvm/lib/CodeGen/Wasm*
+      - llvm/lib/MC/MCParser/Wasm*
+      - llvm/lib/MC/Wasm*
+      - llvm/lib/ObjCopy/wasm/**
+      - llvm/lib/Object/Wasm*
+      - clang/lib/Driver/ToolChains/WebAssembly*
+      - clang/lib/Headers/wasm_simd128.h
+      - clang/test/CodeGen/WebAssembly/**
+      - clang/test/SemaCXX/*wasm*
+      - clang/test/Sema/*wasm*
+      - llvm/include/llvm/BinaryFormat/Wasm.h
+      - llvm/unittests/Target/WebAssembly/**
+      - llvm/test/DebugInfo/WebAssembly/**
+      - llvm/test/MC/WebAssembly/**
+      - clang/include/clang/Sema/SemaWasm.h
+      - clang/lib/Sema/SemaWasm.cpp
 
 backend:X86:
-  - llvm/include/llvm/IR/IntrinsicsX86.td
-  - llvm/lib/Target/X86/**
-  - llvm/test/CodeGen/X86/**
-  - llvm/test/MC/X86/**
-  - llvm/test/MC/Disassembler/X86/**
-  - llvm/test/Analysis/CostModel/X86/**
-  - llvm/test/tools/llvm-mca/X86/**
-  - clang/lib/Basic/Targets/X86/**
-  - clang/lib/Driver/ToolChains/Arch/X86.*
-  - clang/lib/CodeGen/Targets/X86.*
-  - clang/lib/Headers/**
-  - clang/test/CodeGen/X86/**
-  - clang/include/clang/Basic/BuiltinsX86*
-  - llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
-  - llvm/include/llvm/TargetParser/X86*
-  - llvm/lib/TargetParser/X86*
-  - llvm/utils/TableGen/X86*
-  - clang/include/clang/Sema/SemaX86.h
-  - clang/lib/Sema/SemaX86.cpp
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/include/llvm/IR/IntrinsicsX86.td
+      - llvm/lib/Target/X86/**
+      - llvm/test/CodeGen/X86/**
+      - llvm/test/MC/X86/**
+      - llvm/test/MC/Disassembler/X86/**
+      - llvm/test/Analysis/CostModel/X86/**
+      - llvm/test/tools/llvm-mca/X86/**
+      - clang/lib/Basic/Targets/X86/**
+      - clang/lib/Driver/ToolChains/Arch/X86.*
+      - clang/lib/CodeGen/Targets/X86.*
+      - clang/lib/Headers/**
+      - clang/test/CodeGen/X86/**
+      - clang/include/clang/Basic/BuiltinsX86*
+      - llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
+      - llvm/include/llvm/TargetParser/X86*
+      - llvm/lib/TargetParser/X86*
+      - llvm/utils/TableGen/X86*
+      - clang/include/clang/Sema/SemaX86.h
+      - clang/lib/Sema/SemaX86.cpp
 
 backend:PowerPC:
-  - llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC*
-  - llvm/include/llvm/BinaryFormat/XCOFF.h
-  - llvm/include/llvm/IR/IntrinsicsPowerPC.td
-  - llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
-  - llvm/lib/Target/PowerPC/**
-  - llvm/test/Analysis/**/PowerPC/**
-  - llvm/test/CodeGen/PowerPC/**
-  - llvm/test/CodeGen/MIR/PowerPC/**
-  - llvm/test/DebugInfo/XCOFF/**
-  - llvm/test/DebugInfo/PowerPC/**
-  - llvm/test/LTO/PowerPC/**
-  - llvm/test/MC/Disassembler/PowerPC/**
-  - llvm/test/MC/PowerPC/**
-  - llvm/test/MC/XCOFF/**
-  - llvm/test/Transforms/**/PowerPC/**
-  - clang/include/clang/Basic/BuiltinsPPC.*
-  - clang/lib/Basic/Targets/PPC.*
-  - clang/lib/CodeGen/Targets/PPC.cpp
-  - clang/lib/Driver/ToolChains/PPC*
-  - clang/lib/Driver/ToolChains/AIX*
-  - clang/lib/Driver/ToolChains/Arch/PPC.*
-  - clang/test/CodeGen/PowerPC/**
-  - clang/include/clang/Sema/SemaPPC.h
-  - clang/lib/Sema/SemaPPC.cpp
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC*
+      - llvm/include/llvm/BinaryFormat/XCOFF.h
+      - llvm/include/llvm/IR/IntrinsicsPowerPC.td
+      - llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
+      - llvm/lib/Target/PowerPC/**
+      - llvm/test/Analysis/**/PowerPC/**
+      - llvm/test/CodeGen/PowerPC/**
+      - llvm/test/CodeGen/MIR/PowerPC/**
+      - llvm/test/DebugInfo/XCOFF/**
+      - llvm/test/DebugInfo/PowerPC/**
+      - llvm/test/LTO/PowerPC/**
+      - llvm/test/MC/Disassembler/PowerPC/**
+      - llvm/test/MC/PowerPC/**
+      - llvm/test/MC/XCOFF/**
+      - llvm/test/Transforms/**/PowerPC/**
+      - clang/include/clang/Basic/BuiltinsPPC.*
+      - clang/lib/Basic/Targets/PPC.*
+      - clang/lib/CodeGen/Targets/PPC.cpp
+      - clang/lib/Driver/ToolChains/PPC*
+      - clang/lib/Driver/ToolChains/AIX*
+      - clang/lib/Driver/ToolChains/Arch/PPC.*
+      - clang/test/CodeGen/PowerPC/**
+      - clang/include/clang/Sema/SemaPPC.h
+      - clang/lib/Sema/SemaPPC.cpp
 
 backend:SystemZ:
-  - llvm/include/llvm/BinaryFormat/ELFRelocs/SystemZ*
-  - llvm/include/llvm/BinaryFormat/GOFF.h
-  - llvm/include/llvm/IR/IntrinsicsSystemZ.td
-  - llvm/lib/Target/SystemZ/**
-  - llvm/test/Analysis/**/SystemZ/**
-  - llvm/test/CodeGen/SystemZ/**
-  - llvm/test/DebugInfo/SystemZ/**
-  - llvm/test/ExecutionEngine/**/SystemZ/**
-  - llvm/test/MC/Disassembler/SystemZ/**
-  - llvm/test/MC/GOFF/**
-  - llvm/test/MC/SystemZ/**
-  - llvm/test/Transforms/**/SystemZ/**
-  - clang/include/clang/Basic/BuiltinsSystemZ.*
-  - clang/lib/Basic/Targets/SystemZ.*
-  - clang/lib/CodeGen/Targets/SystemZ.cpp
-  - clang/lib/Driver/ToolChains/ZOS*
-  - clang/lib/Driver/ToolChains/Arch/SystemZ.*
-  - clang/test/CodeGen/SystemZ/**
-  - clang/include/clang/Sema/SemaSystemZ.h
-  - clang/lib/Sema/SemaSystemZ.cpp
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/include/llvm/BinaryFormat/ELFRelocs/SystemZ*
+      - llvm/include/llvm/BinaryFormat/GOFF.h
+      - llvm/include/llvm/IR/IntrinsicsSystemZ.td
+      - llvm/lib/Target/SystemZ/**
+      - llvm/test/Analysis/**/SystemZ/**
+      - llvm/test/CodeGen/SystemZ/**
+      - llvm/test/DebugInfo/SystemZ/**
+      - llvm/test/ExecutionEngine/**/SystemZ/**
+      - llvm/test/MC/Disassembler/SystemZ/**
+      - llvm/test/MC/GOFF/**
+      - llvm/test/MC/SystemZ/**
+      - llvm/test/Transforms/**/SystemZ/**
+      - clang/include/clang/Basic/BuiltinsSystemZ.*
+      - clang/lib/Basic/Targets/SystemZ.*
+      - clang/lib/CodeGen/Targets/SystemZ.cpp
+      - clang/lib/Driver/ToolChains/ZOS*
+      - clang/lib/Driver/ToolChains/Arch/SystemZ.*
+      - clang/test/CodeGen/SystemZ/**
+      - clang/include/clang/Sema/SemaSystemZ.h
+      - clang/lib/Sema/SemaSystemZ.cpp
 
 third-party:unittests:
-  - third-party/unittests/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - third-party/unittests/**
 
 third-party:benchmark:
-  - third-party/benchmark/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - third-party/benchmark/**
 
 llvm:binary-utilities:
-  - llvm/docs/CommandGuide/llvm-*
-  - llvm/include/llvm/BinaryFormat/**
-  - llvm/include/llvm/DebugInfo/Symbolize/**
-  - llvm/include/llvm/ObjCopy/**
-  - llvm/include/llvm/Object/**
-  - llvm/lib/BinaryFormat/**
-  - llvm/lib/DebugInfo/Symbolize/**
-  - llvm/lib/ObjCopy/**
-  - llvm/lib/Object/**
-  - llvm/test/Object/**
-  - llvm/test/tools/llvm-ar/**
-  - llvm/test/tools/llvm-cxxfilt/**
-  - llvm/test/tools/llvm-nm/**
-  - llvm/test/tools/llvm-objcopy/**
-  - llvm/test/tools/llvm-objdump/**
-  - llvm/test/tools/llvm-readobj/**
-  - llvm/test/tools/llvm-size/**
-  - llvm/test/tools/llvm-strings/**
-  - llvm/test/tools/llvm-symbolizer/**
-  - llvm/tools/llvm-ar/**
-  - llvm/tools/llvm-cxxfilt/**
-  - llvm/tools/llvm-nm/**
-  - llvm/tools/llvm-objcopy/**
-  - llvm/tools/llvm-objdump/**
-  - llvm/tools/llvm-readobj/**
-  - llvm/tools/llvm-size/**
-  - llvm/tools/llvm-strings/**
-  - llvm/tools/llvm-symbolizer/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/docs/CommandGuide/llvm-*
+      - llvm/include/llvm/BinaryFormat/**
+      - llvm/include/llvm/DebugInfo/Symbolize/**
+      - llvm/include/llvm/ObjCopy/**
+      - llvm/include/llvm/Object/**
+      - llvm/lib/BinaryFormat/**
+      - llvm/lib/DebugInfo/Symbolize/**
+      - llvm/lib/ObjCopy/**
+      - llvm/lib/Object/**
+      - llvm/test/Object/**
+      - llvm/test/tools/llvm-ar/**
+      - llvm/test/tools/llvm-cxxfilt/**
+      - llvm/test/tools/llvm-nm/**
+      - llvm/test/tools/llvm-objcopy/**
+      - llvm/test/tools/llvm-objdump/**
+      - llvm/test/tools/llvm-readobj/**
+      - llvm/test/tools/llvm-size/**
+      - llvm/test/tools/llvm-strings/**
+      - llvm/test/tools/llvm-symbolizer/**
+      - llvm/tools/llvm-ar/**
+      - llvm/tools/llvm-cxxfilt/**
+      - llvm/tools/llvm-nm/**
+      - llvm/tools/llvm-objcopy/**
+      - llvm/tools/llvm-objdump/**
+      - llvm/tools/llvm-readobj/**
+      - llvm/tools/llvm-size/**
+      - llvm/tools/llvm-strings/**
+      - llvm/tools/llvm-symbolizer/**
 
 clang:openmp:
-  - clang/include/clang/Basic/OpenMP*
-  - clang/include/clang/AST/OpenMPClause.h
-  - clang/include/clang/AST/DeclOpenMP.h
-  - clang/include/clang/AST/ExprOpenMP.h
-  - clang/include/clang/AST/StmtOpenMP.h
-  - clang/lib/AST/DeclOpenMP.cpp
-  - clang/lib/AST/OpenMPClause.cpp
-  - clang/lib/AST/StmtOpenMP.cpp
-  - clang/lib/Headers/openmp_wrappers/**
-  - clang/lib/Parse/ParseOpenMP.cpp
-  - clang/lib/Basic/OpenMPKinds.cpp
-  - clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
-  - clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
-  - clang/lib/CodeGen/CgStmtOpenMP.cpp
-  - clang/lib/CodeGen/CGOpenMP*
-  - clang/lib/Sema/SemaOpenMP.cpp
-  - clang/test/OpenMP/**
-  - clang/test/AST/ast-dump-openmp-*
-  - llvm/lib/Frontend/OpenMP/**
-  - llvm/lib/Transforms/IPO/OpenMPOpt.cpp
-  - llvm/include/llvm/Frontend/OpenMP/**
-  - llvm/include/llvm/Transforms/IPO/OpenMPOpt.h
-  - llvm/unittests/Frontend/OpenMP*
-  - llvm/test/Transforms/OpenMP/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/include/clang/Basic/OpenMP*
+      - clang/include/clang/AST/OpenMPClause.h
+      - clang/include/clang/AST/DeclOpenMP.h
+      - clang/include/clang/AST/ExprOpenMP.h
+      - clang/include/clang/AST/StmtOpenMP.h
+      - clang/lib/AST/DeclOpenMP.cpp
+      - clang/lib/AST/OpenMPClause.cpp
+      - clang/lib/AST/StmtOpenMP.cpp
+      - clang/lib/Headers/openmp_wrappers/**
+      - clang/lib/Parse/ParseOpenMP.cpp
+      - clang/lib/Basic/OpenMPKinds.cpp
+      - clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+      - clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
+      - clang/lib/CodeGen/CgStmtOpenMP.cpp
+      - clang/lib/CodeGen/CGOpenMP*
+      - clang/lib/Sema/SemaOpenMP.cpp
+      - clang/test/OpenMP/**
+      - clang/test/AST/ast-dump-openmp-*
+      - llvm/lib/Frontend/OpenMP/**
+      - llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+      - llvm/include/llvm/Frontend/OpenMP/**
+      - llvm/include/llvm/Transforms/IPO/OpenMPOpt.h
+      - llvm/unittests/Frontend/OpenMP*
+      - llvm/test/Transforms/OpenMP/**
 
 clang:temporal-safety:
-  - clang/include/clang/Analysis/Analyses/LifetimeSafety/**
-  - clang/lib/Analysis/LifetimeSafety/**
-  - clang/unittests/Analysis/LifetimeSafety*
-  - clang/test/Sema/*lifetime-safety*
-  - clang/test/Sema/*lifetime-analysis*
-  - clang/test/Analysis/LifetimeSafety/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/include/clang/Analysis/Analyses/LifetimeSafety/**
+      - clang/lib/Analysis/LifetimeSafety/**
+      - clang/unittests/Analysis/LifetimeSafety*
+      - clang/test/Sema/*lifetime-safety*
+      - clang/test/Sema/*lifetime-analysis*
+      - clang/test/Analysis/LifetimeSafety/**
 
 clang:as-a-library:
-  - clang/tools/libclang/**
-  - clang/bindings/**
-  - clang/include/clang-c/**
-  - clang/test/LibClang/**
-  - clang/unittest/libclang/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - clang/tools/libclang/**
+      - clang/bindings/**
+      - clang/include/clang-c/**
+      - clang/test/LibClang/**
+      - clang/unittests/libclang/**
 
 openmp:libomp:
-  - any: ['openmp/**', '!openmp/libomptarget/**']
+  - changed-files:
+    - any-glob-to-any-file:
+      - 'openmp/**'
 
 openmp:libomptarget:
-  - any: ['openmp/**', '!openmp/runtime/**']
+  - changed-files:
+    - all-globs-to-any-file:
+      - openmp/**
+      - '!openmp/runtime/**'
 
 bazel:
-  - utils/bazel/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - utils/bazel/**
 
 offload:
-  - offload/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - offload/**
 
 tablegen:
-  - llvm/include/TableGen/**
-  - llvm/lib/TableGen/**
-  - llvm/utils/TableGen/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - llvm/include/TableGen/**
+      - llvm/lib/TableGen/**
+      - llvm/utils/TableGen/**
 
 infrastructure:
-  - .ci/**
+  - changed-files:
+    - any-glob-to-any-file:
+      - .ci/**
diff --git a/.github/workflows/new-prs.yml b/.github/workflows/new-prs.yml
index e1f2e754c1a3d..0d97e436d39c4 100644
--- a/.github/workflows/new-prs.yml
+++ b/.github/workflows/new-prs.yml
@@ -67,9 +67,7 @@ jobs:
       github.event.pull_request.draft == false &&
       github.event.pull_request.commits < 10
     steps:
-      - uses: actions/labeler@ac9175f8a1f3625fd0d4fb234536d26811351594 # v4.3.0
+      - uses: actions/labeler@634933edcd8ababfe52f92936142cc22ac488b1b # v6.0.1
         with:
           configuration-path: .github/new-prs-labeler.yml
-          # workaround for https://github.com/actions/labeler/issues/112
-          sync-labels: ''
           repo-token: ${{ secrets.ISSUE_SUBSCRIBER_TOKEN }}
diff --git a/clang-tools-extra/clang-tidy/bugprone/UnsafeFunctionsCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/UnsafeFunctionsCheck.cpp
index 5524c4b484be1..67d0931003c54 100644
--- a/clang-tools-extra/clang-tidy/bugprone/UnsafeFunctionsCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/UnsafeFunctionsCheck.cpp
@@ -301,14 +301,20 @@ void UnsafeFunctionsCheck::check(const MatchFinder::MatchResult &Result) {
   if (Custom) {
     for (const auto &Entry : CustomFunctions) {
       if (Entry.Pattern.match(*FuncDecl)) {
-        const StringRef Reason =
+        StringRef Reason =
             Entry.Reason.empty() ? "is marked as unsafe" : Entry.Reason.c_str();
 
-        if (Entry.Replacement.empty()) {
+        // Omit the replacement, when a fully-custom reason is given.
+        if (Reason.consume_front(">")) {
+          diag(SourceExpr->getExprLoc(), "function %0 %1")
+              << FuncDecl << Reason.trim() << SourceExpr->getSourceRange();
+          // Do not recommend a replacement when it is not present.
+        } else if (Entry.Replacement.empty()) {
           diag(SourceExpr->getExprLoc(),
                "function %0 %1; it should not be used")
               << FuncDecl << Reason << Entry.Replacement
               << SourceExpr->getSourceRange();
+          // Otherwise, emit the replacement.
         } else {
           diag(SourceExpr->getExprLoc(),
                "function %0 %1; '%2' should be used instead")
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index b982216297919..743397e3ec6ce 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -69,6 +69,13 @@ Potentially Breaking Changes
   - `CharTypdefsToIgnore` to `CharTypedefsToIgnore` in
     :doc:`bugprone-signed-char-misuse
     <clang-tidy/checks/bugprone/signed-char-misuse>`
+  
+- Modified the custom message format of :doc:`bugprone-unsafe-functions
+  <clang-tidy/checks/bugprone/unsafe-functions>` by assigning a special meaning
+  to the character ``>`` at the start of the ``reason`` field of entries in the
+  option ``CustomFunctions``. If the reason starts with ``>``, then the
+  replacement suggestion part of the message (which would be included by
+  default) is omitted. (This does not change the warning locations.)
 
 - :program:`clang-tidy` now displays warnings from all non-system headers by
   default. Previously, users had to explicitly opt-in to header warnings using
@@ -387,6 +394,11 @@ Changes in existing checks
   <clang-tidy/checks/bugprone/unhandled-self-assignment>` check by adding
   an additional matcher that generalizes the copy-and-swap idiom pattern
   detection.
+  
+- Improved :doc:`bugprone-unsafe-functions
+  <clang-tidy/checks/bugprone/unsafe-functions>` check by hiding the default
+  suffix when the reason starts with the character `>` in the `CustomFunctions`
+  option.
 
 - Improved :doc:`cppcoreguidelines-avoid-non-const-global-variables
   <clang-tidy/checks/cppcoreguidelines/avoid-non-const-global-variables>` check
diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/unsafe-functions.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/unsafe-functions.rst
index f1fec13739271..cb7ea415c54b2 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/unsafe-functions.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/unsafe-functions.rst
@@ -96,37 +96,62 @@ to be checked. The format is the following, without newlines:
 The functions are matched using POSIX extended regular expressions.
 *(Note: The regular expressions do not support negative* ``(?!)`` *matches.)*
 
-The `reason` is optional and is used to provide additional information
-about the reasoning behind the replacement. The default reason is
-`is marked as unsafe`.
+The ``reason`` is optional and is used to provide additional information about the
+reasoning behind the replacement. The default reason is ``is marked as unsafe``.
 
-If `replacement` is empty, the text `it should not be used` will be shown
-instead of the suggestion for a replacement.
+If ``replacement`` is empty, the default text ``it should not be used`` will be
+shown instead of the suggestion for a replacement.
 
-As an example, the configuration `^original$, replacement, is deprecated;`
-will produce the following diagnostic message.
+If the ``reason`` starts with the character ``>``, the reason becomes fully custom.
+The default suffix is disabled even if a ``replacement`` is present, and only the
+reason message is shown after the matched function, to allow better control over
+the suggestions. (The starting ``>`` and whitespace directly after it are
+trimmed from the message.)
+
+As an example, the following configuration matches only the function ``original``
+in the default namespace. A similar diagnostic can also be printed using a fully
+custom reason.
 
 .. code:: c
 
+   // bugprone-unsafe-functions.CustomFunctions:
+   //   ^original$, replacement, is deprecated;
+   // Using the fully custom message syntax:
+   //   ^suspicious$,,> should be avoided if possible.
    original(); // warning: function 'original' is deprecated; 'replacement' should be used instead.
+   suspicious(); // warning: function 'suspicious' should be avoided if possible.
    ::std::original(); // no-warning
    original_function(); // no-warning
 
-If the regular expression contains the character `:`, it is matched against the
-qualified name (i.e. ``std::original``), otherwise the regex is matched against the unqualified name (``original``).
-If the regular expression starts with `::` (or `^::`), it is matched against the
-fully qualified name (``::std::original``).
+If the regular expression contains the character ``:``, it is matched against the
+qualified name (i.e. ``std::original``), otherwise the regex is matched against
+the unqualified name (``original``). If the regular expression starts with ``::``
+(or ``^::``), it is matched against the fully qualified name
+(``::std::original``).
+
+One of the use cases for fully custom messages is suggesting compiler options
+and warning flags:
+
+.. code:: c
+
+   // bugprone-unsafe-functions.CustomFunctions:
+   //   ^memcpy$,,>is recommended to have compiler hardening using '_FORTIFY_SOURCE';
+   //   ^printf$,,>is recommended to have the '-Werror=format-security' compiler warning flag;
+
+   memcpy(dest, src, 999'999); // warning: function 'memcpy' is recommended to have compiler hardening using '_FORTIFY_SOURCE'
+   printf(raw_str); // warning: function 'printf' is recommended to have the '-Werror=format-security' compiler warning flag
 
 .. note::
 
-   Fully qualified names can contain template parameters on certain C++ classes, but not on C++ functions.
-   Type aliases are resolved before matching.
+   Fully qualified names can contain template parameters on certain C++ classes,
+   but not on C++ functions. Type aliases are resolved before matching.
 
    As an example, the member function ``open`` in the class ``std::ifstream``
    has a fully qualified name of ``::std::basic_ifstream<char>::open``.
 
-   The example could also be matched with the regex ``::std::basic_ifstream<[^>]*>::open``, which matches all potential
-   template parameters, but does not match nested template classes.
+   The example could also be matched with the regex
+   ``::std::basic_ifstream<[^>]*>::open``, which matches all potential template
+   parameters, but does not match nested template classes.
 
 Options
 -------
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/unsafe-functions-custom.c b/clang-tools-extra/test/clang-tidy/checkers/bugprone/unsafe-functions-custom.c
index 7fd71ec2f2e7b..7eaf015f06aa2 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/unsafe-functions-custom.c
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/unsafe-functions-custom.c
@@ -1,5 +1,5 @@
 // RUN: %check_clang_tidy -check-suffix=NON-STRICT-REGEX %s bugprone-unsafe-functions %t --\
-// RUN:   -config="{CheckOptions: {bugprone-unsafe-functions.CustomFunctions: '::name_match,replacement,is a qualname match;^::prefix_match,,is matched on qualname prefix'}}"
+// RUN:   -config="{CheckOptions: {bugprone-unsafe-functions.CustomFunctions: \"::name_match,,>is a qualname match, but with a fully 'custom' message;^::prefix_match,,is matched on qualname prefix\"}}"
 // RUN: %check_clang_tidy -check-suffix=STRICT-REGEX     %s bugprone-unsafe-functions %t --\
 // RUN:   -config="{CheckOptions: {bugprone-unsafe-functions.CustomFunctions: '^name_match$,replacement,is matched on function name only;^::prefix_match$,,is a full qualname match'}}"
 
@@ -11,14 +11,14 @@ void prefix_match_regex();
 
 void f1() {
   name_match();
-  // CHECK-MESSAGES-NON-STRICT-REGEX: :[[@LINE-1]]:3: warning: function 'name_match' is a qualname match; 'replacement' should be used instead
+  // CHECK-MESSAGES-NON-STRICT-REGEX: :[[@LINE-1]]:3: warning: function 'name_match' is a qualname match, but with a fully 'custom' message
   // CHECK-MESSAGES-STRICT-REGEX: :[[@LINE-2]]:3: warning: function 'name_match' is matched on function name only; 'replacement' should be used instead
   prefix_match();
   // CHECK-MESSAGES-NON-STRICT-REGEX: :[[@LINE-1]]:3: warning: function 'prefix_match' is matched on qualname prefix; it should not be used
   // CHECK-MESSAGES-STRICT-REGEX: :[[@LINE-2]]:3: warning: function 'prefix_match' is a full qualname match; it should not be used
 
   name_match_regex();
-  // CHECK-MESSAGES-NON-STRICT-REGEX: :[[@LINE-1]]:3: warning: function 'name_match_regex' is a qualname match; 'replacement' should be used instead
+  // CHECK-MESSAGES-NON-STRICT-REGEX: :[[@LINE-1]]:3: warning: function 'name_match_regex' is a qualname match, but with a fully 'custom' message
   // no-warning STRICT-REGEX
 
   prefix_match_regex();
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 7459127670cc3..c2da61e4d066a 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -500,6 +500,7 @@ Bug Fixes to Attribute Support
 - Fixes crashes or missing diagnostics with the `device_kernel` attribute. (#GH161905)
 - Fix handling of parameter indexes when an attribute is applied to a C++23 explicit object member function.
 - Fixed several false positives and false negatives in function effect (`nonblocking`) analysis. (#GH166078) (#GH166101) (#GH166110)
+- Fix ``cleanup`` attribute by delaying type checks until after the type is deduced. (#GH129631)
 
 Bug Fixes to C++ Support
 ^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 8dfe4bc08c48e..0097476bc0d8d 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -741,6 +741,17 @@ class Attr {
   // our existing general parsing we need to have a separate flag that
   // opts an attribute into strict parsing of attribute parameters
   bit StrictEnumParameters = 0;
+  // Set to true for attributes which have Sema checks which require the type
+  // to be deduced.
+  // When `IsTypeDependent` is set to true, you should add an `ActOn*Attr`
+  // function to `Sema.h`. The signature of the function must be:
+  // `void ActOn*Attr(Decl *, const Attr *);` where the `Decl *` is the
+  // declaration the attribute will be attached to; its type will have already
+  // been deduced, and the `Attr *` is the attribute being applied to that
+  // declaration. This function should handle all type-sensitive semantics for
+  // the attribute. This function will be automatically called by
+  // `Sema::CheckAttributesOnDeducedType()`.
+  bit IsTypeDependent = 0;
   // Lists language options, one of which is required to be true for the
   // attribute to be applicable. If empty, no language options are required.
   list<LangOpt> LangOpts = [];
@@ -1400,6 +1411,7 @@ def Cleanup : InheritableAttr {
   let Args = [DeclArgument<Function, "FunctionDecl">];
   let Subjects = SubjectList<[LocalVar]>;
   let Documentation = [CleanupDocs];
+  let IsTypeDependent = 1;
   // FIXME: DeclArgument should be reworked to also store the
   // Expr instead of adding attr specific hacks like the following.
   // See the discussion in https://github.com/llvm/llvm-project/pull/14023.
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index dbf857afa08c8..47da17e5cfe83 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -5253,6 +5253,18 @@ def HLSLF16ToF32 : LangBuiltin<"HLSL_LANG"> {
   let Prototype = "void(...)";
 }
 
+def HLSLDdxCoarse : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_elementwise_ddx_coarse"];
+  let Attributes = [NoThrow, Const, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
+def HLSLDdyCoarse : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_elementwise_ddy_coarse"];
+  let Attributes = [NoThrow, Const, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
 // Builtins for XRay.
 def XRayCustomEvent : Builtin {
   let Spellings = ["__xray_customevent"];
diff --git a/clang/include/clang/Sema/CMakeLists.txt b/clang/include/clang/Sema/CMakeLists.txt
index 9077e22c2307c..3f540ea596871 100644
--- a/clang/include/clang/Sema/CMakeLists.txt
+++ b/clang/include/clang/Sema/CMakeLists.txt
@@ -8,6 +8,11 @@ clang_tablegen(AttrParsedAttrKinds.inc -gen-clang-attr-parsed-attr-kinds
   SOURCE ../Basic/Attr.td
   TARGET ClangAttrParsedAttrKinds)
 
+clang_tablegen(AttrIsTypeDependent.inc -gen-clang-attr-is-type-dependent
+  -I ${CMAKE_CURRENT_SOURCE_DIR}/../../
+  SOURCE ../Basic/Attr.td
+  TARGET ClangAttrIsTypeDependent)
+
 clang_tablegen(AttrSpellingListIndex.inc -gen-clang-attr-spelling-index
   -I ${CMAKE_CURRENT_SOURCE_DIR}/../../
   SOURCE ../Basic/Attr.td
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 6ca182338d6af..fd2a2469142e4 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -4456,6 +4456,10 @@ class Sema final : public SemaBase {
       NamedDecl *New, Decl *Old,
       AvailabilityMergeKind AMK = AvailabilityMergeKind::Redeclaration);
 
+  /// CheckAttributesOnDeducedType - Calls Sema functions for attributes that
+  /// require the type to be deduced.
+  void CheckAttributesOnDeducedType(Decl *D);
+
   /// MergeTypedefNameDecl - We just parsed a typedef 'New' which has the
   /// same name and scope as a previous declaration 'Old'.  Figure out
   /// how to resolve this situation, merging decls or emitting
@@ -4760,6 +4764,8 @@ class Sema final : public SemaBase {
   // linkage or not.
   static bool mightHaveNonExternalLinkage(const DeclaratorDecl *FD);
 
+#include "clang/Sema/AttrIsTypeDependent.inc"
+
   ///@}
 
   //
@@ -15469,6 +15475,8 @@ class Sema final : public SemaBase {
   std::optional<FunctionEffectMode>
   ActOnEffectExpression(Expr *CondExpr, StringRef AttributeName);
 
+  void ActOnCleanupAttr(Decl *D, const Attr *A);
+
 private:
   /// The implementation of RequireCompleteType
   bool RequireCompleteTypeImpl(SourceLocation Loc, QualType T,
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index b6928ce7d9c44..12d9a98915ce3 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -924,6 +924,24 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
     return EmitRuntimeCall(
         Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
   }
+  case Builtin::BI__builtin_hlsl_elementwise_ddx_coarse: {
+    Value *Op0 = EmitScalarExpr(E->getArg(0));
+    if (!E->getArg(0)->getType()->hasFloatingRepresentation())
+      llvm_unreachable("ddx_coarse operand must have a float representation");
+    Intrinsic::ID ID = CGM.getHLSLRuntime().getDdxCoarseIntrinsic();
+    return Builder.CreateIntrinsic(/*ReturnType=*/Op0->getType(), ID,
+                                   ArrayRef<Value *>{Op0}, nullptr,
+                                   "hlsl.ddx.coarse");
+  }
+  case Builtin::BI__builtin_hlsl_elementwise_ddy_coarse: {
+    Value *Op0 = EmitScalarExpr(E->getArg(0));
+    if (!E->getArg(0)->getType()->hasFloatingRepresentation())
+      llvm_unreachable("ddy_coarse operand must have a float representation");
+    Intrinsic::ID ID = CGM.getHLSLRuntime().getDdyCoarseIntrinsic();
+    return Builder.CreateIntrinsic(/*ReturnType=*/Op0->getType(), ID,
+                                   ArrayRef<Value *>{Op0}, nullptr,
+                                   "hlsl.ddy.coarse");
+  }
   case Builtin::BI__builtin_get_spirv_spec_constant_bool:
   case Builtin::BI__builtin_get_spirv_spec_constant_short:
   case Builtin::BI__builtin_get_spirv_spec_constant_ushort:
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index 48935584f28a2..e1200c62eccf1 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -163,6 +163,8 @@ class CGHLSLRuntime {
   GENERATE_HLSL_INTRINSIC_FUNCTION(GroupMemoryBarrierWithGroupSync,
                                    group_memory_barrier_with_group_sync)
   GENERATE_HLSL_INTRINSIC_FUNCTION(GetDimensionsX, resource_getdimensions_x)
+  GENERATE_HLSL_INTRINSIC_FUNCTION(DdxCoarse, ddx_coarse)
+  GENERATE_HLSL_INTRINSIC_FUNCTION(DdyCoarse, ddy_coarse)
 
   //===----------------------------------------------------------------------===//
   // End of reserved area for HLSL intrinsic getters.
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 04fd68692d8d8..426fc796ffc20 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -70,6 +70,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSet.h"
@@ -103,6 +104,7 @@
 #include <memory>
 #include <optional>
 #include <set>
+#include <string>
 #include <utility>
 #if LLVM_ON_UNIX
 #include <unistd.h> // getpid
@@ -2050,12 +2052,17 @@ void Driver::generateCompilationDiagnostics(
   InputList Inputs;
   BuildInputs(C.getDefaultToolChain(), C.getArgs(), Inputs);
 
+  ArgStringList IRInputs;
   for (InputList::iterator it = Inputs.begin(), ie = Inputs.end(); it != ie;) {
     bool IgnoreInput = false;
 
-    // Ignore input from stdin or any inputs that cannot be preprocessed.
-    // Check type first as not all linker inputs have a value.
-    if (types::getPreprocessedType(it->first) == types::TY_INVALID) {
+    // Save IR inputs separately, ignore input from stdin or any other inputs
+    // that cannot be preprocessed. Check type first as not all linker inputs
+    // have a value.
+    if (types::isLLVMIR(it->first)) {
+      IRInputs.push_back(it->second->getValue());
+      IgnoreInput = true;
+    } else if (types::getPreprocessedType(it->first) == types::TY_INVALID) {
       IgnoreInput = true;
     } else if (!strcmp(it->second->getValue(), "-")) {
       Diag(clang::diag::note_drv_command_failed_diag_msg)
@@ -2072,7 +2079,7 @@ void Driver::generateCompilationDiagnostics(
     }
   }
 
-  if (Inputs.empty()) {
+  if (Inputs.empty() && IRInputs.empty()) {
     Diag(clang::diag::note_drv_command_failed_diag_msg)
         << "Error generating preprocessed source(s) - "
            "no preprocessable inputs.";
@@ -2095,46 +2102,82 @@ void Driver::generateCompilationDiagnostics(
     return;
   }
 
-  // Construct the list of abstract actions to perform for this compilation. On
-  // Darwin OSes this uses the driver-driver and builds universal actions.
-  const ToolChain &TC = C.getDefaultToolChain();
-  if (TC.getTriple().isOSBinFormatMachO())
-    BuildUniversalActions(C, TC, Inputs);
-  else
-    BuildActions(C, C.getArgs(), Inputs, C.getActions());
+  // If we only have IR inputs there's no need for preprocessing.
+  if (!Inputs.empty()) {
+    // Construct the list of abstract actions to perform for this compilation.
+    // On Darwin OSes this uses the driver-driver and builds universal actions.
+    const ToolChain &TC = C.getDefaultToolChain();
+    if (TC.getTriple().isOSBinFormatMachO())
+      BuildUniversalActions(C, TC, Inputs);
+    else
+      BuildActions(C, C.getArgs(), Inputs, C.getActions());
 
-  BuildJobs(C);
+    BuildJobs(C);
 
-  // If there were errors building the compilation, quit now.
-  if (Trap.hasErrorOccurred()) {
-    Diag(clang::diag::note_drv_command_failed_diag_msg)
-        << "Error generating preprocessed source(s).";
-    return;
-  }
+    // If there were errors building the compilation, quit now.
+    if (Trap.hasErrorOccurred()) {
+      Diag(clang::diag::note_drv_command_failed_diag_msg)
+          << "Error generating preprocessed source(s).";
+      return;
+    }
+    // Generate preprocessed output.
+    SmallVector<std::pair<int, const Command *>, 4> FailingCommands;
+    C.ExecuteJobs(C.getJobs(), FailingCommands);
 
-  // Generate preprocessed output.
-  SmallVector<std::pair<int, const Command *>, 4> FailingCommands;
-  C.ExecuteJobs(C.getJobs(), FailingCommands);
+    // If any of the preprocessing commands failed, clean up and exit.
+    if (!FailingCommands.empty()) {
+      Diag(clang::diag::note_drv_command_failed_diag_msg)
+          << "Error generating preprocessed source(s).";
+      return;
+    }
 
-  // If any of the preprocessing commands failed, clean up and exit.
-  if (!FailingCommands.empty()) {
-    Diag(clang::diag::note_drv_command_failed_diag_msg)
-        << "Error generating preprocessed source(s).";
-    return;
+    const ArgStringList &TempFiles = C.getTempFiles();
+    if (TempFiles.empty()) {
+      Diag(clang::diag::note_drv_command_failed_diag_msg)
+          << "Error generating preprocessed source(s).";
+      return;
+    }
   }
 
-  const ArgStringList &TempFiles = C.getTempFiles();
-  if (TempFiles.empty()) {
-    Diag(clang::diag::note_drv_command_failed_diag_msg)
-        << "Error generating preprocessed source(s).";
-    return;
+  // Copying filenames due to ownership.
+  const ArgStringList &Files = C.getTempFiles();
+  SmallVector<std::string> TempFiles(Files.begin(), Files.end());
+
+  // We'd like to copy the IR input file into our own temp file
+  // because the build system might try to clean-up after itself.
+  for (const char *Input : IRInputs) {
+    SmallString<128> Path;
+
+    StringRef Extension = llvm::sys::path::extension(Input);
+    if (!Extension.empty())
+      Extension = Extension.drop_front();
+
+    // Create by path, not by FD: copy_file() to an FD would leave it open.
+    std::error_code EC = llvm::sys::fs::createTemporaryFile(
+        llvm::sys::path::stem(Input), Extension, Path);
+    if (EC) {
+      Diag(clang::diag::note_drv_command_failed_diag_msg)
+          << "Error generating run script: " << "Failed copying IR input files"
+          << " " << EC.message();
+      return;
+    }
+
+    EC = llvm::sys::fs::copy_file(Input, Path.str());
+    if (EC) {
+      Diag(clang::diag::note_drv_command_failed_diag_msg)
+          << "Error generating run script: " << "Failed copying IR input files"
+          << " " << EC.message();
+      return;
+    }
+
+    TempFiles.push_back(std::string(Path.str()));
+  }
 
   Diag(clang::diag::note_drv_command_failed_diag_msg) << BugReporMsg;
 
   SmallString<128> VFS;
   SmallString<128> ReproCrashFilename;
-  for (const char *TempFile : TempFiles) {
+  for (std::string &TempFile : TempFiles) {
     Diag(clang::diag::note_drv_command_failed_diag_msg) << TempFile;
     if (Report)
       Report->TemporaryFiles.push_back(TempFile);
@@ -2151,7 +2194,7 @@ void Driver::generateCompilationDiagnostics(
   }
 
   for (const char *TempFile : SavedTemps)
-    C.addTempFile(TempFile);
+    TempFiles.push_back(TempFile);
 
   // Assume associated files are based off of the first temporary file.
   CrashReportInfo CrashInfo(TempFiles[0], VFS);
diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp
index 5ff7d83946137..77a2c73f0d446 100644
--- a/clang/lib/Driver/ToolChain.cpp
+++ b/clang/lib/Driver/ToolChain.cpp
@@ -1639,6 +1639,8 @@ SanitizerMask ToolChain::getSupportedSanitizers() const {
     Res |= SanitizerKind::ShadowCallStack;
   if (getTriple().isAArch64(64))
     Res |= SanitizerKind::MemTag;
+  if (getTriple().isBPF())
+    Res |= SanitizerKind::KernelAddress;
   return Res;
 }
 
diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
index 2e2703de18cb1..38b95ee90736a 100644
--- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
@@ -2946,5 +2946,73 @@ float4 radians(float4);
 _HLSL_BUILTIN_ALIAS(__builtin_hlsl_group_memory_barrier_with_group_sync)
 __attribute__((convergent)) void GroupMemoryBarrierWithGroupSync(void);
 
+//===----------------------------------------------------------------------===//
+// ddx_coarse builtin
+//===----------------------------------------------------------------------===//
+
+/// \fn T ddx_coarse(T value)
+/// \brief Computes a low precision partial derivative with respect to the
+/// screen-space x-coordinate.
+/// \param value The input value.
+///
+/// The return value is a floating point scalar or vector containing the low
+/// precision partial derivative of the input value.
+
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse)
+half ddx_coarse(half);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse)
+half2 ddx_coarse(half2);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse)
+half3 ddx_coarse(half3);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse)
+half4 ddx_coarse(half4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse)
+float ddx_coarse(float);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse)
+float2 ddx_coarse(float2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse)
+float3 ddx_coarse(float3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse)
+float4 ddx_coarse(float4);
+
+//===----------------------------------------------------------------------===//
+// ddy_coarse builtin
+//===----------------------------------------------------------------------===//
+
+/// \fn T ddy_coarse(T value)
+/// \brief Computes a low precision partial derivative with respect to the
+/// screen-space y-coordinate.
+/// \param value The input value.
+///
+/// The return value is a floating point scalar or vector containing the low
+/// precision partial derivative of the input value.
+
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse)
+half ddy_coarse(half);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse)
+half2 ddy_coarse(half2);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse)
+half3 ddy_coarse(half3);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse)
+half4 ddy_coarse(half4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse)
+float ddy_coarse(float);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse)
+float2 ddy_coarse(float2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse)
+float3 ddy_coarse(float3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse)
+float4 ddy_coarse(float4);
+
 } // namespace hlsl
 #endif //_HLSL_HLSL_ALIAS_INTRINSICS_H_
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 25b89d65847ad..b7aecadc86871 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -3355,6 +3355,11 @@ void Sema::mergeDeclAttributes(NamedDecl *New, Decl *Old,
   if (!foundAny) New->dropAttrs();
 }
 
+void Sema::CheckAttributesOnDeducedType(Decl *D) {
+  for (const Attr *A : D->attrs())
+    checkAttrIsTypeDependent(D, A);
+}
+
 // Returns the number of added attributes.
 template <class T>
 static unsigned propagateAttribute(ParmVarDecl *To, const ParmVarDecl *From,
@@ -13809,6 +13814,8 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, bool DirectInit) {
       return;
   }
 
+  this->CheckAttributesOnDeducedType(RealDecl);
+
   // dllimport cannot be used on variable definitions.
   if (VDecl->hasAttr<DLLImportAttr>() && !VDecl->isStaticDataMember()) {
     Diag(VDecl->getLocation(), diag::err_attribute_dllimport_data_definition);
@@ -14300,6 +14307,8 @@ void Sema::ActOnUninitializedDecl(Decl *RealDecl) {
         DeduceVariableDeclarationType(Var, false, nullptr))
       return;
 
+    this->CheckAttributesOnDeducedType(RealDecl);
+
     // C++11 [class.static.data]p3: A static data member can be declared with
     // the constexpr specifier; if so, its declaration shall specify
     // a brace-or-equal-initializer.
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index a9e7b44ac9d73..bda7aa32a9348 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -3511,16 +3511,6 @@ static void handleCleanupAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
     return;
   }
 
-  // We're currently more strict than GCC about what function types we accept.
-  // If this ever proves to be a problem it should be easy to fix.
-  QualType Ty = S.Context.getPointerType(cast<VarDecl>(D)->getType());
-  QualType ParamTy = FD->getParamDecl(0)->getType();
-  if (!S.IsAssignConvertCompatible(S.CheckAssignmentConstraints(
-          FD->getParamDecl(0)->getLocation(), ParamTy, Ty))) {
-    S.Diag(Loc, diag::err_attribute_cleanup_func_arg_incompatible_type)
-      << NI.getName() << ParamTy << Ty;
-    return;
-  }
   VarDecl *VD = cast<VarDecl>(D);
   // Create a reference to the variable declaration. This is a fake/dummy
   // reference.
@@ -8311,3 +8301,28 @@ void Sema::redelayDiagnostics(DelayedDiagnosticPool &pool) {
   assert(curPool && "re-emitting in undelayed context not supported");
   curPool->steal(pool);
 }
+
+void Sema::ActOnCleanupAttr(Decl *D, const Attr *A) {
+  VarDecl *VD = cast<VarDecl>(D);
+  if (VD->getType()->isDependentType())
+    return;
+
+  // Obtains the FunctionDecl that was found when handling the attribute
+  // earlier.
+  CleanupAttr *Attr = D->getAttr<CleanupAttr>();
+  FunctionDecl *FD = Attr->getFunctionDecl();
+  DeclarationNameInfo NI = FD->getNameInfo();
+
+  // We're currently more strict than GCC about what function types we accept.
+  // If this ever proves to be a problem it should be easy to fix.
+  QualType Ty = this->Context.getPointerType(VD->getType());
+  QualType ParamTy = FD->getParamDecl(0)->getType();
+  if (!this->IsAssignConvertCompatible(this->CheckAssignmentConstraints(
+          FD->getParamDecl(0)->getLocation(), ParamTy, Ty))) {
+    this->Diag(Attr->getArgLoc(),
+               diag::err_attribute_cleanup_func_arg_incompatible_type)
+        << NI.getName() << ParamTy << Ty;
+    D->dropAttr<CleanupAttr>();
+    return;
+  }
+}
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 2b9b3abbd5360..5555916c2536f 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -3239,7 +3239,9 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
   case Builtin::BI__builtin_hlsl_elementwise_degrees:
   case Builtin::BI__builtin_hlsl_elementwise_radians:
   case Builtin::BI__builtin_hlsl_elementwise_rsqrt:
-  case Builtin::BI__builtin_hlsl_elementwise_frac: {
+  case Builtin::BI__builtin_hlsl_elementwise_frac:
+  case Builtin::BI__builtin_hlsl_elementwise_ddx_coarse:
+  case Builtin::BI__builtin_hlsl_elementwise_ddy_coarse: {
     if (SemaRef.checkArgCount(TheCall, 1))
       return true;
     if (CheckAllArgTypesAreCorrect(&SemaRef, TheCall,
diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index 1b6b559c1227b..3a4b2ccc74350 100644
--- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -1007,6 +1007,15 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs,
       continue;
     }
 
+    if (auto *A = dyn_cast<CleanupAttr>(TmplAttr)) {
+      if (!New->hasAttr<CleanupAttr>()) {
+        auto *NewAttr = A->clone(Context);
+        NewAttr->setArgLoc(A->getArgLoc());
+        New->addAttr(NewAttr);
+      }
+      continue;
+    }
+
     assert(!TmplAttr->isPackExpansion());
     if (TmplAttr->isLateParsed() && LateAttrs) {
       // Late parsed attributes must be instantiated and attached after the
diff --git a/clang/test/CIR/CodeGen/call.c b/clang/test/CIR/CodeGen/call.c
index d780e37f3d153..99ae4506b1f16 100644
--- a/clang/test/CIR/CodeGen/call.c
+++ b/clang/test/CIR/CodeGen/call.c
@@ -130,7 +130,7 @@ int f12(void) {
 // OGCG:         %{{.+}} = call i32 @f10(i32 noundef 1) #[[ATTR0:.+]]
 // OGCG-NEXT:    %{{.+}} = call i32 @f11(i32 noundef 2) #[[ATTR1:.+]]
 
-// LLVM: attributes #[[ATTR0]] = { nounwind willreturn memory(read, errnomem: none) }
+// LLVM: attributes #[[ATTR0]] = { nounwind willreturn memory(read, errnomem: none, target_mem0: none, target_mem1: none) }
 // LLVM: attributes #[[ATTR1]] = { nounwind willreturn memory(none) }
 
 // OGCG: attributes #[[ATTR0]] = { nounwind willreturn memory(read) }
diff --git a/clang/test/CodeGenHLSL/builtins/ddx-coarse-builtin.hlsl b/clang/test/CodeGenHLSL/builtins/ddx-coarse-builtin.hlsl
new file mode 100644
index 0000000000000..01216eefadba2
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/ddx-coarse-builtin.hlsl
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -finclude-default-header  -x hlsl  -triple dxil-pc-shadermodel6.3-library %s \
+// RUN:  -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \
+// RUN:  FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -finclude-default-header  -x hlsl  -triple spirv-pc-vulkan-compute  %s \
+// RUN:  -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \
+// RUN:  FileCheck %s --check-prefixes=CHECK-SPIRV
+
+// CHECK-LABEL: half @_Z19test_f16_ddx_coarseDh
+// CHECK: %hlsl.ddx.coarse = call {{.*}} half @llvm.dx.ddx.coarse.f16(half %{{.*}})
+// CHECK: ret half %hlsl.ddx.coarse
+// CHECK-SPIRV-LABEL: half @_Z19test_f16_ddx_coarseDh
+// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} half @llvm.spv.ddx.coarse.f16(half %{{.*}})
+// CHECK-SPIRV: ret half %hlsl.ddx.coarse
+half test_f16_ddx_coarse(half val) {
+    return __builtin_hlsl_elementwise_ddx_coarse(val);
+}
+
+// CHECK-LABEL: float @_Z19test_f32_ddx_coarsef
+// CHECK: %hlsl.ddx.coarse = call {{.*}} float @llvm.dx.ddx.coarse.f32(float %{{.*}})
+// CHECK: ret float %hlsl.ddx.coarse
+// CHECK-SPIRV-LABEL: float @_Z19test_f32_ddx_coarsef
+// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} float @llvm.spv.ddx.coarse.f32(float %{{.*}})
+// CHECK-SPIRV: ret float %hlsl.ddx.coarse
+float test_f32_ddx_coarse(float val) {
+    return __builtin_hlsl_elementwise_ddx_coarse(val);
+}
diff --git a/clang/test/CodeGenHLSL/builtins/ddx-coarse.hlsl b/clang/test/CodeGenHLSL/builtins/ddx-coarse.hlsl
new file mode 100644
index 0000000000000..c200d4715629e
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/ddx-coarse.hlsl
@@ -0,0 +1,86 @@
+// RUN: %clang_cc1 -finclude-default-header  -x hlsl  -triple dxil-pc-shadermodel6.3-library %s \
+// RUN:  -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \
+// RUN:  FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -finclude-default-header  -x hlsl  -triple spirv-pc-vulkan-compute  %s \
+// RUN:  -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \
+// RUN:  FileCheck %s --check-prefixes=CHECK-SPIRV
+
+// CHECK-LABEL: half @_Z19test_f16_ddx_coarseDh
+// CHECK: %hlsl.ddx.coarse = call {{.*}} half @llvm.dx.ddx.coarse.f16(half %{{.*}})
+// CHECK: ret half %hlsl.ddx.coarse
+// CHECK-SPIRV-LABEL: half @_Z19test_f16_ddx_coarseDh
+// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} half @llvm.spv.ddx.coarse.f16(half %{{.*}})
+// CHECK-SPIRV: ret half %hlsl.ddx.coarse
+half test_f16_ddx_coarse(half val) {
+    return ddx_coarse(val);
+}
+
+// CHECK-LABEL: <2 x half> @_Z20test_f16_ddx_coarse2Dv2_Dh
+// CHECK: %hlsl.ddx.coarse = call {{.*}} <2 x half> @llvm.dx.ddx.coarse.v2f16(<2 x half> %{{.*}})
+// CHECK: ret <2 x half> %hlsl.ddx.coarse
+// CHECK-SPIRV-LABEL: <2 x half> @_Z20test_f16_ddx_coarse2Dv2_Dh
+// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} <2 x half> @llvm.spv.ddx.coarse.v2f16(<2 x half> %{{.*}})
+// CHECK-SPIRV: ret <2 x half> %hlsl.ddx.coarse
+half2 test_f16_ddx_coarse2(half2 val) {
+    return ddx_coarse(val);
+}
+
+// CHECK-LABEL: <3 x half> @_Z20test_f16_ddx_coarse3Dv3_Dh
+// CHECK: %hlsl.ddx.coarse = call {{.*}} <3 x half> @llvm.dx.ddx.coarse.v3f16(<3 x half> %{{.*}})
+// CHECK: ret <3 x half> %hlsl.ddx.coarse
+// CHECK-SPIRV-LABEL: <3 x half> @_Z20test_f16_ddx_coarse3Dv3_Dh
+// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} <3 x half> @llvm.spv.ddx.coarse.v3f16(<3 x half> %{{.*}})
+// CHECK-SPIRV: ret <3 x half> %hlsl.ddx.coarse
+half3 test_f16_ddx_coarse3(half3 val) {
+    return ddx_coarse(val);
+}
+
+// CHECK-LABEL: <4 x half> @_Z20test_f16_ddx_coarse4Dv4_Dh
+// CHECK: %hlsl.ddx.coarse = call {{.*}} <4 x half> @llvm.dx.ddx.coarse.v4f16(<4 x half> %{{.*}})
+// CHECK: ret <4 x half> %hlsl.ddx.coarse
+// CHECK-SPIRV-LABEL: <4 x half> @_Z20test_f16_ddx_coarse4Dv4_Dh
+// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} <4 x half> @llvm.spv.ddx.coarse.v4f16(<4 x half> %{{.*}})
+// CHECK-SPIRV: ret <4 x half> %hlsl.ddx.coarse
+half4 test_f16_ddx_coarse4(half4 val) {
+    return ddx_coarse(val);
+}
+
+// CHECK-LABEL: float @_Z19test_f32_ddx_coarsef
+// CHECK: %hlsl.ddx.coarse = call {{.*}} float @llvm.dx.ddx.coarse.f32(float %{{.*}})
+// CHECK: ret float %hlsl.ddx.coarse
+// CHECK-SPIRV-LABEL: float @_Z19test_f32_ddx_coarsef
+// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} float @llvm.spv.ddx.coarse.f32(float %{{.*}})
+// CHECK-SPIRV: ret float %hlsl.ddx.coarse
+float test_f32_ddx_coarse(float val) {
+    return ddx_coarse(val);
+}
+
+// CHECK-LABEL: <2 x float> @_Z20test_f32_ddx_coarse2Dv2_f
+// CHECK: %hlsl.ddx.coarse = call {{.*}} <2 x float> @llvm.dx.ddx.coarse.v2f32(<2 x float> %{{.*}})
+// CHECK: ret <2 x float> %hlsl.ddx.coarse
+// CHECK-SPIRV-LABEL: <2 x float> @_Z20test_f32_ddx_coarse2Dv2_f
+// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} <2 x float> @llvm.spv.ddx.coarse.v2f32(<2 x float> %{{.*}})
+// CHECK-SPIRV: ret <2 x float> %hlsl.ddx.coarse
+float2 test_f32_ddx_coarse2(float2 val) {
+    return ddx_coarse(val);
+}
+
+// CHECK-LABEL: <3 x float> @_Z20test_f32_ddx_coarse3Dv3_f
+// CHECK: %hlsl.ddx.coarse = call {{.*}} <3 x float> @llvm.dx.ddx.coarse.v3f32(<3 x float> %{{.*}})
+// CHECK: ret <3 x float> %hlsl.ddx.coarse
+// CHECK-SPIRV-LABEL: <3 x float> @_Z20test_f32_ddx_coarse3Dv3_f
+// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} <3 x float> @llvm.spv.ddx.coarse.v3f32(<3 x float> %{{.*}})
+// CHECK-SPIRV: ret <3 x float> %hlsl.ddx.coarse
+float3 test_f32_ddx_coarse3(float3 val) {
+    return ddx_coarse(val);
+}
+
+// CHECK-LABEL: <4 x float> @_Z20test_f32_ddx_coarse4Dv4_f
+// CHECK: %hlsl.ddx.coarse = call {{.*}} <4 x float> @llvm.dx.ddx.coarse.v4f32(<4 x float> %{{.*}})
+// CHECK: ret <4 x float> %hlsl.ddx.coarse
+// CHECK-SPIRV-LABEL: <4 x float> @_Z20test_f32_ddx_coarse4Dv4_f
+// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} <4 x float> @llvm.spv.ddx.coarse.v4f32(<4 x float> %{{.*}})
+// CHECK-SPIRV: ret <4 x float> %hlsl.ddx.coarse
+float4 test_f32_ddx_coarse4(float4 val) {
+    return ddx_coarse(val);
+}
diff --git a/clang/test/CodeGenHLSL/builtins/ddy-coarse-builtin.hlsl b/clang/test/CodeGenHLSL/builtins/ddy-coarse-builtin.hlsl
new file mode 100644
index 0000000000000..2967deb75031f
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/ddy-coarse-builtin.hlsl
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -finclude-default-header  -x hlsl  -triple dxil-pc-shadermodel6.3-library %s \
+// RUN:  -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \
+// RUN:  FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -finclude-default-header  -x hlsl  -triple spirv-pc-vulkan-compute  %s \
+// RUN:  -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \
+// RUN:  FileCheck %s --check-prefixes=CHECK-SPIRV
+
+// CHECK-LABEL: half @_Z19test_f16_ddy_coarseDh
+// CHECK: %hlsl.ddy.coarse = call {{.*}} half @llvm.dx.ddy.coarse.f16(half %{{.*}})
+// CHECK: ret half %hlsl.ddy.coarse
+// CHECK-SPIRV-LABEL: half @_Z19test_f16_ddy_coarseDh
+// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} half @llvm.spv.ddy.coarse.f16(half %{{.*}})
+// CHECK-SPIRV: ret half %hlsl.ddy.coarse
+half test_f16_ddy_coarse(half val) {
+    return __builtin_hlsl_elementwise_ddy_coarse(val);
+}
+
+// CHECK-LABEL: float @_Z19test_f32_ddy_coarsef
+// CHECK: %hlsl.ddy.coarse = call {{.*}} float @llvm.dx.ddy.coarse.f32(float %{{.*}})
+// CHECK: ret float %hlsl.ddy.coarse
+// CHECK-SPIRV-LABEL: float @_Z19test_f32_ddy_coarsef
+// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} float @llvm.spv.ddy.coarse.f32(float %{{.*}})
+// CHECK-SPIRV: ret float %hlsl.ddy.coarse
+float test_f32_ddy_coarse(float val) {
+    return __builtin_hlsl_elementwise_ddy_coarse(val);
+}
diff --git a/clang/test/CodeGenHLSL/builtins/ddy-coarse.hlsl b/clang/test/CodeGenHLSL/builtins/ddy-coarse.hlsl
new file mode 100644
index 0000000000000..faa972a1be326
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/ddy-coarse.hlsl
@@ -0,0 +1,86 @@
+// RUN: %clang_cc1 -finclude-default-header  -x hlsl  -triple dxil-pc-shadermodel6.3-library %s \
+// RUN:  -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \
+// RUN:  FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -finclude-default-header  -x hlsl  -triple spirv-pc-vulkan-compute  %s \
+// RUN:  -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \
+// RUN:  FileCheck %s --check-prefixes=CHECK-SPIRV
+
+// CHECK-LABEL: half @_Z19test_f16_ddy_coarseDh
+// CHECK: %hlsl.ddy.coarse = call {{.*}} half @llvm.dx.ddy.coarse.f16(half %{{.*}})
+// CHECK: ret half %hlsl.ddy.coarse
+// CHECK-SPIRV-LABEL: half @_Z19test_f16_ddy_coarseDh
+// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} half @llvm.spv.ddy.coarse.f16(half %{{.*}})
+// CHECK-SPIRV: ret half %hlsl.ddy.coarse
+half test_f16_ddy_coarse(half val) {
+    return ddy_coarse(val);
+}
+
+// CHECK-LABEL: <2 x half> @_Z20test_f16_ddy_coarse2Dv2_Dh
+// CHECK: %hlsl.ddy.coarse = call {{.*}} <2 x half> @llvm.dx.ddy.coarse.v2f16(<2 x half> %{{.*}})
+// CHECK: ret <2 x half> %hlsl.ddy.coarse
+// CHECK-SPIRV-LABEL: <2 x half> @_Z20test_f16_ddy_coarse2Dv2_Dh
+// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} <2 x half> @llvm.spv.ddy.coarse.v2f16(<2 x half> %{{.*}})
+// CHECK-SPIRV: ret <2 x half> %hlsl.ddy.coarse
+half2 test_f16_ddy_coarse2(half2 val) {
+    return ddy_coarse(val);
+}
+
+// CHECK-LABEL: <3 x half> @_Z20test_f16_ddy_coarse3Dv3_Dh
+// CHECK: %hlsl.ddy.coarse = call {{.*}} <3 x half> @llvm.dx.ddy.coarse.v3f16(<3 x half> %{{.*}})
+// CHECK: ret <3 x half> %hlsl.ddy.coarse
+// CHECK-SPIRV-LABEL: <3 x half> @_Z20test_f16_ddy_coarse3Dv3_Dh
+// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} <3 x half> @llvm.spv.ddy.coarse.v3f16(<3 x half> %{{.*}})
+// CHECK-SPIRV: ret <3 x half> %hlsl.ddy.coarse
+half3 test_f16_ddy_coarse3(half3 val) {
+    return ddy_coarse(val);
+}
+
+// CHECK-LABEL: <4 x half> @_Z20test_f16_ddy_coarse4Dv4_Dh
+// CHECK: %hlsl.ddy.coarse = call {{.*}} <4 x half> @llvm.dx.ddy.coarse.v4f16(<4 x half> %{{.*}})
+// CHECK: ret <4 x half> %hlsl.ddy.coarse
+// CHECK-SPIRV-LABEL: <4 x half> @_Z20test_f16_ddy_coarse4Dv4_Dh
+// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} <4 x half> @llvm.spv.ddy.coarse.v4f16(<4 x half> %{{.*}})
+// CHECK-SPIRV: ret <4 x half> %hlsl.ddy.coarse
+half4 test_f16_ddy_coarse4(half4 val) {
+    return ddy_coarse(val);
+}
+
+// CHECK-LABEL: float @_Z19test_f32_ddy_coarsef
+// CHECK: %hlsl.ddy.coarse = call {{.*}} float @llvm.dx.ddy.coarse.f32(float %{{.*}})
+// CHECK: ret float %hlsl.ddy.coarse
+// CHECK-SPIRV-LABEL: float @_Z19test_f32_ddy_coarsef
+// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} float @llvm.spv.ddy.coarse.f32(float %{{.*}})
+// CHECK-SPIRV: ret float %hlsl.ddy.coarse
+float test_f32_ddy_coarse(float val) {
+    return ddy_coarse(val);
+}
+
+// CHECK-LABEL: <2 x float> @_Z20test_f32_ddy_coarse2Dv2_f
+// CHECK: %hlsl.ddy.coarse = call {{.*}} <2 x float> @llvm.dx.ddy.coarse.v2f32(<2 x float> %{{.*}})
+// CHECK: ret <2 x float> %hlsl.ddy.coarse
+// CHECK-SPIRV-LABEL: <2 x float> @_Z20test_f32_ddy_coarse2Dv2_f
+// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} <2 x float> @llvm.spv.ddy.coarse.v2f32(<2 x float> %{{.*}})
+// CHECK-SPIRV: ret <2 x float> %hlsl.ddy.coarse
+float2 test_f32_ddy_coarse2(float2 val) {
+    return ddy_coarse(val);
+}
+
+// CHECK-LABEL: <3 x float> @_Z20test_f32_ddy_coarse3Dv3_f
+// CHECK: %hlsl.ddy.coarse = call {{.*}} <3 x float> @llvm.dx.ddy.coarse.v3f32(<3 x float> %{{.*}})
+// CHECK: ret <3 x float> %hlsl.ddy.coarse
+// CHECK-SPIRV-LABEL: <3 x float> @_Z20test_f32_ddy_coarse3Dv3_f
+// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} <3 x float> @llvm.spv.ddy.coarse.v3f32(<3 x float> %{{.*}})
+// CHECK-SPIRV: ret <3 x float> %hlsl.ddy.coarse
+float3 test_f32_ddy_coarse3(float3 val) {
+    return ddy_coarse(val);
+}
+
+// CHECK-LABEL: <4 x float> @_Z20test_f32_ddy_coarse4Dv4_f
+// CHECK: %hlsl.ddy.coarse = call {{.*}} <4 x float> @llvm.dx.ddy.coarse.v4f32(<4 x float> %{{.*}})
+// CHECK: ret <4 x float> %hlsl.ddy.coarse
+// CHECK-SPIRV-LABEL: <4 x float> @_Z20test_f32_ddy_coarse4Dv4_f
+// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} <4 x float> @llvm.spv.ddy.coarse.v4f32(<4 x float> %{{.*}})
+// CHECK-SPIRV: ret <4 x float> %hlsl.ddy.coarse
+float4 test_f32_ddy_coarse4(float4 val) {
+    return ddy_coarse(val);
+}
diff --git a/clang/test/Driver/crash-ir-repro.cpp b/clang/test/Driver/crash-ir-repro.cpp
new file mode 100644
index 0000000000000..1f31a5ca1bb34
--- /dev/null
+++ b/clang/test/Driver/crash-ir-repro.cpp
@@ -0,0 +1,15 @@
+// RUN: %clang -S -emit-llvm -o %t.ll %s
+// RUN: not %clang -S -DCRASH %s %t.ll 2>&1 | FileCheck %s
+
+// CHECK: Preprocessed source(s) and associated run script(s) are located at:
+// CHECK-NEXT: clang: note: diagnostic msg: {{.*}}.cpp
+// CHECK-NEXT: clang: note: diagnostic msg: {{.*}}.ll
+// CHECK-NEXT: clang: note: diagnostic msg: {{.*}}.sh
+
+#ifdef CRASH
+#pragma clang __debug parser_crash
+#endif
+
+int main() {
+  return 0;
+}
diff --git a/clang/test/Driver/linker-wrapper-hip-amdgcnspirv.c b/clang/test/Driver/linker-wrapper-hip-amdgcnspirv.c
new file mode 100644
index 0000000000000..429f7d3b9ee13
--- /dev/null
+++ b/clang/test/Driver/linker-wrapper-hip-amdgcnspirv.c
@@ -0,0 +1,16 @@
+// RUN: %clang -cc1 %s -triple "spirv64-amd-amdhsa" -emit-llvm-bc -o %t.bc
+// RUN: llvm-offload-binary -o %t.out "--image=file=%t.bc,triple=spirv64-amd-amdhsa,arch=amdgcnspirv,kind=hip"
+// RUN: clang-linker-wrapper \
+// RUN:     "--should-extract=amdgcnspirv" \
+// RUN:     "--host-triple=spirv64-amd-amdhsa" \
+// RUN:     "--linker-path=clang-offload-bundler" \
+// RUN:     "--emit-fatbin-only" \
+// RUN:     "-o" "%t.hipfb" \
+// RUN:     "%t.out" \
+// RUN:     --dry-run \
+// RUN: 2>&1 | FileCheck %s
+
+// clang-linker-wrapper was previously calling clang-offload-bundler with -targets=...,hip-amdgcn-amd-amdhsa--amdgcnspirv
+// This caused the runtime not to recognise the triple for the AMD SPIR-V code.
+
+// CHECK: {{".*clang-offload-bundler.*"}} {{.*}} -targets={{.*}},hip-spirv64-amd-amdhsa--amdgcnspirv
diff --git a/clang/test/Sema/type-dependent-attrs.c b/clang/test/Sema/type-dependent-attrs.c
new file mode 100644
index 0000000000000..13068b3f94ad4
--- /dev/null
+++ b/clang/test/Sema/type-dependent-attrs.c
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -std=c23 -fsyntax-only -verify %s
+
+int open() { return 0; }
+void close(typeof(open()) *) {}
+
+void cleanup_attr() {
+  int fd_int [[gnu::cleanup(close)]] = open();
+  auto fd_auto [[gnu::cleanup(close)]] = open();
+  float fd_invalid [[gnu::cleanup(close)]] = open(); // expected-error {{'cleanup' function 'close' parameter has type 'typeof (open()) *' (aka 'int *') which is incompatible with type 'float *'}}
+}
diff --git a/clang/test/SemaCXX/attr-cleanup.cpp b/clang/test/SemaCXX/attr-cleanup.cpp
index 32d10683edebb..6048b4e92ec3f 100644
--- a/clang/test/SemaCXX/attr-cleanup.cpp
+++ b/clang/test/SemaCXX/attr-cleanup.cpp
@@ -27,3 +27,28 @@ namespace E {
     int v1 __attribute__((cleanup(c3))); // expected-error {{'c3' is not a single function}}
   }
 }
+
+namespace F {
+  int open() { return 0; }
+  void close(decltype(open()) *) {}
+
+  void test1() {
+    auto fd [[gnu::cleanup(close)]] = open();
+  }
+
+  template <typename Ty>
+  void test2() {
+    Ty fd [[gnu::cleanup(close)]] = open();
+  }
+
+  template <typename Ty>
+  void test3() {
+    Ty fd [[gnu::cleanup(close)]] = open(); // #TEST3_CLEANUP
+  }
+
+  int main() {
+    test2<int>();
+    test3<float>(); // expected-error@#TEST3_CLEANUP {{'cleanup' function 'close' parameter has type 'decltype(open()) *' (aka 'int *') which is incompatible with type 'float *'}} \
+                       expected-note {{in instantiation of function template specialization 'F::test3<float>' requested here}}
+  }
+}
diff --git a/clang/test/SemaHLSL/BuiltIns/ddx-coarse-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/ddx-coarse-errors.hlsl
new file mode 100644
index 0000000000000..ebad1cc6826d8
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/ddx-coarse-errors.hlsl
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -verify
+// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-library %s -fnative-half-type -verify
+
+float no_arg() {
+  return __builtin_hlsl_elementwise_ddx_coarse();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+}
+
+float too_many_args(float val) {
+  return __builtin_hlsl_elementwise_ddx_coarse(val, val);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+}
+
+float test_integer_scalar_input(int val) {
+  return __builtin_hlsl_elementwise_ddx_coarse(val);
+  // expected-error@-1 {{1st argument must be a scalar or vector of 16 or 32 bit floating-point types (was 'int')}}
+}
+
+double test_double_scalar_input(double val) {
+  return __builtin_hlsl_elementwise_ddx_coarse(val);
+  // expected-error@-1 {{1st argument must be a scalar or vector of 16 or 32 bit floating-point types (was 'double')}}
+}
diff --git a/clang/test/SemaHLSL/BuiltIns/ddy-coarse-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/ddy-coarse-errors.hlsl
new file mode 100644
index 0000000000000..9cc23665882c8
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/ddy-coarse-errors.hlsl
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -verify
+// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-library %s -fnative-half-type -verify
+
+float no_arg() {
+  return __builtin_hlsl_elementwise_ddy_coarse();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+}
+
+float too_many_args(float val) {
+  return __builtin_hlsl_elementwise_ddy_coarse(val, val);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+}
+
+float test_integer_scalar_input(int val) {
+  return __builtin_hlsl_elementwise_ddy_coarse(val);
+  // expected-error@-1 {{1st argument must be a scalar or vector of 16 or 32 bit floating-point types (was 'int')}}
+}
+
+double test_double_scalar_input(double val) {
+  return __builtin_hlsl_elementwise_ddy_coarse(val);
+  // expected-error@-1 {{1st argument must be a scalar or vector of 16 or 32 bit floating-point types (was 'double')}}
+}
diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
index bd4b40192c9f2..4a4a43db6ef25 100644
--- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -439,8 +439,11 @@ fatbinary(ArrayRef<std::pair<StringRef, StringRef>> InputFiles,
         Args.MakeArgString(Twine("-compression-level=") + Arg->getValue()));
 
   SmallVector<StringRef> Targets = {"-targets=host-x86_64-unknown-linux-gnu"};
-  for (const auto &[File, Arch] : InputFiles)
-    Targets.push_back(Saver.save("hip-amdgcn-amd-amdhsa--" + Arch));
+  for (const auto &[File, Arch] : InputFiles) {
+    Targets.push_back(Saver.save(Arch == "amdgcnspirv"
+                                     ? "hip-spirv64-amd-amdhsa--" + Arch
+                                     : "hip-amdgcn-amd-amdhsa--" + Arch));
+  }
   CmdArgs.push_back(Saver.save(llvm::join(Targets, ",")));
 
 #ifdef _WIN32
diff --git a/clang/utils/TableGen/ClangAttrEmitter.cpp b/clang/utils/TableGen/ClangAttrEmitter.cpp
index e49dcb9b70b0f..bee9a01a3b01a 100644
--- a/clang/utils/TableGen/ClangAttrEmitter.cpp
+++ b/clang/utils/TableGen/ClangAttrEmitter.cpp
@@ -5045,6 +5045,26 @@ void EmitClangAttrParsedAttrKinds(const RecordKeeper &Records,
      << "}\n";
 }
 
+// Emits Sema calls for type dependent attributes
+void EmitClangAttrIsTypeDependent(const RecordKeeper &Records,
+                                  raw_ostream &OS) {
+  emitSourceFileHeader("Attribute is type dependent", OS, Records);
+
+  OS << "void checkAttrIsTypeDependent(Decl *D, const Attr *A) {\n";
+  OS << "  switch (A->getKind()) {\n";
+  OS << "  default:\n";
+  OS << "    break;\n";
+  for (const auto *A : Records.getAllDerivedDefinitions("Attr")) {
+    if (A->getValueAsBit("IsTypeDependent")) {
+      OS << "  case attr::" << A->getName() << ":\n";
+      OS << "    ActOn" << A->getName() << "Attr(D, A);\n";
+      OS << "    break;\n";
+    }
+  }
+  OS << "  }\n";
+  OS << "}\n";
+}
+
 // Emits the code to dump an attribute.
 void EmitClangAttrTextNodeDump(const RecordKeeper &Records, raw_ostream &OS) {
   emitSourceFileHeader("Attribute text node dumper", OS, Records);
diff --git a/clang/utils/TableGen/TableGen.cpp b/clang/utils/TableGen/TableGen.cpp
index 866040d503646..707ce617cb2d0 100644
--- a/clang/utils/TableGen/TableGen.cpp
+++ b/clang/utils/TableGen/TableGen.cpp
@@ -43,6 +43,7 @@ enum ActionType {
   GenClangAttrParsedAttrList,
   GenClangAttrParsedAttrImpl,
   GenClangAttrParsedAttrKinds,
+  GenClangAttrIsTypeDependent,
   GenClangAttrTextNodeDump,
   GenClangAttrNodeTraverse,
   GenClangBasicReader,
@@ -179,6 +180,9 @@ cl::opt<ActionType> Action(
         clEnumValN(GenClangAttrParsedAttrKinds,
                    "gen-clang-attr-parsed-attr-kinds",
                    "Generate a clang parsed attribute kinds"),
+        clEnumValN(GenClangAttrIsTypeDependent,
+                   "gen-clang-attr-is-type-dependent",
+                   "Generate clang is type dependent attribute code"),
         clEnumValN(GenClangAttrTextNodeDump, "gen-clang-attr-text-node-dump",
                    "Generate clang attribute text node dumper"),
         clEnumValN(GenClangAttrNodeTraverse, "gen-clang-attr-node-traverse",
@@ -423,6 +427,9 @@ bool ClangTableGenMain(raw_ostream &OS, const RecordKeeper &Records) {
   case GenClangAttrParsedAttrKinds:
     EmitClangAttrParsedAttrKinds(Records, OS);
     break;
+  case GenClangAttrIsTypeDependent:
+    EmitClangAttrIsTypeDependent(Records, OS);
+    break;
   case GenClangAttrTextNodeDump:
     EmitClangAttrTextNodeDump(Records, OS);
     break;
diff --git a/clang/utils/TableGen/TableGenBackends.h b/clang/utils/TableGen/TableGenBackends.h
index fa49dcd289bc2..058bda3ebd246 100644
--- a/clang/utils/TableGen/TableGenBackends.h
+++ b/clang/utils/TableGen/TableGenBackends.h
@@ -82,6 +82,8 @@ void EmitClangAttrParsedAttrImpl(const llvm::RecordKeeper &Records,
                                  llvm::raw_ostream &OS);
 void EmitClangAttrParsedAttrKinds(const llvm::RecordKeeper &Records,
                                   llvm::raw_ostream &OS);
+void EmitClangAttrIsTypeDependent(const llvm::RecordKeeper &Records,
+                                  llvm::raw_ostream &OS);
 void EmitClangAttrTextNodeDump(const llvm::RecordKeeper &Records,
                                llvm::raw_ostream &OS);
 void EmitClangAttrNodeTraverse(const llvm::RecordKeeper &Records,
diff --git a/compiler-rt/test/asan/TestCases/Darwin/atos-symbolizer-dyld-root-path.cpp b/compiler-rt/test/asan/TestCases/Darwin/atos-symbolizer-dyld-root-path.cpp
index 664471b6987a8..4201d49df4d74 100644
--- a/compiler-rt/test/asan/TestCases/Darwin/atos-symbolizer-dyld-root-path.cpp
+++ b/compiler-rt/test/asan/TestCases/Darwin/atos-symbolizer-dyld-root-path.cpp
@@ -1,6 +1,7 @@
 // Check that when having a DYLD_ROOT_PATH set, the symbolizer still works.
 // RUN: %clangxx_asan -O0 %s -o %t
-// RUN: %env_asan_opts=verbosity=2 DYLD_ROOT_PATH="/" ASAN_SYMBOLIZER_PATH=$(which atos) \
+// RUN: which atos | tr -d '\n' > %t.symbolizer_path
+// RUN: %env_asan_opts=verbosity=2 DYLD_ROOT_PATH="/" ASAN_SYMBOLIZER_PATH=%{readfile:%t.symbolizer_path} \
 // RUN:   not %run %t 2>&1 | FileCheck %s
 //
 // Due to a bug in atos, this only works on x86_64.
diff --git a/compiler-rt/test/asan/TestCases/Darwin/atos-symbolizer.cpp b/compiler-rt/test/asan/TestCases/Darwin/atos-symbolizer.cpp
index bab4e4f3765c2..7487bd4cb40e6 100644
--- a/compiler-rt/test/asan/TestCases/Darwin/atos-symbolizer.cpp
+++ b/compiler-rt/test/asan/TestCases/Darwin/atos-symbolizer.cpp
@@ -1,7 +1,8 @@
 // Check that the `atos` symbolizer works.
 
 // RUN: %clangxx_asan -O0 %s -o %t
-// RUN: %env_asan_opts=verbosity=2 ASAN_SYMBOLIZER_PATH=$(which atos) not %run %t 2>&1 | FileCheck %s
+// RUN: which atos | tr -d '\n' > %t.symbolizer_path
+// RUN: %env_asan_opts=verbosity=2 ASAN_SYMBOLIZER_PATH=%{readfile:%t.symbolizer_path} not %run %t 2>&1 | FileCheck %s
 
 // Path returned by `which atos` is invalid on iOS.
 // UNSUPPORTED: ios, i386-darwin
diff --git a/compiler-rt/test/asan/TestCases/Darwin/dyld_insert_libraries_reexec.cpp b/compiler-rt/test/asan/TestCases/Darwin/dyld_insert_libraries_reexec.cpp
index 0fec18b89411a..145e162a21c0e 100644
--- a/compiler-rt/test/asan/TestCases/Darwin/dyld_insert_libraries_reexec.cpp
+++ b/compiler-rt/test/asan/TestCases/Darwin/dyld_insert_libraries_reexec.cpp
@@ -4,7 +4,8 @@
 // UNSUPPORTED: ios
 
 // RUN: rm -rf %t && mkdir -p %t
-// RUN: cp `%clang_asan -print-file-name=lib`/darwin/libclang_rt.asan_osx_dynamic.dylib \
+// RUN: %clang_asan -print-file-name=lib | tr -d '\n' > %t.lib_name
+// RUN: cp %{readfile:%t.lib_name}/darwin/libclang_rt.asan_osx_dynamic.dylib \
 // RUN:   %t/libclang_rt.asan_osx_dynamic.dylib
 // RUN: %clangxx_asan %s -o %t/a.out
 
diff --git a/compiler-rt/test/asan/TestCases/Darwin/dyld_insert_libraries_remove.cpp b/compiler-rt/test/asan/TestCases/Darwin/dyld_insert_libraries_remove.cpp
index 0672e064a1904..872848d075eaf 100644
--- a/compiler-rt/test/asan/TestCases/Darwin/dyld_insert_libraries_remove.cpp
+++ b/compiler-rt/test/asan/TestCases/Darwin/dyld_insert_libraries_remove.cpp
@@ -5,29 +5,27 @@
 // UNSUPPORTED: ios
 
 // RUN: rm -rf %t && mkdir -p %t
-// RUN: cp `%clang_asan -print-file-name=lib`/darwin/libclang_rt.asan_osx_dynamic.dylib \
+// RUN: %clang_asan -print-file-name=lib | tr -d '\n' > %t.lib_name
+// RUN: cp %{readfile:%t.lib_name}/darwin/libclang_rt.asan_osx_dynamic.dylib \
 // RUN:   %t/libclang_rt.asan_osx_dynamic.dylib
 
 // RUN: %clangxx_asan %s -o %t/a.out
 // RUN: %clangxx -DSHARED_LIB %s \
 // RUN:     -dynamiclib -o %t/dummy-so.dylib
 
-// RUN: ( cd %t && \
-// RUN:   DYLD_INSERT_LIBRARIES=@executable_path/libclang_rt.asan_osx_dynamic.dylib:dummy-so.dylib \
-// RUN:   %run ./a.out 2>&1 ) | FileCheck %s || exit 1
+// RUN: cd %t
+// RUN: env DYLD_INSERT_LIBRARIES=@executable_path/libclang_rt.asan_osx_dynamic.dylib:dummy-so.dylib \
+// RUN: %run ./a.out 2>&1 | FileCheck %s
 
-// RUN: ( cd %t && \
-// RUN:   DYLD_INSERT_LIBRARIES=libclang_rt.asan_osx_dynamic.dylib:dummy-so.dylib \
-// RUN:   %run ./a.out 2>&1 ) | FileCheck %s || exit 1
+// RUN: env DYLD_INSERT_LIBRARIES=libclang_rt.asan_osx_dynamic.dylib:dummy-so.dylib \
+// RUN: %run ./a.out 2>&1 | FileCheck %s
 
-// RUN: ( cd %t && \
-// RUN:   %env_asan_opts=strip_env=0 \
-// RUN:   DYLD_INSERT_LIBRARIES=libclang_rt.asan_osx_dynamic.dylib:dummy-so.dylib \
-// RUN:   %run ./a.out 2>&1 ) | FileCheck %s --check-prefix=CHECK-KEEP || exit 1
+// RUN: %env_asan_opts=strip_env=0 \
+// RUN: DYLD_INSERT_LIBRARIES=libclang_rt.asan_osx_dynamic.dylib:dummy-so.dylib \
+// RUN: %run ./a.out 2>&1 | FileCheck %s --check-prefix=CHECK-KEEP
 
-// RUN: ( cd %t && \
-// RUN:   DYLD_INSERT_LIBRARIES=%t/libclang_rt.asan_osx_dynamic.dylib:dummy-so.dylib \
-// RUN:   %run ./a.out 2>&1 ) | FileCheck %s || exit 1
+// RUN: env DYLD_INSERT_LIBRARIES=%t/libclang_rt.asan_osx_dynamic.dylib:dummy-so.dylib \
+// RUN: %run ./a.out 2>&1 | FileCheck %s
 
 #if !defined(SHARED_LIB)
 #include <stdio.h>
diff --git a/compiler-rt/test/asan/TestCases/Darwin/init_for_dlopen.cpp b/compiler-rt/test/asan/TestCases/Darwin/init_for_dlopen.cpp
index 3bf8e99703a08..9bb652cc79438 100644
--- a/compiler-rt/test/asan/TestCases/Darwin/init_for_dlopen.cpp
+++ b/compiler-rt/test/asan/TestCases/Darwin/init_for_dlopen.cpp
@@ -5,7 +5,7 @@
 // - By default the lit config sets this but we don't want this
 //   test to implicitly depend on this.
 // - It avoids requiring `--crash` to be passed to `not`.
-// RUN: APPLE_ASAN_INIT_FOR_DLOPEN=0 %env_asan_opts=abort_on_error=0 not \
+// RUN: %env_asan_opts=abort_on_error=0 APPLE_ASAN_INIT_FOR_DLOPEN=0 not \
 // RUN:   %run %t %shared_libasan 2>&1 | \
 // RUN:   FileCheck -check-prefix=CHECK-DL-OPEN-FAIL %s
 // RUN: env -u APPLE_ASAN_INIT_FOR_DLOPEN %env_asan_opts=abort_on_error=0 not \
diff --git a/compiler-rt/test/asan/TestCases/Darwin/malloc_zone-protected.cpp b/compiler-rt/test/asan/TestCases/Darwin/malloc_zone-protected.cpp
index 125b544724d3f..ac3c5898f271a 100644
--- a/compiler-rt/test/asan/TestCases/Darwin/malloc_zone-protected.cpp
+++ b/compiler-rt/test/asan/TestCases/Darwin/malloc_zone-protected.cpp
@@ -3,8 +3,7 @@
 #include <stdio.h>
 
 // RUN: %clangxx_asan %s -o %t
-// RUN: ASAN_OPTIONS="abort_on_error=1" not --crash %run %t 2>&1 | FileCheck %s
-
+// RUN: env ASAN_OPTIONS="abort_on_error=1" not --crash %run %t 2>&1 | FileCheck %s
 
 void *pwn(malloc_zone_t *unused_zone, size_t unused_size) {
   printf("PWNED\n");
diff --git a/compiler-rt/test/asan_abi/TestCases/Darwin/llvm_interface_symbols.cpp b/compiler-rt/test/asan_abi/TestCases/Darwin/llvm_interface_symbols.cpp
index 5da18aa971d43..ba7b5e5815bd6 100644
--- a/compiler-rt/test/asan_abi/TestCases/Darwin/llvm_interface_symbols.cpp
+++ b/compiler-rt/test/asan_abi/TestCases/Darwin/llvm_interface_symbols.cpp
@@ -24,7 +24,8 @@
 // RUN: diff %t.imports-sorted %t.exports-sorted
 
 // Ensure that there is no dynamic dylib linked.
-// RUN: otool -L %t | (! grep -q "dynamic.dylib")
+// RUN: otool -L %t > %t.libs
+// RUN: not grep -q "dynamic.dylib" < %t.libs
 
 // UNSUPPORTED: ios
 
diff --git a/compiler-rt/test/lit.common.cfg.py b/compiler-rt/test/lit.common.cfg.py
index 3f7dd8e402b78..ea22fb0babc46 100644
--- a/compiler-rt/test/lit.common.cfg.py
+++ b/compiler-rt/test/lit.common.cfg.py
@@ -875,7 +875,7 @@ def is_windows_lto_supported():
         config.substitutions.append(
             (
                 "%ld_flags_rpath_so" + postfix,
-                "-install_name @rpath/`basename %dynamiclib{}`".format(postfix),
+                "-install_name @rpath/%base_dynamiclib{}".format(postfix),
             )
         )
     elif config.target_os in ("FreeBSD", "NetBSD", "OpenBSD"):
@@ -908,6 +908,9 @@ def is_windows_lto_supported():
     config.substitutions.append(
         ("%dynamiclib" + postfix, "%t.dir/%xdynamiclib_filename" + postfix)
     )
+    config.substitutions.append(
+        ("%base_dynamiclib" + postfix, "%xdynamiclib_filename" + postfix)
+    )
     config.substitutions.append(
         (
             "%xdynamiclib_filename" + postfix,
diff --git a/compiler-rt/test/rtsan/Darwin/dlopen.cpp b/compiler-rt/test/rtsan/Darwin/dlopen.cpp
index 1aabe5cb6e580..435a4353b7026 100644
--- a/compiler-rt/test/rtsan/Darwin/dlopen.cpp
+++ b/compiler-rt/test/rtsan/Darwin/dlopen.cpp
@@ -8,18 +8,19 @@
 // RUN: %clangxx -fsanitize=realtime %s -o %t.so -shared -DSHARED_LIB
 // RUN: %clangxx %s -o %t
 
-// RUN: RTSAN_DYLIB_PATH=`%clangxx -fsanitize=realtime %s -### 2>&1 \
+// RUN: %clangxx -fsanitize=realtime %s -### 2>&1 \
 // RUN:   | grep "libclang_rt.rtsan_osx_dynamic.dylib" \
-// RUN:   | sed -e 's/.*"\(.*libclang_rt.rtsan_osx_dynamic.dylib\)".*/\1/'`
+// RUN:   | sed -e 's/.*"\(.*libclang_rt.rtsan_osx_dynamic.dylib\)".*/\1/' \
+// RUN:   | tr -d '\n' > %t.rtsan_dylib_path
 
 // Launching a non-instrumented binary that dlopen's an instrumented library should fail.
 // RUN: not %run %t %t.so 2>&1 | FileCheck %s --check-prefix=CHECK-FAIL
 // Launching a non-instrumented binary with an explicit DYLD_INSERT_LIBRARIES should work.
-// RUN: DYLD_INSERT_LIBRARIES=$RTSAN_DYLIB_PATH %run %t %t.so 2>&1 | FileCheck %s
+// RUN: env DYLD_INSERT_LIBRARIES="%{readfile:%t.rtsan_dylib_path}" %run %t %t.so 2>&1 | FileCheck %s
 
 // Launching an instrumented binary with the DYLD_INSERT_LIBRARIES env variable has no error
 // RUN: %clangxx -fsanitize=realtime %s -o %t
-// RUN: DYLD_INSERT_LIBRARIES=$RTSAN_DYLIB_PATH %run %t %t.so 2>&1 | FileCheck %s --check-prefix=CHECK-INSTRUMENTED
+// RUN: env DYLD_INSERT_LIBRARIES="%{readfile:%t.rtsan_dylib_path}" %run %t %t.so 2>&1 | FileCheck %s --check-prefix=CHECK-INSTRUMENTED
 
 #include <dlfcn.h>
 #include <stdio.h>
diff --git a/compiler-rt/test/tsan/Darwin/dlopen.cpp b/compiler-rt/test/tsan/Darwin/dlopen.cpp
index 3d12b815f9c25..2ab052f1c0c26 100644
--- a/compiler-rt/test/tsan/Darwin/dlopen.cpp
+++ b/compiler-rt/test/tsan/Darwin/dlopen.cpp
@@ -9,14 +9,15 @@
 // RUN: %clangxx_tsan %s -o %t.so -shared -DSHARED_LIB
 // RUN: %clangxx_tsan -fno-sanitize=thread %s -o %t
 
-// RUN: TSAN_DYLIB_PATH=`%clangxx_tsan %s -### 2>&1 \
+// RUN: %clangxx_tsan %s -### 2>&1 \
 // RUN:   | grep "libclang_rt.tsan_osx_dynamic.dylib" \
-// RUN:   | sed -e 's/.*"\(.*libclang_rt.tsan_osx_dynamic.dylib\)".*/\1/'`
+// RUN:   | sed -e 's/.*"\(.*libclang_rt.tsan_osx_dynamic.dylib\)".*/\1/' \
+// RUN:   | tr -d '\n' > %t.tsan_dylib_path
 
 // Launching a non-instrumented binary that dlopen's an instrumented library should fail.
 // RUN: not %run %t %t.so 2>&1 | FileCheck %s --check-prefix=CHECK-FAIL
 // Launching a non-instrumented binary with an explicit DYLD_INSERT_LIBRARIES should work.
-// RUN: DYLD_INSERT_LIBRARIES=$TSAN_DYLIB_PATH %run %t %t.so 2>&1 | FileCheck %s
+// RUN: env DYLD_INSERT_LIBRARIES="%{readfile:%t.tsan_dylib_path}" %run %t %t.so 2>&1 | FileCheck %s
 
 #include <dlfcn.h>
 #include <pthread.h>
diff --git a/compiler-rt/test/tsan/Darwin/external-ignore-noninstrumented.cpp b/compiler-rt/test/tsan/Darwin/external-ignore-noninstrumented.cpp
index 916b0b893fc0d..cfa46e0f0a213 100644
--- a/compiler-rt/test/tsan/Darwin/external-ignore-noninstrumented.cpp
+++ b/compiler-rt/test/tsan/Darwin/external-ignore-noninstrumented.cpp
@@ -1,8 +1,10 @@
+// RUN: basename %t-lib.dylib | tr -d '\n' > %t.basename
 // RUN: %clangxx_tsan -shared %p/external-lib.cpp -fno-sanitize=thread -DUSE_TSAN_CALLBACKS \
-// RUN:   -o %t-lib.dylib -install_name @rpath/`basename %t-lib.dylib`
+// RUN:   -o %t-lib.dylib -install_name @rpath/%{readfile:%t.basename}
 
+// RUN: basename %t-module.dylib | tr -d '\n' > %t.basename
 // RUN: %clangxx_tsan -shared %p/external-noninstrumented-module.cpp %t-lib.dylib -fno-sanitize=thread \
-// RUN:   -o %t-module.dylib -install_name @rpath/`basename %t-module.dylib`
+// RUN:   -o %t-module.dylib -install_name @rpath/%{readfile:%t.basename}
 
 // RUN: %clangxx_tsan %s %t-module.dylib -o %t
 // RUN: %run %t 2>&1 | FileCheck %s
diff --git a/compiler-rt/test/tsan/Darwin/external.cpp b/compiler-rt/test/tsan/Darwin/external.cpp
index bf189eb1d6b5b..52fae36f0e1f4 100644
--- a/compiler-rt/test/tsan/Darwin/external.cpp
+++ b/compiler-rt/test/tsan/Darwin/external.cpp
@@ -1,14 +1,17 @@
+// RUN: basename %t-lib-instrumented.dylib | tr -d '\n' > %t.basename
 // RUN: %clangxx_tsan %p/external-lib.cpp -shared \
 // RUN:                               -o %t-lib-instrumented.dylib \
-// RUN:   -install_name @rpath/`basename %t-lib-instrumented.dylib`
+// RUN:   -install_name @rpath/%{readfile:%t.basename}
 
+// RUN: basename %t-lib-noninstrumented.dylib | tr -d '\n' > %t.basename
 // RUN: %clangxx_tsan %p/external-lib.cpp -shared -fno-sanitize=thread \
 // RUN:                               -o %t-lib-noninstrumented.dylib \
-// RUN:   -install_name @rpath/`basename %t-lib-noninstrumented.dylib`
+// RUN:   -install_name @rpath/%{readfile:%t.basename}
 
+// RUN: basename %t-lib-noninstrumented-callbacks.dylib | tr -d '\n' > %t.basename
 // RUN: %clangxx_tsan %p/external-lib.cpp -shared -fno-sanitize=thread -DUSE_TSAN_CALLBACKS \
 // RUN:                               -o %t-lib-noninstrumented-callbacks.dylib \
-// RUN:   -install_name @rpath/`basename %t-lib-noninstrumented-callbacks.dylib`
+// RUN:   -install_name @rpath/%{readfile:%t.basename}
 
 // RUN: %clangxx_tsan %s %t-lib-instrumented.dylib -o %t-lib-instrumented
 // RUN: %clangxx_tsan %s %t-lib-noninstrumented.dylib -o %t-lib-noninstrumented
diff --git a/compiler-rt/test/tsan/Darwin/malloc-stack-logging.cpp b/compiler-rt/test/tsan/Darwin/malloc-stack-logging.cpp
index 8d9c2122d0e6c..0a96e346f8012 100644
--- a/compiler-rt/test/tsan/Darwin/malloc-stack-logging.cpp
+++ b/compiler-rt/test/tsan/Darwin/malloc-stack-logging.cpp
@@ -4,7 +4,7 @@
 // use syscalls directly) to make sure other interceptors aren't called.
 
 // RUN: %clangxx_tsan -O1 %s -o %t
-// RUN: MallocStackLogging=1 %run %t 2>&1 | FileCheck %s
+// RUN: env MallocStackLogging=1 %run %t 2>&1 | FileCheck %s
 #include <pthread.h>
 #include <stdlib.h>
 #include <stdio.h>
diff --git a/flang/include/flang/Parser/openmp-utils.h b/flang/include/flang/Parser/openmp-utils.h
index 8fa4a84aff06d..36556f8dd7f4a 100644
--- a/flang/include/flang/Parser/openmp-utils.h
+++ b/flang/include/flang/Parser/openmp-utils.h
@@ -137,6 +137,8 @@ const T *GetFirstArgument(const OmpDirectiveSpecification &spec) {
 
 const BlockConstruct *GetFortranBlockConstruct(
     const ExecutionPartConstruct &epc);
+const Block &GetInnermostExecPart(const Block &block);
+bool IsStrictlyStructuredBlock(const Block &block);
 
 const OmpCombinerExpression *GetCombinerExpr(
     const OmpReductionSpecifier &rspec);
diff --git a/flang/include/flang/Semantics/openmp-utils.h b/flang/include/flang/Semantics/openmp-utils.h
index 14a4f0e93bda5..f5739ab16d643 100644
--- a/flang/include/flang/Semantics/openmp-utils.h
+++ b/flang/include/flang/Semantics/openmp-utils.h
@@ -97,8 +97,6 @@ const SomeExpr *HasStorageOverlap(
     const SomeExpr &base, llvm::ArrayRef<SomeExpr> exprs);
 bool IsAssignment(const parser::ActionStmt *x);
 bool IsPointerAssignment(const evaluate::Assignment &x);
-const parser::Block &GetInnermostExecPart(const parser::Block &block);
-bool IsStrictlyStructuredBlock(const parser::Block &block);
 } // namespace omp
 } // namespace Fortran::semantics
 
diff --git a/flang/lib/Parser/openmp-utils.cpp b/flang/lib/Parser/openmp-utils.cpp
index b9d3763cdd06d..2424828293c73 100644
--- a/flang/lib/Parser/openmp-utils.cpp
+++ b/flang/lib/Parser/openmp-utils.cpp
@@ -93,6 +93,34 @@ const BlockConstruct *GetFortranBlockConstruct(
   return nullptr;
 }
 
+/// parser::Block is a list of executable constructs; parser::BlockConstruct
+/// is Fortran's BLOCK/ENDBLOCK construct.
+/// Strip the outermost BlockConstructs and return a reference to the Block
+/// in the executable part of the innermost stripped construct: while the
+/// current Block has exactly one entry and that entry is a BlockConstruct,
+/// descend into the Block contained within. If the given `block` does not
+/// consist of a single BlockConstruct, `block` itself is returned.
+const Block &GetInnermostExecPart(const Block &block) {
+  const Block *iter{&block};
+  while (iter->size() == 1) {
+    const ExecutionPartConstruct &ep{iter->front()};
+    if (auto *bc{GetFortranBlockConstruct(ep)}) {
+      iter = &std::get<Block>(bc->t);
+    } else {
+      break;
+    }
+  }
+  return *iter;
+}
+
+bool IsStrictlyStructuredBlock(const Block &block) {
+  if (block.size() == 1) { // Sole entry must be a BLOCK/ENDBLOCK construct.
+    return GetFortranBlockConstruct(block.front()) != nullptr;
+  } else {
+    return false;
+  }
+}
+
 const OmpCombinerExpression *GetCombinerExpr(
     const OmpReductionSpecifier &rspec) {
   return addr_if(std::get<std::optional<OmpCombinerExpression>>(rspec.t));
diff --git a/flang/lib/Semantics/check-omp-atomic.cpp b/flang/lib/Semantics/check-omp-atomic.cpp
index ec03e6fe2d920..b9e34ca6e74df 100644
--- a/flang/lib/Semantics/check-omp-atomic.cpp
+++ b/flang/lib/Semantics/check-omp-atomic.cpp
@@ -19,6 +19,7 @@
 #include "flang/Evaluate/rewrite.h"
 #include "flang/Evaluate/tools.h"
 #include "flang/Parser/char-block.h"
+#include "flang/Parser/openmp-utils.h"
 #include "flang/Parser/parse-tree.h"
 #include "flang/Semantics/openmp-utils.h"
 #include "flang/Semantics/symbol.h"
@@ -41,6 +42,7 @@
 
 namespace Fortran::semantics {
 
+using namespace Fortran::parser::omp;
 using namespace Fortran::semantics::omp;
 
 namespace operation = Fortran::evaluate::operation;
diff --git a/flang/lib/Semantics/openmp-utils.cpp b/flang/lib/Semantics/openmp-utils.cpp
index 4a40d6eec17bb..18a37d64a3b5a 100644
--- a/flang/lib/Semantics/openmp-utils.cpp
+++ b/flang/lib/Semantics/openmp-utils.cpp
@@ -496,32 +496,4 @@ bool IsPointerAssignment(const evaluate::Assignment &x) {
   return std::holds_alternative<evaluate::Assignment::BoundsSpec>(x.u) ||
       std::holds_alternative<evaluate::Assignment::BoundsRemapping>(x.u);
 }
-
-/// parser::Block is a list of executable constructs, parser::BlockConstruct
-/// is Fortran's BLOCK/ENDBLOCK construct.
-/// Strip the outermost BlockConstructs, return the reference to the Block
-/// in the executable part of the innermost of the stripped constructs.
-/// Specifically, if the given `block` has a single entry (it's a list), and
-/// the entry is a BlockConstruct, get the Block contained within. Repeat
-/// this step as many times as possible.
-const parser::Block &GetInnermostExecPart(const parser::Block &block) {
-  const parser::Block *iter{&block};
-  while (iter->size() == 1) {
-    const parser::ExecutionPartConstruct &ep{iter->front()};
-    if (auto *bc{GetFortranBlockConstruct(ep)}) {
-      iter = &std::get<parser::Block>(bc->t);
-    } else {
-      break;
-    }
-  }
-  return *iter;
-}
-
-bool IsStrictlyStructuredBlock(const parser::Block &block) {
-  if (block.size() == 1) {
-    return GetFortranBlockConstruct(block.front()) != nullptr;
-  } else {
-    return false;
-  }
-}
 } // namespace Fortran::semantics::omp
diff --git a/flang/test/Lower/PowerPC/ppc-vec-load-elem-order.f90 b/flang/test/Lower/PowerPC/ppc-vec-load-elem-order.f90
index 355fd6c3a742a..b17c3f1bdc4e7 100644
--- a/flang/test/Lower/PowerPC/ppc-vec-load-elem-order.f90
+++ b/flang/test/Lower/PowerPC/ppc-vec-load-elem-order.f90
@@ -394,7 +394,7 @@ subroutine vec_xl_testi8a(arg1, arg2, res)
   vector(integer(1)) :: res
   res = vec_xl(arg1, arg2)
 
-  
+
 ! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
 ! LLVMIR: %[[ld:.*]] = load <16 x i8>, ptr %[[addr]], align 1
@@ -481,7 +481,7 @@ subroutine vec_xl_be_testi8a(arg1, arg2, res)
   vector(integer(1)) :: res
   res = vec_xl_be(arg1, arg2)
 
-  
+
 ! LLVMIR: %4 = load i8, ptr %0, align 1
 ! LLVMIR: %5 = getelementptr i8, ptr %1, i8 %4
 ! LLVMIR: %6 = load <16 x i8>, ptr %5, align 1
diff --git a/flang/test/Lower/PowerPC/ppc-vec-sel.f90 b/flang/test/Lower/PowerPC/ppc-vec-sel.f90
index c3de8ba9c1444..93641d1461a99 100644
--- a/flang/test/Lower/PowerPC/ppc-vec-sel.f90
+++ b/flang/test/Lower/PowerPC/ppc-vec-sel.f90
@@ -136,7 +136,7 @@ subroutine vec_sel_testu8(arg1, arg2, arg3)
   vector(unsigned(8)) :: arg1, arg2, r
   vector(unsigned(8)) :: arg3
   r = vec_sel(arg1, arg2, arg3)
-  
+
 
 ! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
 ! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16
diff --git a/flang/test/Lower/PowerPC/ppc-vec-store-elem-order.f90 b/flang/test/Lower/PowerPC/ppc-vec-store-elem-order.f90
index caf6d5463a833..947c8b1c7eb2c 100644
--- a/flang/test/Lower/PowerPC/ppc-vec-store-elem-order.f90
+++ b/flang/test/Lower/PowerPC/ppc-vec-store-elem-order.f90
@@ -14,7 +14,7 @@ subroutine vec_st_test(arg1, arg2, arg3)
 ! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %0, align 16
 ! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[arg2]]
-! LLVMIR: %[[bc:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32> 
+! LLVMIR: %[[bc:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
 ! LLVMIR: %[[shf:.*]] = shufflevector <4 x i32> %[[bc]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ! LLVMIR:  call void @llvm.ppc.altivec.stvx(<4 x i32> %[[shf]], ptr %[[addr]])
 end subroutine vec_st_test
@@ -28,7 +28,7 @@ subroutine vec_ste_test(arg1, arg2, arg3)
   integer(4) :: arg2
   real(4) :: arg3
   call vec_ste(arg1, arg2, arg3)
-  
+
 ! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %0, align 16
 ! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4
 ! LLVMIR: %[[addr]] = getelementptr i8, ptr %2, i32 %[[arg2]]
diff --git a/flang/test/Lower/PowerPC/ppc-vec-store.f90 b/flang/test/Lower/PowerPC/ppc-vec-store.f90
index c25cc8b07cf79..1c3ab9638f117 100644
--- a/flang/test/Lower/PowerPC/ppc-vec-store.f90
+++ b/flang/test/Lower/PowerPC/ppc-vec-store.f90
@@ -300,7 +300,7 @@ subroutine vec_xst_test_vr4i2r4(arg1, arg2, arg3)
   real(4) :: arg3
   call vec_xst(arg1, arg2, arg3)
 
-  
+
 ! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
 ! LLVMIR: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %{{.*}}, i16 %[[arg2]]
@@ -432,7 +432,7 @@ subroutine vec_xst_be_test_vi4i4vai4(arg1, arg2, arg3, i)
 ! LLVMIR: %[[iadd:.*]] = add nsw i64 %[[imul2]], 0
 ! LLVMIR: %[[gep1:.*]] = getelementptr <4 x i32>, ptr %2, i64 %[[iadd]]
 ! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
-! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4 
+! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4
 ! LLVMIR: %[[gep2:.*]] = getelementptr i8, ptr %[[gep1]], i32 %[[arg2]]
 ! LLVMIR: %[[src:.*]] = shufflevector <4 x i32> %[[arg1]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ! LLVMIR: store <4 x i32> %[[src]], ptr %[[gep2]], align 16
@@ -449,7 +449,7 @@ subroutine vec_xstd2_test_vr4i2r4(arg1, arg2, arg3)
   real(4) :: arg3
   call vec_xstd2(arg1, arg2, arg3)
 
-  
+
 ! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
 ! LLVMIR: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %{{.*}}, i16 %[[arg2]]
@@ -509,7 +509,7 @@ subroutine vec_xstd2_test_vi4i4vai4(arg1, arg2, arg3, i)
 ! LLVMIR: %[[iadd:.*]] = add nsw i64 %[[imul2]], 0
 ! LLVMIR: %[[gep1:.*]] = getelementptr <4 x i32>, ptr %2, i64 %[[iadd]]
 ! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
-! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4 
+! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4
 ! LLVMIR: %[[gep2:.*]] = getelementptr i8, ptr %[[gep1]], i32 %[[arg2]]
 ! LLVMIR: %[[src:.*]] = bitcast <4 x i32> %[[arg1]] to <2 x i64>
 ! LLVMIR: store <2 x i64> %[[src]], ptr %[[gep2]], align 16
@@ -526,7 +526,7 @@ subroutine vec_xstw4_test_vr4i2r4(arg1, arg2, arg3)
   real(4) :: arg3
   call vec_xstw4(arg1, arg2, arg3)
 
-  
+
 ! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
 ! LLVMIR: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2
 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %{{.*}}, i16 %[[arg2]]
@@ -584,7 +584,7 @@ subroutine vec_xstw4_test_vi4i4vai4(arg1, arg2, arg3, i)
 ! LLVMIR: %[[iadd:.*]] = add nsw i64 %[[imul2]], 0
 ! LLVMIR: %[[gep1:.*]] = getelementptr <4 x i32>, ptr %2, i64 %[[iadd]]
 ! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
-! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4 
+! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4
 ! LLVMIR: %[[gep2:.*]] = getelementptr i8, ptr %[[gep1]], i32 %[[arg2]]
 ! LLVMIR: store <4 x i32> %[[arg1]], ptr %[[gep2]], align 16
 end subroutine vec_xstw4_test_vi4i4vai4
diff --git a/flang/test/Lower/allocatable-assignment.f90 b/flang/test/Lower/allocatable-assignment.f90
index 3c220232104a5..b6b2f7b6c77b9 100644
--- a/flang/test/Lower/allocatable-assignment.f90
+++ b/flang/test/Lower/allocatable-assignment.f90
@@ -283,14 +283,14 @@ subroutine test_dyn_char(x, n, c)
 ! CHECK:           hlfir.assign %[[VAL_8]]#0 to %[[VAL_14]]#0 realloc keep_lhs_len : !fir.box<!fir.array<20x!fir.char<1,?>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,?>>>>>
 
 subroutine test_derived_with_init(x, y)
-  type t 
+  type t
     integer, allocatable :: a(:)
-  end type                                                                                     
-  type(t), allocatable :: x                                                                    
-  type(t) :: y                                                                                 
+  end type
+  type(t), allocatable :: x
+  type(t) :: y
   ! The allocatable component of `x` need to be initialized
   ! during the automatic allocation (setting its rank and allocation
-  ! status) before it is assigned with the component of `y` 
+  ! status) before it is assigned with the component of `y`
   x = y
 end subroutine
 ! CHECK-LABEL:   func.func @_QMalloc_assignPtest_derived_with_init(
@@ -357,7 +357,7 @@ end function elt
 !  real :: y(2, 3) = reshape([1,2,3,4,5,6], [2,3])
 !  real, allocatable :: x (:, :)
 !  allocate(x(2,2))
-!  call test_with_lbounds(x, y) 
+!  call test_with_lbounds(x, y)
 !  print *, x(10, 20)
 !  print *, x
 !end
diff --git a/flang/test/Lower/allocatable-globals.f90 b/flang/test/Lower/allocatable-globals.f90
index 9d386688f8881..8b7420ab32391 100644
--- a/flang/test/Lower/allocatable-globals.f90
+++ b/flang/test/Lower/allocatable-globals.f90
@@ -12,7 +12,7 @@
 module mod_allocatables
   character(10), allocatable :: c(:)
 end module
-  
+
 ! CHECK-LABEL: func @_QPtest_mod_allocatables()
 subroutine test_mod_allocatables()
   use mod_allocatables, only: c
diff --git a/flang/test/Lower/allocatable-polymorphic.f90 b/flang/test/Lower/allocatable-polymorphic.f90
index 27cdf2839767d..d528fd8e546ff 100644
--- a/flang/test/Lower/allocatable-polymorphic.f90
+++ b/flang/test/Lower/allocatable-polymorphic.f90
@@ -460,7 +460,7 @@ subroutine test_allocate_with_mold()
 ! CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X]](%{{.*}}) {uniq_name = "_QMpolyFtest_allocate_with_moldEx"} : (!fir.ref<!fir.array<10x!fir.type<_QMpolyTp2{p1:!fir.type<_QMpolyTp1{a:i32,b:i32}>,c:i32}>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10x!fir.type<_QMpolyTp2{p1:!fir.type<_QMpolyTp1{a:i32,b:i32}>,c:i32}>>>, !fir.ref<!fir.array<10x!fir.type<_QMpolyTp2{p1:!fir.type<_QMpolyTp1{a:i32,b:i32}>,c:i32}>>>)
 
 ! CHECK: %[[EMBOX_X:.*]] = fir.embox %[[X_DECL]]#0(%{{.*}}) : (!fir.ref<!fir.array<10x!fir.type<_QMpolyTp2{p1:!fir.type<_QMpolyTp1{a:i32,b:i32}>,c:i32}>>>, !fir.shape<1>) -> !fir.box<!fir.array<10x!fir.type<_QMpolyTp2{p1:!fir.type<_QMpolyTp1{a:i32,b:i32}>,c:i32}>>>
-! CHECK: %[[RANK:.*]] = arith.constant 1 : i32 
+! CHECK: %[[RANK:.*]] = arith.constant 1 : i32
 ! CHECK: %[[P_BOX_NONE:.*]] = fir.convert %[[P_DECL]]#0 : (!fir.ref<!fir.class<!fir.ptr<!fir.array<?x!fir.type<_QMpolyTp1{a:i32,b:i32}>>>>>) -> !fir.ref<!fir.box<none>>
 ! CHECK: %[[X_BOX_NONE:.*]] = fir.convert %[[EMBOX_X]] : (!fir.box<!fir.array<10x!fir.type<_QMpolyTp2{p1:!fir.type<_QMpolyTp1{a:i32,b:i32}>,c:i32}>>>) -> !fir.box<none>
 ! CHECK: fir.call @_FortranAPointerApplyMold(%[[P_BOX_NONE]], %[[X_BOX_NONE]], %[[RANK]]) {{.*}} : (!fir.ref<!fir.box<none>>, !fir.box<none>, i32) -> ()
@@ -614,10 +614,10 @@ program test_alloc
 ! LLVM: %[[TYPE_CODE:.*]] = load i8, ptr %[[TYPE_CODE_GEP]]
 ! LLVM-NEXT: %[[EXT_TYPE_CODE:.*]] = sext i8 %[[TYPE_CODE]] to i32
 ! LLVM: %{{.*}} = insertvalue { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } undef, i64 %[[ELEM_SIZE]], 1
-! LLVM: %[[TRUNC_TYPE_CODE:.*]] = trunc i32 %[[EXT_TYPE_CODE]] to i8 
+! LLVM: %[[TRUNC_TYPE_CODE:.*]] = trunc i32 %[[EXT_TYPE_CODE]] to i8
 ! LLVM: %{{.*}} = insertvalue { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %{{.*}}, i8 %[[TRUNC_TYPE_CODE]], 4
 ! LLVM: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %{{.*}}, ptr %[[TMP:.*]]
-! LLVM: call void %{{.*}}(ptr %{{.*}}) 
+! LLVM: call void %{{.*}}(ptr %{{.*}})
 
 ! LLVM: call void @llvm.memcpy.p0.p0.i32
 ! LLVM: %[[GEP_TDESC_C2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.*}}, i32 0, i32 7
@@ -628,7 +628,7 @@ program test_alloc
 ! LLVM: %[[TYPE_CODE:.*]] = load i8, ptr %[[TYPE_CODE_GEP]]
 ! LLVM-NEXT: %[[EXT_TYPE_CODE:.*]] = sext i8 %[[TYPE_CODE]] to i32
 ! LLVM: %{{.*}} = insertvalue { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } undef, i64 %[[ELEM_SIZE]], 1
-! LLVM: %[[TRUNC_TYPE_CODE:.*]] = trunc i32 %[[EXT_TYPE_CODE]] to i8 
+! LLVM: %[[TRUNC_TYPE_CODE:.*]] = trunc i32 %[[EXT_TYPE_CODE]] to i8
 ! LLVM: %{{.*}} = insertvalue { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %{{.*}}, i8 %[[TRUNC_TYPE_CODE]], 4
 ! LLVM: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %{{.*}}, ptr %{{.*}}
 ! LLVM: call void %{{.*}}(ptr %{{.*}})
diff --git a/flang/test/Lower/allocated.f90 b/flang/test/Lower/allocated.f90
index 6e8420fc7d79a..11e856fd67bad 100644
--- a/flang/test/Lower/allocated.f90
+++ b/flang/test/Lower/allocated.f90
@@ -15,4 +15,3 @@ subroutine allocated_test(scalar, array)
     ! CHECK: cmpi ne, %[[addrToInt1]], %c0{{.*}}
     print *, allocated(array)
   end subroutine
-  
\ No newline at end of file
diff --git a/flang/test/Lower/array-elemental-calls-2.f90 b/flang/test/Lower/array-elemental-calls-2.f90
index 2674b07dece17..60c9257a19822 100644
--- a/flang/test/Lower/array-elemental-calls-2.f90
+++ b/flang/test/Lower/array-elemental-calls-2.f90
@@ -172,7 +172,7 @@ subroutine check_parentheses_logical()
 subroutine check_parentheses_derived(a)
   type t
     integer :: i
-  end type  
+  end type
   interface
     integer elemental function elem_func_derived(x)
       import :: t
diff --git a/flang/test/Lower/array-elemental-calls.f90 b/flang/test/Lower/array-elemental-calls.f90
index 853807bcb3e6c..93d2979ec9383 100644
--- a/flang/test/Lower/array-elemental-calls.f90
+++ b/flang/test/Lower/array-elemental-calls.f90
@@ -57,7 +57,7 @@ elemental impure integer function impure_func(j)
       integer, intent(in) :: j
     end function
   end interface
-  
+
   i = 42 + pure_func(j)
   i = 42 + impure_func(j)
 end subroutine
diff --git a/flang/test/Lower/array-expression-assumed-size.f90 b/flang/test/Lower/array-expression-assumed-size.f90
index a498148d07fc7..b51dc00c20e28 100644
--- a/flang/test/Lower/array-expression-assumed-size.f90
+++ b/flang/test/Lower/array-expression-assumed-size.f90
@@ -16,8 +16,8 @@ end subroutine assumed_size_forall_test
 
 ! CHECK-LABEL: func @_QPassumed_size_test(
 ! CHECK-SAME:    %[[VAL_0:.*]]: !fir.ref<!fir.array<10x?xi32>>{{.*}}) {
-! CHECK:         %[[VAL_1A:.*]] = fir.convert %c10{{.*}} : (i64) -> index 
-! CHECK:         %[[VAL_1B:.*]] = arith.cmpi sgt, %[[VAL_1A]], %c0{{.*}} : index 
+! CHECK:         %[[VAL_1A:.*]] = fir.convert %c10{{.*}} : (i64) -> index
+! CHECK:         %[[VAL_1B:.*]] = arith.cmpi sgt, %[[VAL_1A]], %c0{{.*}} : index
 ! CHECK:         %[[VAL_1:.*]] = arith.select %[[VAL_1B]], %[[VAL_1A]], %c0{{.*}} : index
 ! CHECK:         %[[VAL_2:.*]] = fir.assumed_size_extent : index
 ! CHECK:         %[[VAL_3:.*]] = arith.constant 1 : index
@@ -79,8 +79,8 @@ end subroutine assumed_size_forall_test
 ! CHECK-LABEL: func @_QPassumed_size_forall_test(
 ! CHECK-SAME:       %[[VAL_0:.*]]: !fir.ref<!fir.array<10x?xi32>>{{.*}}) {
 ! CHECK:         %[[VAL_1:.*]] = fir.alloca i32 {adapt.valuebyref, bindc_name = "i"}
-! CHECK:         %[[VAL_2A:.*]] = fir.convert %c10{{.*}} : (i64) -> index 
-! CHECK:         %[[VAL_2B:.*]] = arith.cmpi sgt, %[[VAL_2A]], %c0{{.*}} : index 
+! CHECK:         %[[VAL_2A:.*]] = fir.convert %c10{{.*}} : (i64) -> index
+! CHECK:         %[[VAL_2B:.*]] = arith.cmpi sgt, %[[VAL_2A]], %c0{{.*}} : index
 ! CHECK:         %[[VAL_2:.*]] = arith.select %[[VAL_2B]], %[[VAL_2A]], %c0{{.*}} : index
 ! CHECK:         %[[VAL_3:.*]] = fir.assumed_size_extent : index
 ! CHECK:         %[[VAL_4:.*]] = arith.constant 2 : i32
diff --git a/flang/test/Lower/array-substring.f90 b/flang/test/Lower/array-substring.f90
index 7544fbb989627..0ede04f0bb2f8 100644
--- a/flang/test/Lower/array-substring.f90
+++ b/flang/test/Lower/array-substring.f90
@@ -46,5 +46,5 @@ function test(C)
   logical :: test(1)
   character*12  C(1)
 
-  test = C(1:1)(1:8) == (/'ABCDabcd'/) 
+  test = C(1:1)(1:8) == (/'ABCDabcd'/)
 end function test
diff --git a/flang/test/Lower/array-wide-char.f90 b/flang/test/Lower/array-wide-char.f90
index 8bad280d0f056..44fcd45519d85 100644
--- a/flang/test/Lower/array-wide-char.f90
+++ b/flang/test/Lower/array-wide-char.f90
@@ -2,7 +2,7 @@
 
 character(LEN=128, KIND=4), PARAMETER :: conarr(3) = &
      [ character(128,4) :: "now is the time", "for all good men to come", &
-     "to the aid of the country" ]       
+     "to the aid of the country" ]
 character(LEN=10, KIND=4) :: arr(3) = &
      [ character(10,4) :: "good buddy", "best buddy", " " ]
 call action_on_char4(conarr)
diff --git a/flang/test/Lower/array.f90 b/flang/test/Lower/array.f90
index 710175739b3a8..cd12d7f851e67 100644
--- a/flang/test/Lower/array.f90
+++ b/flang/test/Lower/array.f90
@@ -93,7 +93,7 @@ subroutine s(i,j,k,ii,jj,kk,a1,a2,a3,a4,a5,a6,a7)
   ! CHECK: fir.coordinate_of %[[a7]], %[[t7]] :
   ! CHECK-LABEL: EndIoStatement
   print *, a7(kk, jj, ii)
-  
+
 end subroutine s
 
 ! CHECK-LABEL: range
diff --git a/flang/test/Lower/forall-pointer-assignment.f90 b/flang/test/Lower/forall-pointer-assignment.f90
index d89fb3ed5cb57..62184a77addf5 100644
--- a/flang/test/Lower/forall-pointer-assignment.f90
+++ b/flang/test/Lower/forall-pointer-assignment.f90
@@ -1,4 +1,4 @@
-! Test lower of FORALL pointer assignment 
+! Test lowering of FORALL pointer assignment
 ! RUN: bbc -emit-fir %s -o - | FileCheck %s
 
 
diff --git a/flang/test/Lower/forall/forall-2.f90 b/flang/test/Lower/forall/forall-2.f90
index cdafb4f3d49e7..c6a20f5859497 100644
--- a/flang/test/Lower/forall/forall-2.f90
+++ b/flang/test/Lower/forall/forall-2.f90
@@ -16,7 +16,7 @@ subroutine implied_iters_allocatable(thing, a1)
   end type t
   type(t) :: thing(:)
   integer :: i
-  
+
   forall (i=5:13)
   ! commenting out this test for the moment (hits assert)
   !  thing(i)%arr = a1
@@ -32,7 +32,7 @@ subroutine conflicting_allocatable(thing, lo, hi)
   end type t
   type(t) :: thing(:)
   integer :: i
-  
+
   forall (i = lo:hi)
   ! commenting out this test for the moment (hits assert)
   !  thing(i)%arr = thing(hi-i)%arr
diff --git a/flang/test/Lower/forall/forall-ranked.f90 b/flang/test/Lower/forall/forall-ranked.f90
index 9e56be926e78e..f508c67468212 100644
--- a/flang/test/Lower/forall/forall-ranked.f90
+++ b/flang/test/Lower/forall/forall-ranked.f90
@@ -68,7 +68,7 @@ end function f
      integer :: arr(11)
   end type t
   type(t) :: a(10,10)
-  
+
   forall (i=1:5)
      a(i,:)%arr(i+4) = f(i)
   end forall
diff --git a/flang/test/Lower/forall/forall-where-2.f90 b/flang/test/Lower/forall/forall-where-2.f90
index c075508bef561..85aab87559c3c 100644
--- a/flang/test/Lower/forall/forall-where-2.f90
+++ b/flang/test/Lower/forall/forall-where-2.f90
@@ -6,7 +6,7 @@
 ! Test a FORALL construct with a nested WHERE construct where the mask
 ! contains temporary array expressions.
 
-subroutine test_nested_forall_where_with_temp_in_mask(a,b)  
+subroutine test_nested_forall_where_with_temp_in_mask(a,b)
   interface
     function temp_foo(i, j)
       integer :: i, j
@@ -28,10 +28,10 @@ function temp_foo(i, j)
 
 ! CHECK:  func @_QPtest_nested_forall_where_with_temp_in_mask({{.*}}) {
 ! CHECK:   %[[tempResultBox:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = ".result"}
-           ! Where condition pre-evaluation 
+           ! Where condition pre-evaluation
 ! CHECK:   fir.do_loop {{.*}} {
 ! CHECK:      fir.do_loop {{.*}} {
-                ! Evaluation of mask for iteration (i,j) into ragged array temp 
+                ! Evaluation of mask for iteration (i,j) into ragged array temp
 ! CHECK:        %[[tempResult:.*]] = fir.call @_QPtemp_foo
 ! CHECK:        fir.save_result %[[tempResult]] to %[[tempResultBox]] : !fir.box<!fir.heap<!fir.array<?xf32>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
 ! CHECK:        fir.if {{.*}} {
@@ -52,7 +52,7 @@ function temp_foo(i, j)
 ! CHECK:      fir.do_loop {{.*}} {
                 ! Array assignment at iteration (i, j)
 ! CHECK:        fir.do_loop {{.*}} {
-! CHECK:          fir.if {{.*}} {  
+! CHECK:          fir.if {{.*}} {
 ! CHECK:            arith.divf
 ! CHECK:          } else {
 ! CHECK:          }
@@ -64,7 +64,7 @@ function temp_foo(i, j)
 ! CHECK:      fir.do_loop {{.*}} {
                 ! Array assignment at iteration (i, j)
 ! CHECK:        fir.do_loop {{.*}} {
-! CHECK:          fir.if {{.*}} {  
+! CHECK:          fir.if {{.*}} {
 ! CHECK:          } else {
 ! CHECK:            arith.negf
 ! CHECK:          }
diff --git a/flang/test/Lower/forall/forall-where.f90 b/flang/test/Lower/forall/forall-where.f90
index 54ff2bd4c3f16..3202edbaec808 100644
--- a/flang/test/Lower/forall/forall-where.f90
+++ b/flang/test/Lower/forall/forall-where.f90
@@ -6,7 +6,7 @@
 !    This has both an explicit and implicit iteration space. The WHERE construct
 !    makes the assignments conditional and the where mask evaluation must happen
 !    prior to evaluating the array assignment statement.
-subroutine test_nested_forall_where(a,b)  
+subroutine test_nested_forall_where(a,b)
   type t
      real data(100)
   end type t
diff --git a/libcxx/include/__config b/libcxx/include/__config
index 8f461599ffd5b..d79ace0cbb896 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -546,6 +546,12 @@ typedef __char32_t char32_t;
 #    define _LIBCPP_DEPRECATED_(m)
 #  endif
 
+#  if defined(__DEPRECATED) && __DEPRECATED && !defined(_LIBCPP_DISABLE_DEPRECATION_WARNINGS)
+#    define _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS 1
+#  else
+#    define _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS 0
+#  endif
+
 #  if !defined(_LIBCPP_CXX03_LANG)
 #    define _LIBCPP_DEPRECATED_IN_CXX11 _LIBCPP_DEPRECATED
 #  else
diff --git a/libcxx/include/ccomplex b/libcxx/include/ccomplex
index ee7e088aac54d..c1cb039f83a5e 100644
--- a/libcxx/include/ccomplex
+++ b/libcxx/include/ccomplex
@@ -26,18 +26,10 @@
 #    pragma GCC system_header
 #  endif
 
-#  if _LIBCPP_STD_VER >= 20
-
-using __standard_header_ccomplex
-    _LIBCPP_DEPRECATED_("removed in C++20. Include <complex> instead.") _LIBCPP_NODEBUG = void;
-using __use_standard_header_ccomplex _LIBCPP_NODEBUG                                    = __standard_header_ccomplex;
-
-#  elif _LIBCPP_STD_VER >= 17
-
-using __standard_header_ccomplex _LIBCPP_DEPRECATED_("Include <complex> instead.") _LIBCPP_NODEBUG = void;
-using __use_standard_header_ccomplex _LIBCPP_NODEBUG = __standard_header_ccomplex;
-
+#  if _LIBCPP_STD_VER >= 17 && !__building_module(std) && _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS
+#    warning <ccomplex> is deprecated in C++17 and removed in C++20. Include <complex> instead.
 #  endif
+
 #endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
 
 #endif // _LIBCPP_CCOMPLEX
diff --git a/libcxx/include/ciso646 b/libcxx/include/ciso646
index 34164362dc10d..d9eae41291024 100644
--- a/libcxx/include/ciso646
+++ b/libcxx/include/ciso646
@@ -24,13 +24,10 @@
 #    pragma GCC system_header
 #  endif
 
-#  if _LIBCPP_STD_VER >= 20
-
-using __standard_header_ciso646
-    _LIBCPP_DEPRECATED_("removed in C++20. Include <version> instead.") _LIBCPP_NODEBUG = void;
-using __use_standard_header_ciso646 _LIBCPP_NODEBUG                                     = __standard_header_ciso646;
-
+#  if _LIBCPP_STD_VER >= 20 && !__building_module(std) && _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS
+#    warning <ciso646> is removed in C++20. Include <version> instead.
 #  endif
+
 #endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
 
 #endif // _LIBCPP_CISO646
diff --git a/libcxx/include/cstdalign b/libcxx/include/cstdalign
index 7f8dd1e1fbaf8..7aa8cc81ad14c 100644
--- a/libcxx/include/cstdalign
+++ b/libcxx/include/cstdalign
@@ -43,17 +43,10 @@ Macros:
 #  undef __alignof_is_defined
 #  define __alignof_is_defined 1
 
-#  if _LIBCPP_STD_VER >= 20
-
-using __standard_header_cstdalign _LIBCPP_DEPRECATED_("removed in C++20.") _LIBCPP_NODEBUG = void;
-using __use_standard_header_cstdalign _LIBCPP_NODEBUG = __standard_header_cstdalign;
-
-#  elif _LIBCPP_STD_VER >= 17
-
-using __standard_header_cstdalign _LIBCPP_DEPRECATED _LIBCPP_NODEBUG = void;
-using __use_standard_header_cstdalign _LIBCPP_NODEBUG                = __standard_header_cstdalign;
-
+#  if _LIBCPP_STD_VER >= 17 && !__building_module(std) && _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS
+#    warning <cstdalign> is deprecated in C++17 and removed in C++20.
 #  endif
+
 #endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
 
 #endif // _LIBCPP_CSTDALIGN
diff --git a/libcxx/include/cstdbool b/libcxx/include/cstdbool
index a432d5f08b9ae..805a287bd7627 100644
--- a/libcxx/include/cstdbool
+++ b/libcxx/include/cstdbool
@@ -31,17 +31,10 @@ Macros:
 #  undef __bool_true_false_are_defined
 #  define __bool_true_false_are_defined 1
 
-#  if _LIBCPP_STD_VER >= 20
-
-using __standard_header_cstdbool _LIBCPP_DEPRECATED_("removed in C++20.") _LIBCPP_NODEBUG = void;
-using __use_standard_header_cstdbool _LIBCPP_NODEBUG                                      = __standard_header_cstdbool;
-
-#  elif _LIBCPP_STD_VER >= 17
-
-using __standard_header_cstdbool _LIBCPP_DEPRECATED _LIBCPP_NODEBUG = void;
-using __use_standard_header_cstdbool _LIBCPP_NODEBUG                = __standard_header_cstdbool;
-
+#  if _LIBCPP_STD_VER >= 17 && !__building_module(std) && _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS
+#    warning <cstdbool> is deprecated in C++17 and removed in C++20.
 #  endif
+
 #endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
 
 #endif // _LIBCPP_CSTDBOOL
diff --git a/libcxx/include/ctgmath b/libcxx/include/ctgmath
index db0786f1e2c46..13b7a96e4d8fc 100644
--- a/libcxx/include/ctgmath
+++ b/libcxx/include/ctgmath
@@ -28,17 +28,8 @@
 #    pragma GCC system_header
 #  endif
 
-#  if _LIBCPP_STD_VER >= 20
-
-using __standard_header_ctgmath
-    _LIBCPP_DEPRECATED_("removed in C++20. Include <cmath> and <complex> instead.") _LIBCPP_NODEBUG = void;
-using __use_standard_header_ctgmath _LIBCPP_NODEBUG = __standard_header_ctgmath;
-
-#  elif _LIBCPP_STD_VER >= 17
-
-using __standard_header_ctgmath _LIBCPP_DEPRECATED_("Include <cmath> and <complex> instead.") _LIBCPP_NODEBUG = void;
-using __use_standard_header_ctgmath _LIBCPP_NODEBUG = __standard_header_ctgmath;
-
+#  if _LIBCPP_STD_VER >= 17 && !__building_module(std) && _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS
+#    warning <ctgmath> is deprecated in C++17 and removed in C++20. Include <cmath> and <complex> instead.
 #  endif
 
 #endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
diff --git a/libcxx/test/libcxx/transitive_includes.gen.py b/libcxx/test/libcxx/transitive_includes.gen.py
index 6ed35af7e275e..2b643e1f2ad48 100644
--- a/libcxx/test/libcxx/transitive_includes.gen.py
+++ b/libcxx/test/libcxx/transitive_includes.gen.py
@@ -89,7 +89,7 @@
 // UNSUPPORTED: LIBCXX-FREEBSD-FIXME
 
 // RUN: mkdir %t
-// RUN: %{{cxx}} %s %{{flags}} %{{compile_flags}} --trace-includes -fshow-skipped-includes --preprocess > /dev/null 2> %t/trace-includes.txt
+// RUN: %{{cxx}} %s %{{flags}} %{{compile_flags}} -Wno-deprecated --trace-includes -fshow-skipped-includes --preprocess > /dev/null 2> %t/trace-includes.txt
 // RUN: %{{python}} %{{libcxx-dir}}/test/libcxx/transitive_includes/to_csv.py %t/trace-includes.txt > %t/actual_transitive_includes.csv
 // RUN: cat %{{libcxx-dir}}/test/libcxx/transitive_includes/%{{cxx_std}}.csv | awk '/^{escaped_header} / {{ print }}' > %t/expected_transitive_includes.csv
 // RUN: diff -w %t/expected_transitive_includes.csv %t/actual_transitive_includes.csv
diff --git a/libcxx/test/std/depr/depr.cpp.headers/ccomplex.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/ccomplex.verify.cpp
index 0eaf82ce5cef0..8df89d0ba9206 100644
--- a/libcxx/test/std/depr/depr.cpp.headers/ccomplex.verify.cpp
+++ b/libcxx/test/std/depr/depr.cpp.headers/ccomplex.verify.cpp
@@ -14,12 +14,6 @@
 // UNSUPPORTED: c++03, c++11, c++14
 // UNSUPPORTED: clang-modules-build
 
-#include "test_macros.h"
-
 #include <ccomplex>
 
-#if TEST_STD_VER >= 20
-// expected-warning@ccomplex:* {{'__standard_header_ccomplex' is deprecated: removed in C++20. Include <complex> instead.}}
-#else
-// expected-warning@ccomplex:* {{'__standard_header_ccomplex' is deprecated: Include <complex> instead.}}
-#endif
+// expected-warning@ccomplex:* {{<ccomplex> is deprecated in C++17 and removed in C++20. Include <complex> instead.}}
diff --git a/libcxx/test/std/depr/depr.cpp.headers/ciso646.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/ciso646.verify.cpp
index 04acd10081548..32b57033331c8 100644
--- a/libcxx/test/std/depr/depr.cpp.headers/ciso646.verify.cpp
+++ b/libcxx/test/std/depr/depr.cpp.headers/ciso646.verify.cpp
@@ -15,4 +15,5 @@
 // UNSUPPORTED: clang-modules-build
 
 #include <ciso646>
-// expected-warning@ciso646:* {{'__standard_header_ciso646' is deprecated: removed in C++20. Include <version> instead.}}
+
+// expected-warning@ciso646:* {{<ciso646> is removed in C++20. Include <version> instead.}}
diff --git a/libcxx/test/std/depr/depr.cpp.headers/cstdalign.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/cstdalign.verify.cpp
index dc9f1af55b3f1..23a7709a9d658 100644
--- a/libcxx/test/std/depr/depr.cpp.headers/cstdalign.verify.cpp
+++ b/libcxx/test/std/depr/depr.cpp.headers/cstdalign.verify.cpp
@@ -14,12 +14,6 @@
 // UNSUPPORTED: c++03, c++11, c++14
 // UNSUPPORTED: clang-modules-build
 
-#include "test_macros.h"
-
 #include <cstdalign>
 
-#if TEST_STD_VER >= 20
-// expected-warning@cstdalign:* {{'__standard_header_cstdalign' is deprecated: removed in C++20.}}
-#else
-// expected-warning@cstdalign:* {{'__standard_header_cstdalign' is deprecated}}
-#endif
+// expected-warning@cstdalign:* {{<cstdalign> is deprecated in C++17 and removed in C++20.}}
diff --git a/libcxx/test/std/depr/depr.cpp.headers/cstdbool.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/cstdbool.verify.cpp
index eddefe14d35ea..c2c0f03c52d3c 100644
--- a/libcxx/test/std/depr/depr.cpp.headers/cstdbool.verify.cpp
+++ b/libcxx/test/std/depr/depr.cpp.headers/cstdbool.verify.cpp
@@ -14,12 +14,6 @@
 // UNSUPPORTED: c++03, c++11, c++14
 // UNSUPPORTED: clang-modules-build
 
-#include "test_macros.h"
-
 #include <cstdbool>
 
-#if TEST_STD_VER >= 20
-// expected-warning@cstdbool:* {{'__standard_header_cstdbool' is deprecated: removed in C++20.}}
-#else
-// expected-warning@cstdbool:* {{'__standard_header_cstdbool' is deprecated}}
-#endif
+// expected-warning@cstdbool:* {{<cstdbool> is deprecated in C++17 and removed in C++20.}}
diff --git a/libcxx/test/std/depr/depr.cpp.headers/ctgmath.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/ctgmath.verify.cpp
index 097ab1643d15a..4f5564915443d 100644
--- a/libcxx/test/std/depr/depr.cpp.headers/ctgmath.verify.cpp
+++ b/libcxx/test/std/depr/depr.cpp.headers/ctgmath.verify.cpp
@@ -14,12 +14,6 @@
 // UNSUPPORTED: c++03, c++11, c++14
 // UNSUPPORTED: clang-modules-build
 
-#include "test_macros.h"
-
 #include <ctgmath>
 
-#if TEST_STD_VER >= 20
-// expected-warning@ctgmath:* {{'__standard_header_ctgmath' is deprecated: removed in C++20. Include <cmath> and <complex> instead.}}
-#else
-// expected-warning@ctgmath:* {{'__standard_header_ctgmath' is deprecated: Include <cmath> and <complex> instead.}}
-#endif
+// expected-warning@ctgmath:* {{<ctgmath> is deprecated in C++17 and removed in C++20. Include <cmath> and <complex> instead.}}
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/make_from_tuple.verify.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/make_from_tuple.verify.cpp
index 12d778408d5ec..e58e760a5ce81 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/make_from_tuple.verify.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/make_from_tuple.verify.cpp
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-// REQUIRES: std-at-least-c++23
+// REQUIRES: std-at-least-c++26
 
 // <tuple>
 
@@ -21,11 +21,6 @@
 void test() {
   // expected-error@*:* {{static assertion failed}}
 
-  // Turns to an error since C++26 (Disallow Binding a Returned Glvalue to a Temporary https://wg21.link/P2748R5).
-#if TEST_STD_VER >= 26
   // expected-error@tuple:* {{returning reference to local temporary object}}
-#else
-  // expected-warning@tuple:* {{returning reference to local temporary object}}
-#endif
   std::ignore = std::make_from_tuple<const int&>(std::tuple<char>{});
 }
diff --git a/libcxx/utils/libcxx/test/format.py b/libcxx/utils/libcxx/test/format.py
index 975209c273f8c..76e9115295b99 100644
--- a/libcxx/utils/libcxx/test/format.py
+++ b/libcxx/utils/libcxx/test/format.py
@@ -99,7 +99,7 @@ def parseScript(test, preamble):
     substitutions.append(
         (
             "%{verify}",
-            "%{cxx} %s %{flags} %{compile_flags} -fsyntax-only -Wno-error -Xclang -verify -Xclang -verify-ignore-unexpected=note -ferror-limit=0",
+            "%{cxx} %s %{flags} %{compile_flags} -U_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER -fsyntax-only -Wno-error -Xclang -verify -Xclang -verify-ignore-unexpected=note -ferror-limit=0",
         )
     )
     substitutions.append(("%{run}", "%{exec} %t.exe"))
diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp
index 2a97df4785ecb..b0dc797292511 100644
--- a/lld/ELF/Arch/AArch64.cpp
+++ b/lld/ELF/Arch/AArch64.cpp
@@ -762,7 +762,7 @@ void AArch64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
     relocateNoSym(loc, R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC, val);
     break;
   default:
-    llvm_unreachable("unsupported relocation for TLS GD to LE relaxation");
+    llvm_unreachable("unsupported relocation for TLS GD to IE relaxation");
   }
 }
 
diff --git a/lldb/include/lldb/Utility/RegisterValue.h b/lldb/include/lldb/Utility/RegisterValue.h
index 49aaf68be17fc..baf984cbcb052 100644
--- a/lldb/include/lldb/Utility/RegisterValue.h
+++ b/lldb/include/lldb/Utility/RegisterValue.h
@@ -46,7 +46,8 @@ class RegisterValue {
     eTypeUInt16,
     eTypeUInt32,
     eTypeUInt64,
-    eTypeUInt128,
+    eTypeUIntN, ///< This value is used when the (integer) register is larger
+                ///< than 64 bits.
     eTypeFloat,
     eTypeDouble,
     eTypeLongDouble,
@@ -69,7 +70,7 @@ class RegisterValue {
     m_scalar = inst;
   }
 
-  explicit RegisterValue(llvm::APInt inst) : m_type(eTypeUInt128) {
+  explicit RegisterValue(llvm::APInt inst) : m_type(eTypeUIntN) {
     m_scalar = llvm::APInt(std::move(inst));
   }
 
@@ -178,7 +179,7 @@ class RegisterValue {
   }
 
   void operator=(llvm::APInt uint) {
-    m_type = eTypeUInt128;
+    m_type = eTypeUIntN;
     m_scalar = llvm::APInt(std::move(uint));
   }
 
@@ -217,8 +218,8 @@ class RegisterValue {
     m_scalar = uint;
   }
 
-  void SetUInt128(llvm::APInt uint) {
-    m_type = eTypeUInt128;
+  void SetUIntN(llvm::APInt uint) {
+    m_type = eTypeUIntN;
     m_scalar = std::move(uint);
   }
 
diff --git a/lldb/source/Utility/DataExtractor.cpp b/lldb/source/Utility/DataExtractor.cpp
index e9be0cba81f0c..a9aea168acf41 100644
--- a/lldb/source/Utility/DataExtractor.cpp
+++ b/lldb/source/Utility/DataExtractor.cpp
@@ -662,10 +662,6 @@ size_t DataExtractor::ExtractBytes(offset_t offset, offset_t length,
   const uint8_t *src = PeekData(offset, length);
   if (src) {
     if (dst_byte_order != GetByteOrder()) {
-      // Validate that only a word- or register-sized dst is byte swapped
-      assert(length == 1 || length == 2 || length == 4 || length == 8 ||
-             length == 10 || length == 16 || length == 32);
-
       for (uint32_t i = 0; i < length; ++i)
         (static_cast<uint8_t *>(dst))[i] = src[length - i - 1];
     } else
diff --git a/lldb/source/Utility/RegisterValue.cpp b/lldb/source/Utility/RegisterValue.cpp
index 8b2af4e3d4f0e..c28c9e2d4d106 100644
--- a/lldb/source/Utility/RegisterValue.cpp
+++ b/lldb/source/Utility/RegisterValue.cpp
@@ -127,7 +127,7 @@ bool RegisterValue::GetScalarValue(Scalar &scalar) const {
   case eTypeUInt16:
   case eTypeUInt32:
   case eTypeUInt64:
-  case eTypeUInt128:
+  case eTypeUIntN:
   case eTypeFloat:
   case eTypeDouble:
   case eTypeLongDouble:
@@ -180,8 +180,6 @@ Status RegisterValue::SetValueFromData(const RegisterInfo &reg_info,
   if (src_len > reg_info.byte_size)
     src_len = reg_info.byte_size;
 
-  type128 int128;
-
   m_type = eTypeInvalid;
   switch (reg_info.encoding) {
   case eEncodingInvalid:
@@ -196,17 +194,15 @@ Status RegisterValue::SetValueFromData(const RegisterInfo &reg_info,
       SetUInt32(src.GetMaxU32(&src_offset, src_len));
     else if (reg_info.byte_size <= 8)
       SetUInt64(src.GetMaxU64(&src_offset, src_len));
-    else if (reg_info.byte_size <= 16) {
-      uint64_t data1 = src.GetU64(&src_offset);
-      uint64_t data2 = src.GetU64(&src_offset);
-      if (src.GetByteOrder() == eByteOrderLittle) {
-        int128.x[0] = data1;
-        int128.x[1] = data2;
-      } else {
-        int128.x[0] = data2;
-        int128.x[1] = data1;
-      }
-      SetUInt128(llvm::APInt(128, int128.x));
+    else {
+      std::vector<uint8_t> native_endian_src(src_len, 0);
+      src.ExtractBytes(src_offset, src_len,
+                       llvm::sys::IsLittleEndianHost ? eByteOrderLittle
+                                                     : eByteOrderBig,
+                       native_endian_src.data());
+      llvm::APInt uint = llvm::APInt::getZero(src_len * 8);
+      llvm::LoadIntFromMemory(uint, native_endian_src.data(), src_len);
+      SetUIntN(uint);
     }
     break;
   case eEncodingIEEE754:
@@ -442,7 +438,7 @@ bool RegisterValue::SignExtend(uint32_t sign_bitpos) {
   case eTypeUInt16:
   case eTypeUInt32:
   case eTypeUInt64:
-  case eTypeUInt128:
+  case eTypeUIntN:
     return m_scalar.SignExtend(sign_bitpos);
   case eTypeFloat:
   case eTypeDouble:
@@ -465,7 +461,7 @@ bool RegisterValue::CopyValue(const RegisterValue &rhs) {
   case eTypeUInt16:
   case eTypeUInt32:
   case eTypeUInt64:
-  case eTypeUInt128:
+  case eTypeUIntN:
   case eTypeFloat:
   case eTypeDouble:
   case eTypeLongDouble:
@@ -581,7 +577,7 @@ llvm::APInt RegisterValue::GetAsUInt128(const llvm::APInt &fail_value,
   case eTypeUInt16:
   case eTypeUInt32:
   case eTypeUInt64:
-  case eTypeUInt128:
+  case eTypeUIntN:
   case eTypeFloat:
   case eTypeDouble:
   case eTypeLongDouble:
@@ -616,7 +612,7 @@ float RegisterValue::GetAsFloat(float fail_value, bool *success_ptr) const {
     break;
   case eTypeUInt32:
   case eTypeUInt64:
-  case eTypeUInt128:
+  case eTypeUIntN:
   case eTypeFloat:
   case eTypeDouble:
   case eTypeLongDouble:
@@ -636,7 +632,7 @@ double RegisterValue::GetAsDouble(double fail_value, bool *success_ptr) const {
 
   case eTypeUInt32:
   case eTypeUInt64:
-  case eTypeUInt128:
+  case eTypeUIntN:
   case eTypeFloat:
   case eTypeDouble:
   case eTypeLongDouble:
@@ -657,7 +653,7 @@ long double RegisterValue::GetAsLongDouble(long double fail_value,
 
   case eTypeUInt32:
   case eTypeUInt64:
-  case eTypeUInt128:
+  case eTypeUIntN:
   case eTypeFloat:
   case eTypeDouble:
   case eTypeLongDouble:
@@ -676,7 +672,7 @@ const void *RegisterValue::GetBytes() const {
   case eTypeUInt16:
   case eTypeUInt32:
   case eTypeUInt64:
-  case eTypeUInt128:
+  case eTypeUIntN:
   case eTypeFloat:
   case eTypeDouble:
   case eTypeLongDouble:
@@ -698,7 +694,7 @@ uint32_t RegisterValue::GetByteSize() const {
     return 2;
   case eTypeUInt32:
   case eTypeUInt64:
-  case eTypeUInt128:
+  case eTypeUIntN:
   case eTypeFloat:
   case eTypeDouble:
   case eTypeLongDouble:
@@ -721,7 +717,7 @@ bool RegisterValue::SetUInt(uint64_t uint, uint32_t byte_size) {
   } else if (byte_size <= 8) {
     SetUInt64(uint);
   } else if (byte_size <= 16) {
-    SetUInt128(llvm::APInt(128, uint));
+    SetUIntN(llvm::APInt(128, uint));
   } else
     return false;
   return true;
@@ -749,7 +745,7 @@ bool RegisterValue::operator==(const RegisterValue &rhs) const {
     case eTypeUInt16:
     case eTypeUInt32:
     case eTypeUInt64:
-    case eTypeUInt128:
+    case eTypeUIntN:
     case eTypeFloat:
     case eTypeDouble:
     case eTypeLongDouble:
@@ -774,7 +770,7 @@ bool RegisterValue::ClearBit(uint32_t bit) {
   case eTypeUInt16:
   case eTypeUInt32:
   case eTypeUInt64:
-  case eTypeUInt128:
+  case eTypeUIntN:
     if (bit < (GetByteSize() * 8)) {
       return m_scalar.ClearBit(bit);
     }
@@ -814,7 +810,7 @@ bool RegisterValue::SetBit(uint32_t bit) {
   case eTypeUInt16:
   case eTypeUInt32:
   case eTypeUInt64:
-  case eTypeUInt128:
+  case eTypeUIntN:
     if (bit < (GetByteSize() * 8)) {
       return m_scalar.SetBit(bit);
     }
diff --git a/lldb/test/Shell/lldb-server/TestErrorMessages.test b/lldb/test/Shell/lldb-server/TestGdbserverErrorMessages.test
similarity index 100%
rename from lldb/test/Shell/lldb-server/TestErrorMessages.test
rename to lldb/test/Shell/lldb-server/TestGdbserverErrorMessages.test
diff --git a/lldb/test/Shell/lldb-server/TestPlatformErrorMessages.test b/lldb/test/Shell/lldb-server/TestPlatformErrorMessages.test
new file mode 100644
index 0000000000000..7d3b37aa5fc39
--- /dev/null
+++ b/lldb/test/Shell/lldb-server/TestPlatformErrorMessages.test
@@ -0,0 +1,25 @@
+RUN: %platformserver 2>&1 | FileCheck --check-prefixes=NO_LISTEN,ALL %s
+NO_LISTEN: error: either --listen or --child-platform-fd is required
+
+RUN: %lldb-server platform --listen 2>&1 | FileCheck --check-prefixes=LISTEN_MISSING,ALL %s
+LISTEN_MISSING: error: --listen: missing argument
+
+RUN: %lldb-server p --bogus 2>&1 | FileCheck --check-prefixes=BOGUS,ALL %s
+BOGUS: error: unknown argument '--bogus'
+
+RUN: %platformserver --gdbserver-port 2>&1 | FileCheck --check-prefixes=GDBPORT_MISSING,ALL %s
+GDBPORT_MISSING: error: --gdbserver-port: missing argument
+
+RUN: %platformserver --gdbserver-port notanumber --listen :1234 2>&1 | FileCheck --check-prefixes=GDBPORT_INVALID %s
+GDBPORT_INVALID: error: invalid --gdbserver-port value
+
+RUN: %platformserver --socket-file 2>&1 | FileCheck --check-prefixes=SOCKETFILE_MISSING,ALL %s
+SOCKETFILE_MISSING: error: --socket-file: missing argument
+
+RUN: %platformserver --log-file 2>&1 | FileCheck --check-prefixes=LOGFILE_MISSING,ALL %s
+LOGFILE_MISSING: error: --log-file: missing argument
+
+RUN: %platformserver --log-channels 2>&1 | FileCheck --check-prefixes=LOGCHANNELS_MISSING,ALL %s
+LOGCHANNELS_MISSING: error: --log-channels: missing argument
+
+ALL: Use 'lldb-server{{(\.exe)?}} {{p|platform}} --help' for a complete list of options.
diff --git a/lldb/test/Shell/lldb-server/TestPlatformHelp.test b/lldb/test/Shell/lldb-server/TestPlatformHelp.test
new file mode 100644
index 0000000000000..c5ced8a318100
--- /dev/null
+++ b/lldb/test/Shell/lldb-server/TestPlatformHelp.test
@@ -0,0 +1,40 @@
+RUN: %platformserver --help 2>&1 | FileCheck %s
+RUN: %platformserver -h 2>&1 | FileCheck %s
+RUN: %lldb-server p --help 2>&1 | FileCheck %s
+RUN: %lldb-server p -h 2>&1 | FileCheck %s
+RUN: %lldb-server platform --help 2>&1 | FileCheck %s
+RUN: %lldb-server platform -h 2>&1 | FileCheck %s
+
+CHECK: OVERVIEW: lldb-server{{(\.exe)?}} platform
+
+CHECK: USAGE: lldb-server{{(\.exe)?}} {{p|platform}} [options] --listen <[host]:port> {{\[}}[--] program args...]
+
+CHECK: CONNECTION OPTIONS:
+CHECK: --gdbserver-port <port>
+CHECK-SAME: Short form: -P
+CHECK: --listen <[host]:port>
+CHECK-SAME: Short form: -L
+CHECK: --socket-file <path>
+CHECK-SAME: Short form: -f
+
+CHECK: GENERAL OPTIONS:
+CHECK: --help
+CHECK: --log-channels <channel1 categories...:channel2 categories...>
+CHECK: Short form: -c
+CHECK: --log-file <file>
+CHECK-SAME: Short form: -l
+CHECK: --server
+
+CHECK: OPTIONS:
+CHECK: -- program args
+
+CHECK: DESCRIPTION
+CHECK: Acts as a platform server for remote debugging
+
+CHECK: EXAMPLES
+CHECK: # Listen on port 1234, exit after first connection
+CHECK: lldb-server{{(\.exe)?}} platform --listen tcp://0.0.0.0:1234
+CHECK: # Listen on port 5555, accept multiple connections
+CHECK: lldb-server{{(\.exe)?}} platform --server --listen tcp://localhost:5555
+CHECK: # Listen on Unix domain socket
+CHECK: lldb-server{{(\.exe)?}} platform --listen unix:///tmp/lldb-server.sock
diff --git a/lldb/tools/lldb-server/CMakeLists.txt b/lldb/tools/lldb-server/CMakeLists.txt
index 1d8dc72a3f872..fb55c64936121 100644
--- a/lldb/tools/lldb-server/CMakeLists.txt
+++ b/lldb/tools/lldb-server/CMakeLists.txt
@@ -2,6 +2,10 @@ set(LLVM_TARGET_DEFINITIONS LLGSOptions.td)
 tablegen(LLVM LLGSOptions.inc -gen-opt-parser-defs)
 add_public_tablegen_target(LLGSOptionsTableGen)
 
+set(LLVM_TARGET_DEFINITIONS PlatformOptions.td)
+tablegen(LLVM PlatformOptions.inc -gen-opt-parser-defs)
+add_public_tablegen_target(PlatformOptionsTableGen)
+
 set(LLDB_PLUGINS)
 
 if(CMAKE_SYSTEM_NAME MATCHES "Linux|Android")
@@ -67,6 +71,7 @@ add_lldb_tool(lldb-server
 
 add_dependencies(lldb-server
   LLGSOptionsTableGen
+  PlatformOptionsTableGen
   ${tablegen_deps}
 )
 target_include_directories(lldb-server PRIVATE "${LLDB_SOURCE_DIR}/source")
diff --git a/lldb/tools/lldb-server/PlatformOptions.td b/lldb/tools/lldb-server/PlatformOptions.td
new file mode 100644
index 0000000000000..eedd1d8c35343
--- /dev/null
+++ b/lldb/tools/lldb-server/PlatformOptions.td
@@ -0,0 +1,96 @@
+// Command-line option definitions for the "lldb-server platform" subcommand.
+// The build runs TableGen's -gen-opt-parser-defs backend on this file to
+// produce PlatformOptions.inc.
+
+include "llvm/Option/OptParser.td"
+
+// A flag (no value) that is accepted with either a "--" or a "-" prefix.
+class F<string name>: Flag<["--", "-"], name>;
+
+// An option of kind KIND_REMAINING_ARGS; used below for the bare "--".
+class R<list<string> prefixes, string name>
+  : Option<prefixes, name, KIND_REMAINING_ARGS>;
+
+// Defines a value-taking option in two spellings: NAME is the separate form
+// ("--name value") and carries the help text; NAME_eq is the joined form
+// ("--name=value") and is an alias of NAME.
+multiclass SJ<string name, string help> {
+  def NAME: Separate<["--", "-"], name>,
+    HelpText<help>;
+  def NAME # _eq: Joined<["--", "-"], name # "=">,
+    Alias<!cast<Separate>(NAME)>;
+}
+
+// Options in this group are listed under "CONNECTION OPTIONS" in the help.
+def grp_connect : OptionGroup<"connection">, HelpText<"CONNECTION OPTIONS">;
+
+// --listen, with -L as its short alias.
+defm listen: SJ<"listen", "Host and port to listen on. Format: [host]:port or protocol://[host]:port (e.g., tcp://localhost:1234, unix:///path/to/socket). Short form: -L">,
+  MetaVarName<"<[host]:port>">,
+  Group<grp_connect>;
+def: Separate<["-"], "L">, Alias<listen>,
+  Group<grp_connect>;
+
+// --socket-file, with -f as its short alias.
+defm socket_file: SJ<"socket-file", "Write listening socket information (port number for TCP or path for Unix domain sockets) to the specified file. Short form: -f">,
+  MetaVarName<"<path>">,
+  Group<grp_connect>;
+def: Separate<["-"], "f">, Alias<socket_file>,
+  Group<grp_connect>;
+
+// --gdbserver-port, with -P as its short alias.
+defm gdbserver_port: SJ<"gdbserver-port", "Port to use for spawned gdbserver instances. If 0 or unspecified, a port will be chosen automatically. Short form: -P">,
+  MetaVarName<"<port>">,
+  Group<grp_connect>;
+def: Separate<["-"], "P">, Alias<gdbserver_port>,
+  Group<grp_connect>;
+
+// --child-platform-fd; flagged HelpHidden so it is left out of the help text.
+defm child_platform_fd: SJ<"child-platform-fd", "File descriptor for communication with parent platform process (internal use only).">,
+  MetaVarName<"<fd>">,
+  Group<grp_connect>,
+  Flags<[HelpHidden]>;
+
+// Options in this group are listed under "GENERAL OPTIONS" in the help.
+def grp_general : OptionGroup<"general options">, HelpText<"GENERAL OPTIONS">;
+
+def server: F<"server">,
+  HelpText<"Run in server mode, accepting multiple client connections sequentially. Without this flag, the server exits after handling the first connection.">,
+  Group<grp_general>;
+
+// --log-channels, with -c as its short alias.
+defm log_channels: SJ<"log-channels", "Channels to log. A colon-separated list of entries. Each entry starts with a channel followed by a space-separated list of categories. Common channels: lldb, gdb-remote, platform, process. Short form: -c">,
+  MetaVarName<"<channel1 categories...:channel2 categories...>">,
+  Group<grp_general>;
+def: Separate<["-"], "c">, Alias<log_channels>,
+  Group<grp_general>;
+
+// --log-file, with -l as its short alias.
+defm log_file: SJ<"log-file", "Destination file to log to. If empty, log to stderr. Short form: -l">,
+  MetaVarName<"<file>">,
+  Group<grp_general>;
+def: Separate<["-"], "l">, Alias<log_file>,
+  Group<grp_general>;
+
+// --debug and --verbose are still accepted but hidden from the help text.
+def debug: F<"debug">,
+  HelpText<"(Unused, kept for backward compatibility)">,
+  Group<grp_general>,
+  Flags<[HelpHidden]>;
+
+def verbose: F<"verbose">,
+  HelpText<"(Unused, kept for backward compatibility)">,
+  Group<grp_general>,
+  Flags<[HelpHidden]>;
+
+// --help, with -h as its short alias.
+def help: F<"help">,
+  HelpText<"Display this help message and exit.">,
+  Group<grp_general>;
+def: Flag<["-"], "h">, Alias<help>,
+  Group<grp_general>;
+
+// A bare "--": the arguments after it are kept as this option's values.
+def REM : R<["--"], "">,
+  HelpText<"Arguments to pass to launched gdbserver instances.">,
+  MetaVarName<"program args">;
diff --git a/lldb/tools/lldb-server/lldb-platform.cpp b/lldb/tools/lldb-server/lldb-platform.cpp
index 0bd928507ba89..59b1eb419bc2b 100644
--- a/lldb/tools/lldb-server/lldb-platform.cpp
+++ b/lldb/tools/lldb-server/lldb-platform.cpp
@@ -21,6 +21,9 @@
 #include <fstream>
 #include <optional>
 
+#include "llvm/Option/ArgList.h"
+#include "llvm/Option/OptTable.h"
+#include "llvm/Option/Option.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/ScopedPrinter.h"
 #include "llvm/Support/WithColor.h"
@@ -56,22 +59,69 @@ using namespace llvm;
 // of target CPUs. For now, let's just use 100.
 static const int backlog = 100;
 static const int socket_error = -1;
-static int g_debug = 0;
-static int g_verbose = 0;
-static int g_server = 0;
-
-// option descriptors for getopt_long_only()
-static struct option g_long_options[] = {
-    {"debug", no_argument, &g_debug, 1},
-    {"verbose", no_argument, &g_verbose, 1},
-    {"log-file", required_argument, nullptr, 'l'},
-    {"log-channels", required_argument, nullptr, 'c'},
-    {"listen", required_argument, nullptr, 'L'},
-    {"gdbserver-port", required_argument, nullptr, 'P'},
-    {"socket-file", required_argument, nullptr, 'f'},
-    {"server", no_argument, &g_server, 1},
-    {"child-platform-fd", required_argument, nullptr, 2},
-    {nullptr, 0, nullptr, 0}};
+
+namespace {
+using namespace llvm::opt;
+
+enum ID {
+  OPT_INVALID = 0, // This is not an option ID.
+#define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__),
+#include "PlatformOptions.inc"
+#undef OPTION
+};
+
+#define OPTTABLE_STR_TABLE_CODE
+#include "PlatformOptions.inc"
+#undef OPTTABLE_STR_TABLE_CODE
+
+#define OPTTABLE_PREFIXES_TABLE_CODE
+#include "PlatformOptions.inc"
+#undef OPTTABLE_PREFIXES_TABLE_CODE
+
+static constexpr opt::OptTable::Info InfoTable[] = {
+#define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__),
+#include "PlatformOptions.inc"
+#undef OPTION
+};
+
+class PlatformOptTable : public opt::GenericOptTable {
+public:
+  PlatformOptTable()
+      : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {}
+
+  void PrintHelp(llvm::StringRef Name) {
+    std::string Usage =
+        (Name + " [options] --listen <[host]:port> [[--] program args...]")
+            .str();
+
+    std::string Title = "lldb-server platform";
+
+    OptTable::printHelp(llvm::outs(), Usage.c_str(), Title.c_str());
+
+    llvm::outs() << R"(
+DESCRIPTION
+  Acts as a platform server for remote debugging. When LLDB clients connect,
+  the platform server handles platform operations (file transfers, process
+  launching) and spawns debug server instances (lldb-server gdbserver) to
+  handle actual debugging sessions.
+
+  By default, the server exits after handling one connection. Use --server
+  to keep running and accept multiple connections sequentially.
+
+EXAMPLES
+  # Listen on port 1234, exit after first connection
+  lldb-server platform --listen tcp://0.0.0.0:1234
+
+  # Listen on port 5555, accept multiple connections
+  lldb-server platform --server --listen tcp://localhost:5555
+
+  # Listen on Unix domain socket
+  lldb-server platform --listen unix:///tmp/lldb-server.sock
+
+)";
+  }
+};
+} // namespace
 
 #if defined(__APPLE__)
 #define LOW_PORT (IPPORT_RESERVED)
@@ -97,12 +147,11 @@ static void signal_handler(int signo) {
 }
 #endif
 
-static void display_usage(const char *progname, const char *subcommand) {
-  fprintf(stderr, "Usage:\n  %s %s [--log-file log-file-name] [--log-channels "
-                  "log-channel-list] [--port-file port-file-path] --server "
-                  "--listen port\n",
-          progname, subcommand);
-  exit(0);
+static void display_usage(PlatformOptTable &Opts, const char *progname,
+                          const char *subcommand) {
+  std::string Name =
+      (llvm::sys::path::filename(progname) + " " + subcommand).str();
+  Opts.PrintHelp(Name);
 }
 
 static Status parse_listen_host_port(Socket::SocketProtocol &protocol,
@@ -261,7 +310,8 @@ static Status spawn_process(const char *progname, const FileSpec &prog,
                             const Socket *conn_socket, uint16_t gdb_port,
                             const lldb_private::Args &args,
                             const std::string &log_file,
-                            const StringRef log_channels, MainLoop &main_loop) {
+                            const StringRef log_channels, MainLoop &main_loop,
+                            bool multi_client) {
   Status error;
   SharedSocket shared_socket(conn_socket, error);
   if (error.Fail())
@@ -297,9 +347,12 @@ static Status spawn_process(const char *progname, const FileSpec &prog,
 
   launch_info.SetLaunchInSeparateProcessGroup(false);
 
-  if (g_server)
+  // Set up process monitor callback based on whether we're in server mode.
+  if (multi_client)
+    // In server mode: empty callback (don't terminate when child exits).
     launch_info.SetMonitorProcessCallback([](lldb::pid_t, int, int) {});
   else
+    // In single-client mode: terminate main loop when child exits.
     launch_info.SetMonitorProcessCallback([&main_loop](lldb::pid_t, int, int) {
       main_loop.AddPendingCallback(
           [](MainLoopBase &loop) { loop.RequestTermination(); });
@@ -371,107 +424,101 @@ int main_platform(int argc, char *argv[]) {
   signal(SIGPIPE, SIG_IGN);
   signal(SIGHUP, signal_handler);
 #endif
-  int long_option_index = 0;
-  Status error;
-  std::string listen_host_port;
-  int ch;
 
-  std::string log_file;
-  StringRef
-      log_channels; // e.g. "lldb process threads:gdb-remote default:linux all"
+  // Special handling for 'help' as first argument.
+  if (argc > 0 && strcmp(argv[0], "help") == 0) {
+    PlatformOptTable Opts;
+    display_usage(Opts, progname, subcommand);
+    return EXIT_SUCCESS;
+  }
 
+  Status error;
   shared_fd_t fd = SharedSocket::kInvalidFD;
-
   uint16_t gdbserver_port = 0;
-
   FileSpec socket_file;
-  bool show_usage = false;
-  int option_error = 0;
 
-  std::string short_options(OptionParser::GetShortOptionString(g_long_options));
+  PlatformOptTable Opts;
+  BumpPtrAllocator Alloc;
+  StringSaver Saver(Alloc);
+  bool HasError = false;
 
-#if __GLIBC__
-  optind = 0;
-#else
-  optreset = 1;
-  optind = 1;
-#endif
+  opt::InputArgList Args =
+      Opts.parseArgs(argc, argv, OPT_UNKNOWN, Saver, [&](llvm::StringRef Msg) {
+        WithColor::error() << Msg << "\n";
+        HasError = true;
+      });
 
-  while ((ch = getopt_long_only(argc, argv, short_options.c_str(),
-                                g_long_options, &long_option_index)) != -1) {
-    switch (ch) {
-    case 0: // Any optional that auto set themselves will return 0
-      break;
+  std::string Name =
+      (llvm::sys::path::filename(progname) + " " + subcommand).str();
+  std::string HelpText =
+      "Use '" + Name + " --help' for a complete list of options.\n";
 
-    case 'L':
-      listen_host_port.append(optarg);
-      break;
+  if (HasError) {
+    llvm::errs() << HelpText;
+    return EXIT_FAILURE;
+  }
 
-    case 'l': // Set Log File
-      if (optarg && optarg[0])
-        log_file.assign(optarg);
-      break;
+  if (Args.hasArg(OPT_help)) {
+    display_usage(Opts, progname, subcommand);
+    return EXIT_SUCCESS;
+  }
 
-    case 'c': // Log Channels
-      if (optarg && optarg[0])
-        log_channels = StringRef(optarg);
-      break;
+  // Parse arguments.
+  std::string listen_host_port = Args.getLastArgValue(OPT_listen).str();
+  std::string log_file = Args.getLastArgValue(OPT_log_file).str();
+  StringRef log_channels = Args.getLastArgValue(OPT_log_channels);
+  bool multi_client = Args.hasArg(OPT_server);
+  [[maybe_unused]] bool debug = Args.hasArg(OPT_debug);
+  [[maybe_unused]] bool verbose = Args.hasArg(OPT_verbose);
+
+  if (Args.hasArg(OPT_socket_file)) {
+    socket_file.SetFile(Args.getLastArgValue(OPT_socket_file),
+                        FileSpec::Style::native);
+  }
 
-    case 'f': // Socket file
-      if (optarg && optarg[0])
-        socket_file.SetFile(optarg, FileSpec::Style::native);
-      break;
+  if (Args.hasArg(OPT_gdbserver_port)) {
+    if (!llvm::to_integer(Args.getLastArgValue(OPT_gdbserver_port),
+                          gdbserver_port)) {
+      WithColor::error() << "invalid --gdbserver-port value\n";
+      return EXIT_FAILURE;
+    }
+  }
 
-    case 'P':
-    case 'm':
-    case 'M': {
-      uint16_t portnum;
-      if (!llvm::to_integer(optarg, portnum)) {
-        WithColor::error() << "invalid port number string " << optarg << "\n";
-        option_error = 2;
-        break;
-      }
-      // Note the condition gdbserver_port > HIGH_PORT is valid in case of using
-      // --child-platform-fd. Check gdbserver_port later.
-      if (ch == 'P')
-        gdbserver_port = portnum;
-      else if (gdbserver_port == 0)
-        gdbserver_port = portnum;
-    } break;
-
-    case 2: {
-      uint64_t _fd;
-      if (!llvm::to_integer(optarg, _fd)) {
-        WithColor::error() << "invalid fd " << optarg << "\n";
-        option_error = 6;
-      } else
-        fd = (shared_fd_t)_fd;
-    } break;
-
-    case 'h': /* fall-through is intentional */
-    case '?':
-      show_usage = true;
-      break;
+  if (Args.hasArg(OPT_child_platform_fd)) {
+    uint64_t _fd;
+    if (!llvm::to_integer(Args.getLastArgValue(OPT_child_platform_fd), _fd)) {
+      WithColor::error() << "invalid --child-platform-fd value\n";
+      return EXIT_FAILURE;
     }
+    fd = (shared_fd_t)_fd;
   }
 
   if (!LLDBServerUtilities::SetupLogging(log_file, log_channels, 0))
     return -1;
 
   // Print usage and exit if no listening port is specified.
-  if (listen_host_port.empty() && fd == SharedSocket::kInvalidFD)
-    show_usage = true;
+  if (listen_host_port.empty() && fd == SharedSocket::kInvalidFD) {
+    WithColor::error() << "either --listen or --child-platform-fd is required\n"
+                       << HelpText;
+    return EXIT_FAILURE;
+  }
 
-  if (show_usage || option_error) {
-    display_usage(progname, subcommand);
-    exit(option_error);
+  // Get remaining arguments for inferior.
+  std::vector<llvm::StringRef> Inputs;
+  for (opt::Arg *Arg : Args.filtered(OPT_INPUT))
+    Inputs.push_back(Arg->getValue());
+  if (opt::Arg *Arg = Args.getLastArg(OPT_REM)) {
+    for (const char *Val : Arg->getValues())
+      Inputs.push_back(Val);
   }
 
-  // Skip any options we consumed with getopt_long_only.
-  argc -= optind;
-  argv += optind;
   lldb_private::Args inferior_arguments;
-  inferior_arguments.SetArguments(argc, const_cast<const char **>(argv));
+  if (!Inputs.empty()) {
+    std::vector<const char *> args_ptrs;
+    for (const auto &Input : Inputs)
+      args_ptrs.push_back(Input.data());
+    inferior_arguments.SetArguments(args_ptrs.size(), args_ptrs.data());
+  }
 
   FileSpec debugserver_path = GetDebugserverPath();
   if (!debugserver_path) {
@@ -514,7 +561,7 @@ int main_platform(int argc, char *argv[]) {
     platform.SetConnection(
         std::make_unique<ConnectionFileDescriptor>(std::move(socket)));
     client_handle(platform, inferior_arguments);
-    return 0;
+    return EXIT_SUCCESS;
   }
 
   if (gdbserver_port != 0 &&
@@ -522,7 +569,7 @@ int main_platform(int argc, char *argv[]) {
     WithColor::error() << llvm::formatv("Port number {0} is not in the "
                                         "valid user port range of {1} - {2}\n",
                                         gdbserver_port, LOW_PORT, HIGH_PORT);
-    return 1;
+    return EXIT_FAILURE;
   }
 
   Socket::SocketProtocol protocol = Socket::ProtocolUnixDomain;
@@ -559,7 +606,7 @@ int main_platform(int argc, char *argv[]) {
     if (error.Fail()) {
       fprintf(stderr, "failed to write socket id to %s: %s\n",
               socket_file.GetPath().c_str(), error.AsCString());
-      return 1;
+      return EXIT_FAILURE;
     }
   }
 
@@ -577,22 +624,22 @@ int main_platform(int argc, char *argv[]) {
     llvm::Expected<std::vector<MainLoopBase::ReadHandleUP>> platform_handles =
         platform_sock->Accept(
             main_loop, [progname, gdbserver_port, &inferior_arguments, log_file,
-                        log_channels, &main_loop,
+                        log_channels, &main_loop, multi_client,
                         &platform_handles](std::unique_ptr<Socket> sock_up) {
               printf("Connection established.\n");
               Status error = spawn_process(
                   progname, HostInfo::GetProgramFileSpec(), sock_up.get(),
                   gdbserver_port, inferior_arguments, log_file, log_channels,
-                  main_loop);
+                  main_loop, multi_client);
               if (error.Fail()) {
                 Log *log = GetLog(LLDBLog::Platform);
                 LLDB_LOGF(log, "spawn_process failed: %s", error.AsCString());
                 WithColor::error()
                     << "spawn_process failed: " << error.AsCString() << "\n";
-                if (!g_server)
+                if (!multi_client)
                   main_loop.RequestTermination();
               }
-              if (!g_server)
+              if (!multi_client)
                 platform_handles->clear();
             });
     if (!platform_handles) {
@@ -616,5 +663,5 @@ int main_platform(int argc, char *argv[]) {
 
   fprintf(stderr, "lldb-server exiting...\n");
 
-  return 0;
+  return EXIT_SUCCESS;
 }
diff --git a/lldb/unittests/Utility/RegisterValueTest.cpp b/lldb/unittests/Utility/RegisterValueTest.cpp
index 6239dbe21634a..7b27e841cbec5 100644
--- a/lldb/unittests/Utility/RegisterValueTest.cpp
+++ b/lldb/unittests/Utility/RegisterValueTest.cpp
@@ -57,13 +57,12 @@ TEST(RegisterValueTest, GetScalarValue) {
                    APInt(128, 0x7766554433221100)));
 }
 
-static const Scalar etalon128(APInt(128, 0xffeeddccbbaa9988ull) << 64 |
-                              APInt(128, 0x7766554433221100ull));
-
-void TestSetValueFromData128(void *src, const lldb::ByteOrder endianness) {
-  RegisterInfo ri{"uint128_register",
+void TestSetValueFromData(const Scalar &etalon, void *src, size_t src_byte_size,
+                          const lldb::ByteOrder endianness,
+                          const RegisterValue::Type register_value_type) {
+  RegisterInfo ri{"test",
                   nullptr,
-                  16,
+                  static_cast<uint32_t>(src_byte_size),
                   0,
                   lldb::Encoding::eEncodingUint,
                   lldb::Format::eFormatDefault,
@@ -71,26 +70,289 @@ void TestSetValueFromData128(void *src, const lldb::ByteOrder endianness) {
                   nullptr,
                   nullptr,
                   nullptr};
-  DataExtractor src_extractor(src, 16, endianness, 8);
+  DataExtractor src_extractor(src, src_byte_size, endianness, 8);
   RegisterValue rv;
   EXPECT_TRUE(rv.SetValueFromData(ri, src_extractor, 0, false).Success());
   Scalar s;
   EXPECT_TRUE(rv.GetScalarValue(s));
-  EXPECT_EQ(s, etalon128);
+  EXPECT_EQ(rv.GetType(), register_value_type);
+  EXPECT_EQ(s, etalon);
+}
+
+static const Scalar etalon7(APInt(32, 0x0000007F));
+
+TEST(RegisterValueTest, SetValueFromData_7_le) {
+  uint8_t src[] = {0x7F};
+  TestSetValueFromData(etalon7, src, 1, lldb::ByteOrder::eByteOrderLittle,
+                       RegisterValue::eTypeUInt8);
+}
+
+TEST(RegisterValueTest, SetValueFromData_7_be) {
+  uint8_t src[] = {0x7F};
+  TestSetValueFromData(etalon7, src, 1, lldb::ByteOrder::eByteOrderBig,
+                       RegisterValue::eTypeUInt8);
+}
+
+static const Scalar etalon8(APInt(32, 0x000000FE));
+
+TEST(RegisterValueTest, SetValueFromData_8_le) {
+  uint8_t src[] = {0xFE};
+  TestSetValueFromData(etalon8, src, 1, lldb::ByteOrder::eByteOrderLittle,
+                       RegisterValue::eTypeUInt8);
+}
+
+TEST(RegisterValueTest, SetValueFromData_8_be) {
+  uint8_t src[] = {0xFE};
+  TestSetValueFromData(etalon8, src, 1, lldb::ByteOrder::eByteOrderBig,
+                       RegisterValue::eTypeUInt8);
+}
+
+static const Scalar etalon9(APInt(32, 0x000001FE));
+
+TEST(RegisterValueTest, SetValueFromData_9_le) {
+  uint8_t src[] = {0xFE, 0x01};
+  TestSetValueFromData(etalon9, src, 2, lldb::ByteOrder::eByteOrderLittle,
+                       RegisterValue::eTypeUInt16);
+}
+
+TEST(RegisterValueTest, SetValueFromData_9_be) {
+  uint8_t src[] = {0x01, 0xFE};
+  TestSetValueFromData(etalon9, src, 2, lldb::ByteOrder::eByteOrderBig,
+                       RegisterValue::eTypeUInt16);
+}
+
+static const Scalar etalon15(APInt(32, 0x00007FED));
+
+TEST(RegisterValueTest, SetValueFromData_15_le) {
+  uint8_t src[] = {0xED, 0x7F};
+  TestSetValueFromData(etalon15, src, 2, lldb::ByteOrder::eByteOrderLittle,
+                       RegisterValue::eTypeUInt16);
+}
+
+TEST(RegisterValueTest, SetValueFromData_15_be) {
+  uint8_t src[] = {0x7F, 0xED};
+  TestSetValueFromData(etalon15, src, 2, lldb::ByteOrder::eByteOrderBig,
+                       RegisterValue::eTypeUInt16);
+}
+
+static const Scalar etalon16(APInt(32, 0x0000FEDC));
+
+TEST(RegisterValueTest, SetValueFromData_16_le) {
+  uint8_t src[] = {0xDC, 0xFE};
+  TestSetValueFromData(etalon16, src, 2, lldb::ByteOrder::eByteOrderLittle,
+                       RegisterValue::eTypeUInt16);
+}
+
+TEST(RegisterValueTest, SetValueFromData_16_be) {
+  uint8_t src[] = {0xFE, 0xDC};
+  TestSetValueFromData(etalon16, src, 2, lldb::ByteOrder::eByteOrderBig,
+                       RegisterValue::eTypeUInt16);
+}
+
+static const Scalar etalon17(APInt(32, 0x0001FEDC));
+
+TEST(RegisterValueTest, SetValueFromData_17_le) {
+  uint8_t src[] = {0xDC, 0xFE, 0x01};
+  TestSetValueFromData(etalon17, src, 3, lldb::ByteOrder::eByteOrderLittle,
+                       RegisterValue::eTypeUInt32);
+}
+
+TEST(RegisterValueTest, SetValueFromData_17_be) {
+  uint8_t src[] = {0x01, 0xFE, 0xDC};
+  TestSetValueFromData(etalon17, src, 3, lldb::ByteOrder::eByteOrderBig,
+                       RegisterValue::eTypeUInt32);
+}
+
+static const Scalar etalon24(APInt(32, 0x00FEDCBA));
+
+TEST(RegisterValueTest, SetValueFromData_24_le) {
+  uint8_t src[] = {0xBA, 0xDC, 0xFE};
+  TestSetValueFromData(etalon24, src, 3, lldb::ByteOrder::eByteOrderLittle,
+                       RegisterValue::eTypeUInt32);
+}
+
+TEST(RegisterValueTest, SetValueFromData_24_be) {
+  uint8_t src[] = {0xFE, 0xDC, 0xBA};
+  TestSetValueFromData(etalon24, src, 3, lldb::ByteOrder::eByteOrderBig,
+                       RegisterValue::eTypeUInt32);
+}
+
+static const Scalar etalon31(APInt(32, 0x7EDCBA98));
+
+TEST(RegisterValueTest, SetValueFromData_31_le) {
+  uint8_t src[] = {0x98, 0xBA, 0xDC, 0x7E};
+  TestSetValueFromData(etalon31, src, 4, lldb::ByteOrder::eByteOrderLittle,
+                       RegisterValue::eTypeUInt32);
+}
+
+TEST(RegisterValueTest, SetValueFromData_31_be) {
+  uint8_t src[] = {0x7E, 0xDC, 0xBA, 0x98};
+  TestSetValueFromData(etalon31, src, 4, lldb::ByteOrder::eByteOrderBig,
+                       RegisterValue::eTypeUInt32);
+}
+
+static const Scalar etalon32(APInt(32, 0xFEDCBA98));
+
+TEST(RegisterValueTest, SetValueFromData_32_le) {
+  uint8_t src[] = {0x98, 0xBA, 0xDC, 0xFE};
+  TestSetValueFromData(etalon32, src, 4, lldb::ByteOrder::eByteOrderLittle,
+                       RegisterValue::eTypeUInt32);
 }
 
-// Test that the "RegisterValue::SetValueFromData" method works correctly
-// with 128-bit little-endian data that represents an integer.
+TEST(RegisterValueTest, SetValueFromData_32_be) {
+  uint8_t src[] = {0xFE, 0xDC, 0xBA, 0x98};
+  TestSetValueFromData(etalon32, src, 4, lldb::ByteOrder::eByteOrderBig,
+                       RegisterValue::eTypeUInt32);
+}
+
+static const Scalar etalon33(APInt(64, 0x00000001FEDCBA98));
+
+TEST(RegisterValueTest, SetValueFromData_33_le) {
+  uint8_t src[] = {0x98, 0xBA, 0xDC, 0xFE, 0x01};
+  TestSetValueFromData(etalon33, src, 5, lldb::ByteOrder::eByteOrderLittle,
+                       RegisterValue::eTypeUInt64);
+}
+
+TEST(RegisterValueTest, SetValueFromData_33_be) {
+  uint8_t src[] = {0x01, 0xFE, 0xDC, 0xBA, 0x98};
+  TestSetValueFromData(etalon33, src, 5, lldb::ByteOrder::eByteOrderBig,
+                       RegisterValue::eTypeUInt64);
+}
+
+static const Scalar etalon40(APInt(64, 0x000000FEDCBA9876));
+
+TEST(RegisterValueTest, SetValueFromData_40_le) {
+  uint8_t src[] = {0x76, 0x98, 0xBA, 0xDC, 0xFE};
+  TestSetValueFromData(etalon40, src, 5, lldb::ByteOrder::eByteOrderLittle,
+                       RegisterValue::eTypeUInt64);
+}
+
+TEST(RegisterValueTest, SetValueFromData_40_be) {
+  uint8_t src[] = {0xFE, 0xDC, 0xBA, 0x98, 0x76};
+  TestSetValueFromData(etalon40, src, 5, lldb::ByteOrder::eByteOrderBig,
+                       RegisterValue::eTypeUInt64);
+}
+
+static const Scalar etalon63(APInt(64, 0x7EDCBA9876543210));
+
+TEST(RegisterValueTest, SetValueFromData_63_le) {
+  uint8_t src[] = {0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC, 0x7E};
+  TestSetValueFromData(etalon63, src, 8, lldb::ByteOrder::eByteOrderLittle,
+                       RegisterValue::eTypeUInt64);
+}
+
+TEST(RegisterValueTest, SetValueFromData_63_be) {
+  uint8_t src[] = {0x7E, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10};
+  TestSetValueFromData(etalon63, src, 8, lldb::ByteOrder::eByteOrderBig,
+                       RegisterValue::eTypeUInt64);
+}
+
+static const Scalar etalon64(APInt(64, 0xFEDCBA9876543210));
+
+TEST(RegisterValueTest, SetValueFromData_64_le) {
+  uint8_t src[] = {0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC, 0xFE};
+  TestSetValueFromData(etalon64, src, 8, lldb::ByteOrder::eByteOrderLittle,
+                       RegisterValue::eTypeUInt64);
+}
+
+TEST(RegisterValueTest, SetValueFromData_64_be) {
+  uint8_t src[] = {0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10};
+  TestSetValueFromData(etalon64, src, 8, lldb::ByteOrder::eByteOrderBig,
+                       RegisterValue::eTypeUInt64);
+}
+
+static const Scalar etalon65(APInt(72, 0x0000000000000001ull) << 1 * 64 |
+                             APInt(72, 0x0706050403020100ull) << 0 * 64);
+
+TEST(RegisterValueTest, SetValueFromData_65_le) {
+  uint8_t src[] = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x01};
+  TestSetValueFromData(etalon65, src, 9, lldb::ByteOrder::eByteOrderLittle,
+                       RegisterValue::eTypeUIntN);
+}
+
+TEST(RegisterValueTest, SetValueFromData_65_be) {
+  uint8_t src[] = {0x01, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00};
+  TestSetValueFromData(etalon65, src, 9, lldb::ByteOrder::eByteOrderBig,
+                       RegisterValue::eTypeUIntN);
+}
+
+static const Scalar etalon127(APInt(128, 0x7f0e0d0c0b0a0908ull) << 1 * 64 |
+                              APInt(128, 0x0706050403020100ull) << 0 * 64);
+
+TEST(RegisterValueTest, SetValueFromData_127_le) {
+  uint8_t src[] = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+                   0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x7f};
+  TestSetValueFromData(etalon127, src, 16, lldb::ByteOrder::eByteOrderLittle,
+                       RegisterValue::eTypeUIntN);
+}
+
+TEST(RegisterValueTest, SetValueFromData_127_be) {
+  uint8_t src[] = {0x7f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08,
+                   0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00};
+  TestSetValueFromData(etalon127, src, 16, lldb::ByteOrder::eByteOrderBig,
+                       RegisterValue::eTypeUIntN);
+}
+
+static const Scalar etalon128(APInt(128, 0xff0e0d0c0b0a0908ull) << 1 * 64 |
+                              APInt(128, 0x0706050403020100ull) << 0 * 64);
+
 TEST(RegisterValueTest, SetValueFromData_128_le) {
-  uint8_t src[] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
-                   0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff};
-  TestSetValueFromData128(src, lldb::ByteOrder::eByteOrderLittle);
+  uint8_t src[] = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+                   0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0xff};
+  TestSetValueFromData(etalon128, src, 16, lldb::ByteOrder::eByteOrderLittle,
+                       RegisterValue::eTypeUIntN);
 }
 
-// Test that the "RegisterValue::SetValueFromData" method works correctly
-// with 128-bit big-endian data that represents an integer.
 TEST(RegisterValueTest, SetValueFromData_128_be) {
-  uint8_t src[] = {0xff, 0xee, 0xdd, 0xcc, 0xbb, 0xaa, 0x99, 0x88,
-                   0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11, 0x00};
-  TestSetValueFromData128(src, lldb::ByteOrder::eByteOrderBig);
+  uint8_t src[] = {0xff, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08,
+                   0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00};
+  TestSetValueFromData(etalon128, src, 16, lldb::ByteOrder::eByteOrderBig,
+                       RegisterValue::eTypeUIntN);
+}
+
+static const Scalar etalon256(APInt(256, 0xff1e1d1c1b1a1918ull) << 3 * 64 |
+                              APInt(256, 0x1716151413121110ull) << 2 * 64 |
+                              APInt(256, 0x0f0e0d0c0b0a0908ull) << 1 * 64 |
+                              APInt(256, 0x0706050403020100ull) << 0 * 64);
+
+TEST(RegisterValueTest, SetValueFromData_256_le) {
+  uint8_t src[] = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+                   0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+                   0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+                   0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0xff};
+  TestSetValueFromData(etalon256, src, 32, lldb::ByteOrder::eByteOrderLittle,
+                       RegisterValue::eTypeUIntN);
+}
+
+TEST(RegisterValueTest, SetValueFromData_256_be) {
+  uint8_t src[] = {0xff, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18,
+                   0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10,
+                   0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08,
+                   0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00};
+  TestSetValueFromData(etalon256, src, 32, lldb::ByteOrder::eByteOrderBig,
+                       RegisterValue::eTypeUIntN);
+}
+
+static const Scalar etalon257(APInt(512, 0x0000000000000001ull) << 4 * 64 |
+                              APInt(512, 0x1f1e1d1c1b1a1918ull) << 3 * 64 |
+                              APInt(512, 0x1716151413121110ull) << 2 * 64 |
+                              APInt(512, 0x0f0e0d0c0b0a0908ull) << 1 * 64 |
+                              APInt(512, 0x0706050403020100ull) << 0 * 64);
+
+TEST(RegisterValueTest, SetValueFromData_257_le) {
+  uint8_t src[] = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+                   0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11,
+                   0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a,
+                   0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x01};
+  TestSetValueFromData(etalon257, src, 33, lldb::ByteOrder::eByteOrderLittle,
+                       RegisterValue::eTypeUIntN);
+}
+
+TEST(RegisterValueTest, SetValueFromData_257_be) {
+  uint8_t src[] = {0x01, 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18,
+                   0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f,
+                   0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06,
+                   0x05, 0x04, 0x03, 0x02, 0x01, 0x00};
+  TestSetValueFromData(etalon257, src, 33, lldb::ByteOrder::eByteOrderBig,
+                       RegisterValue::eTypeUIntN);
 }
diff --git a/llvm/cmake/modules/TableGen.cmake b/llvm/cmake/modules/TableGen.cmake
index 9a2e73a1e3718..84c03cd6432ed 100644
--- a/llvm/cmake/modules/TableGen.cmake
+++ b/llvm/cmake/modules/TableGen.cmake
@@ -66,6 +66,16 @@ function(tablegen project ofn)
     list(APPEND LLVM_TABLEGEN_FLAGS "-omit-comments")
   endif()
 
+  set(EXTRA_OUTPUTS)
+  if("-gen-register-info" IN_LIST ARGN)
+    cmake_path(GET ofn STEM OUTPUT_BASENAME)
+    list(APPEND EXTRA_OUTPUTS
+         ${CMAKE_CURRENT_BINARY_DIR}/${OUTPUT_BASENAME}Enums.inc
+         ${CMAKE_CURRENT_BINARY_DIR}/${OUTPUT_BASENAME}Header.inc
+         ${CMAKE_CURRENT_BINARY_DIR}/${OUTPUT_BASENAME}MCDesc.inc
+         ${CMAKE_CURRENT_BINARY_DIR}/${OUTPUT_BASENAME}TargetDesc.inc)
+  endif()
+
   # MSVC can't support long string literals ("long" > 65534 bytes)[1], so if there's
   # a possibility of generated tables being consumed by MSVC, generate arrays of
   # char literals, instead. If we're cross-compiling, then conservatively assume
@@ -126,7 +136,7 @@ function(tablegen project ofn)
     set(LLVM_TABLEGEN_JOB_POOL "")
   endif()
 
-  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn}
+  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn} ${EXTRA_OUTPUTS}
     COMMAND ${tablegen_exe} ${ARG_UNPARSED_ARGUMENTS}
     ${tblgen_includes}
     ${LLVM_TABLEGEN_FLAGS}
diff --git a/llvm/docs/TableGen/BackEnds.rst b/llvm/docs/TableGen/BackEnds.rst
index 7f571378860b2..1e3cb8783df16 100644
--- a/llvm/docs/TableGen/BackEnds.rst
+++ b/llvm/docs/TableGen/BackEnds.rst
@@ -355,6 +355,13 @@ ClangAttrParsedAttrKinds
 ``AttributeList::getKind`` function, mapping a string (and syntax) to a parsed
 attribute ``AttributeList::Kind`` enumeration.
 
+ClangAttrIsTypeDependent
+------------------------
+
+**Purpose**: Creates ``AttrIsTypeDependent.inc``, which is used to implement the
+``Sema::CheckAttributesOnDeducedType`` function, mapping an attribute kind to a
+Sema function if it exists.
+
 ClangAttrDump
 -------------
 
diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h
index fdb3b84b73a1f..7e73cc1957c05 100644
--- a/llvm/include/llvm/ADT/APInt.h
+++ b/llvm/include/llvm/ADT/APInt.h
@@ -2440,6 +2440,27 @@ LLVM_ABI APInt fshl(const APInt &Hi, const APInt &Lo, const APInt &Shift);
 /// (4) fshr(i8 255, i8 0, i8 9)  = fshr(i8 255, i8 0, i8 1) // 9 % 8
 LLVM_ABI APInt fshr(const APInt &Hi, const APInt &Lo, const APInt &Shift);
 
+/// Perform a carry-less multiply, also known as XOR multiplication, and return
+/// low-bits. All arguments and result have the same bitwidth.
+///
+/// Examples:
+/// (1) clmul(i4 1, i4 2)   = 2
+/// (2) clmul(i4 5, i4 6)   = 14
+/// (3) clmul(i4 -4, i4 2)  = -8
+/// (4) clmul(i4 -4, i4 -5) = 4
+LLVM_ABI APInt clmul(const APInt &LHS, const APInt &RHS);
+
+/// Perform a reversed carry-less multiply.
+///
+/// clmulr(a, b) = bitreverse(clmul(bitreverse(a), bitreverse(b)))
+LLVM_ABI APInt clmulr(const APInt &LHS, const APInt &RHS);
+
+/// Perform a carry-less multiply, and return high-bits. All arguments and
+/// result have the same bitwidth.
+///
+/// clmulh(a, b) = clmulr(a, b) >> 1
+LLVM_ABI APInt clmulh(const APInt &LHS, const APInt &RHS);
+
 } // namespace APIntOps
 
 // See friend declaration above. This additional declaration is required in
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index cec7d09f494d6..4c932c523e423 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3492,6 +3492,13 @@ class LLVM_ABI TargetLoweringBase {
     return MathUsed && (VT.isSimple() || !isOperationExpand(Opcode, VT));
   }
 
+  // Return true if the target wants to optimize the mul overflow intrinsic
+  // for the given \p VT.
+  virtual bool shouldOptimizeMulOverflowWithZeroHighBits(LLVMContext &Context,
+                                                         EVT VT) const {
+    return false;
+  }
+
   // Return true if it is profitable to use a scalar input to a BUILD_VECTOR
   // even if the vector itself has multiple uses.
   virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const {
diff --git a/llvm/include/llvm/IR/Constant.h b/llvm/include/llvm/IR/Constant.h
index 0be1fc172ebd4..e8ce453559ed7 100644
--- a/llvm/include/llvm/IR/Constant.h
+++ b/llvm/include/llvm/IR/Constant.h
@@ -79,6 +79,9 @@ class Constant : public User {
   /// Return true if the value is the smallest signed value.
   LLVM_ABI bool isMinSignedValue() const;
 
+  /// Return true if the value is the largest signed value.
+  LLVM_ABI bool isMaxSignedValue() const;
+
   /// Return true if this is a finite and non-zero floating-point scalar
   /// constant or a fixed width vector constant with all finite and non-zero
   /// elements.
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index d7db935ee07f1..5a4cc776b26a5 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -170,6 +170,8 @@ def int_dx_splitdouble : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0>
     [LLVMScalarOrSameVectorWidth<0, llvm_double_ty>], [IntrNoMem]>;
 def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
 def int_dx_discard : DefaultAttrsIntrinsic<[], [llvm_i1_ty], []>;
+def int_dx_ddx_coarse : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+def int_dx_ddy_coarse : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
 def int_dx_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
 def int_dx_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
 def int_dx_firstbitlow : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index f39c6cda2c579..2f7c25550a0cc 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -134,6 +134,8 @@ def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]
   def int_spv_group_memory_barrier_with_group_sync
       : DefaultAttrsIntrinsic<[], [], [IntrConvergent]>;
   def int_spv_discard : DefaultAttrsIntrinsic<[], [], []>;
+  def int_spv_ddx_coarse : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+  def int_spv_ddy_coarse : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
   def int_spv_uclamp : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
   def int_spv_sclamp : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
   def int_spv_nclamp : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
diff --git a/llvm/include/llvm/TableGen/Main.h b/llvm/include/llvm/TableGen/Main.h
index bafce3a463acc..daede9f5a46f0 100644
--- a/llvm/include/llvm/TableGen/Main.h
+++ b/llvm/include/llvm/TableGen/Main.h
@@ -14,7 +14,6 @@
 #define LLVM_TABLEGEN_MAIN_H
 
 #include "llvm/Support/CommandLine.h"
-#include <functional>
 #include <map>
 
 namespace llvm {
@@ -30,18 +29,17 @@ struct TableGenOutputFiles {
 };
 
 /// Returns true on error, false otherwise.
-using TableGenMainFn = bool(raw_ostream &OS, const RecordKeeper &Records);
+using TableGenMainFn =
+    function_ref<bool(raw_ostream &OS, const RecordKeeper &Records)>;
 
 /// Perform the action using Records, and store output in OutFiles.
 /// Returns true on error, false otherwise.
-using MultiFileTableGenMainFn = bool(TableGenOutputFiles &OutFiles,
-                                     const RecordKeeper &Records);
+using MultiFileTableGenMainFn = function_ref<bool(TableGenOutputFiles &OutFiles,
+                                                  const RecordKeeper &Records)>;
 
-int TableGenMain(const char *argv0,
-                 std::function<TableGenMainFn> MainFn = nullptr);
+int TableGenMain(const char *argv0, TableGenMainFn MainFn = nullptr);
 
-int TableGenMain(const char *argv0,
-                 std::function<MultiFileTableGenMainFn> MainFn = nullptr);
+int TableGenMain(const char *argv0, MultiFileTableGenMainFn MainFn = nullptr);
 
 /// Controls emitting large character arrays as strings or character arrays.
 /// Typically set to false when building with MSVC.
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 6f44713bd22cd..8968f6b934d77 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -41,6 +41,7 @@
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicsAArch64.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Statepoint.h"
@@ -6676,6 +6677,62 @@ static MinMaxOptResult OptimizeConstMinMax(const Constant *RHSConst,
   return MinMaxOptResult::CannotOptimize;
 }
 
+static Value *simplifySVEIntReduction(Intrinsic::ID IID, Type *ReturnType,
+                                      Value *Op0, Value *Op1) {
+  Constant *C0 = dyn_cast<Constant>(Op0);
+  Constant *C1 = dyn_cast<Constant>(Op1);
+  unsigned Width = ReturnType->getPrimitiveSizeInBits();
+
+  // All false predicate or reduction of neutral values ==> neutral result.
+  switch (IID) {
+  case Intrinsic::aarch64_sve_eorv:
+  case Intrinsic::aarch64_sve_orv:
+  case Intrinsic::aarch64_sve_saddv:
+  case Intrinsic::aarch64_sve_uaddv:
+  case Intrinsic::aarch64_sve_umaxv:
+    if ((C0 && C0->isNullValue()) || (C1 && C1->isNullValue()))
+      return ConstantInt::get(ReturnType, 0);
+    break;
+  case Intrinsic::aarch64_sve_andv:
+  case Intrinsic::aarch64_sve_uminv:
+    if ((C0 && C0->isNullValue()) || (C1 && C1->isAllOnesValue()))
+      return ConstantInt::get(ReturnType, APInt::getMaxValue(Width));
+    break;
+  case Intrinsic::aarch64_sve_smaxv:
+    if ((C0 && C0->isNullValue()) || (C1 && C1->isMinSignedValue()))
+      return ConstantInt::get(ReturnType, APInt::getSignedMinValue(Width));
+    break;
+  case Intrinsic::aarch64_sve_sminv:
+    if ((C0 && C0->isNullValue()) || (C1 && C1->isMaxSignedValue()))
+      return ConstantInt::get(ReturnType, APInt::getSignedMaxValue(Width));
+    break;
+  }
+
+  switch (IID) {
+  case Intrinsic::aarch64_sve_andv:
+  case Intrinsic::aarch64_sve_orv:
+  case Intrinsic::aarch64_sve_smaxv:
+  case Intrinsic::aarch64_sve_sminv:
+  case Intrinsic::aarch64_sve_umaxv:
+  case Intrinsic::aarch64_sve_uminv:
+    // sve_reduce_##(all, splat(X)) ==> X
+    if (C0 && C0->isAllOnesValue()) {
+      if (Value *SplatVal = getSplatValue(Op1)) {
+        assert(SplatVal->getType() == ReturnType && "Unexpected result type!");
+        return SplatVal;
+      }
+    }
+    break;
+  case Intrinsic::aarch64_sve_eorv:
+    // sve_reduce_xor(all, splat(X)) ==> 0
+    if (C0 && C0->isAllOnesValue() && getSplatValue(Op1))
+      return ConstantInt::get(ReturnType, 0);
+    break;
+  }
+
+  return nullptr;
+}
+
 Value *llvm::simplifyBinaryIntrinsic(Intrinsic::ID IID, Type *ReturnType,
                                      Value *Op0, Value *Op1,
                                      const SimplifyQuery &Q,
@@ -7037,6 +7094,17 @@ Value *llvm::simplifyBinaryIntrinsic(Intrinsic::ID IID, Type *ReturnType,
 
     break;
   }
+
+  case Intrinsic::aarch64_sve_andv:
+  case Intrinsic::aarch64_sve_eorv:
+  case Intrinsic::aarch64_sve_orv:
+  case Intrinsic::aarch64_sve_saddv:
+  case Intrinsic::aarch64_sve_smaxv:
+  case Intrinsic::aarch64_sve_sminv:
+  case Intrinsic::aarch64_sve_uaddv:
+  case Intrinsic::aarch64_sve_umaxv:
+  case Intrinsic::aarch64_sve_uminv:
+    return simplifySVEIntReduction(IID, ReturnType, Op0, Op1);
   default:
     break;
   }
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 921462e28a467..799234a0b491d 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -315,11 +315,10 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) {
       return error(NT.second.second,
                    "use of undefined type '%" + Twine(NT.first) + "'");
 
-  for (StringMap<std::pair<Type*, LocTy> >::iterator I =
-       NamedTypes.begin(), E = NamedTypes.end(); I != E; ++I)
-    if (I->second.second.isValid())
-      return error(I->second.second,
-                   "use of undefined type named '" + I->getKey() + "'");
+  for (const auto &[Name, TypeInfo] : NamedTypes)
+    if (TypeInfo.second.isValid())
+      return error(TypeInfo.second,
+                   "use of undefined type named '" + Name + "'");
 
   if (!ForwardRefComdats.empty())
     return error(ForwardRefComdats.begin()->second,
diff --git a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
index f497c574ee75d..36d0d35d024cc 100644
--- a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -616,9 +616,8 @@ void ValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) {
 /// EnumerateValueSymbolTable - Insert all of the values in the specified symbol
 /// table into the values table.
 void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) {
-  for (ValueSymbolTable::const_iterator VI = VST.begin(), VE = VST.end();
-       VI != VE; ++VI)
-    EnumerateValue(VI->getValue());
+  for (const auto &VI : VST)
+    EnumerateValue(VI.getValue());
 }
 
 /// Insert all of the values referenced by named metadata in the specified
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index b6dd174f9be80..587c1372b19cb 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -431,6 +431,8 @@ class CodeGenPrepare {
   bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Type *AccessTy,
                           unsigned AddrSpace);
   bool optimizeGatherScatterInst(Instruction *MemoryInst, Value *Ptr);
+  bool optimizeMulWithOverflow(Instruction *I, bool IsSigned,
+                               ModifyDT &ModifiedDT);
   bool optimizeInlineAsmInst(CallInst *CS);
   bool optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT);
   bool optimizeExt(Instruction *&I);
@@ -2797,6 +2799,10 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
         }
       }
       return false;
+    case Intrinsic::umul_with_overflow:
+      return optimizeMulWithOverflow(II, /*IsSigned=*/false, ModifiedDT);
+    case Intrinsic::smul_with_overflow:
+      return optimizeMulWithOverflow(II, /*IsSigned=*/true, ModifiedDT);
     }
 
     SmallVector<Value *, 2> PtrOps;
@@ -6391,6 +6397,182 @@ bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
   return true;
 }
 
+// This is a helper for CodeGenPrepare::optimizeMulWithOverflow.
+// Match the pattern we are interested in: the intrinsic has at most 2 uses,
+// and every use is a single-index extractvalue (index 0 or 1).
+static bool matchOverflowPattern(Instruction *&I, ExtractValueInst *&MulExtract,
+                                 ExtractValueInst *&OverflowExtract) {
+  // Bail out if it's more than 2 users:
+  if (I->hasNUsesOrMore(3))
+    return false;
+
+  for (User *U : I->users()) {
+    auto *Extract = dyn_cast<ExtractValueInst>(U);
+    if (!Extract || Extract->getNumIndices() != 1)
+      return false;
+
+    unsigned Index = Extract->getIndices()[0];
+    if (Index == 0)
+      MulExtract = Extract;
+    else if (Index == 1)
+      OverflowExtract = Extract;
+    else
+      return false;
+  }
+  return true;
+}
+
+// Rewrite the mul_with_overflow intrinsic by checking if both of the
+// operands' value ranges are within the legal type. If so, we can optimize the
+// multiplication algorithm. This code is supposed to be written during the step
+// of type legalization, but given that we need to reconstruct the IR which is
+// not doable there, we do it here.
+// The IR after the optimization will look like:
+// entry:
+//   if signed:
+//     ( (lhs_lo>>BW-1) ^ lhs_hi) || ( (rhs_lo>>BW-1) ^ rhs_hi) ? overflow,
+//     overflow_no
+//   else:
+//     (lhs_hi != 0) || (rhs_hi != 0) ? overflow, overflow_no
+// overflow_no:
+// overflow:
+// overflow.res:
+// \returns true if optimization was applied
+// TODO: This optimization can be further improved to optimize branching on
+// overflow where the 'overflow_no' BB can branch directly to the false
+// successor of overflow, but that would add additional complexity so we leave
+// it for future work.
+bool CodeGenPrepare::optimizeMulWithOverflow(Instruction *I, bool IsSigned,
+                                             ModifyDT &ModifiedDT) {
+  // Check if target supports this optimization.
+  if (!TLI->shouldOptimizeMulOverflowWithZeroHighBits(
+          I->getContext(),
+          TLI->getValueType(*DL, I->getType()->getContainedType(0))))
+    return false;
+
+  ExtractValueInst *MulExtract = nullptr, *OverflowExtract = nullptr;
+  if (!matchOverflowPattern(I, MulExtract, OverflowExtract))
+    return false;
+
+  // Keep track of the instruction to stop reoptimizing it again.
+  InsertedInsts.insert(I);
+
+  Value *LHS = I->getOperand(0);
+  Value *RHS = I->getOperand(1);
+  Type *Ty = LHS->getType();
+  unsigned VTHalfBitWidth = Ty->getScalarSizeInBits() / 2;
+  Type *LegalTy = Ty->getWithNewBitWidth(VTHalfBitWidth);
+
+  // New BBs:
+  BasicBlock *OverflowEntryBB =
+      I->getParent()->splitBasicBlock(I, "", /*Before*/ true);
+  OverflowEntryBB->takeName(I->getParent());
+  // Keep the 'br' instruction that is generated as a result of the split to be
+  // erased/replaced later.
+  Instruction *OldTerminator = OverflowEntryBB->getTerminator();
+  BasicBlock *NoOverflowBB =
+      BasicBlock::Create(I->getContext(), "overflow.no", I->getFunction());
+  NoOverflowBB->moveAfter(OverflowEntryBB);
+  BasicBlock *OverflowBB =
+      BasicBlock::Create(I->getContext(), "overflow", I->getFunction());
+  OverflowBB->moveAfter(NoOverflowBB);
+
+  // BB overflow.entry:
+  IRBuilder<> Builder(OverflowEntryBB);
+  // Extract low and high halves of LHS:
+  Value *LoLHS = Builder.CreateTrunc(LHS, LegalTy, "lo.lhs");
+  Value *HiLHS = Builder.CreateLShr(LHS, VTHalfBitWidth, "lhs.lsr");
+  HiLHS = Builder.CreateTrunc(HiLHS, LegalTy, "hi.lhs");
+
+  // Extract low and high halves of RHS:
+  Value *LoRHS = Builder.CreateTrunc(RHS, LegalTy, "lo.rhs");
+  Value *HiRHS = Builder.CreateLShr(RHS, VTHalfBitWidth, "rhs.lsr");
+  HiRHS = Builder.CreateTrunc(HiRHS, LegalTy, "hi.rhs");
+
+  Value *IsAnyBitTrue;
+  if (IsSigned) {
+    Value *SignLoLHS =
+        Builder.CreateAShr(LoLHS, VTHalfBitWidth - 1, "sign.lo.lhs");
+    Value *SignLoRHS =
+        Builder.CreateAShr(LoRHS, VTHalfBitWidth - 1, "sign.lo.rhs");
+    Value *XorLHS = Builder.CreateXor(HiLHS, SignLoLHS);
+    Value *XorRHS = Builder.CreateXor(HiRHS, SignLoRHS);
+    Value *Or = Builder.CreateOr(XorLHS, XorRHS, "or.lhs.rhs");
+    IsAnyBitTrue = Builder.CreateCmp(ICmpInst::ICMP_NE, Or,
+                                     ConstantInt::getNullValue(Or->getType()));
+  } else {
+    Value *CmpLHS = Builder.CreateCmp(ICmpInst::ICMP_NE, HiLHS,
+                                      ConstantInt::getNullValue(LegalTy));
+    Value *CmpRHS = Builder.CreateCmp(ICmpInst::ICMP_NE, HiRHS,
+                                      ConstantInt::getNullValue(LegalTy));
+    IsAnyBitTrue = Builder.CreateOr(CmpLHS, CmpRHS, "or.lhs.rhs");
+  }
+  Builder.CreateCondBr(IsAnyBitTrue, OverflowBB, NoOverflowBB);
+
+  // BB overflow.no:
+  Builder.SetInsertPoint(NoOverflowBB);
+  Value *ExtLoLHS, *ExtLoRHS;
+  if (IsSigned) {
+    ExtLoLHS = Builder.CreateSExt(LoLHS, Ty, "lo.lhs.ext");
+    ExtLoRHS = Builder.CreateSExt(LoRHS, Ty, "lo.rhs.ext");
+  } else {
+    ExtLoLHS = Builder.CreateZExt(LoLHS, Ty, "lo.lhs.ext");
+    ExtLoRHS = Builder.CreateZExt(LoRHS, Ty, "lo.rhs.ext");
+  }
+
+  Value *Mul = Builder.CreateMul(ExtLoLHS, ExtLoRHS, "mul.overflow.no");
+
+  // Create the 'overflow.res' BB to merge the results of
+  // the two paths:
+  BasicBlock *OverflowResBB = I->getParent();
+  OverflowResBB->setName("overflow.res");
+
+  // BB overflow.no: jump to overflow.res BB
+  Builder.CreateBr(OverflowResBB);
+  // Now we don't need the old terminator in overflow.entry BB, erase it:
+  OldTerminator->eraseFromParent();
+
+  // BB overflow.res:
+  Builder.SetInsertPoint(OverflowResBB, OverflowResBB->getFirstInsertionPt());
+  // Create PHI nodes to merge results from overflow.no BB and overflow BB to
+  // replace the extract instructions.
+  PHINode *OverflowResPHI = Builder.CreatePHI(Ty, 2),
+          *OverflowFlagPHI =
+              Builder.CreatePHI(IntegerType::getInt1Ty(I->getContext()), 2);
+
+  // Add the incoming values from overflow.no BB and later from overflow BB.
+  OverflowResPHI->addIncoming(Mul, NoOverflowBB);
+  OverflowFlagPHI->addIncoming(ConstantInt::getFalse(I->getContext()),
+                               NoOverflowBB);
+
+  // Replace all users of MulExtract and OverflowExtract to use the PHI nodes.
+  if (MulExtract) {
+    MulExtract->replaceAllUsesWith(OverflowResPHI);
+    MulExtract->eraseFromParent();
+  }
+  if (OverflowExtract) {
+    OverflowExtract->replaceAllUsesWith(OverflowFlagPHI);
+    OverflowExtract->eraseFromParent();
+  }
+
+  // Remove the intrinsic from parent (overflow.res BB) as it will be part of
+  // overflow BB
+  I->removeFromParent();
+  // BB overflow:
+  I->insertInto(OverflowBB, OverflowBB->end());
+  Builder.SetInsertPoint(OverflowBB, OverflowBB->end());
+  Value *MulOverflow = Builder.CreateExtractValue(I, {0}, "mul.overflow");
+  Value *OverflowFlag = Builder.CreateExtractValue(I, {1}, "overflow.flag");
+  Builder.CreateBr(OverflowResBB);
+
+  // Add the extracted values to the PHI nodes in the overflow.res BB.
+  OverflowResPHI->addIncoming(MulOverflow, OverflowBB);
+  OverflowFlagPHI->addIncoming(OverflowFlag, OverflowBB);
+
+  ModifiedDT = ModifyDT::ModifyBBDT;
+  return true;
+}
+
 /// If there are any memory operands, use OptimizeMemoryInst to sink their
 /// address computing into the block when possible / profitable.
 bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index c1fb8b6d78ff8..ecba323f8d6bf 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -247,6 +247,7 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
     for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
       const MachineOperand &Src = MI.getOperand(Idx);
       Register SrcReg = Src.getReg();
+      LLT SrcTy = MRI.getType(SrcReg);
       // Look through trivial copies and phis but don't look through trivial
       // copies or phis of the form `%1:(s32) = OP %0:gpr32`, known-bits
       // analysis is currently unable to determine the bit width of a
@@ -255,9 +256,15 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
       // We can't use NoSubRegister by name as it's defined by each target but
       // it's always defined to be 0 by tablegen.
       if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ &&
-          MRI.getType(SrcReg).isValid()) {
+          SrcTy.isValid()) {
+        // In case we're forwarding from a vector register to a non-vector
+        // register we need to update the demanded elements to reflect this
+        // before recursing.
+        APInt NowDemandedElts = SrcTy.isFixedVector() && !DstTy.isFixedVector()
+                                    ? APInt::getAllOnes(SrcTy.getNumElements())
+                                    : DemandedElts; // Known to be APInt(1, 1)
         // For COPYs we don't do anything, don't increase the depth.
-        computeKnownBitsImpl(SrcReg, Known2, DemandedElts,
+        computeKnownBitsImpl(SrcReg, Known2, NowDemandedElts,
                              Depth + (Opcode != TargetOpcode::COPY));
         Known2 = Known2.anyextOrTrunc(BitWidth);
         Known = Known.intersectWith(Known2);
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index cacb292acee18..ba28e4dda3313 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3439,6 +3439,18 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
     Observer.changedInstr(MI);
     return Legalized;
   }
+  case TargetOpcode::G_LROUND:
+  case TargetOpcode::G_LLROUND:
+    Observer.changingInstr(MI);
+
+    if (TypeIdx == 0)
+      widenScalarDst(MI, WideTy);
+    else
+      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
+
+    Observer.changedInstr(MI);
+    return Legalized;
+
   case TargetOpcode::G_INTTOPTR:
     if (TypeIdx != 1)
       return UnableToLegalize;
diff --git a/llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp b/llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp
index bca820fa807c8..4acc064dbc212 100644
--- a/llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp
+++ b/llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp
@@ -64,7 +64,6 @@ dwarf::CFIProgram DWARFCFIState::convert(MCCFIInstruction Directive) {
       /* CodeAlignmentFactor */ 1, /* DataAlignmentFactor */ 1,
       Context->getTargetTriple().getArch());
 
-  auto MaybeCurrentRow = getCurrentUnwindRow();
   switch (Directive.getOperation()) {
   case MCCFIInstruction::OpSameValue:
     CFIP.addInstruction(dwarf::DW_CFA_same_value, Directive.getRegister());
diff --git a/llvm/lib/ExecutionEngine/Orc/Debugging/CMakeLists.txt b/llvm/lib/ExecutionEngine/Orc/Debugging/CMakeLists.txt
index ab287c7af60be..6be59b0890c44 100644
--- a/llvm/lib/ExecutionEngine/Orc/Debugging/CMakeLists.txt
+++ b/llvm/lib/ExecutionEngine/Orc/Debugging/CMakeLists.txt
@@ -22,6 +22,7 @@ add_llvm_component_library(LLVMOrcDebugging
   BinaryFormat
   DebugInfoDWARF
   JITLink
+  Object
   OrcJIT
   OrcShared
   Support
diff --git a/llvm/lib/ExecutionEngine/Orc/Debugging/ELFDebugObjectPlugin.cpp b/llvm/lib/ExecutionEngine/Orc/Debugging/ELFDebugObjectPlugin.cpp
index 9f556b0d07a8b..653645ff03f15 100644
--- a/llvm/lib/ExecutionEngine/Orc/Debugging/ELFDebugObjectPlugin.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Debugging/ELFDebugObjectPlugin.cpp
@@ -1,4 +1,4 @@
-//===------- ELFDebugObjectPlugin.cpp - JITLink debug objects ---------===//
+//===--------- ELFDebugObjectPlugin.cpp - JITLink debug objects -----------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp
index cbce8bd736102..a3aa5e9571657 100644
--- a/llvm/lib/IR/Constants.cpp
+++ b/llvm/lib/IR/Constants.cpp
@@ -183,6 +183,23 @@ bool Constant::isMinSignedValue() const {
   return false;
 }
 
+bool Constant::isMaxSignedValue() const {
+  // Check for INT_MAX integers
+  if (const ConstantInt *CI = dyn_cast<ConstantInt>(this))
+    return CI->isMaxValue(/*isSigned=*/true);
+
+  // Check for FP which are bitcasted from INT_MAX integers
+  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
+    return CFP->getValueAPF().bitcastToAPInt().isMaxSignedValue();
+
+  // Check for splats of INT_MAX values.
+  if (getType()->isVectorTy())
+    if (const auto *SplatVal = getSplatValue())
+      return SplatVal->isMaxSignedValue();
+
+  return false;
+}
+
 bool Constant::isNotMinSignedValue() const {
   // Check for INT_MIN integers
   if (const ConstantInt *CI = dyn_cast<ConstantInt>(this))
diff --git a/llvm/lib/Support/APInt.cpp b/llvm/lib/Support/APInt.cpp
index f6fd5f9ddd633..673cd867f0e45 100644
--- a/llvm/lib/Support/APInt.cpp
+++ b/llvm/lib/Support/APInt.cpp
@@ -15,10 +15,10 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/Sequence.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/bit.h"
-#include "llvm/Config/llvm-config.h"
 #include "llvm/Support/Alignment.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -3187,3 +3187,23 @@ APInt llvm::APIntOps::fshr(const APInt &Hi, const APInt &Lo,
     return Lo;
   return Hi.shl(Hi.getBitWidth() - ShiftAmt) | Lo.lshr(ShiftAmt);
 }
+
+APInt llvm::APIntOps::clmul(const APInt &LHS, const APInt &RHS) {
+  assert(LHS.getBitWidth() == RHS.getBitWidth());
+  unsigned BW = LHS.getBitWidth();
+  APInt Result(BW, 0);
+  for (unsigned I : seq<unsigned>(BW))
+    if (RHS[I])
+      Result ^= LHS.shl(I);
+  return Result;
+}
+
+APInt llvm::APIntOps::clmulr(const APInt &LHS, const APInt &RHS) {
+  assert(LHS.getBitWidth() == RHS.getBitWidth());
+  return clmul(LHS.reverseBits(), RHS.reverseBits()).reverseBits();
+}
+
+APInt llvm::APIntOps::clmulh(const APInt &LHS, const APInt &RHS) {
+  assert(LHS.getBitWidth() == RHS.getBitWidth());
+  return clmulr(LHS, RHS).lshr(1);
+}
diff --git a/llvm/lib/Support/Parallel.cpp b/llvm/lib/Support/Parallel.cpp
index 8e0c724accb36..ab220b8f2ceba 100644
--- a/llvm/lib/Support/Parallel.cpp
+++ b/llvm/lib/Support/Parallel.cpp
@@ -193,16 +193,7 @@ class ThreadPoolExecutor : public Executor {
   JobserverClient *TheJobserver = nullptr;
 };
 
-// A global raw pointer to the executor. Lifetime is managed by the
-// objects created within createExecutor().
-static Executor *TheExec = nullptr;
-static std::once_flag Flag;
-
-// This function will be called exactly once to create the executor.
-// It contains the necessary platform-specific logic. Since functions
-// called by std::call_once cannot return value, we have to set the
-// executor as a global variable.
-void createExecutor() {
+Executor *Executor::getDefaultExecutor() {
 #ifdef _WIN32
   // The ManagedStatic enables the ThreadPoolExecutor to be stopped via
   // llvm_shutdown() which allows a "clean" fast exit, e.g. via _exit(). This
@@ -226,22 +217,16 @@ void createExecutor() {
                        ThreadPoolExecutor::Deleter>
       ManagedExec;
   static std::unique_ptr<ThreadPoolExecutor> Exec(&(*ManagedExec));
-  TheExec = Exec.get();
+  return Exec.get();
 #else
   // ManagedStatic is not desired on other platforms. When `Exec` is destroyed
   // by llvm_shutdown(), worker threads will clean up and invoke TLS
   // destructors. This can lead to race conditions if other threads attempt to
   // access TLS objects that have already been destroyed.
   static ThreadPoolExecutor Exec(strategy);
-  TheExec = &Exec;
+  return &Exec;
 #endif
 }
-
-Executor *Executor::getDefaultExecutor() {
-  // Use std::call_once to lazily and safely initialize the executor.
-  std::call_once(Flag, createExecutor);
-  return TheExec;
-}
 } // namespace
 } // namespace detail
 
diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp
index 3330b70cdc2e1..939e9c6bf5d2f 100644
--- a/llvm/lib/TableGen/Main.cpp
+++ b/llvm/lib/TableGen/Main.cpp
@@ -127,8 +127,7 @@ static int WriteOutput(const TGParser &Parser, const char *argv0,
   return 0;
 }
 
-int llvm::TableGenMain(const char *argv0,
-                       std::function<MultiFileTableGenMainFn> MainFn) {
+int llvm::TableGenMain(const char *argv0, MultiFileTableGenMainFn MainFn) {
   RecordKeeper Records;
   TGTimer &Timer = Records.getTimer();
 
@@ -209,8 +208,7 @@ int llvm::TableGenMain(const char *argv0,
   return 0;
 }
 
-int llvm::TableGenMain(const char *argv0,
-                       std::function<TableGenMainFn> MainFn) {
+int llvm::TableGenMain(const char *argv0, TableGenMainFn MainFn) {
   return TableGenMain(argv0, [&MainFn](TableGenOutputFiles &OutFiles,
                                        const RecordKeeper &Records) {
     std::string S;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index f1db05dda4e40..08466667c0fa5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -4403,43 +4403,46 @@ bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
 
 bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
                                               bool Invert) {
-  if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
-    uint64_t ImmVal = CNode->getZExtValue();
-    SDLoc DL(N);
-
-    if (Invert)
-      ImmVal = ~ImmVal;
+  uint64_t ImmVal;
+  if (auto CI = dyn_cast<ConstantSDNode>(N))
+    ImmVal = CI->getZExtValue();
+  else if (auto CFP = dyn_cast<ConstantFPSDNode>(N))
+    ImmVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+  else
+    return false; // Neither an integer nor an FP constant node.
 
-    // Shift mask depending on type size.
-    switch (VT.SimpleTy) {
-    case MVT::i8:
-      ImmVal &= 0xFF;
-      ImmVal |= ImmVal << 8;
-      ImmVal |= ImmVal << 16;
-      ImmVal |= ImmVal << 32;
-      break;
-    case MVT::i16:
-      ImmVal &= 0xFFFF;
-      ImmVal |= ImmVal << 16;
-      ImmVal |= ImmVal << 32;
-      break;
-    case MVT::i32:
-      ImmVal &= 0xFFFFFFFF;
-      ImmVal |= ImmVal << 32;
-      break;
-    case MVT::i64:
-      break;
-    default:
-      llvm_unreachable("Unexpected type");
-    }
+  if (Invert)
+    ImmVal = ~ImmVal;
 
-    uint64_t encoding;
-    if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) {
-      Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
-      return true;
-    }
+  // Narrow elements: mask to the element width and replicate to 64 bits.
+  switch (VT.SimpleTy) {
+  case MVT::i8:
+    ImmVal &= 0xFF;
+    ImmVal |= ImmVal << 8;
+    ImmVal |= ImmVal << 16;
+    ImmVal |= ImmVal << 32;
+    break;
+  case MVT::i16:
+    ImmVal &= 0xFFFF;
+    ImmVal |= ImmVal << 16;
+    ImmVal |= ImmVal << 32;
+    break;
+  case MVT::i32:
+    ImmVal &= 0xFFFFFFFF;
+    ImmVal |= ImmVal << 32;
+    break;
+  case MVT::i64:
+    break; // Nothing to mask or replicate for 64-bit elements.
+  default:
+    llvm_unreachable("Unexpected type");
   }
-  return false;
+
+  uint64_t encoding;
+  if (!AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding))
+    return false;
+
+  Imm = CurDAG->getTargetConstant(encoding, SDLoc(N), MVT::i64);
+  return true;
 }
 
 // SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
 
 // SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 42567883b2594..8f41f230b5521 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3886,22 +3886,30 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
 /// \param MustBeFirst  Set to true if this subtree needs to be negated and we
 ///                     cannot do the negation naturally. We are required to
 ///                     emit the subtree first in this case.
+/// \param PreferFirst  Set to true if processing this subtree first may
+///                     result in more efficient code.
 /// \param WillNegate   Is true if are called when the result of this
 ///                     subexpression must be negated. This happens when the
 ///                     outer expression is an OR. We can use this fact to know
 ///                     that we have a double negation (or (or ...) ...) that
 ///                     can be implemented for free.
-static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
-                               bool &MustBeFirst, bool WillNegate,
+static bool canEmitConjunction(SelectionDAG &DAG, const SDValue Val,
+                               bool &CanNegate, bool &MustBeFirst,
+                               bool &PreferFirst, bool WillNegate,
                                unsigned Depth = 0) {
   if (!Val.hasOneUse())
     return false;
   unsigned Opcode = Val->getOpcode();
   if (Opcode == ISD::SETCC) {
-    if (Val->getOperand(0).getValueType() == MVT::f128)
+    EVT VT = Val->getOperand(0).getValueType();
+    if (VT == MVT::f128)
       return false;
     CanNegate = true;
     MustBeFirst = false;
+    // Designate this operation as a preferred first operation if the result
+    // of a SUB operation can be reused.
+    PreferFirst = DAG.doesNodeExist(ISD::SUB, DAG.getVTList(VT),
+                                    {Val->getOperand(0), Val->getOperand(1)});
     return true;
   }
   // Protect against exponential runtime and stack overflow.
@@ -3913,11 +3921,15 @@ static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
     SDValue O1 = Val->getOperand(1);
     bool CanNegateL;
     bool MustBeFirstL;
-    if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1))
+    bool PreferFirstL;
+    if (!canEmitConjunction(DAG, O0, CanNegateL, MustBeFirstL, PreferFirstL,
+                            IsOR, Depth + 1))
       return false;
     bool CanNegateR;
     bool MustBeFirstR;
-    if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1))
+    bool PreferFirstR;
+    if (!canEmitConjunction(DAG, O1, CanNegateR, MustBeFirstR, PreferFirstR,
+                            IsOR, Depth + 1))
       return false;
 
     if (MustBeFirstL && MustBeFirstR)
@@ -3940,6 +3952,7 @@ static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
       CanNegate = false;
       MustBeFirst = MustBeFirstL || MustBeFirstR;
     }
+    PreferFirst = PreferFirstL || PreferFirstR;
     return true;
   }
   return false;
@@ -4001,19 +4014,25 @@ static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
   SDValue LHS = Val->getOperand(0);
   bool CanNegateL;
   bool MustBeFirstL;
-  bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR);
+  bool PreferFirstL;
+  bool ValidL = canEmitConjunction(DAG, LHS, CanNegateL, MustBeFirstL,
+                                   PreferFirstL, IsOR);
   assert(ValidL && "Valid conjunction/disjunction tree");
   (void)ValidL;
 
   SDValue RHS = Val->getOperand(1);
   bool CanNegateR;
   bool MustBeFirstR;
-  bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR);
+  bool PreferFirstR;
+  bool ValidR = canEmitConjunction(DAG, RHS, CanNegateR, MustBeFirstR,
+                                   PreferFirstR, IsOR);
   assert(ValidR && "Valid conjunction/disjunction tree");
   (void)ValidR;
 
-  // Swap sub-tree that must come first to the right side.
-  if (MustBeFirstL) {
+  bool ShouldFirstL = PreferFirstL && !PreferFirstR && !MustBeFirstR;
+
+  // Swap sub-tree that must or should come first to the right side.
+  if (MustBeFirstL || ShouldFirstL) {
     assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
     std::swap(LHS, RHS);
     std::swap(CanNegateL, CanNegateR);
@@ -4069,7 +4088,9 @@ static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val,
                                AArch64CC::CondCode &OutCC) {
   bool DummyCanNegate;
   bool DummyMustBeFirst;
-  if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false))
+  bool DummyPreferFirst;
+  if (!canEmitConjunction(DAG, Val, DummyCanNegate, DummyMustBeFirst,
+                          DummyPreferFirst, false))
     return SDValue();
 
   return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL);
@@ -18851,6 +18872,16 @@ bool AArch64TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
   return (Index == 0 || Index == ResVT.getVectorMinNumElements());
 }
 
+bool AArch64TargetLowering::shouldOptimizeMulOverflowWithZeroHighBits(
+    LLVMContext &Context, EVT VT) const {
+  // Wanted only when VT is expanded and its half-width integer type is legal.
+  if (getTypeAction(Context, VT) == TypeExpandInteger) {
+    EVT HalfVT = EVT::getIntegerVT(Context, VT.getSizeInBits() / 2);
+    return getTypeAction(Context, HalfVT) == TargetLowering::TypeLegal;
+  }
+  return false;
+}
+
 /// Turn vector tests of the signbit in the form of:
 ///   xor (sra X, elt_size(X)-1), -1
 /// into:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 70bfae717fb76..be198e54cbcbf 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -333,6 +333,11 @@ class AArch64TargetLowering : public TargetLowering {
     return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
   }
 
+  // Return true if the target wants to optimize the mul overflow intrinsic
+  // for the given \p VT.
+  bool shouldOptimizeMulOverflowWithZeroHighBits(LLVMContext &Context,
+                                                 EVT VT) const override;
+
   Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
                         AtomicOrdering Ord) const override;
   Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index c8c21c4822ffe..e99b3f8ff07e0 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -989,7 +989,7 @@ let Predicates = [HasSVE_or_SME] in {
             (DUP_ZR_D (MOVi64imm (bitcast_fpimm_to_i64 f64:$val)))>;
 
   // Duplicate FP immediate into all vector elements
-  let AddedComplexity = 2 in {
+  let AddedComplexity = 3 in {
     def : Pat<(nxv8f16 (splat_vector fpimm16:$imm8)),
               (FDUP_ZI_H fpimm16:$imm8)>;
     def : Pat<(nxv4f16 (splat_vector fpimm16:$imm8)),
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index a88817c9d2d19..fdf69b04bf676 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -449,10 +449,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .minScalar(0, s32)
       .libcallFor({{s32, s32}, {s64, s32}, {s128, s32}});
 
-  // TODO: Libcall support for s128.
-  // TODO: s16 should be legal with full FP16 support.
   getActionDefinitionsBuilder({G_LROUND, G_LLROUND})
-      .legalFor({{s64, s32}, {s64, s64}});
+      .legalFor({{s64, s32}, {s64, s64}})
+      .legalFor(HasFP16, {{s64, s16}})
+      .minScalar(0, s64)
+      .minScalar(1, s32)
+      .libcallFor({{s64, s128}});
 
   // TODO: Custom legalization for mismatched types.
   getActionDefinitionsBuilder(G_FCOPYSIGN)
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 1664f4ad0c8fa..1e771e1fb9403 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -347,6 +347,11 @@ def SVELogicalImm16Pat : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i16>",
 def SVELogicalImm32Pat : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i32>", []>;
 def SVELogicalImm64Pat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i64>", []>;
 
+def SVELogicalFPImm16Pat : ComplexPattern<f16, 1, "SelectSVELogicalImm<MVT::i16>", []>;
+def SVELogicalFPImm32Pat : ComplexPattern<f32, 1, "SelectSVELogicalImm<MVT::i32>", []>;
+def SVELogicalFPImm64Pat : ComplexPattern<f64, 1, "SelectSVELogicalImm<MVT::i64>", []>;
+def SVELogicalBFPImmPat : ComplexPattern<bf16, 1, "SelectSVELogicalImm<MVT::i16>", []>;
+
 def SVELogicalImm8NotPat  : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i8, true>", []>;
 def SVELogicalImm16NotPat : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i16, true>", []>;
 def SVELogicalImm32NotPat : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i32, true>", []>;
@@ -2160,6 +2165,26 @@ multiclass sve_int_dup_mask_imm<string asm> {
             (!cast<Instruction>(NAME) i64:$imm)>;
   def : Pat<(nxv2i64 (splat_vector (i64 (SVELogicalImm64Pat i64:$imm)))),
             (!cast<Instruction>(NAME) i64:$imm)>;
+
+  def : Pat<(nxv8f16 (splat_vector (f16 (SVELogicalFPImm16Pat i64:$imm)))),
+            (!cast<Instruction>(NAME) i64:$imm)>;
+  def : Pat<(nxv4f16 (splat_vector (f16 (SVELogicalFPImm16Pat i64:$imm)))),
+            (!cast<Instruction>(NAME) i64:$imm)>;
+  def : Pat<(nxv2f16 (splat_vector (f16 (SVELogicalFPImm16Pat i64:$imm)))),
+            (!cast<Instruction>(NAME) i64:$imm)>;
+  def : Pat<(nxv4f32 (splat_vector (f32 (SVELogicalFPImm32Pat i64:$imm)))),
+            (!cast<Instruction>(NAME) i64:$imm)>;
+  def : Pat<(nxv2f32 (splat_vector (f32 (SVELogicalFPImm32Pat i64:$imm)))),
+            (!cast<Instruction>(NAME) i64:$imm)>;
+  def : Pat<(nxv2f64 (splat_vector (f64 (SVELogicalFPImm64Pat i64:$imm)))),
+            (!cast<Instruction>(NAME) i64:$imm)>;
+
+  def : Pat<(nxv8bf16 (splat_vector (bf16 (SVELogicalBFPImmPat i64:$imm)))),
+            (!cast<Instruction>(NAME) i64:$imm)>;
+  def : Pat<(nxv4bf16 (splat_vector (bf16 (SVELogicalBFPImmPat i64:$imm)))),
+            (!cast<Instruction>(NAME) i64:$imm)>;
+  def : Pat<(nxv2bf16 (splat_vector (bf16 (SVELogicalBFPImmPat i64:$imm)))),
+            (!cast<Instruction>(NAME) i64:$imm)>;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index b81a08de383d9..e36c57ad59bfd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -960,6 +960,14 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
       .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
       .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);
 
+  addRulesForGOpcs({G_IS_FPCLASS})
+      .Any({{DivS1, S16}, {{Vcc}, {Vgpr16}}})
+      .Any({{UniS1, S16}, {{UniInVcc}, {Vgpr16}}})
+      .Any({{DivS1, S32}, {{Vcc}, {Vgpr32}}})
+      .Any({{UniS1, S32}, {{UniInVcc}, {Vgpr32}}})
+      .Any({{DivS1, S64}, {{Vcc}, {Vgpr64}}})
+      .Any({{UniS1, S64}, {{UniInVcc}, {Vgpr64}}});
+
   using namespace Intrinsic;
 
   addRulesForIOpcs({amdgcn_s_getpc}).Any({{UniS64, _}, {{Sgpr64}, {None}}});
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
index 9af812960542c..b7078825928be 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
@@ -314,9 +314,7 @@ class SplitGraph {
 #endif
 
   bool empty() const { return Nodes.empty(); }
-  const iterator_range<nodes_iterator> nodes() const {
-    return {Nodes.begin(), Nodes.end()};
-  }
+  iterator_range<nodes_iterator> nodes() const { return Nodes; }
   const Node &getNode(unsigned ID) const { return *Nodes[ID]; }
 
   unsigned getNumNodes() const { return Nodes.size(); }
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 7a2f84a2f73eb..29d22f27a2d8e 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1502,9 +1502,8 @@ static bool shouldRunLdsBranchVmemWARHazardFixup(const MachineFunction &MF,
   bool HasVmem = false;
   for (auto &MBB : MF) {
     for (auto &MI : MBB) {
-      HasLds |= SIInstrInfo::isDS(MI);
-      HasVmem |= (SIInstrInfo::isVMEM(MI) && !SIInstrInfo::isFLAT(MI)) ||
-                 SIInstrInfo::isSegmentSpecificFLAT(MI);
+      HasLds |= SIInstrInfo::isDS(MI) || SIInstrInfo::isLDSDMA(MI);
+      HasVmem |= SIInstrInfo::isVMEM(MI);
       if (HasLds && HasVmem)
         return true;
     }
@@ -1526,10 +1525,9 @@ bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
   assert(!ST.hasExtendedWaitCounts());
 
   auto IsHazardInst = [](const MachineInstr &MI) {
-    if (SIInstrInfo::isDS(MI))
+    if (SIInstrInfo::isDS(MI) || SIInstrInfo::isLDSDMA(MI))
       return 1;
-    if ((SIInstrInfo::isVMEM(MI) && !SIInstrInfo::isFLAT(MI)) ||
-        SIInstrInfo::isSegmentSpecificFLAT(MI))
+    if (SIInstrInfo::isVMEM(MI))
       return 2;
     return 0;
   };
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 847b7af5a9b11..26b5e5a22386e 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -3965,31 +3965,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
       return;
     // Other cases are autogenerated.
     break;
-  case ARMISD::WLSSETUP: {
-    SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopSetup, dl, MVT::i32,
-                                         N->getOperand(0));
-    ReplaceUses(N, New);
-    CurDAG->RemoveDeadNode(N);
-    return;
-  }
-  case ARMISD::WLS: {
-    SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other,
-                                         N->getOperand(1), N->getOperand(2),
-                                         N->getOperand(0));
-    ReplaceUses(N, New);
-    CurDAG->RemoveDeadNode(N);
-    return;
-  }
-  case ARMISD::LE: {
-    SDValue Ops[] = { N->getOperand(1),
-                      N->getOperand(2),
-                      N->getOperand(0) };
-    unsigned Opc = ARM::t2LoopEnd;
-    SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
-    ReplaceUses(N, New);
-    CurDAG->RemoveDeadNode(N);
-    return;
-  }
   case ARMISD::LDRD: {
     if (Subtarget->isThumb2())
       break; // TableGen handles isel in this case.
@@ -4043,17 +4018,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
     CurDAG->RemoveDeadNode(N);
     return;
   }
-  case ARMISD::LOOP_DEC: {
-    SDValue Ops[] = { N->getOperand(1),
-                      N->getOperand(2),
-                      N->getOperand(0) };
-    SDNode *Dec =
-      CurDAG->getMachineNode(ARM::t2LoopDec, dl,
-                             CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
-    ReplaceUses(N, Dec);
-    CurDAG->RemoveDeadNode(N);
-    return;
-  }
   case ARMISD::BRCOND: {
     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
     // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index c229c8e4491df..911d7ebfba141 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -5581,6 +5581,20 @@ class t2LOL<dag oops, dag iops, string asm, string ops>
   let Predicates = [IsThumb2, HasV8_1MMainline, HasLOB];
 }
 
+def arm_wlssetup
+    : SDNode<"ARMISD::WLSSETUP",
+             SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisSameAs<1, 0>]>>;
+
+def arm_wls : SDNode<"ARMISD::WLS",
+                     SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>,
+                     [SDNPHasChain]>;
+
+def arm_loop_dec : SDNode<"ARMISD::LOOP_DEC", SDTIntBinOp, [SDNPHasChain]>;
+
+def arm_le : SDNode<"ARMISD::LE",
+                    SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>,
+                    [SDNPHasChain]>;
+
 let isNotDuplicable = 1 in {
 def t2WLS : t2LOL<(outs GPRlr:$LR),
                   (ins rGPR:$Rn, wlslabel_u11:$label),
@@ -5651,15 +5665,17 @@ def t2DoLoopStartTP :
 // valid after reg alloc, as it should be lowered during MVETPAndVPTOptimisations
 // into a t2WhileLoopStartLR (or expanded).
 def t2WhileLoopSetup :
-  t2PseudoInst<(outs GPRlr:$lr), (ins rGPR:$tc), 4, IIC_Br, []>;
+    t2PseudoInst<(outs GPRlr:$lr), (ins rGPR:$tc), 4, IIC_Br,
+                 [(set i32:$lr, (arm_wlssetup i32:$tc))]>;
 
 // A pseudo to represent the decrement in a low overhead loop. A t2LoopDec and
 // t2LoopEnd together represent a LE instruction. Ideally these are converted
 // to a t2LoopEndDec which is lowered as a single instruction.
 let hasSideEffects = 0 in
 def t2LoopDec :
-  t2PseudoInst<(outs GPRlr:$Rm), (ins GPRlr:$Rn, imm0_7:$size),
-               4, IIC_Br, []>, Sched<[WriteBr]>;
+    t2PseudoInst<(outs GPRlr:$Rm), (ins GPRlr:$Rn, imm0_7:$size), 4, IIC_Br,
+                 [(set i32:$Rm, (arm_loop_dec i32:$Rn, timm:$size))]>,
+    Sched<[WriteBr]>;
 
 let isBranch = 1, isTerminator = 1, hasSideEffects = 1, Defs = [CPSR] in {
 // The branch in a t2WhileLoopSetup/t2WhileLoopStart pair, eventually turned
@@ -5667,8 +5683,8 @@ let isBranch = 1, isTerminator = 1, hasSideEffects = 1, Defs = [CPSR] in {
 def t2WhileLoopStart :
     t2PseudoInst<(outs),
                  (ins GPRlr:$tc, brtarget:$target),
-                 4, IIC_Br, []>,
-                 Sched<[WriteBr]>;
+                 4, IIC_Br, [(arm_wls i32:$tc, bb:$target)]>,
+    Sched<[WriteBr]>;
 
 // WhileLoopStartLR that sets up LR and branches on zero, equivalent to WLS. It
 // is lowered in the ARMLowOverheadLoops pass providing the branches are within
@@ -5690,8 +5706,9 @@ def t2WhileLoopStartTP :
 
 // t2LoopEnd - the branch half of a t2LoopDec/t2LoopEnd pair.
 def t2LoopEnd :
-  t2PseudoInst<(outs), (ins GPRlr:$tc, brtarget:$target),
-  8, IIC_Br, []>, Sched<[WriteBr]>;
+    t2PseudoInst<(outs), (ins GPRlr:$tc, brtarget:$target),
+                 8, IIC_Br, [(arm_le i32:$tc, bb:$target)]>,
+    Sched<[WriteBr]>;
 
 // The combination of a t2LoopDec and t2LoopEnd, performing both the LR
 // decrement and branch as a single instruction. Is lowered to a LE or
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 67437f6969b27..8b2866260e9c9 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -930,6 +930,24 @@ def Discard : DXILOp<82, discard> {
   let stages = [Stages<DXIL1_0, [pixel]>];
 }
 
+def DerivCoarseX : DXILOp<83, unary> {
+  let Doc = "computes the rate of change per stamp in x direction";
+  let intrinsics = [IntrinSelect<int_dx_ddx_coarse>];
+  let arguments = [OverloadTy];
+  let result = OverloadTy;
+  let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
+  let stages = [Stages<DXIL1_0, [library, pixel]>];
+}
+
+def DerivCoarseY : DXILOp<84, unary> {
+  let Doc = "computes the rate of change per stamp in y direction";
+  let intrinsics = [IntrinSelect<int_dx_ddy_coarse>];
+  let arguments = [OverloadTy];
+  let result = OverloadTy;
+  let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
+  let stages = [Stages<DXIL1_0, [library, pixel]>];
+}
+
 def ThreadId : DXILOp<93, threadId> {
   let Doc = "Reads the thread ID";
   let intrinsics = [IntrinSelect<int_dx_thread_id>];
diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
index 6cacbf6564db2..a755dd522969d 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
@@ -64,6 +64,8 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable(
   case Intrinsic::dx_wave_reduce_usum:
   case Intrinsic::dx_imad:
   case Intrinsic::dx_umad:
+  case Intrinsic::dx_ddx_coarse:
+  case Intrinsic::dx_ddy_coarse:
     return true;
   default:
     return false;
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
index 47022b3f89a8b..76fd834fd7219 100644
--- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
@@ -1697,11 +1697,16 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVType(unsigned BitWidth,
   MachineIRBuilder MIRBuilder(DepMBB, DepMBB.getFirstNonPHI());
   const MachineInstr *NewMI =
       createOpType(MIRBuilder, [&](MachineIRBuilder &MIRBuilder) {
-        return BuildMI(MIRBuilder.getMBB(), *MIRBuilder.getInsertPt(),
-                       MIRBuilder.getDL(), TII.get(SPIRVOPcode))
-            .addDef(createTypeVReg(CurMF->getRegInfo()))
-            .addImm(BitWidth)
-            .addImm(0);
+        auto NewTypeMI = BuildMI(MIRBuilder.getMBB(), *MIRBuilder.getInsertPt(),
+                                 MIRBuilder.getDL(), TII.get(SPIRVOPcode))
+                             .addDef(createTypeVReg(CurMF->getRegInfo()))
+                             .addImm(BitWidth);
+        // No trailing operand for FP types. Test with isFloatingPointTy() so
+        // half and double are covered too; isFloatTy() is true only for the
+        // 32-bit float type.
+        if (Ty->isFloatingPointTy())
+          return NewTypeMI;
+        return NewTypeMI.addImm(0);
       });
   add(Ty, false, NewMI);
   return finishCreatingSPIRVType(Ty, NewMI);
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index fc87288a4a212..0653b4eb9dfe2 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -328,6 +328,8 @@ class SPIRVInstructionSelector : public InstructionSelector {
                            MachineInstr &I) const;
   bool selectFrexp(Register ResVReg, const SPIRVType *ResType,
                    MachineInstr &I) const;
+  bool selectDpdCoarse(Register ResVReg, const SPIRVType *ResType,
+                       MachineInstr &I, const unsigned DPdOpCode) const;
   // Utilities
   std::pair<Register, bool>
   buildI32Constant(uint32_t Val, MachineInstr &I,
@@ -371,6 +373,7 @@ class SPIRVInstructionSelector : public InstructionSelector {
   bool loadHandleBeforePosition(Register &HandleReg, const SPIRVType *ResType,
                                 GIntrinsic &HandleDef, MachineInstr &Pos) const;
   void decorateUsesAsNonUniform(Register &NonUniformReg) const;
+  void errorIfInstrOutsideShader(MachineInstr &I) const;
 };
 
 bool sampledTypeIsSignedInteger(const llvm::Type *HandleType) {
@@ -3140,6 +3143,58 @@ bool SPIRVInstructionSelector::wrapIntoSpecConstantOp(
   return Result;
 }
 
+bool SPIRVInstructionSelector::selectDpdCoarse(Register ResVReg,
+                                               const SPIRVType *ResType,
+                                               MachineInstr &I,
+                                               const unsigned DPdOpCode) const {
+  // TODO: This should check specifically for Fragment Execution Model, but STI
+  // doesn't provide that information yet. See #167562
+  errorIfInstrOutsideShader(I);
+
+  // A half arg/result is legal in HLSL but not in SPIR-V, so unless the width
+  // is already 32 bits the derivative is wrapped in conversions via float.
+  Register SrcReg = I.getOperand(2).getReg();
+  SPIRVType *SrcType = GR.getSPIRVTypeForVReg(SrcReg);
+  unsigned BitWidth = std::min(GR.getScalarOrVectorBitWidth(SrcType),
+                               GR.getScalarOrVectorBitWidth(ResType));
+  if (BitWidth == 32)
+    return BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(DPdOpCode))
+        .addDef(ResVReg)
+        .addUse(GR.getSPIRVTypeID(ResType))
+        .addUse(SrcReg)
+        .constrainAllUses(TII, TRI, RBI);
+
+  MachineIRBuilder MIRBuilder(I);
+  unsigned ComponentCount = GR.getScalarOrVectorComponentCount(SrcType);
+  SPIRVType *F32ConvertTy = GR.getOrCreateSPIRVFloatType(32, I, TII);
+  if (ComponentCount != 1)
+    F32ConvertTy = GR.getOrCreateSPIRVVectorType(F32ConvertTy, ComponentCount,
+                                                 MIRBuilder, false);
+
+  const TargetRegisterClass *RegClass = GR.getRegClass(SrcType);
+  Register ConvertToVReg = MRI->createVirtualRegister(RegClass);
+  Register DpdOpVReg = MRI->createVirtualRegister(RegClass);
+
+  bool Result =
+      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpFConvert))
+          .addDef(ConvertToVReg)
+          .addUse(GR.getSPIRVTypeID(F32ConvertTy))
+          .addUse(SrcReg)
+          .constrainAllUses(TII, TRI, RBI);
+  Result &= BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(DPdOpCode))
+                .addDef(DpdOpVReg)
+                .addUse(GR.getSPIRVTypeID(F32ConvertTy))
+                .addUse(ConvertToVReg)
+                .constrainAllUses(TII, TRI, RBI);
+  Result &=
+      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpFConvert))
+          .addDef(ResVReg)
+          .addUse(GR.getSPIRVTypeID(ResType))
+          .addUse(DpdOpVReg)
+          .constrainAllUses(TII, TRI, RBI);
+  return Result;
+}
+
 bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
                                                const SPIRVType *ResType,
                                                MachineInstr &I) const {
@@ -3528,7 +3583,12 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
   case Intrinsic::spv_unpackhalf2x16: {
     return selectExtInst(ResVReg, ResType, I, GL::UnpackHalf2x16);
   }
-
+  case Intrinsic::spv_ddx_coarse: {
+    return selectDpdCoarse(ResVReg, ResType, I, SPIRV::OpDPdxCoarse);
+  }
+  case Intrinsic::spv_ddy_coarse: {
+    return selectDpdCoarse(ResVReg, ResType, I, SPIRV::OpDPdyCoarse);
+  }
   default: {
     std::string DiagMsg;
     raw_string_ostream OS(DiagMsg);
@@ -4694,6 +4754,18 @@ bool SPIRVInstructionSelector::loadHandleBeforePosition(
       .constrainAllUses(TII, TRI, RBI);
 }
 
+// Report a fatal error naming \p I unless the subtarget is a shader one.
+void SPIRVInstructionSelector::errorIfInstrOutsideShader(
+    MachineInstr &I) const {
+  if (STI.isShader())
+    return;
+  std::string DiagMsg;
+  raw_string_ostream OS(DiagMsg);
+  I.print(OS, true, false, false, false);
+  OS << " is only supported in shaders.\n";
+  report_fatal_error(DiagMsg.c_str(), false);
+}
+
 namespace llvm {
 InstructionSelector *
 createSPIRVInstructionSelector(const SPIRVTargetMachine &TM,
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
index b8cd9c1358f00..bd754d17694b8 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -934,7 +934,8 @@ void RequirementHandler::initAvailableCapabilitiesForVulkan(
                     Capability::UniformBufferArrayDynamicIndexing,
                     Capability::SampledImageArrayDynamicIndexing,
                     Capability::StorageBufferArrayDynamicIndexing,
-                    Capability::StorageImageArrayDynamicIndexing});
+                    Capability::StorageImageArrayDynamicIndexing,
+                    Capability::DerivativeControl});
 
   // Became core in Vulkan 1.2
   if (ST.isAtLeastSPIRVVer(VersionTuple(1, 5))) {
@@ -2148,6 +2149,12 @@ void addInstrRequirements(const MachineInstr &MI,
     }
     break;
   }
+  case SPIRV::OpDPdxCoarse:
+  case SPIRV::OpDPdyCoarse: {
+    Reqs.addCapability(SPIRV::Capability::DerivativeControl);
+    break;
+  }
+
   default:
     break;
   }
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 621f1868d3311..864e5dc67682c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -54688,11 +54688,14 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
       KnownBits KnownAmt = DAG.computeKnownBits(ShAmt);
       // Check the shift amount is byte aligned.
       // Check the truncation doesn't use any shifted in (zero) top bits.
-      // Check the shift amount doesn't depend on the original load.
+      // Check the shift amount doesn't depend on the original load chain.
       if (KnownAmt.countMinTrailingZeros() >= 3 &&
           KnownAmt.getMaxValue().ule(SrcVT.getSizeInBits() -
                                      VT.getSizeInBits()) &&
-          !Ld->isPredecessorOf(ShAmt.getNode())) {
+          none_of(Ld->uses(), [&ShAmt](SDUse &Use) {
+            return Use.getResNo() == 1 &&
+                   Use.getUser()->isPredecessorOf(ShAmt.getNode());
+          })) {
         EVT PtrVT = Ld->getBasePtr().getValueType();
         SDValue PtrBitOfs = DAG.getZExtOrTrunc(ShAmt, DL, PtrVT);
         SDValue PtrByteOfs =
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 9572f9d702e1b..e7dc366b13798 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1027,10 +1027,9 @@ static Value *canonicalizeSaturatedSubtract(const ICmpInst *ICI,
   return Result;
 }
 
-static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
-                                       InstCombiner::BuilderTy &Builder) {
-  if (!Cmp->hasOneUse())
-    return nullptr;
+static Value *
+canonicalizeSaturatedAddUnsigned(ICmpInst *Cmp, Value *TVal, Value *FVal,
+                                 InstCombiner::BuilderTy &Builder) {
 
   // Match unsigned saturated add with constant.
   Value *Cmp0 = Cmp->getOperand(0);
@@ -1130,6 +1129,94 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
   return nullptr;
 }
 
+/// Fold a select that clamps a signed add at INT_MAX into llvm.sadd.sat.
+/// \p Cmp is the select condition; \p TVal and \p FVal are its two arms.
+/// Returns the new intrinsic call, or nullptr if no pattern matches.
+static Value *canonicalizeSaturatedAddSigned(ICmpInst *Cmp, Value *TVal,
+                                             Value *FVal,
+                                             InstCombiner::BuilderTy &Builder) {
+  Value *Cmp0 = Cmp->getOperand(0);
+  Value *Cmp1 = Cmp->getOperand(1);
+  ICmpInst::Predicate Pred = Cmp->getPredicate();
+  Value *X;
+  const APInt *C;
+
+  // Canonicalize INT_MAX to true value of the select.
+  if (match(FVal, m_MaxSignedValue())) {
+    std::swap(TVal, FVal);
+    Pred = CmpInst::getInversePredicate(Pred);
+  }
+
+  if (!match(TVal, m_MaxSignedValue()))
+    return nullptr;
+
+  // `a s>= INT_MAX` is canonicalized to `a == INT_MAX`, so handle
+  // (a == INT_MAX) ? INT_MAX : a + 1 --> sadd.sat(a, 1) separately.
+  if (Pred == ICmpInst::ICMP_EQ) {
+    if (match(FVal, m_Add(m_Specific(Cmp0), m_One())) && Cmp1 == TVal)
+      return Builder.CreateBinaryIntrinsic(
+          Intrinsic::sadd_sat, Cmp0, ConstantInt::get(Cmp0->getType(), 1));
+    return nullptr;
+  }
+
+  // (X > Y) ? INT_MAX : (X + C) --> sadd.sat(X, C)
+  // (X >= Y) ? INT_MAX : (X + C) --> sadd.sat(X, C)
+  // where Y is INT_MAX - C or INT_MAX - C - 1, and C > 0
+  if ((Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) &&
+      match(FVal, m_Add(m_Specific(Cmp0), m_StrictlyPositive(C)))) {
+    APInt IntMax =
+        APInt::getSignedMaxValue(Cmp1->getType()->getScalarSizeInBits());
+
+    // For SGE, try to flip to SGT to normalize the comparison constant. Only
+    // a constant RHS can be flipped; cast<Constant> on anything else asserts.
+    if (Pred == ICmpInst::ICMP_SGE && isa<Constant>(Cmp1)) {
+      if (auto Flipped = getFlippedStrictnessPredicateAndConstant(
+              Pred, cast<Constant>(Cmp1))) {
+        Pred = Flipped->first;
+        Cmp1 = Flipped->second;
+      }
+    }
+
+    // Check the pattern: X > INT_MAX - C or X > INT_MAX - C - 1
+    if (Pred == ICmpInst::ICMP_SGT &&
+        (match(Cmp1, m_SpecificIntAllowPoison(IntMax - *C)) ||
+         match(Cmp1, m_SpecificIntAllowPoison(IntMax - *C - 1))))
+      return Builder.CreateBinaryIntrinsic(
+          Intrinsic::sadd_sat, Cmp0, ConstantInt::get(Cmp0->getType(), *C));
+  }
+
+  // Canonicalize predicate to less-than or less-or-equal-than.
+  if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) {
+    std::swap(Cmp0, Cmp1);
+    Pred = CmpInst::getSwappedPredicate(Pred);
+  }
+
+  if (Pred != ICmpInst::ICMP_SLT && Pred != ICmpInst::ICMP_SLE)
+    return nullptr;
+
+  // (INT_MAX - X s< Y) ? INT_MAX : (X + Y) --> sadd.sat(X, Y)
+  // (INT_MAX - X s< Y) ? INT_MAX : (Y + X) --> sadd.sat(X, Y)
+  if (match(Cmp0, m_NSWSub(m_MaxSignedValue(), m_Value(X))) &&
+      match(FVal, m_c_Add(m_Specific(X), m_Specific(Cmp1))))
+    return Builder.CreateBinaryIntrinsic(Intrinsic::sadd_sat, X, Cmp1);
+
+  return nullptr;
+}
+
+/// Try to turn a select on \p Cmp into a saturating-add intrinsic. The
+/// unsigned patterns are tried first, then the signed ones. Returns nullptr
+/// if \p Cmp does not have exactly one use or if neither matcher succeeds.
+static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
+                                       InstCombiner::BuilderTy &Builder) {
+  if (!Cmp->hasOneUse())
+    return nullptr;
+
+  if (Value *Sat = canonicalizeSaturatedAddUnsigned(Cmp, TVal, FVal, Builder))
+    return Sat;
+
+  return canonicalizeSaturatedAddSigned(Cmp, TVal, FVal, Builder);
+}
+
 /// Try to match patterns with select and subtract as absolute difference.
 static Value *foldAbsDiff(ICmpInst *Cmp, Value *TVal, Value *FVal,
                           InstCombiner::BuilderTy &Builder) {
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 7c364f86fb0e8..c9f249a8733ac 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -20,6 +20,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
@@ -248,6 +249,11 @@ static cl::opt<bool>
                          "platforms that support this"),
                 cl::Hidden, cl::init(true));
 
+static cl::opt<int>
+    ClShadowAddrSpace("asan-shadow-addr-space",
+                      cl::desc("Address space for pointers to the shadow map"),
+                      cl::Hidden, cl::init(0));
+
 static cl::opt<bool> ClWithIfuncSuppressRemat(
     "asan-with-ifunc-suppress-remat",
     cl::desc("Suppress rematerialization of dynamic shadow address by passing "
@@ -436,6 +442,15 @@ static cl::opt<AsanDtorKind> ClOverrideDestructorKind(
                           "Use global destructors")),
     cl::init(AsanDtorKind::Invalid), cl::Hidden);
 
+static SmallSet<unsigned, 8> SrcAddrSpaces;
+static cl::list<unsigned> ClAddrSpaces(
+    "asan-instrument-address-spaces",
+    cl::desc("Only instrument variables in the specified address spaces."),
+    cl::Hidden, cl::CommaSeparated, cl::ZeroOrMore,
+    cl::callback([](const unsigned &AddrSpace) {
+      SrcAddrSpaces.insert(AddrSpace);
+    }));
+
 // Debug flags.
 
 static cl::opt<int> ClDebug("asan-debug", cl::desc("debug"), cl::Hidden,
@@ -503,6 +518,7 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize,
   bool IsAMDGPU = TargetTriple.isAMDGPU();
   bool IsHaiku = TargetTriple.isOSHaiku();
   bool IsWasm = TargetTriple.isWasm();
+  bool IsBPF = TargetTriple.isBPF();
 
   ShadowMapping Mapping;
 
@@ -579,6 +595,8 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize,
     else if (IsHaiku && IsX86_64)
       Mapping.Offset = (kSmallX86_64ShadowOffsetBase &
                         (kSmallX86_64ShadowOffsetAlignMask << Mapping.Scale));
+    else if (IsBPF)
+      Mapping.Offset = kDynamicShadowSentinel;
     else
       Mapping.Offset = kDefaultShadowOffset64;
   }
@@ -1355,11 +1373,25 @@ static bool GlobalWasGeneratedByCompiler(GlobalVariable *G) {
 static bool isUnsupportedAMDGPUAddrspace(Value *Addr) {
   Type *PtrTy = cast<PointerType>(Addr->getType()->getScalarType());
   unsigned int AddrSpace = PtrTy->getPointerAddressSpace();
+  // Globals in address space 1 and 4 are supported for AMDGPU.
   if (AddrSpace == 3 || AddrSpace == 5)
     return true;
   return false;
 }
 
+/// Return true if ASan should instrument memory reached through \p Addr,
+/// judged by the address space of its (scalar) pointer type.
+static bool isSupportedAddrspace(const Triple &TargetTriple, Value *Addr) {
+  const unsigned AS = cast<PointerType>(Addr->getType()->getScalarType())
+                          ->getPointerAddressSpace();
+  // A non-empty -asan-instrument-address-spaces list is the sole criterion.
+  if (!SrcAddrSpaces.empty())
+    return SrcAddrSpaces.count(AS) != 0;
+  // Otherwise AMDGPU uses its own rules; other targets only support AS 0.
+  return TargetTriple.isAMDGPU() ? !isUnsupportedAMDGPUAddrspace(Addr)
+                                 : AS == 0;
+}
+
 Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
   // Shadow >> scale
   Shadow = IRB.CreateLShr(Shadow, Mapping.Scale);
@@ -1423,10 +1455,9 @@ bool AddressSanitizer::isInterestingAlloca(const AllocaInst &AI) {
 }
 
 bool AddressSanitizer::ignoreAccess(Instruction *Inst, Value *Ptr) {
-  // Instrument accesses from different address spaces only for AMDGPU.
-  Type *PtrTy = cast<PointerType>(Ptr->getType()->getScalarType());
-  if (PtrTy->getPointerAddressSpace() != 0 &&
-      !(TargetTriple.isAMDGPU() && !isUnsupportedAMDGPUAddrspace(Ptr)))
+  // Check whether the target supports sanitizing the address space
+  // of the pointer.
+  if (!isSupportedAddrspace(TargetTriple, Ptr))
     return true;
 
   // Ignore swifterror addresses.
@@ -1942,7 +1973,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
 
   Type *ShadowTy =
       IntegerType::get(*C, std::max(8U, TypeStoreSize >> Mapping.Scale));
-  Type *ShadowPtrTy = PointerType::get(*C, 0);
+  Type *ShadowPtrTy = PointerType::get(*C, ClShadowAddrSpace);
   Value *ShadowPtr = memToShadow(AddrLong, IRB);
   const uint64_t ShadowAlign =
       std::max<uint64_t>(Alignment.valueOrOne().value() >> Mapping.Scale, 1);
@@ -2089,9 +2120,7 @@ bool ModuleAddressSanitizer::shouldInstrumentGlobal(GlobalVariable *G) const {
     return false;
   if (!Ty->isSized()) return false;
   if (!G->hasInitializer()) return false;
-  // Globals in address space 1 and 4 are supported for AMDGPU.
-  if (G->getAddressSpace() &&
-      !(TargetTriple.isAMDGPU() && !isUnsupportedAMDGPUAddrspace(G)))
+  if (!isSupportedAddrspace(TargetTriple, G))
     return false;
   if (GlobalWasGeneratedByCompiler(G)) return false; // Our own globals.
   // Two problems with thread-locals:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index f533a47150a7b..741392247c0d6 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -152,11 +152,12 @@ class VPBuilder {
   /// its underlying Instruction.
   VPInstruction *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands,
                               Instruction *Inst = nullptr,
+                              const VPIRFlags &Flags = {},
                               const VPIRMetadata &MD = {},
                               DebugLoc DL = DebugLoc::getUnknown(),
                               const Twine &Name = "") {
     VPInstruction *NewVPInst = tryInsertInstruction(
-        new VPInstruction(Opcode, Operands, {}, MD, DL, Name));
+        new VPInstruction(Opcode, Operands, Flags, MD, DL, Name));
     NewVPInst->setUnderlyingValue(Inst);
     return NewVPInst;
   }
@@ -329,7 +330,7 @@ class VPBuilder {
     else if (Opcode == Instruction::ZExt)
       Flags = VPIRFlags::NonNegFlagsTy(false);
     return tryInsertInstruction(
-        new VPWidenCastRecipe(Opcode, Op, ResultTy, Flags));
+        new VPWidenCastRecipe(Opcode, Op, ResultTy, nullptr, Flags));
   }
 
   VPScalarIVStepsRecipe *
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 356d759b94799..c680b6fca84cd 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7750,7 +7750,7 @@ VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(VPInstruction *VPI,
                 },
                 Range);
   if (ShouldUseVectorIntrinsic)
-    return new VPWidenIntrinsicRecipe(*CI, ID, Ops, CI->getType(), *VPI,
+    return new VPWidenIntrinsicRecipe(*CI, ID, Ops, CI->getType(), *VPI, *VPI,
                                       VPI->getDebugLoc());
 
   Function *Variant = nullptr;
@@ -7804,7 +7804,8 @@ VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(VPInstruction *VPI,
     }
 
     Ops.push_back(VPI->getOperand(VPI->getNumOperands() - 1));
-    return new VPWidenCallRecipe(CI, Variant, Ops, VPI->getDebugLoc());
+    return new VPWidenCallRecipe(CI, Variant, Ops, *VPI, *VPI,
+                                 VPI->getDebugLoc());
   }
 
   return nullptr;
@@ -7842,7 +7843,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(VPInstruction *VPI) {
       auto *SafeRHS =
           Builder.createSelect(Mask, Ops[1], One, VPI->getDebugLoc());
       Ops[1] = SafeRHS;
-      return new VPWidenRecipe(*I, Ops, *VPI, VPI->getDebugLoc());
+      return new VPWidenRecipe(*I, Ops, *VPI, *VPI, VPI->getDebugLoc());
     }
     [[fallthrough]];
   }
@@ -7888,7 +7889,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(VPInstruction *VPI) {
       // For other binops, the legacy cost model only checks the second operand.
       NewOps[1] = GetConstantViaSCEV(NewOps[1]);
     }
-    return new VPWidenRecipe(*I, NewOps, *VPI, VPI->getDebugLoc());
+    return new VPWidenRecipe(*I, NewOps, *VPI, *VPI, VPI->getDebugLoc());
   }
   case Instruction::ExtractValue: {
     SmallVector<VPValue *> NewOps(VPI->operands());
@@ -7896,7 +7897,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(VPInstruction *VPI) {
     assert(EVI->getNumIndices() == 1 && "Expected one extractvalue index");
     unsigned Idx = EVI->getIndices()[0];
     NewOps.push_back(Plan.getConstantInt(32, Idx));
-    return new VPWidenRecipe(*I, NewOps, *VPI, VPI->getDebugLoc());
+    return new VPWidenRecipe(*I, NewOps, *VPI, *VPI, VPI->getDebugLoc());
   }
   };
 }
@@ -7981,7 +7982,8 @@ VPReplicateRecipe *VPRecipeBuilder::handleReplication(VPInstruction *VPI,
           (Range.Start.isScalable() && isa<IntrinsicInst>(I))) &&
          "Should not predicate a uniform recipe");
   auto *Recipe =
-      new VPReplicateRecipe(I, VPI->operands(), IsUniform, BlockInMask, *VPI);
+      new VPReplicateRecipe(I, VPI->operands(), IsUniform, BlockInMask, *VPI,
+                            *VPI, VPI->getDebugLoc());
   return Recipe;
 }
 
@@ -8231,17 +8233,19 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
     return nullptr;
 
   if (VPI->getOpcode() == Instruction::GetElementPtr)
-    return new VPWidenGEPRecipe(cast<GetElementPtrInst>(Instr), R->operands());
+    return new VPWidenGEPRecipe(cast<GetElementPtrInst>(Instr), R->operands(),
+                                *VPI, VPI->getDebugLoc());
 
   if (VPI->getOpcode() == Instruction::Select)
-    return new VPWidenSelectRecipe(*cast<SelectInst>(Instr), R->operands(),
-                                   *VPI);
+    return new VPWidenSelectRecipe(cast<SelectInst>(Instr), R->operands(), *VPI,
+                                   *VPI, VPI->getDebugLoc());
 
   if (Instruction::isCast(VPI->getOpcode())) {
-    auto *CastR = cast<VPInstructionWithType>(R);
     auto *CI = cast<CastInst>(Instr);
+    auto *CastR = cast<VPInstructionWithType>(VPI);
     return new VPWidenCastRecipe(CI->getOpcode(), VPI->getOperand(0),
-                                 CastR->getResultType(), *CI, *VPI);
+                                 CastR->getResultType(), CI, *VPI, *VPI,
+                                 VPI->getDebugLoc());
   }
 
   return tryToWiden(VPI);
@@ -8269,8 +8273,8 @@ VPRecipeBuilder::tryToCreatePartialReduction(VPInstruction *Reduction,
     SmallVector<VPValue *, 2> Ops;
     Ops.push_back(Plan.getOrAddLiveIn(Zero));
     Ops.push_back(BinOp);
-    BinOp = new VPWidenRecipe(*ReductionI, Ops, VPIRMetadata(),
-                              ReductionI->getDebugLoc());
+    BinOp = new VPWidenRecipe(*ReductionI, Ops, VPIRFlags(*ReductionI),
+                              VPIRMetadata(), ReductionI->getDebugLoc());
     Builder.insert(BinOp->getDefiningRecipe());
     ReductionOpcode = Instruction::Add;
   }
@@ -8454,9 +8458,10 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
           Legal->isInvariantAddressOfReduction(SI->getPointerOperand())) {
         // Only create recipe for the final invariant store of the reduction.
         if (Legal->isInvariantStoreOfReduction(SI)) {
+          auto *VPI = cast<VPInstruction>(SingleDef);
           auto *Recipe = new VPReplicateRecipe(
-              SI, R.operands(), true /* IsUniform */, nullptr /*Mask*/,
-              *cast<VPInstruction>(SingleDef));
+              SI, R.operands(), true /* IsUniform */, nullptr /*Mask*/, *VPI,
+              *VPI, VPI->getDebugLoc());
           Recipe->insertBefore(*MiddleVPBB, MBIP);
         }
         R.eraseFromParent();
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index fc29ab0c84093..fedbcfb6bd32a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -882,14 +882,6 @@ class VPIRFlags {
 /// A pure-virtual common base class for recipes defining a single VPValue and
 /// using IR flags.
 struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags {
-  VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
-                      DebugLoc DL = DebugLoc::getUnknown())
-      : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags() {}
-
-  VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
-                      Instruction &I)
-      : VPSingleDefRecipe(SC, Operands, &I, I.getDebugLoc()), VPIRFlags(I) {}
-
   VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
                       const VPIRFlags &Flags,
                       DebugLoc DL = DebugLoc::getUnknown())
@@ -1474,9 +1466,12 @@ class LLVM_ABI_FOR_TEST VPWidenRecipe : public VPRecipeWithIRFlags,
         VPIRMetadata(Metadata), Opcode(Opcode) {}
 
   VPWidenRecipe(Instruction &I, ArrayRef<VPValue *> Operands,
-                const VPIRMetadata &Metadata, DebugLoc DL)
-      : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I),
-        VPIRMetadata(Metadata), Opcode(I.getOpcode()) {}
+                const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
+                DebugLoc DL = {})
+      : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, Flags, DL),
+        VPIRMetadata(Metadata), Opcode(I.getOpcode()) {
+    setUnderlyingValue(&I);
+  }
 
   ~VPWidenRecipe() override = default;
 
@@ -1517,30 +1512,22 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
 
 public:
   VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
-                    CastInst &UI, const VPIRMetadata &Metadata)
-      : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI),
-        VPIRMetadata(Metadata), Opcode(Opcode), ResultTy(ResultTy) {
-    assert(UI.getOpcode() == Opcode &&
-           "opcode of underlying cast doesn't match");
-  }
-  VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
-                    const VPIRFlags &Flags = {},
+                    CastInst *CI = nullptr, const VPIRFlags &Flags = {},
                     const VPIRMetadata &Metadata = {},
                     DebugLoc DL = DebugLoc::getUnknown())
       : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, Flags, DL),
         VPIRMetadata(Metadata), Opcode(Opcode), ResultTy(ResultTy) {
     assert(flagsValidForOpcode(Opcode) &&
            "Set flags not supported for the provided opcode");
+    setUnderlyingValue(CI);
   }
 
   ~VPWidenCastRecipe() override = default;
 
   VPWidenCastRecipe *clone() override {
-    auto *New = new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy, *this,
-                                      *this, getDebugLoc());
-    if (auto *UV = getUnderlyingValue())
-      New->setUnderlyingValue(UV);
-    return New;
+    return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
+                                 cast_or_null<CastInst>(getUnderlyingValue()),
+                                 *this, *this, getDebugLoc());
   }
 
   VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
@@ -1585,13 +1572,17 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
 public:
   VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID,
                          ArrayRef<VPValue *> CallArguments, Type *Ty,
+                         const VPIRFlags &Flags = {},
                          const VPIRMetadata &MD = {},
                          DebugLoc DL = DebugLoc::getUnknown())
-      : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, CI),
+      : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, Flags,
+                            DL),
         VPIRMetadata(MD), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
         MayReadFromMemory(CI.mayReadFromMemory()),
         MayWriteToMemory(CI.mayWriteToMemory()),
-        MayHaveSideEffects(CI.mayHaveSideEffects()) {}
+        MayHaveSideEffects(CI.mayHaveSideEffects()) {
+    setUnderlyingValue(&CI);
+  }
 
   VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID,
                          ArrayRef<VPValue *> CallArguments, Type *Ty,
@@ -1617,7 +1608,7 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
   VPWidenIntrinsicRecipe *clone() override {
     if (Value *CI = getUnderlyingValue())
       return new VPWidenIntrinsicRecipe(*cast<CallInst>(CI), VectorIntrinsicID,
-                                        operands(), ResultTy, *this,
+                                        operands(), ResultTy, *this, *this,
                                         getDebugLoc());
     return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(), ResultTy,
                                       *this, *this, getDebugLoc());
@@ -1671,10 +1662,11 @@ class LLVM_ABI_FOR_TEST VPWidenCallRecipe : public VPRecipeWithIRFlags,
 public:
   VPWidenCallRecipe(Value *UV, Function *Variant,
                     ArrayRef<VPValue *> CallArguments,
-                    DebugLoc DL = DebugLoc::getUnknown())
-      : VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments,
-                            *cast<Instruction>(UV)),
-        VPIRMetadata(*cast<Instruction>(UV)), Variant(Variant) {
+                    const VPIRFlags &Flags = {},
+                    const VPIRMetadata &Metadata = {}, DebugLoc DL = {})
+      : VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments, Flags, DL),
+        VPIRMetadata(Metadata), Variant(Variant) {
+    setUnderlyingValue(UV);
     assert(
         isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
         "last operand must be the called function");
@@ -1684,7 +1676,7 @@ class LLVM_ABI_FOR_TEST VPWidenCallRecipe : public VPRecipeWithIRFlags,
 
   VPWidenCallRecipe *clone() override {
     return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(),
-                                 getDebugLoc());
+                                 *this, *this, getDebugLoc());
   }
 
   VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
@@ -1761,16 +1753,19 @@ class VPHistogramRecipe : public VPRecipeBase {
 /// instruction.
 struct LLVM_ABI_FOR_TEST VPWidenSelectRecipe : public VPRecipeWithIRFlags,
                                                public VPIRMetadata {
-  VPWidenSelectRecipe(SelectInst &I, ArrayRef<VPValue *> Operands,
-                      const VPIRMetadata &MD = {})
-      : VPRecipeWithIRFlags(VPDef::VPWidenSelectSC, Operands, I),
-        VPIRMetadata(MD) {}
+  VPWidenSelectRecipe(SelectInst *SI, ArrayRef<VPValue *> Operands,
+                      const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {},
+                      DebugLoc DL = {})
+      : VPRecipeWithIRFlags(VPDef::VPWidenSelectSC, Operands, Flags, DL),
+        VPIRMetadata(MD) {
+    setUnderlyingValue(SI);
+  }
 
   ~VPWidenSelectRecipe() override = default;
 
   VPWidenSelectRecipe *clone() override {
-    return new VPWidenSelectRecipe(*cast<SelectInst>(getUnderlyingInstr()),
-                                   operands(), *this);
+    return new VPWidenSelectRecipe(cast<SelectInst>(getUnderlyingInstr()),
+                                   operands(), *this, *this, getDebugLoc());
   }
 
   VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)
@@ -1822,9 +1817,12 @@ class LLVM_ABI_FOR_TEST VPWidenGEPRecipe : public VPRecipeWithIRFlags {
   }
 
 public:
-  VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef<VPValue *> Operands)
-      : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP),
+  VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef<VPValue *> Operands,
+                   const VPIRFlags &Flags = {},
+                   DebugLoc DL = DebugLoc::getUnknown())
+      : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, Flags, DL),
         SourceElementTy(GEP->getSourceElementType()) {
+    setUnderlyingValue(GEP);
     SmallVector<std::pair<unsigned, MDNode *>> Metadata;
     (void)Metadata;
     getMetadataToPropagate(GEP, Metadata);
@@ -1835,7 +1833,7 @@ class LLVM_ABI_FOR_TEST VPWidenGEPRecipe : public VPRecipeWithIRFlags {
 
   VPWidenGEPRecipe *clone() override {
     return new VPWidenGEPRecipe(cast<GetElementPtrInst>(getUnderlyingInstr()),
-                                operands());
+                                operands(), *this, getDebugLoc());
   }
 
   VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)
@@ -2929,10 +2927,12 @@ class LLVM_ABI_FOR_TEST VPReplicateRecipe : public VPRecipeWithIRFlags,
 public:
   VPReplicateRecipe(Instruction *I, ArrayRef<VPValue *> Operands,
                     bool IsSingleScalar, VPValue *Mask = nullptr,
-                    VPIRMetadata Metadata = {})
-      : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I),
+                    const VPIRFlags &Flags = {}, VPIRMetadata Metadata = {},
+                    DebugLoc DL = DebugLoc::getUnknown())
+      : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, Flags, DL),
         VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar),
         IsPredicated(Mask) {
+    setUnderlyingValue(I);
     if (Mask)
       addOperand(Mask);
   }
@@ -2940,9 +2940,9 @@ class LLVM_ABI_FOR_TEST VPReplicateRecipe : public VPRecipeWithIRFlags,
   ~VPReplicateRecipe() override = default;
 
   VPReplicateRecipe *clone() override {
-    auto *Copy =
-        new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsSingleScalar,
-                              isPredicated() ? getMask() : nullptr, *this);
+    auto *Copy = new VPReplicateRecipe(
+        getUnderlyingInstr(), operands(), IsSingleScalar,
+        isPredicated() ? getMask() : nullptr, *this, *this, getDebugLoc());
     Copy->transferFlags(*this);
     return Copy;
   }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 612202d049774..dbbde1cafa9f2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -190,7 +190,7 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
       // recipes.
       if (Br->isConditional()) {
         VPValue *Cond = getOrCreateVPOperand(Br->getCondition());
-        VPIRBuilder.createNaryOp(VPInstruction::BranchOnCond, {Cond}, Inst,
+        VPIRBuilder.createNaryOp(VPInstruction::BranchOnCond, {Cond}, Inst, {},
                                  VPIRMetadata(*Inst), Inst->getDebugLoc());
       }
 
@@ -205,7 +205,7 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
       SmallVector<VPValue *> Ops = {getOrCreateVPOperand(SI->getCondition())};
       for (auto Case : SI->cases())
         Ops.push_back(getOrCreateVPOperand(Case.getCaseValue()));
-      VPIRBuilder.createNaryOp(Instruction::Switch, Ops, Inst,
+      VPIRBuilder.createNaryOp(Instruction::Switch, Ops, Inst, {},
                                VPIRMetadata(*Inst), Inst->getDebugLoc());
       continue;
     }
@@ -255,13 +255,14 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
       if (auto *CI = dyn_cast<CastInst>(Inst)) {
         NewR = VPIRBuilder.createScalarCast(CI->getOpcode(), VPOperands[0],
                                             CI->getType(), CI->getDebugLoc(),
-                                            {}, MD);
+                                            VPIRFlags(*CI), MD);
         NewR->setUnderlyingValue(CI);
       } else {
         // Build VPInstruction for any arbitrary Instruction without specific
         // representation in VPlan.
-        NewR = VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst, MD,
-                                        Inst->getDebugLoc());
+        NewR =
+            VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst,
+                                     VPIRFlags(*Inst), MD, Inst->getDebugLoc());
       }
     }
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index fca6554ad77c6..ef36e29aaa5c4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2056,24 +2056,26 @@ bool VPIRFlags::flagsValidForOpcode(unsigned Opcode) const {
   switch (OpType) {
   case OperationType::OverflowingBinOp:
     return Opcode == Instruction::Add || Opcode == Instruction::Sub ||
-           Opcode == Instruction::Mul ||
+           Opcode == Instruction::Mul || Opcode == Instruction::Shl ||
            Opcode == VPInstruction::VPInstruction::CanonicalIVIncrementForPart;
   case OperationType::Trunc:
     return Opcode == Instruction::Trunc;
   case OperationType::DisjointOp:
     return Opcode == Instruction::Or;
   case OperationType::PossiblyExactOp:
-    return Opcode == Instruction::AShr;
+    return Opcode == Instruction::AShr || Opcode == Instruction::LShr ||
+           Opcode == Instruction::UDiv || Opcode == Instruction::SDiv;
   case OperationType::GEPOp:
     return Opcode == Instruction::GetElementPtr ||
            Opcode == VPInstruction::PtrAdd ||
            Opcode == VPInstruction::WidePtrAdd;
   case OperationType::FPMathOp:
-    return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
-           Opcode == Instruction::FSub || Opcode == Instruction::FNeg ||
-           Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
-           Opcode == Instruction::FPExt || Opcode == Instruction::FPTrunc ||
-           Opcode == Instruction::FCmp || Opcode == Instruction::Select ||
+    return Opcode == Instruction::Call || Opcode == Instruction::FAdd ||
+           Opcode == Instruction::FMul || Opcode == Instruction::FSub ||
+           Opcode == Instruction::FNeg || Opcode == Instruction::FDiv ||
+           Opcode == Instruction::FRem || Opcode == Instruction::FPExt ||
+           Opcode == Instruction::FPTrunc || Opcode == Instruction::FCmp ||
+           Opcode == Instruction::Select ||
            Opcode == VPInstruction::WideIVStep ||
            Opcode == VPInstruction::ReductionStartVector ||
            Opcode == VPInstruction::ComputeReductionResult;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 26563242de283..25557f1d5d651 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -104,24 +104,26 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
               nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/, *VPI,
               Ingredient.getDebugLoc());
         } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
-          NewRecipe = new VPWidenGEPRecipe(GEP, Ingredient.operands());
+          NewRecipe = new VPWidenGEPRecipe(GEP, Ingredient.operands(), *VPI,
+                                           Ingredient.getDebugLoc());
         } else if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
           Intrinsic::ID VectorID = getVectorIntrinsicIDForCall(CI, &TLI);
           if (VectorID == Intrinsic::not_intrinsic)
             return false;
           NewRecipe = new VPWidenIntrinsicRecipe(
               *CI, getVectorIntrinsicIDForCall(CI, &TLI),
-              drop_end(Ingredient.operands()), CI->getType(), *VPI,
-              CI->getDebugLoc());
+              drop_end(Ingredient.operands()), CI->getType(), VPIRFlags(*CI),
+              *VPI, CI->getDebugLoc());
         } else if (SelectInst *SI = dyn_cast<SelectInst>(Inst)) {
-          NewRecipe = new VPWidenSelectRecipe(*SI, Ingredient.operands(), *VPI);
+          NewRecipe = new VPWidenSelectRecipe(SI, Ingredient.operands(), *VPI,
+                                              *VPI, Ingredient.getDebugLoc());
         } else if (auto *CI = dyn_cast<CastInst>(Inst)) {
-          NewRecipe =
-              new VPWidenCastRecipe(CI->getOpcode(), Ingredient.getOperand(0),
-                                    CI->getType(), *CI, *VPI);
+          NewRecipe = new VPWidenCastRecipe(
+              CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI,
+              VPIRFlags(*CI), VPIRMetadata(*CI), Ingredient.getDebugLoc());
         } else {
           NewRecipe = new VPWidenRecipe(*Inst, Ingredient.operands(), *VPI,
-                                        Ingredient.getDebugLoc());
+                                        *VPI, Ingredient.getDebugLoc());
         }
       }
 
@@ -226,7 +228,8 @@ static bool sinkScalarOperands(VPlan &Plan) {
         // then cloning should be sufficient here.
         Instruction *I = SinkCandidate->getUnderlyingInstr();
         Clone = new VPReplicateRecipe(I, SinkCandidate->operands(), true,
-                                      nullptr /*Mask*/, *SinkCandidateRepR);
+                                      nullptr /*Mask*/, *SinkCandidateRepR,
+                                      *SinkCandidateRepR);
         // TODO: add ".cloned" suffix to name of Clone's VPValue.
       } else {
         Clone = SinkCandidate->clone();
@@ -385,7 +388,8 @@ static VPRegionBlock *createReplicateRegion(VPReplicateRecipe *PredRecipe,
   // mask but in the replicate region.
   auto *RecipeWithoutMask = new VPReplicateRecipe(
       PredRecipe->getUnderlyingInstr(), drop_end(PredRecipe->operands()),
-      PredRecipe->isSingleScalar(), nullptr /*Mask*/, *PredRecipe);
+      PredRecipe->isSingleScalar(), nullptr /*Mask*/, *PredRecipe, *PredRecipe,
+      PredRecipe->getDebugLoc());
   auto *Pred =
       Plan.createVPBasicBlock(Twine(RegionName) + ".if", RecipeWithoutMask);
 
@@ -691,7 +695,7 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) {
     // analysis.
     auto Users = collectUsersRecursively(PhiR);
     for (VPUser *U : reverse(Users)) {
-      auto *Def = dyn_cast<VPSingleDefRecipe>(U);
+      auto *Def = dyn_cast<VPRecipeWithIRFlags>(U);
       auto *RepR = dyn_cast<VPReplicateRecipe>(U);
       // Skip recipes that shouldn't be narrowed.
       if (!Def || !isa<VPReplicateRecipe, VPWidenRecipe>(Def) ||
@@ -704,7 +708,8 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) {
         continue;
 
       auto *Clone = new VPReplicateRecipe(Def->getUnderlyingInstr(),
-                                          Def->operands(), /*IsUniform*/ true);
+                                          Def->operands(), /*IsUniform*/ true,
+                                          /*Mask*/ nullptr, /*Flags*/ *Def);
       Clone->insertAfter(Def);
       Def->replaceAllUsesWith(Clone);
     }
@@ -1423,12 +1428,13 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
       if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
         continue;
 
-      auto *RepOrWidenR = cast<VPSingleDefRecipe>(&R);
+      auto *RepOrWidenR = cast<VPRecipeWithIRFlags>(&R);
       if (RepR && isa<StoreInst>(RepR->getUnderlyingInstr()) &&
           vputils::isSingleScalar(RepR->getOperand(1))) {
         auto *Clone = new VPReplicateRecipe(
             RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
-            true /*IsSingleScalar*/, nullptr /*Mask*/, *RepR /*Metadata*/);
+            true /*IsSingleScalar*/, nullptr /*Mask*/, *RepR /*Flags*/,
+            *RepR /*Metadata*/, RepR->getDebugLoc());
         Clone->insertBefore(RepOrWidenR);
         unsigned ExtractOpc =
             vputils::isUniformAcrossVFsAndUFs(RepR->getOperand(1))
@@ -1469,9 +1475,9 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
           }))
         continue;
 
-      auto *Clone = new VPReplicateRecipe(RepOrWidenR->getUnderlyingInstr(),
-                                          RepOrWidenR->operands(),
-                                          true /*IsSingleScalar*/);
+      auto *Clone = new VPReplicateRecipe(
+          RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
+          true /*IsSingleScalar*/, nullptr, *RepOrWidenR);
       Clone->insertBefore(RepOrWidenR);
       RepOrWidenR->replaceAllUsesWith(Clone);
       if (isDeadRecipe(*RepOrWidenR))
@@ -3824,15 +3830,15 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
         Ext0->getOpcode() == Ext1->getOpcode() &&
         IsMulAccValidAndClampRange(Mul, Ext0, Ext1, Ext) && Mul->hasOneUse()) {
       auto *NewExt0 = new VPWidenCastRecipe(
-          Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(), *Ext0,
-          *Ext0, Ext0->getDebugLoc());
+          Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(), nullptr,
+          *Ext0, *Ext0, Ext0->getDebugLoc());
       NewExt0->insertBefore(Ext0);
 
       VPWidenCastRecipe *NewExt1 = NewExt0;
       if (Ext0 != Ext1) {
         NewExt1 = new VPWidenCastRecipe(Ext1->getOpcode(), Ext1->getOperand(0),
-                                        Ext->getResultType(), *Ext1, *Ext1,
-                                        Ext1->getDebugLoc());
+                                        Ext->getResultType(), nullptr, *Ext1,
+                                        *Ext1, Ext1->getDebugLoc());
         NewExt1->insertBefore(Ext1);
       }
       Mul->setOperand(0, NewExt0);
@@ -4353,7 +4359,7 @@ narrowInterleaveGroupOp(VPValue *V, SmallPtrSetImpl<VPValue *> &NarrowedOps) {
   // process one original iteration.
   auto *N = new VPReplicateRecipe(&WideLoad->getIngredient(), {PtrOp},
                                   /*IsUniform*/ true,
-                                  /*Mask*/ nullptr, *WideLoad);
+                                  /*Mask*/ nullptr, {}, *WideLoad);
   N->insertBefore(WideLoad);
   NarrowedOps.insert(N);
   return N;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index d4b8b72beb942..d76d2ed5f1c76 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -518,9 +518,9 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
     // TODO: have cloning of replicate recipes also provide the desired result
     // coupled with setting its operands to NewOps (deriving IsSingleScalar and
     // Mask from the operands?)
-    New =
-        new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps,
-                              /*IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR);
+    New = new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps,
+                                /*IsSingleScalar=*/true, /*Mask=*/nullptr,
+                                *RepR, *RepR, RepR->getDebugLoc());
   } else {
     assert(isa<VPInstruction>(DefR) &&
            "DefR must be a VPReplicateRecipe or VPInstruction");
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-copy-vector-crash.ll b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-copy-vector-crash.ll
new file mode 100644
index 0000000000000..f15253682c336
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-copy-vector-crash.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -global-isel -o - %s | FileCheck %s
+
+target triple = "aarch64-unknown-unknown"
+
+; Check that GlobalISel does not crash when computing known bits for this function.
+
+define <4 x i32> @test(<8 x i16> %in, i1 %continue) {
+; CHECK-LABEL: test:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    mov w12, wzr
+; CHECK-NEXT:    mov x8, sp
+; CHECK-NEXT:    mov w9, #2 // =0x2
+; CHECK-NEXT:    mov w10, #0 // =0x0
+; CHECK-NEXT:  .LBB0_1: // %loop
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    mov w11, w12
+; CHECK-NEXT:    mov w12, w12
+; CHECK-NEXT:    str q0, [sp]
+; CHECK-NEXT:    and x12, x12, #0x7
+; CHECK-NEXT:    umull x12, w12, w9
+; CHECK-NEXT:    ldrb w12, [x8, x12]
+; CHECK-NEXT:    cmp w12, #0
+; CHECK-NEXT:    cset w12, eq
+; CHECK-NEXT:    fmov s1, w12
+; CHECK-NEXT:    mov v1.b[1], w10
+; CHECK-NEXT:    mov v1.b[2], w10
+; CHECK-NEXT:    mov v1.b[3], w10
+; CHECK-NEXT:    fmov w12, s1
+; CHECK-NEXT:    tbz w0, #0, .LBB0_1
+; CHECK-NEXT:  // %bb.2: // %exit
+; CHECK-NEXT:    fmov s0, w11
+; CHECK-NEXT:    mov v0.s[1], wzr
+; CHECK-NEXT:    mov v0.s[2], wzr
+; CHECK-NEXT:    mov v0.s[3], wzr
+; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    ret
+entry:
+  br label %loop
+
+exit:
+  %result = insertelement <4 x i32> zeroinitializer, i32 %index, i64 0
+  ret <4 x i32> %result
+
+loop:
+  %index = phi i32 [ 0, %entry ], [ %insert.bitcast, %loop ]
+  %extracted = extractelement <8 x i16> %in, i32 %index
+  %masked = and i16 %extracted, 255
+  %maskedIsZero = icmp eq i16 %masked, 0
+  %maskedIsZero.zext = zext i1 %maskedIsZero to i8
+  %insert = insertelement <4 x i8> zeroinitializer, i8 %maskedIsZero.zext, i64 0
+  %insert.bitcast = bitcast <4 x i8> %insert to i32
+  br i1 %continue, label %exit, label %loop
+}
diff --git a/llvm/test/CodeGen/AArch64/ccmp-cse.ll b/llvm/test/CodeGen/AArch64/ccmp-cse.ll
new file mode 100644
index 0000000000000..657498172a04c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ccmp-cse.ll
@@ -0,0 +1,139 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s
+
+define i64 @test_single_or(i64 %unrelated, i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: test_single_or:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subs x8, x2, x1
+; CHECK-NEXT:    ccmp x2, x0, #2, hs
+; CHECK-NEXT:    csel x0, xzr, x8, hi
+; CHECK-NEXT:    ret
+  %cmp.match = icmp ult i64 %y, %x
+  %cmp.nomatch = icmp ugt i64 %y, %unrelated
+  %or.cond = or i1 %cmp.match, %cmp.nomatch
+  %sub.reuse = sub nuw i64 %y, %x
+  %res = select i1 %or.cond, i64 0, i64 %sub.reuse
+  ret i64 %res
+}
+
+define i64 @test_two_ors(i64 %unrelated, i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: test_two_ors:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subs x8, x2, x1
+; CHECK-NEXT:    ccmp x0, x1, #0, hs
+; CHECK-NEXT:    ccmp x2, x0, #2, hs
+; CHECK-NEXT:    csel x0, xzr, x8, hi
+; CHECK-NEXT:    ret
+  %cmp.match = icmp ult i64 %y, %x
+  %cmp.nomatch1 = icmp ult i64 %unrelated, %x
+  %cmp.nomatch2 = icmp ugt i64 %y, %unrelated
+  %or.nomatch = or i1 %cmp.nomatch1, %cmp.nomatch2
+  %or.cond = or i1 %cmp.match, %or.nomatch
+  %sub.reuse = sub nuw i64 %y, %x
+  %res = select i1 %or.cond, i64 0, i64 %sub.reuse
+  ret i64 %res
+}
+
+define i64 @test_two_ors_commuted(i64 %unrelated, i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: test_two_ors_commuted:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subs x8, x2, x1
+; CHECK-NEXT:    ccmp x0, x1, #0, hs
+; CHECK-NEXT:    ccmp x2, x0, #2, hs
+; CHECK-NEXT:    csel x0, xzr, x8, hi
+; CHECK-NEXT:    ret
+  %cmp.match = icmp ult i64 %y, %x
+  %cmp.nomatch1 = icmp ult i64 %unrelated, %x
+  %cmp.nomatch2 = icmp ugt i64 %y, %unrelated
+  %or.nomatch = or i1 %cmp.nomatch1, %cmp.nomatch2
+  %or.cond = or i1 %or.nomatch, %cmp.match
+  %sub.reuse = sub nuw i64 %y, %x
+  %res = select i1 %or.cond, i64 0, i64 %sub.reuse
+  ret i64 %res
+}
+
+define i64 @test_single_and(i64 %unrelated, i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: test_single_and:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subs x8, x2, x1
+; CHECK-NEXT:    ccmp x2, x0, #0, lo
+; CHECK-NEXT:    csel x0, xzr, x8, hi
+; CHECK-NEXT:    ret
+  %cmp.match = icmp ult i64 %y, %x
+  %cmp.nomatch = icmp ugt i64 %y, %unrelated
+  %and.cond = and i1 %cmp.match, %cmp.nomatch
+  %sub.reuse = sub nuw i64 %y, %x
+  %res = select i1 %and.cond, i64 0, i64 %sub.reuse
+  ret i64 %res
+}
+
+define i64 @test_single_or_sub_commuted(i64 %unrelated, i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: test_single_or_sub_commuted:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subs x8, x1, x2
+; CHECK-NEXT:    ccmp x2, x0, #2, ls
+; CHECK-NEXT:    csel x0, xzr, x8, hi
+; CHECK-NEXT:    ret
+  %cmp.match = icmp ult i64 %y, %x
+  %cmp.nomatch = icmp ugt i64 %y, %unrelated
+  %or.cond = or i1 %cmp.match, %cmp.nomatch
+  %sub.reuse = sub nuw i64 %x, %y
+  %res = select i1 %or.cond, i64 0, i64 %sub.reuse
+  ret i64 %res
+}
+
+; Negative test: The or must be negated and therefore emitted first, so the compare matching the sub cannot lead the chain (no SUBS is formed).
+define i64 @test_mustbefirst_overrides_preferfirst_negative(i64 %unrelated, i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: test_mustbefirst_overrides_preferfirst_negative:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmp x2, x0
+; CHECK-NEXT:    sub x8, x2, x1
+; CHECK-NEXT:    ccmp x0, x1, #0, ls
+; CHECK-NEXT:    ccmp x2, x1, #2, lo
+; CHECK-NEXT:    csel x0, xzr, x8, lo
+; CHECK-NEXT:    ret
+  %cmp.match = icmp ult i64 %y, %x
+  %cmp.nomatch1 = icmp ult i64 %unrelated, %x
+  %cmp.nomatch2 = icmp ugt i64 %y, %unrelated
+  %or.nomatch = or i1 %cmp.nomatch1, %cmp.nomatch2
+  %and.cond = and i1 %or.nomatch, %cmp.match
+  %sub.reuse = sub nuw i64 %y, %x
+  %res = select i1 %and.cond, i64 0, i64 %sub.reuse
+  ret i64 %res
+}
+
+; Negative test: There is no analogue of SUBS for floating point.
+define float @test_negative_float(float %unrelated, float %x, float %y) nounwind {
+; CHECK-LABEL: test_negative_float:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmp s2, s0
+; CHECK-NEXT:    fsub s0, s2, s1
+; CHECK-NEXT:    movi d3, #0000000000000000
+; CHECK-NEXT:    fccmp s2, s1, #8, le
+; CHECK-NEXT:    fcsel s0, s3, s0, mi
+; CHECK-NEXT:    ret
+  %cmp.nomatch1 = fcmp olt float %y, %x
+  %cmp.nomatch2 = fcmp ogt float %y, %unrelated
+  %or.cond = or i1 %cmp.nomatch1, %cmp.nomatch2
+  %sub.noreuse = fsub float %y, %x
+  %res = select i1 %or.cond, float 0.0, float %sub.noreuse
+  ret float %res
+}
+
+; Negative test: If both operands match a sub, do not reorder them.
+define i64 @test_prefer_right_negative(i64 %x, i64 %y, i64 %z) nounwind {
+; CHECK-LABEL: test_prefer_right_negative:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmp x2, x0
+; CHECK-NEXT:    ccmp x2, x1, #0, ls
+; CHECK-NEXT:    csel x8, x0, x1, lo
+; CHECK-NEXT:    sub x0, x2, x8
+; CHECK-NEXT:    ret
+  %cmp.match1 = icmp ult i64 %z, %y
+  %cmp.match2 = icmp ugt i64 %z, %x
+  %or.cond = or i1 %cmp.match1, %cmp.match2
+  %sub.reuse1 = sub nuw i64 %z, %y
+  %sub.reuse2 = sub nuw i64 %z, %x
+  %res = select i1 %or.cond, i64 %sub.reuse2, i64 %sub.reuse1
+  ret i64 %res
+}
diff --git a/llvm/test/CodeGen/AArch64/i128-math.ll b/llvm/test/CodeGen/AArch64/i128-math.ll
index 9e1c0c1b115ab..12ae241dda4bd 100644
--- a/llvm/test/CodeGen/AArch64/i128-math.ll
+++ b/llvm/test/CodeGen/AArch64/i128-math.ll
@@ -262,20 +262,28 @@ define i128 @u128_mul(i128 %x, i128 %y) {
 define { i128, i8 } @u128_checked_mul(i128 %x, i128 %y) {
 ; CHECK-LABEL: u128_checked_mul:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr x8, x1, x3
+; CHECK-NEXT:    cbz x8, .LBB17_2
+; CHECK-NEXT:  // %bb.1: // %overflow
 ; CHECK-NEXT:    mul x9, x3, x0
 ; CHECK-NEXT:    cmp x1, #0
 ; CHECK-NEXT:    ccmp x3, #0, #4, ne
-; CHECK-NEXT:    umulh x8, x1, x2
-; CHECK-NEXT:    umulh x10, x3, x0
+; CHECK-NEXT:    umulh x10, x1, x2
+; CHECK-NEXT:    umulh x8, x3, x0
 ; CHECK-NEXT:    madd x9, x1, x2, x9
-; CHECK-NEXT:    ccmp xzr, x8, #0, eq
-; CHECK-NEXT:    umulh x11, x0, x2
 ; CHECK-NEXT:    ccmp xzr, x10, #0, eq
+; CHECK-NEXT:    umulh x11, x0, x2
+; CHECK-NEXT:    ccmp xzr, x8, #0, eq
 ; CHECK-NEXT:    mul x0, x0, x2
 ; CHECK-NEXT:    cset w8, ne
 ; CHECK-NEXT:    adds x1, x11, x9
 ; CHECK-NEXT:    csinc w8, w8, wzr, lo
 ; CHECK-NEXT:    eor w2, w8, #0x1
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB17_2: // %overflow.no
+; CHECK-NEXT:    umulh x1, x0, x2
+; CHECK-NEXT:    mul x0, x0, x2
+; CHECK-NEXT:    eor w2, w8, #0x1
 ; CHECK-NEXT:    ret
   %1 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y)
   %2 = extractvalue { i128, i1 } %1, 0
@@ -290,19 +298,27 @@ define { i128, i8 } @u128_checked_mul(i128 %x, i128 %y) {
 define { i128, i8 } @u128_overflowing_mul(i128 %x, i128 %y) {
 ; CHECK-LABEL: u128_overflowing_mul:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr x8, x1, x3
+; CHECK-NEXT:    cbz x8, .LBB18_2
+; CHECK-NEXT:  // %bb.1: // %overflow
 ; CHECK-NEXT:    mul x9, x3, x0
 ; CHECK-NEXT:    cmp x1, #0
 ; CHECK-NEXT:    ccmp x3, #0, #4, ne
-; CHECK-NEXT:    umulh x8, x1, x2
-; CHECK-NEXT:    umulh x10, x3, x0
+; CHECK-NEXT:    umulh x10, x1, x2
+; CHECK-NEXT:    umulh x8, x3, x0
 ; CHECK-NEXT:    madd x9, x1, x2, x9
-; CHECK-NEXT:    ccmp xzr, x8, #0, eq
-; CHECK-NEXT:    umulh x11, x0, x2
 ; CHECK-NEXT:    ccmp xzr, x10, #0, eq
+; CHECK-NEXT:    umulh x11, x0, x2
+; CHECK-NEXT:    ccmp xzr, x8, #0, eq
 ; CHECK-NEXT:    mul x0, x0, x2
 ; CHECK-NEXT:    cset w8, ne
 ; CHECK-NEXT:    adds x1, x11, x9
 ; CHECK-NEXT:    csinc w2, w8, wzr, lo
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB18_2: // %overflow.no
+; CHECK-NEXT:    umulh x1, x0, x2
+; CHECK-NEXT:    mul x0, x0, x2
+; CHECK-NEXT:    mov w2, wzr
 ; CHECK-NEXT:    ret
   %1 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y)
   %2 = extractvalue { i128, i1 } %1, 0
@@ -316,19 +332,28 @@ define { i128, i8 } @u128_overflowing_mul(i128 %x, i128 %y) {
 define i128 @u128_saturating_mul(i128 %x, i128 %y) {
 ; CHECK-LABEL: u128_saturating_mul:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mul x9, x3, x0
+; CHECK-NEXT:    orr x8, x1, x3
+; CHECK-NEXT:    cbz x8, .LBB19_2
+; CHECK-NEXT:  // %bb.1: // %overflow
+; CHECK-NEXT:    mul x8, x3, x0
 ; CHECK-NEXT:    cmp x1, #0
 ; CHECK-NEXT:    ccmp x3, #0, #4, ne
-; CHECK-NEXT:    umulh x8, x1, x2
-; CHECK-NEXT:    umulh x10, x3, x0
-; CHECK-NEXT:    madd x9, x1, x2, x9
-; CHECK-NEXT:    ccmp xzr, x8, #0, eq
-; CHECK-NEXT:    umulh x11, x0, x2
+; CHECK-NEXT:    umulh x10, x1, x2
+; CHECK-NEXT:    umulh x9, x3, x0
+; CHECK-NEXT:    madd x11, x1, x2, x8
 ; CHECK-NEXT:    ccmp xzr, x10, #0, eq
+; CHECK-NEXT:    umulh x12, x0, x2
+; CHECK-NEXT:    ccmp xzr, x9, #0, eq
 ; CHECK-NEXT:    mul x8, x0, x2
 ; CHECK-NEXT:    cset w10, ne
-; CHECK-NEXT:    adds x9, x11, x9
+; CHECK-NEXT:    adds x9, x12, x11
 ; CHECK-NEXT:    csinc w10, w10, wzr, lo
+; CHECK-NEXT:    b .LBB19_3
+; CHECK-NEXT:  .LBB19_2: // %overflow.no
+; CHECK-NEXT:    umulh x9, x0, x2
+; CHECK-NEXT:    mov w10, wzr
+; CHECK-NEXT:    mul x8, x0, x2
+; CHECK-NEXT:  .LBB19_3: // %overflow.res
 ; CHECK-NEXT:    cmp w10, #0
 ; CHECK-NEXT:    csinv x0, x8, xzr, eq
 ; CHECK-NEXT:    csinv x1, x9, xzr, eq
@@ -355,6 +380,11 @@ define i128 @i128_mul(i128 %x, i128 %y) {
 define { i128, i8 } @i128_checked_mul(i128 %x, i128 %y) {
 ; CHECK-LABEL: i128_checked_mul:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    eor x8, x3, x2, asr #63
+; CHECK-NEXT:    eor x9, x1, x0, asr #63
+; CHECK-NEXT:    orr x8, x9, x8
+; CHECK-NEXT:    cbz x8, .LBB21_2
+; CHECK-NEXT:  // %bb.1: // %overflow
 ; CHECK-NEXT:    asr x9, x1, #63
 ; CHECK-NEXT:    umulh x10, x0, x2
 ; CHECK-NEXT:    asr x13, x3, #63
@@ -364,24 +394,30 @@ define { i128, i8 } @i128_checked_mul(i128 %x, i128 %y) {
 ; CHECK-NEXT:    adds x10, x11, x10
 ; CHECK-NEXT:    mul x14, x0, x3
 ; CHECK-NEXT:    umulh x12, x0, x3
-; CHECK-NEXT:    adc x9, x8, x9
+; CHECK-NEXT:    adc x8, x8, x9
+; CHECK-NEXT:    mov x9, x1
 ; CHECK-NEXT:    mul x13, x0, x13
-; CHECK-NEXT:    adds x8, x14, x10
-; CHECK-NEXT:    mul x15, x1, x3
-; CHECK-NEXT:    smulh x10, x1, x3
-; CHECK-NEXT:    mov x1, x8
-; CHECK-NEXT:    adc x11, x12, x13
-; CHECK-NEXT:    asr x12, x9, #63
-; CHECK-NEXT:    asr x13, x11, #63
-; CHECK-NEXT:    adds x9, x9, x11
 ; CHECK-NEXT:    asr x11, x8, #63
+; CHECK-NEXT:    mul x15, x1, x3
+; CHECK-NEXT:    adds x1, x14, x10
+; CHECK-NEXT:    smulh x9, x9, x3
+; CHECK-NEXT:    adc x10, x12, x13
+; CHECK-NEXT:    asr x12, x10, #63
+; CHECK-NEXT:    adds x8, x8, x10
+; CHECK-NEXT:    asr x10, x1, #63
 ; CHECK-NEXT:    mul x0, x0, x2
-; CHECK-NEXT:    adc x12, x12, x13
-; CHECK-NEXT:    adds x9, x15, x9
-; CHECK-NEXT:    adc x10, x10, x12
-; CHECK-NEXT:    cmp x9, x11
-; CHECK-NEXT:    ccmp x10, x11, #0, eq
-; CHECK-NEXT:    cset w2, eq
+; CHECK-NEXT:    adc x11, x11, x12
+; CHECK-NEXT:    adds x8, x15, x8
+; CHECK-NEXT:    adc x9, x9, x11
+; CHECK-NEXT:    cmp x8, x10
+; CHECK-NEXT:    ccmp x9, x10, #0, eq
+; CHECK-NEXT:    cset w8, ne
+; CHECK-NEXT:    eor w2, w8, #0x1
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB21_2: // %overflow.no
+; CHECK-NEXT:    smulh x1, x0, x2
+; CHECK-NEXT:    mul x0, x0, x2
+; CHECK-NEXT:    eor w2, w8, #0x1
 ; CHECK-NEXT:    ret
   %1 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y)
   %2 = extractvalue { i128, i1 } %1, 0
@@ -396,6 +432,11 @@ define { i128, i8 } @i128_checked_mul(i128 %x, i128 %y) {
 define { i128, i8 } @i128_overflowing_mul(i128 %x, i128 %y) {
 ; CHECK-LABEL: i128_overflowing_mul:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    eor x8, x3, x2, asr #63
+; CHECK-NEXT:    eor x9, x1, x0, asr #63
+; CHECK-NEXT:    orr x8, x9, x8
+; CHECK-NEXT:    cbz x8, .LBB22_2
+; CHECK-NEXT:  // %bb.1: // %overflow
 ; CHECK-NEXT:    asr x9, x1, #63
 ; CHECK-NEXT:    umulh x10, x0, x2
 ; CHECK-NEXT:    asr x13, x3, #63
@@ -405,24 +446,29 @@ define { i128, i8 } @i128_overflowing_mul(i128 %x, i128 %y) {
 ; CHECK-NEXT:    adds x10, x11, x10
 ; CHECK-NEXT:    mul x14, x0, x3
 ; CHECK-NEXT:    umulh x12, x0, x3
-; CHECK-NEXT:    adc x9, x8, x9
+; CHECK-NEXT:    adc x8, x8, x9
+; CHECK-NEXT:    mov x9, x1
 ; CHECK-NEXT:    mul x13, x0, x13
-; CHECK-NEXT:    adds x8, x14, x10
-; CHECK-NEXT:    mul x15, x1, x3
-; CHECK-NEXT:    smulh x10, x1, x3
-; CHECK-NEXT:    mov x1, x8
-; CHECK-NEXT:    adc x11, x12, x13
-; CHECK-NEXT:    asr x12, x9, #63
-; CHECK-NEXT:    asr x13, x11, #63
-; CHECK-NEXT:    adds x9, x9, x11
 ; CHECK-NEXT:    asr x11, x8, #63
+; CHECK-NEXT:    mul x15, x1, x3
+; CHECK-NEXT:    adds x1, x14, x10
+; CHECK-NEXT:    smulh x9, x9, x3
+; CHECK-NEXT:    adc x10, x12, x13
+; CHECK-NEXT:    asr x12, x10, #63
+; CHECK-NEXT:    adds x8, x8, x10
+; CHECK-NEXT:    asr x10, x1, #63
 ; CHECK-NEXT:    mul x0, x0, x2
-; CHECK-NEXT:    adc x12, x12, x13
-; CHECK-NEXT:    adds x9, x15, x9
-; CHECK-NEXT:    adc x10, x10, x12
-; CHECK-NEXT:    cmp x9, x11
-; CHECK-NEXT:    ccmp x10, x11, #0, eq
+; CHECK-NEXT:    adc x11, x11, x12
+; CHECK-NEXT:    adds x8, x15, x8
+; CHECK-NEXT:    adc x9, x9, x11
+; CHECK-NEXT:    cmp x8, x10
+; CHECK-NEXT:    ccmp x9, x10, #0, eq
 ; CHECK-NEXT:    cset w2, ne
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB22_2: // %overflow.no
+; CHECK-NEXT:    smulh x1, x0, x2
+; CHECK-NEXT:    mul x0, x0, x2
+; CHECK-NEXT:    mov w2, wzr
 ; CHECK-NEXT:    ret
   %1 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y)
   %2 = extractvalue { i128, i1 } %1, 0
@@ -436,6 +482,11 @@ define { i128, i8 } @i128_overflowing_mul(i128 %x, i128 %y) {
 define i128 @i128_saturating_mul(i128 %x, i128 %y) {
 ; CHECK-LABEL: i128_saturating_mul:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    eor x8, x3, x2, asr #63
+; CHECK-NEXT:    eor x9, x1, x0, asr #63
+; CHECK-NEXT:    orr x8, x9, x8
+; CHECK-NEXT:    cbz x8, .LBB23_2
+; CHECK-NEXT:  // %bb.1: // %overflow
 ; CHECK-NEXT:    asr x9, x1, #63
 ; CHECK-NEXT:    umulh x10, x0, x2
 ; CHECK-NEXT:    asr x13, x3, #63
@@ -445,29 +496,35 @@ define i128 @i128_saturating_mul(i128 %x, i128 %y) {
 ; CHECK-NEXT:    adds x10, x11, x10
 ; CHECK-NEXT:    mul x14, x0, x3
 ; CHECK-NEXT:    umulh x12, x0, x3
-; CHECK-NEXT:    adc x8, x8, x9
+; CHECK-NEXT:    adc x9, x8, x9
 ; CHECK-NEXT:    mul x13, x0, x13
-; CHECK-NEXT:    adds x9, x14, x10
-; CHECK-NEXT:    mul x11, x1, x3
-; CHECK-NEXT:    adc x10, x12, x13
-; CHECK-NEXT:    smulh x12, x1, x3
-; CHECK-NEXT:    asr x13, x8, #63
-; CHECK-NEXT:    asr x14, x10, #63
-; CHECK-NEXT:    adds x8, x8, x10
-; CHECK-NEXT:    adc x10, x13, x14
-; CHECK-NEXT:    adds x8, x11, x8
-; CHECK-NEXT:    asr x11, x9, #63
-; CHECK-NEXT:    mul x13, x0, x2
-; CHECK-NEXT:    adc x10, x12, x10
-; CHECK-NEXT:    eor x12, x3, x1
-; CHECK-NEXT:    eor x8, x8, x11
-; CHECK-NEXT:    eor x10, x10, x11
-; CHECK-NEXT:    asr x11, x12, #63
-; CHECK-NEXT:    orr x8, x8, x10
-; CHECK-NEXT:    eor x10, x11, #0x7fffffffffffffff
-; CHECK-NEXT:    cmp x8, #0
-; CHECK-NEXT:    csinv x0, x13, x11, eq
-; CHECK-NEXT:    csel x1, x10, x9, ne
+; CHECK-NEXT:    adds x8, x14, x10
+; CHECK-NEXT:    mul x15, x1, x3
+; CHECK-NEXT:    asr x14, x8, #63
+; CHECK-NEXT:    smulh x10, x1, x3
+; CHECK-NEXT:    adc x11, x12, x13
+; CHECK-NEXT:    asr x12, x9, #63
+; CHECK-NEXT:    asr x13, x11, #63
+; CHECK-NEXT:    adds x11, x9, x11
+; CHECK-NEXT:    mul x9, x0, x2
+; CHECK-NEXT:    adc x12, x12, x13
+; CHECK-NEXT:    adds x11, x15, x11
+; CHECK-NEXT:    adc x10, x10, x12
+; CHECK-NEXT:    cmp x11, x14
+; CHECK-NEXT:    ccmp x10, x14, #0, eq
+; CHECK-NEXT:    cset w10, ne
+; CHECK-NEXT:    b .LBB23_3
+; CHECK-NEXT:  .LBB23_2: // %overflow.no
+; CHECK-NEXT:    smulh x8, x0, x2
+; CHECK-NEXT:    mov w10, wzr
+; CHECK-NEXT:    mul x9, x0, x2
+; CHECK-NEXT:  .LBB23_3: // %overflow.res
+; CHECK-NEXT:    eor x11, x3, x1
+; CHECK-NEXT:    cmp w10, #0
+; CHECK-NEXT:    asr x11, x11, #63
+; CHECK-NEXT:    eor x12, x11, #0x7fffffffffffffff
+; CHECK-NEXT:    csinv x0, x9, x11, eq
+; CHECK-NEXT:    csel x1, x12, x8, ne
 ; CHECK-NEXT:    ret
   %1 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y)
   %2 = extractvalue { i128, i1 } %1, 0
diff --git a/llvm/test/CodeGen/AArch64/i128_with_overflow.ll b/llvm/test/CodeGen/AArch64/i128_with_overflow.ll
index 9924b7c63f763..3d90e094a5747 100644
--- a/llvm/test/CodeGen/AArch64/i128_with_overflow.ll
+++ b/llvm/test/CodeGen/AArch64/i128_with_overflow.ll
@@ -224,21 +224,29 @@ cleanup:
 define i128 @test_umul_i128(i128 noundef %x, i128 noundef %y) {
 ; CHECK-LABEL: test_umul_i128:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    orr x8, x1, x3
+; CHECK-NEXT:    cbz x8, .LBB4_2
+; CHECK-NEXT:  // %bb.1: // %overflow
 ; CHECK-NEXT:    mul x9, x3, x0
 ; CHECK-NEXT:    cmp x1, #0
 ; CHECK-NEXT:    ccmp x3, #0, #4, ne
-; CHECK-NEXT:    umulh x8, x1, x2
-; CHECK-NEXT:    umulh x10, x3, x0
+; CHECK-NEXT:    umulh x10, x1, x2
+; CHECK-NEXT:    umulh x8, x3, x0
 ; CHECK-NEXT:    madd x9, x1, x2, x9
-; CHECK-NEXT:    ccmp xzr, x8, #0, eq
-; CHECK-NEXT:    umulh x11, x0, x2
 ; CHECK-NEXT:    ccmp xzr, x10, #0, eq
+; CHECK-NEXT:    umulh x11, x0, x2
+; CHECK-NEXT:    ccmp xzr, x8, #0, eq
+; CHECK-NEXT:    mul x0, x0, x2
 ; CHECK-NEXT:    cset w8, ne
 ; CHECK-NEXT:    adds x1, x11, x9
 ; CHECK-NEXT:    csinc w8, w8, wzr, lo
-; CHECK-NEXT:    cmp w8, #1
-; CHECK-NEXT:    b.ne .LBB4_2
-; CHECK-NEXT:  // %bb.1: // %if.then
+; CHECK-NEXT:    cbnz w8, .LBB4_3
+; CHECK-NEXT:    b .LBB4_4
+; CHECK-NEXT:  .LBB4_2: // %overflow.no
+; CHECK-NEXT:    umulh x1, x0, x2
+; CHECK-NEXT:    mul x0, x0, x2
+; CHECK-NEXT:    cbz w8, .LBB4_4
+; CHECK-NEXT:  .LBB4_3: // %if.then
 ; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset w30, -16
@@ -247,9 +255,7 @@ define i128 @test_umul_i128(i128 noundef %x, i128 noundef %y) {
 ; CHECK-NEXT:    sxtw x0, w0
 ; CHECK-NEXT:    asr x1, x0, #63
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB4_2: // %if.end
-; CHECK-NEXT:    mul x0, x0, x2
+; CHECK-NEXT:  .LBB4_4: // %cleanup
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y)
@@ -273,34 +279,40 @@ cleanup:
 define i128 @test_smul_i128(i128 noundef %x, i128 noundef %y) {
 ; CHECK-LABEL: test_smul_i128:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    asr x10, x1, #63
-; CHECK-NEXT:    umulh x11, x0, x2
-; CHECK-NEXT:    asr x14, x3, #63
-; CHECK-NEXT:    mov x8, x1
-; CHECK-NEXT:    mul x12, x1, x2
-; CHECK-NEXT:    umulh x9, x1, x2
-; CHECK-NEXT:    mul x10, x10, x2
-; CHECK-NEXT:    adds x11, x12, x11
-; CHECK-NEXT:    mul x15, x0, x3
-; CHECK-NEXT:    umulh x13, x0, x3
-; CHECK-NEXT:    adc x9, x9, x10
-; CHECK-NEXT:    mul x14, x0, x14
-; CHECK-NEXT:    mul x16, x1, x3
-; CHECK-NEXT:    adds x1, x15, x11
-; CHECK-NEXT:    asr x11, x9, #63
-; CHECK-NEXT:    smulh x8, x8, x3
-; CHECK-NEXT:    adc x10, x13, x14
-; CHECK-NEXT:    asr x12, x10, #63
-; CHECK-NEXT:    adds x9, x9, x10
-; CHECK-NEXT:    adc x10, x11, x12
-; CHECK-NEXT:    adds x9, x16, x9
-; CHECK-NEXT:    asr x11, x1, #63
-; CHECK-NEXT:    adc x8, x8, x10
-; CHECK-NEXT:    eor x8, x8, x11
-; CHECK-NEXT:    eor x9, x9, x11
+; CHECK-NEXT:    eor x8, x3, x2, asr #63
+; CHECK-NEXT:    eor x9, x1, x0, asr #63
 ; CHECK-NEXT:    orr x8, x9, x8
-; CHECK-NEXT:    cbz x8, .LBB5_2
-; CHECK-NEXT:  // %bb.1: // %if.then
+; CHECK-NEXT:    cbz x8, .LBB5_4
+; CHECK-NEXT:  // %bb.1: // %overflow
+; CHECK-NEXT:    asr x9, x1, #63
+; CHECK-NEXT:    umulh x10, x0, x2
+; CHECK-NEXT:    asr x13, x3, #63
+; CHECK-NEXT:    mul x11, x1, x2
+; CHECK-NEXT:    umulh x8, x1, x2
+; CHECK-NEXT:    mul x9, x9, x2
+; CHECK-NEXT:    adds x10, x11, x10
+; CHECK-NEXT:    mul x14, x0, x3
+; CHECK-NEXT:    umulh x12, x0, x3
+; CHECK-NEXT:    adc x8, x8, x9
+; CHECK-NEXT:    mov x9, x1
+; CHECK-NEXT:    mul x13, x0, x13
+; CHECK-NEXT:    asr x11, x8, #63
+; CHECK-NEXT:    mul x15, x1, x3
+; CHECK-NEXT:    adds x1, x14, x10
+; CHECK-NEXT:    smulh x9, x9, x3
+; CHECK-NEXT:    adc x10, x12, x13
+; CHECK-NEXT:    asr x12, x10, #63
+; CHECK-NEXT:    adds x8, x8, x10
+; CHECK-NEXT:    asr x10, x1, #63
+; CHECK-NEXT:    mul x0, x0, x2
+; CHECK-NEXT:    adc x11, x11, x12
+; CHECK-NEXT:    adds x8, x15, x8
+; CHECK-NEXT:    adc x9, x9, x11
+; CHECK-NEXT:    cmp x8, x10
+; CHECK-NEXT:    ccmp x9, x10, #0, eq
+; CHECK-NEXT:    cset w8, ne
+; CHECK-NEXT:    cbz w8, .LBB5_3
+; CHECK-NEXT:  .LBB5_2: // %if.then
 ; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset w30, -16
@@ -309,10 +321,13 @@ define i128 @test_smul_i128(i128 noundef %x, i128 noundef %y) {
 ; CHECK-NEXT:    sxtw x0, w0
 ; CHECK-NEXT:    asr x1, x0, #63
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:  .LBB5_3: // %cleanup
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB5_2: // %if.end
+; CHECK-NEXT:  .LBB5_4: // %overflow.no
+; CHECK-NEXT:    smulh x1, x0, x2
 ; CHECK-NEXT:    mul x0, x0, x2
-; CHECK-NEXT:    ret
+; CHECK-NEXT:    cbnz w8, .LBB5_2
+; CHECK-NEXT:    b .LBB5_3
 entry:
   %0 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y)
   %1 = extractvalue { i128, i1 } %0, 1
diff --git a/llvm/test/CodeGen/AArch64/llround-conv-fp16.ll b/llvm/test/CodeGen/AArch64/llround-conv-fp16.ll
index cb042757a4a42..3a4be1bda7cd6 100644
--- a/llvm/test/CodeGen/AArch64/llround-conv-fp16.ll
+++ b/llvm/test/CodeGen/AArch64/llround-conv-fp16.ll
@@ -1,12 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK-NOFP16
 ; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK-FP16
-; RUN: llc < %s -mtriple=aarch64 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK-NOFP16,CHECK-GI
-; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK-FP16,CHECK-GI
-
-; CHECK-GI:       warning: Instruction selection used fallback path for testmhhs
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for testmhws
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for testmhxs
+; RUN: llc < %s -mtriple=aarch64 -global-isel | FileCheck %s --check-prefixes=CHECK-NOFP16
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel | FileCheck %s --check-prefixes=CHECK-FP16
 
 define i16 @testmhhs(half %x) {
 ; CHECK-NOFP16-LABEL: testmhhs:
diff --git a/llvm/test/CodeGen/AArch64/llround-conv.ll b/llvm/test/CodeGen/AArch64/llround-conv.ll
index 4cc089804ce97..bdee73076347a 100644
--- a/llvm/test/CodeGen/AArch64/llround-conv.ll
+++ b/llvm/test/CodeGen/AArch64/llround-conv.ll
@@ -1,9 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
 ; RUN: llc < %s -mtriple=aarch64 -mattr=+neon | FileCheck %s
-; RUN: llc < %s -mtriple=aarch64 -mattr=+neon -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-
-; CHECK-GI:       warning: Instruction selection used fallback path for testmswl
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for testmsll
+; RUN: llc < %s -mtriple=aarch64 -mattr=+neon -global-isel | FileCheck %s
 
 define i32 @testmsws(float %x) {
 ; CHECK-LABEL: testmsws:
diff --git a/llvm/test/CodeGen/AArch64/lround-conv-fp16.ll b/llvm/test/CodeGen/AArch64/lround-conv-fp16.ll
index a29dea0eb9f9f..0b18f220067ca 100644
--- a/llvm/test/CodeGen/AArch64/lround-conv-fp16.ll
+++ b/llvm/test/CodeGen/AArch64/lround-conv-fp16.ll
@@ -1,12 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK-NOFP16
 ; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK-FP16
-; RUN: llc < %s -mtriple=aarch64 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK-NOFP16,CHECK-GI
-; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK-FP16,CHECK-GI
-
-; CHECK-GI:       warning: Instruction selection used fallback path for testmhhs
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for testmhws
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for testmhxs
+; RUN: llc < %s -mtriple=aarch64 -global-isel | FileCheck %s --check-prefixes=CHECK-NOFP16
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel | FileCheck %s --check-prefixes=CHECK-FP16
 
 define i16 @testmhhs(half %x) {
 ; CHECK-NOFP16-LABEL: testmhhs:
diff --git a/llvm/test/CodeGen/AArch64/lround-conv.ll b/llvm/test/CodeGen/AArch64/lround-conv.ll
index 0bf82b538e70c..4b1782457cc10 100644
--- a/llvm/test/CodeGen/AArch64/lround-conv.ll
+++ b/llvm/test/CodeGen/AArch64/lround-conv.ll
@@ -1,9 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
 ; RUN: llc < %s -mtriple=aarch64 -mattr=+neon | FileCheck %s
-; RUN: llc < %s -mtriple=aarch64 -mattr=+neon -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-
-; CHECK-GI:       warning: Instruction selection used fallback path for testmswl
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for testmsll
+; RUN: llc < %s -mtriple=aarch64 -mattr=+neon -global-isel | FileCheck %s
 
 define i32 @testmsws(float %x) {
 ; CHECK-LABEL: testmsws:
diff --git a/llvm/test/CodeGen/AArch64/mul-i128-overflow.ll b/llvm/test/CodeGen/AArch64/mul-i128-overflow.ll
new file mode 100644
index 0000000000000..7b60f81539aa8
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/mul-i128-overflow.ll
@@ -0,0 +1,261 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64 -o - %s | FileCheck %s
+
+
+declare i32 @error()
+
+define i128 @test1(i128 noundef %x, i128 noundef %y) {
+; CHECK-LABEL: test1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    eor x8, x3, x2, asr #63
+; CHECK-NEXT:    eor x9, x1, x0, asr #63
+; CHECK-NEXT:    orr x8, x9, x8
+; CHECK-NEXT:    cbz x8, .LBB0_4
+; CHECK-NEXT:  // %bb.1: // %overflow
+; CHECK-NEXT:    asr x9, x1, #63
+; CHECK-NEXT:    umulh x10, x0, x2
+; CHECK-NEXT:    asr x13, x3, #63
+; CHECK-NEXT:    mul x11, x1, x2
+; CHECK-NEXT:    umulh x8, x1, x2
+; CHECK-NEXT:    mul x9, x9, x2
+; CHECK-NEXT:    adds x10, x11, x10
+; CHECK-NEXT:    mul x14, x0, x3
+; CHECK-NEXT:    umulh x12, x0, x3
+; CHECK-NEXT:    adc x8, x8, x9
+; CHECK-NEXT:    mov x9, x1
+; CHECK-NEXT:    mul x13, x0, x13
+; CHECK-NEXT:    asr x11, x8, #63
+; CHECK-NEXT:    mul x15, x1, x3
+; CHECK-NEXT:    adds x1, x14, x10
+; CHECK-NEXT:    smulh x9, x9, x3
+; CHECK-NEXT:    adc x10, x12, x13
+; CHECK-NEXT:    asr x12, x10, #63
+; CHECK-NEXT:    adds x8, x8, x10
+; CHECK-NEXT:    asr x10, x1, #63
+; CHECK-NEXT:    mul x0, x0, x2
+; CHECK-NEXT:    adc x11, x11, x12
+; CHECK-NEXT:    adds x8, x15, x8
+; CHECK-NEXT:    adc x9, x9, x11
+; CHECK-NEXT:    cmp x8, x10
+; CHECK-NEXT:    ccmp x9, x10, #0, eq
+; CHECK-NEXT:    cset w8, ne
+; CHECK-NEXT:    cbz w8, .LBB0_3
+; CHECK-NEXT:  .LBB0_2: // %if.then
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl error
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sxtw x0, w0
+; CHECK-NEXT:    asr x1, x0, #63
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:  .LBB0_3: // %cleanup
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB0_4: // %overflow.no
+; CHECK-NEXT:    smulh x1, x0, x2
+; CHECK-NEXT:    mul x0, x0, x2
+; CHECK-NEXT:    cbnz w8, .LBB0_2
+; CHECK-NEXT:    b .LBB0_3
+entry:
+  %0 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y)
+  %1 = extractvalue { i128, i1 } %0, 1
+  br i1 %1, label %if.then, label %if.end
+
+if.then:
+  %call = tail call i32 @error()
+  %conv1 = sext i32 %call to i128
+  br label %cleanup
+
+if.end:
+  %2 = extractvalue { i128, i1 } %0, 0
+  br label %cleanup
+
+cleanup:
+  %retval.0 = phi i128 [ %conv1, %if.then ], [ %2, %if.end ]
+  ret i128 %retval.0
+}
+
+define i128 @test2(i128 noundef %x, i128 noundef %y, ptr %out) {
+; CHECK-LABEL: test2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    eor x8, x3, x2, asr #63
+; CHECK-NEXT:    eor x9, x1, x0, asr #63
+; CHECK-NEXT:    orr x8, x9, x8
+; CHECK-NEXT:    cbz x8, .LBB1_4
+; CHECK-NEXT:  // %bb.1: // %overflow
+; CHECK-NEXT:    asr x9, x1, #63
+; CHECK-NEXT:    umulh x10, x0, x2
+; CHECK-NEXT:    asr x13, x3, #63
+; CHECK-NEXT:    mul x11, x1, x2
+; CHECK-NEXT:    umulh x8, x1, x2
+; CHECK-NEXT:    mul x9, x9, x2
+; CHECK-NEXT:    adds x10, x11, x10
+; CHECK-NEXT:    mul x14, x0, x3
+; CHECK-NEXT:    umulh x12, x0, x3
+; CHECK-NEXT:    adc x8, x8, x9
+; CHECK-NEXT:    mov x9, x1
+; CHECK-NEXT:    mul x13, x0, x13
+; CHECK-NEXT:    asr x11, x8, #63
+; CHECK-NEXT:    mul x15, x1, x3
+; CHECK-NEXT:    adds x1, x14, x10
+; CHECK-NEXT:    smulh x9, x9, x3
+; CHECK-NEXT:    adc x10, x12, x13
+; CHECK-NEXT:    asr x12, x10, #63
+; CHECK-NEXT:    adds x8, x8, x10
+; CHECK-NEXT:    asr x10, x1, #63
+; CHECK-NEXT:    mul x0, x0, x2
+; CHECK-NEXT:    adc x11, x11, x12
+; CHECK-NEXT:    adds x8, x15, x8
+; CHECK-NEXT:    adc x9, x9, x11
+; CHECK-NEXT:    cmp x8, x10
+; CHECK-NEXT:    ccmp x9, x10, #0, eq
+; CHECK-NEXT:    cset w8, ne
+; CHECK-NEXT:    stp x0, x1, [x4]
+; CHECK-NEXT:    cbz w8, .LBB1_3
+; CHECK-NEXT:  .LBB1_2: // %if.then
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl error
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sxtw x0, w0
+; CHECK-NEXT:    asr x1, x0, #63
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:  .LBB1_3: // %cleanup
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB1_4: // %overflow.no
+; CHECK-NEXT:    smulh x1, x0, x2
+; CHECK-NEXT:    mul x0, x0, x2
+; CHECK-NEXT:    stp x0, x1, [x4]
+; CHECK-NEXT:    cbnz w8, .LBB1_2
+; CHECK-NEXT:    b .LBB1_3
+entry:
+  %0 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y)
+  %1 = extractvalue { i128, i1 } %0, 0
+  store i128 %1, ptr %out
+  %2 = extractvalue { i128, i1 } %0, 1
+  br i1 %2, label %if.then, label %cleanup
+
+if.then:
+  %call = tail call i32 @error()
+  %conv1 = sext i32 %call to i128
+  br label %cleanup
+
+cleanup:
+  %retval.0 = phi i128 [ %conv1, %if.then ], [ %1, %entry ]
+  ret i128 %retval.0
+}
+
+define i128 @test3(i128 noundef %x, i128 noundef %y, ptr %out) {
+; CHECK-LABEL: test3:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    orr x8, x1, x3
+; CHECK-NEXT:    cbz x8, .LBB2_3
+; CHECK-NEXT:  // %bb.1: // %overflow
+; CHECK-NEXT:    mul x8, x3, x0
+; CHECK-NEXT:    cmp x1, #0
+; CHECK-NEXT:    ccmp x3, #0, #4, ne
+; CHECK-NEXT:    umulh x10, x1, x2
+; CHECK-NEXT:    umulh x9, x3, x0
+; CHECK-NEXT:    madd x11, x1, x2, x8
+; CHECK-NEXT:    ccmp xzr, x10, #0, eq
+; CHECK-NEXT:    umulh x12, x0, x2
+; CHECK-NEXT:    ccmp xzr, x9, #0, eq
+; CHECK-NEXT:    mul x8, x0, x2
+; CHECK-NEXT:    cset w10, ne
+; CHECK-NEXT:    adds x9, x12, x11
+; CHECK-NEXT:    csinc w10, w10, wzr, lo
+; CHECK-NEXT:    stp x8, x9, [x4]
+; CHECK-NEXT:    cbnz w10, .LBB2_4
+; CHECK-NEXT:  .LBB2_2:
+; CHECK-NEXT:    mov x1, xzr
+; CHECK-NEXT:    mov w0, #1 // =0x1
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB2_3: // %overflow.no
+; CHECK-NEXT:    umulh x9, x0, x2
+; CHECK-NEXT:    mov w10, wzr
+; CHECK-NEXT:    mul x8, x0, x2
+; CHECK-NEXT:    stp x8, x9, [x4]
+; CHECK-NEXT:    cbz w10, .LBB2_2
+; CHECK-NEXT:  .LBB2_4: // %if.then
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl error
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sxtw x0, w0
+; CHECK-NEXT:    asr x1, x0, #63
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y)
+  %1 = extractvalue { i128, i1 } %0, 0
+  store i128 %1, ptr %out
+  %2 = extractvalue { i128, i1 } %0, 1
+  br i1 %2, label %if.then, label %cleanup
+
+if.then:
+  %call = tail call i32 @error()
+  %conv1 = sext i32 %call to i128
+  br label %cleanup
+
+cleanup:
+  %retval.0 = phi i128 [ %conv1, %if.then ], [ 1, %entry ]
+  ret i128 %retval.0
+}
+
+define i128 @test4(i128 noundef %x, i128 noundef %y, i128 %out) {
+; CHECK-LABEL: test4:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    orr x8, x1, x3
+; CHECK-NEXT:    cbz x8, .LBB3_2
+; CHECK-NEXT:  // %bb.1: // %overflow
+; CHECK-NEXT:    mul x8, x3, x0
+; CHECK-NEXT:    cmp x1, #0
+; CHECK-NEXT:    ccmp x3, #0, #4, ne
+; CHECK-NEXT:    umulh x10, x1, x2
+; CHECK-NEXT:    umulh x9, x3, x0
+; CHECK-NEXT:    madd x11, x1, x2, x8
+; CHECK-NEXT:    ccmp xzr, x10, #0, eq
+; CHECK-NEXT:    umulh x12, x0, x2
+; CHECK-NEXT:    ccmp xzr, x9, #0, eq
+; CHECK-NEXT:    mul x8, x0, x2
+; CHECK-NEXT:    cset w10, ne
+; CHECK-NEXT:    adds x9, x12, x11
+; CHECK-NEXT:    csinc w10, w10, wzr, lo
+; CHECK-NEXT:    b .LBB3_3
+; CHECK-NEXT:  .LBB3_2: // %overflow.no
+; CHECK-NEXT:    umulh x9, x0, x2
+; CHECK-NEXT:    mov w10, wzr
+; CHECK-NEXT:    mul x8, x0, x2
+; CHECK-NEXT:  .LBB3_3: // %overflow.res
+; CHECK-NEXT:    adds x0, x8, x4
+; CHECK-NEXT:    adc x1, x9, x5
+; CHECK-NEXT:    cbz w10, .LBB3_5
+; CHECK-NEXT:  // %bb.4: // %if.then
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl error
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sxtw x0, w0
+; CHECK-NEXT:    asr x1, x0, #63
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:  .LBB3_5: // %cleanup
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y)
+  %1 = extractvalue { i128, i1 } %0, 0
+  %res = add i128 %1, %out
+  %2 = extractvalue { i128, i1 } %0, 1
+  br i1 %2, label %if.then, label %cleanup
+
+if.then:
+  %call = tail call i32 @error()
+  %conv1 = sext i32 %call to i128
+  br label %cleanup
+
+cleanup:
+  %retval.0 = phi i128 [ %conv1, %if.then ], [ %res, %entry ]
+  ret i128 %retval.0
+}
diff --git a/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll b/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll
index 16e8feb0dc5bb..fc3e018f2ec7a 100644
--- a/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll
+++ b/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll
@@ -632,7 +632,6 @@ define <vscale x 8 x bfloat> @fsub_sel_fmul_negzero_nxv8bf16(<vscale x 8 x bfloa
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    uunpkhi z3.s, z2.h
 ; SVE-NEXT:    uunpkhi z4.s, z1.h
-; SVE-NEXT:    mov w8, #32768 // =0x8000
 ; SVE-NEXT:    uunpklo z2.s, z2.h
 ; SVE-NEXT:    uunpklo z1.s, z1.h
 ; SVE-NEXT:    ptrue p1.s
@@ -643,9 +642,8 @@ define <vscale x 8 x bfloat> @fsub_sel_fmul_negzero_nxv8bf16(<vscale x 8 x bfloa
 ; SVE-NEXT:    fmul z3.s, z4.s, z3.s
 ; SVE-NEXT:    fmul z1.s, z1.s, z2.s
 ; SVE-NEXT:    bfcvt z2.h, p1/m, z3.s
-; SVE-NEXT:    fmov h3, w8
+; SVE-NEXT:    dupm z3.h, #0x8000
 ; SVE-NEXT:    bfcvt z1.h, p1/m, z1.s
-; SVE-NEXT:    mov z3.h, h3
 ; SVE-NEXT:    uzp1 z1.h, z1.h, z2.h
 ; SVE-NEXT:    sel z1.h, p0, z1.h, z3.h
 ; SVE-NEXT:    uunpkhi z3.s, z0.h
@@ -665,10 +663,8 @@ define <vscale x 8 x bfloat> @fsub_sel_fmul_negzero_nxv8bf16(<vscale x 8 x bfloa
 ;
 ; SVE-B16B16-LABEL: fsub_sel_fmul_negzero_nxv8bf16:
 ; SVE-B16B16:       // %bb.0:
-; SVE-B16B16-NEXT:    mov w8, #32768 // =0x8000
+; SVE-B16B16-NEXT:    dupm z3.h, #0x8000
 ; SVE-B16B16-NEXT:    bfmul z1.h, z1.h, z2.h
-; SVE-B16B16-NEXT:    fmov h3, w8
-; SVE-B16B16-NEXT:    mov z3.h, h3
 ; SVE-B16B16-NEXT:    sel z1.h, p0, z1.h, z3.h
 ; SVE-B16B16-NEXT:    bfsub z0.h, z0.h, z1.h
 ; SVE-B16B16-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/sve-fp-combine.ll b/llvm/test/CodeGen/AArch64/sve-fp-combine.ll
index 53aba04028d62..57389ad2fe9b2 100644
--- a/llvm/test/CodeGen/AArch64/sve-fp-combine.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fp-combine.ll
@@ -1134,10 +1134,9 @@ define <vscale x 2 x double> @fadd_sel_fmul_d_negzero(<vscale x 2 x double> %a,
 define <vscale x 8 x half> @fsub_sel_fmul_h_negzero(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: fsub_sel_fmul_h_negzero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #32768 // =0x8000
+; CHECK-NEXT:    dupm z3.h, #0x8000
 ; CHECK-NEXT:    fmul z1.h, z1.h, z2.h
-; CHECK-NEXT:    mov z2.h, w8
-; CHECK-NEXT:    sel z1.h, p0, z1.h, z2.h
+; CHECK-NEXT:    sel z1.h, p0, z1.h, z3.h
 ; CHECK-NEXT:    fsub z0.h, z0.h, z1.h
 ; CHECK-NEXT:    ret
   %fmul = fmul <vscale x 8 x half> %b, %c
@@ -1150,10 +1149,9 @@ define <vscale x 8 x half> @fsub_sel_fmul_h_negzero(<vscale x 8 x half> %a, <vsc
 define <vscale x 4 x float> @fsub_sel_fmul_s_negzero(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: fsub_sel_fmul_s_negzero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #-2147483648 // =0x80000000
+; CHECK-NEXT:    mov z3.s, #0x80000000
 ; CHECK-NEXT:    fmul z1.s, z1.s, z2.s
-; CHECK-NEXT:    mov z2.s, w8
-; CHECK-NEXT:    sel z1.s, p0, z1.s, z2.s
+; CHECK-NEXT:    sel z1.s, p0, z1.s, z3.s
 ; CHECK-NEXT:    fsub z0.s, z0.s, z1.s
 ; CHECK-NEXT:    ret
   %fmul = fmul <vscale x 4 x float> %b, %c
@@ -1166,10 +1164,9 @@ define <vscale x 4 x float> @fsub_sel_fmul_s_negzero(<vscale x 4 x float> %a, <v
 define <vscale x 2 x double> @fsub_sel_fmul_d_negzero(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: fsub_sel_fmul_d_negzero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-NEXT:    mov z3.d, #0x8000000000000000
 ; CHECK-NEXT:    fmul z1.d, z1.d, z2.d
-; CHECK-NEXT:    mov z2.d, x8
-; CHECK-NEXT:    sel z1.d, p0, z1.d, z2.d
+; CHECK-NEXT:    sel z1.d, p0, z1.d, z3.d
 ; CHECK-NEXT:    fsub z0.d, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %fmul = fmul <vscale x 2 x double> %b, %c
diff --git a/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll b/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll
index 8750867c56731..1223ae1c0cbdd 100644
--- a/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll
@@ -51,10 +51,9 @@ define half @fadda_nxv6f16(<vscale x 6 x half> %v, half %s) {
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    mov w8, #32768 // =0x8000
+; CHECK-NEXT:    dupm z2.h, #0x8000
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    str z0, [sp]
-; CHECK-NEXT:    mov z2.h, w8
 ; CHECK-NEXT:    fmov s0, s1
 ; CHECK-NEXT:    st1h { z2.d }, p0, [sp, #3, mul vl]
 ; CHECK-NEXT:    ptrue p0.h
@@ -77,12 +76,11 @@ define half @fadda_nxv10f16(<vscale x 10 x half> %v, half %s) {
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    // kill: def $h2 killed $h2 def $z2
-; CHECK-NEXT:    mov w8, #32768 // =0x8000
 ; CHECK-NEXT:    str z1, [sp]
+; CHECK-NEXT:    addvl x8, sp, #1
 ; CHECK-NEXT:    ptrue p1.d
 ; CHECK-NEXT:    fadda h2, p0, h2, z0.h
-; CHECK-NEXT:    mov z0.h, w8
-; CHECK-NEXT:    addvl x8, sp, #1
+; CHECK-NEXT:    dupm z0.h, #0x8000
 ; CHECK-NEXT:    st1h { z0.d }, p1, [sp, #1, mul vl]
 ; CHECK-NEXT:    ldr z1, [sp]
 ; CHECK-NEXT:    str z1, [sp, #1, mul vl]
@@ -105,11 +103,10 @@ define half @fadda_nxv12f16(<vscale x 12 x half> %v, half %s) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    // kill: def $h2 killed $h2 def $z2
-; CHECK-NEXT:    mov w8, #32768 // =0x8000
+; CHECK-NEXT:    uunpklo z1.s, z1.h
 ; CHECK-NEXT:    fadda h2, p0, h2, z0.h
-; CHECK-NEXT:    uunpklo z0.s, z1.h
-; CHECK-NEXT:    mov z1.h, w8
-; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
+; CHECK-NEXT:    dupm z0.h, #0x8000
+; CHECK-NEXT:    uzp1 z0.h, z1.h, z0.h
 ; CHECK-NEXT:    fadda h2, p0, h2, z0.h
 ; CHECK-NEXT:    fmov s0, s2
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll b/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll
index 4ae7ac7b292e9..897ade00320db 100644
--- a/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll
@@ -454,18 +454,17 @@ declare <vscale x 4 x i64> @llvm.fptosi.sat.nxv4f16.nxv4i64(<vscale x 4 x half>)
 define <vscale x 2 x i32> @test_signed_v2f16_v2i32(<vscale x 2 x half> %f) {
 ; CHECK-LABEL: test_signed_v2f16_v2i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
+; CHECK-NEXT:    mov z1.h, #-1025 // =0xfffffffffffffbff
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z1.h, w8
 ; CHECK-NEXT:    mov w8, #31743 // =0x7bff
-; CHECK-NEXT:    mov z2.h, w8
+; CHECK-NEXT:    mov z2.d, #0xffffffff80000000
 ; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT:    mov z1.d, #0xffffffff80000000
-; CHECK-NEXT:    fcmgt p2.h, p0/z, z0.h, z2.h
-; CHECK-NEXT:    mov z2.d, #0x7fffffff
+; CHECK-NEXT:    mov z1.h, w8
+; CHECK-NEXT:    fcvtzs z2.d, p1/m, z0.h
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    mov z1.d, #0x7fffffff
 ; CHECK-NEXT:    fcmuo p0.h, p0/z, z0.h, z0.h
-; CHECK-NEXT:    fcvtzs z1.d, p1/m, z0.h
-; CHECK-NEXT:    sel z0.d, p2, z2.d, z1.d
+; CHECK-NEXT:    sel z0.d, p1, z1.d, z2.d
 ; CHECK-NEXT:    mov z0.d, p0/m, #0 // =0x0
 ; CHECK-NEXT:    ret
     %x = call <vscale x 2 x i32> @llvm.fptosi.sat.nxv2f16.nxv2i32(<vscale x 2 x half> %f)
@@ -475,18 +474,17 @@ define <vscale x 2 x i32> @test_signed_v2f16_v2i32(<vscale x 2 x half> %f) {
 define <vscale x 4 x i32> @test_signed_v4f16_v4i32(<vscale x 4 x half> %f) {
 ; CHECK-LABEL: test_signed_v4f16_v4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
+; CHECK-NEXT:    mov z1.h, #-1025 // =0xfffffffffffffbff
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov z1.h, w8
 ; CHECK-NEXT:    mov w8, #31743 // =0x7bff
-; CHECK-NEXT:    mov z2.h, w8
+; CHECK-NEXT:    mov z2.s, #0x80000000
 ; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT:    mov z1.s, #0x80000000
-; CHECK-NEXT:    fcmgt p2.h, p0/z, z0.h, z2.h
-; CHECK-NEXT:    mov z2.s, #0x7fffffff
+; CHECK-NEXT:    mov z1.h, w8
+; CHECK-NEXT:    fcvtzs z2.s, p1/m, z0.h
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    mov z1.s, #0x7fffffff
 ; CHECK-NEXT:    fcmuo p0.h, p0/z, z0.h, z0.h
-; CHECK-NEXT:    fcvtzs z1.s, p1/m, z0.h
-; CHECK-NEXT:    sel z0.s, p2, z2.s, z1.s
+; CHECK-NEXT:    sel z0.s, p1, z1.s, z2.s
 ; CHECK-NEXT:    mov z0.s, p0/m, #0 // =0x0
 ; CHECK-NEXT:    ret
     %x = call <vscale x 4 x i32> @llvm.fptosi.sat.nxv4f16.nxv4i32(<vscale x 4 x half> %f)
@@ -496,26 +494,25 @@ define <vscale x 4 x i32> @test_signed_v4f16_v4i32(<vscale x 4 x half> %f) {
 define <vscale x 8 x i32> @test_signed_v8f16_v8i32(<vscale x 8 x half> %f) {
 ; CHECK-LABEL: test_signed_v8f16_v8i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
-; CHECK-NEXT:    uunpklo z1.s, z0.h
+; CHECK-NEXT:    mov z1.h, #-1025 // =0xfffffffffffffbff
+; CHECK-NEXT:    uunpklo z2.s, z0.h
+; CHECK-NEXT:    mov w8, #31743 // =0x7bff
 ; CHECK-NEXT:    uunpkhi z0.s, z0.h
-; CHECK-NEXT:    mov z2.h, w8
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov w8, #31743 // =0x7bff
 ; CHECK-NEXT:    mov z3.s, #0x80000000
 ; CHECK-NEXT:    mov z4.s, #0x80000000
 ; CHECK-NEXT:    mov z5.h, w8
-; CHECK-NEXT:    fcmge p1.h, p0/z, z1.h, z2.h
-; CHECK-NEXT:    fcmge p2.h, p0/z, z0.h, z2.h
-; CHECK-NEXT:    mov z2.s, #0x7fffffff
+; CHECK-NEXT:    fcmge p1.h, p0/z, z2.h, z1.h
+; CHECK-NEXT:    fcmge p2.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    mov z1.s, #0x7fffffff
 ; CHECK-NEXT:    fcmgt p3.h, p0/z, z0.h, z5.h
-; CHECK-NEXT:    fcvtzs z3.s, p1/m, z1.h
-; CHECK-NEXT:    fcmgt p1.h, p0/z, z1.h, z5.h
+; CHECK-NEXT:    fcvtzs z3.s, p1/m, z2.h
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z2.h, z5.h
 ; CHECK-NEXT:    fcvtzs z4.s, p2/m, z0.h
-; CHECK-NEXT:    fcmuo p2.h, p0/z, z1.h, z1.h
+; CHECK-NEXT:    fcmuo p2.h, p0/z, z2.h, z2.h
 ; CHECK-NEXT:    fcmuo p0.h, p0/z, z0.h, z0.h
-; CHECK-NEXT:    sel z0.s, p1, z2.s, z3.s
-; CHECK-NEXT:    sel z1.s, p3, z2.s, z4.s
+; CHECK-NEXT:    sel z0.s, p1, z1.s, z3.s
+; CHECK-NEXT:    sel z1.s, p3, z1.s, z4.s
 ; CHECK-NEXT:    mov z0.s, p2/m, #0 // =0x0
 ; CHECK-NEXT:    mov z1.s, p0/m, #0 // =0x0
 ; CHECK-NEXT:    ret
@@ -526,18 +523,17 @@ define <vscale x 8 x i32> @test_signed_v8f16_v8i32(<vscale x 8 x half> %f) {
 define <vscale x 4 x i16> @test_signed_v4f16_v4i16(<vscale x 4 x half> %f) {
 ; CHECK-LABEL: test_signed_v4f16_v4i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #63488 // =0xf800
+; CHECK-NEXT:    dupm z1.h, #0xf800
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov z2.s, #-32768 // =0xffffffffffff8000
-; CHECK-NEXT:    mov z1.h, w8
 ; CHECK-NEXT:    mov w8, #30719 // =0x77ff
+; CHECK-NEXT:    mov z2.h, w8
 ; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT:    mov z1.h, w8
-; CHECK-NEXT:    fcmgt p2.h, p0/z, z0.h, z1.h
-; CHECK-NEXT:    mov z1.s, #32767 // =0x7fff
-; CHECK-NEXT:    fcvtzs z2.s, p1/m, z0.h
+; CHECK-NEXT:    mov z1.s, #-32768 // =0xffffffffffff8000
+; CHECK-NEXT:    fcvtzs z1.s, p1/m, z0.h
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z0.h, z2.h
+; CHECK-NEXT:    mov z2.s, #32767 // =0x7fff
 ; CHECK-NEXT:    fcmuo p0.h, p0/z, z0.h, z0.h
-; CHECK-NEXT:    sel z0.s, p2, z1.s, z2.s
+; CHECK-NEXT:    sel z0.s, p1, z2.s, z1.s
 ; CHECK-NEXT:    mov z0.s, p0/m, #0 // =0x0
 ; CHECK-NEXT:    ret
     %x = call <vscale x 4 x i16> @llvm.fptosi.sat.nxv4f16.nxv4i16(<vscale x 4 x half> %f)
@@ -547,18 +543,17 @@ define <vscale x 4 x i16> @test_signed_v4f16_v4i16(<vscale x 4 x half> %f) {
 define <vscale x 8 x i16> @test_signed_v8f16_v8i16(<vscale x 8 x half> %f) {
 ; CHECK-LABEL: test_signed_v8f16_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #63488 // =0xf800
+; CHECK-NEXT:    dupm z1.h, #0xf800
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    mov z2.h, #-32768 // =0xffffffffffff8000
-; CHECK-NEXT:    mov z1.h, w8
 ; CHECK-NEXT:    mov w8, #30719 // =0x77ff
+; CHECK-NEXT:    mov z2.h, w8
 ; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT:    mov z1.h, w8
-; CHECK-NEXT:    fcmgt p2.h, p0/z, z0.h, z1.h
-; CHECK-NEXT:    mov z1.h, #32767 // =0x7fff
-; CHECK-NEXT:    fcvtzs z2.h, p1/m, z0.h
+; CHECK-NEXT:    mov z1.h, #-32768 // =0xffffffffffff8000
+; CHECK-NEXT:    fcvtzs z1.h, p1/m, z0.h
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z0.h, z2.h
+; CHECK-NEXT:    mov z2.h, #32767 // =0x7fff
 ; CHECK-NEXT:    fcmuo p0.h, p0/z, z0.h, z0.h
-; CHECK-NEXT:    sel z0.h, p2, z1.h, z2.h
+; CHECK-NEXT:    sel z0.h, p1, z2.h, z1.h
 ; CHECK-NEXT:    mov z0.h, p0/m, #0 // =0x0
 ; CHECK-NEXT:    ret
     %x = call <vscale x 8 x i16> @llvm.fptosi.sat.nxv8f16.nxv8i16(<vscale x 8 x half> %f)
@@ -568,18 +563,17 @@ define <vscale x 8 x i16> @test_signed_v8f16_v8i16(<vscale x 8 x half> %f) {
 define <vscale x 2 x i64> @test_signed_v2f16_v2i64(<vscale x 2 x half> %f) {
 ; CHECK-LABEL: test_signed_v2f16_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
+; CHECK-NEXT:    mov z1.h, #-1025 // =0xfffffffffffffbff
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z1.h, w8
 ; CHECK-NEXT:    mov w8, #31743 // =0x7bff
-; CHECK-NEXT:    mov z2.h, w8
+; CHECK-NEXT:    mov z2.d, #0x8000000000000000
 ; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT:    mov z1.d, #0x8000000000000000
-; CHECK-NEXT:    fcmgt p2.h, p0/z, z0.h, z2.h
-; CHECK-NEXT:    mov z2.d, #0x7fffffffffffffff
+; CHECK-NEXT:    mov z1.h, w8
+; CHECK-NEXT:    fcvtzs z2.d, p1/m, z0.h
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    mov z1.d, #0x7fffffffffffffff
 ; CHECK-NEXT:    fcmuo p0.h, p0/z, z0.h, z0.h
-; CHECK-NEXT:    fcvtzs z1.d, p1/m, z0.h
-; CHECK-NEXT:    sel z0.d, p2, z2.d, z1.d
+; CHECK-NEXT:    sel z0.d, p1, z1.d, z2.d
 ; CHECK-NEXT:    mov z0.d, p0/m, #0 // =0x0
 ; CHECK-NEXT:    ret
     %x = call <vscale x 2 x i64> @llvm.fptosi.sat.nxv2f16.nxv2i64(<vscale x 2 x half> %f)
@@ -589,26 +583,25 @@ define <vscale x 2 x i64> @test_signed_v2f16_v2i64(<vscale x 2 x half> %f) {
 define <vscale x 4 x i64> @test_signed_v4f16_v4i64(<vscale x 4 x half> %f) {
 ; CHECK-LABEL: test_signed_v4f16_v4i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
-; CHECK-NEXT:    uunpklo z1.d, z0.s
+; CHECK-NEXT:    mov z1.h, #-1025 // =0xfffffffffffffbff
+; CHECK-NEXT:    uunpklo z2.d, z0.s
+; CHECK-NEXT:    mov w8, #31743 // =0x7bff
 ; CHECK-NEXT:    uunpkhi z0.d, z0.s
-; CHECK-NEXT:    mov z2.h, w8
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov w8, #31743 // =0x7bff
 ; CHECK-NEXT:    mov z3.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z4.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z5.h, w8
-; CHECK-NEXT:    fcmge p1.h, p0/z, z1.h, z2.h
-; CHECK-NEXT:    fcmge p2.h, p0/z, z0.h, z2.h
-; CHECK-NEXT:    mov z2.d, #0x7fffffffffffffff
+; CHECK-NEXT:    fcmge p1.h, p0/z, z2.h, z1.h
+; CHECK-NEXT:    fcmge p2.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    mov z1.d, #0x7fffffffffffffff
 ; CHECK-NEXT:    fcmgt p3.h, p0/z, z0.h, z5.h
-; CHECK-NEXT:    fcvtzs z3.d, p1/m, z1.h
-; CHECK-NEXT:    fcmgt p1.h, p0/z, z1.h, z5.h
+; CHECK-NEXT:    fcvtzs z3.d, p1/m, z2.h
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z2.h, z5.h
 ; CHECK-NEXT:    fcvtzs z4.d, p2/m, z0.h
-; CHECK-NEXT:    fcmuo p2.h, p0/z, z1.h, z1.h
+; CHECK-NEXT:    fcmuo p2.h, p0/z, z2.h, z2.h
 ; CHECK-NEXT:    fcmuo p0.h, p0/z, z0.h, z0.h
-; CHECK-NEXT:    sel z0.d, p1, z2.d, z3.d
-; CHECK-NEXT:    sel z1.d, p3, z2.d, z4.d
+; CHECK-NEXT:    sel z0.d, p1, z1.d, z3.d
+; CHECK-NEXT:    sel z1.d, p3, z1.d, z4.d
 ; CHECK-NEXT:    mov z0.d, p2/m, #0 // =0x0
 ; CHECK-NEXT:    mov z1.d, p0/m, #0 // =0x0
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/sve-llrint.ll b/llvm/test/CodeGen/AArch64/sve-llrint.ll
index f964d70e0a05c..c2bb0c81ab405 100644
--- a/llvm/test/CodeGen/AArch64/sve-llrint.ll
+++ b/llvm/test/CodeGen/AArch64/sve-llrint.ll
@@ -5,9 +5,8 @@ define <vscale x 1 x i64> @llrint_v1i64_v1f16(<vscale x 1 x half> %x) {
 ; CHECK-LABEL: llrint_v1i64_v1f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
+; CHECK-NEXT:    mov z1.h, #-1025 // =0xfffffffffffffbff
 ; CHECK-NEXT:    mov z2.d, #0x8000000000000000
-; CHECK-NEXT:    mov z1.h, w8
 ; CHECK-NEXT:    mov w8, #31743 // =0x7bff
 ; CHECK-NEXT:    frintx z0.h, p0/m, z0.h
 ; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, z1.h
@@ -28,9 +27,8 @@ define <vscale x 2 x i64> @llrint_v1i64_v2f16(<vscale x 2 x half> %x) {
 ; CHECK-LABEL: llrint_v1i64_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
+; CHECK-NEXT:    mov z1.h, #-1025 // =0xfffffffffffffbff
 ; CHECK-NEXT:    mov z2.d, #0x8000000000000000
-; CHECK-NEXT:    mov z1.h, w8
 ; CHECK-NEXT:    mov w8, #31743 // =0x7bff
 ; CHECK-NEXT:    frintx z0.h, p0/m, z0.h
 ; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, z1.h
@@ -52,10 +50,9 @@ define <vscale x 4 x i64> @llrint_v4i64_v4f16(<vscale x 4 x half> %x) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    uunpklo z1.d, z0.s
 ; CHECK-NEXT:    uunpkhi z0.d, z0.s
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z2.h, w8
 ; CHECK-NEXT:    mov w8, #31743 // =0x7bff
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z2.h, #-1025 // =0xfffffffffffffbff
 ; CHECK-NEXT:    mov z3.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z4.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z5.d, #0x7fffffffffffffff
@@ -92,10 +89,9 @@ define <vscale x 8 x i64> @llrint_v8i64_v8f16(<vscale x 8 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    uunpklo z1.s, z0.h
 ; CHECK-NEXT:    uunpkhi z0.s, z0.h
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z4.h, w8
 ; CHECK-NEXT:    mov w8, #31743 // =0x7bff
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z4.h, #-1025 // =0xfffffffffffffbff
 ; CHECK-NEXT:    mov z5.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z6.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z7.d, #0x8000000000000000
@@ -162,12 +158,13 @@ define <vscale x 16 x i64> @llrint_v16i64_v16f16(<vscale x 16 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    uunpklo z2.s, z0.h
 ; CHECK-NEXT:    uunpkhi z3.s, z0.h
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
+; CHECK-NEXT:    mov w8, #31743 // =0x7bff
 ; CHECK-NEXT:    uunpklo z7.s, z1.h
 ; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z0.h, #-1025 // =0xfffffffffffffbff
 ; CHECK-NEXT:    uunpkhi z1.s, z1.h
-; CHECK-NEXT:    mov z0.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z5.d, #0x8000000000000000
+; CHECK-NEXT:    mov z29.h, w8
 ; CHECK-NEXT:    mov z31.d, #0x8000000000000000
 ; CHECK-NEXT:    uunpklo z4.d, z2.s
 ; CHECK-NEXT:    uunpklo z24.d, z3.s
@@ -175,10 +172,8 @@ define <vscale x 16 x i64> @llrint_v16i64_v16f16(<vscale x 16 x half> %x) {
 ; CHECK-NEXT:    uunpkhi z6.d, z2.s
 ; CHECK-NEXT:    uunpklo z26.d, z7.s
 ; CHECK-NEXT:    uunpkhi z7.d, z7.s
-; CHECK-NEXT:    mov z2.h, w8
-; CHECK-NEXT:    mov w8, #31743 // =0x7bff
+; CHECK-NEXT:    mov z2.d, #0x8000000000000000
 ; CHECK-NEXT:    uunpklo z30.d, z1.s
-; CHECK-NEXT:    mov z29.h, w8
 ; CHECK-NEXT:    mov z3.d, #0x8000000000000000
 ; CHECK-NEXT:    uunpkhi z1.d, z1.s
 ; CHECK-NEXT:    movprfx z27, z4
@@ -191,17 +186,17 @@ define <vscale x 16 x i64> @llrint_v16i64_v16f16(<vscale x 16 x half> %x) {
 ; CHECK-NEXT:    frintx z26.h, p0/m, z26.h
 ; CHECK-NEXT:    frintx z7.h, p0/m, z7.h
 ; CHECK-NEXT:    mov z6.d, #0x8000000000000000
-; CHECK-NEXT:    fcmge p1.h, p0/z, z27.h, z2.h
-; CHECK-NEXT:    fcmge p3.h, p0/z, z24.h, z2.h
-; CHECK-NEXT:    fcmge p4.h, p0/z, z25.h, z2.h
-; CHECK-NEXT:    fcmge p2.h, p0/z, z28.h, z2.h
-; CHECK-NEXT:    fcmge p5.h, p0/z, z26.h, z2.h
-; CHECK-NEXT:    fcvtzs z0.d, p1/m, z27.h
+; CHECK-NEXT:    fcmge p1.h, p0/z, z27.h, z0.h
+; CHECK-NEXT:    fcmge p3.h, p0/z, z24.h, z0.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z25.h, z0.h
+; CHECK-NEXT:    fcmge p2.h, p0/z, z28.h, z0.h
+; CHECK-NEXT:    fcmge p5.h, p0/z, z26.h, z0.h
+; CHECK-NEXT:    fcvtzs z2.d, p1/m, z27.h
 ; CHECK-NEXT:    fcvtzs z4.d, p3/m, z24.h
 ; CHECK-NEXT:    fcvtzs z5.d, p4/m, z25.h
 ; CHECK-NEXT:    fcmgt p3.h, p0/z, z27.h, z29.h
 ; CHECK-NEXT:    fcvtzs z3.d, p2/m, z28.h
-; CHECK-NEXT:    fcmge p4.h, p0/z, z7.h, z2.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z7.h, z0.h
 ; CHECK-NEXT:    fcvtzs z6.d, p5/m, z26.h
 ; CHECK-NEXT:    fcmuo p1.h, p0/z, z27.h, z27.h
 ; CHECK-NEXT:    movprfx z27, z30
@@ -212,7 +207,7 @@ define <vscale x 16 x i64> @llrint_v16i64_v16f16(<vscale x 16 x half> %x) {
 ; CHECK-NEXT:    fcmuo p2.h, p0/z, z28.h, z28.h
 ; CHECK-NEXT:    mov z28.d, #0x8000000000000000
 ; CHECK-NEXT:    fcvtzs z31.d, p4/m, z7.h
-; CHECK-NEXT:    fcmge p4.h, p0/z, z27.h, z2.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z27.h, z0.h
 ; CHECK-NEXT:    fcmgt p6.h, p0/z, z24.h, z29.h
 ; CHECK-NEXT:    fcmuo p7.h, p0/z, z24.h, z24.h
 ; CHECK-NEXT:    mov z24.d, #0x7fffffffffffffff
@@ -221,31 +216,31 @@ define <vscale x 16 x i64> @llrint_v16i64_v16f16(<vscale x 16 x half> %x) {
 ; CHECK-NEXT:    fcmuo p10.h, p0/z, z25.h, z25.h
 ; CHECK-NEXT:    mov z25.d, #0x8000000000000000
 ; CHECK-NEXT:    sel z1.d, p5, z24.d, z3.d
-; CHECK-NEXT:    mov z0.d, p3/m, z24.d
 ; CHECK-NEXT:    sel z3.d, p8, z24.d, z5.d
-; CHECK-NEXT:    fcmge p4.h, p0/z, z30.h, z2.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z30.h, z0.h
+; CHECK-NEXT:    sel z0.d, p3, z24.d, z2.d
 ; CHECK-NEXT:    sel z2.d, p6, z24.d, z4.d
-; CHECK-NEXT:    mov z0.d, p1/m, #0 // =0x0
 ; CHECK-NEXT:    mov z1.d, p2/m, #0 // =0x0
 ; CHECK-NEXT:    mov z3.d, p10/m, #0 // =0x0
 ; CHECK-NEXT:    ldr p10, [sp, #1, mul vl] // 2-byte Reload
+; CHECK-NEXT:    fcmgt p9.h, p0/z, z26.h, z29.h
 ; CHECK-NEXT:    mov z2.d, p7/m, #0 // =0x0
 ; CHECK-NEXT:    ldr p7, [sp, #4, mul vl] // 2-byte Reload
-; CHECK-NEXT:    fcmgt p9.h, p0/z, z26.h, z29.h
 ; CHECK-NEXT:    fcvtzs z25.d, p4/m, z30.h
+; CHECK-NEXT:    mov z0.d, p1/m, #0 // =0x0
 ; CHECK-NEXT:    fcmgt p5.h, p0/z, z7.h, z29.h
 ; CHECK-NEXT:    fcmgt p6.h, p0/z, z27.h, z29.h
-; CHECK-NEXT:    fcmgt p4.h, p0/z, z30.h, z29.h
 ; CHECK-NEXT:    sel z4.d, p9, z24.d, z6.d
+; CHECK-NEXT:    fcmgt p4.h, p0/z, z30.h, z29.h
 ; CHECK-NEXT:    fcmuo p8.h, p0/z, z7.h, z7.h
 ; CHECK-NEXT:    sel z5.d, p5, z24.d, z31.d
 ; CHECK-NEXT:    ldr p5, [sp, #6, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    sel z6.d, p6, z24.d, z28.d
 ; CHECK-NEXT:    ldr p6, [sp, #5, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    fcmuo p9.h, p0/z, z27.h, z27.h
+; CHECK-NEXT:    fcmuo p3.h, p0/z, z26.h, z26.h
 ; CHECK-NEXT:    sel z7.d, p4, z24.d, z25.d
 ; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Reload
-; CHECK-NEXT:    fcmuo p3.h, p0/z, z26.h, z26.h
 ; CHECK-NEXT:    mov z5.d, p8/m, #0 // =0x0
 ; CHECK-NEXT:    ldr p8, [sp, #3, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    fcmuo p0.h, p0/z, z30.h, z30.h
@@ -302,48 +297,47 @@ define <vscale x 32 x i64> @llrint_v32i64_v32f16(<vscale x 32 x half> %x) {
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x4e, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x48, 0x1e, 0x22, 0x40, 0x1c // $d14 @ cfa - 56 * VG - 16
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x4f, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x40, 0x1e, 0x22, 0x40, 0x1c // $d15 @ cfa - 64 * VG - 16
 ; CHECK-NEXT:    uunpklo z4.s, z0.h
-; CHECK-NEXT:    uunpkhi z5.s, z0.h
-; CHECK-NEXT:    mov w9, #64511 // =0xfbff
-; CHECK-NEXT:    uunpklo z6.s, z1.h
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    uunpkhi z28.s, z1.h
-; CHECK-NEXT:    mov z30.h, w9
+; CHECK-NEXT:    uunpkhi z0.s, z0.h
 ; CHECK-NEXT:    mov w9, #31743 // =0x7bff
+; CHECK-NEXT:    uunpklo z5.s, z1.h
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z28.h, #-1025 // =0xfffffffffffffbff
+; CHECK-NEXT:    uunpkhi z29.s, z1.h
+; CHECK-NEXT:    mov z7.d, #0x8000000000000000
 ; CHECK-NEXT:    uunpklo z13.s, z2.h
 ; CHECK-NEXT:    mov z9.d, #0x8000000000000000
 ; CHECK-NEXT:    uunpkhi z14.s, z2.h
 ; CHECK-NEXT:    uunpkhi z17.s, z3.h
-; CHECK-NEXT:    uunpklo z7.d, z4.s
+; CHECK-NEXT:    uunpklo z6.d, z4.s
 ; CHECK-NEXT:    uunpkhi z4.d, z4.s
-; CHECK-NEXT:    uunpklo z27.d, z5.s
-; CHECK-NEXT:    uunpklo z31.d, z6.s
-; CHECK-NEXT:    uunpkhi z8.d, z6.s
-; CHECK-NEXT:    uunpkhi z29.d, z5.s
-; CHECK-NEXT:    uunpkhi z11.d, z28.s
-; CHECK-NEXT:    uunpklo z10.d, z28.s
+; CHECK-NEXT:    uunpklo z27.d, z0.s
+; CHECK-NEXT:    uunpklo z31.d, z5.s
+; CHECK-NEXT:    uunpkhi z8.d, z5.s
+; CHECK-NEXT:    uunpkhi z30.d, z0.s
+; CHECK-NEXT:    uunpkhi z11.d, z29.s
+; CHECK-NEXT:    uunpklo z10.d, z29.s
 ; CHECK-NEXT:    uunpklo z15.s, z3.h
 ; CHECK-NEXT:    uunpklo z16.d, z14.s
 ; CHECK-NEXT:    uunpkhi z14.d, z14.s
 ; CHECK-NEXT:    mov z24.d, #0x8000000000000000
-; CHECK-NEXT:    movprfx z1, z7
-; CHECK-NEXT:    frintx z1.h, p0/m, z7.h
 ; CHECK-NEXT:    movprfx z5, z27
 ; CHECK-NEXT:    frintx z5.h, p0/m, z27.h
+; CHECK-NEXT:    movprfx z1, z6
+; CHECK-NEXT:    frintx z1.h, p0/m, z6.h
 ; CHECK-NEXT:    frintx z4.h, p0/m, z4.h
 ; CHECK-NEXT:    movprfx z12, z31
 ; CHECK-NEXT:    frintx z12.h, p0/m, z31.h
 ; CHECK-NEXT:    movprfx z27, z8
 ; CHECK-NEXT:    frintx z27.h, p0/m, z8.h
-; CHECK-NEXT:    movprfx z6, z29
-; CHECK-NEXT:    frintx z6.h, p0/m, z29.h
+; CHECK-NEXT:    movprfx z6, z30
+; CHECK-NEXT:    frintx z6.h, p0/m, z30.h
 ; CHECK-NEXT:    movprfx z31, z10
 ; CHECK-NEXT:    frintx z31.h, p0/m, z10.h
-; CHECK-NEXT:    mov z7.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z8.d, #0x8000000000000000
+; CHECK-NEXT:    frintx z11.h, p0/m, z11.h
 ; CHECK-NEXT:    movprfx z3, z16
 ; CHECK-NEXT:    frintx z3.h, p0/m, z16.h
-; CHECK-NEXT:    frintx z11.h, p0/m, z11.h
-; CHECK-NEXT:    mov z29.h, w9
+; CHECK-NEXT:    mov z30.h, w9
 ; CHECK-NEXT:    uunpklo z10.d, z13.s
 ; CHECK-NEXT:    uunpkhi z13.d, z13.s
 ; CHECK-NEXT:    uunpkhi z20.d, z15.s
@@ -354,124 +348,124 @@ define <vscale x 32 x i64> @llrint_v32i64_v32f16(<vscale x 32 x half> %x) {
 ; CHECK-NEXT:    uunpklo z15.d, z15.s
 ; CHECK-NEXT:    mov z2.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z21.d, #0x8000000000000000
+; CHECK-NEXT:    frintx z10.h, p0/m, z10.h
 ; CHECK-NEXT:    mov z26.d, #0x8000000000000000
-; CHECK-NEXT:    mov z28.d, #0x7fffffffffffffff
+; CHECK-NEXT:    mov z29.d, #0x7fffffffffffffff
 ; CHECK-NEXT:    movprfx z19, z13
 ; CHECK-NEXT:    frintx z19.h, p0/m, z13.h
 ; CHECK-NEXT:    movprfx z13, z14
 ; CHECK-NEXT:    frintx z13.h, p0/m, z14.h
-; CHECK-NEXT:    frintx z10.h, p0/m, z10.h
 ; CHECK-NEXT:    frintx z16.h, p0/m, z16.h
 ; CHECK-NEXT:    mov z22.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z23.d, #0x8000000000000000
-; CHECK-NEXT:    frintx z15.h, p0/m, z15.h
 ; CHECK-NEXT:    mov z14.d, #0x8000000000000000
-; CHECK-NEXT:    fcmge p4.h, p0/z, z4.h, z30.h
-; CHECK-NEXT:    fcmge p2.h, p0/z, z12.h, z30.h
-; CHECK-NEXT:    fcmgt p9.h, p0/z, z12.h, z29.h
+; CHECK-NEXT:    frintx z15.h, p0/m, z15.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z4.h, z28.h
+; CHECK-NEXT:    fcmge p2.h, p0/z, z12.h, z28.h
+; CHECK-NEXT:    fcmgt p9.h, p0/z, z12.h, z30.h
 ; CHECK-NEXT:    fcmuo p8.h, p0/z, z12.h, z12.h
 ; CHECK-NEXT:    fcvtzs z7.d, p4/m, z4.h
 ; CHECK-NEXT:    fcvtzs z8.d, p2/m, z12.h
 ; CHECK-NEXT:    mov z12.d, #0x8000000000000000
-; CHECK-NEXT:    fcmge p4.h, p0/z, z27.h, z30.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z27.h, z28.h
 ; CHECK-NEXT:    fcmuo p10.h, p0/z, z11.h, z11.h
-; CHECK-NEXT:    fcmge p3.h, p0/z, z5.h, z30.h
-; CHECK-NEXT:    mov z8.d, p9/m, z28.d
+; CHECK-NEXT:    fcmge p3.h, p0/z, z5.h, z28.h
+; CHECK-NEXT:    mov z8.d, p9/m, z29.d
 ; CHECK-NEXT:    fcvtzs z9.d, p4/m, z27.h
-; CHECK-NEXT:    fcmge p4.h, p0/z, z11.h, z30.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z11.h, z28.h
 ; CHECK-NEXT:    fcvtzs z24.d, p3/m, z5.h
 ; CHECK-NEXT:    mov z8.d, p8/m, #0 // =0x0
-; CHECK-NEXT:    fcmge p1.h, p0/z, z6.h, z30.h
-; CHECK-NEXT:    fcmge p5.h, p0/z, z1.h, z30.h
+; CHECK-NEXT:    fcmge p1.h, p0/z, z6.h, z28.h
+; CHECK-NEXT:    fcmge p5.h, p0/z, z1.h, z28.h
 ; CHECK-NEXT:    str z8, [x8, #4, mul vl]
 ; CHECK-NEXT:    fcvtzs z12.d, p4/m, z11.h
-; CHECK-NEXT:    fcmgt p4.h, p0/z, z11.h, z29.h
+; CHECK-NEXT:    fcmgt p4.h, p0/z, z11.h, z30.h
 ; CHECK-NEXT:    uunpkhi z11.d, z17.s
 ; CHECK-NEXT:    movprfx z17, z20
 ; CHECK-NEXT:    frintx z17.h, p0/m, z20.h
 ; CHECK-NEXT:    fcvtzs z25.d, p1/m, z6.h
 ; CHECK-NEXT:    mov z20.d, #0x8000000000000000
 ; CHECK-NEXT:    fcvtzs z0.d, p5/m, z1.h
-; CHECK-NEXT:    fcmge p6.h, p0/z, z10.h, z30.h
+; CHECK-NEXT:    fcmge p6.h, p0/z, z10.h, z28.h
 ; CHECK-NEXT:    frintx z11.h, p0/m, z11.h
-; CHECK-NEXT:    fcmge p3.h, p0/z, z31.h, z30.h
-; CHECK-NEXT:    fcmge p1.h, p0/z, z13.h, z30.h
+; CHECK-NEXT:    fcmge p3.h, p0/z, z31.h, z28.h
+; CHECK-NEXT:    fcmge p1.h, p0/z, z13.h, z28.h
 ; CHECK-NEXT:    fcvtzs z18.d, p6/m, z10.h
-; CHECK-NEXT:    fcmgt p11.h, p0/z, z10.h, z29.h
-; CHECK-NEXT:    fcmge p5.h, p0/z, z11.h, z30.h
+; CHECK-NEXT:    fcmgt p11.h, p0/z, z10.h, z30.h
+; CHECK-NEXT:    fcmge p5.h, p0/z, z11.h, z28.h
 ; CHECK-NEXT:    fcvtzs z2.d, p3/m, z31.h
 ; CHECK-NEXT:    fcvtzs z21.d, p1/m, z13.h
-; CHECK-NEXT:    fcmge p2.h, p0/z, z17.h, z30.h
-; CHECK-NEXT:    fcmge p3.h, p0/z, z16.h, z30.h
+; CHECK-NEXT:    fcmge p2.h, p0/z, z17.h, z28.h
+; CHECK-NEXT:    fcmge p3.h, p0/z, z16.h, z28.h
 ; CHECK-NEXT:    fcmuo p1.h, p0/z, z10.h, z10.h
-; CHECK-NEXT:    sel z10.d, p4, z28.d, z12.d
-; CHECK-NEXT:    sel z12.d, p11, z28.d, z18.d
+; CHECK-NEXT:    sel z10.d, p4, z29.d, z12.d
+; CHECK-NEXT:    sel z12.d, p11, z29.d, z18.d
 ; CHECK-NEXT:    fcvtzs z26.d, p5/m, z11.h
 ; CHECK-NEXT:    fcvtzs z22.d, p2/m, z17.h
-; CHECK-NEXT:    fcmgt p4.h, p0/z, z11.h, z29.h
+; CHECK-NEXT:    fcmgt p4.h, p0/z, z11.h, z30.h
 ; CHECK-NEXT:    fcvtzs z23.d, p3/m, z16.h
 ; CHECK-NEXT:    mov z10.d, p10/m, #0 // =0x0
 ; CHECK-NEXT:    mov z12.d, p1/m, #0 // =0x0
-; CHECK-NEXT:    fcmge p6.h, p0/z, z19.h, z30.h
+; CHECK-NEXT:    fcmge p6.h, p0/z, z19.h, z28.h
 ; CHECK-NEXT:    str z10, [x8, #7, mul vl]
-; CHECK-NEXT:    fcmge p7.h, p0/z, z3.h, z30.h
+; CHECK-NEXT:    fcmge p7.h, p0/z, z3.h, z28.h
 ; CHECK-NEXT:    str z12, [x8, #8, mul vl]
-; CHECK-NEXT:    mov z26.d, p4/m, z28.d
-; CHECK-NEXT:    fcmge p2.h, p0/z, z15.h, z30.h
-; CHECK-NEXT:    mov z30.d, #0x8000000000000000
+; CHECK-NEXT:    mov z26.d, p4/m, z29.d
+; CHECK-NEXT:    fcmge p2.h, p0/z, z15.h, z28.h
+; CHECK-NEXT:    mov z28.d, #0x8000000000000000
 ; CHECK-NEXT:    fcvtzs z14.d, p6/m, z19.h
-; CHECK-NEXT:    fcmgt p5.h, p0/z, z16.h, z29.h
-; CHECK-NEXT:    fcmgt p3.h, p0/z, z17.h, z29.h
+; CHECK-NEXT:    fcmgt p5.h, p0/z, z16.h, z30.h
+; CHECK-NEXT:    fcmgt p3.h, p0/z, z17.h, z30.h
 ; CHECK-NEXT:    fcvtzs z20.d, p7/m, z3.h
-; CHECK-NEXT:    fcvtzs z30.d, p2/m, z15.h
+; CHECK-NEXT:    fcvtzs z28.d, p2/m, z15.h
 ; CHECK-NEXT:    fcmuo p1.h, p0/z, z11.h, z11.h
 ; CHECK-NEXT:    fcmuo p2.h, p0/z, z16.h, z16.h
-; CHECK-NEXT:    sel z11.d, p5, z28.d, z23.d
-; CHECK-NEXT:    sel z16.d, p3, z28.d, z22.d
-; CHECK-NEXT:    fcmgt p4.h, p0/z, z19.h, z29.h
-; CHECK-NEXT:    fcmgt p3.h, p0/z, z15.h, z29.h
+; CHECK-NEXT:    sel z11.d, p5, z29.d, z23.d
+; CHECK-NEXT:    sel z16.d, p3, z29.d, z22.d
+; CHECK-NEXT:    fcmgt p4.h, p0/z, z19.h, z30.h
+; CHECK-NEXT:    fcmgt p3.h, p0/z, z15.h, z30.h
 ; CHECK-NEXT:    mov z26.d, p1/m, #0 // =0x0
 ; CHECK-NEXT:    mov z11.d, p2/m, #0 // =0x0
-; CHECK-NEXT:    fcmgt p1.h, p0/z, z13.h, z29.h
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z13.h, z30.h
 ; CHECK-NEXT:    fcmuo p6.h, p0/z, z17.h, z17.h
 ; CHECK-NEXT:    str z26, [x8, #15, mul vl]
-; CHECK-NEXT:    sel z26.d, p4, z28.d, z14.d
+; CHECK-NEXT:    sel z26.d, p4, z29.d, z14.d
 ; CHECK-NEXT:    str z11, [x8, #14, mul vl]
-; CHECK-NEXT:    mov z30.d, p3/m, z28.d
-; CHECK-NEXT:    fcmgt p2.h, p0/z, z3.h, z29.h
+; CHECK-NEXT:    mov z28.d, p3/m, z29.d
+; CHECK-NEXT:    fcmgt p2.h, p0/z, z3.h, z30.h
 ; CHECK-NEXT:    fcmuo p4.h, p0/z, z13.h, z13.h
 ; CHECK-NEXT:    fcmuo p3.h, p0/z, z3.h, z3.h
-; CHECK-NEXT:    sel z3.d, p1, z28.d, z21.d
+; CHECK-NEXT:    sel z3.d, p1, z29.d, z21.d
 ; CHECK-NEXT:    mov z16.d, p6/m, #0 // =0x0
-; CHECK-NEXT:    fcmgt p12.h, p0/z, z27.h, z29.h
-; CHECK-NEXT:    sel z11.d, p2, z28.d, z20.d
+; CHECK-NEXT:    fcmgt p12.h, p0/z, z27.h, z30.h
+; CHECK-NEXT:    sel z11.d, p2, z29.d, z20.d
 ; CHECK-NEXT:    str z16, [x8, #13, mul vl]
 ; CHECK-NEXT:    mov z3.d, p4/m, #0 // =0x0
 ; CHECK-NEXT:    fcmuo p6.h, p0/z, z15.h, z15.h
-; CHECK-NEXT:    fcmgt p1.h, p0/z, z4.h, z29.h
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z4.h, z30.h
 ; CHECK-NEXT:    mov z11.d, p3/m, #0 // =0x0
-; CHECK-NEXT:    mov z9.d, p12/m, z28.d
+; CHECK-NEXT:    mov z9.d, p12/m, z29.d
 ; CHECK-NEXT:    str z3, [x8, #11, mul vl]
 ; CHECK-NEXT:    fcmuo p5.h, p0/z, z19.h, z19.h
-; CHECK-NEXT:    fcmgt p2.h, p0/z, z5.h, z29.h
+; CHECK-NEXT:    fcmgt p2.h, p0/z, z5.h, z30.h
 ; CHECK-NEXT:    str z11, [x8, #10, mul vl]
-; CHECK-NEXT:    mov z30.d, p6/m, #0 // =0x0
-; CHECK-NEXT:    sel z3.d, p1, z28.d, z7.d
-; CHECK-NEXT:    fcmgt p4.h, p0/z, z6.h, z29.h
+; CHECK-NEXT:    mov z28.d, p6/m, #0 // =0x0
+; CHECK-NEXT:    sel z3.d, p1, z29.d, z7.d
+; CHECK-NEXT:    fcmgt p4.h, p0/z, z6.h, z30.h
 ; CHECK-NEXT:    fcmuo p3.h, p0/z, z27.h, z27.h
-; CHECK-NEXT:    str z30, [x8, #12, mul vl]
+; CHECK-NEXT:    str z28, [x8, #12, mul vl]
 ; CHECK-NEXT:    mov z26.d, p5/m, #0 // =0x0
-; CHECK-NEXT:    sel z7.d, p2, z28.d, z24.d
-; CHECK-NEXT:    fcmgt p6.h, p0/z, z31.h, z29.h
-; CHECK-NEXT:    fcmgt p1.h, p0/z, z1.h, z29.h
+; CHECK-NEXT:    sel z7.d, p2, z29.d, z24.d
+; CHECK-NEXT:    fcmgt p6.h, p0/z, z31.h, z30.h
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z1.h, z30.h
 ; CHECK-NEXT:    str z26, [x8, #9, mul vl]
-; CHECK-NEXT:    sel z24.d, p4, z28.d, z25.d
+; CHECK-NEXT:    sel z24.d, p4, z29.d, z25.d
 ; CHECK-NEXT:    mov z9.d, p3/m, #0 // =0x0
 ; CHECK-NEXT:    fcmuo p5.h, p0/z, z31.h, z31.h
 ; CHECK-NEXT:    fcmuo p2.h, p0/z, z6.h, z6.h
-; CHECK-NEXT:    mov z2.d, p6/m, z28.d
+; CHECK-NEXT:    mov z2.d, p6/m, z29.d
 ; CHECK-NEXT:    str z9, [x8, #5, mul vl]
-; CHECK-NEXT:    mov z0.d, p1/m, z28.d
+; CHECK-NEXT:    mov z0.d, p1/m, z29.d
 ; CHECK-NEXT:    fcmuo p3.h, p0/z, z5.h, z5.h
 ; CHECK-NEXT:    fcmuo p4.h, p0/z, z4.h, z4.h
 ; CHECK-NEXT:    mov z2.d, p5/m, #0 // =0x0
diff --git a/llvm/test/CodeGen/AArch64/sve-lrint.ll b/llvm/test/CodeGen/AArch64/sve-lrint.ll
index f517e7fe8dc16..f1224d30d53cc 100644
--- a/llvm/test/CodeGen/AArch64/sve-lrint.ll
+++ b/llvm/test/CodeGen/AArch64/sve-lrint.ll
@@ -6,9 +6,8 @@ define <vscale x 1 x iXLen> @lrint_v1f16(<vscale x 1 x half> %x) {
 ; CHECK-LABEL: lrint_v1f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
+; CHECK-NEXT:    mov z1.h, #-1025 // =0xfffffffffffffbff
 ; CHECK-NEXT:    mov z2.d, #0x8000000000000000
-; CHECK-NEXT:    mov z1.h, w8
 ; CHECK-NEXT:    mov w8, #31743 // =0x7bff
 ; CHECK-NEXT:    frintx z0.h, p0/m, z0.h
 ; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, z1.h
@@ -29,9 +28,8 @@ define <vscale x 2 x iXLen> @lrint_v2f16(<vscale x 2 x half> %x) {
 ; CHECK-LABEL: lrint_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
+; CHECK-NEXT:    mov z1.h, #-1025 // =0xfffffffffffffbff
 ; CHECK-NEXT:    mov z2.d, #0x8000000000000000
-; CHECK-NEXT:    mov z1.h, w8
 ; CHECK-NEXT:    mov w8, #31743 // =0x7bff
 ; CHECK-NEXT:    frintx z0.h, p0/m, z0.h
 ; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, z1.h
@@ -53,10 +51,9 @@ define <vscale x 4 x iXLen> @lrint_v4f16(<vscale x 4 x half> %x) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    uunpklo z1.d, z0.s
 ; CHECK-NEXT:    uunpkhi z0.d, z0.s
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z2.h, w8
 ; CHECK-NEXT:    mov w8, #31743 // =0x7bff
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z2.h, #-1025 // =0xfffffffffffffbff
 ; CHECK-NEXT:    mov z3.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z4.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z5.d, #0x7fffffffffffffff
@@ -93,10 +90,9 @@ define <vscale x 8 x iXLen> @lrint_v8f16(<vscale x 8 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    uunpklo z1.s, z0.h
 ; CHECK-NEXT:    uunpkhi z0.s, z0.h
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z4.h, w8
 ; CHECK-NEXT:    mov w8, #31743 // =0x7bff
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z4.h, #-1025 // =0xfffffffffffffbff
 ; CHECK-NEXT:    mov z5.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z6.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z7.d, #0x8000000000000000
@@ -163,12 +159,13 @@ define <vscale x 16 x iXLen> @lrint_v16f16(<vscale x 16 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    uunpklo z2.s, z0.h
 ; CHECK-NEXT:    uunpkhi z3.s, z0.h
-; CHECK-NEXT:    mov w8, #64511 // =0xfbff
+; CHECK-NEXT:    mov w8, #31743 // =0x7bff
 ; CHECK-NEXT:    uunpklo z7.s, z1.h
 ; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z0.h, #-1025 // =0xfffffffffffffbff
 ; CHECK-NEXT:    uunpkhi z1.s, z1.h
-; CHECK-NEXT:    mov z0.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z5.d, #0x8000000000000000
+; CHECK-NEXT:    mov z29.h, w8
 ; CHECK-NEXT:    mov z31.d, #0x8000000000000000
 ; CHECK-NEXT:    uunpklo z4.d, z2.s
 ; CHECK-NEXT:    uunpklo z24.d, z3.s
@@ -176,10 +173,8 @@ define <vscale x 16 x iXLen> @lrint_v16f16(<vscale x 16 x half> %x) {
 ; CHECK-NEXT:    uunpkhi z6.d, z2.s
 ; CHECK-NEXT:    uunpklo z26.d, z7.s
 ; CHECK-NEXT:    uunpkhi z7.d, z7.s
-; CHECK-NEXT:    mov z2.h, w8
-; CHECK-NEXT:    mov w8, #31743 // =0x7bff
+; CHECK-NEXT:    mov z2.d, #0x8000000000000000
 ; CHECK-NEXT:    uunpklo z30.d, z1.s
-; CHECK-NEXT:    mov z29.h, w8
 ; CHECK-NEXT:    mov z3.d, #0x8000000000000000
 ; CHECK-NEXT:    uunpkhi z1.d, z1.s
 ; CHECK-NEXT:    movprfx z27, z4
@@ -192,17 +187,17 @@ define <vscale x 16 x iXLen> @lrint_v16f16(<vscale x 16 x half> %x) {
 ; CHECK-NEXT:    frintx z26.h, p0/m, z26.h
 ; CHECK-NEXT:    frintx z7.h, p0/m, z7.h
 ; CHECK-NEXT:    mov z6.d, #0x8000000000000000
-; CHECK-NEXT:    fcmge p1.h, p0/z, z27.h, z2.h
-; CHECK-NEXT:    fcmge p3.h, p0/z, z24.h, z2.h
-; CHECK-NEXT:    fcmge p4.h, p0/z, z25.h, z2.h
-; CHECK-NEXT:    fcmge p2.h, p0/z, z28.h, z2.h
-; CHECK-NEXT:    fcmge p5.h, p0/z, z26.h, z2.h
-; CHECK-NEXT:    fcvtzs z0.d, p1/m, z27.h
+; CHECK-NEXT:    fcmge p1.h, p0/z, z27.h, z0.h
+; CHECK-NEXT:    fcmge p3.h, p0/z, z24.h, z0.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z25.h, z0.h
+; CHECK-NEXT:    fcmge p2.h, p0/z, z28.h, z0.h
+; CHECK-NEXT:    fcmge p5.h, p0/z, z26.h, z0.h
+; CHECK-NEXT:    fcvtzs z2.d, p1/m, z27.h
 ; CHECK-NEXT:    fcvtzs z4.d, p3/m, z24.h
 ; CHECK-NEXT:    fcvtzs z5.d, p4/m, z25.h
 ; CHECK-NEXT:    fcmgt p3.h, p0/z, z27.h, z29.h
 ; CHECK-NEXT:    fcvtzs z3.d, p2/m, z28.h
-; CHECK-NEXT:    fcmge p4.h, p0/z, z7.h, z2.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z7.h, z0.h
 ; CHECK-NEXT:    fcvtzs z6.d, p5/m, z26.h
 ; CHECK-NEXT:    fcmuo p1.h, p0/z, z27.h, z27.h
 ; CHECK-NEXT:    movprfx z27, z30
@@ -213,7 +208,7 @@ define <vscale x 16 x iXLen> @lrint_v16f16(<vscale x 16 x half> %x) {
 ; CHECK-NEXT:    fcmuo p2.h, p0/z, z28.h, z28.h
 ; CHECK-NEXT:    mov z28.d, #0x8000000000000000
 ; CHECK-NEXT:    fcvtzs z31.d, p4/m, z7.h
-; CHECK-NEXT:    fcmge p4.h, p0/z, z27.h, z2.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z27.h, z0.h
 ; CHECK-NEXT:    fcmgt p6.h, p0/z, z24.h, z29.h
 ; CHECK-NEXT:    fcmuo p7.h, p0/z, z24.h, z24.h
 ; CHECK-NEXT:    mov z24.d, #0x7fffffffffffffff
@@ -222,31 +217,31 @@ define <vscale x 16 x iXLen> @lrint_v16f16(<vscale x 16 x half> %x) {
 ; CHECK-NEXT:    fcmuo p10.h, p0/z, z25.h, z25.h
 ; CHECK-NEXT:    mov z25.d, #0x8000000000000000
 ; CHECK-NEXT:    sel z1.d, p5, z24.d, z3.d
-; CHECK-NEXT:    mov z0.d, p3/m, z24.d
 ; CHECK-NEXT:    sel z3.d, p8, z24.d, z5.d
-; CHECK-NEXT:    fcmge p4.h, p0/z, z30.h, z2.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z30.h, z0.h
+; CHECK-NEXT:    sel z0.d, p3, z24.d, z2.d
 ; CHECK-NEXT:    sel z2.d, p6, z24.d, z4.d
-; CHECK-NEXT:    mov z0.d, p1/m, #0 // =0x0
 ; CHECK-NEXT:    mov z1.d, p2/m, #0 // =0x0
 ; CHECK-NEXT:    mov z3.d, p10/m, #0 // =0x0
 ; CHECK-NEXT:    ldr p10, [sp, #1, mul vl] // 2-byte Reload
+; CHECK-NEXT:    fcmgt p9.h, p0/z, z26.h, z29.h
 ; CHECK-NEXT:    mov z2.d, p7/m, #0 // =0x0
 ; CHECK-NEXT:    ldr p7, [sp, #4, mul vl] // 2-byte Reload
-; CHECK-NEXT:    fcmgt p9.h, p0/z, z26.h, z29.h
 ; CHECK-NEXT:    fcvtzs z25.d, p4/m, z30.h
+; CHECK-NEXT:    mov z0.d, p1/m, #0 // =0x0
 ; CHECK-NEXT:    fcmgt p5.h, p0/z, z7.h, z29.h
 ; CHECK-NEXT:    fcmgt p6.h, p0/z, z27.h, z29.h
-; CHECK-NEXT:    fcmgt p4.h, p0/z, z30.h, z29.h
 ; CHECK-NEXT:    sel z4.d, p9, z24.d, z6.d
+; CHECK-NEXT:    fcmgt p4.h, p0/z, z30.h, z29.h
 ; CHECK-NEXT:    fcmuo p8.h, p0/z, z7.h, z7.h
 ; CHECK-NEXT:    sel z5.d, p5, z24.d, z31.d
 ; CHECK-NEXT:    ldr p5, [sp, #6, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    sel z6.d, p6, z24.d, z28.d
 ; CHECK-NEXT:    ldr p6, [sp, #5, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    fcmuo p9.h, p0/z, z27.h, z27.h
+; CHECK-NEXT:    fcmuo p3.h, p0/z, z26.h, z26.h
 ; CHECK-NEXT:    sel z7.d, p4, z24.d, z25.d
 ; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Reload
-; CHECK-NEXT:    fcmuo p3.h, p0/z, z26.h, z26.h
 ; CHECK-NEXT:    mov z5.d, p8/m, #0 // =0x0
 ; CHECK-NEXT:    ldr p8, [sp, #3, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    fcmuo p0.h, p0/z, z30.h, z30.h
@@ -303,48 +298,47 @@ define <vscale x 32 x iXLen> @lrint_v32f16(<vscale x 32 x half> %x) {
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x4e, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x48, 0x1e, 0x22, 0x40, 0x1c // $d14 @ cfa - 56 * VG - 16
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x4f, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x40, 0x1e, 0x22, 0x40, 0x1c // $d15 @ cfa - 64 * VG - 16
 ; CHECK-NEXT:    uunpklo z4.s, z0.h
-; CHECK-NEXT:    uunpkhi z5.s, z0.h
-; CHECK-NEXT:    mov w9, #64511 // =0xfbff
-; CHECK-NEXT:    uunpklo z6.s, z1.h
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    uunpkhi z28.s, z1.h
-; CHECK-NEXT:    mov z30.h, w9
+; CHECK-NEXT:    uunpkhi z0.s, z0.h
 ; CHECK-NEXT:    mov w9, #31743 // =0x7bff
+; CHECK-NEXT:    uunpklo z5.s, z1.h
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z28.h, #-1025 // =0xfffffffffffffbff
+; CHECK-NEXT:    uunpkhi z29.s, z1.h
+; CHECK-NEXT:    mov z7.d, #0x8000000000000000
 ; CHECK-NEXT:    uunpklo z13.s, z2.h
 ; CHECK-NEXT:    mov z9.d, #0x8000000000000000
 ; CHECK-NEXT:    uunpkhi z14.s, z2.h
 ; CHECK-NEXT:    uunpkhi z17.s, z3.h
-; CHECK-NEXT:    uunpklo z7.d, z4.s
+; CHECK-NEXT:    uunpklo z6.d, z4.s
 ; CHECK-NEXT:    uunpkhi z4.d, z4.s
-; CHECK-NEXT:    uunpklo z27.d, z5.s
-; CHECK-NEXT:    uunpklo z31.d, z6.s
-; CHECK-NEXT:    uunpkhi z8.d, z6.s
-; CHECK-NEXT:    uunpkhi z29.d, z5.s
-; CHECK-NEXT:    uunpkhi z11.d, z28.s
-; CHECK-NEXT:    uunpklo z10.d, z28.s
+; CHECK-NEXT:    uunpklo z27.d, z0.s
+; CHECK-NEXT:    uunpklo z31.d, z5.s
+; CHECK-NEXT:    uunpkhi z8.d, z5.s
+; CHECK-NEXT:    uunpkhi z30.d, z0.s
+; CHECK-NEXT:    uunpkhi z11.d, z29.s
+; CHECK-NEXT:    uunpklo z10.d, z29.s
 ; CHECK-NEXT:    uunpklo z15.s, z3.h
 ; CHECK-NEXT:    uunpklo z16.d, z14.s
 ; CHECK-NEXT:    uunpkhi z14.d, z14.s
 ; CHECK-NEXT:    mov z24.d, #0x8000000000000000
-; CHECK-NEXT:    movprfx z1, z7
-; CHECK-NEXT:    frintx z1.h, p0/m, z7.h
 ; CHECK-NEXT:    movprfx z5, z27
 ; CHECK-NEXT:    frintx z5.h, p0/m, z27.h
+; CHECK-NEXT:    movprfx z1, z6
+; CHECK-NEXT:    frintx z1.h, p0/m, z6.h
 ; CHECK-NEXT:    frintx z4.h, p0/m, z4.h
 ; CHECK-NEXT:    movprfx z12, z31
 ; CHECK-NEXT:    frintx z12.h, p0/m, z31.h
 ; CHECK-NEXT:    movprfx z27, z8
 ; CHECK-NEXT:    frintx z27.h, p0/m, z8.h
-; CHECK-NEXT:    movprfx z6, z29
-; CHECK-NEXT:    frintx z6.h, p0/m, z29.h
+; CHECK-NEXT:    movprfx z6, z30
+; CHECK-NEXT:    frintx z6.h, p0/m, z30.h
 ; CHECK-NEXT:    movprfx z31, z10
 ; CHECK-NEXT:    frintx z31.h, p0/m, z10.h
-; CHECK-NEXT:    mov z7.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z8.d, #0x8000000000000000
+; CHECK-NEXT:    frintx z11.h, p0/m, z11.h
 ; CHECK-NEXT:    movprfx z3, z16
 ; CHECK-NEXT:    frintx z3.h, p0/m, z16.h
-; CHECK-NEXT:    frintx z11.h, p0/m, z11.h
-; CHECK-NEXT:    mov z29.h, w9
+; CHECK-NEXT:    mov z30.h, w9
 ; CHECK-NEXT:    uunpklo z10.d, z13.s
 ; CHECK-NEXT:    uunpkhi z13.d, z13.s
 ; CHECK-NEXT:    uunpkhi z20.d, z15.s
@@ -355,124 +349,124 @@ define <vscale x 32 x iXLen> @lrint_v32f16(<vscale x 32 x half> %x) {
 ; CHECK-NEXT:    uunpklo z15.d, z15.s
 ; CHECK-NEXT:    mov z2.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z21.d, #0x8000000000000000
+; CHECK-NEXT:    frintx z10.h, p0/m, z10.h
 ; CHECK-NEXT:    mov z26.d, #0x8000000000000000
-; CHECK-NEXT:    mov z28.d, #0x7fffffffffffffff
+; CHECK-NEXT:    mov z29.d, #0x7fffffffffffffff
 ; CHECK-NEXT:    movprfx z19, z13
 ; CHECK-NEXT:    frintx z19.h, p0/m, z13.h
 ; CHECK-NEXT:    movprfx z13, z14
 ; CHECK-NEXT:    frintx z13.h, p0/m, z14.h
-; CHECK-NEXT:    frintx z10.h, p0/m, z10.h
 ; CHECK-NEXT:    frintx z16.h, p0/m, z16.h
 ; CHECK-NEXT:    mov z22.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z23.d, #0x8000000000000000
-; CHECK-NEXT:    frintx z15.h, p0/m, z15.h
 ; CHECK-NEXT:    mov z14.d, #0x8000000000000000
-; CHECK-NEXT:    fcmge p4.h, p0/z, z4.h, z30.h
-; CHECK-NEXT:    fcmge p2.h, p0/z, z12.h, z30.h
-; CHECK-NEXT:    fcmgt p9.h, p0/z, z12.h, z29.h
+; CHECK-NEXT:    frintx z15.h, p0/m, z15.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z4.h, z28.h
+; CHECK-NEXT:    fcmge p2.h, p0/z, z12.h, z28.h
+; CHECK-NEXT:    fcmgt p9.h, p0/z, z12.h, z30.h
 ; CHECK-NEXT:    fcmuo p8.h, p0/z, z12.h, z12.h
 ; CHECK-NEXT:    fcvtzs z7.d, p4/m, z4.h
 ; CHECK-NEXT:    fcvtzs z8.d, p2/m, z12.h
 ; CHECK-NEXT:    mov z12.d, #0x8000000000000000
-; CHECK-NEXT:    fcmge p4.h, p0/z, z27.h, z30.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z27.h, z28.h
 ; CHECK-NEXT:    fcmuo p10.h, p0/z, z11.h, z11.h
-; CHECK-NEXT:    fcmge p3.h, p0/z, z5.h, z30.h
-; CHECK-NEXT:    mov z8.d, p9/m, z28.d
+; CHECK-NEXT:    fcmge p3.h, p0/z, z5.h, z28.h
+; CHECK-NEXT:    mov z8.d, p9/m, z29.d
 ; CHECK-NEXT:    fcvtzs z9.d, p4/m, z27.h
-; CHECK-NEXT:    fcmge p4.h, p0/z, z11.h, z30.h
+; CHECK-NEXT:    fcmge p4.h, p0/z, z11.h, z28.h
 ; CHECK-NEXT:    fcvtzs z24.d, p3/m, z5.h
 ; CHECK-NEXT:    mov z8.d, p8/m, #0 // =0x0
-; CHECK-NEXT:    fcmge p1.h, p0/z, z6.h, z30.h
-; CHECK-NEXT:    fcmge p5.h, p0/z, z1.h, z30.h
+; CHECK-NEXT:    fcmge p1.h, p0/z, z6.h, z28.h
+; CHECK-NEXT:    fcmge p5.h, p0/z, z1.h, z28.h
 ; CHECK-NEXT:    str z8, [x8, #4, mul vl]
 ; CHECK-NEXT:    fcvtzs z12.d, p4/m, z11.h
-; CHECK-NEXT:    fcmgt p4.h, p0/z, z11.h, z29.h
+; CHECK-NEXT:    fcmgt p4.h, p0/z, z11.h, z30.h
 ; CHECK-NEXT:    uunpkhi z11.d, z17.s
 ; CHECK-NEXT:    movprfx z17, z20
 ; CHECK-NEXT:    frintx z17.h, p0/m, z20.h
 ; CHECK-NEXT:    fcvtzs z25.d, p1/m, z6.h
 ; CHECK-NEXT:    mov z20.d, #0x8000000000000000
 ; CHECK-NEXT:    fcvtzs z0.d, p5/m, z1.h
-; CHECK-NEXT:    fcmge p6.h, p0/z, z10.h, z30.h
+; CHECK-NEXT:    fcmge p6.h, p0/z, z10.h, z28.h
 ; CHECK-NEXT:    frintx z11.h, p0/m, z11.h
-; CHECK-NEXT:    fcmge p3.h, p0/z, z31.h, z30.h
-; CHECK-NEXT:    fcmge p1.h, p0/z, z13.h, z30.h
+; CHECK-NEXT:    fcmge p3.h, p0/z, z31.h, z28.h
+; CHECK-NEXT:    fcmge p1.h, p0/z, z13.h, z28.h
 ; CHECK-NEXT:    fcvtzs z18.d, p6/m, z10.h
-; CHECK-NEXT:    fcmgt p11.h, p0/z, z10.h, z29.h
-; CHECK-NEXT:    fcmge p5.h, p0/z, z11.h, z30.h
+; CHECK-NEXT:    fcmgt p11.h, p0/z, z10.h, z30.h
+; CHECK-NEXT:    fcmge p5.h, p0/z, z11.h, z28.h
 ; CHECK-NEXT:    fcvtzs z2.d, p3/m, z31.h
 ; CHECK-NEXT:    fcvtzs z21.d, p1/m, z13.h
-; CHECK-NEXT:    fcmge p2.h, p0/z, z17.h, z30.h
-; CHECK-NEXT:    fcmge p3.h, p0/z, z16.h, z30.h
+; CHECK-NEXT:    fcmge p2.h, p0/z, z17.h, z28.h
+; CHECK-NEXT:    fcmge p3.h, p0/z, z16.h, z28.h
 ; CHECK-NEXT:    fcmuo p1.h, p0/z, z10.h, z10.h
-; CHECK-NEXT:    sel z10.d, p4, z28.d, z12.d
-; CHECK-NEXT:    sel z12.d, p11, z28.d, z18.d
+; CHECK-NEXT:    sel z10.d, p4, z29.d, z12.d
+; CHECK-NEXT:    sel z12.d, p11, z29.d, z18.d
 ; CHECK-NEXT:    fcvtzs z26.d, p5/m, z11.h
 ; CHECK-NEXT:    fcvtzs z22.d, p2/m, z17.h
-; CHECK-NEXT:    fcmgt p4.h, p0/z, z11.h, z29.h
+; CHECK-NEXT:    fcmgt p4.h, p0/z, z11.h, z30.h
 ; CHECK-NEXT:    fcvtzs z23.d, p3/m, z16.h
 ; CHECK-NEXT:    mov z10.d, p10/m, #0 // =0x0
 ; CHECK-NEXT:    mov z12.d, p1/m, #0 // =0x0
-; CHECK-NEXT:    fcmge p6.h, p0/z, z19.h, z30.h
+; CHECK-NEXT:    fcmge p6.h, p0/z, z19.h, z28.h
 ; CHECK-NEXT:    str z10, [x8, #7, mul vl]
-; CHECK-NEXT:    fcmge p7.h, p0/z, z3.h, z30.h
+; CHECK-NEXT:    fcmge p7.h, p0/z, z3.h, z28.h
 ; CHECK-NEXT:    str z12, [x8, #8, mul vl]
-; CHECK-NEXT:    mov z26.d, p4/m, z28.d
-; CHECK-NEXT:    fcmge p2.h, p0/z, z15.h, z30.h
-; CHECK-NEXT:    mov z30.d, #0x8000000000000000
+; CHECK-NEXT:    mov z26.d, p4/m, z29.d
+; CHECK-NEXT:    fcmge p2.h, p0/z, z15.h, z28.h
+; CHECK-NEXT:    mov z28.d, #0x8000000000000000
 ; CHECK-NEXT:    fcvtzs z14.d, p6/m, z19.h
-; CHECK-NEXT:    fcmgt p5.h, p0/z, z16.h, z29.h
-; CHECK-NEXT:    fcmgt p3.h, p0/z, z17.h, z29.h
+; CHECK-NEXT:    fcmgt p5.h, p0/z, z16.h, z30.h
+; CHECK-NEXT:    fcmgt p3.h, p0/z, z17.h, z30.h
 ; CHECK-NEXT:    fcvtzs z20.d, p7/m, z3.h
-; CHECK-NEXT:    fcvtzs z30.d, p2/m, z15.h
+; CHECK-NEXT:    fcvtzs z28.d, p2/m, z15.h
 ; CHECK-NEXT:    fcmuo p1.h, p0/z, z11.h, z11.h
 ; CHECK-NEXT:    fcmuo p2.h, p0/z, z16.h, z16.h
-; CHECK-NEXT:    sel z11.d, p5, z28.d, z23.d
-; CHECK-NEXT:    sel z16.d, p3, z28.d, z22.d
-; CHECK-NEXT:    fcmgt p4.h, p0/z, z19.h, z29.h
-; CHECK-NEXT:    fcmgt p3.h, p0/z, z15.h, z29.h
+; CHECK-NEXT:    sel z11.d, p5, z29.d, z23.d
+; CHECK-NEXT:    sel z16.d, p3, z29.d, z22.d
+; CHECK-NEXT:    fcmgt p4.h, p0/z, z19.h, z30.h
+; CHECK-NEXT:    fcmgt p3.h, p0/z, z15.h, z30.h
 ; CHECK-NEXT:    mov z26.d, p1/m, #0 // =0x0
 ; CHECK-NEXT:    mov z11.d, p2/m, #0 // =0x0
-; CHECK-NEXT:    fcmgt p1.h, p0/z, z13.h, z29.h
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z13.h, z30.h
 ; CHECK-NEXT:    fcmuo p6.h, p0/z, z17.h, z17.h
 ; CHECK-NEXT:    str z26, [x8, #15, mul vl]
-; CHECK-NEXT:    sel z26.d, p4, z28.d, z14.d
+; CHECK-NEXT:    sel z26.d, p4, z29.d, z14.d
 ; CHECK-NEXT:    str z11, [x8, #14, mul vl]
-; CHECK-NEXT:    mov z30.d, p3/m, z28.d
-; CHECK-NEXT:    fcmgt p2.h, p0/z, z3.h, z29.h
+; CHECK-NEXT:    mov z28.d, p3/m, z29.d
+; CHECK-NEXT:    fcmgt p2.h, p0/z, z3.h, z30.h
 ; CHECK-NEXT:    fcmuo p4.h, p0/z, z13.h, z13.h
 ; CHECK-NEXT:    fcmuo p3.h, p0/z, z3.h, z3.h
-; CHECK-NEXT:    sel z3.d, p1, z28.d, z21.d
+; CHECK-NEXT:    sel z3.d, p1, z29.d, z21.d
 ; CHECK-NEXT:    mov z16.d, p6/m, #0 // =0x0
-; CHECK-NEXT:    fcmgt p12.h, p0/z, z27.h, z29.h
-; CHECK-NEXT:    sel z11.d, p2, z28.d, z20.d
+; CHECK-NEXT:    fcmgt p12.h, p0/z, z27.h, z30.h
+; CHECK-NEXT:    sel z11.d, p2, z29.d, z20.d
 ; CHECK-NEXT:    str z16, [x8, #13, mul vl]
 ; CHECK-NEXT:    mov z3.d, p4/m, #0 // =0x0
 ; CHECK-NEXT:    fcmuo p6.h, p0/z, z15.h, z15.h
-; CHECK-NEXT:    fcmgt p1.h, p0/z, z4.h, z29.h
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z4.h, z30.h
 ; CHECK-NEXT:    mov z11.d, p3/m, #0 // =0x0
-; CHECK-NEXT:    mov z9.d, p12/m, z28.d
+; CHECK-NEXT:    mov z9.d, p12/m, z29.d
 ; CHECK-NEXT:    str z3, [x8, #11, mul vl]
 ; CHECK-NEXT:    fcmuo p5.h, p0/z, z19.h, z19.h
-; CHECK-NEXT:    fcmgt p2.h, p0/z, z5.h, z29.h
+; CHECK-NEXT:    fcmgt p2.h, p0/z, z5.h, z30.h
 ; CHECK-NEXT:    str z11, [x8, #10, mul vl]
-; CHECK-NEXT:    mov z30.d, p6/m, #0 // =0x0
-; CHECK-NEXT:    sel z3.d, p1, z28.d, z7.d
-; CHECK-NEXT:    fcmgt p4.h, p0/z, z6.h, z29.h
+; CHECK-NEXT:    mov z28.d, p6/m, #0 // =0x0
+; CHECK-NEXT:    sel z3.d, p1, z29.d, z7.d
+; CHECK-NEXT:    fcmgt p4.h, p0/z, z6.h, z30.h
 ; CHECK-NEXT:    fcmuo p3.h, p0/z, z27.h, z27.h
-; CHECK-NEXT:    str z30, [x8, #12, mul vl]
+; CHECK-NEXT:    str z28, [x8, #12, mul vl]
 ; CHECK-NEXT:    mov z26.d, p5/m, #0 // =0x0
-; CHECK-NEXT:    sel z7.d, p2, z28.d, z24.d
-; CHECK-NEXT:    fcmgt p6.h, p0/z, z31.h, z29.h
-; CHECK-NEXT:    fcmgt p1.h, p0/z, z1.h, z29.h
+; CHECK-NEXT:    sel z7.d, p2, z29.d, z24.d
+; CHECK-NEXT:    fcmgt p6.h, p0/z, z31.h, z30.h
+; CHECK-NEXT:    fcmgt p1.h, p0/z, z1.h, z30.h
 ; CHECK-NEXT:    str z26, [x8, #9, mul vl]
-; CHECK-NEXT:    sel z24.d, p4, z28.d, z25.d
+; CHECK-NEXT:    sel z24.d, p4, z29.d, z25.d
 ; CHECK-NEXT:    mov z9.d, p3/m, #0 // =0x0
 ; CHECK-NEXT:    fcmuo p5.h, p0/z, z31.h, z31.h
 ; CHECK-NEXT:    fcmuo p2.h, p0/z, z6.h, z6.h
-; CHECK-NEXT:    mov z2.d, p6/m, z28.d
+; CHECK-NEXT:    mov z2.d, p6/m, z29.d
 ; CHECK-NEXT:    str z9, [x8, #5, mul vl]
-; CHECK-NEXT:    mov z0.d, p1/m, z28.d
+; CHECK-NEXT:    mov z0.d, p1/m, z29.d
 ; CHECK-NEXT:    fcmuo p3.h, p0/z, z5.h, z5.h
 ; CHECK-NEXT:    fcmuo p4.h, p0/z, z4.h, z4.h
 ; CHECK-NEXT:    mov z2.d, p5/m, #0 // =0x0
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
index 5cca5539048b5..1ceaa5ad27734 100644
--- a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
@@ -509,6 +509,294 @@ define <vscale x 2 x bfloat> @splat_nxv2bf16_imm() {
   ret <vscale x 2 x bfloat> splat(bfloat 1.0)
 }
 
+define <vscale x 2 x half> @splat_nzero_nxv2f16() {
+; CHECK-LABEL: splat_nzero_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0x8000
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x half> splat (half -0.0)
+}
+
+define <vscale x 4 x half> @splat_nzero_nxv4f16() {
+; CHECK-LABEL: splat_nzero_nxv4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0x8000
+; CHECK-NEXT:    ret
+  ret <vscale x 4 x half> splat (half -0.0)
+}
+
+define <vscale x 8 x half> @splat_nzero_nxv8f16() {
+; CHECK-LABEL: splat_nzero_nxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0x8000
+; CHECK-NEXT:    ret
+  ret <vscale x 8 x half> splat (half -0.0)
+}
+
+define <vscale x 2 x float> @splat_nzero_nxv2f32() {
+; CHECK-LABEL: splat_nzero_nxv2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.s, #0x80000000
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x float> splat (float -0.0)
+}
+
+define <vscale x 4 x float> @splat_nzero_nxv4f32() {
+; CHECK-LABEL: splat_nzero_nxv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.s, #0x80000000
+; CHECK-NEXT:    ret
+  ret <vscale x 4 x float> splat (float -0.0)
+}
+
+define <vscale x 2 x double> @splat_nzero_nxv2f64() {
+; CHECK-LABEL: splat_nzero_nxv2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, #0x8000000000000000
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x double> splat (double -0.0)
+}
+
+define <vscale x 2 x bfloat> @splat_nzero_nxv2bf16() {
+; CHECK-LABEL: splat_nzero_nxv2bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0x8000
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x bfloat> splat (bfloat -0.0)
+}
+
+define <vscale x 4 x bfloat> @splat_nzero_nxv4bf16() {
+; CHECK-LABEL: splat_nzero_nxv4bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0x8000
+; CHECK-NEXT:    ret
+  ret <vscale x 4 x bfloat> splat (bfloat -0.0)
+}
+
+define <vscale x 8 x bfloat> @splat_nzero_nxv8bf16() {
+; CHECK-LABEL: splat_nzero_nxv8bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0x8000
+; CHECK-NEXT:    ret
+  ret <vscale x 8 x bfloat> splat (bfloat -0.0)
+}
+
+define <vscale x 2 x half> @splat_pinf_nxv2f16() {
+; CHECK-LABEL: splat_pinf_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0x7c00
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x half> splat (half 0x7FF0000000000000)
+}
+
+define <vscale x 4 x half> @splat_pinf_nxv4f16() {
+; CHECK-LABEL: splat_pinf_nxv4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0x7c00
+; CHECK-NEXT:    ret
+  ret <vscale x 4 x half> splat (half 0x7FF0000000000000)
+}
+
+define <vscale x 8 x half> @splat_pinf_nxv8f16() {
+; CHECK-LABEL: splat_pinf_nxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0x7c00
+; CHECK-NEXT:    ret
+  ret <vscale x 8 x half> splat (half 0x7FF0000000000000)
+}
+
+define <vscale x 2 x float> @splat_pinf_nxv2f32() {
+; CHECK-LABEL: splat_pinf_nxv2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.s, #0x7f800000
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x float> splat (float 0x7FF0000000000000)
+}
+
+define <vscale x 4 x float> @splat_pinf_nxv4f32() {
+; CHECK-LABEL: splat_pinf_nxv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.s, #0x7f800000
+; CHECK-NEXT:    ret
+  ret <vscale x 4 x float> splat (float 0x7FF0000000000000)
+}
+
+define <vscale x 2 x double> @splat_pinf_nxv2f64() {
+; CHECK-LABEL: splat_pinf_nxv2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, #0x7ff0000000000000
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x double> splat (double 0x7FF0000000000000)
+}
+
+define <vscale x 2 x bfloat> @splat_pinf_nxv2bf16() {
+; CHECK-LABEL: splat_pinf_nxv2bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.h, #32640 // =0x7f80
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x bfloat> splat (bfloat 0x7FF0000000000000)
+}
+
+define <vscale x 4 x bfloat> @splat_pinf_nxv4bf16() {
+; CHECK-LABEL: splat_pinf_nxv4bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.h, #32640 // =0x7f80
+; CHECK-NEXT:    ret
+  ret <vscale x 4 x bfloat> splat (bfloat 0x7FF0000000000000)
+}
+
+define <vscale x 8 x bfloat> @splat_pinf_nxv8bf16() {
+; CHECK-LABEL: splat_pinf_nxv8bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.h, #32640 // =0x7f80
+; CHECK-NEXT:    ret
+  ret <vscale x 8 x bfloat> splat (bfloat 0x7FF0000000000000)
+}
+
+define <vscale x 2 x half> @splat_ninf_nxv2f16() {
+; CHECK-LABEL: splat_ninf_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0xfc00
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x half> splat (half 0xFFF0000000000000)
+}
+
+define <vscale x 4 x half> @splat_ninf_nxv4f16() {
+; CHECK-LABEL: splat_ninf_nxv4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0xfc00
+; CHECK-NEXT:    ret
+  ret <vscale x 4 x half> splat (half 0xFFF0000000000000)
+}
+
+define <vscale x 8 x half> @splat_ninf_nxv8f16() {
+; CHECK-LABEL: splat_ninf_nxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0xfc00
+; CHECK-NEXT:    ret
+  ret <vscale x 8 x half> splat (half 0xFFF0000000000000)
+}
+
+define <vscale x 2 x float> @splat_ninf_nxv2f32() {
+; CHECK-LABEL: splat_ninf_nxv2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.s, #0xff800000
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x float> splat (float 0xFFF0000000000000)
+}
+
+define <vscale x 4 x float> @splat_ninf_nxv4f32() {
+; CHECK-LABEL: splat_ninf_nxv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.s, #0xff800000
+; CHECK-NEXT:    ret
+  ret <vscale x 4 x float> splat (float 0xFFF0000000000000)
+}
+
+define <vscale x 2 x double> @splat_ninf_nxv2f64() {
+; CHECK-LABEL: splat_ninf_nxv2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, #0xfff0000000000000
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x double> splat (double 0xFFF0000000000000)
+}
+
+define <vscale x 2 x bfloat> @splat_ninf_nxv2bf16() {
+; CHECK-LABEL: splat_ninf_nxv2bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0xff80
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x bfloat> splat (bfloat 0xFFF0000000000000)
+}
+
+define <vscale x 4 x bfloat> @splat_ninf_nxv4bf16() {
+; CHECK-LABEL: splat_ninf_nxv4bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0xff80
+; CHECK-NEXT:    ret
+  ret <vscale x 4 x bfloat> splat (bfloat 0xFFF0000000000000)
+}
+
+define <vscale x 8 x bfloat> @splat_ninf_nxv8bf16() {
+; CHECK-LABEL: splat_ninf_nxv8bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0xff80
+; CHECK-NEXT:    ret
+  ret <vscale x 8 x bfloat> splat (bfloat 0xFFF0000000000000)
+}
+
+define <vscale x 2 x half> @splat_nan_nxv2f16() {
+; CHECK-LABEL: splat_nan_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0x7e00
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x half> splat (half 0x7FF8000000000000)
+}
+
+define <vscale x 4 x half> @splat_nan_nxv4f16() {
+; CHECK-LABEL: splat_nan_nxv4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0x7e00
+; CHECK-NEXT:    ret
+  ret <vscale x 4 x half> splat (half 0x7FF8000000000000)
+}
+
+define <vscale x 8 x half> @splat_nan_nxv8f16() {
+; CHECK-LABEL: splat_nan_nxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupm z0.h, #0x7e00
+; CHECK-NEXT:    ret
+  ret <vscale x 8 x half> splat (half 0x7FF8000000000000)
+}
+
+define <vscale x 2 x float> @splat_nan_nxv2f32() {
+; CHECK-LABEL: splat_nan_nxv2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.s, #0x7fc00000
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x float> splat (float 0x7FF8000000000000)
+}
+
+define <vscale x 4 x float> @splat_nan_nxv4f32() {
+; CHECK-LABEL: splat_nan_nxv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.s, #0x7fc00000
+; CHECK-NEXT:    ret
+  ret <vscale x 4 x float> splat (float 0x7FF8000000000000)
+}
+
+define <vscale x 2 x double> @splat_nan_nxv2f64() {
+; CHECK-LABEL: splat_nan_nxv2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, #0x7ff8000000000000
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x double> splat (double 0x7FF8000000000000)
+}
+
+define <vscale x 2 x bfloat> @splat_nan_nxv2bf16() {
+; CHECK-LABEL: splat_nan_nxv2bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.h, #32704 // =0x7fc0
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x bfloat> splat (bfloat 0x7FF8000000000000)
+}
+
+define <vscale x 4 x bfloat> @splat_nan_nxv4bf16() {
+; CHECK-LABEL: splat_nan_nxv4bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.h, #32704 // =0x7fc0
+; CHECK-NEXT:    ret
+  ret <vscale x 4 x bfloat> splat (bfloat 0x7FF8000000000000)
+}
+
+define <vscale x 8 x bfloat> @splat_nan_nxv8bf16() {
+; CHECK-LABEL: splat_nan_nxv8bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.h, #32704 // =0x7fc0
+; CHECK-NEXT:    ret
+  ret <vscale x 8 x bfloat> splat (bfloat 0x7FF8000000000000)
+}
+
 define <vscale x 4 x i32> @splat_nxv4i32_fold(<vscale x 4 x i32> %x) {
 ; CHECK-LABEL: splat_nxv4i32_fold:
 ; CHECK:       // %bb.0:
@@ -581,8 +869,8 @@ define <vscale x 2 x double> @splat_nxv2f64_imm_out_of_range() {
 ; CHECK-LABEL: splat_nxv2f64_imm_out_of_range:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    adrp x8, .LCPI60_0
-; CHECK-NEXT:    add x8, x8, :lo12:.LCPI60_0
+; CHECK-NEXT:    adrp x8, .LCPI96_0
+; CHECK-NEXT:    add x8, x8, :lo12:.LCPI96_0
 ; CHECK-NEXT:    ld1rd { z0.d }, p0/z, [x8]
 ; CHECK-NEXT:    ret
   ret <vscale x 2 x double> splat(double 3.33)
diff --git a/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll b/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll
index 6b5b3d6d436cb..b04029c273ae2 100644
--- a/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll
@@ -338,8 +338,7 @@ ret <vscale x 2 x double> %sel
 define <vscale x 8 x half> @sel_merge_nxv8f16_negative_zero(<vscale x 8 x i1> %p, <vscale x 8 x half> %in) {
 ; CHECK-LABEL: sel_merge_nxv8f16_negative_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #32768 // =0x8000
-; CHECK-NEXT:    mov z1.h, w8
+; CHECK-NEXT:    dupm z1.h, #0x8000
 ; CHECK-NEXT:    mov z0.h, p0/m, z1.h
 ; CHECK-NEXT:    ret
 %sel = select <vscale x 8 x i1> %p, <vscale x 8 x half> splat (half -0.0), <vscale x 8 x half> %in
@@ -349,8 +348,7 @@ ret <vscale x 8 x half> %sel
 define <vscale x 4 x half> @sel_merge_nx4f16_negative_zero(<vscale x 4 x i1> %p, <vscale x 4 x half> %in) {
 ; CHECK-LABEL: sel_merge_nx4f16_negative_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #32768 // =0x8000
-; CHECK-NEXT:    mov z1.h, w8
+; CHECK-NEXT:    dupm z1.h, #0x8000
 ; CHECK-NEXT:    mov z0.s, p0/m, z1.s
 ; CHECK-NEXT:    ret
 %sel = select <vscale x 4 x i1> %p, <vscale x 4 x half> splat (half -0.0), <vscale x 4 x half> %in
@@ -360,8 +358,7 @@ ret <vscale x 4 x half> %sel
 define <vscale x 2 x half> @sel_merge_nx2f16_negative_zero(<vscale x 2 x i1> %p, <vscale x 2 x half> %in) {
 ; CHECK-LABEL: sel_merge_nx2f16_negative_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #32768 // =0x8000
-; CHECK-NEXT:    mov z1.h, w8
+; CHECK-NEXT:    dupm z1.h, #0x8000
 ; CHECK-NEXT:    mov z0.d, p0/m, z1.d
 ; CHECK-NEXT:    ret
 %sel = select <vscale x 2 x i1> %p, <vscale x 2 x half> splat (half -0.0), <vscale x 2 x half> %in
@@ -371,8 +368,7 @@ ret <vscale x 2 x half> %sel
 define <vscale x 4 x float> @sel_merge_nx4f32_negative_zero(<vscale x 4 x i1> %p, <vscale x 4 x float> %in) {
 ; CHECK-LABEL: sel_merge_nx4f32_negative_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #-2147483648 // =0x80000000
-; CHECK-NEXT:    mov z1.s, w8
+; CHECK-NEXT:    mov z1.s, #0x80000000
 ; CHECK-NEXT:    mov z0.s, p0/m, z1.s
 ; CHECK-NEXT:    ret
 %sel = select <vscale x 4 x i1> %p, <vscale x 4 x float> splat (float -0.0), <vscale x 4 x float> %in
@@ -382,8 +378,7 @@ ret <vscale x 4 x float> %sel
 define <vscale x 2 x float> @sel_merge_nx2f32_negative_zero(<vscale x 2 x i1> %p, <vscale x 2 x float> %in) {
 ; CHECK-LABEL: sel_merge_nx2f32_negative_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #-2147483648 // =0x80000000
-; CHECK-NEXT:    mov z1.s, w8
+; CHECK-NEXT:    mov z1.s, #0x80000000
 ; CHECK-NEXT:    mov z0.d, p0/m, z1.d
 ; CHECK-NEXT:    ret
 %sel = select <vscale x 2 x i1> %p, <vscale x 2 x float> splat (float -0.0), <vscale x 2 x float> %in
@@ -393,8 +388,7 @@ ret <vscale x 2 x float> %sel
 define <vscale x 2 x double> @sel_merge_nx2f64_negative_zero(<vscale x 2 x i1> %p, <vscale x 2 x double> %in) {
 ; CHECK-LABEL: sel_merge_nx2f64_negative_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
-; CHECK-NEXT:    mov z1.d, x8
+; CHECK-NEXT:    mov z1.d, #0x8000000000000000
 ; CHECK-NEXT:    mov z0.d, p0/m, z1.d
 ; CHECK-NEXT:    ret
 %sel = select <vscale x 2 x i1> %p, <vscale x 2 x double> splat (double -0.0), <vscale x 2 x double> %in
diff --git a/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll
index edfd80b4f2706..ace0c83e63c7c 100644
--- a/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll
@@ -4,20 +4,28 @@
 define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
 ; AARCH-LABEL: muloti_test:
 ; AARCH:       // %bb.0: // %start
+; AARCH-NEXT:    orr x8, x1, x3
+; AARCH-NEXT:    cbz x8, .LBB0_2
+; AARCH-NEXT:  // %bb.1: // %overflow
 ; AARCH-NEXT:    mul x9, x3, x0
 ; AARCH-NEXT:    cmp x1, #0
 ; AARCH-NEXT:    ccmp x3, #0, #4, ne
-; AARCH-NEXT:    umulh x8, x1, x2
-; AARCH-NEXT:    umulh x10, x3, x0
+; AARCH-NEXT:    umulh x10, x1, x2
+; AARCH-NEXT:    umulh x8, x3, x0
 ; AARCH-NEXT:    madd x9, x1, x2, x9
-; AARCH-NEXT:    ccmp xzr, x8, #0, eq
-; AARCH-NEXT:    umulh x11, x0, x2
 ; AARCH-NEXT:    ccmp xzr, x10, #0, eq
+; AARCH-NEXT:    umulh x11, x0, x2
+; AARCH-NEXT:    ccmp xzr, x8, #0, eq
 ; AARCH-NEXT:    mul x0, x0, x2
 ; AARCH-NEXT:    cset w8, ne
 ; AARCH-NEXT:    adds x1, x11, x9
 ; AARCH-NEXT:    csinc w2, w8, wzr, lo
 ; AARCH-NEXT:    ret
+; AARCH-NEXT:  .LBB0_2: // %overflow.no
+; AARCH-NEXT:    umulh x1, x0, x2
+; AARCH-NEXT:    mul x0, x0, x2
+; AARCH-NEXT:    mov w2, wzr
+; AARCH-NEXT:    ret
 start:
   %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2
   %1 = extractvalue { i128, i1 } %0, 0
@@ -35,45 +43,56 @@ start:
 define i128 @__muloti4(i128 %0, i128 %1, ptr nocapture nonnull writeonly align 4 %2) #2 {
 ; AARCH-LABEL: __muloti4:
 ; AARCH:       // %bb.0: // %Entry
-; AARCH-NEXT:    asr x11, x1, #63
-; AARCH-NEXT:    asr x9, x3, #63
-; AARCH-NEXT:    umulh x12, x0, x2
-; AARCH-NEXT:    mov x8, x1
+; AARCH-NEXT:    eor x8, x3, x2, asr #63
+; AARCH-NEXT:    eor x9, x1, x0, asr #63
 ; AARCH-NEXT:    str wzr, [x4]
-; AARCH-NEXT:    mul x13, x1, x2
-; AARCH-NEXT:    umulh x10, x1, x2
-; AARCH-NEXT:    mul x11, x11, x2
-; AARCH-NEXT:    adds x12, x13, x12
-; AARCH-NEXT:    mul x15, x0, x3
-; AARCH-NEXT:    umulh x14, x0, x3
-; AARCH-NEXT:    adc x10, x10, x11
-; AARCH-NEXT:    mul x9, x0, x9
-; AARCH-NEXT:    mul x16, x1, x3
-; AARCH-NEXT:    adds x1, x15, x12
-; AARCH-NEXT:    asr x12, x10, #63
-; AARCH-NEXT:    smulh x11, x8, x3
-; AARCH-NEXT:    adc x9, x14, x9
-; AARCH-NEXT:    asr x13, x9, #63
-; AARCH-NEXT:    adds x9, x10, x9
-; AARCH-NEXT:    asr x10, x1, #63
+; AARCH-NEXT:    orr x8, x9, x8
+; AARCH-NEXT:    cbz x8, .LBB1_2
+; AARCH-NEXT:  // %bb.1: // %overflow
+; AARCH-NEXT:    asr x9, x1, #63
+; AARCH-NEXT:    umulh x10, x0, x2
+; AARCH-NEXT:    asr x13, x3, #63
+; AARCH-NEXT:    mul x11, x1, x2
+; AARCH-NEXT:    umulh x8, x1, x2
+; AARCH-NEXT:    mul x9, x9, x2
+; AARCH-NEXT:    adds x10, x11, x10
+; AARCH-NEXT:    mul x14, x0, x3
+; AARCH-NEXT:    umulh x12, x0, x3
+; AARCH-NEXT:    adc x9, x8, x9
+; AARCH-NEXT:    mul x13, x0, x13
+; AARCH-NEXT:    adds x8, x14, x10
+; AARCH-NEXT:    mul x15, x1, x3
+; AARCH-NEXT:    smulh x10, x1, x3
+; AARCH-NEXT:    adc x11, x12, x13
+; AARCH-NEXT:    asr x12, x9, #63
+; AARCH-NEXT:    asr x13, x11, #63
+; AARCH-NEXT:    adds x9, x9, x11
+; AARCH-NEXT:    asr x11, x8, #63
 ; AARCH-NEXT:    mul x0, x0, x2
 ; AARCH-NEXT:    adc x12, x12, x13
-; AARCH-NEXT:    adds x9, x16, x9
-; AARCH-NEXT:    adc x11, x11, x12
-; AARCH-NEXT:    cmp x9, x10
-; AARCH-NEXT:    ccmp x11, x10, #0, eq
+; AARCH-NEXT:    adds x9, x15, x9
+; AARCH-NEXT:    adc x10, x10, x12
+; AARCH-NEXT:    cmp x9, x11
+; AARCH-NEXT:    ccmp x10, x11, #0, eq
 ; AARCH-NEXT:    cset w9, ne
-; AARCH-NEXT:    tbz x8, #63, .LBB1_2
-; AARCH-NEXT:  // %bb.1: // %Entry
-; AARCH-NEXT:    eor x8, x3, #0x8000000000000000
-; AARCH-NEXT:    orr x8, x2, x8
-; AARCH-NEXT:    cbz x8, .LBB1_3
-; AARCH-NEXT:  .LBB1_2: // %Else2
-; AARCH-NEXT:    cbz w9, .LBB1_4
-; AARCH-NEXT:  .LBB1_3: // %Then7
-; AARCH-NEXT:    mov w8, #1 // =0x1
-; AARCH-NEXT:    str w8, [x4]
-; AARCH-NEXT:  .LBB1_4: // %Block9
+; AARCH-NEXT:    tbnz x1, #63, .LBB1_3
+; AARCH-NEXT:    b .LBB1_4
+; AARCH-NEXT:  .LBB1_2: // %overflow.no
+; AARCH-NEXT:    smulh x8, x0, x2
+; AARCH-NEXT:    mov w9, wzr
+; AARCH-NEXT:    mul x0, x0, x2
+; AARCH-NEXT:    tbz x1, #63, .LBB1_4
+; AARCH-NEXT:  .LBB1_3: // %overflow.res
+; AARCH-NEXT:    eor x10, x3, #0x8000000000000000
+; AARCH-NEXT:    orr x10, x2, x10
+; AARCH-NEXT:    cbz x10, .LBB1_5
+; AARCH-NEXT:  .LBB1_4: // %Else2
+; AARCH-NEXT:    cbz w9, .LBB1_6
+; AARCH-NEXT:  .LBB1_5: // %Then7
+; AARCH-NEXT:    mov w9, #1 // =0x1
+; AARCH-NEXT:    str w9, [x4]
+; AARCH-NEXT:  .LBB1_6: // %Block9
+; AARCH-NEXT:    mov x1, x8
 ; AARCH-NEXT:    ret
 Entry:
   store i32 0, ptr %2, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/lds-branch-vmem-hazard.mir b/llvm/test/CodeGen/AMDGPU/lds-branch-vmem-hazard.mir
index 86e657093b5b2..ab4077d8f5b68 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-branch-vmem-hazard.mir
+++ b/llvm/test/CodeGen/AMDGPU/lds-branch-vmem-hazard.mir
@@ -269,11 +269,12 @@ body:            |
     S_ENDPGM 0
 ...
 
-# GCN-LABEL: name: no_hazard_lds_branch_flat
+# GCN-LABEL: name: hazard_lds_branch_flat
 # GCN:      bb.1:
+# GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
 # GCN-NEXT: FLAT_LOAD_DWORD
 ---
-name:            no_hazard_lds_branch_flat
+name:            hazard_lds_branch_flat
 body:            |
   bb.0:
     successors: %bb.1
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
index dd2cffd7bd161..dd19ba17bb292 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
@@ -1,16 +1,19 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN:  llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx704 < %s  | FileCheck --check-prefixes=GFX7CHECK,GFX7SELDAG %s
-; RUN:  llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx704 < %s  | FileCheck --check-prefixes=GFX7CHECK,GFX7GLISEL %s
+; RUN:  llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx704 < %s  | FileCheck --check-prefixes=GFX7CHECK,GFX7GLISEL %s
 ; RUN:  llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx803 < %s  | FileCheck --check-prefixes=GFX8CHECK,GFX8SELDAG %s
-; RUN:  llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx803 < %s  | FileCheck --check-prefixes=GFX8CHECK,GFX8GLISEL %s
+; RUN:  llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx803 < %s  | FileCheck --check-prefixes=GFX8CHECK,GFX8GLISEL %s
 ; RUN:  llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 < %s  | FileCheck --check-prefixes=GFX9CHECK,GFX9SELDAG %s
-; RUN:  llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908 < %s  | FileCheck --check-prefixes=GFX9CHECK,GFX9GLISEL %s
+; RUN:  llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx908 < %s  | FileCheck --check-prefixes=GFX9CHECK,GFX9GLISEL %s
 ; RUN:  llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10SELDAG %s
-; RUN:  llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10GLISEL %s
+; RUN:  llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10GLISEL %s
 ; RUN:  llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11SELDAG,GFX11SELDAG-TRUE16 %s
 ; RUN:  llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11SELDAG,GFX11SELDAG-FAKE16 %s
-; RUN:  llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL,GFX11GLISEL-TRUE16 %s
-; RUN:  llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL,GFX11GLISEL-FAKE16 %s
+; RUN:  llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL,GFX11GLISEL-TRUE16 %s
+; RUN:  llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL,GFX11GLISEL-FAKE16 %s
+
+; FIXME: There are code size regressions in GlobalISel due to use of SGPRs and
+; moving those SGPRs into VGPRs.
 
 define amdgpu_kernel void @sgpr_isnan_f16(ptr addrspace(1) %out, half %x) {
 ; GFX7SELDAG-LABEL: sgpr_isnan_f16:
@@ -34,48 +37,98 @@ define amdgpu_kernel void @sgpr_isnan_f16(ptr addrspace(1) %out, half %x) {
 ; GFX7GLISEL-NEXT:    s_mov_b32 s2, -1
 ; GFX7GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7GLISEL-NEXT:    s_and_b32 s3, s3, 0x7fff
+; GFX7GLISEL-NEXT:    s_and_b32 s3, 0xffff, s3
 ; GFX7GLISEL-NEXT:    s_cmpk_gt_u32 s3, 0x7c00
-; GFX7GLISEL-NEXT:    s_cselect_b32 s3, 1, 0
-; GFX7GLISEL-NEXT:    s_bfe_i32 s3, s3, 0x10000
+; GFX7GLISEL-NEXT:    s_cselect_b32 s3, -1, 0
 ; GFX7GLISEL-NEXT:    v_mov_b32_e32 v0, s3
 ; GFX7GLISEL-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX7GLISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX7GLISEL-NEXT:    s_endpgm
 ;
-; GFX8CHECK-LABEL: sgpr_isnan_f16:
-; GFX8CHECK:       ; %bb.0:
-; GFX8CHECK-NEXT:    s_load_dword s2, s[4:5], 0x2c
-; GFX8CHECK-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8CHECK-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8CHECK-NEXT:    v_cmp_class_f16_e64 s[2:3], s2, 3
-; GFX8CHECK-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[2:3]
-; GFX8CHECK-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8CHECK-NEXT:    flat_store_dword v[0:1], v2
-; GFX8CHECK-NEXT:    s_endpgm
-;
-; GFX9CHECK-LABEL: sgpr_isnan_f16:
-; GFX9CHECK:       ; %bb.0:
-; GFX9CHECK-NEXT:    s_load_dword s2, s[4:5], 0x2c
-; GFX9CHECK-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9CHECK-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9CHECK-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9CHECK-NEXT:    v_cmp_class_f16_e64 s[2:3], s2, 3
-; GFX9CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[2:3]
-; GFX9CHECK-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9CHECK-NEXT:    s_endpgm
-;
-; GFX10CHECK-LABEL: sgpr_isnan_f16:
-; GFX10CHECK:       ; %bb.0:
-; GFX10CHECK-NEXT:    s_clause 0x1
-; GFX10CHECK-NEXT:    s_load_dword s2, s[4:5], 0x2c
-; GFX10CHECK-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10CHECK-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10CHECK-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10CHECK-NEXT:    v_cmp_class_f16_e64 s2, s2, 3
-; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
-; GFX10CHECK-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX10CHECK-NEXT:    s_endpgm
+; GFX8SELDAG-LABEL: sgpr_isnan_f16:
+; GFX8SELDAG:       ; %bb.0:
+; GFX8SELDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GFX8SELDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8SELDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8SELDAG-NEXT:    v_cmp_class_f16_e64 s[2:3], s2, 3
+; GFX8SELDAG-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8SELDAG-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[2:3]
+; GFX8SELDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX8SELDAG-NEXT:    flat_store_dword v[0:1], v2
+; GFX8SELDAG-NEXT:    s_endpgm
+;
+; GFX8GLISEL-LABEL: sgpr_isnan_f16:
+; GFX8GLISEL:       ; %bb.0:
+; GFX8GLISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GFX8GLISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8GLISEL-NEXT:    v_cmp_class_f16_e64 s[2:3], s2, 3
+; GFX8GLISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX8GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
+; GFX8GLISEL-NEXT:    s_and_b32 s2, s2, 1
+; GFX8GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX8GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
+; GFX8GLISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8GLISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX8GLISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX8GLISEL-NEXT:    flat_store_dword v[0:1], v2
+; GFX8GLISEL-NEXT:    s_endpgm
+;
+; GFX9SELDAG-LABEL: sgpr_isnan_f16:
+; GFX9SELDAG:       ; %bb.0:
+; GFX9SELDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GFX9SELDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9SELDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9SELDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9SELDAG-NEXT:    v_cmp_class_f16_e64 s[2:3], s2, 3
+; GFX9SELDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[2:3]
+; GFX9SELDAG-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9SELDAG-NEXT:    s_endpgm
+;
+; GFX9GLISEL-LABEL: sgpr_isnan_f16:
+; GFX9GLISEL:       ; %bb.0:
+; GFX9GLISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GFX9GLISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9GLISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9GLISEL-NEXT:    v_cmp_class_f16_e64 s[2:3], s2, 3
+; GFX9GLISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX9GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
+; GFX9GLISEL-NEXT:    s_and_b32 s2, s2, 1
+; GFX9GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX9GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
+; GFX9GLISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9GLISEL-NEXT:    global_store_dword v1, v0, s[0:1]
+; GFX9GLISEL-NEXT:    s_endpgm
+;
+; GFX10SELDAG-LABEL: sgpr_isnan_f16:
+; GFX10SELDAG:       ; %bb.0:
+; GFX10SELDAG-NEXT:    s_clause 0x1
+; GFX10SELDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GFX10SELDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10SELDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10SELDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10SELDAG-NEXT:    v_cmp_class_f16_e64 s2, s2, 3
+; GFX10SELDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
+; GFX10SELDAG-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX10SELDAG-NEXT:    s_endpgm
+;
+; GFX10GLISEL-LABEL: sgpr_isnan_f16:
+; GFX10GLISEL:       ; %bb.0:
+; GFX10GLISEL-NEXT:    s_load_dword s0, s[4:5], 0x2c
+; GFX10GLISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10GLISEL-NEXT:    v_cmp_class_f16_e64 s2, s0, 3
+; GFX10GLISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX10GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
+; GFX10GLISEL-NEXT:    s_and_b32 s2, s2, 1
+; GFX10GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX10GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
+; GFX10GLISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX10GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10GLISEL-NEXT:    global_store_dword v1, v0, s[0:1]
+; GFX10GLISEL-NEXT:    s_endpgm
 ;
 ; GFX11SELDAG-TRUE16-LABEL: sgpr_isnan_f16:
 ; GFX11SELDAG-TRUE16:       ; %bb.0:
@@ -103,26 +156,36 @@ define amdgpu_kernel void @sgpr_isnan_f16(ptr addrspace(1) %out, half %x) {
 ;
 ; GFX11GLISEL-TRUE16-LABEL: sgpr_isnan_f16:
 ; GFX11GLISEL-TRUE16:       ; %bb.0:
-; GFX11GLISEL-TRUE16-NEXT:    s_clause 0x1
-; GFX11GLISEL-TRUE16-NEXT:    s_load_b32 s2, s[4:5], 0x2c
-; GFX11GLISEL-TRUE16-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11GLISEL-TRUE16-NEXT:    s_load_b32 s0, s[4:5], 0x2c
 ; GFX11GLISEL-TRUE16-NEXT:    v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 0
 ; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e32 vcc_lo, s2, v0.l
-; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e32 vcc_lo, s0, v0.l
+; GFX11GLISEL-TRUE16-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11GLISEL-TRUE16-NEXT:    s_cmp_lg_u32 vcc_lo, 0
+; GFX11GLISEL-TRUE16-NEXT:    s_cselect_b32 s2, 1, 0
+; GFX11GLISEL-TRUE16-NEXT:    s_and_b32 s2, s2, 1
+; GFX11GLISEL-TRUE16-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX11GLISEL-TRUE16-NEXT:    s_cselect_b32 s2, -1, 0
+; GFX11GLISEL-TRUE16-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11GLISEL-TRUE16-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX11GLISEL-TRUE16-NEXT:    s_endpgm
 ;
 ; GFX11GLISEL-FAKE16-LABEL: sgpr_isnan_f16:
 ; GFX11GLISEL-FAKE16:       ; %bb.0:
-; GFX11GLISEL-FAKE16-NEXT:    s_clause 0x1
-; GFX11GLISEL-FAKE16-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11GLISEL-FAKE16-NEXT:    s_load_b32 s0, s[4:5], 0x2c
+; GFX11GLISEL-FAKE16-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s2, s0, 3
 ; GFX11GLISEL-FAKE16-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11GLISEL-FAKE16-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11GLISEL-FAKE16-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX11GLISEL-FAKE16-NEXT:    s_cselect_b32 s2, 1, 0
+; GFX11GLISEL-FAKE16-NEXT:    s_and_b32 s2, s2, 1
+; GFX11GLISEL-FAKE16-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX11GLISEL-FAKE16-NEXT:    s_cselect_b32 s2, -1, 0
+; GFX11GLISEL-FAKE16-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s2, s2, 3
-; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
-; GFX11GLISEL-FAKE16-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11GLISEL-FAKE16-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX11GLISEL-FAKE16-NEXT:    s_endpgm
   %result = call i1 @llvm.is.fpclass.f16(half %x, i32 3)
   %sext = sext i1 %result to i32
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll
index 4f5432a202058..0a9fe10874c38 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll
@@ -1,14 +1,17 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN:  llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx704 < %s  | FileCheck --check-prefixes=GFX7CHECK,GFX7SELDAG %s
-; RUN:  llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx704 < %s  | FileCheck --check-prefixes=GFX7CHECK,GFX7GLISEL %s
+; RUN:  llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx704 < %s  | FileCheck --check-prefixes=GFX7CHECK,GFX7GLISEL %s
 ; RUN:  llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx803 < %s  | FileCheck --check-prefixes=GFX8CHECK,GFX8SELDAG %s
-; RUN:  llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx803 < %s  | FileCheck --check-prefixes=GFX8CHECK,GFX8GLISEL %s
-; RUN:  llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 < %s  | FileCheck --check-prefixes=GFX9CHECK %s
-; RUN:  llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908 < %s  | FileCheck --check-prefixes=GFX9CHECK %s
-; RUN:  llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK %s
-; RUN:  llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK %s
-; RUN:  llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefix=GFX11CHECK %s
-; RUN:  llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefix=GFX11CHECK %s
+; RUN:  llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx803 < %s  | FileCheck --check-prefixes=GFX8CHECK,GFX8GLISEL %s
+; RUN:  llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 < %s  | FileCheck --check-prefixes=GFX9CHECK,GFX9SELDAG %s
+; RUN:  llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx908 < %s  | FileCheck --check-prefixes=GFX9CHECK,GFX9GLISEL %s
+; RUN:  llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10SELDAG %s
+; RUN:  llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10GLISEL %s
+; RUN:  llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11SELDAG %s
+; RUN:  llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL %s
+
+; FIXME: There are code size regressions in GlobalISel due to use of SGPRs and
+; moving those SGPRs into VGPRs.
 
 define amdgpu_kernel void @sgpr_isnan_f32(ptr addrspace(1) %out, float %x) {
 ; GFX7SELDAG-LABEL: sgpr_isnan_f32:
@@ -30,58 +33,132 @@ define amdgpu_kernel void @sgpr_isnan_f32(ptr addrspace(1) %out, float %x) {
 ; GFX7GLISEL-NEXT:    s_mov_b32 s2, -1
 ; GFX7GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7GLISEL-NEXT:    v_cmp_class_f32_e64 s[4:5], s3, 3
-; GFX7GLISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[4:5]
+; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], s[4:5], s[4:5]
+; GFX7GLISEL-NEXT:    s_cselect_b32 s3, 1, 0
+; GFX7GLISEL-NEXT:    s_and_b32 s3, s3, 1
+; GFX7GLISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; GFX7GLISEL-NEXT:    s_cselect_b32 s3, -1, 0
+; GFX7GLISEL-NEXT:    v_mov_b32_e32 v0, s3
 ; GFX7GLISEL-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX7GLISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX7GLISEL-NEXT:    s_endpgm
 ;
-; GFX8CHECK-LABEL: sgpr_isnan_f32:
-; GFX8CHECK:       ; %bb.0:
-; GFX8CHECK-NEXT:    s_load_dword s2, s[4:5], 0x2c
-; GFX8CHECK-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8CHECK-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8CHECK-NEXT:    v_cmp_class_f32_e64 s[2:3], s2, 3
-; GFX8CHECK-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[2:3]
-; GFX8CHECK-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8CHECK-NEXT:    flat_store_dword v[0:1], v2
-; GFX8CHECK-NEXT:    s_endpgm
-;
-; GFX9CHECK-LABEL: sgpr_isnan_f32:
-; GFX9CHECK:       ; %bb.0:
-; GFX9CHECK-NEXT:    s_load_dword s2, s[4:5], 0x2c
-; GFX9CHECK-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9CHECK-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9CHECK-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9CHECK-NEXT:    v_cmp_class_f32_e64 s[2:3], s2, 3
-; GFX9CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[2:3]
-; GFX9CHECK-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9CHECK-NEXT:    s_endpgm
-;
-; GFX10CHECK-LABEL: sgpr_isnan_f32:
-; GFX10CHECK:       ; %bb.0:
-; GFX10CHECK-NEXT:    s_clause 0x1
-; GFX10CHECK-NEXT:    s_load_dword s2, s[4:5], 0x2c
-; GFX10CHECK-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10CHECK-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10CHECK-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10CHECK-NEXT:    v_cmp_class_f32_e64 s2, s2, 3
-; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
-; GFX10CHECK-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX10CHECK-NEXT:    s_endpgm
-;
-; GFX11CHECK-LABEL: sgpr_isnan_f32:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_clause 0x1
-; GFX11CHECK-NEXT:    s_load_b32 s2, s[4:5], 0x2c
-; GFX11CHECK-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11CHECK-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11CHECK-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f32_e64 s2, s2, 3
-; GFX11CHECK-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
-; GFX11CHECK-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11CHECK-NEXT:    s_endpgm
+; GFX8SELDAG-LABEL: sgpr_isnan_f32:
+; GFX8SELDAG:       ; %bb.0:
+; GFX8SELDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GFX8SELDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8SELDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8SELDAG-NEXT:    v_cmp_class_f32_e64 s[2:3], s2, 3
+; GFX8SELDAG-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8SELDAG-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[2:3]
+; GFX8SELDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX8SELDAG-NEXT:    flat_store_dword v[0:1], v2
+; GFX8SELDAG-NEXT:    s_endpgm
+;
+; GFX8GLISEL-LABEL: sgpr_isnan_f32:
+; GFX8GLISEL:       ; %bb.0:
+; GFX8GLISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GFX8GLISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8GLISEL-NEXT:    v_cmp_class_f32_e64 s[2:3], s2, 3
+; GFX8GLISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX8GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
+; GFX8GLISEL-NEXT:    s_and_b32 s2, s2, 1
+; GFX8GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX8GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
+; GFX8GLISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8GLISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX8GLISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX8GLISEL-NEXT:    flat_store_dword v[0:1], v2
+; GFX8GLISEL-NEXT:    s_endpgm
+;
+; GFX9SELDAG-LABEL: sgpr_isnan_f32:
+; GFX9SELDAG:       ; %bb.0:
+; GFX9SELDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GFX9SELDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9SELDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9SELDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9SELDAG-NEXT:    v_cmp_class_f32_e64 s[2:3], s2, 3
+; GFX9SELDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[2:3]
+; GFX9SELDAG-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9SELDAG-NEXT:    s_endpgm
+;
+; GFX9GLISEL-LABEL: sgpr_isnan_f32:
+; GFX9GLISEL:       ; %bb.0:
+; GFX9GLISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GFX9GLISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9GLISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9GLISEL-NEXT:    v_cmp_class_f32_e64 s[2:3], s2, 3
+; GFX9GLISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX9GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
+; GFX9GLISEL-NEXT:    s_and_b32 s2, s2, 1
+; GFX9GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX9GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
+; GFX9GLISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9GLISEL-NEXT:    global_store_dword v1, v0, s[0:1]
+; GFX9GLISEL-NEXT:    s_endpgm
+;
+; GFX10SELDAG-LABEL: sgpr_isnan_f32:
+; GFX10SELDAG:       ; %bb.0:
+; GFX10SELDAG-NEXT:    s_clause 0x1
+; GFX10SELDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GFX10SELDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10SELDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10SELDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10SELDAG-NEXT:    v_cmp_class_f32_e64 s2, s2, 3
+; GFX10SELDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
+; GFX10SELDAG-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX10SELDAG-NEXT:    s_endpgm
+;
+; GFX10GLISEL-LABEL: sgpr_isnan_f32:
+; GFX10GLISEL:       ; %bb.0:
+; GFX10GLISEL-NEXT:    s_load_dword s0, s[4:5], 0x2c
+; GFX10GLISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10GLISEL-NEXT:    v_cmp_class_f32_e64 s2, s0, 3
+; GFX10GLISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX10GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
+; GFX10GLISEL-NEXT:    s_and_b32 s2, s2, 1
+; GFX10GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX10GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
+; GFX10GLISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX10GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10GLISEL-NEXT:    global_store_dword v1, v0, s[0:1]
+; GFX10GLISEL-NEXT:    s_endpgm
+;
+; GFX11SELDAG-LABEL: sgpr_isnan_f32:
+; GFX11SELDAG:       ; %bb.0:
+; GFX11SELDAG-NEXT:    s_clause 0x1
+; GFX11SELDAG-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11SELDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11SELDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11SELDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11SELDAG-NEXT:    v_cmp_class_f32_e64 s2, s2, 3
+; GFX11SELDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11SELDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
+; GFX11SELDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11SELDAG-NEXT:    s_endpgm
+;
+; GFX11GLISEL-LABEL: sgpr_isnan_f32:
+; GFX11GLISEL:       ; %bb.0:
+; GFX11GLISEL-NEXT:    s_load_b32 s0, s[4:5], 0x2c
+; GFX11GLISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11GLISEL-NEXT:    v_cmp_class_f32_e64 s2, s0, 3
+; GFX11GLISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX11GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
+; GFX11GLISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11GLISEL-NEXT:    s_and_b32 s2, s2, 1
+; GFX11GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX11GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
+; GFX11GLISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11GLISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11GLISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GFX11GLISEL-NEXT:    s_endpgm
   %result = call i1 @llvm.is.fpclass.f32(float %x, i32 3)  ; nan
   %sext = sext i1 %result to i32
   store i32 %sext, ptr addrspace(1) %out, align 4
@@ -106,9 +183,14 @@ define amdgpu_kernel void @sgpr_isnan_f64(ptr addrspace(1) %out, double %x) {
 ; GFX7GLISEL:       ; %bb.0:
 ; GFX7GLISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
 ; GFX7GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX7GLISEL-NEXT:    v_cmp_class_f64_e64 s[2:3], s[2:3], 3
-; GFX7GLISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[2:3]
+; GFX7GLISEL-NEXT:    v_cmp_class_f64_e64 s[4:5], s[2:3], 3
 ; GFX7GLISEL-NEXT:    s_mov_b32 s2, -1
+; GFX7GLISEL-NEXT:    s_or_b64 s[4:5], s[4:5], s[4:5]
+; GFX7GLISEL-NEXT:    s_cselect_b32 s3, 1, 0
+; GFX7GLISEL-NEXT:    s_and_b32 s3, s3, 1
+; GFX7GLISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; GFX7GLISEL-NEXT:    s_cselect_b32 s3, -1, 0
+; GFX7GLISEL-NEXT:    v_mov_b32_e32 v0, s3
 ; GFX7GLISEL-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX7GLISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX7GLISEL-NEXT:    s_endpgm
@@ -131,40 +213,92 @@ define amdgpu_kernel void @sgpr_isnan_f64(ptr addrspace(1) %out, double %x) {
 ; GFX8GLISEL-NEXT:    v_cmp_class_f64_e64 s[2:3], s[2:3], 3
 ; GFX8GLISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX8GLISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8GLISEL-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[2:3]
+; GFX8GLISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX8GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
+; GFX8GLISEL-NEXT:    s_and_b32 s2, s2, 1
+; GFX8GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX8GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
+; GFX8GLISEL-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX8GLISEL-NEXT:    flat_store_dword v[0:1], v2
 ; GFX8GLISEL-NEXT:    s_endpgm
 ;
-; GFX9CHECK-LABEL: sgpr_isnan_f64:
-; GFX9CHECK:       ; %bb.0:
-; GFX9CHECK-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9CHECK-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9CHECK-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9CHECK-NEXT:    v_cmp_class_f64_e64 s[2:3], s[2:3], 3
-; GFX9CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[2:3]
-; GFX9CHECK-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9CHECK-NEXT:    s_endpgm
-;
-; GFX10CHECK-LABEL: sgpr_isnan_f64:
-; GFX10CHECK:       ; %bb.0:
-; GFX10CHECK-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX10CHECK-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10CHECK-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10CHECK-NEXT:    v_cmp_class_f64_e64 s2, s[2:3], 3
-; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
-; GFX10CHECK-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX10CHECK-NEXT:    s_endpgm
-;
-; GFX11CHECK-LABEL: sgpr_isnan_f64:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11CHECK-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11CHECK-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f64_e64 s2, s[2:3], 3
-; GFX11CHECK-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
-; GFX11CHECK-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11CHECK-NEXT:    s_endpgm
+; GFX9SELDAG-LABEL: sgpr_isnan_f64:
+; GFX9SELDAG:       ; %bb.0:
+; GFX9SELDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9SELDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9SELDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9SELDAG-NEXT:    v_cmp_class_f64_e64 s[2:3], s[2:3], 3
+; GFX9SELDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[2:3]
+; GFX9SELDAG-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9SELDAG-NEXT:    s_endpgm
+;
+; GFX9GLISEL-LABEL: sgpr_isnan_f64:
+; GFX9GLISEL:       ; %bb.0:
+; GFX9GLISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9GLISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9GLISEL-NEXT:    v_cmp_class_f64_e64 s[2:3], s[2:3], 3
+; GFX9GLISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX9GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
+; GFX9GLISEL-NEXT:    s_and_b32 s2, s2, 1
+; GFX9GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX9GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
+; GFX9GLISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9GLISEL-NEXT:    global_store_dword v1, v0, s[0:1]
+; GFX9GLISEL-NEXT:    s_endpgm
+;
+; GFX10SELDAG-LABEL: sgpr_isnan_f64:
+; GFX10SELDAG:       ; %bb.0:
+; GFX10SELDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10SELDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10SELDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10SELDAG-NEXT:    v_cmp_class_f64_e64 s2, s[2:3], 3
+; GFX10SELDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
+; GFX10SELDAG-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX10SELDAG-NEXT:    s_endpgm
+;
+; GFX10GLISEL-LABEL: sgpr_isnan_f64:
+; GFX10GLISEL:       ; %bb.0:
+; GFX10GLISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10GLISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10GLISEL-NEXT:    v_cmp_class_f64_e64 s2, s[2:3], 3
+; GFX10GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX10GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
+; GFX10GLISEL-NEXT:    s_and_b32 s2, s2, 1
+; GFX10GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX10GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
+; GFX10GLISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX10GLISEL-NEXT:    global_store_dword v1, v0, s[0:1]
+; GFX10GLISEL-NEXT:    s_endpgm
+;
+; GFX11SELDAG-LABEL: sgpr_isnan_f64:
+; GFX11SELDAG:       ; %bb.0:
+; GFX11SELDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11SELDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11SELDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11SELDAG-NEXT:    v_cmp_class_f64_e64 s2, s[2:3], 3
+; GFX11SELDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11SELDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
+; GFX11SELDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11SELDAG-NEXT:    s_endpgm
+;
+; GFX11GLISEL-LABEL: sgpr_isnan_f64:
+; GFX11GLISEL:       ; %bb.0:
+; GFX11GLISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11GLISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11GLISEL-NEXT:    v_cmp_class_f64_e64 s2, s[2:3], 3
+; GFX11GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX11GLISEL-NEXT:    s_cselect_b32 s2, 1, 0
+; GFX11GLISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11GLISEL-NEXT:    s_and_b32 s2, s2, 1
+; GFX11GLISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX11GLISEL-NEXT:    s_cselect_b32 s2, -1, 0
+; GFX11GLISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11GLISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX11GLISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GFX11GLISEL-NEXT:    s_endpgm
   %result = call i1 @llvm.is.fpclass.f64(double %x, i32 3)  ; nan
   %sext = sext i1 %result to i32
   store i32 %sext, ptr addrspace(1) %out, align 4
diff --git a/llvm/test/CodeGen/DirectX/ddx_coarse-errors.ll b/llvm/test/CodeGen/DirectX/ddx_coarse-errors.ll
new file mode 100644
index 0000000000000..0679eec31cec1
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/ddx_coarse-errors.ll
@@ -0,0 +1,15 @@
+; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s
+
+; The DXIL ddx.coarse operation (DerivCoarseX) does not support the double overload type, so lowering must report an error.
+; CHECK: in function ddx.coarse
+; CHECK-SAME: Cannot create DerivCoarseX operation: Invalid overload type
+
+; Function Attrs: noinline nounwind optnone
+define noundef double @ddx.coarse_double(double noundef %a) #0 {
+entry:
+  %a.addr = alloca double, align 8
+  store double %a, ptr %a.addr, align 8
+  %0 = load double, ptr %a.addr, align 8
+  %dx.ddx.coarse = call double @llvm.dx.ddx.coarse.f64(double %0)
+  ret double %dx.ddx.coarse
+}
diff --git a/llvm/test/CodeGen/DirectX/ddx_coarse.ll b/llvm/test/CodeGen/DirectX/ddx_coarse.ll
new file mode 100644
index 0000000000000..f6ea031273263
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/ddx_coarse.ll
@@ -0,0 +1,40 @@
+; RUN: opt -S  -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+
+; Make sure DXIL op calls (opcode 83) are generated for ddx_coarse on half, float, and <4 x float> (scalarized per element).
+
+define noundef half @deriv_coarse_x_half(half noundef %a) {
+; CHECK: call half @dx.op.unary.f16(i32 83, half %{{.*}})
+entry:
+  %dx.ddx.coarse = call half @llvm.dx.ddx.coarse.f16(half %a)
+  ret half %dx.ddx.coarse
+}
+
+define noundef float @deriv_coarse_x_float(float noundef %a) {
+; CHECK: call float @dx.op.unary.f32(i32 83, float %{{.*}})
+entry:
+  %dx.ddx.coarse = call float @llvm.dx.ddx.coarse.f32(float %a)
+  ret float %dx.ddx.coarse
+}
+
+define noundef <4 x float> @deriv_coarse_x_float4(<4 x float> noundef %a) {
+; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 83, float [[ee0]])
+; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 83, float [[ee1]])
+; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 83, float [[ee2]])
+; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 83, float [[ee3]])
+; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+; CHECK: ret <4 x float> %{{.*}}
+entry:
+  %dx.ddx.coarse = call <4 x float> @llvm.dx.ddx.coarse.v4f32(<4 x float> %a)
+  ret <4 x float> %dx.ddx.coarse
+}
+
+declare half @llvm.dx.ddx.coarse.f16(half)
+declare float @llvm.dx.ddx.coarse.f32(float)
+declare <4 x float> @llvm.dx.ddx.coarse.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/ddy_coarse-errors.ll b/llvm/test/CodeGen/DirectX/ddy_coarse-errors.ll
new file mode 100644
index 0000000000000..df8e3eb0f7e0b
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/ddy_coarse-errors.ll
@@ -0,0 +1,15 @@
+; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s
+
+; The DXIL ddy.coarse operation (DerivCoarseY) does not support the double overload type, so lowering must report an error.
+; CHECK: in function ddy.coarse
+; CHECK-SAME: Cannot create DerivCoarseY operation: Invalid overload type
+
+; Function Attrs: noinline nounwind optnone
+define noundef double @ddy.coarse_double(double noundef %a) #0 {
+entry:
+  %a.addr = alloca double, align 8
+  store double %a, ptr %a.addr, align 8
+  %0 = load double, ptr %a.addr, align 8
+  %dx.ddy.coarse = call double @llvm.dx.ddy.coarse.f64(double %0)
+  ret double %dx.ddy.coarse
+}
diff --git a/llvm/test/CodeGen/DirectX/ddy_coarse.ll b/llvm/test/CodeGen/DirectX/ddy_coarse.ll
new file mode 100644
index 0000000000000..e3337022e1b01
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/ddy_coarse.ll
@@ -0,0 +1,40 @@
+; RUN: opt -S  -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+
+; Make sure DXIL op calls (opcode 84) are generated for ddy_coarse on half, float, and <4 x float> (scalarized per element).
+
+define noundef half @deriv_coarse_y_half(half noundef %a) {
+; CHECK: call half @dx.op.unary.f16(i32 84, half %{{.*}})
+entry:
+  %dx.ddy.coarse = call half @llvm.dx.ddy.coarse.f16(half %a)
+  ret half %dx.ddy.coarse
+}
+
+define noundef float @deriv_coarse_y_float(float noundef %a) {
+; CHECK: call float @dx.op.unary.f32(i32 84, float %{{.*}})
+entry:
+  %dx.ddy.coarse = call float @llvm.dx.ddy.coarse.f32(float %a)
+  ret float %dx.ddy.coarse
+}
+
+define noundef <4 x float> @deriv_coarse_y_float4(<4 x float> noundef %a) {
+; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 84, float [[ee0]])
+; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 84, float [[ee1]])
+; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 84, float [[ee2]])
+; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 84, float [[ee3]])
+; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+; CHECK: ret <4 x float> %{{.*}}
+entry:
+  %dx.ddy.coarse = call <4 x float> @llvm.dx.ddy.coarse.v4f32(<4 x float> %a)
+  ret <4 x float> %dx.ddy.coarse
+}
+
+declare half @llvm.dx.ddy.coarse.f16(half)
+declare float @llvm.dx.ddy.coarse.f32(float)
+declare <4 x float> @llvm.dx.ddy.coarse.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/ddx_coarse.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/ddx_coarse.ll
new file mode 100644
index 0000000000000..478acb53701ea
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/ddx_coarse.ll
@@ -0,0 +1,47 @@
+; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-vulkan %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val --target-env spv1.4 %}
+
+; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32
+; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16
+
+; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4
+; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4
+
+define noundef float @ddx_coarse_float(float noundef %a) {
+entry:
+; CHECK: %[[#float_32_arg:]] = OpFunctionParameter %[[#float_32]]
+; CHECK: %[[#]] = OpDPdxCoarse %[[#float_32]] %[[#float_32_arg]]
+  %elt.ddx.coarse = call float @llvm.spv.ddx.coarse.f32(float %a)
+  ret float %elt.ddx.coarse
+}
+
+define noundef half @ddx_coarse_half(half noundef %a) {
+entry:
+; CHECK: %[[#float_16_arg:]] = OpFunctionParameter %[[#float_16]]
+; CHECK: %[[#converted:]] = OpFConvert %[[#float_32]] %[[#float_16_arg]]
+; CHECK: %[[#coarse:]] = OpDPdxCoarse %[[#float_32]] %[[#converted]]
+; CHECK: %[[#]] = OpFConvert %[[#float_16]] %[[#coarse]]
+  %elt.ddx.coarse = call half @llvm.spv.ddx.coarse.f16(half %a)
+  ret half %elt.ddx.coarse
+}
+
+define noundef <4 x float> @ddx_coarse_float_vector(<4 x float> noundef %a) {
+entry:
+; CHECK: %[[#vec4_float_32_arg:]] = OpFunctionParameter %[[#vec4_float_32]]
+; CHECK: %[[#]] = OpDPdxCoarse %[[#vec4_float_32]] %[[#vec4_float_32_arg]]
+  %elt.ddx.coarse = call <4 x float> @llvm.spv.ddx.coarse.v4f32(<4 x float> %a)
+  ret <4 x float> %elt.ddx.coarse
+}
+
+define noundef <4 x half> @ddx_coarse_half_vector(<4 x half> noundef %a) {
+entry:
+; CHECK: %[[#vec4_float_16_arg:]] = OpFunctionParameter %[[#vec4_float_16]]
+; CHECK: %[[#converted:]] = OpFConvert %[[#vec4_float_32]] %[[#vec4_float_16_arg]]
+; CHECK: %[[#coarse:]] = OpDPdxCoarse %[[#vec4_float_32]] %[[#converted]]
+; CHECK: %[[#]] = OpFConvert %[[#vec4_float_16]] %[[#coarse]]
+  %elt.ddx.coarse = call <4 x half> @llvm.spv.ddx.coarse.v4f16(<4 x half> %a)
+  ret <4 x half> %elt.ddx.coarse
+}
+
+declare float @llvm.spv.ddx.coarse.f32(float)
+declare half @llvm.spv.ddx.coarse.f16(half)
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/ddy_coarse.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/ddy_coarse.ll
new file mode 100644
index 0000000000000..8ad67cb644aa7
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/ddy_coarse.ll
@@ -0,0 +1,47 @@
+; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-vulkan %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val --target-env spv1.4 %}
+
+; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32
+; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16
+
+; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4
+; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4
+
+define noundef float @ddy_coarse_float(float noundef %a) {
+entry:
+; CHECK: %[[#float_32_arg:]] = OpFunctionParameter %[[#float_32]]
+; CHECK: %[[#]] = OpDPdyCoarse %[[#float_32]] %[[#float_32_arg]]
+  %elt.ddy.coarse = call float @llvm.spv.ddy.coarse.f32(float %a)
+  ret float %elt.ddy.coarse
+}
+
+define noundef half @ddy_coarse_half(half noundef %a) {
+entry:
+; CHECK: %[[#float_16_arg:]] = OpFunctionParameter %[[#float_16]]
+; CHECK: %[[#converted:]] = OpFConvert %[[#float_32]] %[[#float_16_arg]]
+; CHECK: %[[#coarse:]] = OpDPdyCoarse %[[#float_32]] %[[#converted]]
+; CHECK: %[[#]] = OpFConvert %[[#float_16]] %[[#coarse]]
+  %elt.ddy.coarse = call half @llvm.spv.ddy.coarse.f16(half %a)
+  ret half %elt.ddy.coarse
+}
+
+define noundef <4 x float> @ddy_coarse_float_vector(<4 x float> noundef %a) {
+entry:
+; CHECK: %[[#vec4_float_32_arg:]] = OpFunctionParameter %[[#vec4_float_32]]
+; CHECK: %[[#]] = OpDPdyCoarse %[[#vec4_float_32]] %[[#vec4_float_32_arg]]
+  %elt.ddy.coarse = call <4 x float> @llvm.spv.ddy.coarse.v4f32(<4 x float> %a)
+  ret <4 x float> %elt.ddy.coarse
+}
+
+define noundef <4 x half> @ddy_coarse_half_vector(<4 x half> noundef %a) {
+entry:
+; CHECK: %[[#vec4_float_16_arg:]] = OpFunctionParameter %[[#vec4_float_16]]
+; CHECK: %[[#converted:]] = OpFConvert %[[#vec4_float_32]] %[[#vec4_float_16_arg]]
+; CHECK: %[[#coarse:]] = OpDPdyCoarse %[[#vec4_float_32]] %[[#converted]]
+; CHECK: %[[#]] = OpFConvert %[[#vec4_float_16]] %[[#coarse]]
+  %elt.ddy.coarse = call <4 x half> @llvm.spv.ddy.coarse.v4f16(<4 x half> %a)
+  ret <4 x half> %elt.ddy.coarse
+}
+
+declare float @llvm.spv.ddy.coarse.f32(float)
+declare half @llvm.spv.ddy.coarse.f16(half)
diff --git a/llvm/test/CodeGen/SPIRV/opencl/ddx_coarse-error.ll b/llvm/test/CodeGen/SPIRV/opencl/ddx_coarse-error.ll
new file mode 100644
index 0000000000000..e93c1d1ba4d36
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/opencl/ddx_coarse-error.ll
@@ -0,0 +1,12 @@
+; RUN: not llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o /dev/null 2>&1 | FileCheck %s
+; RUN: not llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown %s -o /dev/null 2>&1 | FileCheck %s
+
+; CHECK: LLVM ERROR: %{{.*}} = G_INTRINSIC intrinsic(@llvm.spv.ddx.coarse), %{{.*}} is only supported in shaders.
+
+define noundef float @ddx_coarse(float noundef %a) {
+entry:
+  %spv.ddx.coarse = call float @llvm.spv.ddx.coarse.f32(float %a)
+  ret float %spv.ddx.coarse
+}
+
+declare float @llvm.spv.ddx.coarse.f32(float)
diff --git a/llvm/test/CodeGen/SPIRV/opencl/ddy_coarse-error.ll b/llvm/test/CodeGen/SPIRV/opencl/ddy_coarse-error.ll
new file mode 100644
index 0000000000000..aa71a395d8680
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/opencl/ddy_coarse-error.ll
@@ -0,0 +1,12 @@
+; RUN: not llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o /dev/null 2>&1 | FileCheck %s
+; RUN: not llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown %s -o /dev/null 2>&1 | FileCheck %s
+
+; CHECK: LLVM ERROR: %{{.*}} = G_INTRINSIC intrinsic(@llvm.spv.ddy.coarse), %{{.*}} is only supported in shaders.
+
+define noundef float @ddy_coarse(float noundef %a) {
+entry:
+  %spv.ddy.coarse = call float @llvm.spv.ddy.coarse.f32(float %a)
+  ret float %spv.ddy.coarse
+}
+
+declare float @llvm.spv.ddy.coarse.f32(float)
diff --git a/llvm/test/CodeGen/X86/bittest-big-integer.ll b/llvm/test/CodeGen/X86/bittest-big-integer.ll
index b85a20b9d6b6e..023fb5065b892 100644
--- a/llvm/test/CodeGen/X86/bittest-big-integer.ll
+++ b/llvm/test/CodeGen/X86/bittest-big-integer.ll
@@ -1877,85 +1877,56 @@ define i32 @blsr_u512(ptr %word) nounwind {
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pushq %r15
 ; SSE-NEXT:    pushq %r14
-; SSE-NEXT:    pushq %r12
 ; SSE-NEXT:    pushq %rbx
-; SSE-NEXT:    pushq %rax
-; SSE-NEXT:    movq 56(%rdi), %rcx
-; SSE-NEXT:    movq 48(%rdi), %rdx
-; SSE-NEXT:    movq 40(%rdi), %rsi
-; SSE-NEXT:    movq 32(%rdi), %r11
+; SSE-NEXT:    movq 48(%rdi), %r11
+; SSE-NEXT:    movq 40(%rdi), %r9
 ; SSE-NEXT:    movq 24(%rdi), %r8
-; SSE-NEXT:    movq 16(%rdi), %r9
-; SSE-NEXT:    movq (%rdi), %rax
-; SSE-NEXT:    movq 8(%rdi), %r10
-; SSE-NEXT:    rep bsfq %rax, %rbx
-; SSE-NEXT:    rep bsfq %r10, %r14
-; SSE-NEXT:    addq $64, %r14
-; SSE-NEXT:    testq %rax, %rax
-; SSE-NEXT:    cmovneq %rbx, %r14
-; SSE-NEXT:    rep bsfq %r9, %r15
-; SSE-NEXT:    rep bsfq %r8, %rbx
+; SSE-NEXT:    movq 16(%rdi), %rdx
+; SSE-NEXT:    movq (%rdi), %rcx
+; SSE-NEXT:    movq 8(%rdi), %rsi
+; SSE-NEXT:    rep bsfq %rcx, %rax
+; SSE-NEXT:    rep bsfq %rsi, %rbx
 ; SSE-NEXT:    addq $64, %rbx
-; SSE-NEXT:    testq %r9, %r9
-; SSE-NEXT:    cmovneq %r15, %rbx
-; SSE-NEXT:    subq $-128, %rbx
-; SSE-NEXT:    movq %rax, %r15
-; SSE-NEXT:    movq %rax, %r12
-; SSE-NEXT:    orq %r10, %r12
-; SSE-NEXT:    cmovneq %r14, %rbx
-; SSE-NEXT:    rep bsfq %r11, %r12
-; SSE-NEXT:    rep bsfq %rsi, %r14
-; SSE-NEXT:    addq $64, %r14
-; SSE-NEXT:    testq %r11, %r11
-; SSE-NEXT:    cmovneq %r12, %r14
-; SSE-NEXT:    xorps %xmm0, %xmm0
-; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
-; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
-; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
-; SSE-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
-; SSE-NEXT:    rep bsfq %rdx, %r12
+; SSE-NEXT:    testq %rcx, %rcx
+; SSE-NEXT:    cmovneq %rax, %rbx
+; SSE-NEXT:    rep bsfq %rdx, %rax
+; SSE-NEXT:    rep bsfq %r8, %r10
+; SSE-NEXT:    addq $64, %r10
+; SSE-NEXT:    testq %rdx, %rdx
+; SSE-NEXT:    cmovneq %rax, %r10
+; SSE-NEXT:    movq 32(%rdi), %r14
+; SSE-NEXT:    subq $-128, %r10
+; SSE-NEXT:    movq %rcx, %rax
+; SSE-NEXT:    orq %rsi, %rax
+; SSE-NEXT:    cmovneq %rbx, %r10
+; SSE-NEXT:    rep bsfq %r14, %rax
+; SSE-NEXT:    rep bsfq %r9, %rbx
+; SSE-NEXT:    addq $64, %rbx
+; SSE-NEXT:    testq %r14, %r14
+; SSE-NEXT:    cmovneq %rax, %rbx
+; SSE-NEXT:    rep bsfq %r11, %r15
 ; SSE-NEXT:    movl $64, %eax
-; SSE-NEXT:    rep bsfq %rcx, %rax
+; SSE-NEXT:    rep bsfq 56(%rdi), %rax
 ; SSE-NEXT:    addq $64, %rax
-; SSE-NEXT:    testq %rdx, %rdx
-; SSE-NEXT:    cmovneq %r12, %rax
+; SSE-NEXT:    testq %r11, %r11
+; SSE-NEXT:    cmovneq %r15, %rax
 ; SSE-NEXT:    subq $-128, %rax
-; SSE-NEXT:    movq %r11, -{{[0-9]+}}(%rsp)
-; SSE-NEXT:    orq %rsi, %r11
-; SSE-NEXT:    cmovneq %r14, %rax
-; SSE-NEXT:    addq $256, %rax # imm = 0x100
-; SSE-NEXT:    movq %r10, -{{[0-9]+}}(%rsp)
-; SSE-NEXT:    orq %r8, %r10
-; SSE-NEXT:    orq %r9, %r15
-; SSE-NEXT:    orq %r10, %r15
+; SSE-NEXT:    orq %r9, %r14
 ; SSE-NEXT:    cmovneq %rbx, %rax
-; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
-; SSE-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
-; SSE-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
-; SSE-NEXT:    movq %rsi, -{{[0-9]+}}(%rsp)
-; SSE-NEXT:    movq %r8, -{{[0-9]+}}(%rsp)
-; SSE-NEXT:    movq %r9, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    addq $256, %rax # imm = 0x100
+; SSE-NEXT:    orq %r8, %rsi
+; SSE-NEXT:    orq %rdx, %rcx
+; SSE-NEXT:    orq %rsi, %rcx
+; SSE-NEXT:    cmovneq %r10, %rax
+; SSE-NEXT:    movl $-2, %edx
+; SSE-NEXT:    movl %eax, %ecx
+; SSE-NEXT:    roll %cl, %edx
 ; SSE-NEXT:    movl %eax, %ecx
-; SSE-NEXT:    andl $32, %ecx
-; SSE-NEXT:    movl %eax, %edx
-; SSE-NEXT:    andl $480, %edx # imm = 0x1E0
-; SSE-NEXT:    shrl $3, %edx
-; SSE-NEXT:    movl %edx, %esi
-; SSE-NEXT:    andl $-8, %esi
-; SSE-NEXT:    movq -128(%rsp,%rsi), %r8
-; SSE-NEXT:    shrq %cl, %r8
-; SSE-NEXT:    movl -120(%rsp,%rsi), %esi
-; SSE-NEXT:    addl %esi, %esi
-; SSE-NEXT:    notl %ecx
-; SSE-NEXT:    # kill: def $cl killed $cl killed $ecx
-; SSE-NEXT:    shlq %cl, %rsi
-; SSE-NEXT:    orl %r8d, %esi
-; SSE-NEXT:    btrl %eax, %esi
-; SSE-NEXT:    movl %esi, (%rdi,%rdx)
+; SSE-NEXT:    shrl $3, %ecx
+; SSE-NEXT:    andl $60, %ecx
+; SSE-NEXT:    andl %edx, (%rdi,%rcx)
 ; SSE-NEXT:    # kill: def $eax killed $eax killed $rax
-; SSE-NEXT:    addq $8, %rsp
 ; SSE-NEXT:    popq %rbx
-; SSE-NEXT:    popq %r12
 ; SSE-NEXT:    popq %r14
 ; SSE-NEXT:    popq %r15
 ; SSE-NEXT:    retq
@@ -1964,133 +1935,86 @@ define i32 @blsr_u512(ptr %word) nounwind {
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    pushq %r15
 ; AVX2-NEXT:    pushq %r14
-; AVX2-NEXT:    pushq %r13
-; AVX2-NEXT:    pushq %r12
 ; AVX2-NEXT:    pushq %rbx
-; AVX2-NEXT:    movq 56(%rdi), %rcx
-; AVX2-NEXT:    movq 40(%rdi), %rdx
-; AVX2-NEXT:    movq 32(%rdi), %r11
-; AVX2-NEXT:    movq 24(%rdi), %rsi
-; AVX2-NEXT:    movq 16(%rdi), %r8
-; AVX2-NEXT:    movq (%rdi), %r9
-; AVX2-NEXT:    movq 8(%rdi), %r10
-; AVX2-NEXT:    xorl %ebx, %ebx
-; AVX2-NEXT:    tzcntq %r9, %rbx
-; AVX2-NEXT:    tzcntq %r10, %rax
-; AVX2-NEXT:    addq $64, %rax
-; AVX2-NEXT:    testq %r9, %r9
-; AVX2-NEXT:    cmovneq %rbx, %rax
-; AVX2-NEXT:    xorl %r14d, %r14d
-; AVX2-NEXT:    tzcntq %r8, %r14
+; AVX2-NEXT:    movq 40(%rdi), %r9
+; AVX2-NEXT:    movq 32(%rdi), %r10
+; AVX2-NEXT:    movq 24(%rdi), %r8
+; AVX2-NEXT:    movq 16(%rdi), %rdx
+; AVX2-NEXT:    movq (%rdi), %rcx
+; AVX2-NEXT:    movq 8(%rdi), %rsi
+; AVX2-NEXT:    tzcntq %rcx, %rax
 ; AVX2-NEXT:    xorl %ebx, %ebx
 ; AVX2-NEXT:    tzcntq %rsi, %rbx
 ; AVX2-NEXT:    addq $64, %rbx
-; AVX2-NEXT:    testq %r8, %r8
-; AVX2-NEXT:    cmovneq %r14, %rbx
-; AVX2-NEXT:    subq $-128, %rbx
-; AVX2-NEXT:    movq %r9, %r14
-; AVX2-NEXT:    movq %r9, %r15
-; AVX2-NEXT:    orq %r10, %r15
+; AVX2-NEXT:    testq %rcx, %rcx
 ; AVX2-NEXT:    cmovneq %rax, %rbx
 ; AVX2-NEXT:    xorl %eax, %eax
-; AVX2-NEXT:    tzcntq %r11, %rax
-; AVX2-NEXT:    xorl %r12d, %r12d
-; AVX2-NEXT:    tzcntq %rdx, %r12
-; AVX2-NEXT:    addq $64, %r12
-; AVX2-NEXT:    testq %r11, %r11
-; AVX2-NEXT:    cmovneq %rax, %r12
-; AVX2-NEXT:    movq 48(%rdi), %r15
-; AVX2-NEXT:    xorl %r13d, %r13d
-; AVX2-NEXT:    tzcntq %r15, %r13
+; AVX2-NEXT:    tzcntq %rdx, %rax
+; AVX2-NEXT:    tzcntq %r8, %r11
+; AVX2-NEXT:    addq $64, %r11
+; AVX2-NEXT:    testq %rdx, %rdx
+; AVX2-NEXT:    cmovneq %rax, %r11
+; AVX2-NEXT:    subq $-128, %r11
+; AVX2-NEXT:    movq %rcx, %rax
+; AVX2-NEXT:    orq %rsi, %rax
+; AVX2-NEXT:    cmovneq %rbx, %r11
 ; AVX2-NEXT:    xorl %eax, %eax
-; AVX2-NEXT:    tzcntq %rcx, %rax
+; AVX2-NEXT:    tzcntq %r10, %rax
+; AVX2-NEXT:    xorl %ebx, %ebx
+; AVX2-NEXT:    tzcntq %r9, %rbx
+; AVX2-NEXT:    addq $64, %rbx
+; AVX2-NEXT:    testq %r10, %r10
+; AVX2-NEXT:    cmovneq %rax, %rbx
+; AVX2-NEXT:    movq 48(%rdi), %r14
+; AVX2-NEXT:    xorl %r15d, %r15d
+; AVX2-NEXT:    tzcntq %r14, %r15
+; AVX2-NEXT:    xorl %eax, %eax
+; AVX2-NEXT:    tzcntq 56(%rdi), %rax
 ; AVX2-NEXT:    addq $64, %rax
-; AVX2-NEXT:    testq %r15, %r15
-; AVX2-NEXT:    cmovneq %r13, %rax
+; AVX2-NEXT:    testq %r14, %r14
+; AVX2-NEXT:    cmovneq %r15, %rax
 ; AVX2-NEXT:    subq $-128, %rax
-; AVX2-NEXT:    vxorps %xmm0, %xmm0, %xmm0
-; AVX2-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT:    movq %r11, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT:    orq %rdx, %r11
-; AVX2-NEXT:    cmovneq %r12, %rax
-; AVX2-NEXT:    addq $256, %rax # imm = 0x100
-; AVX2-NEXT:    movq %r10, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT:    orq %rsi, %r10
-; AVX2-NEXT:    orq %r8, %r14
-; AVX2-NEXT:    orq %r10, %r14
+; AVX2-NEXT:    orq %r9, %r10
 ; AVX2-NEXT:    cmovneq %rbx, %rax
-; AVX2-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT:    movq %r9, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT:    movq %r15, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT:    movq %rsi, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT:    movq %r8, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT:    addq $256, %rax # imm = 0x100
+; AVX2-NEXT:    orq %r8, %rsi
+; AVX2-NEXT:    orq %rdx, %rcx
+; AVX2-NEXT:    orq %rsi, %rcx
+; AVX2-NEXT:    cmovneq %r11, %rax
+; AVX2-NEXT:    movl $-2, %edx
+; AVX2-NEXT:    movl %eax, %ecx
+; AVX2-NEXT:    roll %cl, %edx
 ; AVX2-NEXT:    movl %eax, %ecx
-; AVX2-NEXT:    andl $32, %ecx
-; AVX2-NEXT:    movl %eax, %edx
-; AVX2-NEXT:    andl $480, %edx # imm = 0x1E0
-; AVX2-NEXT:    shrl $3, %edx
-; AVX2-NEXT:    movl %edx, %esi
-; AVX2-NEXT:    andl $-8, %esi
-; AVX2-NEXT:    shrxq %rcx, -128(%rsp,%rsi), %r8
-; AVX2-NEXT:    notl %ecx
-; AVX2-NEXT:    movl -120(%rsp,%rsi), %esi
-; AVX2-NEXT:    addl %esi, %esi
-; AVX2-NEXT:    shlxq %rcx, %rsi, %rcx
-; AVX2-NEXT:    orl %r8d, %ecx
-; AVX2-NEXT:    btrl %eax, %ecx
-; AVX2-NEXT:    movl %ecx, (%rdi,%rdx)
+; AVX2-NEXT:    shrl $3, %ecx
+; AVX2-NEXT:    andl $60, %ecx
+; AVX2-NEXT:    andl %edx, (%rdi,%rcx)
 ; AVX2-NEXT:    # kill: def $eax killed $eax killed $rax
 ; AVX2-NEXT:    popq %rbx
-; AVX2-NEXT:    popq %r12
-; AVX2-NEXT:    popq %r13
 ; AVX2-NEXT:    popq %r14
 ; AVX2-NEXT:    popq %r15
-; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: blsr_u512:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    pushq %rax
-; AVX512-NEXT:    vmovups (%rdi), %ymm0
-; AVX512-NEXT:    vmovups 32(%rdi), %ymm1
-; AVX512-NEXT:    vmovdqu64 (%rdi), %zmm2
-; AVX512-NEXT:    vpternlogd {{.*#+}} zmm3 = -1
-; AVX512-NEXT:    vpaddq %zmm3, %zmm2, %zmm3
-; AVX512-NEXT:    vpandnq %zmm3, %zmm2, %zmm3
-; AVX512-NEXT:    vplzcntq %zmm3, %zmm3
-; AVX512-NEXT:    vmovdqa64 {{.*#+}} zmm4 = [64,128,192,256,320,384,448,512]
-; AVX512-NEXT:    vpsubq %zmm3, %zmm4, %zmm3
-; AVX512-NEXT:    vptestmq %zmm2, %zmm2, %k1
-; AVX512-NEXT:    vpbroadcastq {{.*#+}} zmm2 = [512,512,512,512,512,512,512,512]
-; AVX512-NEXT:    vpcompressq %zmm3, %zmm2 {%k1}
-; AVX512-NEXT:    vmovq %xmm2, %rax
-; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX512-NEXT:    vmovdqu %ymm2, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT:    vmovdqu %ymm2, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT:    vmovups %ymm1, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT:    vmovdqu64 (%rdi), %zmm0
+; AVX512-NEXT:    vpternlogd {{.*#+}} zmm1 = -1
+; AVX512-NEXT:    vpaddq %zmm1, %zmm0, %zmm1
+; AVX512-NEXT:    vpandnq %zmm1, %zmm0, %zmm1
+; AVX512-NEXT:    vplzcntq %zmm1, %zmm1
+; AVX512-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [64,128,192,256,320,384,448,512]
+; AVX512-NEXT:    vpsubq %zmm1, %zmm2, %zmm1
+; AVX512-NEXT:    vptestmq %zmm0, %zmm0, %k1
+; AVX512-NEXT:    vpbroadcastq {{.*#+}} zmm0 = [512,512,512,512,512,512,512,512]
+; AVX512-NEXT:    vpcompressq %zmm1, %zmm0 {%k1}
+; AVX512-NEXT:    vmovq %xmm0, %rax
+; AVX512-NEXT:    movl $-2, %edx
+; AVX512-NEXT:    movl %eax, %ecx
+; AVX512-NEXT:    roll %cl, %edx
 ; AVX512-NEXT:    movl %eax, %ecx
-; AVX512-NEXT:    andl $32, %ecx
-; AVX512-NEXT:    movl %ecx, %edx
-; AVX512-NEXT:    notl %edx
-; AVX512-NEXT:    movl %eax, %esi
-; AVX512-NEXT:    shrl $3, %esi
-; AVX512-NEXT:    movl %esi, %r8d
-; AVX512-NEXT:    andl $56, %r8d
-; AVX512-NEXT:    movl -120(%rsp,%r8), %r9d
-; AVX512-NEXT:    addl %r9d, %r9d
-; AVX512-NEXT:    shlxq %rdx, %r9, %rdx
 ; AVX512-NEXT:    shrl $3, %ecx
-; AVX512-NEXT:    addq %rsp, %r8
-; AVX512-NEXT:    addq $-128, %r8
-; AVX512-NEXT:    orl (%rcx,%r8), %edx
-; AVX512-NEXT:    btrl %eax, %edx
-; AVX512-NEXT:    andl $60, %esi
-; AVX512-NEXT:    movl %edx, (%rdi,%rsi)
+; AVX512-NEXT:    andl $60, %ecx
+; AVX512-NEXT:    andl %edx, (%rdi,%rcx)
 ; AVX512-NEXT:    # kill: def $eax killed $eax killed $rax
-; AVX512-NEXT:    popq %rcx
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
   %ld = load i512, ptr %word
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-const-to-bop.ll b/llvm/test/Transforms/InstCombine/canonicalize-const-to-bop.ll
index b3093a92624ae..f0e40f4ede161 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-const-to-bop.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-const-to-bop.ll
@@ -123,8 +123,7 @@ define i8 @udiv_slt_exact(i8 %x) {
 define i8 @canonicalize_icmp_operands(i8 %x) {
 ; CHECK-LABEL: define i8 @canonicalize_icmp_operands(
 ; CHECK-SAME: i8 [[X:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.smin.i8(i8 [[X]], i8 119)
-; CHECK-NEXT:    [[S:%.*]] = add nsw i8 [[TMP1]], 8
+; CHECK-NEXT:    [[S:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X]], i8 8)
 ; CHECK-NEXT:    ret i8 [[S]]
 ;
   %add = add nsw i8 %x, 8
diff --git a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
index cfd679c0cc592..c0ad5818e448a 100644
--- a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
+++ b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
@@ -2351,3 +2351,323 @@ define i8 @fold_add_umax_to_usub_multiuse(i8 %a) {
 }
 
 declare void @usei8(i8)
+
+define i8 @sadd_sat_uge_int_max(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_uge_int_max(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[X:%.*]], 127
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[X]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %cmp = icmp sge i8 %x, 127
+  %add = add i8 %x, %y
+  %r = select i1 %cmp, i8 127, i8 %add
+  ret i8 %r
+}
+
+define i8 @sadd_sat_ugt_int_max(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_ugt_int_max(
+; CHECK-NEXT:    [[R:%.*]] = add i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %cmp = icmp sgt i8 %x, 127
+  %add = add i8 %x, %y
+  %r = select i1 %cmp, i8 127, i8 %add
+  ret i8 %r
+}
+
+define i8 @sadd_sat_eq_int_max(i8 %x) {
+; CHECK-LABEL: @sadd_sat_eq_int_max(
+; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X:%.*]], i8 1)
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %cmp = icmp eq i8 %x, 127
+  %add = add i8 %x, 1
+  %r = select i1 %cmp, i8 127, i8 %add
+  ret i8 %r
+}
+
+define i8 @sadd_sat_constant(i8 %x) {
+; CHECK-LABEL: @sadd_sat_constant(
+; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X:%.*]], i8 10)
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %cmp = icmp sge i8 %x, 118
+  %add = add i8 %x, 10
+  %r = select i1 %cmp, i8 127, i8 %add
+  ret i8 %r
+}
+
+define i8 @sadd_sat_negative_no_fold(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_negative_no_fold(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[X:%.*]], 127
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[X]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %cmp = icmp sge i8 %x, 127
+  %add = add i8 %x, %y
+  %r = select i1 %cmp, i8 127, i8 %add
+  ret i8 %r
+}
+
+define i8 @sadd_sat_wrong_predicate(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_wrong_predicate(
+; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i8 [[X:%.*]], 127
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[Y:%.*]], 127
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP_NOT]], i8 [[ADD]], i8 127
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %cmp = icmp slt i8 %x, 127
+  %add = add i8 %x, %y
+  %r = select i1 %cmp, i8 127, i8 %add
+  ret i8 %r
+}
+
+define i8 @sadd_sat_wrong_constant(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_wrong_constant(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 [[X:%.*]], 125
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[X]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %cmp = icmp sge i8 %x, 126
+  %add = add i8 %x, %y
+  %r = select i1 %cmp, i8 127, i8 %add
+  ret i8 %r
+}
+
+define <2 x i8> @sadd_sat_vector(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @sadd_sat_vector(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 127)
+; CHECK-NEXT:    [[ADD:%.*]] = add <2 x i8> [[X]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[CMP]], <2 x i8> splat (i8 127), <2 x i8> [[ADD]]
+; CHECK-NEXT:    ret <2 x i8> [[R]]
+;
+  %cmp = icmp sge <2 x i8> %x, <i8 127, i8 127>
+  %add = add <2 x i8> %x, %y
+  %r = select <2 x i1> %cmp, <2 x i8> <i8 127, i8 127>, <2 x i8> %add
+  ret <2 x i8> %r
+}
+
+define <2 x i8> @sadd_sat_vector_constant(<2 x i8> %x) {
+; CHECK-LABEL: @sadd_sat_vector_constant(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i8> @llvm.smin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> <i8 117, i8 107>)
+; CHECK-NEXT:    [[R:%.*]] = add <2 x i8> [[TMP1]], <i8 10, i8 20>
+; CHECK-NEXT:    ret <2 x i8> [[R]]
+;
+  %cmp = icmp sge <2 x i8> %x, <i8 118, i8 108>
+  %add = add <2 x i8> %x, <i8 10, i8 20>
+  %r = select <2 x i1> %cmp, <2 x i8> <i8 127, i8 127>, <2 x i8> %add
+  ret <2 x i8> %r
+}
+
+define i8 @sadd_sat_int_max_minus_x(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_int_max_minus_x(
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 127, [[X:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[SUB]], [[Y:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %sub = sub i8 127, %x
+  %cmp = icmp slt i8 %sub, %y
+  %add = add i8 %x, %y
+  %r = select i1 %cmp, i8 127, i8 %add
+  ret i8 %r
+}
+
+define i8 @sadd_sat_int_max_minus_x_commuted(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_int_max_minus_x_commuted(
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 127, [[X:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 [[Y:%.*]], [[SUB]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %sub = sub i8 127, %x
+  %cmp = icmp sgt i8 %y, %sub
+  %add = add i8 %x, %y
+  %r = select i1 %cmp, i8 127, i8 %add
+  ret i8 %r
+}
+
+define i8 @sadd_sat_int_max_minus_x_nonstrict(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_int_max_minus_x_nonstrict(
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 127, [[X:%.*]]
+; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp sgt i8 [[SUB]], [[Y:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP_NOT]], i8 [[ADD]], i8 127
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %sub = sub i8 127, %x
+  %cmp = icmp sle i8 %sub, %y
+  %add = add i8 %x, %y
+  %r = select i1 %cmp, i8 127, i8 %add
+  ret i8 %r
+}
+
+define i8 @sadd_sat_int_max_minus_x_commuted_nonstrict(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_int_max_minus_x_commuted_nonstrict(
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 127, [[X:%.*]]
+; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp slt i8 [[Y:%.*]], [[SUB]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP_NOT]], i8 [[ADD]], i8 127
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %sub = sub i8 127, %x
+  %cmp = icmp sge i8 %y, %sub
+  %add = add i8 %x, %y
+  %r = select i1 %cmp, i8 127, i8 %add
+  ret i8 %r
+}
+
+define i8 @sadd_sat_int_max_minus_x_wrong_constant(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_int_max_minus_x_wrong_constant(
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 126, [[X:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[SUB]], [[Y:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %sub = sub i8 126, %x
+  %cmp = icmp slt i8 %sub, %y
+  %add = add i8 %x, %y
+  %r = select i1 %cmp, i8 127, i8 %add
+  ret i8 %r
+}
+
+define i8 @sadd_sat_int_max_minus_x_wrong_predicate(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_int_max_minus_x_wrong_predicate(
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 127, [[X:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 [[SUB]], [[Y:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %sub = sub i8 127, %x
+  %cmp = icmp sgt i8 %sub, %y
+  %add = add i8 %x, %y
+  %r = select i1 %cmp, i8 127, i8 %add
+  ret i8 %r
+}
+
+define <2 x i8> @sadd_sat_int_max_minus_x_vector(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @sadd_sat_int_max_minus_x_vector(
+; CHECK-NEXT:    [[SUB:%.*]] = sub <2 x i8> splat (i8 127), [[X:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i8> [[SUB]], [[Y:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = add <2 x i8> [[X]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[CMP]], <2 x i8> splat (i8 127), <2 x i8> [[ADD]]
+; CHECK-NEXT:    ret <2 x i8> [[R]]
+;
+  %sub = sub <2 x i8> <i8 127, i8 127>, %x
+  %cmp = icmp slt <2 x i8> %sub, %y
+  %add = add <2 x i8> %x, %y
+  %r = select <2 x i1> %cmp, <2 x i8> <i8 127, i8 127>, <2 x i8> %add
+  ret <2 x i8> %r
+}
+
+define i8 @sadd_sat_commuted_select(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_commuted_select(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[X:%.*]], 127
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[Y:%.*]], 127
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP]], i8 [[ADD]], i8 127
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %cmp = icmp sge i8 %x, 127
+  %add = add i8 %x, %y
+  %r = select i1 %cmp, i8 %add, i8 127
+  ret i8 %r
+}
+
+define i8 @sadd_sat_commuted_add(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_commuted_add(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[X:%.*]], 127
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[Y:%.*]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %cmp = icmp sge i8 %x, 127
+  %add = add i8 %y, %x
+  %r = select i1 %cmp, i8 127, i8 %add
+  ret i8 %r
+}
+
+define i8 @sadd_sat_commuted_both(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_commuted_both(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[X:%.*]], 127
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[Y:%.*]], 127
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP]], i8 [[ADD]], i8 127
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %cmp = icmp sge i8 %x, 127
+  %add = add i8 %y, %x
+  %r = select i1 %cmp, i8 %add, i8 127
+  ret i8 %r
+}
+
+define i8 @sadd_sat_int_max_minus_x_nsw_slt(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_int_max_minus_x_nsw_slt(
+; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %sub = sub nsw i8 127, %x
+  %cmp = icmp slt i8 %sub, %y
+  %add = add i8 %x, %y
+  %r = select i1 %cmp, i8 127, i8 %add
+  ret i8 %r
+}
+
+define i8 @sadd_sat_int_max_minus_x_nsw_sge_commuted(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_int_max_minus_x_nsw_sge_commuted(
+; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %sub = sub nsw i8 127, %x
+  %cmp = icmp sge i8 %y, %sub
+  %add = add i8 %x, %y
+  %r = select i1 %cmp, i8 127, i8 %add
+  ret i8 %r
+}
+
+define i8 @sadd_sat_int_max_minus_x_no_nsw_neg(i8 %x, i8 %y) {
+; CHECK-LABEL: @sadd_sat_int_max_minus_x_no_nsw_neg(
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 127, [[X:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[SUB]], [[Y:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %sub = sub i8 127, %x
+  %cmp = icmp slt i8 %sub, %y
+  %add = add i8 %x, %y
+  %r = select i1 %cmp, i8 127, i8 %add
+  ret i8 %r
+}
+
+define i8 @neg_no_nsw(i8 %x, i8 %y) {
+; CHECK-LABEL: @neg_no_nsw(
+; CHECK-NEXT:    [[ADD:%.*]] = sub i8 127, [[Y:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 [[X:%.*]], [[ADD]]
+; CHECK-NEXT:    [[D:%.*]] = add i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[S:%.*]] = select i1 [[CMP]], i8 127, i8 [[D]]
+; CHECK-NEXT:    ret i8 [[S]]
+;
+  %add = sub i8 127, %y
+  %cmp = icmp sgt i8 %x, %add
+  %d = add i8 %x, %y
+  %s = select i1 %cmp, i8 127, i8 %d
+  ret i8 %s
+}
+
+define i8 @neg_neg_constant(i8 %x, i8 %y) {
+; CHECK-LABEL: @neg_neg_constant(
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.smin.i8(i8 [[X:%.*]], i8 -1)
+; CHECK-NEXT:    [[S:%.*]] = and i8 [[TMP1]], 127
+; CHECK-NEXT:    ret i8 [[S]]
+;
+  %cmp = icmp sgt i8 %x, -2
+  %d = add i8 %x, -128
+  %s = select i1 %cmp, i8 127, i8 %d
+  ret i8 %s
+}
diff --git a/llvm/test/Transforms/InstSimplify/AArch64/aarch64-sve-reductions.ll b/llvm/test/Transforms/InstSimplify/AArch64/aarch64-sve-reductions.ll
new file mode 100644
index 0000000000000..a54d6044d04b1
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/AArch64/aarch64-sve-reductions.ll
@@ -0,0 +1,912 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -passes=instsimplify < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+;
+; ANDV
+;
+
+define i8 @andv_i8_no_active(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: define i8 @andv_i8_no_active(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    ret i8 -1
+;
+  %out = call i8 @llvm.aarch64.sve.andv.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a)
+  ret i8 %out
+}
+
+define i8 @andv_i8_splat_neutral_val(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define i8 @andv_i8_splat_neutral_val(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 -1
+;
+  %out = call i8 @llvm.aarch64.sve.andv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat(i8 -1))
+  ret i8 %out
+}
+
+define i8 @andv_i8_splat_non_neutral_val(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define i8 @andv_i8_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i8 @llvm.aarch64.sve.andv.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> zeroinitializer)
+; CHECK-NEXT:    ret i8 [[OUT]]
+;
+  %out = call i8 @llvm.aarch64.sve.andv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer)
+  ret i8 %out
+}
+
+define i8 @andv_i8_all_active_splat(i8 %a) #0 {
+; CHECK-LABEL: define i8 @andv_i8_all_active_splat(
+; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 [[A]]
+;
+  %a.insert = insertelement <vscale x 16 x i8> poison, i8 %a, i8 0
+  %a.splat = shufflevector <vscale x 16 x i8> %a.insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %out = call i8 @llvm.aarch64.sve.andv.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> %a.splat)
+  ret i8 %out
+}
+
+define i16 @andv_i16_splat_neutral_val(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define i16 @andv_i16_splat_neutral_val(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i16 -1
+;
+  %out = call i16 @llvm.aarch64.sve.andv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat(i16 -1))
+  ret i16 %out
+}
+
+define i16 @andv_i16_splat_non_neutral_val(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define i16 @andv_i16_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i16 @llvm.aarch64.sve.andv.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> zeroinitializer)
+; CHECK-NEXT:    ret i16 [[OUT]]
+;
+  %out = call i16 @llvm.aarch64.sve.andv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> zeroinitializer)
+  ret i16 %out
+}
+
+define i32 @andv_i32_splat_neutral_val(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define i32 @andv_i32_splat_neutral_val(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i32 -1
+;
+  %out = call i32 @llvm.aarch64.sve.andv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat(i32 -1))
+  ret i32 %out
+}
+
+define i32 @andv_i32_splat_non_neutral_val(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define i32 @andv_i32_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i32 @llvm.aarch64.sve.andv.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %out = call i32 @llvm.aarch64.sve.andv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer)
+  ret i32 %out
+}
+
+define i64 @andv_i64_splat_neutral_val(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @andv_i64_splat_neutral_val(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i64 -1
+;
+  %out = call i64 @llvm.aarch64.sve.andv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat(i64 -1))
+  ret i64 %out
+}
+
+define i64 @andv_i64_splat_non_neutral_val(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @andv_i64_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i64 @llvm.aarch64.sve.andv.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT:    ret i64 [[OUT]]
+;
+  %out = call i64 @llvm.aarch64.sve.andv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> zeroinitializer)
+  ret i64 %out
+}
+
+;
+; EORV
+;
+
+define i8 @eorv_i8_no_active(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: define i8 @eorv_i8_no_active(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 0
+;
+  %out = call i8 @llvm.aarch64.sve.eorv.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a)
+  ret i8 %out
+}
+
+define i8 @eorv_i8_splat_neutral_val(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define i8 @eorv_i8_splat_neutral_val(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 0
+;
+  %out = call i8 @llvm.aarch64.sve.eorv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer)
+  ret i8 %out
+}
+
+define i8 @eorv_i8_splat_non_neutral_val(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define i8 @eorv_i8_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i8 @llvm.aarch64.sve.eorv.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 1))
+; CHECK-NEXT:    ret i8 [[OUT]]
+;
+  %out = call i8 @llvm.aarch64.sve.eorv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat(i8 1))
+  ret i8 %out
+}
+
+define i8 @eorv_i8_all_active_splat(i8 %a) #0 {
+; CHECK-LABEL: define i8 @eorv_i8_all_active_splat(
+; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 0
+;
+  %a.insert = insertelement <vscale x 16 x i8> poison, i8 %a, i8 0
+  %a.splat = shufflevector <vscale x 16 x i8> %a.insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %out = call i8 @llvm.aarch64.sve.eorv.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> %a.splat)
+  ret i8 %out
+}
+
+define i16 @eorv_i16_splat_neutral_val(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define i16 @eorv_i16_splat_neutral_val(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i16 0
+;
+  %out = call i16 @llvm.aarch64.sve.eorv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> zeroinitializer)
+  ret i16 %out
+}
+
+define i16 @eorv_i16_splat_non_neutral_val(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define i16 @eorv_i16_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i16 @llvm.aarch64.sve.eorv.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> splat (i16 1))
+; CHECK-NEXT:    ret i16 [[OUT]]
+;
+  %out = call i16 @llvm.aarch64.sve.eorv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat(i16 1))
+  ret i16 %out
+}
+
+define i32 @eorv_i32_splat_neutral_val(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define i32 @eorv_i32_splat_neutral_val(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i32 0
+;
+  %out = call i32 @llvm.aarch64.sve.eorv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer)
+  ret i32 %out
+}
+
+define i32 @eorv_i32_splat_non_neutral_val(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define i32 @eorv_i32_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i32 @llvm.aarch64.sve.eorv.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 1))
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %out = call i32 @llvm.aarch64.sve.eorv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat(i32 1))
+  ret i32 %out
+}
+
+define i64 @eorv_i64_splat_neutral_val(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @eorv_i64_splat_neutral_val(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i64 0
+;
+  %out = call i64 @llvm.aarch64.sve.eorv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> zeroinitializer)
+  ret i64 %out
+}
+
+define i64 @eorv_i64_splat_non_neutral_val(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @eorv_i64_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i64 @llvm.aarch64.sve.eorv.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 1))
+; CHECK-NEXT:    ret i64 [[OUT]]
+;
+  %out = call i64 @llvm.aarch64.sve.eorv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat(i64 1))
+  ret i64 %out
+}
+
+;
+; ORV
+;
+
+define i8 @orv_i8_no_active(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: define i8 @orv_i8_no_active(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 0
+;
+  %out = call i8 @llvm.aarch64.sve.orv.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a)
+  ret i8 %out
+}
+
+define i8 @orv_i8_splat_neutral_val(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define i8 @orv_i8_splat_neutral_val(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 0
+;
+  %out = call i8 @llvm.aarch64.sve.orv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer)
+  ret i8 %out
+}
+
+define i8 @orv_i8_splat_non_neutral_val(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define i8 @orv_i8_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i8 @llvm.aarch64.sve.orv.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 1))
+; CHECK-NEXT:    ret i8 [[OUT]]
+;
+  %out = call i8 @llvm.aarch64.sve.orv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat(i8 1))
+  ret i8 %out
+}
+
+define i8 @orv_i8_all_active_splat(i8 %a) #0 {
+; CHECK-LABEL: define i8 @orv_i8_all_active_splat(
+; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 [[A]]
+;
+  %a.insert = insertelement <vscale x 16 x i8> poison, i8 %a, i8 0
+  %a.splat = shufflevector <vscale x 16 x i8> %a.insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %out = call i8 @llvm.aarch64.sve.orv.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> %a.splat)
+  ret i8 %out
+}
+
+define i16 @orv_i16_splat_neutral_val(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define i16 @orv_i16_splat_neutral_val(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i16 0
+;
+  %out = call i16 @llvm.aarch64.sve.orv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> zeroinitializer)
+  ret i16 %out
+}
+
+define i16 @orv_i16_splat_non_neutral_val(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define i16 @orv_i16_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i16 @llvm.aarch64.sve.orv.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> splat (i16 1))
+; CHECK-NEXT:    ret i16 [[OUT]]
+;
+  %out = call i16 @llvm.aarch64.sve.orv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat(i16 1))
+  ret i16 %out
+}
+
+define i32 @orv_i32_splat_neutral_val(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define i32 @orv_i32_splat_neutral_val(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i32 0
+;
+  %out = call i32 @llvm.aarch64.sve.orv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer)
+  ret i32 %out
+}
+
+define i32 @orv_i32_splat_non_neutral_val(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define i32 @orv_i32_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i32 @llvm.aarch64.sve.orv.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 1))
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %out = call i32 @llvm.aarch64.sve.orv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat(i32 1))
+  ret i32 %out
+}
+
+define i64 @orv_i64_splat_neutral_val(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @orv_i64_splat_neutral_val(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i64 0
+;
+  %out = call i64 @llvm.aarch64.sve.orv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> zeroinitializer)
+  ret i64 %out
+}
+
+define i64 @orv_i64_splat_non_neutral_val(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @orv_i64_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i64 @llvm.aarch64.sve.orv.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 1))
+; CHECK-NEXT:    ret i64 [[OUT]]
+;
+  %out = call i64 @llvm.aarch64.sve.orv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat(i64 1))
+  ret i64 %out
+}
+
+;
+; SADDV
+;
+
+define i64 @saddv_i8_no_active(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: define i64 @saddv_i8_no_active(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i64 0
+;
+  %out = call i64 @llvm.aarch64.sve.saddv.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a)
+  ret i64 %out
+}
+
+define i64 @saddv_i8_splat_neutral_val(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @saddv_i8_splat_neutral_val(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i64 0
+;
+  %out = call i64 @llvm.aarch64.sve.saddv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer)
+  ret i64 %out
+}
+
+define i64 @saddv_i8_splat_non_neutral_val(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @saddv_i8_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i64 @llvm.aarch64.sve.saddv.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 1))
+; CHECK-NEXT:    ret i64 [[OUT]]
+;
+  %out = call i64 @llvm.aarch64.sve.saddv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat(i8 1))
+  ret i64 %out
+}
+
+define i64 @saddv_i8_all_active_splat(i8 %a) #0 {
+; CHECK-LABEL: define i64 @saddv_i8_all_active_splat(
+; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[A_INSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[A]], i8 0
+; CHECK-NEXT:    [[A_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[A_INSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+; CHECK-NEXT:    [[OUT:%.*]] = call i64 @llvm.aarch64.sve.saddv.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A_SPLAT]])
+; CHECK-NEXT:    ret i64 [[OUT]]
+;
+  %a.insert = insertelement <vscale x 16 x i8> poison, i8 %a, i8 0
+  %a.splat = shufflevector <vscale x 16 x i8> %a.insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %out = call i64 @llvm.aarch64.sve.saddv.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> %a.splat)
+  ret i64 %out
+}
+
+define i64 @saddv_i16_splat_neutral_val(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @saddv_i16_splat_neutral_val(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i64 0
+;
+  %out = call i64 @llvm.aarch64.sve.saddv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> zeroinitializer)
+  ret i64 %out
+}
+
+define i64 @saddv_i16_splat_non_neutral_val(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @saddv_i16_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i64 @llvm.aarch64.sve.saddv.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> splat (i16 1))
+; CHECK-NEXT:    ret i64 [[OUT]]
+;
+  %out = call i64 @llvm.aarch64.sve.saddv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat(i16 1))
+  ret i64 %out
+}
+
+define i64 @saddv_i32_splat_neutral_val(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @saddv_i32_splat_neutral_val(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i64 0
+;
+  %out = call i64 @llvm.aarch64.sve.saddv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer)
+  ret i64 %out
+}
+
+define i64 @saddv_i32_splat_non_neutral_val(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @saddv_i32_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i64 @llvm.aarch64.sve.saddv.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 1))
+; CHECK-NEXT:    ret i64 [[OUT]]
+;
+  %out = call i64 @llvm.aarch64.sve.saddv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat(i32 1))
+  ret i64 %out
+}
+
+define i64 @saddv_i64_splat_neutral_val(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @saddv_i64_splat_neutral_val(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i64 0
+;
+  %out = call i64 @llvm.aarch64.sve.saddv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> zeroinitializer)
+  ret i64 %out
+}
+
+define i64 @saddv_i64_splat_non_neutral_val(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @saddv_i64_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i64 @llvm.aarch64.sve.saddv.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 1))
+; CHECK-NEXT:    ret i64 [[OUT]]
+;
+  %out = call i64 @llvm.aarch64.sve.saddv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat(i64 1))
+  ret i64 %out
+}
+
+;
+; SMAXV
+;
+
+define i8 @smaxv_i8_no_active(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: define i8 @smaxv_i8_no_active(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 -128
+;
+  %out = call i8 @llvm.aarch64.sve.smaxv.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a)
+  ret i8 %out
+}
+
+define i8 @smaxv_i8_splat_neutral_val(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define i8 @smaxv_i8_splat_neutral_val(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 -128
+;
+  %out = call i8 @llvm.aarch64.sve.smaxv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat(i8 -128))
+  ret i8 %out
+}
+
+define i8 @smaxv_i8_splat_non_neutral_val(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define i8 @smaxv_i8_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i8 @llvm.aarch64.sve.smaxv.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> zeroinitializer)
+; CHECK-NEXT:    ret i8 [[OUT]]
+;
+  %out = call i8 @llvm.aarch64.sve.smaxv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer)
+  ret i8 %out
+}
+
+define i8 @smaxv_i8_all_active_splat(i8 %a) #0 {
+; CHECK-LABEL: define i8 @smaxv_i8_all_active_splat(
+; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 [[A]]
+;
+  %a.insert = insertelement <vscale x 16 x i8> poison, i8 %a, i8 0
+  %a.splat = shufflevector <vscale x 16 x i8> %a.insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %out = call i8 @llvm.aarch64.sve.smaxv.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> %a.splat)
+  ret i8 %out
+}
+
+define i16 @smaxv_i16_splat_neutral_val(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define i16 @smaxv_i16_splat_neutral_val(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i16 -32768
+;
+  %out = call i16 @llvm.aarch64.sve.smaxv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat(i16 -32768))
+  ret i16 %out
+}
+
+define i16 @smaxv_i16_splat_non_neutral_val(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define i16 @smaxv_i16_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i16 @llvm.aarch64.sve.smaxv.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> zeroinitializer)
+; CHECK-NEXT:    ret i16 [[OUT]]
+;
+  %out = call i16 @llvm.aarch64.sve.smaxv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> zeroinitializer)
+  ret i16 %out
+}
+
+define i32 @smaxv_i32_splat_neutral_val(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define i32 @smaxv_i32_splat_neutral_val(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i32 -2147483648
+;
+  %out = call i32 @llvm.aarch64.sve.smaxv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat(i32 -2147483648))
+  ret i32 %out
+}
+
+define i32 @smaxv_i32_splat_non_neutral_val(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define i32 @smaxv_i32_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i32 @llvm.aarch64.sve.smaxv.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %out = call i32 @llvm.aarch64.sve.smaxv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer)
+  ret i32 %out
+}
+
+define i64 @smaxv_i64_splat_neutral_val(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @smaxv_i64_splat_neutral_val(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i64 -9223372036854775808
+;
+  %out = call i64 @llvm.aarch64.sve.smaxv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat(i64 -9223372036854775808))
+  ret i64 %out
+}
+
+define i64 @smaxv_i64_splat_non_neutral_val(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @smaxv_i64_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i64 @llvm.aarch64.sve.smaxv.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT:    ret i64 [[OUT]]
+;
+  %out = call i64 @llvm.aarch64.sve.smaxv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> zeroinitializer)
+  ret i64 %out
+}
+
+;
+; SMINV
+;
+
+define i8 @sminv_i8_no_active(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: define i8 @sminv_i8_no_active(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 127
+;
+  %out = call i8 @llvm.aarch64.sve.sminv.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a)
+  ret i8 %out
+}
+
+define i8 @sminv_i8_splat_neutral_val(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define i8 @sminv_i8_splat_neutral_val(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 127
+;
+  %out = call i8 @llvm.aarch64.sve.sminv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat(i8 127))
+  ret i8 %out
+}
+
+define i8 @sminv_i8_splat_non_neutral_val(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define i8 @sminv_i8_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i8 @llvm.aarch64.sve.sminv.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> zeroinitializer)
+; CHECK-NEXT:    ret i8 [[OUT]]
+;
+  %out = call i8 @llvm.aarch64.sve.sminv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer)
+  ret i8 %out
+}
+
+define i8 @sminv_i8_all_active_splat(i8 %a) #0 {
+; CHECK-LABEL: define i8 @sminv_i8_all_active_splat(
+; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 [[A]]
+;
+  %a.insert = insertelement <vscale x 16 x i8> poison, i8 %a, i8 0
+  %a.splat = shufflevector <vscale x 16 x i8> %a.insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %out = call i8 @llvm.aarch64.sve.sminv.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> %a.splat)
+  ret i8 %out
+}
+
+define i16 @sminv_i16_splat_neutral_val(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define i16 @sminv_i16_splat_neutral_val(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i16 32767
+;
+  %out = call i16 @llvm.aarch64.sve.sminv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat(i16 32767))
+  ret i16 %out
+}
+
+define i16 @sminv_i16_splat_non_neutral_val(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define i16 @sminv_i16_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i16 @llvm.aarch64.sve.sminv.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> zeroinitializer)
+; CHECK-NEXT:    ret i16 [[OUT]]
+;
+  %out = call i16 @llvm.aarch64.sve.sminv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> zeroinitializer)
+  ret i16 %out
+}
+
+define i32 @sminv_i32_splat_neutral_val(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define i32 @sminv_i32_splat_neutral_val(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i32 2147483647
+;
+  %out = call i32 @llvm.aarch64.sve.sminv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat(i32 2147483647))
+  ret i32 %out
+}
+
+define i32 @sminv_i32_splat_non_neutral_val(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define i32 @sminv_i32_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i32 @llvm.aarch64.sve.sminv.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %out = call i32 @llvm.aarch64.sve.sminv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer)
+  ret i32 %out
+}
+
+define i64 @sminv_i64_splat_neutral_val(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @sminv_i64_splat_neutral_val(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i64 9223372036854775807
+;
+  %out = call i64 @llvm.aarch64.sve.sminv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat(i64 9223372036854775807))
+  ret i64 %out
+}
+
+define i64 @sminv_i64_splat_non_neutral_val(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @sminv_i64_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i64 @llvm.aarch64.sve.sminv.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT:    ret i64 [[OUT]]
+;
+  %out = call i64 @llvm.aarch64.sve.sminv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> zeroinitializer)
+  ret i64 %out
+}
+
+;
+; UADDV
+;
+
+define i64 @uaddv_i8_no_active(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: define i64 @uaddv_i8_no_active(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i64 0
+;
+  %out = call i64 @llvm.aarch64.sve.uaddv.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a)
+  ret i64 %out
+}
+
+define i64 @uaddv_i8_splat_neutral_val(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @uaddv_i8_splat_neutral_val(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i64 0
+;
+  %out = call i64 @llvm.aarch64.sve.uaddv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer)
+  ret i64 %out
+}
+
+define i64 @uaddv_i8_splat_non_neutral_val(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @uaddv_i8_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i64 @llvm.aarch64.sve.uaddv.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 1))
+; CHECK-NEXT:    ret i64 [[OUT]]
+;
+  %out = call i64 @llvm.aarch64.sve.uaddv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat(i8 1))
+  ret i64 %out
+}
+
+define i64 @uaddv_i8_all_active_splat(i8 %a) #0 {
+; CHECK-LABEL: define i64 @uaddv_i8_all_active_splat(
+; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[A_INSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[A]], i8 0
+; CHECK-NEXT:    [[A_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[A_INSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+; CHECK-NEXT:    [[OUT:%.*]] = call i64 @llvm.aarch64.sve.uaddv.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A_SPLAT]])
+; CHECK-NEXT:    ret i64 [[OUT]]
+;
+  %a.insert = insertelement <vscale x 16 x i8> poison, i8 %a, i8 0
+  %a.splat = shufflevector <vscale x 16 x i8> %a.insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %out = call i64 @llvm.aarch64.sve.uaddv.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> %a.splat)
+  ret i64 %out
+}
+
+define i64 @uaddv_i16_splat_neutral_val(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @uaddv_i16_splat_neutral_val(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i64 0
+;
+  %out = call i64 @llvm.aarch64.sve.uaddv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> zeroinitializer)
+  ret i64 %out
+}
+
+define i64 @uaddv_i16_splat_non_neutral_val(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @uaddv_i16_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i64 @llvm.aarch64.sve.uaddv.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> splat (i16 1))
+; CHECK-NEXT:    ret i64 [[OUT]]
+;
+  %out = call i64 @llvm.aarch64.sve.uaddv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat(i16 1))
+  ret i64 %out
+}
+
+define i64 @uaddv_i32_splat_neutral_val(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @uaddv_i32_splat_neutral_val(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i64 0
+;
+  %out = call i64 @llvm.aarch64.sve.uaddv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer)
+  ret i64 %out
+}
+
+define i64 @uaddv_i32_splat_non_neutral_val(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @uaddv_i32_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i64 @llvm.aarch64.sve.uaddv.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 1))
+; CHECK-NEXT:    ret i64 [[OUT]]
+;
+  %out = call i64 @llvm.aarch64.sve.uaddv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat(i32 1))
+  ret i64 %out
+}
+
+define i64 @uaddv_i64_splat_neutral_val(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @uaddv_i64_splat_neutral_val(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i64 0
+;
+  %out = call i64 @llvm.aarch64.sve.uaddv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> zeroinitializer)
+  ret i64 %out
+}
+
+define i64 @uaddv_i64_splat_non_neutral_val(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @uaddv_i64_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i64 @llvm.aarch64.sve.uaddv.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 1))
+; CHECK-NEXT:    ret i64 [[OUT]]
+;
+  %out = call i64 @llvm.aarch64.sve.uaddv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat(i64 1))
+  ret i64 %out
+}
+
+;
+; UMAXV
+;
+
+define i8 @umaxv_i8_no_active(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: define i8 @umaxv_i8_no_active(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 0
+;
+  %out = call i8 @llvm.aarch64.sve.umaxv.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a)
+  ret i8 %out
+}
+
+define i8 @umaxv_i8_splat_neutral_val(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define i8 @umaxv_i8_splat_neutral_val(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 0
+;
+  %out = call i8 @llvm.aarch64.sve.umaxv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer)
+  ret i8 %out
+}
+
+define i8 @umaxv_i8_splat_non_neutral_val(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define i8 @umaxv_i8_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i8 @llvm.aarch64.sve.umaxv.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 1))
+; CHECK-NEXT:    ret i8 [[OUT]]
+;
+  %out = call i8 @llvm.aarch64.sve.umaxv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat(i8 1))
+  ret i8 %out
+}
+
+define i8 @umaxv_i8_all_active_splat(i8 %a) #0 {
+; CHECK-LABEL: define i8 @umaxv_i8_all_active_splat(
+; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 [[A]]
+;
+  %a.insert = insertelement <vscale x 16 x i8> poison, i8 %a, i8 0
+  %a.splat = shufflevector <vscale x 16 x i8> %a.insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %out = call i8 @llvm.aarch64.sve.umaxv.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> %a.splat)
+  ret i8 %out
+}
+
+define i16 @umaxv_i16_splat_neutral_val(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define i16 @umaxv_i16_splat_neutral_val(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i16 0
+;
+  %out = call i16 @llvm.aarch64.sve.umaxv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> zeroinitializer)
+  ret i16 %out
+}
+
+define i16 @umaxv_i16_splat_non_neutral_val(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define i16 @umaxv_i16_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i16 @llvm.aarch64.sve.umaxv.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> splat (i16 1))
+; CHECK-NEXT:    ret i16 [[OUT]]
+;
+  %out = call i16 @llvm.aarch64.sve.umaxv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat(i16 1))
+  ret i16 %out
+}
+
+define i32 @umaxv_i32_splat_neutral_val(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define i32 @umaxv_i32_splat_neutral_val(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i32 0
+;
+  %out = call i32 @llvm.aarch64.sve.umaxv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer)
+  ret i32 %out
+}
+
+define i32 @umaxv_i32_splat_non_neutral_val(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define i32 @umaxv_i32_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i32 @llvm.aarch64.sve.umaxv.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 1))
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %out = call i32 @llvm.aarch64.sve.umaxv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat(i32 1))
+  ret i32 %out
+}
+
+define i64 @umaxv_i64_splat_neutral_val(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @umaxv_i64_splat_neutral_val(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i64 0
+;
+  %out = call i64 @llvm.aarch64.sve.umaxv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> zeroinitializer)
+  ret i64 %out
+}
+
+define i64 @umaxv_i64_splat_non_neutral_val(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @umaxv_i64_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i64 @llvm.aarch64.sve.umaxv.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 1))
+; CHECK-NEXT:    ret i64 [[OUT]]
+;
+  %out = call i64 @llvm.aarch64.sve.umaxv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat(i64 1))
+  ret i64 %out
+}
+
+;
+; UMINV
+;
+
+define i8 @uminv_i8_no_active(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: define i8 @uminv_i8_no_active(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 -1
+;
+  %out = call i8 @llvm.aarch64.sve.uminv.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a)
+  ret i8 %out
+}
+
+define i8 @uminv_i8_splat_neutral_val(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define i8 @uminv_i8_splat_neutral_val(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 -1
+;
+  %out = call i8 @llvm.aarch64.sve.uminv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat(i8 -1))
+  ret i8 %out
+}
+
+define i8 @uminv_i8_splat_non_neutral_val(<vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: define i8 @uminv_i8_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i8 @llvm.aarch64.sve.uminv.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> zeroinitializer)
+; CHECK-NEXT:    ret i8 [[OUT]]
+;
+  %out = call i8 @llvm.aarch64.sve.uminv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer)
+  ret i8 %out
+}
+
+define i8 @uminv_i8_all_active_splat(i8 %a) #0 {
+; CHECK-LABEL: define i8 @uminv_i8_all_active_splat(
+; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i8 [[A]]
+;
+  %a.insert = insertelement <vscale x 16 x i8> poison, i8 %a, i8 0
+  %a.splat = shufflevector <vscale x 16 x i8> %a.insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %out = call i8 @llvm.aarch64.sve.uminv.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> %a.splat)
+  ret i8 %out
+}
+
+define i16 @uminv_i16_splat_neutral_val(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define i16 @uminv_i16_splat_neutral_val(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i16 -1
+;
+  %out = call i16 @llvm.aarch64.sve.uminv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat(i16 -1))
+  ret i16 %out
+}
+
+define i16 @uminv_i16_splat_non_neutral_val(<vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: define i16 @uminv_i16_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i16 @llvm.aarch64.sve.uminv.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> zeroinitializer)
+; CHECK-NEXT:    ret i16 [[OUT]]
+;
+  %out = call i16 @llvm.aarch64.sve.uminv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> zeroinitializer)
+  ret i16 %out
+}
+
+define i32 @uminv_i32_splat_neutral_val(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define i32 @uminv_i32_splat_neutral_val(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i32 -1
+;
+  %out = call i32 @llvm.aarch64.sve.uminv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat(i32 -1))
+  ret i32 %out
+}
+
+define i32 @uminv_i32_splat_non_neutral_val(<vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: define i32 @uminv_i32_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i32 @llvm.aarch64.sve.uminv.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    ret i32 [[OUT]]
+;
+  %out = call i32 @llvm.aarch64.sve.uminv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer)
+  ret i32 %out
+}
+
+define i64 @uminv_i64_splat_neutral_val(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @uminv_i64_splat_neutral_val(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    ret i64 -1
+;
+  %out = call i64 @llvm.aarch64.sve.uminv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat(i64 -1))
+  ret i64 %out
+}
+
+define i64 @uminv_i64_splat_non_neutral_val(<vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: define i64 @uminv_i64_splat_non_neutral_val(
+; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[OUT:%.*]] = call i64 @llvm.aarch64.sve.uminv.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT:    ret i64 [[OUT]]
+;
+  %out = call i64 @llvm.aarch64.sve.uminv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> zeroinitializer)
+  ret i64 %out
+}
+
+attributes #0 = { "target-features"="+sve" }
diff --git a/llvm/test/Transforms/InstSimplify/AArch64/lit.local.cfg b/llvm/test/Transforms/InstSimplify/AArch64/lit.local.cfg
new file mode 100644
index 0000000000000..10d4a0e953ed4
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/AArch64/lit.local.cfg
@@ -0,0 +1,2 @@
+if "AArch64" not in config.root.targets:
+    config.unsupported = True
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
index 20676f3702294..10c265519952b 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
@@ -14,23 +14,23 @@ define void @foo(i64 %n) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT: outer.header:
 ; CHECK-NEXT:   EMIT-SCALAR ir<%outer.iv> = phi [ ir<%outer.iv.next>, outer.latch ], [ ir<0>, ir-bb<entry> ]
-; CHECK-NEXT:   EMIT ir<%gep.1> = getelementptr ir<@arr2>, ir<0>, ir<%outer.iv>
+; CHECK-NEXT:   EMIT ir<%gep.1> = getelementptr inbounds ir<@arr2>, ir<0>, ir<%outer.iv>
 ; CHECK-NEXT:   EMIT store ir<%outer.iv>, ir<%gep.1>
-; CHECK-NEXT:   EMIT ir<%add> = add ir<%outer.iv>, ir<%n>
+; CHECK-NEXT:   EMIT ir<%add> = add nsw ir<%outer.iv>, ir<%n>
 ; CHECK-NEXT: Successor(s): inner
 ; CHECK-EMPTY:
 ; CHECK-NEXT: inner:
 ; CHECK-NEXT:   EMIT-SCALAR ir<%inner.iv> = phi [ ir<%inner.iv.next>, inner ], [ ir<0>, outer.header ]
-; CHECK-NEXT:   EMIT ir<%gep.2> = getelementptr ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv>
+; CHECK-NEXT:   EMIT ir<%gep.2> = getelementptr inbounds ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv>
 ; CHECK-NEXT:   EMIT store ir<%add>, ir<%gep.2>
-; CHECK-NEXT:   EMIT ir<%inner.iv.next> = add ir<%inner.iv>, ir<1>
-; CHECK-NEXT:   EMIT ir<%inner.ec> = icmp ir<%inner.iv.next>, ir<8>
+; CHECK-NEXT:   EMIT ir<%inner.iv.next> = add nuw nsw ir<%inner.iv>, ir<1>
+; CHECK-NEXT:   EMIT ir<%inner.ec> = icmp eq ir<%inner.iv.next>, ir<8>
 ; CHECK-NEXT:   EMIT branch-on-cond ir<%inner.ec>
 ; CHECK-NEXT: Successor(s): outer.latch, inner
 ; CHECK-EMPTY:
 ; CHECK-NEXT: outer.latch:
-; CHECK-NEXT:   EMIT ir<%outer.iv.next> = add ir<%outer.iv>, ir<1>
-; CHECK-NEXT:   EMIT ir<%outer.ec> = icmp ir<%outer.iv.next>, ir<8>
+; CHECK-NEXT:   EMIT ir<%outer.iv.next> = add nuw nsw ir<%outer.iv>, ir<1>
+; CHECK-NEXT:   EMIT ir<%outer.ec> = icmp eq ir<%outer.iv.next>, ir<8>
 ; CHECK-NEXT:   EMIT branch-on-cond ir<%outer.ec>
 ; CHECK-NEXT: Successor(s): ir-bb<exit>, outer.header
 ; CHECK-EMPTY:
diff --git a/llvm/unittests/ADT/APIntTest.cpp b/llvm/unittests/ADT/APIntTest.cpp
index ca9f9f17ee112..4cb537da72e87 100644
--- a/llvm/unittests/ADT/APIntTest.cpp
+++ b/llvm/unittests/ADT/APIntTest.cpp
@@ -3823,4 +3823,87 @@ TEST(APIntTest, Fshr) {
             -8193);
 }
 
+TEST(APIntTest, clmul) {
+  EXPECT_EQ(APIntOps::clmul(APInt(4, 1), APInt(4, 2)).getZExtValue(), 2U);
+  EXPECT_EQ(APIntOps::clmul(APInt(4, 5), APInt(4, 6)).getZExtValue(), 14U);
+  EXPECT_EQ(APIntOps::clmul(APInt(4, -4, /*isSigned*/ true),
+                            APInt(4, 2, /*isSigned*/ false))
+                .getSExtValue(),
+            -8);
+  EXPECT_EQ(APIntOps::clmul(APInt(4, -4, /*isSigned*/ true),
+                            APInt(4, -5, /*isSigned*/ true))
+                .getSExtValue(),
+            4);
+  EXPECT_EQ(APIntOps::clmul(APInt(8, 0), APInt(8, 255)).getZExtValue(), 0U);
+  EXPECT_EQ(APIntOps::clmul(APInt(8, 15), APInt(8, 15)).getZExtValue(), 85U);
+  EXPECT_EQ(APIntOps::clmul(APInt(8, 1), APInt(8, 2)).getZExtValue(), 2U);
+  EXPECT_EQ(APIntOps::clmul(APInt(64, 0, /*isSigned*/ true),
+                            APInt(64, 9223372036854775807, /*isSigned*/ true))
+                .getSExtValue(),
+            0);
+  EXPECT_EQ(APIntOps::clmul(APInt(64, 1, /*isSigned*/ true),
+                            APInt(64, 2, /*isSigned*/ true))
+                .getSExtValue(),
+            2);
+  EXPECT_EQ(APIntOps::clmul(APInt(16, -2, /*isSigned*/ true),
+                            APInt(16, -1, /*isSigned*/ true))
+                .getSExtValue(),
+            -21846);
+}
+
+TEST(APIntTest, clmulr) {
+  EXPECT_EQ(APIntOps::clmulr(APInt(4, 1), APInt(4, 2)).getZExtValue(), 0U);
+  EXPECT_EQ(APIntOps::clmulr(APInt(4, 5), APInt(4, 6)).getZExtValue(), 3U);
+  EXPECT_EQ(APIntOps::clmulr(APInt(4, -4, /*isSigned*/ true),
+                             APInt(4, 2, /*isSigned*/ false))
+                .getSExtValue(),
+            3);
+  EXPECT_EQ(APIntOps::clmulr(APInt(4, -4, /*isSigned*/ true),
+                             APInt(4, -5, /*isSigned*/ true))
+                .getSExtValue(),
+            -2);
+  EXPECT_EQ(APIntOps::clmulr(APInt(8, 0), APInt(8, 255)).getZExtValue(), 0U);
+  EXPECT_EQ(APIntOps::clmulr(APInt(8, 15), APInt(8, 15)).getZExtValue(), 0U);
+  EXPECT_EQ(APIntOps::clmulr(APInt(8, 1), APInt(8, 2)).getZExtValue(), 0U);
+  EXPECT_EQ(APIntOps::clmulr(APInt(64, 0, /*isSigned*/ true),
+                             APInt(64, 9223372036854775807, /*isSigned*/ true))
+                .getSExtValue(),
+            0);
+  EXPECT_EQ(APIntOps::clmulr(APInt(64, 1, /*isSigned*/ true),
+                             APInt(64, 2, /*isSigned*/ true))
+                .getSExtValue(),
+            0);
+  EXPECT_EQ(APIntOps::clmulr(APInt(16, -2, /*isSigned*/ true),
+                             APInt(16, -1, /*isSigned*/ true))
+                .getSExtValue(),
+            -21845);
+}
+
+TEST(APIntTest, clmulh) {
+  EXPECT_EQ(APIntOps::clmulh(APInt(4, 1), APInt(4, 2)).getZExtValue(), 0U);
+  EXPECT_EQ(APIntOps::clmulh(APInt(4, 5), APInt(4, 6)).getZExtValue(), 1U);
+  EXPECT_EQ(APIntOps::clmulh(APInt(4, -4, /*isSigned*/ true),
+                             APInt(4, 2, /*isSigned*/ false))
+                .getSExtValue(),
+            1);
+  EXPECT_EQ(APIntOps::clmulh(APInt(4, -4, /*isSigned*/ true),
+                             APInt(4, -5, /*isSigned*/ true))
+                .getSExtValue(),
+            7);
+  EXPECT_EQ(APIntOps::clmulh(APInt(8, 0), APInt(8, 255)).getZExtValue(), 0U);
+  EXPECT_EQ(APIntOps::clmulh(APInt(8, 15), APInt(8, 15)).getZExtValue(), 0U);
+  EXPECT_EQ(APIntOps::clmulh(APInt(8, 1), APInt(8, 2)).getZExtValue(), 0U);
+  EXPECT_EQ(APIntOps::clmulh(APInt(64, 0, /*isSigned*/ true),
+                             APInt(64, 9223372036854775807, /*isSigned*/ true))
+                .getSExtValue(),
+            0);
+  EXPECT_EQ(APIntOps::clmulh(APInt(64, 1, /*isSigned*/ true),
+                             APInt(64, 2, /*isSigned*/ true))
+                .getSExtValue(),
+            0);
+  EXPECT_EQ(APIntOps::clmulh(APInt(16, -2, /*isSigned*/ true),
+                             APInt(16, -1, /*isSigned*/ true))
+                .getSExtValue(),
+            21845);
+}
 } // end anonymous namespace
diff --git a/llvm/unittests/Support/JobserverTest.cpp b/llvm/unittests/Support/JobserverTest.cpp
index d27445897db0a..1917145704608 100644
--- a/llvm/unittests/Support/JobserverTest.cpp
+++ b/llvm/unittests/Support/JobserverTest.cpp
@@ -15,6 +15,7 @@
 #include "llvm/Config/llvm-config.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Parallel.h"
+#include "llvm/Support/Program.h"
 #include "llvm/Support/ThreadPool.h"
 #include "llvm/Support/raw_ostream.h"
 #include "gtest/gtest.h"
@@ -40,8 +41,14 @@
 
 using namespace llvm;
 
+// Provided by the unit test main to locate the current test binary.
+extern const char *TestMainArgv0;
+
 namespace {
 
+// Unique anchor whose address helps locate the current test binary.
+static int JobserverTestAnchor = 0;
+
 // RAII helper to set an environment variable for the duration of a test.
 class ScopedEnvironment {
   std::string Name;
@@ -382,51 +389,93 @@ TEST_F(JobserverStrategyTest, ThreadPoolConcurrencyIsLimited) {
   EXPECT_EQ(CompletedTasks, NumTasks);
 }
 
-TEST_F(JobserverStrategyTest, ParallelForIsLimited) {
+// Parent-side driver that spawns a fresh process to run the child test which
+// validates that parallelFor respects the jobserver limit when it is the first
+// user of the default executor in that process.
+TEST_F(JobserverStrategyTest, ParallelForIsLimited_Subprocess) {
+  // Find the current test binary and build args to run only the child test.
+  std::string Executable =
+      sys::fs::getMainExecutable(TestMainArgv0, &JobserverTestAnchor);
+  ASSERT_FALSE(Executable.empty()) << "Failed to get main executable path";
+  SmallVector<StringRef, 4> Args{Executable,
+                                 "--gtest_filter=JobserverStrategyTest."
+                                 "ParallelForIsLimited_SubprocessChild"};
+
+  // Mark child execution. Set only after the last early-return assertion and
+  // cleared right after the child exits, so the marker cannot leak.
+  setenv("LLVM_JOBSERVER_TEST_CHILD", "1", 1);
+  std::string Error;
+  bool ExecFailed = false;
+  int RC = sys::ExecuteAndWait(Executable, Args, std::nullopt, {}, 0, 0, &Error,
+                               &ExecFailed);
+  unsetenv("LLVM_JOBSERVER_TEST_CHILD");
+  ASSERT_FALSE(ExecFailed) << Error;
+  ASSERT_EQ(RC, 0) << "Executable failed with exit code " << RC;
+}
+
+// Child-side test: create FIFO and make-proxy in this process, set the
+// jobserver strategy, and then run parallelFor.
+TEST_F(JobserverStrategyTest, ParallelForIsLimited_SubprocessChild) {
+  if (!getenv("LLVM_JOBSERVER_TEST_CHILD"))
+    GTEST_SKIP() << "Not running in child mode";
+
   // This test verifies that llvm::parallelFor respects the jobserver limit.
   const int NumExplicitJobs = 3;
   const int ConcurrencyLimit = NumExplicitJobs + 1; // +1 implicit
   const int NumTasks = 20;
 
-  LLVM_DEBUG(dbgs() << "Calling startMakeProxy with " << NumExplicitJobs
-                    << " jobs.\n");
   startMakeProxy(NumExplicitJobs);
-  LLVM_DEBUG(dbgs() << "MakeProxy is running.\n");
 
-  // Set the global strategy. parallelFor will use this.
+  // Set the global strategy before any default executor is created.
   parallel::strategy = jobserver_concurrency();
 
   std::atomic<int> ActiveTasks{0};
   std::atomic<int> MaxActiveTasks{0};
 
-  parallelFor(0, NumTasks, [&](int i) {
+  parallelFor(0, NumTasks, [&]([[maybe_unused]] int i) {
     int CurrentActive = ++ActiveTasks;
-    LLVM_DEBUG(dbgs() << "Task " << i << ": Active tasks: " << CurrentActive
-                      << "\n");
     int OldMax = MaxActiveTasks.load();
     while (CurrentActive > OldMax)
       MaxActiveTasks.compare_exchange_weak(OldMax, CurrentActive);
-
     std::this_thread::sleep_for(std::chrono::milliseconds(20));
     --ActiveTasks;
   });
 
-  LLVM_DEBUG(dbgs() << "ParallelFor finished. Max active tasks was "
-                    << MaxActiveTasks << ".\n");
   EXPECT_LE(MaxActiveTasks, ConcurrencyLimit);
 }
 
-TEST_F(JobserverStrategyTest, ParallelSortIsLimited) {
-  // This test serves as an integration test to ensure parallelSort completes
-  // correctly when running under the jobserver strategy. It doesn't directly
-  // measure concurrency but verifies correctness.
+// Parent-side driver for parallelSort child test.
+TEST_F(JobserverStrategyTest, ParallelSortIsLimited_Subprocess) {
+  std::string Executable =
+      sys::fs::getMainExecutable(TestMainArgv0, &JobserverTestAnchor);
+  ASSERT_FALSE(Executable.empty()) << "Failed to get main executable path";
+  SmallVector<StringRef, 4> Args{Executable,
+                                 "--gtest_filter=JobserverStrategyTest."
+                                 "ParallelSortIsLimited_SubprocessChild"};
+
+  // Mark child execution only once no early-return assertion can skip unsetenv.
+  setenv("LLVM_JOBSERVER_TEST_CHILD", "1", 1);
+  std::string Error;
+  bool ExecFailed = false;
+  int RC = sys::ExecuteAndWait(Executable, Args, std::nullopt, {}, 0, 0, &Error,
+                               &ExecFailed);
+  unsetenv("LLVM_JOBSERVER_TEST_CHILD");
+  ASSERT_FALSE(ExecFailed) << Error;
+  ASSERT_EQ(RC, 0) << "Executable failed with exit code " << RC;
+}
+
+// Child-side test: ensure parallelSort runs and completes correctly under the
+// jobserver strategy when it owns default executor initialization.
+TEST_F(JobserverStrategyTest, ParallelSortIsLimited_SubprocessChild) {
+  if (!getenv("LLVM_JOBSERVER_TEST_CHILD"))
+    GTEST_SKIP() << "Not running in child mode";
+
   const int NumExplicitJobs = 3;
   startMakeProxy(NumExplicitJobs);
 
   parallel::strategy = jobserver_concurrency();
 
   std::vector<int> V(1024);
-  // Fill with random data
   std::mt19937 randEngine;
   std::uniform_int_distribution<int> dist;
   for (int &i : V)
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp
index b99d656c5c50f..5742df2aa3c53 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp
@@ -139,12 +139,12 @@ compound=true
       "vector.body:\l" +
       "  EMIT vp\<%2\> = CANONICAL-INDUCTION ir\<0\>, vp\<%index.next\>\l" +
       "  EMIT-SCALAR ir\<%indvars.iv\> = phi [ ir\<0\>, vector.ph ], [ ir\<%indvars.iv.next\>, vector.body ]\l" +
-      "  EMIT ir\<%arr.idx\> = getelementptr ir\<%A\>, ir\<%indvars.iv\>\l" +
+      "  EMIT ir\<%arr.idx\> = getelementptr inbounds ir\<%A\>, ir\<%indvars.iv\>\l" +
       "  EMIT ir\<%l1\> = load ir\<%arr.idx\>\l" +
       "  EMIT ir\<%res\> = add ir\<%l1\>, ir\<10\>\l" +
       "  EMIT store ir\<%res\>, ir\<%arr.idx\>\l" +
       "  EMIT ir\<%indvars.iv.next\> = add ir\<%indvars.iv\>, ir\<1\>\l" +
-      "  EMIT ir\<%exitcond\> = icmp ir\<%indvars.iv.next\>, ir\<%N\>\l" +
+      "  EMIT ir\<%exitcond\> = icmp ne ir\<%indvars.iv.next\>, ir\<%N\>\l" +
       "  EMIT vp\<%3\> = not ir\<%exitcond\>\l" +
       "  EMIT vp\<%index.next\> = add nuw vp\<%2\>, vp\<%0\>\l" +
       "  EMIT branch-on-count vp\<%index.next\>, vp\<%1\>\l" +
@@ -305,9 +305,9 @@ compound=true
       "vector.body:\l" +
       "  EMIT vp\<%2\> = CANONICAL-INDUCTION ir\<0\>, vp\<%index.next\>\l" +
       "  EMIT-SCALAR ir\<%iv\> = phi [ ir\<0\>, vector.ph ], [ ir\<%iv.next\>, loop.latch ]\l" +
-      "  EMIT ir\<%arr.idx\> = getelementptr ir\<%A\>, ir\<%iv\>\l" +
+      "  EMIT ir\<%arr.idx\> = getelementptr inbounds ir\<%A\>, ir\<%iv\>\l" +
       "  EMIT ir\<%l1\> = load ir\<%arr.idx\>\l" +
-      "  EMIT ir\<%c\> = icmp ir\<%l1\>, ir\<0\>\l" +
+      "  EMIT ir\<%c\> = icmp eq ir\<%l1\>, ir\<0\>\l" +
       "Successor(s): loop.latch\l"
     ]
     N4 -> N6 [ label=""]
@@ -316,7 +316,7 @@ compound=true
       "  EMIT ir\<%res\> = add ir\<%l1\>, ir\<10\>\l" +
       "  EMIT store ir\<%res\>, ir\<%arr.idx\>\l" +
       "  EMIT ir\<%iv.next\> = add ir\<%iv\>, ir\<1\>\l" +
-      "  EMIT ir\<%exitcond\> = icmp ir\<%iv.next\>, ir\<%N\>\l" +
+      "  EMIT ir\<%exitcond\> = icmp ne ir\<%iv.next\>, ir\<%N\>\l" +
       "  EMIT vp\<%3\> = not ir\<%exitcond\>\l" +
       "  EMIT vp\<%index.next\> = add nuw vp\<%2\>, vp\<%0\>\l" +
       "  EMIT branch-on-count vp\<%index.next\>, vp\<%1\>\l" +
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
index 3842ba235ead3..63776b78a2088 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
@@ -1009,7 +1009,7 @@ TEST_F(VPRecipeTest, CastVPWidenRecipeToVPUser) {
   SmallVector<VPValue *, 2> Args;
   Args.push_back(Op1);
   Args.push_back(Op2);
-  VPWidenRecipe WidenR(*AI, Args, VPIRMetadata(), DebugLoc());
+  VPWidenRecipe WidenR(*AI, Args);
 
   checkVPRecipeCastImpl<VPWidenRecipe, VPUser, VPIRMetadata>(&WidenR);
   delete AI;
@@ -1053,7 +1053,7 @@ TEST_F(VPRecipeTest, CastVPWidenSelectRecipeToVPUserAndVPDef) {
   Args.push_back(Op1);
   Args.push_back(Op2);
   Args.push_back(Op3);
-  VPWidenSelectRecipe WidenSelectR(*SelectI,
+  VPWidenSelectRecipe WidenSelectR(SelectI,
                                    make_range(Args.begin(), Args.end()));
 
   checkVPRecipeCastImpl<VPWidenSelectRecipe, VPUser, VPIRMetadata>(
@@ -1093,7 +1093,7 @@ TEST_F(VPRecipeTest, CastVPWidenCastRecipeToVPUser) {
   IntegerType *Int64 = IntegerType::get(C, 64);
   auto *Cast = CastInst::CreateZExtOrBitCast(PoisonValue::get(Int32), Int64);
   VPValue *Op1 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1));
-  VPWidenCastRecipe Recipe(Instruction::ZExt, Op1, Int64, *Cast, {});
+  VPWidenCastRecipe Recipe(Instruction::ZExt, Op1, Int64, Cast);
 
   checkVPRecipeCastImpl<VPWidenCastRecipe, VPUser, VPIRMetadata>(&Recipe);
   delete Cast;
@@ -1264,7 +1264,7 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) {
     SmallVector<VPValue *, 2> Args;
     Args.push_back(Op1);
     Args.push_back(Op2);
-    VPWidenRecipe Recipe(*AI, Args, VPIRMetadata(), DebugLoc());
+    VPWidenRecipe Recipe(*AI, Args);
     EXPECT_FALSE(Recipe.mayHaveSideEffects());
     EXPECT_FALSE(Recipe.mayReadFromMemory());
     EXPECT_FALSE(Recipe.mayWriteToMemory());
@@ -1283,7 +1283,7 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) {
     Args.push_back(Op1);
     Args.push_back(Op2);
     Args.push_back(Op3);
-    VPWidenSelectRecipe Recipe(*SelectI, make_range(Args.begin(), Args.end()));
+    VPWidenSelectRecipe Recipe(SelectI, make_range(Args.begin(), Args.end()));
     EXPECT_FALSE(Recipe.mayHaveSideEffects());
     EXPECT_FALSE(Recipe.mayReadFromMemory());
     EXPECT_FALSE(Recipe.mayWriteToMemory());
@@ -1412,7 +1412,7 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) {
     Args.push_back(Op1);
     Args.push_back(Op2);
     Args.push_back(CalledFn);
-    VPWidenCallRecipe Recipe(Call, TheFn, Args);
+    VPWidenCallRecipe Recipe(Call, TheFn, Args, VPIRFlags(), VPIRMetadata());
     EXPECT_FALSE(Recipe.mayHaveSideEffects());
     EXPECT_FALSE(Recipe.mayReadFromMemory());
     EXPECT_FALSE(Recipe.mayWriteToMemory());
@@ -1468,8 +1468,7 @@ TEST_F(VPRecipeTest, dumpRecipeInPlan) {
   VPValue *ExtVPV2 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2));
   Args.push_back(ExtVPV1);
   Args.push_back(ExtVPV2);
-  VPWidenRecipe *WidenR =
-      new VPWidenRecipe(*AI, Args, VPIRMetadata(), DebugLoc());
+  VPWidenRecipe *WidenR = new VPWidenRecipe(*AI, Args);
   VPBB1->appendRecipe(WidenR);
 
   {
diff --git a/llvm/utils/TableGen/Basic/TableGen.cpp b/llvm/utils/TableGen/Basic/TableGen.cpp
index b79ae93dab4f7..a655cbbc16096 100644
--- a/llvm/utils/TableGen/Basic/TableGen.cpp
+++ b/llvm/utils/TableGen/Basic/TableGen.cpp
@@ -73,7 +73,7 @@ int tblgen_main(int argc, char **argv) {
   InitLLVM X(argc, argv);
   cl::ParseCommandLineOptions(argc, argv);
 
-  std::function<MultiFileTableGenMainFn> MainFn = nullptr;
+  MultiFileTableGenMainFn MainFn = nullptr;
   return TableGenMain(argv[0], MainFn);
 }
 
diff --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
index ef7b13e8940f8..3486a7a7fb08c 100644
--- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
@@ -1878,6 +1878,8 @@ TableGenOutputFiles RegisterInfoEmitter::run(StringRef FilenamePrefix) {
   if (RegisterInfoDebug)
     debugDump(errs());
 
+  // The suffixes should be in sync with the tablegen function in
+  // llvm/cmake/modules/TableGen.cmake.
   return {Main,
           {{"Enums.inc", Enums},
            {"MCDesc.inc", MCDesc},
diff --git a/mlir/docs/Dialects/NVVM/_index.md b/mlir/docs/Dialects/NVVM/_index.md
new file mode 100644
index 0000000000000..f4832f76f86ad
--- /dev/null
+++ b/mlir/docs/Dialects/NVVM/_index.md
@@ -0,0 +1,84 @@
+# NVVM Dialect
+
+The NVVM dialect is MLIR's LLVM-IR-based, NVIDIA-specific backend dialect. It
+models NVVM intrinsics and public ISA functionality and introduces NVIDIA
+extensions to the MLIR/LLVM type system and address spaces (e.g., global,
+shared, and cluster memory), enabling faithful lowering of GPU kernels to the
+NVPTX toolchain. While an NVVM op usually maps to a single LLVM IR intrinsic,
+the NVVM dialect uses type polymorphism and other attributes so that a single
+NVVM op can map to different LLVM intrinsics.
+
+## Scope and Capabilities
+
+The dialect covers core GPU features such as thread/block builtins, barriers
+and atomics, warp-level collectives (e.g., shuffle/vote), matrix/tensor core
+operations (e.g., `mma.sync`, `wgmma`), tensor memory accelerator (TMA)
+operations, asynchronous copies (`cp.async`, bulk/tensor variants) with memory
+barriers, cache and prefetch controls, and NVVM-specific attributes and enums
+(e.g., FP rounding modes, memory scopes, and MMA types/layouts).
+
+## Placement in the Lowering Pipeline
+
+NVVM sits below target-agnostic dialects like `gpu` and NVIDIA's `nvgpu`.
+Typical pipelines convert `gpu`/`nvgpu` ops into NVVM using
+`-convert-gpu-to-nvvm` and `-convert-nvgpu-to-nvvm`, then translate into LLVM
+for final code generation via the NVPTX backend.
+
+## Target Configuration and Serialization
+
+NVVM provides a `#nvvm.target` attribute to describe the GPU target (SM,
+features, and flags). In conjunction with `gpu` serialization (e.g.,
+`gpu-module-to-binary`), this enables producing architecture-specific GPU
+binaries (such as CUBIN) from nested GPU modules.
+
+## Inline PTX
+
+When an intrinsic is unavailable or a performance-critical sequence must be
+expressed directly, NVVM provides an `nvvm.inline_ptx` op to embed PTX inline
+as a last-resort escape hatch, with explicit operands and results.
+
+## Memory Spaces
+
+The NVVM dialect introduces the following memory spaces, each with distinct
+scopes and lifetimes:
+
+| Memory Space      | Address Space | Scope                |
+|-------------------|---------------|----------------------|
+| `generic`         | 0             | All threads          |
+| `global`          | 1             | All threads (device) |
+| `shared`          | 3             | Thread block (CTA)   |
+| `constant`        | 4             | All threads          |
+| `local`           | 5             | Single thread        |
+| `tensor`          | 6             | Thread block (CTA)   |
+| `shared_cluster`  | 7             | Thread block cluster |
+
+### Memory Space Details
+
+- **generic**: Can point to any memory space; requires runtime resolution of
+  actual address space. Use when pointer origin is unknown at compile time.
+  Performance varies based on the underlying memory space.
+- **global**: Accessible by all threads across all blocks; persists across
+  kernel launches. Highest latency but largest capacity (device memory). Best
+  for large data and inter-kernel communication.
+- **shared**: Shared within a thread block (CTA); very fast on-chip memory for
+  cooperation between threads in the same block. Limited capacity. Ideal for
+  block-level collaboration, caching, and reducing global memory traffic.
+- **constant**: Read-only memory cached per SM. Size typically limited to 64KB.
+  Best for read-only data and uniform values accessed by all threads.
+- **local**: Private to each thread. Use for per-thread private data and
+  automatic variables that don't fit in registers.
+- **tensor**: Special memory space for tensor core operations. Used by
+  `tcgen05` instructions on SM 100+ for tensor input/output operations.
+- **shared_cluster**: Distributed shared memory across thread blocks within a
+  cluster (SM 90+). Enables collaboration beyond single-block scope with fast
+  access across cluster threads.
+
+
+## Non-Goals
+
+NVVM is not a place for convenience or "wrapper" ops. It is not intended to
+introduce high-level ops that expand into multiple unrelated NVVM intrinsics or
+that lower to no intrinsic at all. Such abstractions belong in higher-level
+dialects (e.g., `nvgpu`, `gpu`, or project-specific dialects). The design
+intent is a thin, predictable, low-level surface with near-mechanical lowering
+to NVVM/LLVM IR.
\ No newline at end of file
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 87c73c4587485..524b9f820f290 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -37,84 +37,6 @@ def LLVM_PointerSharedCluster : LLVM_PointerInAddressSpace<7>;
 //===----------------------------------------------------------------------===//
 
 def NVVM_Dialect : Dialect {
-  let summary = "The NVVM dialect that models NVIDIA's public ISA";
-
-  let description = [{
-    The NVVM dialect is MLIR's LLVM-IR-based, NVIDIA-specific backend dialect. It
-    models NVVM intrinsics and public ISA functionality and introduces NVIDIA
-    extensions to the MLIR/LLVM type system and address spaces (e.g., global,
-    shared, and cluster memory), enabling faithful lowering of GPU kernels to the
-    NVPTX toolchain. While a NVVM op usually maps to a single LLVM IR intrinsic,
-    the NVVM dialect uses type polymorphism and other attributes so that a single
-    NVVM op can map to different LLVM intrinsics.
-
-    **Scope and capabilities:** The dialect covers core GPU features such as
-    thread/block builtins, barriers and atomics, warp-level collectives (e.g.,
-    shuffle/vote), matrix/tensor core operations (e.g., `mma.sync`, `wgmma`),
-    tensor memory accelerator (TMA) operations, asynchronous copies (`cp.async`,
-    bulk/tensor variants) with memory barriers, cache and prefetch controls, and
-    NVVM-specific attributes and enums (e.g., FP rounding modes, memory scopes,
-    and MMA types/layouts).
-
-    **Non-goals:** NVVM is not a place for convenience or “wrapper” ops. It is
-    not intended to introduce high-level ops that expand into multiple unrelated
-    NVVM intrinsics or that lower to no intrinsic at all. Such abstractions belong
-    in higher-level dialects (e.g., `nvgpu`, `gpu`, or project-specific dialects).
-    The design intent is a thin, predictable, low-level surface with
-    near-mechanical lowering to NVVM/LLVM IR.
-
-    **Placement in the lowering pipeline:** NVVM sits below target-agnostic
-    dialects like `gpu` and NVIDIA's `nvgpu`. Typical pipelines convert
-    `gpu`/`nvgpu` ops into NVVM using `-convert-gpu-to-nvvm` and
-    `-convert-nvgpu-to-nvvm`, then translate into LLVM for final code
-    generation via NVPTX backend.
-
-    **Target configuration and serialization:** NVVM provides a `#nvvm.target`
-    attribute to describe the GPU target (SM, features, and flags). In
-    conjunction with `gpu` serialization (e.g., `gpu-module-to-binary`), this
-    enables producing architecture-specific GPU binaries (such as CUBIN) from
-    nested GPU modules.
-
-    **Inline PTX:** When an intrinsic is unavailable or a performance-critical
-    sequence must be expressed directly, NVVM provides an `nvvm.inline_ptx` op to
-    embed PTX inline as a last-resort escape hatch, with explicit operands and
-    results.
-
-
-    **Memory Spaces:** The NVVM dialect introduces the following memory spaces,
-    each with distinct scopes and lifetimes:
-```
-    | Memory Space      | Address Space | Scope                | Lifetime          |
-    |-------------------|---------------|----------------------|-------------------|
-    | `generic`         | 0             | All threads          | Context-dependent |
-    | `global`          | 1             | All threads (device) | Application       |
-    | `shared`          | 3             | Thread block (CTA)   | Kernel execution  |
-    | `constant`        | 4             | All threads (RO)     | Application       |
-    | `local`           | 5             | Single thread        | Kernel execution  |
-    | `tensor`          | 6             | Thread block (CTA)   | Kernel execution  |
-    | `shared_cluster`  | 7             | Thread block cluster | Kernel execution  |
-```
-    **Memory Space Details:**
-    - **generic**: Can point to any memory space; requires runtime resolution of
-      actual address space. Use when pointer origin is unknown at compile time.
-      Performance varies based on the underlying memory space.
-    - **global**: Accessible by all threads across all blocks; persists across
-      kernel launches. Highest latency but largest capacity (device memory). Best
-      for large data and inter-kernel communication.
-    - **shared**: Shared within a thread block (CTA); very fast on-chip memory for
-      cooperation between threads in the same block. Limited capacity. Ideal for 
-      block-level collaboration, caching, and reducing global memory traffic.
-    - **constant**: Read-only memory cached per SM. Size typically limited to 
-      64KB. Best for read-only data and uniform values accessed by all threads.
-    - **local**: Private to each thread. Use for per-thread private data and
-      automatic variables that don't fit in registers.
-    - **tensor**: Special memory space for tensor core operations. Used by
-      `tcgen05` instructions on SM 100+ for tensor input/output operations.
-    - **shared_cluster**: Distributed shared memory across thread blocks within
-      a cluster (SM 90+). Enables collaboration beyond single-block scope with
-      fast access across cluster threads.
-  }];
-
   let name = "nvvm";
   let cppNamespace = "::mlir::NVVM";
   let dependentDialects = ["LLVM::LLVMDialect"];
diff --git a/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.td b/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.td
index 14b00b04ccc18..420e58192b8fd 100644
--- a/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.td
@@ -166,4 +166,27 @@ def TosaAttachTarget : Pass<"tosa-attach-target", "ModuleOp"> {
   ];
 }
 
+def TosaNarrowI64ToI32Pass : Pass<"tosa-narrow-i64-to-i32", "func::FuncOp"> {
+  let summary = "Narrow I64 TOSA operations to I32";
+  let description = [{
+    This pass narrows TOSA operations with 64-bit integer tensor types to
+    32-bit integer tensor types. This can be useful for backends that do not
+    support the EXT-INT64 extension of TOSA.
+  }];
+  // Adjacent string literals are concatenated as-is; keep the trailing spaces.
+  let options = [
+    Option<"aggressiveRewrite", "aggressive-rewrite", "bool", "false",
+      "If enabled, all TOSA operations are rewritten, regardless of whether the narrowing "
+      "is safe. This option may lead to data loss if not used carefully.">,
+    Option<"convertFunctionBoundaries", "convert-function-boundaries", "bool", "false",
+      "If enabled, the pass will convert function I/O types as well. Otherwise casts will "
+      "be inserted at the I/O boundaries.">
+  ];
+
+  let dependentDialects = [
+    "func::FuncDialect",
+    "tosa::TosaDialect",
+  ];
+}
+
 #endif // MLIR_DIALECT_TOSA_TRANSFORMS_PASSES
diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
index edc6565f44f00..b9a5e7d7f6eac 100644
--- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
+++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
@@ -1738,15 +1738,11 @@ LogicalResult ScaledExtPacked816OpLowering::matchAndRewrite(
   auto sourceType = cast<VectorType>(op.getSource().getType());
   auto srcElemType = cast<FloatType>(sourceType.getElementType());
   unsigned bitWidth = srcElemType.getWidth();
-  int32_t scaleSel =
-      getScaleSel(blockSize, bitWidth, firstScaleLane, firstScaleByte);
 
   auto targetType = cast<VectorType>(op.getResult().getType());
   auto destElemType = cast<FloatType>(targetType.getElementType());
-  IntegerType i32 = rewriter.getI32Type();
-  Value castedScale =
-      LLVM::BitcastOp::create(rewriter, loc, i32, adaptor.getScale());
 
+  IntegerType i32 = rewriter.getI32Type();
   Value source = adaptor.getSource();
   Type llvmResultType = typeConverter->convertType(op.getResult().getType());
   Type packedType = nullptr;
@@ -1767,15 +1763,19 @@ LogicalResult ScaledExtPacked816OpLowering::matchAndRewrite(
     return rewriter.notifyMatchFailure(op, "type conversion failed");
   }
 
-  Value castedSource =
-      LLVM::BitcastOp::create(rewriter, loc, packedType, source);
-
   std::optional<StringRef> maybeIntrinsic =
       scaledExtPacked816ToIntrinsic(srcElemType, destElemType);
   if (!maybeIntrinsic.has_value())
     return op.emitOpError(
         "no intrinsic matching packed scaled conversion on the given chipset");
 
+  int32_t scaleSel =
+      getScaleSel(blockSize, bitWidth, firstScaleLane, firstScaleByte);
+  Value castedScale =
+      LLVM::BitcastOp::create(rewriter, loc, i32, adaptor.getScale());
+  Value castedSource =
+      LLVM::BitcastOp::create(rewriter, loc, packedType, source);
+
   OperationState loweredOp(loc, *maybeIntrinsic);
   loweredOp.addTypes({llvmResultType});
   loweredOp.addOperands({castedSource, castedScale});
diff --git a/mlir/lib/Dialect/Tosa/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Tosa/Transforms/CMakeLists.txt
index 41b338d6e7189..76e9ddd5b2304 100644
--- a/mlir/lib/Dialect/Tosa/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Tosa/Transforms/CMakeLists.txt
@@ -12,6 +12,7 @@ add_mlir_dialect_library(MLIRTosaTransforms
   TosaTypeConverters.cpp
   TosaProfileCompliance.cpp
   TosaValidation.cpp
+  TosaNarrowI64ToI32.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Tosa/Transforms
@@ -21,6 +22,7 @@ add_mlir_dialect_library(MLIRTosaTransforms
 
   LINK_LIBS PUBLIC
   MLIRFuncDialect
+  MLIRFuncTransformOps
   MLIRPass
   MLIRTosaDialect
   MLIRTransformUtils
diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaNarrowI64ToI32.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaNarrowI64ToI32.cpp
new file mode 100644
index 0000000000000..ddaf7d8a5e033
--- /dev/null
+++ b/mlir/lib/Dialect/Tosa/Transforms/TosaNarrowI64ToI32.cpp
@@ -0,0 +1,310 @@
+//===- TosaNarrowI64ToI32.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass narrows TOSA operations with 64-bit integer tensor types to
+// 32-bit integer tensor types. This can be useful for backends that do not
+// support the EXT-INT64 extension of TOSA. The pass has two options:
+//
+// - aggressive-rewrite - If enabled, all TOSA operations are rewritten,
+//     regardless of whether the narrowing is safe. This option may lead to
+//     data loss if not used carefully.
+// - convert-function-boundaries - If enabled, the pass will convert function
+//     I/O types as well. Otherwise casts will be inserted at the I/O
+//     boundaries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Tosa/Transforms/Passes.h"
+
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/Func/Transforms/FuncConversions.h"
+#include "mlir/IR/Verifier.h"
+#include "mlir/Pass/Pass.h"
+
+namespace mlir {
+namespace tosa {
+#define GEN_PASS_DEF_TOSANARROWI64TOI32PASS
+#include "mlir/Dialect/Tosa/Transforms/Passes.h.inc"
+} // namespace tosa
+} // namespace mlir
+
+using namespace mlir;
+using namespace mlir::tosa;
+
+namespace {
+
+/// Recreates `op` with `operands` and with its result types, attributes and
+/// regions converted by `typeConverter`, then replaces `op` with the new op.
+LogicalResult convertGenericOp(Operation *op, ValueRange operands,
+                               ConversionPatternRewriter &rewriter,
+                               const TypeConverter *typeConverter) {
+  // Convert types of results
+  SmallVector<Type, 4> newResults;
+  if (failed(typeConverter->convertTypes(op->getResultTypes(), newResults)))
+    return failure();
+
+  // Create a new operation state
+  OperationState state(op->getLoc(), op->getName().getStringRef(), operands,
+                       newResults, {}, op->getSuccessors());
+
+  for (const NamedAttribute &namedAttribute : op->getAttrs()) {
+    const Attribute attribute = namedAttribute.getValue();
+    // Convert integer attribute type (nullopt: no conversion applies).
+    if (const auto intAttr = dyn_cast<IntegerAttr>(attribute)) {
+      const std::optional<Attribute> convertedAttribute =
+          typeConverter->convertTypeAttribute(intAttr.getType(), attribute);
+      if (!convertedAttribute)
+        return rewriter.notifyMatchFailure(
+            op, "Failed to convert integer attribute.");
+      state.addAttribute(namedAttribute.getName(), convertedAttribute.value());
+      continue;
+    }
+    if (const auto typeAttr = dyn_cast<TypeAttr>(attribute)) {
+      Type type = typeAttr.getValue();
+      const std::optional<Attribute> convertedAttribute =
+          typeConverter->convertTypeAttribute(type, attribute);
+      if (!convertedAttribute)
+        return rewriter.notifyMatchFailure(op,
+                                           "Failed to convert type attribute.");
+      state.addAttribute(namedAttribute.getName(), convertedAttribute.value());
+      continue;
+    }
+    if (const auto denseElementsAttr = dyn_cast<DenseElementsAttr>(attribute)) {
+      const Type type = denseElementsAttr.getType();
+      const std::optional<Attribute> convertedAttribute =
+          typeConverter->convertTypeAttribute(type, denseElementsAttr);
+      if (!convertedAttribute)
+        return rewriter.notifyMatchFailure(
+            op, "Failed to convert dense elements attribute.");
+      state.addAttribute(namedAttribute.getName(), convertedAttribute.value());
+      continue;
+    }
+    state.addAttribute(namedAttribute.getName(), attribute);
+  }
+
+  for (Region &region : op->getRegions()) {
+    Region *newRegion = state.addRegion();
+    rewriter.inlineRegionBefore(region, *newRegion, newRegion->begin());
+    if (failed(rewriter.convertRegionTypes(newRegion, *typeConverter)))
+      return failure();
+  }
+  Operation *newOp = rewriter.create(state);
+  rewriter.replaceOp(op, newOp->getResults());
+  return success();
+}
+
+// ===========================
+// Aggressive rewrite patterns
+// ===========================
+
+/// Aggressive-mode pattern: converts any TOSA op through convertGenericOp.
+class ConvertGenericOp : public ConversionPattern {
+public:
+  ConvertGenericOp(TypeConverter &typeConverter, MLIRContext *context)
+      : ConversionPattern(typeConverter, MatchAnyOpTypeTag{}, 0, context) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    if (isa<tosa::TosaOp>(op))
+      return convertGenericOp(op, operands, rewriter, typeConverter);
+    return rewriter.notifyMatchFailure(
+        op,
+        "Support for operations other than TOSA has not been implemented.");
+  }
+};
+
+// ===============================
+// Bounds checked rewrite patterns
+// ===============================
+
+/// Narrows the tosa.argmax result type only when the axis dimension of the
+/// input is static and smaller than the int32 maximum.
+class ConvertArgMaxOpWithBoundsChecking
+    : public OpConversionPattern<tosa::ArgMaxOp> {
+  using OpConversionPattern::OpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(tosa::ArgMaxOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
+    // Unranked inputs have no shape to query, so reject them up front.
+    const int32_t axis = op.getAxis();
+    const auto inputType = dyn_cast<ShapedType>(adaptor.getInput().getType());
+    if (!inputType || !inputType.hasRank() || !inputType.isStaticDim(axis))
+      return rewriter.notifyMatchFailure(
+          op, "Requires a static axis dimension for bounds checking.");
+    if (inputType.getDimSize(axis) >= std::numeric_limits<int32_t>::max())
+      return rewriter.notifyMatchFailure(
+          op, "Axis dimension is too large to narrow safely.");
+    const Type newResultType =
+        typeConverter->convertType(op.getOutput().getType());
+    rewriter.replaceOpWithNewOp<tosa::ArgMaxOp>(op, newResultType,
+                                                adaptor.getInput(), axis);
+    return success();
+  }
+};
+
+/// Rebuilds tosa.cast on the adaptor-provided input, declining integer casts
+/// whose input element type is wider than the op's result element type.
+class ConvertCastOpWithBoundsChecking
+    : public OpConversionPattern<tosa::CastOp> {
+  using OpConversionPattern::OpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(tosa::CastOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
+    const Value input = adaptor.getInput();
+    const auto srcType = dyn_cast<ShapedType>(input.getType());
+    const auto dstType = dyn_cast<ShapedType>(op.getResult().getType());
+    if (!(srcType && dstType))
+      return failure();
+
+    const auto srcInt = dyn_cast<IntegerType>(srcType.getElementType());
+    const auto dstInt = dyn_cast<IntegerType>(dstType.getElementType());
+    if (srcInt && dstInt && srcInt.getWidth() > dstInt.getWidth())
+      return rewriter.notifyMatchFailure(
+          op, "Narrowing cast may lead to data loss.");
+
+    rewriter.replaceOpWithNewOp<tosa::CastOp>(
+        op, typeConverter->convertType(dstType), input);
+    return success();
+  }
+};
+
+/// Pattern that converts a single op type `OpTy` through convertGenericOp.
+template <typename OpTy>
+class ConvertTypedOp : public OpConversionPattern<OpTy> {
+  using OpConversionPattern<OpTy>::OpConversionPattern;
+  LogicalResult
+  matchAndRewrite(OpTy op, typename OpTy::Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final {
+    const TypeConverter *converter = this->getTypeConverter();
+    return convertGenericOp(op, adaptor.getOperands(), rewriter, converter);
+  }
+};
+
+/// Pass that narrows i64 TOSA tensor types and i64 integer attributes to
+/// i32; see the file header for a description of the available options.
+struct TosaNarrowI64ToI32
+    : public tosa::impl::TosaNarrowI64ToI32PassBase<TosaNarrowI64ToI32> {
+  explicit TosaNarrowI64ToI32() = default;
+  explicit TosaNarrowI64ToI32(const TosaNarrowI64ToI32PassOptions &options)
+      : TosaNarrowI64ToI32() {
+    this->aggressiveRewrite = options.aggressiveRewrite;
+    this->convertFunctionBoundaries = options.convertFunctionBoundaries;
+  }
+
+  void runOnOperation() override {
+    MLIRContext *context = &getContext();
+
+    TypeConverter typeConverter;
+    typeConverter.addConversion([](Type type) -> Type { return type; });
+    typeConverter.addConversion([](IntegerType type) -> Type {
+      if (!type.isInteger(64))
+        return type;
+      return IntegerType::get(type.getContext(), 32);
+    });
+    typeConverter.addConversion(
+        [&typeConverter](RankedTensorType type) -> Type {
+          const Type elementType = type.getElementType();
+          if (!elementType.isInteger(64))
+            return type;
+          return RankedTensorType::get(type.getShape(),
+                                       typeConverter.convertType(elementType));
+        });
+
+    const auto materializeCast = [](OpBuilder &builder, Type resultType,
+                                    ValueRange inputs, Location loc) -> Value {
+      if (inputs.size() != 1)
+        return Value();
+      return tosa::CastOp::create(builder, loc, resultType, inputs.front());
+    };
+    typeConverter.addSourceMaterialization(materializeCast);
+    typeConverter.addTargetMaterialization(materializeCast);
+
+    typeConverter.addTypeAttributeConversion(
+        [](IntegerType type, IntegerAttr attribute) -> Attribute {
+          // Only i64 is narrowed; truncSSat(32) asserts on narrower widths.
+          if (!type.isInteger(64))
+            return attribute;
+          return IntegerAttr::get(IntegerType::get(type.getContext(), 32),
+                                  attribute.getValue().truncSSat(32));
+        });
+    typeConverter.addTypeAttributeConversion(
+        [&typeConverter](ShapedType type,
+                         DenseIntElementsAttr attr) -> Attribute {
+          // Index and non-i64 elements are kept (and would fail the cast).
+          if (!type.getElementType().isInteger(64))
+            return attr;
+          const ShapedType newType =
+              cast<ShapedType>(typeConverter.convertType(type));
+          const auto newElementType =
+              cast<IntegerType>(newType.getElementType());
+          if (newElementType.getWidth() == 64)
+            return attr;
+          return attr.mapValues(newElementType, [&](const APInt &v) {
+            return v.truncSSat(newElementType.getWidth());
+          });
+        });
+
+    ConversionTarget target(*context);
+    RewritePatternSet patterns(context);
+    target.addDynamicallyLegalDialect<tosa::TosaDialect>(
+        [&typeConverter](Operation *op) {
+          return typeConverter.isLegal(op->getResultTypes()) &&
+                 typeConverter.isLegal(op->getOperandTypes());
+        });
+    if (convertFunctionBoundaries) {
+      target.addDynamicallyLegalOp<func::FuncOp>(
+          [&typeConverter](func::FuncOp op) {
+            return typeConverter.isSignatureLegal(op.getFunctionType()) &&
+                   typeConverter.isLegal(&op.getBody());
+          });
+      target.addDynamicallyLegalOp<func::ReturnOp>([](func::ReturnOp op) {
+        const FunctionType funcType =
+            op->getParentOfType<func::FuncOp>().getFunctionType();
+        return llvm::equal(op.getOperandTypes(), funcType.getResults());
+      });
+      populateFunctionOpInterfaceTypeConversionPattern<func::FuncOp>(
+          patterns, typeConverter);
+      populateReturnOpTypeConversionPattern(patterns, typeConverter);
+    } else {
+      target.addDynamicallyLegalOp<func::FuncOp>(
+          [](func::FuncOp op) { return true; });
+      target.addDynamicallyLegalOp<func::ReturnOp>(
+          [](func::ReturnOp op) { return true; });
+    }
+
+    if (aggressiveRewrite) {
+      patterns.add<ConvertGenericOp>(typeConverter, context);
+    } else {
+      // Tensor
+      patterns.add<ConvertArgMaxOpWithBoundsChecking>(typeConverter, context);
+      // Data layout
+      patterns.add<ConvertTypedOp<tosa::ConcatOp>>(typeConverter, context);
+      patterns.add<ConvertTypedOp<tosa::PadOp>>(typeConverter, context);
+      patterns.add<ConvertTypedOp<tosa::ReshapeOp>>(typeConverter, context);
+      patterns.add<ConvertTypedOp<tosa::ReverseOp>>(typeConverter, context);
+      patterns.add<ConvertTypedOp<tosa::SliceOp>>(typeConverter, context);
+      patterns.add<ConvertTypedOp<tosa::TileOp>>(typeConverter, context);
+      patterns.add<ConvertTypedOp<tosa::TransposeOp>>(typeConverter, context);
+      patterns.add<ConvertTypedOp<tosa::IdentityOp>>(typeConverter, context);
+      // Type conversion
+      patterns.add<ConvertCastOpWithBoundsChecking>(typeConverter, context);
+      // Controlflow
+      patterns.add<ConvertTypedOp<tosa::IfOp>>(typeConverter, context);
+      patterns.add<ConvertTypedOp<tosa::WhileOp>>(typeConverter, context);
+    }
+
+    if (failed(
+            applyFullConversion(getOperation(), target, std::move(patterns))))
+      signalPassFailure();
+  }
+};
+
+} // namespace
diff --git a/mlir/python/mlir/dialects/gpu/__init__.py b/mlir/python/mlir/dialects/gpu/__init__.py
index 2fbcbb059f87a..d15643ca700e4 100644
--- a/mlir/python/mlir/dialects/gpu/__init__.py
+++ b/mlir/python/mlir/dialects/gpu/__init__.py
@@ -49,13 +49,13 @@ class GPUFuncOp(GPUFuncOp):
 
     FUNCTION_TYPE_ATTR_NAME = "function_type"
     SYM_NAME_ATTR_NAME = "sym_name"
-    ARGUMENT_ATTR_NAME = "arg_attrs"
-    RESULT_ATTR_NAME = "res_attrs"
 
     def __init__(
         self,
         function_type: Union[FunctionType, TypeAttr],
         sym_name: Optional[Union[str, StringAttr]] = None,
+        arg_attrs: Optional[Sequence[dict]] = None,
+        res_attrs: Optional[Sequence[dict]] = None,
         kernel: Optional[bool] = None,
         workgroup_attrib_attrs: Optional[Sequence[dict]] = None,
         private_attrib_attrs: Optional[Sequence[dict]] = None,
@@ -88,6 +88,8 @@ def __init__(
         )
         super().__init__(
             function_type,
+            arg_attrs=arg_attrs,
+            res_attrs=res_attrs,
             workgroup_attrib_attrs=workgroup_attrib_attrs,
             private_attrib_attrs=private_attrib_attrs,
             loc=loc,
diff --git a/mlir/test/Dialect/Tosa/tosa-narrow-i64-to-i32-aggressive.mlir b/mlir/test/Dialect/Tosa/tosa-narrow-i64-to-i32-aggressive.mlir
new file mode 100644
index 0000000000000..1a36177a37033
--- /dev/null
+++ b/mlir/test/Dialect/Tosa/tosa-narrow-i64-to-i32-aggressive.mlir
@@ -0,0 +1,81 @@
+// RUN: mlir-opt -split-input-file -verify-diagnostics -tosa-narrow-i64-to-i32="aggressive-rewrite=1" %s | FileCheck %s --allow-unused-prefixes --check-prefixes=COMMON,DEFAULT
+// RUN: mlir-opt -split-input-file -verify-diagnostics -tosa-narrow-i64-to-i32="aggressive-rewrite=1 convert-function-boundaries=1" %s | FileCheck %s --allow-unused-prefixes --check-prefixes=COMMON,FUNCBOUND
+
+// CHECK-LABEL: test_i64_argmax_large_axis_dim
+func.func @test_i64_argmax_large_axis_dim(%arg0: tensor<1x513x513x2147483650xi8>) -> tensor<1x513x513xi64> {
+  // DEFAULT: tosa.argmax %arg0 {axis = 3 : i32} : (tensor<1x513x513x2147483650xi8>) -> tensor<1x513x513xi32>
+  %0 = tosa.argmax %arg0 {axis = 3 : i32} : (tensor<1x513x513x2147483650xi8>) -> tensor<1x513x513xi64>
+  return %0 : tensor<1x513x513xi64>
+}
+
+// -----
+
+// CHECK-LABEL: test_convert_input_parameters
+// DEFAULT: %[[IN:.*]]: tensor<1x513x513x3xi64>
+// FUNCBOUND: %[[IN:.*]]: tensor<1x513x513x3xi32>
+func.func @test_convert_input_parameters(%arg0: tensor<1x513x513x3xi64>) -> tensor<1x513x513x3xf32> {
+  // DEFAULT: %[[FUNC_BOUND_CAST:.*]] = tosa.cast %[[IN]] : (tensor<1x513x513x3xi64>) -> tensor<1x513x513x3xi32>
+  // DEFAULT: %[[CAST1:.*]] = tosa.cast %[[FUNC_BOUND_CAST]] : (tensor<1x513x513x3xi32>) -> tensor<1x513x513x3xi32>
+  // FUNCBOUND: %[[CAST1:.*]] = tosa.cast %[[IN]] : (tensor<1x513x513x3xi32>) -> tensor<1x513x513x3xi32>
+  %0 = tosa.cast %arg0 : (tensor<1x513x513x3xi64>) -> tensor<1x513x513x3xi32>
+
+  // COMMON: %[[CAST2:.*]] = tosa.cast %[[CAST1]] : (tensor<1x513x513x3xi32>) -> tensor<1x513x513x3xf32>
+  %1 = tosa.cast %0 : (tensor<1x513x513x3xi32>) -> tensor<1x513x513x3xf32>
+  return %1 : tensor<1x513x513x3xf32>
+}
+
+// -----
+
+// CHECK-LABEL: test_add
+// DEFAULT: %[[IN0:.*]]: tensor<13x21x1xi64>, %[[IN1:.*]]: tensor<13x21x3xi64>
+// FUNCBOUND: %[[IN0:.*]]: tensor<13x21x1xi32>, %[[IN1:.*]]: tensor<13x21x3xi32>
+func.func @test_add(%arg0: tensor<13x21x1xi64>, %arg1: tensor<13x21x3xi64>) -> tensor<13x21x3xi64> {
+  // DEFAULT-DAG: %[[FUNC_BOUND_CAST0:.*]] = tosa.cast %[[IN0]] : (tensor<13x21x1xi64>) -> tensor<13x21x1xi32>
+  // DEFAULT-DAG: %[[FUNC_BOUND_CAST1:.*]] = tosa.cast %[[IN1]] : (tensor<13x21x3xi64>) -> tensor<13x21x3xi32>
+  // DEFAULT: %[[ADD:.*]] = tosa.add %[[FUNC_BOUND_CAST0]], %[[FUNC_BOUND_CAST1]] : (tensor<13x21x1xi32>, tensor<13x21x3xi32>) -> tensor<13x21x3xi32>
+  // DEFAULT: %[[CAST:.*]] = tosa.cast %[[ADD]] : (tensor<13x21x3xi32>) -> tensor<13x21x3xi64>
+  // DEFAULT: return %[[CAST]] : tensor<13x21x3xi64>
+  // FUNCBOUND: %[[ADD:.*]] = tosa.add %[[IN0]], %[[IN1]] : (tensor<13x21x1xi32>, tensor<13x21x3xi32>) -> tensor<13x21x3xi32>
+  // FUNCBOUND: return %[[ADD]] : tensor<13x21x3xi32>
+  %0 = tosa.add %arg0, %arg1 : (tensor<13x21x1xi64>, tensor<13x21x3xi64>) -> tensor<13x21x3xi64>
+  return %0 : tensor<13x21x3xi64>
+}
+
+// -----
+
+// CHECK-LABEL: test_regions
+// DEFAULT: %[[IN0:.*]]: tensor<i64>, %[[IN1:.*]]: tensor<i64>
+func.func @test_regions(%arg0: tensor<i64>, %arg1: tensor<i64>, %arg2: tensor<i1>) -> tensor<i64> {
+  // DEFAULT-DAG: %[[CAST0:.*]] = tosa.cast %[[IN0]] : (tensor<i64>) -> tensor<i32>
+  // DEFAULT-DAG: %[[CAST1:.*]] = tosa.cast %[[IN1]] : (tensor<i64>) -> tensor<i32>
+  // COMMON: %[[IF_RESULT:.*]] = tosa.cond_if
+  %0 = tosa.cond_if %arg2 : tensor<i1> -> (tensor<i64>) {
+    // DEFAULT: %[[ADD:.*]] = tosa.add %[[CAST0]], %[[CAST1]] : (tensor<i32>, tensor<i32>) -> tensor<i32>
+    // FUNCBOUND: %[[ADD:.*]] = tosa.add %[[IN0]], %[[IN1]] : (tensor<i32>, tensor<i32>) -> tensor<i32>
+    %1 = tosa.add %arg0, %arg1 : (tensor<i64>, tensor<i64>) -> tensor<i64>
+    // COMMON: tosa.yield %[[ADD]] : tensor<i32>
+    tosa.yield %1 : tensor<i64>
+  } else {
+    // DEFAULT: %[[SUB:.*]] = tosa.sub %[[CAST0]], %[[CAST1]] : (tensor<i32>, tensor<i32>) -> tensor<i32>
+    // FUNCBOUND: %[[SUB:.*]] = tosa.sub %[[IN0]], %[[IN1]] : (tensor<i32>, tensor<i32>) -> tensor<i32>
+    %1 = tosa.sub %arg0, %arg1 : (tensor<i64>, tensor<i64>) -> tensor<i64>
+    // COMMON: tosa.yield %[[SUB]] : tensor<i32>
+    tosa.yield %1 : tensor<i64>
+  }
+  // DEFAULT: %[[OUT:.*]] = tosa.cast %[[IF_RESULT]] : (tensor<i32>) -> tensor<i64>
+  // DEFAULT: return %[[OUT]] : tensor<i64>
+  // FUNCBOUND: return %[[IF_RESULT]] : tensor<i32>
+  return %0 : tensor<i64>
+}
+
+// -----
+
+// CHECK-LABEL: test_const
+func.func @test_const() -> tensor<2xi64> {
+  // COMMON: %[[CONST:.*]] = "tosa.const"() <{values = dense<[1, 2]> : tensor<2xi32>}> : () -> tensor<2xi32>
+  %0 = "tosa.const"() <{values = dense<[1, 2]> : tensor<2xi64>}> : () -> tensor<2xi64>
+  // DEFAULT: %[[OUT:.*]] = tosa.cast %[[CONST]] : (tensor<2xi32>) -> tensor<2xi64>
+  // DEFAULT: return %[[OUT]] : tensor<2xi64>
+  // FUNCBOUND: return %[[CONST]] : tensor<2xi32>
+  return %0 : tensor<2xi64>
+}
diff --git a/mlir/test/Dialect/Tosa/tosa-narrow-i64-to-i32.mlir b/mlir/test/Dialect/Tosa/tosa-narrow-i64-to-i32.mlir
new file mode 100644
index 0000000000000..a14483fcdd7b0
--- /dev/null
+++ b/mlir/test/Dialect/Tosa/tosa-narrow-i64-to-i32.mlir
@@ -0,0 +1,162 @@
+// RUN: mlir-opt -split-input-file -verify-diagnostics -tosa-narrow-i64-to-i32="convert-function-boundaries=0" %s | FileCheck %s --allow-unused-prefixes --check-prefixes=COMMON,DEFAULT
+// RUN: mlir-opt -split-input-file -verify-diagnostics -tosa-narrow-i64-to-i32="convert-function-boundaries=1" %s | FileCheck %s --allow-unused-prefixes --check-prefixes=COMMON,FUNCBOUND
+
+// -----
+
+// CHECK-LABEL: test_i64_argmax
+func.func @test_i64_argmax(%arg0: tensor<1x513x513x19xi8>) -> tensor<1x513x513xi64> {
+  // COMMON: %[[ARGMAX:.*]] = tosa.argmax %arg0 {axis = 3 : i32} : (tensor<1x513x513x19xi8>) -> tensor<1x513x513xi32>
+  %0 = tosa.argmax %arg0 {axis = 3 : i32} : (tensor<1x513x513x19xi8>) -> tensor<1x513x513xi64>
+
+  // DEFAULT: %[[CAST:.*]] = tosa.cast %[[ARGMAX]] : (tensor<1x513x513xi32>) -> tensor<1x513x513xi64>
+  // FUNCBOUND: return %[[ARGMAX]] : tensor<1x513x513xi32>
+  return %0 : tensor<1x513x513xi64>
+}
+
+// -----
+
+// CHECK-LABEL: test_i64_argmax_cast
+func.func @test_i64_argmax_cast(%arg0: tensor<1x513x513x19xi8>) -> tensor<1x513x513xf32> {
+  // COMMON: %[[ARGMAX:.*]] = tosa.argmax %arg0 {axis = 3 : i32} : (tensor<1x513x513x19xi8>) -> tensor<1x513x513xi32>
+  %0 = tosa.argmax %arg0 {axis = 3 : i32} : (tensor<1x513x513x19xi8>) -> tensor<1x513x513xi64>
+  // COMMON: tosa.cast %[[ARGMAX]] : (tensor<1x513x513xi32>) -> tensor<1x513x513xf32>
+  %1 = tosa.cast %0 : (tensor<1x513x513xi64>) -> tensor<1x513x513xf32>
+  return %1 : tensor<1x513x513xf32>
+}
+
+// -----
+
+// CHECK-LABEL: test_i64_argmax_large_axis_dim
+func.func @test_i64_argmax_large_axis_dim(%arg0: tensor<1x513x513x2147483650xi8>) -> tensor<1x513x513xi64> {
+  // expected-error @+1 {{failed to legalize operation 'tosa.argmax'}}
+  %0 = tosa.argmax %arg0 {axis = 3 : i32} : (tensor<1x513x513x2147483650xi8>) -> tensor<1x513x513xi64>
+  return %0 : tensor<1x513x513xi64>
+}
+
+// -----
+
+// CHECK-LABEL: test_add
+func.func @test_add(%arg0: tensor<13x21x1xi64>, %arg1: tensor<13x21x3xi64>) -> tensor<13x21x3xi64> {
+  // expected-error @+1 {{failed to legalize operation 'tosa.add'}}
+  %0 = tosa.add %arg0, %arg1 : (tensor<13x21x1xi64>, tensor<13x21x3xi64>) -> tensor<13x21x3xi64>
+  return %0 : tensor<13x21x3xi64>
+}
+
+// -----
+
+// CHECK-LABEL: test_regions
+func.func @test_regions(%arg0: tensor<1x2xi32>, %arg1: tensor<1xi32>, %arg2: tensor<i1>) -> tensor<1xi32> {
+  // COMMON: %[[IF_RESULT:.*]] = tosa.cond_if %arg2 : tensor<i1> -> tensor<1xi32>
+  %0 = tosa.cond_if %arg2 : tensor<i1> -> tensor<1xi32> {
+    // COMMON: %[[ARGMAX:.*]] = tosa.argmax %arg0 {axis = 1 : i32} : (tensor<1x2xi32>) -> tensor<1xi32>
+    %1 = tosa.argmax %arg0 {axis = 1 : i32} : (tensor<1x2xi32>) -> tensor<1xi64>
+    // COMMON: %[[CAST:.*]] = tosa.cast %[[ARGMAX]] : (tensor<1xi32>) -> tensor<1xi32>
+    %2 = tosa.cast %1 : (tensor<1xi64>) -> tensor<1xi32>
+    // COMMON: tosa.yield %[[CAST]] : tensor<1xi32>
+    tosa.yield %2 : tensor<1xi32>
+  } else {
+    tosa.yield %arg1 : tensor<1xi32>
+  }
+  // COMMON: return %[[IF_RESULT]] : tensor<1xi32>
+  return %0 : tensor<1xi32>
+}
+
+// -----
+
+// CHECK-LABEL: test_concat
+func.func @test_concat(%arg0: tensor<13x21x3xi64>, %arg1: tensor<13x21x3xi64>) -> tensor<26x21x3xi64> {
+  // COMMON: tosa.concat %{{.*}}, %{{.*}} {axis = 0 : i32} : (tensor<13x21x3xi32>, tensor<13x21x3xi32>) -> tensor<26x21x3xi32>
+  %0 = tosa.concat %arg0, %arg1 {axis = 0 : i32} : (tensor<13x21x3xi64>, tensor<13x21x3xi64>) -> tensor<26x21x3xi64>
+  return %0 : tensor<26x21x3xi64>
+}
+
+// -----
+
+// CHECK-LABEL: test_pad
+func.func @test_pad(%arg0: tensor<13x21x3xi64>, %arg1: tensor<1xi64>) -> tensor<15x23x5xi64> {
+  %padding = tosa.const_shape {values = dense<1> : tensor<6xindex>} : () -> !tosa.shape<6>
+  // COMMON: tosa.pad %{{.*}}, %{{.*}}, %{{.*}} : (tensor<13x21x3xi32>, !tosa.shape<6>, tensor<1xi32>) -> tensor<15x23x5xi32>
+  %1 = tosa.pad %arg0, %padding, %arg1 : (tensor<13x21x3xi64>, !tosa.shape<6>, tensor<1xi64>) -> tensor<15x23x5xi64>
+  return %1 : tensor<15x23x5xi64>
+}
+
+// -----
+
+// CHECK-LABEL: test_reshape
+func.func @test_reshape(%arg0: tensor<13x21x3xi64>) -> tensor<1x819xi64> {
+  %1 = tosa.const_shape {values = dense<[1, 819]> : tensor<2xindex>} : () -> !tosa.shape<2>
+  // COMMON: tosa.reshape %{{.*}}, %{{.*}} : (tensor<13x21x3xi32>, !tosa.shape<2>) -> tensor<1x819xi32>
+  %0 = tosa.reshape %arg0, %1 : (tensor<13x21x3xi64>, !tosa.shape<2>) -> tensor<1x819xi64>
+  return %0 : tensor<1x819xi64>
+}
+
+// -----
+
+// CHECK-LABEL: test_reverse
+func.func @test_reverse(%arg0: tensor<13x21x3xi64>) -> tensor<13x21x3xi64> {
+  // COMMON: tosa.reverse %{{.*}} {axis = 0 : i32} : (tensor<13x21x3xi32>) -> tensor<13x21x3xi32>
+  %0 = tosa.reverse %arg0 {axis = 0 : i32} : (tensor<13x21x3xi64>) -> tensor<13x21x3xi64>
+  return %0 : tensor<13x21x3xi64>
+}
+
+// -----
+
+// CHECK-LABEL: test_slice
+func.func @test_slice(%arg0: tensor<13x21x3xi64>) -> tensor<4x11x1xi64> {
+  %0 = tosa.const_shape {values = dense<[4, 11, 1]> : tensor<3xindex>} : () -> !tosa.shape<3>
+  %1 = tosa.const_shape {values = dense<[6, 8, 0]> : tensor<3xindex>} : () -> !tosa.shape<3>
+  // COMMON: tosa.slice %{{.*}}, %{{.*}}, %{{.*}} : (tensor<13x21x3xi32>, !tosa.shape<3>, !tosa.shape<3>) -> tensor<4x11x1xi32>
+  %2 = tosa.slice %arg0, %0, %1 : (tensor<13x21x3xi64>, !tosa.shape<3>, !tosa.shape<3>) -> tensor<4x11x1xi64>
+  return %2 : tensor<4x11x1xi64>
+}
+
+// -----
+
+// CHECK-LABEL: test_tile
+func.func @test_tile(%arg0: tensor<13x21x3xi64>) -> tensor<39x21x6xi64> {
+  %cst = tosa.const_shape { values = dense<[3, 1, 2]> : tensor<3xindex> } : () -> !tosa.shape<3>
+  // COMMON: tosa.tile %{{.*}}, %{{.*}} : (tensor<13x21x3xi32>, !tosa.shape<3>) -> tensor<39x21x6xi32>
+  %0 = tosa.tile %arg0, %cst: (tensor<13x21x3xi64>, !tosa.shape<3>) -> tensor<39x21x6xi64>
+  return %0 : tensor<39x21x6xi64>
+}
+
+// -----
+
+// CHECK-LABEL: test_transpose
+func.func @test_transpose(%arg0: tensor<13x21x3xi64>) -> tensor<3x13x21xi64> {
+  // COMMON: tosa.transpose %{{.*}} {perms = array<i32: 2, 0, 1>} : (tensor<13x21x3xi32>) -> tensor<3x13x21xi32>
+  %1 = tosa.transpose %arg0 {perms = array<i32: 2, 0, 1>} : (tensor<13x21x3xi64>) -> tensor<3x13x21xi64>
+  return %1 : tensor<3x13x21xi64>
+}
+
+// -----
+
+// CHECK-LABEL: test_transition_to_i64
+func.func @test_transition_to_i64(%arg0: tensor<1xi32>) -> tensor<1xi64> {
+  // COMMON: %[[CAST:.*]] = tosa.cast %arg0 : (tensor<1xi32>) -> tensor<1xi32>
+  %0 = tosa.cast %arg0 : (tensor<1xi32>) -> tensor<1xi64>
+  // COMMON: %[[IDENTITY1:.*]] = tosa.identity %[[CAST]] : (tensor<1xi32>) -> tensor<1xi32>
+  %1 = tosa.identity %0 : (tensor<1xi64>) -> tensor<1xi64>
+  // COMMON: %[[IDENTITY2:.*]] = tosa.identity %[[IDENTITY1]] : (tensor<1xi32>) -> tensor<1xi32>
+  %2 = tosa.identity %1 : (tensor<1xi64>) -> tensor<1xi64>
+  // DEFAULT: %[[OUT_CAST:.*]] = tosa.cast %[[IDENTITY2]] : (tensor<1xi32>) -> tensor<1xi64>
+  // DEFAULT: return %[[OUT_CAST]] : tensor<1xi64>
+  // FUNCBOUND: return %[[IDENTITY2]] : tensor<1xi32>
+  return %2 : tensor<1xi64>
+}
+
+// -----
+
+// CHECK-LABEL: test_transition_from_i64
+func.func @test_transition_from_i64(%arg0: tensor<1xi64>) -> tensor<1xi32> {
+  // DEFAULT: %[[CAST:.*]] = tosa.cast %arg0 : (tensor<1xi64>) -> tensor<1xi32>
+  // DEFAULT: %[[IDENTITY1:.*]] = tosa.identity %[[CAST]] : (tensor<1xi32>) -> tensor<1xi32>
+  // FUNCBOUND: %[[IDENTITY1:.*]] = tosa.identity %arg0 : (tensor<1xi32>) -> tensor<1xi32>
+  %0 = tosa.identity %arg0 : (tensor<1xi64>) -> tensor<1xi64>
+  // COMMON: %[[IDENTITY2:.*]] = tosa.identity %[[IDENTITY1]] : (tensor<1xi32>) -> tensor<1xi32>
+  %1 = tosa.identity %0 : (tensor<1xi64>) -> tensor<1xi64>
+  // COMMON: %[[OUT_CAST:.*]] = tosa.cast %[[IDENTITY2]] : (tensor<1xi32>) -> tensor<1xi32>
+  %2 = tosa.cast %1 : (tensor<1xi64>) -> tensor<1xi32>
+  // COMMON: return %[[OUT_CAST]] : tensor<1xi32>
+  return %2 : tensor<1xi32>
+}
diff --git a/mlir/test/python/dialects/gpu/dialect.py b/mlir/test/python/dialects/gpu/dialect.py
index 3945c99c41091..1a009b7dfa30d 100644
--- a/mlir/test/python/dialects/gpu/dialect.py
+++ b/mlir/test/python/dialects/gpu/dialect.py
@@ -133,9 +133,10 @@ def builder(func: gpu.GPUFuncOp) -> None:
             ), func.known_grid_size
 
             func = gpu.GPUFuncOp(
-                func_type,
+                ir.FunctionType.get(inputs=[T.index()], results=[]),
                 sym_name="non_kernel_func",
                 body_builder=builder,
+                arg_attrs=[{"gpu.some_attribute": ir.StringAttr.get("foo")}],
             )
             assert not func.is_kernel
             assert func.known_block_size is None
@@ -154,10 +155,11 @@ def builder(func: gpu.GPUFuncOp) -> None:
     # CHECK:   %[[VAL_0:.*]] = gpu.global_id  x
     # CHECK:   gpu.return
     # CHECK: }
-    # CHECK: gpu.func @non_kernel_func() {
-    # CHECK:   %[[VAL_0:.*]] = gpu.global_id  x
-    # CHECK:   gpu.return
-    # CHECK: }
+    # CHECK:   gpu.func @non_kernel_func(
+    # CHECK-SAME:      %[[ARG0:.*]]: index {gpu.some_attribute = "foo"}) {
+    # CHECK:           %[[GLOBAL_ID_0:.*]] = gpu.global_id  x
+    # CHECK:           gpu.return
+    # CHECK:         }
 
 
 # CHECK-LABEL: testGPULaunchFuncOp
diff --git a/utils/bazel/MODULE.bazel b/utils/bazel/MODULE.bazel
new file mode 100644
index 0000000000000..d061487acf4d7
--- /dev/null
+++ b/utils/bazel/MODULE.bazel
@@ -0,0 +1,38 @@
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+"""bzlmod configuration for llvm-project"""
+module(name = "llvm-project-overlay")
+
+bazel_dep(name = "apple_support", version = "1.24.1", repo_name = "build_bazel_apple_support")
+bazel_dep(name = "bazel_skylib", version = "1.8.2")
+bazel_dep(name = "platforms", version = "1.0.0")
+bazel_dep(name = "rules_android", version = "0.6.6")
+bazel_dep(name = "rules_cc", version = "0.2.11")
+bazel_dep(name = "rules_foreign_cc", version = "0.15.1")
+bazel_dep(name = "rules_python", version = "1.6.3")
+bazel_dep(name = "rules_shell", version = "0.6.1")
+
+llvm_repos_extension = use_extension(":extensions.bzl", "llvm_repos_extension")
+
+use_repo(
+    llvm_repos_extension,
+    "llvm-raw",
+    "llvm_zlib",
+    "vulkan_headers",
+    "vulkan_sdk_setup",
+    "gmp",
+    "mpfr",
+    "mpc",
+    "pfm",
+    "llvm_zstd",
+    "pybind11",
+    "pyyaml",
+    "robin_map",
+    "nanobind",
+)
+
+llvm_configure = use_repo_rule("@llvm-raw//utils/bazel:configure.bzl", "llvm_configure")
+
+llvm_configure(name = "llvm-project")
diff --git a/utils/bazel/MODULE.bazel.lock b/utils/bazel/MODULE.bazel.lock
new file mode 100644
index 0000000000000..64de258401e91
--- /dev/null
+++ b/utils/bazel/MODULE.bazel.lock
@@ -0,0 +1,490 @@
+{
+  "lockFileVersion": 16,
+  "registryFileHashes": {
+    "https://bcr.bazel.build/bazel_registry.json": "8a28e4aff06ee60aed2a8c281907fb8bcbf3b753c91fb5a5c57da3215d5b3497",
+    "https://bcr.bazel.build/modules/abseil-cpp/20210324.2/MODULE.bazel": "7cd0312e064fde87c8d1cd79ba06c876bd23630c83466e9500321be55c96ace2",
+    "https://bcr.bazel.build/modules/abseil-cpp/20211102.0/MODULE.bazel": "70390338f7a5106231d20620712f7cccb659cd0e9d073d1991c038eb9fc57589",
+    "https://bcr.bazel.build/modules/abseil-cpp/20230125.1/MODULE.bazel": "89047429cb0207707b2dface14ba7f8df85273d484c2572755be4bab7ce9c3a0",
+    "https://bcr.bazel.build/modules/abseil-cpp/20230802.0.bcr.1/MODULE.bazel": "1c8cec495288dccd14fdae6e3f95f772c1c91857047a098fad772034264cc8cb",
+    "https://bcr.bazel.build/modules/abseil-cpp/20230802.0/MODULE.bazel": "d253ae36a8bd9ee3c5955384096ccb6baf16a1b1e93e858370da0a3b94f77c16",
+    "https://bcr.bazel.build/modules/abseil-cpp/20230802.1/MODULE.bazel": "fa92e2eb41a04df73cdabeec37107316f7e5272650f81d6cc096418fe647b915",
+    "https://bcr.bazel.build/modules/abseil-cpp/20240116.1/MODULE.bazel": "37bcdb4440fbb61df6a1c296ae01b327f19e9bb521f9b8e26ec854b6f97309ed",
+    "https://bcr.bazel.build/modules/abseil-cpp/20240116.2/MODULE.bazel": "73939767a4686cd9a520d16af5ab440071ed75cec1a876bf2fcfaf1f71987a16",
+    "https://bcr.bazel.build/modules/abseil-cpp/20250127.0/MODULE.bazel": "d1086e248cda6576862b4b3fe9ad76a214e08c189af5b42557a6e1888812c5d5",
+    "https://bcr.bazel.build/modules/abseil-cpp/20250127.0/source.json": "1b996859f840d8efc7c720efc61dcf2a84b1261cb3974cbbe9b6666ebf567775",
+    "https://bcr.bazel.build/modules/abseil-py/2.1.0/MODULE.bazel": "5ebe5bf853769c65707e5c28f216798f7a4b1042015e6a36e6d03094d94bec8a",
+    "https://bcr.bazel.build/modules/abseil-py/2.1.0/source.json": "0e8fc4f088ce07099c1cd6594c20c7ddbb48b4b3c0849b7d94ba94be88ff042b",
+    "https://bcr.bazel.build/modules/apple_support/1.11.1/MODULE.bazel": "1843d7cd8a58369a444fc6000e7304425fba600ff641592161d9f15b179fb896",
+    "https://bcr.bazel.build/modules/apple_support/1.15.1/MODULE.bazel": "a0556fefca0b1bb2de8567b8827518f94db6a6e7e7d632b4c48dc5f865bc7c85",
+    "https://bcr.bazel.build/modules/apple_support/1.24.1/MODULE.bazel": "f46e8ddad60aef170ee92b2f3d00ef66c147ceafea68b6877cb45bd91737f5f8",
+    "https://bcr.bazel.build/modules/apple_support/1.24.1/source.json": "cf725267cbacc5f028ef13bb77e7f2c2e0066923a4dab1025e4a0511b1ed258a",
+    "https://bcr.bazel.build/modules/bazel_features/1.1.0/MODULE.bazel": "cfd42ff3b815a5f39554d97182657f8c4b9719568eb7fded2b9135f084bf760b",
+    "https://bcr.bazel.build/modules/bazel_features/1.1.1/MODULE.bazel": "27b8c79ef57efe08efccbd9dd6ef70d61b4798320b8d3c134fd571f78963dbcd",
+    "https://bcr.bazel.build/modules/bazel_features/1.11.0/MODULE.bazel": "f9382337dd5a474c3b7d334c2f83e50b6eaedc284253334cf823044a26de03e8",
+    "https://bcr.bazel.build/modules/bazel_features/1.13.0/MODULE.bazel": "c14c33c7c3c730612bdbe14ebbb5e61936b6f11322ea95a6e91cd1ba962f94df",
+    "https://bcr.bazel.build/modules/bazel_features/1.15.0/MODULE.bazel": "d38ff6e517149dc509406aca0db3ad1efdd890a85e049585b7234d04238e2a4d",
+    "https://bcr.bazel.build/modules/bazel_features/1.17.0/MODULE.bazel": "039de32d21b816b47bd42c778e0454217e9c9caac4a3cf8e15c7231ee3ddee4d",
+    "https://bcr.bazel.build/modules/bazel_features/1.18.0/MODULE.bazel": "1be0ae2557ab3a72a57aeb31b29be347bcdc5d2b1eb1e70f39e3851a7e97041a",
+    "https://bcr.bazel.build/modules/bazel_features/1.19.0/MODULE.bazel": "59adcdf28230d220f0067b1f435b8537dd033bfff8db21335ef9217919c7fb58",
+    "https://bcr.bazel.build/modules/bazel_features/1.21.0/MODULE.bazel": "675642261665d8eea09989aa3b8afb5c37627f1be178382c320d1b46afba5e3b",
+    "https://bcr.bazel.build/modules/bazel_features/1.23.0/MODULE.bazel": "fd1ac84bc4e97a5a0816b7fd7d4d4f6d837b0047cf4cbd81652d616af3a6591a",
+    "https://bcr.bazel.build/modules/bazel_features/1.27.0/MODULE.bazel": "621eeee06c4458a9121d1f104efb80f39d34deff4984e778359c60eaf1a8cb65",
+    "https://bcr.bazel.build/modules/bazel_features/1.28.0/MODULE.bazel": "4b4200e6cbf8fa335b2c3f43e1d6ef3e240319c33d43d60cc0fbd4b87ece299d",
+    "https://bcr.bazel.build/modules/bazel_features/1.3.0/MODULE.bazel": "cdcafe83ec318cda34e02948e81d790aab8df7a929cec6f6969f13a489ccecd9",
+    "https://bcr.bazel.build/modules/bazel_features/1.30.0/MODULE.bazel": "a14b62d05969a293b80257e72e597c2da7f717e1e69fa8b339703ed6731bec87",
+    "https://bcr.bazel.build/modules/bazel_features/1.30.0/source.json": "b07e17f067fe4f69f90b03b36ef1e08fe0d1f3cac254c1241a1818773e3423bc",
+    "https://bcr.bazel.build/modules/bazel_features/1.4.1/MODULE.bazel": "e45b6bb2350aff3e442ae1111c555e27eac1d915e77775f6fdc4b351b758b5d7",
+    "https://bcr.bazel.build/modules/bazel_features/1.9.1/MODULE.bazel": "8f679097876a9b609ad1f60249c49d68bfab783dd9be012faf9d82547b14815a",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.0.3/MODULE.bazel": "bcb0fd896384802d1ad283b4e4eb4d718eebd8cb820b0a2c3a347fb971afd9d8",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.1.1/MODULE.bazel": "1add3e7d93ff2e6998f9e118022c84d163917d912f5afafb3058e3d2f1545b5e",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.2.0/MODULE.bazel": "44fe84260e454ed94ad326352a698422dbe372b21a1ac9f3eab76eb531223686",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.2.1/MODULE.bazel": "f35baf9da0efe45fa3da1696ae906eea3d615ad41e2e3def4aeb4e8bc0ef9a7a",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.3.0/MODULE.bazel": "20228b92868bf5cfc41bda7afc8a8ba2a543201851de39d990ec957b513579c5",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.4.1/MODULE.bazel": "a0dcb779424be33100dcae821e9e27e4f2901d9dfd5333efe5ac6a8d7ab75e1d",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.4.2/MODULE.bazel": "3bd40978e7a1fac911d5989e6b09d8f64921865a45822d8b09e815eaa726a651",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.5.0/MODULE.bazel": "32880f5e2945ce6a03d1fbd588e9198c0a959bb42297b2cfaf1685b7bc32e138",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.6.1/MODULE.bazel": "8fdee2dbaace6c252131c00e1de4b165dc65af02ea278476187765e1a617b917",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.7.0/MODULE.bazel": "0db596f4563de7938de764cc8deeabec291f55e8ec15299718b93c4423e9796d",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.7.1/MODULE.bazel": "3120d80c5861aa616222ec015332e5f8d3171e062e3e804a2a0253e1be26e59b",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.8.1/MODULE.bazel": "88ade7293becda963e0e3ea33e7d54d3425127e0a326e0d17da085a5f1f03ff6",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.8.2/MODULE.bazel": "69ad6927098316848b34a9142bcc975e018ba27f08c4ff403f50c1b6e646ca67",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.8.2/source.json": "34a3c8bcf233b835eb74be9d628899bb32999d3e0eadef1947a0a562a2b16ffb",
+    "https://bcr.bazel.build/modules/bazel_worker_api/0.0.1/MODULE.bazel": "02a13b77321773b2042e70ee5e4c5e099c8ddee4cf2da9cd420442c36938d4bd",
+    "https://bcr.bazel.build/modules/bazel_worker_api/0.0.4/MODULE.bazel": "460aa12d01231a80cce03c548287b433b321d205b0028ae596728c35e5ee442e",
+    "https://bcr.bazel.build/modules/bazel_worker_api/0.0.4/source.json": "d353c410d47a8b65d09fa98e83d57ebec257a2c2b9c6e42d6fda1cb25e5464a5",
+    "https://bcr.bazel.build/modules/bazel_worker_java/0.0.4/MODULE.bazel": "82494a01018bb7ef06d4a17ec4cd7a758721f10eb8b6c820a818e70d669500db",
+    "https://bcr.bazel.build/modules/bazel_worker_java/0.0.4/source.json": "a2d30458fd86cf022c2b6331e652526fa08e17573b2f5034a9dbcacdf9c2583c",
+    "https://bcr.bazel.build/modules/buildozer/7.1.2/MODULE.bazel": "2e8dd40ede9c454042645fd8d8d0cd1527966aa5c919de86661e62953cd73d84",
+    "https://bcr.bazel.build/modules/buildozer/7.1.2/source.json": "c9028a501d2db85793a6996205c8de120944f50a0d570438fcae0457a5f9d1f8",
+    "https://bcr.bazel.build/modules/gazelle/0.32.0/MODULE.bazel": "b499f58a5d0d3537f3cf5b76d8ada18242f64ec474d8391247438bf04f58c7b8",
+    "https://bcr.bazel.build/modules/gazelle/0.33.0/MODULE.bazel": "a13a0f279b462b784fb8dd52a4074526c4a2afe70e114c7d09066097a46b3350",
+    "https://bcr.bazel.build/modules/gazelle/0.34.0/MODULE.bazel": "abdd8ce4d70978933209db92e436deb3a8b737859e9354fb5fd11fb5c2004c8a",
+    "https://bcr.bazel.build/modules/gazelle/0.36.0/MODULE.bazel": "e375d5d6e9a6ca59b0cb38b0540bc9a05b6aa926d322f2de268ad267a2ee74c0",
+    "https://bcr.bazel.build/modules/gazelle/0.40.0/MODULE.bazel": "42ba5378ebe845fca43989a53186ab436d956db498acde790685fe0e8f9c6146",
+    "https://bcr.bazel.build/modules/gazelle/0.40.0/source.json": "1e5ef6e4d8b9b6836d93273c781e78ff829ea2e077afef7a57298040fa4f010a",
+    "https://bcr.bazel.build/modules/google_benchmark/1.8.2/MODULE.bazel": "a70cf1bba851000ba93b58ae2f6d76490a9feb74192e57ab8e8ff13c34ec50cb",
+    "https://bcr.bazel.build/modules/googletest/1.11.0/MODULE.bazel": "3a83f095183f66345ca86aa13c58b59f9f94a2f81999c093d4eeaa2d262d12f4",
+    "https://bcr.bazel.build/modules/googletest/1.14.0.bcr.1/MODULE.bazel": "22c31a561553727960057361aa33bf20fb2e98584bc4fec007906e27053f80c6",
+    "https://bcr.bazel.build/modules/googletest/1.14.0/MODULE.bazel": "cfbcbf3e6eac06ef9d85900f64424708cc08687d1b527f0ef65aa7517af8118f",
+    "https://bcr.bazel.build/modules/googletest/1.15.2/MODULE.bazel": "6de1edc1d26cafb0ea1a6ab3f4d4192d91a312fd2d360b63adaa213cd00b2108",
+    "https://bcr.bazel.build/modules/googletest/1.15.2/source.json": "dbdda654dcb3a0d7a8bc5d0ac5fc7e150b58c2a986025ae5bc634bb2cb61f470",
+    "https://bcr.bazel.build/modules/jsoncpp/1.9.5/MODULE.bazel": "31271aedc59e815656f5736f282bb7509a97c7ecb43e927ac1a37966e0578075",
+    "https://bcr.bazel.build/modules/jsoncpp/1.9.6/MODULE.bazel": "2f8d20d3b7d54143213c4dfc3d98225c42de7d666011528dc8fe91591e2e17b0",
+    "https://bcr.bazel.build/modules/jsoncpp/1.9.6/source.json": "a04756d367a2126c3541682864ecec52f92cdee80a35735a3cb249ce015ca000",
+    "https://bcr.bazel.build/modules/libpfm/4.11.0/MODULE.bazel": "45061ff025b301940f1e30d2c16bea596c25b176c8b6b3087e92615adbd52902",
+    "https://bcr.bazel.build/modules/nlohmann_json/3.6.1/MODULE.bazel": "6f7b417dcc794d9add9e556673ad25cb3ba835224290f4f848f8e2db1e1fca74",
+    "https://bcr.bazel.build/modules/nlohmann_json/3.6.1/source.json": "f448c6e8963fdfa7eb831457df83ad63d3d6355018f6574fb017e8169deb43a9",
+    "https://bcr.bazel.build/modules/platforms/0.0.10/MODULE.bazel": "8cb8efaf200bdeb2150d93e162c40f388529a25852b332cec879373771e48ed5",
+    "https://bcr.bazel.build/modules/platforms/0.0.11/MODULE.bazel": "0daefc49732e227caa8bfa834d65dc52e8cc18a2faf80df25e8caea151a9413f",
+    "https://bcr.bazel.build/modules/platforms/0.0.4/MODULE.bazel": "9b328e31ee156f53f3c416a64f8491f7eb731742655a47c9eec4703a71644aee",
+    "https://bcr.bazel.build/modules/platforms/0.0.5/MODULE.bazel": "5733b54ea419d5eaf7997054bb55f6a1d0b5ff8aedf0176fef9eea44f3acda37",
+    "https://bcr.bazel.build/modules/platforms/0.0.6/MODULE.bazel": "ad6eeef431dc52aefd2d77ed20a4b353f8ebf0f4ecdd26a807d2da5aa8cd0615",
+    "https://bcr.bazel.build/modules/platforms/0.0.7/MODULE.bazel": "72fd4a0ede9ee5c021f6a8dd92b503e089f46c227ba2813ff183b71616034814",
+    "https://bcr.bazel.build/modules/platforms/0.0.8/MODULE.bazel": "9f142c03e348f6d263719f5074b21ef3adf0b139ee4c5133e2aa35664da9eb2d",
+    "https://bcr.bazel.build/modules/platforms/0.0.9/MODULE.bazel": "4a87a60c927b56ddd67db50c89acaa62f4ce2a1d2149ccb63ffd871d5ce29ebc",
+    "https://bcr.bazel.build/modules/platforms/1.0.0/MODULE.bazel": "f05feb42b48f1b3c225e4ccf351f367be0371411a803198ec34a389fb22aa580",
+    "https://bcr.bazel.build/modules/platforms/1.0.0/source.json": "f4ff1fd412e0246fd38c82328eb209130ead81d62dcd5a9e40910f867f733d96",
+    "https://bcr.bazel.build/modules/protobuf/21.7/MODULE.bazel": "a5a29bb89544f9b97edce05642fac225a808b5b7be74038ea3640fae2f8e66a7",
+    "https://bcr.bazel.build/modules/protobuf/23.1/MODULE.bazel": "88b393b3eb4101d18129e5db51847cd40a5517a53e81216144a8c32dfeeca52a",
+    "https://bcr.bazel.build/modules/protobuf/24.4/MODULE.bazel": "7bc7ce5f2abf36b3b7b7c8218d3acdebb9426aeb35c2257c96445756f970eb12",
+    "https://bcr.bazel.build/modules/protobuf/27.0/MODULE.bazel": "7873b60be88844a0a1d8f80b9d5d20cfbd8495a689b8763e76c6372998d3f64c",
+    "https://bcr.bazel.build/modules/protobuf/27.1/MODULE.bazel": "703a7b614728bb06647f965264967a8ef1c39e09e8f167b3ca0bb1fd80449c0d",
+    "https://bcr.bazel.build/modules/protobuf/27.2/MODULE.bazel": "32450b50673882e4c8c3d10a83f3bc82161b213ed2f80d17e38bece8f165c295",
+    "https://bcr.bazel.build/modules/protobuf/29.0-rc2/MODULE.bazel": "6241d35983510143049943fc0d57937937122baf1b287862f9dc8590fc4c37df",
+    "https://bcr.bazel.build/modules/protobuf/29.0-rc3/MODULE.bazel": "33c2dfa286578573afc55a7acaea3cada4122b9631007c594bf0729f41c8de92",
+    "https://bcr.bazel.build/modules/protobuf/29.0/MODULE.bazel": "319dc8bf4c679ff87e71b1ccfb5a6e90a6dbc4693501d471f48662ac46d04e4e",
+    "https://bcr.bazel.build/modules/protobuf/3.19.0/MODULE.bazel": "6b5fbb433f760a99a22b18b6850ed5784ef0e9928a72668b66e4d7ccd47db9b0",
+    "https://bcr.bazel.build/modules/protobuf/3.19.2/MODULE.bazel": "532ffe5f2186b69fdde039efe6df13ba726ff338c6bc82275ad433013fa10573",
+    "https://bcr.bazel.build/modules/protobuf/3.19.6/MODULE.bazel": "9233edc5e1f2ee276a60de3eaa47ac4132302ef9643238f23128fea53ea12858",
+    "https://bcr.bazel.build/modules/protobuf/31.1/MODULE.bazel": "379a389bb330b7b8c1cdf331cc90bf3e13de5614799b3b52cdb7c6f389f6b38e",
+    "https://bcr.bazel.build/modules/protobuf/31.1/source.json": "25af5d0219da0c0fc4d1191a24ce438e6ca7f49d2e1a94f354efeba6ef10426f",
+    "https://bcr.bazel.build/modules/pybind11_bazel/2.11.1/MODULE.bazel": "88af1c246226d87e65be78ed49ecd1e6f5e98648558c14ce99176da041dc378e",
+    "https://bcr.bazel.build/modules/pybind11_bazel/2.12.0/MODULE.bazel": "e6f4c20442eaa7c90d7190d8dc539d0ab422f95c65a57cc59562170c58ae3d34",
+    "https://bcr.bazel.build/modules/pybind11_bazel/2.12.0/source.json": "6900fdc8a9e95866b8c0d4ad4aba4d4236317b5c1cd04c502df3f0d33afed680",
+    "https://bcr.bazel.build/modules/re2/2023-09-01/MODULE.bazel": "cb3d511531b16cfc78a225a9e2136007a48cf8a677e4264baeab57fe78a80206",
+    "https://bcr.bazel.build/modules/re2/2024-07-02.bcr.1/MODULE.bazel": "b4963dda9b31080be1905ef085ecd7dd6cd47c05c79b9cdf83ade83ab2ab271a",
+    "https://bcr.bazel.build/modules/re2/2024-07-02.bcr.1/source.json": "2ff292be6ef3340325ce8a045ecc326e92cbfab47c7cbab4bd85d28971b97ac4",
+    "https://bcr.bazel.build/modules/re2/2024-07-02/MODULE.bazel": "0eadc4395959969297cbcf31a249ff457f2f1d456228c67719480205aa306daa",
+    "https://bcr.bazel.build/modules/rules_android/0.1.1/MODULE.bazel": "48809ab0091b07ad0182defb787c4c5328bd3a278938415c00a7b69b50c4d3a8",
+    "https://bcr.bazel.build/modules/rules_android/0.6.6/MODULE.bazel": "b0fb569752aab65ab1a9db0a8f6cfaf5aa1754965e17e95dcf0e4d88e192a68d",
+    "https://bcr.bazel.build/modules/rules_android/0.6.6/source.json": "a9d8dc2d5a102dc03269a94acc886a4cab82cdcb9ccbc77b0f665d6d17a6ae09",
+    "https://bcr.bazel.build/modules/rules_apple/3.16.0/MODULE.bazel": "0d1caf0b8375942ce98ea944be754a18874041e4e0459401d925577624d3a54a",
+    "https://bcr.bazel.build/modules/rules_apple/3.16.0/source.json": "d8b5fe461272018cc07cfafce11fe369c7525330804c37eec5a82f84cd475366",
+    "https://bcr.bazel.build/modules/rules_cc/0.0.1/MODULE.bazel": "cb2aa0747f84c6c3a78dad4e2049c154f08ab9d166b1273835a8174940365647",
+    "https://bcr.bazel.build/modules/rules_cc/0.0.10/MODULE.bazel": "ec1705118f7eaedd6e118508d3d26deba2a4e76476ada7e0e3965211be012002",
+    "https://bcr.bazel.build/modules/rules_cc/0.0.13/MODULE.bazel": "0e8529ed7b323dad0775ff924d2ae5af7640b23553dfcd4d34344c7e7a867191",
+    "https://bcr.bazel.build/modules/rules_cc/0.0.14/MODULE.bazel": "5e343a3aac88b8d7af3b1b6d2093b55c347b8eefc2e7d1442f7a02dc8fea48ac",
+    "https://bcr.bazel.build/modules/rules_cc/0.0.15/MODULE.bazel": "6704c35f7b4a72502ee81f61bf88706b54f06b3cbe5558ac17e2e14666cd5dcc",
+    "https://bcr.bazel.build/modules/rules_cc/0.0.16/MODULE.bazel": "7661303b8fc1b4d7f532e54e9d6565771fea666fbdf839e0a86affcd02defe87",
+    "https://bcr.bazel.build/modules/rules_cc/0.0.17/MODULE.bazel": "2ae1d8f4238ec67d7185d8861cb0a2cdf4bc608697c331b95bf990e69b62e64a",
+    "https://bcr.bazel.build/modules/rules_cc/0.0.2/MODULE.bazel": "6915987c90970493ab97393024c156ea8fb9f3bea953b2f3ec05c34f19b5695c",
+    "https://bcr.bazel.build/modules/rules_cc/0.0.6/MODULE.bazel": "abf360251023dfe3efcef65ab9d56beefa8394d4176dd29529750e1c57eaa33f",
+    "https://bcr.bazel.build/modules/rules_cc/0.0.8/MODULE.bazel": "964c85c82cfeb6f3855e6a07054fdb159aced38e99a5eecf7bce9d53990afa3e",
+    "https://bcr.bazel.build/modules/rules_cc/0.0.9/MODULE.bazel": "836e76439f354b89afe6a911a7adf59a6b2518fafb174483ad78a2a2fde7b1c5",
+    "https://bcr.bazel.build/modules/rules_cc/0.1.1/MODULE.bazel": "2f0222a6f229f0bf44cd711dc13c858dad98c62d52bd51d8fc3a764a83125513",
+    "https://bcr.bazel.build/modules/rules_cc/0.2.11/MODULE.bazel": "e94f24f065bf2191dba2dace951814378b66a94bb3bcc48077492fe0508059b5",
+    "https://bcr.bazel.build/modules/rules_cc/0.2.11/source.json": "4d555dc20c9c135b21b2e403cf0ce8393fb65711b2305979ce053df4ee3e78de",
+    "https://bcr.bazel.build/modules/rules_cc/0.2.8/MODULE.bazel": "f1df20f0bf22c28192a794f29b501ee2018fa37a3862a1a2132ae2940a23a642",
+    "https://bcr.bazel.build/modules/rules_foreign_cc/0.15.1/MODULE.bazel": "c2c60d26c79fda484acb95cdbec46e89d6b28b4845cb277160ce1e0c8622bb88",
+    "https://bcr.bazel.build/modules/rules_foreign_cc/0.15.1/source.json": "a161811a63ba8a859086da3b7ff3ad04f2e9c255d7727b41087103fc0eb22f55",
+    "https://bcr.bazel.build/modules/rules_foreign_cc/0.9.0/MODULE.bazel": "c9e8c682bf75b0e7c704166d79b599f93b72cfca5ad7477df596947891feeef6",
+    "https://bcr.bazel.build/modules/rules_fuzzing/0.5.2/MODULE.bazel": "40c97d1144356f52905566c55811f13b299453a14ac7769dfba2ac38192337a8",
+    "https://bcr.bazel.build/modules/rules_go/0.41.0/MODULE.bazel": "55861d8e8bb0e62cbd2896f60ff303f62ffcb0eddb74ecb0e5c0cbe36fc292c8",
+    "https://bcr.bazel.build/modules/rules_go/0.42.0/MODULE.bazel": "8cfa875b9aa8c6fce2b2e5925e73c1388173ea3c32a0db4d2b4804b453c14270",
+    "https://bcr.bazel.build/modules/rules_go/0.46.0/MODULE.bazel": "3477df8bdcc49e698b9d25f734c4f3a9f5931ff34ee48a2c662be168f5f2d3fd",
+    "https://bcr.bazel.build/modules/rules_go/0.50.1/MODULE.bazel": "b91a308dc5782bb0a8021ad4330c81fea5bda77f96b9e4c117b9b9c8f6665ee0",
+    "https://bcr.bazel.build/modules/rules_go/0.51.0-rc2/MODULE.bazel": "edfc3a9cea7bedb0eaaff37b0d7817c1a4bf72b3c615580b0ffcee6c52690fd4",
+    "https://bcr.bazel.build/modules/rules_go/0.51.0-rc2/source.json": "6b5cd0b3da2bd0e6949580851db990a04af0a285f072b9a0f059424457cd8cc9",
+    "https://bcr.bazel.build/modules/rules_java/4.0.0/MODULE.bazel": "5a78a7ae82cd1a33cef56dc578c7d2a46ed0dca12643ee45edbb8417899e6f74",
+    "https://bcr.bazel.build/modules/rules_java/5.3.5/MODULE.bazel": "a4ec4f2db570171e3e5eb753276ee4b389bae16b96207e9d3230895c99644b86",
+    "https://bcr.bazel.build/modules/rules_java/6.0.0/MODULE.bazel": "8a43b7df601a7ec1af61d79345c17b31ea1fedc6711fd4abfd013ea612978e39",
+    "https://bcr.bazel.build/modules/rules_java/6.3.0/MODULE.bazel": "a97c7678c19f236a956ad260d59c86e10a463badb7eb2eda787490f4c969b963",
+    "https://bcr.bazel.build/modules/rules_java/6.4.0/MODULE.bazel": "e986a9fe25aeaa84ac17ca093ef13a4637f6107375f64667a15999f77db6c8f6",
+    "https://bcr.bazel.build/modules/rules_java/6.5.2/MODULE.bazel": "1d440d262d0e08453fa0c4d8f699ba81609ed0e9a9a0f02cd10b3e7942e61e31",
+    "https://bcr.bazel.build/modules/rules_java/7.1.0/MODULE.bazel": "30d9135a2b6561c761bd67bd4990da591e6bdc128790ce3e7afd6a3558b2fb64",
+    "https://bcr.bazel.build/modules/rules_java/7.10.0/MODULE.bazel": "530c3beb3067e870561739f1144329a21c851ff771cd752a49e06e3dc9c2e71a",
+    "https://bcr.bazel.build/modules/rules_java/7.12.2/MODULE.bazel": "579c505165ee757a4280ef83cda0150eea193eed3bef50b1004ba88b99da6de6",
+    "https://bcr.bazel.build/modules/rules_java/7.2.0/MODULE.bazel": "06c0334c9be61e6cef2c8c84a7800cef502063269a5af25ceb100b192453d4ab",
+    "https://bcr.bazel.build/modules/rules_java/7.3.2/MODULE.bazel": "50dece891cfdf1741ea230d001aa9c14398062f2b7c066470accace78e412bc2",
+    "https://bcr.bazel.build/modules/rules_java/7.4.0/MODULE.bazel": "a592852f8a3dd539e82ee6542013bf2cadfc4c6946be8941e189d224500a8934",
+    "https://bcr.bazel.build/modules/rules_java/7.6.1/MODULE.bazel": "2f14b7e8a1aa2f67ae92bc69d1ec0fa8d9f827c4e17ff5e5f02e91caa3b2d0fe",
+    "https://bcr.bazel.build/modules/rules_java/8.13.0/MODULE.bazel": "0444ebf737d144cf2bb2ccb368e7f1cce735264285f2a3711785827c1686625e",
+    "https://bcr.bazel.build/modules/rules_java/8.13.0/source.json": "4605c0f676b87dd9d1fabd4d743b71f04d97503bd1a79aad53f87399fb5396de",
+    "https://bcr.bazel.build/modules/rules_java/8.3.2/MODULE.bazel": "7336d5511ad5af0b8615fdc7477535a2e4e723a357b6713af439fe8cf0195017",
+    "https://bcr.bazel.build/modules/rules_java/8.5.1/MODULE.bazel": "d8a9e38cc5228881f7055a6079f6f7821a073df3744d441978e7a43e20226939",
+    "https://bcr.bazel.build/modules/rules_java/8.6.0/MODULE.bazel": "9c064c434606d75a086f15ade5edb514308cccd1544c2b2a89bbac4310e41c71",
+    "https://bcr.bazel.build/modules/rules_java/8.6.1/MODULE.bazel": "f4808e2ab5b0197f094cabce9f4b006a27766beb6a9975931da07099560ca9c2",
+    "https://bcr.bazel.build/modules/rules_jvm_external/4.4.2/MODULE.bazel": "a56b85e418c83eb1839819f0b515c431010160383306d13ec21959ac412d2fe7",
+    "https://bcr.bazel.build/modules/rules_jvm_external/5.1/MODULE.bazel": "33f6f999e03183f7d088c9be518a63467dfd0be94a11d0055fe2d210f89aa909",
+    "https://bcr.bazel.build/modules/rules_jvm_external/5.2/MODULE.bazel": "d9351ba35217ad0de03816ef3ed63f89d411349353077348a45348b096615036",
+    "https://bcr.bazel.build/modules/rules_jvm_external/5.3/MODULE.bazel": "bf93870767689637164657731849fb887ad086739bd5d360d90007a581d5527d",
+    "https://bcr.bazel.build/modules/rules_jvm_external/6.1/MODULE.bazel": "75b5fec090dbd46cf9b7d8ea08cf84a0472d92ba3585b476f44c326eda8059c4",
+    "https://bcr.bazel.build/modules/rules_jvm_external/6.2/MODULE.bazel": "36a6e52487a855f33cb960724eb56547fa87e2c98a0474c3acad94339d7f8e99",
+    "https://bcr.bazel.build/modules/rules_jvm_external/6.3/MODULE.bazel": "c998e060b85f71e00de5ec552019347c8bca255062c990ac02d051bb80a38df0",
+    "https://bcr.bazel.build/modules/rules_jvm_external/6.6/MODULE.bazel": "153042249c7060536dc95b6bb9f9bb8063b8a0b0cb7acdb381bddbc2374aed55",
+    "https://bcr.bazel.build/modules/rules_jvm_external/6.7/MODULE.bazel": "e717beabc4d091ecb2c803c2d341b88590e9116b8bf7947915eeb33aab4f96dd",
+    "https://bcr.bazel.build/modules/rules_jvm_external/6.7/source.json": "5426f412d0a7fc6b611643376c7e4a82dec991491b9ce5cb1cfdd25fe2e92be4",
+    "https://bcr.bazel.build/modules/rules_kotlin/1.9.0/MODULE.bazel": "ef85697305025e5a61f395d4eaede272a5393cee479ace6686dba707de804d59",
+    "https://bcr.bazel.build/modules/rules_kotlin/1.9.5/MODULE.bazel": "043a16a572f610558ec2030db3ff0c9938574e7dd9f58bded1bb07c0192ef025",
+    "https://bcr.bazel.build/modules/rules_kotlin/1.9.6/MODULE.bazel": "d269a01a18ee74d0335450b10f62c9ed81f2321d7958a2934e44272fe82dcef3",
+    "https://bcr.bazel.build/modules/rules_kotlin/1.9.6/source.json": "2faa4794364282db7c06600b7e5e34867a564ae91bda7cae7c29c64e9466b7d5",
+    "https://bcr.bazel.build/modules/rules_license/0.0.3/MODULE.bazel": "627e9ab0247f7d1e05736b59dbb1b6871373de5ad31c3011880b4133cafd4bd0",
+    "https://bcr.bazel.build/modules/rules_license/0.0.7/MODULE.bazel": "088fbeb0b6a419005b89cf93fe62d9517c0a2b8bb56af3244af65ecfe37e7d5d",
+    "https://bcr.bazel.build/modules/rules_license/1.0.0/MODULE.bazel": "a7fda60eefdf3d8c827262ba499957e4df06f659330bbe6cdbdb975b768bb65c",
+    "https://bcr.bazel.build/modules/rules_license/1.0.0/source.json": "a52c89e54cc311196e478f8382df91c15f7a2bfdf4c6cd0e2675cc2ff0b56efb",
+    "https://bcr.bazel.build/modules/rules_pkg/0.7.0/MODULE.bazel": "df99f03fc7934a4737122518bb87e667e62d780b610910f0447665a7e2be62dc",
+    "https://bcr.bazel.build/modules/rules_pkg/1.0.1/MODULE.bazel": "5b1df97dbc29623bccdf2b0dcd0f5cb08e2f2c9050aab1092fd39a41e82686ff",
+    "https://bcr.bazel.build/modules/rules_pkg/1.0.1/source.json": "bd82e5d7b9ce2d31e380dd9f50c111d678c3bdaca190cb76b0e1c71b05e1ba8a",
+    "https://bcr.bazel.build/modules/rules_proto/4.0.0/MODULE.bazel": "a7a7b6ce9bee418c1a760b3d84f83a299ad6952f9903c67f19e4edd964894e06",
+    "https://bcr.bazel.build/modules/rules_proto/5.3.0-21.7/MODULE.bazel": "e8dff86b0971688790ae75528fe1813f71809b5afd57facb44dad9e8eca631b7",
+    "https://bcr.bazel.build/modules/rules_proto/6.0.0-rc1/MODULE.bazel": "1e5b502e2e1a9e825eef74476a5a1ee524a92297085015a052510b09a1a09483",
+    "https://bcr.bazel.build/modules/rules_proto/6.0.0/MODULE.bazel": "b531d7f09f58dce456cd61b4579ce8c86b38544da75184eadaf0a7cb7966453f",
+    "https://bcr.bazel.build/modules/rules_proto/6.0.2/MODULE.bazel": "ce916b775a62b90b61888052a416ccdda405212b6aaeb39522f7dc53431a5e73",
+    "https://bcr.bazel.build/modules/rules_proto/7.0.2/MODULE.bazel": "bf81793bd6d2ad89a37a40693e56c61b0ee30f7a7fdbaf3eabbf5f39de47dea2",
+    "https://bcr.bazel.build/modules/rules_proto/7.0.2/source.json": "1e5e7260ae32ef4f2b52fd1d0de8d03b606a44c91b694d2f1afb1d3b28a48ce1",
+    "https://bcr.bazel.build/modules/rules_python/0.10.2/MODULE.bazel": "cc82bc96f2997baa545ab3ce73f196d040ffb8756fd2d66125a530031cd90e5f",
+    "https://bcr.bazel.build/modules/rules_python/0.23.1/MODULE.bazel": "49ffccf0511cb8414de28321f5fcf2a31312b47c40cc21577144b7447f2bf300",
+    "https://bcr.bazel.build/modules/rules_python/0.25.0/MODULE.bazel": "72f1506841c920a1afec76975b35312410eea3aa7b63267436bfb1dd91d2d382",
+    "https://bcr.bazel.build/modules/rules_python/0.28.0/MODULE.bazel": "cba2573d870babc976664a912539b320cbaa7114cd3e8f053c720171cde331ed",
+    "https://bcr.bazel.build/modules/rules_python/0.31.0/MODULE.bazel": "93a43dc47ee570e6ec9f5779b2e64c1476a6ce921c48cc9a1678a91dd5f8fd58",
+    "https://bcr.bazel.build/modules/rules_python/0.33.2/MODULE.bazel": "3e036c4ad8d804a4dad897d333d8dce200d943df4827cb849840055be8d2e937",
+    "https://bcr.bazel.build/modules/rules_python/0.37.1/MODULE.bazel": "3faeb2d9fa0a81f8980643ee33f212308f4d93eea4b9ce6f36d0b742e71e9500",
+    "https://bcr.bazel.build/modules/rules_python/0.37.2/MODULE.bazel": "b5ffde91410745750b6c13be1c5dc4555ef5bc50562af4a89fd77807fdde626a",
+    "https://bcr.bazel.build/modules/rules_python/0.4.0/MODULE.bazel": "9208ee05fd48bf09ac60ed269791cf17fb343db56c8226a720fbb1cdf467166c",
+    "https://bcr.bazel.build/modules/rules_python/0.40.0/MODULE.bazel": "9d1a3cd88ed7d8e39583d9ffe56ae8a244f67783ae89b60caafc9f5cf318ada7",
+    "https://bcr.bazel.build/modules/rules_python/1.0.0/MODULE.bazel": "898a3d999c22caa585eb062b600f88654bf92efb204fa346fb55f6f8edffca43",
+    "https://bcr.bazel.build/modules/rules_python/1.2.0/MODULE.bazel": "5aeeb48b2a6c19d668b48adf2b8a2b209a6310c230db0ce77450f148a89846e4",
+    "https://bcr.bazel.build/modules/rules_python/1.6.3/MODULE.bazel": "a7b80c42cb3de5ee2a5fa1abc119684593704fcd2fec83165ebe615dec76574f",
+    "https://bcr.bazel.build/modules/rules_python/1.6.3/source.json": "f0be74977e5604a6526c8a416cda22985093ff7d5d380d41722d7e44015cc419",
+    "https://bcr.bazel.build/modules/rules_robolectric/4.14.1.2/MODULE.bazel": "d44fec647d0aeb67b9f3b980cf68ba634976f3ae7ccd6c07d790b59b87a4f251",
+    "https://bcr.bazel.build/modules/rules_robolectric/4.14.1.2/source.json": "37c10335f2361c337c5c1f34ed36d2da70534c23088062b33a8bdaab68aa9dea",
+    "https://bcr.bazel.build/modules/rules_shell/0.1.2/MODULE.bazel": "66e4ca3ce084b04af0b9ff05ff14cab4e5df7503973818bb91cbc6cda08d32fc",
+    "https://bcr.bazel.build/modules/rules_shell/0.2.0/MODULE.bazel": "fda8a652ab3c7d8fee214de05e7a9916d8b28082234e8d2c0094505c5268ed3c",
+    "https://bcr.bazel.build/modules/rules_shell/0.3.0/MODULE.bazel": "de4402cd12f4cc8fda2354fce179fdb068c0b9ca1ec2d2b17b3e21b24c1a937b",
+    "https://bcr.bazel.build/modules/rules_shell/0.6.1/MODULE.bazel": "72e76b0eea4e81611ef5452aa82b3da34caca0c8b7b5c0c9584338aa93bae26b",
+    "https://bcr.bazel.build/modules/rules_shell/0.6.1/source.json": "20ec05cd5e592055e214b2da8ccb283c7f2a421ea0dc2acbf1aa792e11c03d0c",
+    "https://bcr.bazel.build/modules/rules_swift/1.16.0/MODULE.bazel": "4a09f199545a60d09895e8281362b1ff3bb08bbde69c6fc87aff5b92fcc916ca",
+    "https://bcr.bazel.build/modules/rules_swift/2.1.1/MODULE.bazel": "494900a80f944fc7aa61500c2073d9729dff0b764f0e89b824eb746959bc1046",
+    "https://bcr.bazel.build/modules/rules_swift/2.1.1/source.json": "40fc69dfaac64deddbb75bd99cdac55f4427d9ca0afbe408576a65428427a186",
+    "https://bcr.bazel.build/modules/stardoc/0.5.1/MODULE.bazel": "1a05d92974d0c122f5ccf09291442580317cdd859f07a8655f1db9a60374f9f8",
+    "https://bcr.bazel.build/modules/stardoc/0.5.3/MODULE.bazel": "c7f6948dae6999bf0db32c1858ae345f112cacf98f174c7a8bb707e41b974f1c",
+    "https://bcr.bazel.build/modules/stardoc/0.5.6/MODULE.bazel": "c43dabc564990eeab55e25ed61c07a1aadafe9ece96a4efabb3f8bf9063b71ef",
+    "https://bcr.bazel.build/modules/stardoc/0.6.2/MODULE.bazel": "7060193196395f5dd668eda046ccbeacebfd98efc77fed418dbe2b82ffaa39fd",
+    "https://bcr.bazel.build/modules/stardoc/0.7.0/MODULE.bazel": "05e3d6d30c099b6770e97da986c53bd31844d7f13d41412480ea265ac9e8079c",
+    "https://bcr.bazel.build/modules/stardoc/0.7.1/MODULE.bazel": "3548faea4ee5dda5580f9af150e79d0f6aea934fc60c1cc50f4efdd9420759e7",
+    "https://bcr.bazel.build/modules/stardoc/0.7.2/MODULE.bazel": "fc152419aa2ea0f51c29583fab1e8c99ddefd5b3778421845606ee628629e0e5",
+    "https://bcr.bazel.build/modules/stardoc/0.7.2/source.json": "58b029e5e901d6802967754adf0a9056747e8176f017cfe3607c0851f4d42216",
+    "https://bcr.bazel.build/modules/swift_argument_parser/1.3.1.1/MODULE.bazel": "5e463fbfba7b1701d957555ed45097d7f984211330106ccd1352c6e0af0dcf91",
+    "https://bcr.bazel.build/modules/swift_argument_parser/1.3.1.1/source.json": "32bd87e5f4d7acc57c5b2ff7c325ae3061d5e242c0c4c214ae87e0f1c13e54cb",
+    "https://bcr.bazel.build/modules/upb/0.0.0-20220923-a547704/MODULE.bazel": "7298990c00040a0e2f121f6c32544bab27d4452f80d9ce51349b1a28f3005c43",
+    "https://bcr.bazel.build/modules/upb/0.0.0-20230516-61a97ef/MODULE.bazel": "c0df5e35ad55e264160417fd0875932ee3c9dda63d9fccace35ac62f45e1b6f9",
+    "https://bcr.bazel.build/modules/zlib/1.2.11/MODULE.bazel": "07b389abc85fdbca459b69e2ec656ae5622873af3f845e1c9d80fe179f3effa0",
+    "https://bcr.bazel.build/modules/zlib/1.2.12/MODULE.bazel": "3b1a8834ada2a883674be8cbd36ede1b6ec481477ada359cd2d3ddc562340b27",
+    "https://bcr.bazel.build/modules/zlib/1.3.1.bcr.3/MODULE.bazel": "af322bc08976524477c79d1e45e241b6efbeb918c497e8840b8ab116802dda79",
+    "https://bcr.bazel.build/modules/zlib/1.3.1.bcr.5/MODULE.bazel": "eec517b5bbe5492629466e11dae908d043364302283de25581e3eb944326c4ca",
+    "https://bcr.bazel.build/modules/zlib/1.3.1.bcr.5/source.json": "22bc55c47af97246cfc093d0acf683a7869377de362b5d1c552c2c2e16b7a806",
+    "https://bcr.bazel.build/modules/zlib/1.3.1/MODULE.bazel": "751c9940dcfe869f5f7274e1295422a34623555916eb98c174c1e945594bf198"
+  },
+  "selectedYankedVersions": {},
+  "moduleExtensions": {
+    "//:extensions.bzl%llvm_deps_extension": {
+      "general": {
+        "bzlTransitiveDigest": "LGeZ4Ibt22AGXloFt/bm3EsBB05m6aTG+WxfH8fJVB4=",
+        "usagesDigest": "dHBLC1g5cqg/flxcuZRJMp2heDoB4+0/NDd6MutLhGE=",
+        "recordedFileInputs": {},
+        "recordedDirentsInputs": {},
+        "envVariables": {},
+        "generatedRepoSpecs": {
+          "llvm-raw": {
+            "repoRuleId": "@@bazel_tools//tools/build_defs/repo:local.bzl%new_local_repository",
+            "attributes": {
+              "build_file_content": "# empty",
+              "path": "../../"
+            }
+          },
+          "llvm_zlib": {
+            "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive",
+            "attributes": {
+              "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:zlib-ng.BUILD",
+              "sha256": "e36bb346c00472a1f9ff2a0a4643e590a254be6379da7cddd9daeb9a7f296731",
+              "strip_prefix": "zlib-ng-2.0.7",
+              "urls": [
+                "https://github.com/zlib-ng/zlib-ng/archive/refs/tags/2.0.7.zip"
+              ]
+            }
+          },
+          "vulkan_headers": {
+            "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive",
+            "attributes": {
+              "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:vulkan_headers.BUILD",
+              "sha256": "19f491784ef0bc73caff877d11c96a48b946b5a1c805079d9006e3fbaa5c1895",
+              "strip_prefix": "Vulkan-Headers-9bd3f561bcee3f01d22912de10bb07ce4e23d378",
+              "urls": [
+                "https://github.com/KhronosGroup/Vulkan-Headers/archive/9bd3f561bcee3f01d22912de10bb07ce4e23d378.tar.gz"
+              ]
+            }
+          },
+          "vulkan_sdk_setup": {
+            "repoRuleId": "@@//:vulkan_sdk.bzl%vulkan_sdk_setup",
+            "attributes": {}
+          },
+          "gmp": {
+            "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive",
+            "attributes": {
+              "urls": [
+                "https://gmplib.org/download/gmp/gmp-6.2.1.tar.xz",
+                "https://ftp.gnu.org/gnu/gmp/gmp-6.2.1.tar.xz"
+              ],
+              "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:gmp.BUILD",
+              "sha256": "fd4829912cddd12f84181c3451cc752be224643e87fac497b69edddadc49b4f2",
+              "strip_prefix": "gmp-6.2.1"
+            }
+          },
+          "mpfr": {
+            "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive",
+            "attributes": {
+              "urls": [
+                "https://www.mpfr.org/mpfr-current/mpfr-4.2.2.tar.gz"
+              ],
+              "sha256": "826cbb24610bd193f36fde172233fb8c009f3f5c2ad99f644d0dea2e16a20e42",
+              "strip_prefix": "mpfr-4.2.2",
+              "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:mpfr.BUILD"
+            }
+          },
+          "mpc": {
+            "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive",
+            "attributes": {
+              "urls": [
+                "https://ftp.gnu.org/gnu/mpc/mpc-1.3.1.tar.gz"
+              ],
+              "sha256": "ab642492f5cf882b74aa0cb730cd410a81edcdbec895183ce930e706c1c759b8",
+              "strip_prefix": "mpc-1.3.1",
+              "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:mpc.BUILD"
+            }
+          },
+          "pfm": {
+            "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive",
+            "attributes": {
+              "urls": [
+                "https://versaweb.dl.sourceforge.net/project/perfmon2/libpfm4/libpfm-4.13.0.tar.gz"
+              ],
+              "sha256": "d18b97764c755528c1051d376e33545d0eb60c6ebf85680436813fa5b04cc3d1",
+              "strip_prefix": "libpfm-4.13.0",
+              "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:pfm.BUILD"
+            }
+          },
+          "llvm_zstd": {
+            "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive",
+            "attributes": {
+              "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:zstd.BUILD",
+              "sha256": "7c42d56fac126929a6a85dbc73ff1db2411d04f104fae9bdea51305663a83fd0",
+              "strip_prefix": "zstd-1.5.2",
+              "urls": [
+                "https://github.com/facebook/zstd/releases/download/v1.5.2/zstd-1.5.2.tar.gz"
+              ]
+            }
+          },
+          "pybind11": {
+            "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive",
+            "attributes": {
+              "url": "https://github.com/pybind/pybind11/archive/v2.10.3.zip",
+              "sha256": "201966a61dc826f1b1879a24a3317a1ec9214a918c8eb035be2f30c3e9cfbdcb",
+              "strip_prefix": "pybind11-2.10.3",
+              "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:pybind.BUILD"
+            }
+          },
+          "pyyaml": {
+            "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive",
+            "attributes": {
+              "url": "https://github.com/yaml/pyyaml/archive/refs/tags/5.1.zip",
+              "sha256": "f0a35d7f282a6d6b1a4f3f3965ef5c124e30ed27a0088efb97c0977268fd671f",
+              "strip_prefix": "pyyaml-5.1/lib3",
+              "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:pyyaml.BUILD"
+            }
+          },
+          "robin_map": {
+            "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive",
+            "attributes": {
+              "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:robin_map.BUILD",
+              "sha256": "a8424ad3b0affd4c57ed26f0f3d8a29604f0e1f2ef2089f497f614b1c94c7236",
+              "strip_prefix": "robin-map-1.3.0",
+              "url": "https://github.com/Tessil/robin-map/archive/refs/tags/v1.3.0.tar.gz"
+            }
+          },
+          "nanobind": {
+            "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive",
+            "attributes": {
+              "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:nanobind.BUILD",
+              "sha256": "8ce3667dce3e64fc06bfb9b778b6f48731482362fb89a43da156632266cd5a90",
+              "strip_prefix": "nanobind-2.9.2",
+              "url": "https://github.com/wjakob/nanobind/archive/refs/tags/v2.9.2.tar.gz"
+            }
+          }
+        },
+        "recordedRepoMappingEntries": [
+          [
+            "",
+            "bazel_tools",
+            "bazel_tools"
+          ]
+        ]
+      }
+    },
+    "@@rules_android+//rules/android_sdk_repository:rule.bzl%android_sdk_repository_extension": {
+      "general": {
+        "bzlTransitiveDigest": "NAy+0M15JNVEBb8Tny6t7j3lKqTnsAMjoBB6LJ+C370=",
+        "usagesDigest": "g9Ur6X6qhf9a8MmY9qXU/jFjkyk/aZVBegI0yVMF0z4=",
+        "recordedFileInputs": {},
+        "recordedDirentsInputs": {},
+        "envVariables": {},
+        "generatedRepoSpecs": {
+          "androidsdk": {
+            "repoRuleId": "@@rules_android+//rules/android_sdk_repository:rule.bzl%_android_sdk_repository",
+            "attributes": {}
+          }
+        },
+        "recordedRepoMappingEntries": []
+      }
+    },
+    "@@rules_kotlin+//src/main/starlark/core/repositories:bzlmod_setup.bzl%rules_kotlin_extensions": {
+      "general": {
+        "bzlTransitiveDigest": "sFhcgPbDQehmbD1EOXzX4H1q/CD5df8zwG4kp4jbvr8=",
+        "usagesDigest": "QI2z8ZUR+mqtbwsf2fLqYdJAkPOHdOV+tF2yVAUgRzw=",
+        "recordedFileInputs": {},
+        "recordedDirentsInputs": {},
+        "envVariables": {},
+        "generatedRepoSpecs": {
+          "com_github_jetbrains_kotlin_git": {
+            "repoRuleId": "@@rules_kotlin+//src/main/starlark/core/repositories:compiler.bzl%kotlin_compiler_git_repository",
+            "attributes": {
+              "urls": [
+                "https://github.com/JetBrains/kotlin/releases/download/v1.9.23/kotlin-compiler-1.9.23.zip"
+              ],
+              "sha256": "93137d3aab9afa9b27cb06a824c2324195c6b6f6179d8a8653f440f5bd58be88"
+            }
+          },
+          "com_github_jetbrains_kotlin": {
+            "repoRuleId": "@@rules_kotlin+//src/main/starlark/core/repositories:compiler.bzl%kotlin_capabilities_repository",
+            "attributes": {
+              "git_repository_name": "com_github_jetbrains_kotlin_git",
+              "compiler_version": "1.9.23"
+            }
+          },
+          "com_github_google_ksp": {
+            "repoRuleId": "@@rules_kotlin+//src/main/starlark/core/repositories:ksp.bzl%ksp_compiler_plugin_repository",
+            "attributes": {
+              "urls": [
+                "https://github.com/google/ksp/releases/download/1.9.23-1.0.20/artifacts.zip"
+              ],
+              "sha256": "ee0618755913ef7fd6511288a232e8fad24838b9af6ea73972a76e81053c8c2d",
+              "strip_version": "1.9.23-1.0.20"
+            }
+          },
+          "com_github_pinterest_ktlint": {
+            "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_file",
+            "attributes": {
+              "sha256": "01b2e0ef893383a50dbeb13970fe7fa3be36ca3e83259e01649945b09d736985",
+              "urls": [
+                "https://github.com/pinterest/ktlint/releases/download/1.3.0/ktlint"
+              ],
+              "executable": true
+            }
+          },
+          "rules_android": {
+            "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive",
+            "attributes": {
+              "sha256": "cd06d15dd8bb59926e4d65f9003bfc20f9da4b2519985c27e190cddc8b7a7806",
+              "strip_prefix": "rules_android-0.1.1",
+              "urls": [
+                "https://github.com/bazelbuild/rules_android/archive/v0.1.1.zip"
+              ]
+            }
+          }
+        },
+        "recordedRepoMappingEntries": [
+          [
+            "rules_kotlin+",
+            "bazel_tools",
+            "bazel_tools"
+          ]
+        ]
+      }
+    },
+    "@@rules_python+//python/uv:uv.bzl%uv": {
+      "general": {
+        "bzlTransitiveDigest": "477hS4MXeJ7LqPNLTqL+1ltraV5lqwOw3tEXWqnJRt8=",
+        "usagesDigest": "icnInV8HDGrRQf9x8RMfxWfBHgT3OgRlYovS/9POEJw=",
+        "recordedFileInputs": {},
+        "recordedDirentsInputs": {},
+        "envVariables": {},
+        "generatedRepoSpecs": {
+          "uv": {
+            "repoRuleId": "@@rules_python+//python/uv/private:uv_toolchains_repo.bzl%uv_toolchains_repo",
+            "attributes": {
+              "toolchain_type": "'@@rules_python+//python/uv:uv_toolchain_type'",
+              "toolchain_names": [
+                "none"
+              ],
+              "toolchain_implementations": {
+                "none": "'@@rules_python+//python:none'"
+              },
+              "toolchain_compatible_with": {
+                "none": [
+                  "@platforms//:incompatible"
+                ]
+              },
+              "toolchain_target_settings": {}
+            }
+          }
+        },
+        "recordedRepoMappingEntries": [
+          [
+            "rules_python+",
+            "bazel_tools",
+            "bazel_tools"
+          ]
+        ]
+      }
+    }
+  }
+}
diff --git a/utils/bazel/extensions.bzl b/utils/bazel/extensions.bzl
new file mode 100644
index 0000000000000..b0d5871b722a7
--- /dev/null
+++ b/utils/bazel/extensions.bzl
@@ -0,0 +1,127 @@
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+"""bzlmod extensions for llvm-project"""
+
+load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
+load("@bazel_tools//tools/build_defs/repo:local.bzl", "new_local_repository")
+load(":vulkan_sdk.bzl", "vulkan_sdk_setup")
+
+def _llvm_repos_extension_impl(module_ctx):
+    if any([m.is_root and m.name == "llvm-project-overlay" for m in module_ctx.modules]):
+        new_local_repository(
+            name = "llvm-raw",
+            build_file_content = "# empty",
+            path = "../../",
+        )
+
+    http_archive(
+        name = "llvm_zlib",
+        build_file = "@llvm-raw//utils/bazel/third_party_build:zlib-ng.BUILD",
+        sha256 = "e36bb346c00472a1f9ff2a0a4643e590a254be6379da7cddd9daeb9a7f296731",
+        strip_prefix = "zlib-ng-2.0.7",
+        urls = [
+            "https://github.com/zlib-ng/zlib-ng/archive/refs/tags/2.0.7.zip",
+        ],
+    )
+
+    http_archive(
+        name = "vulkan_headers",
+        build_file = "@llvm-raw//utils/bazel/third_party_build:vulkan_headers.BUILD",
+        sha256 = "19f491784ef0bc73caff877d11c96a48b946b5a1c805079d9006e3fbaa5c1895",
+        strip_prefix = "Vulkan-Headers-9bd3f561bcee3f01d22912de10bb07ce4e23d378",
+        urls = [
+            "https://github.com/KhronosGroup/Vulkan-Headers/archive/9bd3f561bcee3f01d22912de10bb07ce4e23d378.tar.gz",
+        ],
+    )
+
+    vulkan_sdk_setup(name = "vulkan_sdk_setup")
+
+    http_archive(
+        name = "gmp",
+        urls = [
+            "https://gmplib.org/download/gmp/gmp-6.2.1.tar.xz",
+            "https://ftp.gnu.org/gnu/gmp/gmp-6.2.1.tar.xz",
+        ],
+        build_file = "@llvm-raw//utils/bazel/third_party_build:gmp.BUILD",
+        sha256 = "fd4829912cddd12f84181c3451cc752be224643e87fac497b69edddadc49b4f2",
+        strip_prefix = "gmp-6.2.1",
+    )
+
+    http_archive(
+        name = "mpfr",
+        urls = [
+            "https://www.mpfr.org/mpfr-current/mpfr-4.2.2.tar.gz",
+        ],
+        sha256 = "826cbb24610bd193f36fde172233fb8c009f3f5c2ad99f644d0dea2e16a20e42",
+        strip_prefix = "mpfr-4.2.2",
+        build_file = "@llvm-raw//utils/bazel/third_party_build:mpfr.BUILD",
+    )
+
+    http_archive(
+        name = "mpc",
+        urls = [
+            "https://ftp.gnu.org/gnu/mpc/mpc-1.3.1.tar.gz",
+        ],
+        sha256 = "ab642492f5cf882b74aa0cb730cd410a81edcdbec895183ce930e706c1c759b8",
+        strip_prefix = "mpc-1.3.1",
+        build_file = "@llvm-raw//utils/bazel/third_party_build:mpc.BUILD",
+    )
+
+    http_archive(
+        name = "pfm",
+        urls = [
+            "https://versaweb.dl.sourceforge.net/project/perfmon2/libpfm4/libpfm-4.13.0.tar.gz",
+        ],
+        sha256 = "d18b97764c755528c1051d376e33545d0eb60c6ebf85680436813fa5b04cc3d1",
+        strip_prefix = "libpfm-4.13.0",
+        build_file = "@llvm-raw//utils/bazel/third_party_build:pfm.BUILD",
+    )
+
+    http_archive(
+        name = "llvm_zstd",
+        build_file = "@llvm-raw//utils/bazel/third_party_build:zstd.BUILD",
+        sha256 = "7c42d56fac126929a6a85dbc73ff1db2411d04f104fae9bdea51305663a83fd0",
+        strip_prefix = "zstd-1.5.2",
+        urls = [
+            "https://github.com/facebook/zstd/releases/download/v1.5.2/zstd-1.5.2.tar.gz",
+        ],
+    )
+
+    http_archive(
+        name = "pybind11",
+        url = "https://github.com/pybind/pybind11/archive/v2.10.3.zip",
+        sha256 = "201966a61dc826f1b1879a24a3317a1ec9214a918c8eb035be2f30c3e9cfbdcb",
+        strip_prefix = "pybind11-2.10.3",
+        build_file = "@llvm-raw//utils/bazel/third_party_build:pybind.BUILD",
+    )
+
+    http_archive(
+        name = "pyyaml",
+        url = "https://github.com/yaml/pyyaml/archive/refs/tags/5.1.zip",
+        sha256 = "f0a35d7f282a6d6b1a4f3f3965ef5c124e30ed27a0088efb97c0977268fd671f",
+        strip_prefix = "pyyaml-5.1/lib3",
+        build_file = "@llvm-raw//utils/bazel/third_party_build:pyyaml.BUILD",
+    )
+
+    # TODO: bump to robin-map-1.4.0
+    http_archive(
+        name = "robin_map",
+        build_file = "@llvm-raw//utils/bazel/third_party_build:robin_map.BUILD",
+        sha256 = "a8424ad3b0affd4c57ed26f0f3d8a29604f0e1f2ef2089f497f614b1c94c7236",
+        strip_prefix = "robin-map-1.3.0",
+        url = "https://github.com/Tessil/robin-map/archive/refs/tags/v1.3.0.tar.gz",
+    )
+
+    http_archive(
+        name = "nanobind",
+        build_file = "@llvm-raw//utils/bazel/third_party_build:nanobind.BUILD",
+        sha256 = "8ce3667dce3e64fc06bfb9b778b6f48731482362fb89a43da156632266cd5a90",
+        strip_prefix = "nanobind-2.9.2",
+        url = "https://github.com/wjakob/nanobind/archive/refs/tags/v2.9.2.tar.gz",
+    )
+
+llvm_repos_extension = module_extension(
+    implementation = _llvm_repos_extension_impl,
+)
diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
index deb56dc0957e9..790709bdef05c 100644
--- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
@@ -1025,6 +1025,7 @@ cc_library(
 gentbl_cc_library(
     name = "sema_attr_gen",
     tbl_outs = {
+        "include/clang/Sema/AttrIsTypeDependent.inc": ["-gen-clang-attr-is-type-dependent"],
         "include/clang/Sema/AttrParsedAttrImpl.inc": ["-gen-clang-attr-parsed-attr-impl"],
         "include/clang/Sema/AttrParsedAttrKinds.inc": ["-gen-clang-attr-parsed-attr-kinds"],
         "include/clang/Sema/AttrSpellingListIndex.inc": ["-gen-clang-attr-spelling-index"],
diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
index 635f77215b38f..ddad2f4f7611d 100644
--- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
@@ -4100,6 +4100,7 @@ cc_library(
         ":DebugInfo",
         ":DebugInfoDWARF",
         ":JITLink",
+        ":Object",
         ":OrcJIT",
         ":OrcShared",
         ":Support",