diff --git a/.ci/monolithic-linux.sh b/.ci/monolithic-linux.sh index 4a8418d7baa8c..ca619aa7e98a1 100755 --- a/.ci/monolithic-linux.sh +++ b/.ci/monolithic-linux.sh @@ -64,9 +64,11 @@ cmake -S "${MONOREPO_ROOT}"/llvm -B "${BUILD_DIR}" \ start-group "ninja" -# Targets are not escaped as they are passed as separate arguments. -ninja -C "${BUILD_DIR}" -k 0 ${targets} |& tee ninja.log -cp ${BUILD_DIR}/.ninja_log ninja.ninja_log +if [[ "${targets}" != "" ]]; then + # Targets are not escaped as they are passed as separate arguments. + ninja -C "${BUILD_DIR}" -k 0 ${targets} |& tee ninja.log + cp ${BUILD_DIR}/.ninja_log ninja.ninja_log +fi if [[ "${runtime_targets}" != "" ]]; then start-group "ninja Runtimes" diff --git a/.ci/monolithic-windows.sh b/.ci/monolithic-windows.sh index 7b926b87f3623..99e7758ce8d79 100755 --- a/.ci/monolithic-windows.sh +++ b/.ci/monolithic-windows.sh @@ -51,9 +51,11 @@ cmake -S "${MONOREPO_ROOT}"/llvm -B "${BUILD_DIR}" \ start-group "ninja" -# Targets are not escaped as they are passed as separate arguments. -ninja -C "${BUILD_DIR}" -k 0 ${targets} |& tee ninja.log -cp ${BUILD_DIR}/.ninja_log ninja.ninja_log +if [[ "${targets}" != "" ]]; then + # Targets are not escaped as they are passed as separate arguments. + ninja -C "${BUILD_DIR}" -k 0 ${targets} |& tee ninja.log + cp ${BUILD_DIR}/.ninja_log ninja.ninja_log +fi if [[ "${runtimes_targets}" != "" ]]; then start-group "ninja runtimes" diff --git a/.github/new-prs-labeler.yml b/.github/new-prs-labeler.yml index efdc42d349195..bb0eef5842b0f 100644 --- a/.github/new-prs-labeler.yml +++ b/.github/new-prs-labeler.yml @@ -1,1131 +1,1449 @@ BOLT: - - bolt/**/* + - changed-files: + - any-glob-to-any-file: + - bolt/**/* ClangIR: - - clang/include/clang/CIR/**/* - - clang/lib/CIR/**/* - - clang/tools/cir-*/**/* - - clang/test/CIR/**/* + - changed-files: + - any-glob-to-any-file: + - clang/include/clang/CIR/**/* + - clang/lib/CIR/**/* + - clang/tools/cir-*/**/* + - clang/test/CIR/**/* clang:bytecode: - - clang/docs/ConstantInterpreter.rst - - clang/lib/AST/ByteCode/**/* - - clang/test/AST/ByteCode/**/* - - clang/unittests/AST/ByteCode/**/* + - changed-files: + - any-glob-to-any-file: + - clang/docs/ConstantInterpreter.rst + - clang/lib/AST/ByteCode/**/* + - clang/test/AST/ByteCode/**/* + - clang/unittests/AST/ByteCode/**/* clang:dataflow: - - clang/include/clang/Analysis/FlowSensitive/**/* - - clang/lib/Analysis/FlowSensitive/**/* - - clang/unittests/Analysis/FlowSensitive/**/* - - clang/docs/DataFlowAnalysisIntro.md - - clang/docs/DataFlowAnalysisIntroImages/**/* + - changed-files: + - any-glob-to-any-file: + - clang/include/clang/Analysis/FlowSensitive/**/* + - clang/lib/Analysis/FlowSensitive/**/* + - clang/unittests/Analysis/FlowSensitive/**/* + - clang/docs/DataFlowAnalysisIntro.md + - clang/docs/DataFlowAnalysisIntroImages/**/* clang:frontend: - - clang/lib/AST/**/* - - clang/include/clang/AST/**/* - - clang/lib/Basic/**/* - - clang/include/clang/Basic/**/* - - clang/lib/Interpreter/**/* - - clang/include/clang/Interpreter/**/* - - clang/lib/Lex/**/* - - clang/include/clang/Lex/**/* - - clang/lib/Parse/**/* - - clang/include/clang/Parse/**/* - - clang/lib/Sema/**/* - - clang/include/clang/Sema/**/* + - changed-files: + - any-glob-to-any-file: + - clang/lib/AST/**/* + - clang/include/clang/AST/**/* + - clang/lib/Basic/**/* + - clang/include/clang/Basic/**/* + - clang/lib/Interpreter/**/* + - clang/include/clang/Interpreter/**/* + - clang/lib/Lex/**/* + - clang/include/clang/Lex/**/* + - clang/lib/Parse/**/* + - clang/include/clang/Parse/**/* + - clang/lib/Sema/**/* + - clang/include/clang/Sema/**/* clang:headers: - - clang/lib/Headers/**/* + - changed-files: + - any-glob-to-any-file: + - clang/lib/Headers/**/* compiler-rt: - - compiler-rt/**/* + - changed-files: + - any-glob-to-any-file: + - compiler-rt/**/* flang: - - flang/**/* + - changed-files: + - any-glob-to-any-file: + - flang/**/* flang:frontend: - - flang/Parser/**/* - - flang/Evaluate/**/* - - flang/Semantics/**/* + - changed-files: + - any-glob-to-any-file: + - flang/Parser/**/* + - flang/Evaluate/**/* + - flang/Semantics/**/* libclc: - - libclc/** + - changed-files: + - any-glob-to-any-file: + - libclc/** HLSL: - - clang/*HLSL*/**/* - - clang/**/*HLSL* - - llvm/**/Frontend/HLSL/**/* + - changed-files: + - any-glob-to-any-file: + - clang/*HLSL*/**/* + - clang/**/*HLSL* + - llvm/**/Frontend/HLSL/**/* lld: - - lld/**/* + - changed-files: + - any-glob-to-any-file: + - lld/**/* llvm-lit: - - llvm/utils/lit/**/* + - changed-files: + - any-glob-to-any-file: + - llvm/utils/lit/**/* PGO: - - llvm/**/ProfileData/**/* - - llvm/**/SampleProfile* - - llvm/**/CodeGen/MIRSampleProfile* - - llvm/lib/Transforms/Instrumentation/CGProfile.cpp - - llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp - - llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp - - llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp - - llvm/lib/Transforms/Instrumentation/PGO* - - llvm/lib/Transforms/Instrumentation/ValueProfile* - - llvm/test/Instrumentation/InstrProfiling/**/* - - llvm/test/Transforms/PGOProfile/**/* - - llvm/test/Transforms/SampleProfile/**/* - - llvm/**/llvm-profdata/**/* - - llvm/**/llvm-profgen/**/* + - changed-files: + - any-glob-to-any-file: + - llvm/**/ProfileData/**/* + - llvm/**/SampleProfile* + - llvm/**/CodeGen/MIRSampleProfile* + - llvm/lib/Transforms/Instrumentation/CGProfile.cpp + - llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp + - llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp + - llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp + - llvm/lib/Transforms/Instrumentation/PGO* + - llvm/lib/Transforms/Instrumentation/ValueProfile* + - llvm/test/Instrumentation/InstrProfiling/**/* + - llvm/test/Transforms/PGOProfile/**/* + - llvm/test/Transforms/SampleProfile/**/* + - llvm/**/llvm-profdata/**/* + - llvm/**/llvm-profgen/**/* vectorizers: - - llvm/lib/Transforms/Vectorize/**/* - - llvm/include/llvm/Transforms/Vectorize/**/* + - changed-files: + - any-glob-to-any-file: + - llvm/lib/Transforms/Vectorize/**/* + - llvm/include/llvm/Transforms/Vectorize/**/* # IMPORTED FROM CODEOWNERS LTO: - - llvm/*/LTO/** - - llvm/*/Linker/** - - llvm/*/ThinLTO/** - - llvm/lib/Transforms/*/FunctionImport* - - llvm/tools/gold/** + - changed-files: + - any-glob-to-any-file: + - llvm/*/LTO/** + - llvm/*/Linker/** + - llvm/*/ThinLTO/** + - llvm/lib/Transforms/*/FunctionImport* + - llvm/tools/gold/** clang:driver: - - clang/*/Driver/** + - changed-files: + - any-glob-to-any-file: + - clang/*/Driver/** compiler-rt:asan: - - compiler-rt/lib/asan/** - - compiler-rt/include/sanitizer/asan_interface.h - - compiler-rt/test/asan/** - - compiler-rt/lib/asan_abi/** - - compiler-rt/test/asan_abi/** + - changed-files: + - any-glob-to-any-file: + - compiler-rt/lib/asan/** + - compiler-rt/include/sanitizer/asan_interface.h + - compiler-rt/test/asan/** + - compiler-rt/lib/asan_abi/** + - compiler-rt/test/asan_abi/** compiler-rt:builtins: - - compiler-rt/lib/builtins/** - - compiler-rt/test/builtins/** + - changed-files: + - any-glob-to-any-file: + - compiler-rt/lib/builtins/** + - compiler-rt/test/builtins/** compiler-rt:cfi: - - compiler-rt/lib/cfi/** - - compiler-rt/test/cfi/** + - changed-files: + - any-glob-to-any-file: + - compiler-rt/lib/cfi/** + - compiler-rt/test/cfi/** compiler-rt:fuzzer: - - compiler-rt/lib/fuzzer/** - - compiler-rt/include/fuzzer/** - - compiler-rt/test/fuzzer/** + - changed-files: + - any-glob-to-any-file: + - compiler-rt/lib/fuzzer/** + - compiler-rt/include/fuzzer/** + - compiler-rt/test/fuzzer/** compiler-rt:hwasan: - - compiler-rt/lib/hwasan/** - - compiler-rt/include/sanitizer/hwasan_interface.h - - compiler-rt/test/hwasan/** + - changed-files: + - any-glob-to-any-file: + - compiler-rt/lib/hwasan/** + - compiler-rt/include/sanitizer/hwasan_interface.h + - compiler-rt/test/hwasan/** compiler-rt:lsan: - - compiler-rt/lib/lsan/** - - compiler-rt/include/sanitizer/lsan_interface.h - - compiler-rt/test/lsan/** + - changed-files: + - any-glob-to-any-file: + - compiler-rt/lib/lsan/** + - compiler-rt/include/sanitizer/lsan_interface.h + - compiler-rt/test/lsan/** compiler-rt:msan: - - compiler-rt/lib/msan/** - - compiler-rt/include/sanitizer/msan_interface.h - - compiler-rt/test/msan/** + - changed-files: + - any-glob-to-any-file: + - compiler-rt/lib/msan/** + - compiler-rt/include/sanitizer/msan_interface.h + - compiler-rt/test/msan/** compiler-rt:sanitizer: - - llvm/lib/Transforms/Instrumentation/*Sanitizer* - - compiler-rt/lib/interception/** - - compiler-rt/lib/*san*/** - - compiler-rt/include/sanitizer/** - - compiler-rt/test/*san*/** - - compiler-rt/lib/fuzzer/** - - compiler-rt/include/fuzzer/** - - compiler-rt/test/fuzzer/** - - compiler-rt/lib/scudo/** - - compiler-rt/test/scudo/** + - changed-files: + - any-glob-to-any-file: + - llvm/lib/Transforms/Instrumentation/*Sanitizer* + - compiler-rt/lib/interception/** + - compiler-rt/lib/*san*/** + - compiler-rt/include/sanitizer/** + - compiler-rt/test/*san*/** + - compiler-rt/lib/fuzzer/** + - compiler-rt/include/fuzzer/** + - compiler-rt/test/fuzzer/** + - compiler-rt/lib/scudo/** + - compiler-rt/test/scudo/** compiler-rt:scudo: - - compiler-rt/lib/scudo/** - - compiler-rt/test/scudo/** + - changed-files: + - any-glob-to-any-file: + - compiler-rt/lib/scudo/** + - compiler-rt/test/scudo/** compiler-rt:tsan: - - compiler-rt/lib/tsan/** - - compiler-rt/include/sanitizer/tsan_interface.h - - compiler-rt/include/sanitizer/tsan_interface_atomic.h - - compiler-rt/test/tsan/** + - changed-files: + - any-glob-to-any-file: + - compiler-rt/lib/tsan/** + - compiler-rt/include/sanitizer/tsan_interface.h + - compiler-rt/include/sanitizer/tsan_interface_atomic.h + - compiler-rt/test/tsan/** compiler-rt:ubsan: - - compiler-rt/lib/ubsan/** - - compiler-rt/include/sanitizer/ubsan_interface.h - - compiler-rt/test/ubsan/** - - compiler-rt/lib/ubsan_minimal/** - - compiler-rt/test/ubsan_minimal/** + - changed-files: + - any-glob-to-any-file: + - compiler-rt/lib/ubsan/** + - compiler-rt/include/sanitizer/ubsan_interface.h + - compiler-rt/test/ubsan/** + - compiler-rt/lib/ubsan_minimal/** + - compiler-rt/test/ubsan_minimal/** xray: - - llvm/tools/llvm-xray/** - - compiler-rt/*/xray/** - - clang/include/clang/Basic/XRay* - - clang/lib/Basic/XRay* - - compiler-rt/*/xray/** - - llvm/include/llvm/XRay/** - - llvm/lib/XRay/** - - llvm/tools/llvm-xray/** - - llvm/unittests/XRay/** - - compiler-rt/*/xray/** + - changed-files: + - any-glob-to-any-file: + - llvm/tools/llvm-xray/** + - compiler-rt/*/xray/** + - clang/include/clang/Basic/XRay* + - clang/lib/Basic/XRay* + - compiler-rt/*/xray/** + - llvm/include/llvm/XRay/** + - llvm/lib/XRay/** + - llvm/tools/llvm-xray/** + - llvm/unittests/XRay/** + - compiler-rt/*/xray/** clang:codegen: - - clang/lib/CodeGen/** - - clang/include/clang/CodeGen/** + - changed-files: + - any-glob-to-any-file: + - clang/lib/CodeGen/** + - clang/include/clang/CodeGen/** mlir: - - mlir/** + - changed-files: + - any-glob-to-any-file: + - mlir/** mlir:core: - - mlir/include/mlir/Support/** - - mlir/lib/Support/** - - mlir/include/mlir/Parser/** - - mlir/lib/Parser/** - - mlir/include/mlir/IR/** - - mlir/lib/IR/** - - mlir/include/mlir/Bytecode/** - - mlir/lib/Bytecode/** - - mlir/include/mlir/AsmParser/** - - mlir/lib/AsmParser/** - - mlir/include/mlir/Pass/** - - mlir/lib/Pass/** - - mlir/include/mlir/Tools/** - - mlir/lib/Tools/** - - mlir/include/mlir/Reducer/** - - mlir/lib/Reducer/** - - mlir/include/mlir/Transforms/** - - mlir/lib/Transforms/** - - mlir/include/mlir/Debug/** - - mlir/lib/Debug/** - - mlir/tools/** + - changed-files: + - any-glob-to-any-file: + - mlir/include/mlir/Support/** + - mlir/lib/Support/** + - mlir/include/mlir/Parser/** + - mlir/lib/Parser/** + - mlir/include/mlir/IR/** + - mlir/lib/IR/** + - mlir/include/mlir/Bytecode/** + - mlir/lib/Bytecode/** + - mlir/include/mlir/AsmParser/** + - mlir/lib/AsmParser/** + - mlir/include/mlir/Pass/** + - mlir/lib/Pass/** + - mlir/include/mlir/Tools/** + - mlir/lib/Tools/** + - mlir/include/mlir/Reducer/** + - mlir/lib/Reducer/** + - mlir/include/mlir/Transforms/** + - mlir/lib/Transforms/** + - mlir/include/mlir/Debug/** + - mlir/lib/Debug/** + - mlir/tools/** mlir:ods: - - mlir/TableGen/** - - mlir/tblgen/** - - mlir/include/mlir/IR/*.td + - changed-files: + - any-glob-to-any-file: + - mlir/TableGen/** + - mlir/tblgen/** + - mlir/include/mlir/IR/*.td mlir:bindings: - - mlir/Bindings/** + - changed-files: + - any-glob-to-any-file: + - mlir/Bindings/** mlir:gpu: - - mlir/**/*GPU*/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/*GPU*/** mlir:amdgpu: - - mlir/**/AMDGPU/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/AMDGPU/** mlir:amx: - - mlir/**/AMX/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/AMX/** mlir:affine: - - mlir/**/Affine/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/Affine/** mlir:arith: - - mlir/**/Arith/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/Arith/** mlir:neon: - - mlir/**/ArmNeon/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/ArmNeon/** mlir:sme: - - mlir/**/ArmSME/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/ArmSME/** mlir:sve: - - mlir/**/ArmSVE/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/ArmSVE/** mlir:async: - - mlir/**/Async/** - - mlir/**/Async/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/Async/** + - mlir/**/Async/** mlir:bufferization: - - mlir/**/Bufferization/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/Bufferization/** mlir:complex: - - mlir/**/Complex/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/Complex/** mlir:cf: - - mlir/**/ControlFlow/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/ControlFlow/** mlir:dlti: - - mlir/**/DLTI/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/DLTI/** mlir:emitc: - - mlir/**/*EmitC*/** - - mlir/lib/Target/Cpp/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/*EmitC*/** + - mlir/lib/Target/Cpp/** mlir:func: - - mlir/**/Func/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/Func/** mlir:irdl: - - mlir/**/IRDL/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/IRDL/** mlir:index: - - mlir/**/Index/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/Index/** mlir:llvm: - - mlir/**/LLVM* - - mlir/**/LLVM*/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/LLVM* + - mlir/**/LLVM*/** mlir:linalg: - - mlir/**/*linalg/** - - mlir/**/*Linalg/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/*linalg/** + - mlir/**/*Linalg/** mlir:mlprogram: - - mlir/**/MLProgram/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/MLProgram/** mlir:math: - - mlir/**/Math/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/Math/** mlir:memref: - - mlir/**/MemRef/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/MemRef/** mlir:nvgpu: - - mlir/**/NVGPU/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/NVGPU/** mlir:openacc: - - mlir/**/*OpenACC* - - mlir/**/*OpenACC*/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/*OpenACC* + - mlir/**/*OpenACC*/** mlir:openmp: - - mlir/**/*OpenMP* - - mlir/**/*OpenMP*/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/*OpenMP* + - mlir/**/*OpenMP*/** mlir:pdl: - - mlir/**/PDL/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/PDL/** mlir:quant: - - mlir/**/Quant/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/Quant/** mlir:scf: - - mlir/**/SCF/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/SCF/** mlir:spirv: - - mlir/**/SPIRV/** - - mlir/**/SPIRVTo*/** - - mlir/**/*ToSPIRV/** - - mlir/tools/mlir-spirv-cpu-runner/** - - mlir/tools/mlir-vulkan-runner/** - - mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp + - changed-files: + - any-glob-to-any-file: + - mlir/**/SPIRV/** + - mlir/**/SPIRVTo*/** + - mlir/**/*ToSPIRV/** + - mlir/tools/mlir-spirv-cpu-runner/** + - mlir/tools/mlir-vulkan-runner/** + - mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp mlir:shape: - - mlir/**/Shape/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/Shape/** mlir:sparse: - - mlir/**/SparseTensor/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/SparseTensor/** mlir:tensor: - - mlir/**/Tensor/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/Tensor/** mlir:tosa: - - mlir/**/*Tosa*/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/*Tosa*/** mlir:ub: - - mlir/**/UB/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/UB/** mlir:vector: - - mlir/**/*Vector/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/*Vector/** mlir:execution-engine: - - mlir/**/ExecutionEngine/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/ExecutionEngine/** mlir:presburger: - - mlir/**/*Presburger*/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/*Presburger*/** mlir:python: - - mlir/python/**/* + - changed-files: + - any-glob-to-any-file: + - mlir/python/**/* mlir:vectorops: - - mlir/**/Vector/**/* + - changed-files: + - any-glob-to-any-file: + - mlir/**/Vector/**/* coroutines: - - clang/docs/DebuggingCoroutines.rst - - clang/lib/Sema/SemaCoroutine.cpp - - clang/lib/CodeGen/CGCoroutine.cpp - - clang/test/CodeGenCoroutines/** - - llvm/docs/Coroutines.rst - - llvm/include/llvm/Transforms/Coroutines/** - - llvm/lib/Transforms/Coroutines/** - - llvm/test/Transforms/Coroutines/* + - changed-files: + - any-glob-to-any-file: + - clang/docs/DebuggingCoroutines.rst + - clang/lib/Sema/SemaCoroutine.cpp + - clang/lib/CodeGen/CGCoroutine.cpp + - clang/test/CodeGenCoroutines/** + - llvm/docs/Coroutines.rst + - llvm/include/llvm/Transforms/Coroutines/** + - llvm/lib/Transforms/Coroutines/** + - llvm/test/Transforms/Coroutines/* clang:modules: - - clang/docs/StandardCPlusPlusModules.rst - - clang/include/clang/AST/AbstractBasicReader.h - - clang/include/clang/AST/AbstractBasicWriter.h - - clang/include/clang/AST/AbstractTypeReader.h - - clang/include/clang/AST/AbstractTypeWriter.h - - clang/include/clang/AST/PropertiesBase.td - - clang/include/clang/AST/ODRHash.h - - clang/include/clang/AST/TypeProperties.td - - clang/include/clang/Basic/Module.h - - clang/include/clang/Frontend/PrecompiledPreamble.h - - clang/include/clang/Lex/ModuleLoader.h - - clang/include/clang/Lex/ModuleMap.h - - clang/include/clang/Serialization/** - - clang/lib/AST/ODRHash.cpp - - clang/lib/AST/StmtProfile.cpp - - clang/lib/Basic/Module.cpp - - clang/lib/Frontend/ModuleDependencyCollector.cpp - - clang/lib/Frontend/PrecompiledPreamble.cpp - - clang/lib/Lex/ModuleMap.cpp - - clang/lib/Sema/SemaModule.cpp - - clang/lib/Serialization/** - - clang/test/CXX/module/** - - clang/test/Modules/** - - clang/unittests/Serialization/* + - changed-files: + - any-glob-to-any-file: + - clang/docs/StandardCPlusPlusModules.rst + - clang/include/clang/AST/AbstractBasicReader.h + - clang/include/clang/AST/AbstractBasicWriter.h + - clang/include/clang/AST/AbstractTypeReader.h + - clang/include/clang/AST/AbstractTypeWriter.h + - clang/include/clang/AST/PropertiesBase.td + - clang/include/clang/AST/ODRHash.h + - clang/include/clang/AST/TypeProperties.td + - clang/include/clang/Basic/Module.h + - clang/include/clang/Frontend/PrecompiledPreamble.h + - clang/include/clang/Lex/ModuleLoader.h + - clang/include/clang/Lex/ModuleMap.h + - clang/include/clang/Serialization/** + - clang/lib/AST/ODRHash.cpp + - clang/lib/AST/StmtProfile.cpp + - clang/lib/Basic/Module.cpp + - clang/lib/Frontend/ModuleDependencyCollector.cpp + - clang/lib/Frontend/PrecompiledPreamble.cpp + - clang/lib/Lex/ModuleMap.cpp + - clang/lib/Sema/SemaModule.cpp + - clang/lib/Serialization/** + - clang/test/CXX/module/** + - clang/test/Modules/** + - clang/unittests/Serialization/* clang-tidy: - - clang-tools-extra/clang-tidy/** - - clang-tools-extra/docs/clang-tidy/** - - clang-tools-extra/test/clang-tidy/** + - changed-files: + - any-glob-to-any-file: + - clang-tools-extra/clang-tidy/** + - clang-tools-extra/docs/clang-tidy/** + - clang-tools-extra/test/clang-tidy/** clang-tools-extra: - - clang-tools-extra/** + - changed-files: + - any-glob-to-any-file: + - clang-tools-extra/** tools:llvm-mca: - - llvm/tools/llvm-mca/** - - llvm/include/llvm/MCA/** - - llvm/lib/MCA/** + - changed-files: + - any-glob-to-any-file: + - llvm/tools/llvm-mca/** + - llvm/include/llvm/MCA/** + - llvm/lib/MCA/** clang: - - any: - - clang/** - - '!clang/**/Format/**' - - '!clang/tools/clang-format/**' + - changed-files: + - all-globs-to-all-file: + - clang/** + - '!clang/**/Format/**' + - '!clang/tools/clang-format/**' testing-tools: - - llvm/include/llvm/FileCheck/** - - llvm/lib/FileCheck/** - - llvm/test/FileCheck/** - - llvm/unittests/FileCheck/** - - llvm/utils/lit/** - - llvm/utils/split-file/** - - llvm/utils/not/** - - llvm/utils/count/** - - llvm/utils/FileCheck/** - - llvm/docs/CommandGuide/FileCheck.rst - - llvm/docs/CommandGuide/lit.rst - - llvm/docs/TestingGuide.rst - - llvm/test/Other/FileCheck-space.txt - - llvm/utils/UpdateTestChecks/** - - llvm/utils/update*_test_checks.py + - changed-files: + - any-glob-to-any-file: + - llvm/include/llvm/FileCheck/** + - llvm/lib/FileCheck/** + - llvm/test/FileCheck/** + - llvm/unittests/FileCheck/** + - llvm/utils/lit/** + - llvm/utils/split-file/** + - llvm/utils/not/** + - llvm/utils/count/** + - llvm/utils/FileCheck/** + - llvm/docs/CommandGuide/FileCheck.rst + - llvm/docs/CommandGuide/lit.rst + - llvm/docs/TestingGuide.rst + - llvm/test/Other/FileCheck-space.txt + - llvm/utils/UpdateTestChecks/** + - llvm/utils/update*_test_checks.py debuginfo: - - clang/lib/CodeGen/CGDebugInfo.* - - llvm/include/llvm/BinaryFormat/Dwarf.* - - llvm/include/llvm/CodeGen/*Debug*.* - - llvm/include/llvm/DebugInfo/** - - llvm/include/llvm/Debuginfod/** - - llvm/include/llvm/Frontend/Debug/** - - llvm/include/llvm/IR/Debug*.* - - llvm/include/llvm/Object/*Debug*.* - - llvm/include/llvm/ObjectYAML/*Debug*.* - - llvm/include/llvm/Transforms/Utils/*Debug*.* - - llvm/include/llvm-c/DebugInfo.h - - llvm/lib/BinaryFormat/Dwarf.cpp - - llvm/lib/CodeGen/AsmPrinter/*Debug*.* - - llvm/lib/CodeGen/AsmPrinter/Dwarf*.* - - llvm/lib/CodeGen/AsmPrinter/DIE*.* - - llvm/lib/CodeGen/LiveDebugValues/** - - llvm/lib/CodeGen/*Debug*.* - - llvm/lib/CodeGen/DwarfEHPrepare.cpp - - llvm/lib/DebugInfo/** - - llvm/lib/Debuginfod/** - - llvm/lib/DWARFLinkerParallel/** - - llvm/lib/IR/Debug*.cpp - - llvm/lib/MC/MCDwarf.cpp - - llvm/lib/Transforms/Utils/*Debug*.* - - llvm/test/DebugInfo/** - - llvm/test/tools/dsymutil/** - - llvm/test/tools/llvm-debuginfo-analyzer/** - - llvm/test/tools/llvm-debuginfod/** - - llvm/test/tools/llvm-debuginfod-find/** - - llvm/test/tools/llvm-dwarfdump/** - - llvm/test/tools/llvm-dwarfutil/** - - llvm/test/tools/llvm-dwp/** - - llvm/test/tools/llvm-gsymutil/** - - llvm/test/tools/llvm-pdbuti/** - - llvm/tools/dsymutil/** - - llvm/tools/llvm-debuginfo-analyzer/** - - llvm/tools/llvm-debuginfod/** - - llvm/tools/llvm-debuginfod-find/** - - llvm/tools/llvm-dwarfdump/** - - llvm/tools/llvm-dwarfutil/** - - llvm/tools/llvm-dwp/** - - llvm/tools/llvm-gsymutil/** - - llvm/tools/llvm-pdbutil/** + - changed-files: + - any-glob-to-any-file: + - clang/lib/CodeGen/CGDebugInfo.* + - llvm/include/llvm/BinaryFormat/Dwarf.* + - llvm/include/llvm/CodeGen/*Debug*.* + - llvm/include/llvm/DebugInfo/** + - llvm/include/llvm/Debuginfod/** + - llvm/include/llvm/Frontend/Debug/** + - llvm/include/llvm/IR/Debug*.* + - llvm/include/llvm/Object/*Debug*.* + - llvm/include/llvm/ObjectYAML/*Debug*.* + - llvm/include/llvm/Transforms/Utils/*Debug*.* + - llvm/include/llvm-c/DebugInfo.h + - llvm/lib/BinaryFormat/Dwarf.cpp + - llvm/lib/CodeGen/AsmPrinter/*Debug*.* + - llvm/lib/CodeGen/AsmPrinter/Dwarf*.* + - llvm/lib/CodeGen/AsmPrinter/DIE*.* + - llvm/lib/CodeGen/LiveDebugValues/** + - llvm/lib/CodeGen/*Debug*.* + - llvm/lib/CodeGen/DwarfEHPrepare.cpp + - llvm/lib/DebugInfo/** + - llvm/lib/Debuginfod/** + - llvm/lib/DWARFLinkerParallel/** + - llvm/lib/IR/Debug*.cpp + - llvm/lib/MC/MCDwarf.cpp + - llvm/lib/Transforms/Utils/*Debug*.* + - llvm/test/DebugInfo/** + - llvm/test/tools/dsymutil/** + - llvm/test/tools/llvm-debuginfo-analyzer/** + - llvm/test/tools/llvm-debuginfod/** + - llvm/test/tools/llvm-debuginfod-find/** + - llvm/test/tools/llvm-dwarfdump/** + - llvm/test/tools/llvm-dwarfutil/** + - llvm/test/tools/llvm-dwp/** + - llvm/test/tools/llvm-gsymutil/** + - llvm/test/tools/llvm-pdbuti/** + - llvm/tools/dsymutil/** + - llvm/tools/llvm-debuginfo-analyzer/** + - llvm/tools/llvm-debuginfod/** + - llvm/tools/llvm-debuginfod-find/** + - llvm/tools/llvm-dwarfdump/** + - llvm/tools/llvm-dwarfutil/** + - llvm/tools/llvm-dwp/** + - llvm/tools/llvm-gsymutil/** + - llvm/tools/llvm-pdbutil/** github:workflow: - - .github/workflows/** + - changed-files: + - any-glob-to-any-file: + - .github/workflows/** cmake: - - cmake/** - - llvm/cmake/** - - runtimes/** + - changed-files: + - any-glob-to-any-file: + - cmake/** + - llvm/cmake/** + - runtimes/** flang:driver: - - flang/tools/flang-driver/** - - flang/unittests/Frontend/** - - flang/lib/FrontendTool/** - - flang/lib/Frontend/** - - flang/include/flang/Frontend/** - - flang/include/flang/FrontendTool/** - - flang/test/Driver/** + - changed-files: + - any-glob-to-any-file: + - flang/tools/flang-driver/** + - flang/unittests/Frontend/** + - flang/lib/FrontendTool/** + - flang/lib/Frontend/** + - flang/include/flang/Frontend/** + - flang/include/flang/FrontendTool/** + - flang/test/Driver/** backend:m68k: - - llvm/lib/Target/M68k/** - - clang/lib/Basic/Targets/M68k.* - - clang/lib/CodeGen/Targets/M68k.cpp - - llvm/test/CodeGen/M68k/** - - llvm/test/MC/Disassembler/M68k/** - - llvm/test/MC/M68k/** + - changed-files: + - any-glob-to-any-file: + - llvm/lib/Target/M68k/** + - clang/lib/Basic/Targets/M68k.* + - clang/lib/CodeGen/Targets/M68k.cpp + - llvm/test/CodeGen/M68k/** + - llvm/test/MC/Disassembler/M68k/** + - llvm/test/MC/M68k/** libc++: - - libcxx/** - - .github/workflows/libcxx-* + - changed-files: + - any-glob-to-any-file: + - libcxx/** + - .github/workflows/libcxx-* libc++abi: - - libcxxabi/** + - changed-files: + - any-glob-to-any-file: + - libcxxabi/** libunwind: - - libunwind/** + - changed-files: + - any-glob-to-any-file: + - libunwind/** objectyaml: - - llvm/include/llvm/ObjectYAML/** - - llvm/lib/ObjectYAML/** - - llvm/test/tools/obj2yaml/** - - llvm/test/tools/yaml2obj/** - - llvm/tools/obj2yaml/** - - llvm/tools/yaml2obj/** + - changed-files: + - any-glob-to-any-file: + - llvm/include/llvm/ObjectYAML/** + - llvm/lib/ObjectYAML/** + - llvm/test/tools/obj2yaml/** + - llvm/test/tools/yaml2obj/** + - llvm/tools/obj2yaml/** + - llvm/tools/yaml2obj/** clang:analysis: - - clang/include/clang/Analysis/** - - clang/lib/Analysis/** + - changed-files: + - any-glob-to-any-file: + - clang/include/clang/Analysis/** + - clang/lib/Analysis/** clang:static analyzer: - - clang/include/clang/StaticAnalyzer/** - - clang/lib/StaticAnalyzer/** - - clang/tools/scan-build/** - - clang/utils/analyzer/** - - clang/docs/analyzer/** - - clang/test/Analysis/** + - changed-files: + - any-glob-to-any-file: + - clang/include/clang/StaticAnalyzer/** + - clang/lib/StaticAnalyzer/** + - clang/tools/scan-build/** + - clang/utils/analyzer/** + - clang/docs/analyzer/** + - clang/test/Analysis/** pgo: - - llvm/lib/Transforms/Instrumentation/CGProfile.cpp - - llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp - - llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp - - llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp - - llvm/lib/Transforms/Instrumentation/PGO* - - llvm/lib/Transforms/Instrumentation/ValueProfile* - - llvm/test/Instrumentation/InstrProfiling/** - - llvm/test/Transforms/PGOProfile/** - - compiler-rt/lib/profile/** - - compiler-rt/lib/memprof/** - - compiler-rt/test/profile/** - - compiler-rt/test/memprof/** - - llvm/tools/llvm-profdata/** - - llvm/tools/llvm-profgen/** - - llvm/test/tools/llvm-profdata/** - - llvm/test/tools/llvm-profgen/** - - llvm/unittests/ProfileData/* + - changed-files: + - any-glob-to-any-file: + - llvm/lib/Transforms/Instrumentation/CGProfile.cpp + - llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp + - llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp + - llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp + - llvm/lib/Transforms/Instrumentation/PGO* + - llvm/lib/Transforms/Instrumentation/ValueProfile* + - llvm/test/Instrumentation/InstrProfiling/** + - llvm/test/Transforms/PGOProfile/** + - compiler-rt/lib/profile/** + - compiler-rt/lib/memprof/** + - compiler-rt/test/profile/** + - compiler-rt/test/memprof/** + - llvm/tools/llvm-profdata/** + - llvm/tools/llvm-profgen/** + - llvm/test/tools/llvm-profdata/** + - llvm/test/tools/llvm-profgen/** + - llvm/unittests/ProfileData/* openacc: - - flang/**/OpenACC/** - - flang/include/flang/Lower/OpenACC.h - - flang/docs/OpenACC.md - - flang/lib/Parser/openacc-parsers.cpp - - flang/lib/Lower/OpenACC.cpp - - llvm/**/Frontend/OpenACC/** - - llvm/unittests/Frontend/OpenACCTest.cpp - - mlir/test/Target/LLVMIR/openacc-llvm.mlir - - mlir/**/*OpenACC/** + - changed-files: + - any-glob-to-any-file: + - flang/**/OpenACC/** + - flang/include/flang/Lower/OpenACC.h + - flang/docs/OpenACC.md + - flang/lib/Parser/openacc-parsers.cpp + - flang/lib/Lower/OpenACC.cpp + - llvm/**/Frontend/OpenACC/** + - llvm/unittests/Frontend/OpenACCTest.cpp + - mlir/test/Target/LLVMIR/openacc-llvm.mlir + - mlir/**/*OpenACC/** flang:runtime: - - flang/runtime/** + - changed-files: + - any-glob-to-any-file: + - flang/runtime/** flang:parser: - - flang/**/Parser/** + - changed-files: + - any-glob-to-any-file: + - flang/**/Parser/** flang:semantics: - - flang/**/Evaluate/** - - flang/**/Semantics/** + - changed-files: + - any-glob-to-any-file: + - flang/**/Evaluate/** + - flang/**/Semantics/** flang:fir-hlfir: - - flang/**/Lower/** - - flang/**/Optimizer/** + - changed-files: + - any-glob-to-any-file: + - flang/**/Lower/** + - flang/**/Optimizer/** flang:codegen: - - flang/**/CodeGen/** + - changed-files: + - any-glob-to-any-file: + - flang/**/CodeGen/** llvm:codegen: - - llvm/lib/CodeGen/* - - llvm/lib/CodeGen/MIRParser/* - - llvm/lib/CodeGen/LiveDebugValues/* - - llvm/lib/CodeGen/AsmPrinter/* + - changed-files: + - any-glob-to-any-file: + - llvm/lib/CodeGen/* + - llvm/lib/CodeGen/MIRParser/* + - llvm/lib/CodeGen/LiveDebugValues/* + - llvm/lib/CodeGen/AsmPrinter/* llvm:globalisel: - - llvm/**/GlobalISel/** - - llvm/utils/TableGen/GlobalISel* + - changed-files: + - any-glob-to-any-file: + - llvm/**/GlobalISel/** + - llvm/utils/TableGen/GlobalISel* function-specialization: - - llvm/include/llvm/Transforms/Utils/SCCPSolver.h - - llvm/lib/Transforms/Utils/SCCPSolver.cpp - - llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h - - llvm/lib/Transforms/IPO/FunctionSpecialization.cpp - - llvm/test/Transforms/FunctionSpecialization/* + - changed-files: + - any-glob-to-any-file: + - llvm/include/llvm/Transforms/Utils/SCCPSolver.h + - llvm/lib/Transforms/Utils/SCCPSolver.cpp + - llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h + - llvm/lib/Transforms/IPO/FunctionSpecialization.cpp + - llvm/test/Transforms/FunctionSpecialization/* libc: - - libc/** - - utils/bazel/llvm-project-overlay/libc/** + - changed-files: + - any-glob-to-any-file: + - libc/** + - utils/bazel/llvm-project-overlay/libc/** clang-format: - - clang/**/Format/** - - clang/tools/clang-format/** + - changed-files: + - any-glob-to-any-file: + - clang/**/Format/** + - clang/tools/clang-format/** flang:openmp: - - flang/test/**/OpenMP/** - - flang/lib/Lower/OpenMP.cpp - - flang/lib/Semantics/resolve-directives.cpp - - flang/lib/Semantics/check-omp-structure.cpp - - flang/lib/Optimizer/Transforms/OMP* - - flang/test/Fir/convert-to-llvm-openmp-and-fir.fir - - flang/test/Lower/OpenMP/** - - flang/test/Transforms/omp* - - mlir/**/*OpenMP* - - mlir/test/Target/LLVMIR/openmp* - - llvm/lib/Frontend/OpenMP/** - - llvm/include/llvm/Frontend/OpenMP/** - - llvm/unittests/Frontend/OpenMP* + - changed-files: + - any-glob-to-any-file: + - flang/test/**/OpenMP/** + - flang/lib/Lower/OpenMP.cpp + - flang/lib/Semantics/resolve-directives.cpp + - flang/lib/Semantics/check-omp-structure.cpp + - flang/lib/Optimizer/Transforms/OMP* + - flang/test/Fir/convert-to-llvm-openmp-and-fir.fir + - flang/test/Lower/OpenMP/** + - flang/test/Transforms/omp* + - mlir/**/*OpenMP* + - mlir/test/Target/LLVMIR/openmp* + - llvm/lib/Frontend/OpenMP/** + - llvm/include/llvm/Frontend/OpenMP/** + - llvm/unittests/Frontend/OpenMP* llvm:ir: - - llvm/lib/IR/** - - llvm/include/llvm/IR/** - - llvm/docs/LangRef.rst - - llvm/unittests/IR/** + - changed-files: + - any-glob-to-any-file: + - llvm/lib/IR/** + - llvm/include/llvm/IR/** + - llvm/docs/LangRef.rst + - llvm/unittests/IR/** llvm:SandboxIR: - - llvm/lib/SandboxIR/** - - llvm/include/llvm/SandboxIR/** - - llvm/docs/SandboxIR.md - - llvm/unittests/SandboxIR/** + - changed-files: + - any-glob-to-any-file: + - llvm/lib/SandboxIR/** + - llvm/include/llvm/SandboxIR/** + - llvm/docs/SandboxIR.md + - llvm/unittests/SandboxIR/** llvm:analysis: - - llvm/lib/Analysis/** - - llvm/include/llvm/Analysis/** - - llvm/test/Analysis/** - - llvm/unittests/Analysis/** + - changed-files: + - any-glob-to-any-file: + - llvm/lib/Analysis/** + - llvm/include/llvm/Analysis/** + - llvm/test/Analysis/** + - llvm/unittests/Analysis/** llvm:adt: - - llvm/**/ADT/* + - changed-files: + - any-glob-to-any-file: + - llvm/**/ADT/* llvm:support: - - llvm/**/Support/** + - changed-files: + - any-glob-to-any-file: + - llvm/**/Support/** # Skip llvm/test/MC and llvm/unittests/MC, which includes target-specific directories. llvm:mc: - - llvm/include/llvm/MC/** - - llvm/lib/MC/** - - llvm/tools/llvm-mc/** + - changed-files: + - any-glob-to-any-file: + - llvm/include/llvm/MC/** + - llvm/lib/MC/** + - llvm/tools/llvm-mc/** llvm:transforms: - - llvm/lib/Transforms/** - - llvm/include/llvm/Transforms/** - - llvm/test/Transforms/** - - llvm/unittests/Transforms/** + - changed-files: + - any-glob-to-any-file: + - llvm/lib/Transforms/** + - llvm/include/llvm/Transforms/** + - llvm/test/Transforms/** + - llvm/unittests/Transforms/** llvm:instcombine: - - llvm/lib/Analysis/InstructionSimplify.cpp - - llvm/lib/Transforms/InstCombine/** - - llvm/include/llvm/Transforms/InstCombine/ - - llvm/include/llvm/Analysis/InstructionSimplify.h - - llvm/test/Transforms/InstCombine/** - - llvm/test/Transforms/InstSimplify/** + - changed-files: + - any-glob-to-any-file: + - llvm/lib/Analysis/InstructionSimplify.cpp + - llvm/lib/Transforms/InstCombine/** + - llvm/include/llvm/Transforms/InstCombine/ + - llvm/include/llvm/Analysis/InstructionSimplify.h + - llvm/test/Transforms/InstCombine/** + - llvm/test/Transforms/InstSimplify/** llvm:vectorcombine: - - llvm/lib/Transforms/Vectorize/VectorCombine.cpp - - llvm/test/Transforms/VectorCombine/** + - changed-files: + - any-glob-to-any-file: + - llvm/lib/Transforms/Vectorize/VectorCombine.cpp + - llvm/test/Transforms/VectorCombine/** clangd: - - clang-tools-extra/clangd/** + - changed-files: + - any-glob-to-any-file: + - clang-tools-extra/clangd/** hlsl: - - clang/test/ParserHLSL/** - - clang/test/SemaHLSL/** - - clang/test/AST/HLSL/** - - clang/test/CodeGenHLSL/** - - clang/cmake/caches/HLSL.cmake - - clang/include/clang/Basic/HLSL*.h - - clang/include/clang/Sema/HLSL*.h - - clang/docs/HLSL/** - - clang/lib/Driver/ToolChains/HLSL* - - clang/lib/Parse/ParseHLSL.cpp - - clang/lib/Sema/HLSLExternalSemaSource.cpp - - clang/lib/Sema/SemaHLSL.cpp - - clang/lib/CodeGen/CGHLSLRuntime.* - - clang/lib/CodeGen/CGHLSLBuiltins.cpp - - llvm/include/llvm/Frontend/HLSL/** - - llvm/lib/Frontend/HLSL/** + - changed-files: + - any-glob-to-any-file: + - clang/test/ParserHLSL/** + - clang/test/SemaHLSL/** + - clang/test/AST/HLSL/** + - clang/test/CodeGenHLSL/** + - clang/cmake/caches/HLSL.cmake + - clang/include/clang/Basic/HLSL*.h + - clang/include/clang/Sema/HLSL*.h + - clang/docs/HLSL/** + - clang/lib/Driver/ToolChains/HLSL* + - clang/lib/Parse/ParseHLSL.cpp + - clang/lib/Sema/HLSLExternalSemaSource.cpp + - clang/lib/Sema/SemaHLSL.cpp + - clang/lib/CodeGen/CGHLSLRuntime.* + - clang/lib/CodeGen/CGHLSLBuiltins.cpp + - llvm/include/llvm/Frontend/HLSL/** + - llvm/lib/Frontend/HLSL/** llvm:SelectionDAG: - - llvm/include/llvm/CodeGen/SelectionDAG*.h - - llvm/include/llvm/CodeGen/SDNodeProperties.td - - llvm/include/llvm/Target/TargetSelectionDAG.td - - llvm/lib/CodeGen/SelectionDAG/** - - llvm/utils/TableGen/CodeGenDAG* - - llvm/utils/TableGen/DAGISel* - - llvm/include/llvm/CodeGen/DAGCombine.h - - llvm/include/llvm/CodeGen/ISDOpcodes.h + - changed-files: + - any-glob-to-any-file: + - llvm/include/llvm/CodeGen/SelectionDAG*.h + - llvm/include/llvm/CodeGen/SDNodeProperties.td + - llvm/include/llvm/Target/TargetSelectionDAG.td + - llvm/lib/CodeGen/SelectionDAG/** + - llvm/utils/TableGen/CodeGenDAG* + - llvm/utils/TableGen/DAGISel* + - llvm/include/llvm/CodeGen/DAGCombine.h + - llvm/include/llvm/CodeGen/ISDOpcodes.h backend:DirectX: - - '**/*DirectX*' - - '**/*DXIL*' - - '**/*dxil*' - - '**/*DirectX*/**' - - '**/*DXIL*/**' - - '**/*dxil*/**' - - '**/*DXContainer*' - - '**/*DXContainer*/**' - - clang/lib/Sema/SemaDirectX.cpp - - clang/include/clang/Sema/SemaDirectX.h - - clang/include/clang/Basic/BuiltinsDirectX.td - - clang/lib/CodeGen/TargetBuiltins/DirectX.cpp - - clang/test/CodeGenDirectX/** - - clang/test/SemaDirectX/** + - changed-files: + - any-glob-to-any-file: + - '**/*DirectX*' + - '**/*DXIL*' + - '**/*dxil*' + - '**/*DirectX*/**' + - '**/*DXIL*/**' + - '**/*dxil*/**' + - '**/*DXContainer*' + - '**/*DXContainer*/**' + - clang/lib/Sema/SemaDirectX.cpp + - clang/include/clang/Sema/SemaDirectX.h + - clang/include/clang/Basic/BuiltinsDirectX.td + - clang/lib/CodeGen/TargetBuiltins/DirectX.cpp + - clang/test/CodeGenDirectX/** + - clang/test/SemaDirectX/** backend:SPIR-V: - - clang/lib/Driver/ToolChains/SPIRV.* - - clang/lib/Sema/SemaSPIRV.cpp - - clang/include/clang/Sema/SemaSPIRV.h - - clang/include/clang/Basic/BuiltinsSPIRV.td - - clang/test/CodeGenSPIRV/** - - clang/test/SemaSPIRV/** - - llvm/lib/Target/SPIRV/** - - llvm/test/CodeGen/SPIRV/** - - llvm/test/Frontend/HLSL/** - - llvm/docs/SPIRVUsage.rst + - changed-files: + - any-glob-to-any-file: + - clang/lib/Driver/ToolChains/SPIRV.* + - clang/lib/Sema/SemaSPIRV.cpp + - clang/include/clang/Sema/SemaSPIRV.h + - clang/include/clang/Basic/BuiltinsSPIRV.td + - clang/test/CodeGenSPIRV/** + - clang/test/SemaSPIRV/** + - llvm/lib/Target/SPIRV/** + - llvm/test/CodeGen/SPIRV/** + - llvm/test/Frontend/HLSL/** + - llvm/docs/SPIRVUsage.rst mlgo: - - llvm/lib/Analysis/ML* - - llvm/include/llvm/Analysis/ML* - - llvm/lib/Analysis/*Runner.cpp - - llvm/include/llvm/Analysis/*Runner.h - - llvm/unittests/Analysis/ML* - - llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp - - llvm/lib/Analysis/TrainingLogger.cpp - - llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h - - llvm/include/llvm/Analysis/Utils/TrainingLogger.h - - llvm/test/Analysis/FunctionPropertiesAnalysis/* - - llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp - - llvm/test/Transforms/inline/ML/** - - llvm/lib/CodeGen/ML* - - llvm/unittests/CodeGen/ML* - - llvm/test/CodeGen/MLRegAlloc/** - - llvm/utils/mlgo-utils/** - - llvm/docs/MLGO.rst - - llvm/include/llvm/Analysis/IR2Vec.h - - llvm/lib/Analysis/IR2Vec.cpp - - llvm/lib/Analysis/models/** - - llvm/include/llvm/CodeGen/MIR2Vec.h - - llvm/lib/CodeGen/MIR2Vec.cpp - - llvm/test/Analysis/IR2Vec/** - - llvm/test/CodeGen/MIR2Vec/** - - llvm/unittests/Analysis/IR2VecTest.cpp - - llvm/unittests/CodeGen/MIR2VecTest.cpp - - llvm/tools/llvm-ir2vec/** - - llvm/docs/CommandGuide/llvm-ir2vec.rst + - changed-files: + - any-glob-to-any-file: + - llvm/lib/Analysis/ML* + - llvm/include/llvm/Analysis/ML* + - llvm/lib/Analysis/*Runner.cpp + - llvm/include/llvm/Analysis/*Runner.h + - llvm/unittests/Analysis/ML* + - llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp + - llvm/lib/Analysis/TrainingLogger.cpp + - llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h + - llvm/include/llvm/Analysis/Utils/TrainingLogger.h + - llvm/test/Analysis/FunctionPropertiesAnalysis/* + - llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp + - llvm/test/Transforms/inline/ML/** + - llvm/lib/CodeGen/ML* + - llvm/unittests/CodeGen/ML* + - llvm/test/CodeGen/MLRegAlloc/** + - llvm/utils/mlgo-utils/** + - llvm/docs/MLGO.rst + - llvm/include/llvm/Analysis/IR2Vec.h + - llvm/lib/Analysis/IR2Vec.cpp + - llvm/lib/Analysis/models/** + - llvm/include/llvm/CodeGen/MIR2Vec.h + - llvm/lib/CodeGen/MIR2Vec.cpp + - llvm/test/Analysis/IR2Vec/** + - llvm/test/CodeGen/MIR2Vec/** + - llvm/unittests/Analysis/IR2VecTest.cpp + - llvm/unittests/CodeGen/MIR2VecTest.cpp + - llvm/tools/llvm-ir2vec/** + - llvm/docs/CommandGuide/llvm-ir2vec.rst tools:llvm-exegesis: - - llvm/tools/llvm-exegesis/** - - llvm/test/tools/llvm-exegesis/** - - llvm/unittests/tools/llvm-exegesis/** + - changed-files: + - any-glob-to-any-file: + - llvm/tools/llvm-exegesis/** + - llvm/test/tools/llvm-exegesis/** + - llvm/unittests/tools/llvm-exegesis/** tools:llvm-reduce: - - llvm/tools/llvm-reduce/** + - changed-files: + - any-glob-to-any-file: + - llvm/tools/llvm-reduce/** platform:windows: - - lld/COFF/** - - clang/lib/Driver/MSVC.cpp - - clang/lib/Driver/MinGW.cpp - - llvm/lib/DebugInfo/CodeView/** - - llvm/lib/DebugInfo/PDB/** - - llvm/lib/WindowsDriver/** - - llvm/lib/Support/Windows/** - - llvm/lib/BinaryFormat/COFF.cpp + - changed-files: + - any-glob-to-any-file: + - lld/COFF/** + - clang/lib/Driver/MSVC.cpp + - clang/lib/Driver/MinGW.cpp + - llvm/lib/DebugInfo/CodeView/** + - llvm/lib/DebugInfo/PDB/** + - llvm/lib/WindowsDriver/** + - llvm/lib/Support/Windows/** + - llvm/lib/BinaryFormat/COFF.cpp llvm:regalloc: - - llvm/**/CodeGen/CalcSpillWeights* - - llvm/**/CodeGen/InlineSpiller* - - llvm/**/CodeGen/InterferenceCache* - - llvm/**/CodeGen/LiveInterval* - - llvm/**/CodeGen/LiveRange* - - llvm/**/CodeGen/LiveReg* - - llvm/**/CodeGen/LiveVariables* - - llvm/**/CodeGen/MachineCopyPropagation* - - llvm/**/CodeGen/PHIElimination* - - llvm/**/CodeGen/ProcessImplicitDefs.cpp - - llvm/**/CodeGen/Register* - - llvm/**/CodeGen/RegUsage* - - llvm/**/CodeGen/RenameIndependentSubregs.cpp - - llvm/**/CodeGen/SlotIndexes.h - - llvm/**/CodeGen/SpillPlacement* - - llvm/**/CodeGen/SplitKit* - - llvm/**/CodeGen/VirtRegMap.h - - llvm/include/PBQP/** - - llvm/include/PBQPRAConstraint.h - - llvm/include/llvm/CodeGen/Spiller.h - - llvm/**/*RegAlloc + - changed-files: + - any-glob-to-any-file: + - llvm/**/CodeGen/CalcSpillWeights* + - llvm/**/CodeGen/InlineSpiller* + - llvm/**/CodeGen/InterferenceCache* + - llvm/**/CodeGen/LiveInterval* + - llvm/**/CodeGen/LiveRange* + - llvm/**/CodeGen/LiveReg* + - llvm/**/CodeGen/LiveVariables* + - llvm/**/CodeGen/MachineCopyPropagation* + - llvm/**/CodeGen/PHIElimination* + - llvm/**/CodeGen/ProcessImplicitDefs.cpp + - llvm/**/CodeGen/Register* + - llvm/**/CodeGen/RegUsage* + - llvm/**/CodeGen/RenameIndependentSubregs.cpp + - llvm/**/CodeGen/SlotIndexes.h + - llvm/**/CodeGen/SpillPlacement* + - llvm/**/CodeGen/SplitKit* + - llvm/**/CodeGen/VirtRegMap.h + - llvm/include/PBQP/** + - llvm/include/PBQPRAConstraint.h + - llvm/include/llvm/CodeGen/Spiller.h + - llvm/**/*RegAlloc lldb: - - lldb/** + - changed-files: + - any-glob-to-any-file: + - lldb/** lldb-dap: - - lldb/tools/lldb-dap/** + - changed-files: + - any-glob-to-any-file: + - lldb/tools/lldb-dap/** backend:AMDGPU: - - '**/*amdgpu*' - - '**/*AMDGPU*' - - '**/*amdgpu*/**' - - '**/*AMDGPU*/**' + - changed-files: + - any-glob-to-any-file: + - '**/*amdgpu*' + - '**/*AMDGPU*' + - '**/*amdgpu*/**' + - '**/*AMDGPU*/**' backend:NVPTX: - - 'llvm/**/*nvvm*' - - 'llvm/**/*NVVM*' - - 'llvm/**/*nvptx*' - - 'llvm/**/*NVPTX*' - - 'llvm/**/*nvvm*/**' - - 'llvm/**/*NVVM*/**' - - 'llvm/**/*nvptx*/**' - - 'llvm/**/*NVPTX*/**' + - changed-files: + - any-glob-to-any-file: + - 'llvm/**/*nvvm*' + - 'llvm/**/*NVVM*' + - 'llvm/**/*nvptx*' + - 'llvm/**/*NVPTX*' + - 'llvm/**/*nvvm*/**' + - 'llvm/**/*NVVM*/**' + - 'llvm/**/*nvptx*/**' + - 'llvm/**/*NVPTX*/**' backend:MIPS: - - '**/*mips*' - - '**/*Mips*' - - '**/*mips*/**' - - '**/*Mips*/**' + - changed-files: + - any-glob-to-any-file: + - '**/*mips*' + - '**/*Mips*' + - '**/*mips*/**' + - '**/*Mips*/**' backend:RISC-V: - - '**/*riscv*' - - '**/*RISCV*' - - '**/*riscv*/**' - - '**/*RISCV*/**' + - changed-files: + - any-glob-to-any-file: + - '**/*riscv*' + - '**/*RISCV*' + - '**/*riscv*/**' + - '**/*RISCV*/**' backend:Xtensa: - - '**/*xtensa*' - - '**/*Xtensa*' - - '**/*xtensa*/**' - - '**/*Xtensa*/**' + - changed-files: + - any-glob-to-any-file: + - '**/*xtensa*' + - '**/*Xtensa*' + - '**/*xtensa*/**' + - '**/*Xtensa*/**' lld:coff: - - lld/**/COFF/** - - lld/Common/** + - changed-files: + - any-glob-to-any-file: + - lld/**/COFF/** + - lld/Common/** lld:elf: - - lld/**/ELF/** - - lld/Common/** + - changed-files: + - any-glob-to-any-file: + - lld/**/ELF/** + - lld/Common/** lld:macho: - - lld/**/MachO/** - - lld/Common/** + - changed-files: + - any-glob-to-any-file: + - lld/**/MachO/** + - lld/Common/** lld:wasm: - - lld/**/wasm/** - - lld/Common/** + - changed-files: + - any-glob-to-any-file: + - lld/**/wasm/** + - lld/Common/** backend:ARC: - - llvm/lib/Target/ARC/** - - clang/lib/Basic/Targets/ARC.h - - clang/lib/Basic/Targets/ARC.cpp - - clang/lib/CodeGen/Targets/ARC.cpp + - changed-files: + - any-glob-to-any-file: + - llvm/lib/Target/ARC/** + - clang/lib/Basic/Targets/ARC.h + - clang/lib/Basic/Targets/ARC.cpp + - clang/lib/CodeGen/Targets/ARC.cpp backend:ARM: - - llvm/include/llvm/IR/IntrinsicsARM.td - - llvm/test/MC/ARM/** - - llvm/lib/Target/ARM/** - - llvm/test/CodeGen/ARM/** - - clang/lib/Basic/Targets/ARM* - - clang/lib/Driver/ToolChains/Arch/ARM.* - - clang/lib/CodeGen/Targets/ARM.cpp - - clang/include/clang/Basic/BuiltinsARM* - - llvm/test/MC/DisasemblerARM/** - - clang/include/clang/Sema/SemaARM.h - - clang/lib/Sema/SemaARM.cpp + - changed-files: + - any-glob-to-any-file: + - llvm/include/llvm/IR/IntrinsicsARM.td + - llvm/test/MC/ARM/** + - llvm/lib/Target/ARM/** + - llvm/test/CodeGen/ARM/** + - clang/lib/Basic/Targets/ARM* + - clang/lib/Driver/ToolChains/Arch/ARM.* + - clang/lib/CodeGen/Targets/ARM.cpp + - clang/include/clang/Basic/BuiltinsARM* + - llvm/test/MC/DisasemblerARM/** + - clang/include/clang/Sema/SemaARM.h + - clang/lib/Sema/SemaARM.cpp backend:AArch64: - - llvm/include/llvm/IR/IntrinsicsAArch64.td - - llvm/test/MC/AArch64/** - - llvm/lib/Target/AArch64/** - - llvm/test/CodeGen/AArch64/** - - clang/lib/Basic/Targets/AArch64* - - clang/lib/Driver/ToolChains/Arch/AArch64.* - - clang/lib/CodeGen/Targets/AArch64.cpp - - clang/include/clang/Basic/BuiltinsAArch64* - - llvm/test/MC/Disassembler/AArch64/** - - clang/include/clang/Sema/SemaARM.h - - clang/lib/Sema/SemaARM.cpp + - changed-files: + - any-glob-to-any-file: + - llvm/include/llvm/IR/IntrinsicsAArch64.td + - llvm/test/MC/AArch64/** + - llvm/lib/Target/AArch64/** + - llvm/test/CodeGen/AArch64/** + - clang/lib/Basic/Targets/AArch64* + - clang/lib/Driver/ToolChains/Arch/AArch64.* + - clang/lib/CodeGen/Targets/AArch64.cpp + - clang/include/clang/Basic/BuiltinsAArch64* + - llvm/test/MC/Disassembler/AArch64/** + - clang/include/clang/Sema/SemaARM.h + - clang/lib/Sema/SemaARM.cpp backend:CSKY: - - llvm/lib/Target/CSKY/** - - llvm/include/llvm/TargetParser/CSKYTargetParser.def - - llvm/include/llvm/TargetParser/CSKYTargetParser.h - - llvm/include/llvm/BinaryFormat/ELFRelocs/CSKY.def - - llvm/lib/TargetParser/CSKYTargetParser.cpp - - llvm/lib/Support/CSKYAttributes.cpp - - llvm/lib/Support/CSKYAttributeParser.cpp - - clang/lib/Basic/Targets/CSKY.h - - clang/lib/Basic/Targets/CSKY.cpp - - clang/lib/CodeGen/Targets/CSKY.cpp - - clang/lib/Driver/ToolChains/CSKY* + - changed-files: + - any-glob-to-any-file: + - llvm/lib/Target/CSKY/** + - llvm/include/llvm/TargetParser/CSKYTargetParser.def + - llvm/include/llvm/TargetParser/CSKYTargetParser.h + - llvm/include/llvm/BinaryFormat/ELFRelocs/CSKY.def + - llvm/lib/TargetParser/CSKYTargetParser.cpp + - llvm/lib/Support/CSKYAttributes.cpp + - llvm/lib/Support/CSKYAttributeParser.cpp + - clang/lib/Basic/Targets/CSKY.h + - clang/lib/Basic/Targets/CSKY.cpp + - clang/lib/CodeGen/Targets/CSKY.cpp + - clang/lib/Driver/ToolChains/CSKY* backend:Hexagon: - - clang/include/clang/Basic/BuiltinsHexagon*.def - - clang/include/clang/Sema/SemaHexagon.h - - clang/lib/Basic/Targets/Hexagon.* - - clang/lib/CodeGen/Targets/Hexagon.cpp - - clang/lib/Driver/ToolChains/Hexagon.* - - clang/lib/Sema/SemaHexagon.cpp - - lld/ELF/Arch/Hexagon.cpp - - lldb/source/Plugins/ABI/Hexagon/** - - lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/** - - llvm/include/llvm/BinaryFormat/ELFRelocs/Hexagon.def - - llvm/include/llvm/IR/IntrinsicsHexagon* - - llvm/include/llvm/Support/Hexagon* - - llvm/lib/Support/Hexagon* - - llvm/lib/Target/Hexagon/** - - llvm/test/CodeGen/Hexagon/** - - llvm/test/CodeGen/*/Hexagon/** - - llvm/test/DebugInfo/*/Hexagon/** - - llvm/test/Transforms/*/Hexagon - - llvm/test/MC/Disassembler/Hexagon/** - - llvm/test/MC/Hexagon/** - - llvm/test/tools/llvm-objdump/ELF/Hexagon/** + - changed-files: + - any-glob-to-any-file: + - clang/include/clang/Basic/BuiltinsHexagon*.def + - clang/include/clang/Sema/SemaHexagon.h + - clang/lib/Basic/Targets/Hexagon.* + - clang/lib/CodeGen/Targets/Hexagon.cpp + - clang/lib/Driver/ToolChains/Hexagon.* + - clang/lib/Sema/SemaHexagon.cpp + - lld/ELF/Arch/Hexagon.cpp + - lldb/source/Plugins/ABI/Hexagon/** + - lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/** + - llvm/include/llvm/BinaryFormat/ELFRelocs/Hexagon.def + - llvm/include/llvm/IR/IntrinsicsHexagon* + - llvm/include/llvm/Support/Hexagon* + - llvm/lib/Support/Hexagon* + - llvm/lib/Target/Hexagon/** + - llvm/test/CodeGen/Hexagon/** + - llvm/test/CodeGen/*/Hexagon/** + - llvm/test/DebugInfo/*/Hexagon/** + - llvm/test/Transforms/*/Hexagon + - llvm/test/MC/Disassembler/Hexagon/** + - llvm/test/MC/Hexagon/** + - llvm/test/tools/llvm-objdump/ELF/Hexagon/** backend:Lanai: - - llvm/lib/Target/Lanai/** - - clang/lib/Basic/Targets/Lanai.h - - clang/lib/Basic/Targets/Lanai.cpp - - clang/lib/CodeGen/Targets/Lanai.cpp - - clang/lib/Driver/ToolChains/Lanai* + - changed-files: + - any-glob-to-any-file: + - llvm/lib/Target/Lanai/** + - clang/lib/Basic/Targets/Lanai.h + - clang/lib/Basic/Targets/Lanai.cpp + - clang/lib/CodeGen/Targets/Lanai.cpp + - clang/lib/Driver/ToolChains/Lanai* backend:loongarch: - - llvm/include/llvm/IR/IntrinsicsLoongArch.td - - llvm/test/MC/LoongArch/** - - llvm/lib/Target/LoongArch/** - - llvm/test/CodeGen/LoongArch/** - - clang/lib/Basic/Targets/LoongArch* - - clang/lib/Driver/ToolChains/Arch/LoongArch.* - - clang/lib/CodeGen/Targets/LoongArch.cpp - - clang/include/clang/Basic/BuiltinsLoongArch* - - clang/include/clang/Sema/SemaLoongArch.h - - clang/lib/Sema/SemaLoongArch.cpp + - changed-files: + - any-glob-to-any-file: + - llvm/include/llvm/IR/IntrinsicsLoongArch.td + - llvm/test/MC/LoongArch/** + - llvm/lib/Target/LoongArch/** + - llvm/test/CodeGen/LoongArch/** + - clang/lib/Basic/Targets/LoongArch* + - clang/lib/Driver/ToolChains/Arch/LoongArch.* + - clang/lib/CodeGen/Targets/LoongArch.cpp + - clang/include/clang/Basic/BuiltinsLoongArch* + - clang/include/clang/Sema/SemaLoongArch.h + - clang/lib/Sema/SemaLoongArch.cpp backend:MSP430: - - llvm/include/llvm/IR/IntrinsicsMSP430.td - - llvm/test/MC/MSP430/** - - llvm/lib/Target/MSP430/** - - llvm/test/CodeGen/MSP430/** - - clang/lib/Basic/Targets/MSP430* - - clang/lib/Driver/ToolChains/Arch/MSP430.* - - clang/lib/CodeGen/Targets/MSP430.cpp - - clang/include/clang/Basic/BuiltinsMSP430* - - llvm/test/MC/Disassembler/MSP430/** + - changed-files: + - any-glob-to-any-file: + - llvm/include/llvm/IR/IntrinsicsMSP430.td + - llvm/test/MC/MSP430/** + - llvm/lib/Target/MSP430/** + - llvm/test/CodeGen/MSP430/** + - clang/lib/Basic/Targets/MSP430* + - clang/lib/Driver/ToolChains/Arch/MSP430.* + - clang/lib/CodeGen/Targets/MSP430.cpp + - clang/include/clang/Basic/BuiltinsMSP430* + - llvm/test/MC/Disassembler/MSP430/** backend:Sparc: - - llvm/include/llvm/IR/IntrinsicsSparc.td - - llvm/test/MC/Sparc/** - - llvm/lib/Target/Sparc/** - - llvm/test/CodeGen/Sparc/** - - clang/lib/Basic/Targets/Sparc* - - clang/lib/Driver/ToolChains/Arch/Sparc.* - - clang/lib/CodeGen/Targets/Sparc.cpp - - clang/include/clang/Basic/BuiltinsSparc* - - llvm/test/MC/Disassembler/Sparc/** + - changed-files: + - any-glob-to-any-file: + - llvm/include/llvm/IR/IntrinsicsSparc.td + - llvm/test/MC/Sparc/** + - llvm/lib/Target/Sparc/** + - llvm/test/CodeGen/Sparc/** + - clang/lib/Basic/Targets/Sparc* + - clang/lib/Driver/ToolChains/Arch/Sparc.* + - clang/lib/CodeGen/Targets/Sparc.cpp + - clang/include/clang/Basic/BuiltinsSparc* + - llvm/test/MC/Disassembler/Sparc/** backend:WebAssembly: - - llvm/lib/Target/WebAssembly/** - - llvm/test/CodeGen/WebAssembly/** - - clang/lib/Basic/Targets/WebAssembly* - - clang/include/clang/Basic/BuiltinsWebAssembly.def - - clang/include/clang/Basic/WebAssemblyReferenceTypes.def - - clang/lib/CodeGen/Targets/WebAssembly* - - llvm/include/llvm/IR/IntinsicsWebAssembly.td - - llvm/include/llvm/Object/Wasm* - - llvm/lib/CodeGen/AsmPrinter/Wasm* - - llvm/lib/CodeGen/Wasm* - - llvm/lib/MC/MCParser/Wasm* - - llvm/lib/MC/Wasm* - - llvm/lib/ObjCopy/wasm/** - - llvm/lib/Object/Wasm* - - clang/lib/Driver/Toolchains/WebAssembly* - - clang/lib/Headers/wasm_simd128.h - - clang/test/CodeGen/WebAssembly/** - - clang/test/SemaCXX/*wasm* - - clang/test/Sema/*wasm* - - llvm/include/llvm/BinaryFormat/Wasm.h - - llvm/unittests/Target/WebAssembly/** - - llvm/test/DebugInfo/WebAssembly/** - - llvm/test/MC/WebAssembly/** - - clang/include/clang/Sema/SemaWasm.h - - clang/lib/Sema/SemaLoongWasm.cpp + - changed-files: + - any-glob-to-any-file: + - llvm/lib/Target/WebAssembly/** + - llvm/test/CodeGen/WebAssembly/** + - clang/lib/Basic/Targets/WebAssembly* + - clang/include/clang/Basic/BuiltinsWebAssembly.def + - clang/include/clang/Basic/WebAssemblyReferenceTypes.def + - clang/lib/CodeGen/Targets/WebAssembly* + - llvm/include/llvm/IR/IntinsicsWebAssembly.td + - llvm/include/llvm/Object/Wasm* + - llvm/lib/CodeGen/AsmPrinter/Wasm* + - llvm/lib/CodeGen/Wasm* + - llvm/lib/MC/MCParser/Wasm* + - llvm/lib/MC/Wasm* + - llvm/lib/ObjCopy/wasm/** + - llvm/lib/Object/Wasm* + - clang/lib/Driver/Toolchains/WebAssembly* + - clang/lib/Headers/wasm_simd128.h + - clang/test/CodeGen/WebAssembly/** + - clang/test/SemaCXX/*wasm* + - clang/test/Sema/*wasm* + - llvm/include/llvm/BinaryFormat/Wasm.h + - llvm/unittests/Target/WebAssembly/** + - llvm/test/DebugInfo/WebAssembly/** + - llvm/test/MC/WebAssembly/** + - clang/include/clang/Sema/SemaWasm.h + - clang/lib/Sema/SemaLoongWasm.cpp backend:X86: - - llvm/include/llvm/IR/IntrinsicsX86.td - - llvm/lib/Target/X86/** - - llvm/test/CodeGen/X86/** - - llvm/test/MC/X86/** - - llvm/test/MC/Disassembler/X86/** - - llvm/test/Analysis/CostModel/X86/** - - llvm/test/tools/llvm-mca/X86/** - - clang/lib/Basic/Targets/X86/** - - clang/lib/Driver/ToolChains/Arch/X86.* - - clang/lib/CodeGen/Targets/X86.* - - clang/lib/Headers/** - - clang/test/CodeGen/X86/** - - clang/include/clang/Basic/BuiltinsX86* - - llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h - - llvm/include/llvm/TargetParser/X86* - - llvm/lib/TargetParser/X86* - - llvm/utils/TableGen/X86* - - clang/include/clang/Sema/SemaX86.h - - clang/lib/Sema/SemaX86.cpp + - changed-files: + - any-glob-to-any-file: + - llvm/include/llvm/IR/IntrinsicsX86.td + - llvm/lib/Target/X86/** + - llvm/test/CodeGen/X86/** + - llvm/test/MC/X86/** + - llvm/test/MC/Disassembler/X86/** + - llvm/test/Analysis/CostModel/X86/** + - llvm/test/tools/llvm-mca/X86/** + - clang/lib/Basic/Targets/X86/** + - clang/lib/Driver/ToolChains/Arch/X86.* + - clang/lib/CodeGen/Targets/X86.* + - clang/lib/Headers/** + - clang/test/CodeGen/X86/** + - clang/include/clang/Basic/BuiltinsX86* + - llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h + - llvm/include/llvm/TargetParser/X86* + - llvm/lib/TargetParser/X86* + - llvm/utils/TableGen/X86* + - clang/include/clang/Sema/SemaX86.h + - clang/lib/Sema/SemaX86.cpp backend:PowerPC: - - llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC* - - llvm/include/llvm/BinaryFormat/XCOFF.h - - llvm/include/llvm/IR/IntrinsicsPowerPC.td - - llvm/lib/CodeGen/AsmPrinter/AIXException.cpp - - llvm/lib/Target/PowerPC/** - - llvm/test/Analysis/**/PowerPC/** - - llvm/test/CodeGen/PowerPC/** - - llvm/test/CodeGen/MIR/PowerPC/** - - llvm/test/DebugInfo/XCOFF/** - - llvm/test/DebugInfo/PowerPC/** - - llvm/test/LTO/PowerPC/** - - llvm/test/MC/Disassembler/PowerPC/** - - llvm/test/MC/PowerPC/** - - llvm/test/MC/XCOFF/** - - llvm/test/Transforms/**/PowerPC/** - - clang/include/clang/Basic/BuiltinsPPC.* - - clang/lib/Basic/Targets/PPC.* - - clang/lib/CodeGen/Targets/PPC.cpp - - clang/lib/Driver/ToolChains/PPC* - - clang/lib/Driver/ToolChains/AIX* - - clang/lib/Driver/ToolChains/Arch/PPC.* - - clang/test/CodeGen/PowerPC/** - - clang/include/clang/Sema/SemaPPC.h - - clang/lib/Sema/SemaPPC.cpp + - changed-files: + - any-glob-to-any-file: + - llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC* + - llvm/include/llvm/BinaryFormat/XCOFF.h + - llvm/include/llvm/IR/IntrinsicsPowerPC.td + - llvm/lib/CodeGen/AsmPrinter/AIXException.cpp + - llvm/lib/Target/PowerPC/** + - llvm/test/Analysis/**/PowerPC/** + - llvm/test/CodeGen/PowerPC/** + - llvm/test/CodeGen/MIR/PowerPC/** + - llvm/test/DebugInfo/XCOFF/** + - llvm/test/DebugInfo/PowerPC/** + - llvm/test/LTO/PowerPC/** + - llvm/test/MC/Disassembler/PowerPC/** + - llvm/test/MC/PowerPC/** + - llvm/test/MC/XCOFF/** + - llvm/test/Transforms/**/PowerPC/** + - clang/include/clang/Basic/BuiltinsPPC.* + - clang/lib/Basic/Targets/PPC.* + - clang/lib/CodeGen/Targets/PPC.cpp + - clang/lib/Driver/ToolChains/PPC* + - clang/lib/Driver/ToolChains/AIX* + - clang/lib/Driver/ToolChains/Arch/PPC.* + - clang/test/CodeGen/PowerPC/** + - clang/include/clang/Sema/SemaPPC.h + - clang/lib/Sema/SemaPPC.cpp backend:SystemZ: - - llvm/include/llvm/BinaryFormat/ELFRelocs/SystemZ* - - llvm/include/llvm/BinaryFormat/GOFF.h - - llvm/include/llvm/IR/IntrinsicsSystemZ.td - - llvm/lib/Target/SystemZ/** - - llvm/test/Analysis/**/SystemZ/** - - llvm/test/CodeGen/SystemZ/** - - llvm/test/DebugInfo/SystemZ/** - - llvm/test/ExecutionEngine/**/SystemZ/** - - llvm/test/MC/Disassembler/SystemZ/** - - llvm/test/MC/GOFF/** - - llvm/test/MC/SystemZ/** - - llvm/test/Transforms/**/SystemZ/** - - clang/include/clang/Basic/BuiltinsSystemZ.* - - clang/lib/Basic/Targets/SystemZ.* - - clang/lib/CodeGen/Targets/SystemZ.cpp - - clang/lib/Driver/ToolChains/ZOS* - - clang/lib/Driver/ToolChains/Arch/SystemZ.* - - clang/test/CodeGen/SystemZ/** - - clang/include/clang/Sema/SemaSystemZ.h - - clang/lib/Sema/SemaSystemZ.cpp + - changed-files: + - any-glob-to-any-file: + - llvm/include/llvm/BinaryFormat/ELFRelocs/SystemZ* + - llvm/include/llvm/BinaryFormat/GOFF.h + - llvm/include/llvm/IR/IntrinsicsSystemZ.td + - llvm/lib/Target/SystemZ/** + - llvm/test/Analysis/**/SystemZ/** + - llvm/test/CodeGen/SystemZ/** + - llvm/test/DebugInfo/SystemZ/** + - llvm/test/ExecutionEngine/**/SystemZ/** + - llvm/test/MC/Disassembler/SystemZ/** + - llvm/test/MC/GOFF/** + - llvm/test/MC/SystemZ/** + - llvm/test/Transforms/**/SystemZ/** + - clang/include/clang/Basic/BuiltinsSystemZ.* + - clang/lib/Basic/Targets/SystemZ.* + - clang/lib/CodeGen/Targets/SystemZ.cpp + - clang/lib/Driver/ToolChains/ZOS* + - clang/lib/Driver/ToolChains/Arch/SystemZ.* + - clang/test/CodeGen/SystemZ/** + - clang/include/clang/Sema/SemaSystemZ.h + - clang/lib/Sema/SemaSystemZ.cpp third-party:unittests: - - third-party/unittests/** + - changed-files: + - any-glob-to-any-file: + - third-party/unittests/** third-party:benchmark: - - third-party/benchmark/** + - changed-files: + - any-glob-to-any-file: + - third-party/benchmark/** llvm:binary-utilities: - - llvm/docs/CommandGuide/llvm-* - - llvm/include/llvm/BinaryFormat/** - - llvm/include/llvm/DebugInfo/Symbolize/** - - llvm/include/llvm/ObjCopy/** - - llvm/include/llvm/Object/** - - llvm/lib/BinaryFormat/** - - llvm/lib/DebugInfo/Symbolize/** - - llvm/lib/ObjCopy/** - - llvm/lib/Object/** - - llvm/test/Object/** - - llvm/test/tools/llvm-ar/** - - llvm/test/tools/llvm-cxxfilt/** - - llvm/test/tools/llvm-nm/** - - llvm/test/tools/llvm-objcopy/** - - llvm/test/tools/llvm-objdump/** - - llvm/test/tools/llvm-readobj/** - - llvm/test/tools/llvm-size/** - - llvm/test/tools/llvm-strings/** - - llvm/test/tools/llvm-symbolizer/** - - llvm/tools/llvm-ar/** - - llvm/tools/llvm-cxxfilt/** - - llvm/tools/llvm-nm/** - - llvm/tools/llvm-objcopy/** - - llvm/tools/llvm-objdump/** - - llvm/tools/llvm-readobj/** - - llvm/tools/llvm-size/** - - llvm/tools/llvm-strings/** - - llvm/tools/llvm-symbolizer/** + - changed-files: + - any-glob-to-any-file: + - llvm/docs/CommandGuide/llvm-* + - llvm/include/llvm/BinaryFormat/** + - llvm/include/llvm/DebugInfo/Symbolize/** + - llvm/include/llvm/ObjCopy/** + - llvm/include/llvm/Object/** + - llvm/lib/BinaryFormat/** + - llvm/lib/DebugInfo/Symbolize/** + - llvm/lib/ObjCopy/** + - llvm/lib/Object/** + - llvm/test/Object/** + - llvm/test/tools/llvm-ar/** + - llvm/test/tools/llvm-cxxfilt/** + - llvm/test/tools/llvm-nm/** + - llvm/test/tools/llvm-objcopy/** + - llvm/test/tools/llvm-objdump/** + - llvm/test/tools/llvm-readobj/** + - llvm/test/tools/llvm-size/** + - llvm/test/tools/llvm-strings/** + - llvm/test/tools/llvm-symbolizer/** + - llvm/tools/llvm-ar/** + - llvm/tools/llvm-cxxfilt/** + - llvm/tools/llvm-nm/** + - llvm/tools/llvm-objcopy/** + - llvm/tools/llvm-objdump/** + - llvm/tools/llvm-readobj/** + - llvm/tools/llvm-size/** + - llvm/tools/llvm-strings/** + - llvm/tools/llvm-symbolizer/** clang:openmp: - - clang/include/clang/Basic/OpenMP* - - clang/include/clang/AST/OpenMPClause.h - - clang/include/clang/AST/DeclOpenMP.h - - clang/include/clang/AST/ExprOpenMP.h - - clang/include/clang/AST/StmtOpenMP.h - - clang/lib/AST/DeclOpenMP.cpp - - clang/lib/AST/OpenMPClause.cpp - - clang/lib/AST/StmtOpenMP.cpp - - clang/lib/Headers/openmp_wrappers/** - - clang/lib/Parse/ParseOpenMP.cpp - - clang/lib/Basic/OpenMPKinds.cpp - - clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp - - clang/lib/Driver/ToolChains/AMDGPUOpenMP.h - - clang/lib/CodeGen/CgStmtOpenMP.cpp - - clang/lib/CodeGen/CGOpenMP* - - clang/lib/Sema/SemaOpenMP.cpp - - clang/test/OpenMP/** - - clang/test/AST/ast-dump-openmp-* - - llvm/lib/Frontend/OpenMP/** - - llvm/lib/Transforms/IPO/OpenMPOpt.cpp - - llvm/include/llvm/Frontend/OpenMP/** - - llvm/include/llvm/Transforms/IPO/OpenMPOpt.h - - llvm/unittests/Frontend/OpenMP* - - llvm/test/Transforms/OpenMP/** + - changed-files: + - any-glob-to-any-file: + - clang/include/clang/Basic/OpenMP* + - clang/include/clang/AST/OpenMPClause.h + - clang/include/clang/AST/DeclOpenMP.h + - clang/include/clang/AST/ExprOpenMP.h + - clang/include/clang/AST/StmtOpenMP.h + - clang/lib/AST/DeclOpenMP.cpp + - clang/lib/AST/OpenMPClause.cpp + - clang/lib/AST/StmtOpenMP.cpp + - clang/lib/Headers/openmp_wrappers/** + - clang/lib/Parse/ParseOpenMP.cpp + - clang/lib/Basic/OpenMPKinds.cpp + - clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp + - clang/lib/Driver/ToolChains/AMDGPUOpenMP.h + - clang/lib/CodeGen/CgStmtOpenMP.cpp + - clang/lib/CodeGen/CGOpenMP* + - clang/lib/Sema/SemaOpenMP.cpp + - clang/test/OpenMP/** + - clang/test/AST/ast-dump-openmp-* + - llvm/lib/Frontend/OpenMP/** + - llvm/lib/Transforms/IPO/OpenMPOpt.cpp + - llvm/include/llvm/Frontend/OpenMP/** + - llvm/include/llvm/Transforms/IPO/OpenMPOpt.h + - llvm/unittests/Frontend/OpenMP* + - llvm/test/Transforms/OpenMP/** clang:temporal-safety: - - clang/include/clang/Analysis/Analyses/LifetimeSafety/** - - clang/lib/Analysis/LifetimeSafety/** - - clang/unittests/Analysis/LifetimeSafety* - - clang/test/Sema/*lifetime-safety* - - clang/test/Sema/*lifetime-analysis* - - clang/test/Analysis/LifetimeSafety/** + - changed-files: + - any-glob-to-any-file: + - clang/include/clang/Analysis/Analyses/LifetimeSafety/** + - clang/lib/Analysis/LifetimeSafety/** + - clang/unittests/Analysis/LifetimeSafety* + - clang/test/Sema/*lifetime-safety* + - clang/test/Sema/*lifetime-analysis* + - clang/test/Analysis/LifetimeSafety/** clang:as-a-library: - - clang/tools/libclang/** - - clang/bindings/** - - clang/include/clang-c/** - - clang/test/LibClang/** - - clang/unittest/libclang/** + - changed-files: + - any-glob-to-any-file: + - clang/tools/libclang/** + - clang/bindings/** + - clang/include/clang-c/** + - clang/test/LibClang/** + - clang/unittest/libclang/** openmp:libomp: - - any: ['openmp/**', '!openmp/libomptarget/**'] + - changed-files: + - any-glob-to-any-file: + - 'openmp/**' openmp:libomptarget: - - any: ['openmp/**', '!openmp/runtime/**'] + - changed-files: + - all-globs-to-all-file: + - openmp/** + - '!openmp/runtime/**'' bazel: - - utils/bazel/** + - changed-files: + - any-glob-to-any-file: + - utils/bazel/** offload: - - offload/** + - changed-files: + - any-glob-to-any-file: + - offload/** tablegen: - - llvm/include/TableGen/** - - llvm/lib/TableGen/** - - llvm/utils/TableGen/** + - changed-files: + - any-glob-to-any-file: + - llvm/include/TableGen/** + - llvm/lib/TableGen/** + - llvm/utils/TableGen/** infrastructure: - - .ci/** + - changed-files: + - any-glob-to-any-file: + - .ci/** diff --git a/.github/workflows/new-prs.yml b/.github/workflows/new-prs.yml index e1f2e754c1a3d..0d97e436d39c4 100644 --- a/.github/workflows/new-prs.yml +++ b/.github/workflows/new-prs.yml @@ -67,9 +67,7 @@ jobs: github.event.pull_request.draft == false && github.event.pull_request.commits < 10 steps: - - uses: actions/labeler@ac9175f8a1f3625fd0d4fb234536d26811351594 # v4.3.0 + - uses: actions/labeler@634933edcd8ababfe52f92936142cc22ac488b1b # v6.0.1 with: configuration-path: .github/new-prs-labeler.yml - # workaround for https://github.com/actions/labeler/issues/112 - sync-labels: '' repo-token: ${{ secrets.ISSUE_SUBSCRIBER_TOKEN }} diff --git a/clang-tools-extra/clang-tidy/bugprone/UnsafeFunctionsCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/UnsafeFunctionsCheck.cpp index 5524c4b484be1..67d0931003c54 100644 --- a/clang-tools-extra/clang-tidy/bugprone/UnsafeFunctionsCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/UnsafeFunctionsCheck.cpp @@ -301,14 +301,20 @@ void UnsafeFunctionsCheck::check(const MatchFinder::MatchResult &Result) { if (Custom) { for (const auto &Entry : CustomFunctions) { if (Entry.Pattern.match(*FuncDecl)) { - const StringRef Reason = + StringRef Reason = Entry.Reason.empty() ? "is marked as unsafe" : Entry.Reason.c_str(); - if (Entry.Replacement.empty()) { + // Omit the replacement, when a fully-custom reason is given. + if (Reason.consume_front(">")) { + diag(SourceExpr->getExprLoc(), "function %0 %1") + << FuncDecl << Reason.trim() << SourceExpr->getSourceRange(); + // Do not recommend a replacement when it is not present. + } else if (Entry.Replacement.empty()) { diag(SourceExpr->getExprLoc(), "function %0 %1; it should not be used") << FuncDecl << Reason << Entry.Replacement << SourceExpr->getSourceRange(); + // Otherwise, emit the replacement. } else { diag(SourceExpr->getExprLoc(), "function %0 %1; '%2' should be used instead") diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index b982216297919..743397e3ec6ce 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -69,6 +69,13 @@ Potentially Breaking Changes - `CharTypdefsToIgnore` to `CharTypedefsToIgnore` in :doc:`bugprone-signed-char-misuse ` + +- Modified the custom message format of :doc:`bugprone-unsafe-functions + ` by assigning a special meaning + to the character ``>`` at the start of the value of the option + ``CustomFunctions``. If the option value starts with ``>``, then the + replacement suggestion part of the message (which would be included by + default) is omitted. (This does not change the warning locations.) - :program:`clang-tidy` now displays warnings from all non-system headers by default. Previously, users had to explicitly opt-in to header warnings using @@ -387,6 +394,11 @@ Changes in existing checks ` check by adding an additional matcher that generalizes the copy-and-swap idiom pattern detection. + +- Improved :doc:`bugprone-unsafe-functions + ` check by hiding the default + suffix when the reason starts with the character `>` in the `CustomFunctions` + option. - Improved :doc:`cppcoreguidelines-avoid-non-const-global-variables ` check diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/unsafe-functions.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/unsafe-functions.rst index f1fec13739271..cb7ea415c54b2 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/unsafe-functions.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/unsafe-functions.rst @@ -96,37 +96,62 @@ to be checked. The format is the following, without newlines: The functions are matched using POSIX extended regular expressions. *(Note: The regular expressions do not support negative* ``(?!)`` *matches.)* -The `reason` is optional and is used to provide additional information -about the reasoning behind the replacement. The default reason is -`is marked as unsafe`. +The ``reason`` is optional and is used to provide additional information about the +reasoning behind the replacement. The default reason is ``is marked as unsafe``. -If `replacement` is empty, the text `it should not be used` will be shown -instead of the suggestion for a replacement. +If ``replacement`` is empty, the default text ``it should not be used`` will be +shown instead of the suggestion for a replacement. -As an example, the configuration `^original$, replacement, is deprecated;` -will produce the following diagnostic message. +If the ``reason`` starts with the character ``>``, the reason becomes fully custom. +The default suffix is disabled even if a ``replacement`` is present, and only the +reason message is shown after the matched function, to allow better control over +the suggestions. (The starting ``>`` and whitespace directly after it are +trimmed from the message.) + +As an example, the following configuration matches only the function ``original`` +in the default namespace. A similar diagnostic can also be printed using a fully +custom reason. .. code:: c + // bugprone-unsafe-functions.CustomFunctions: + // ^original$, replacement, is deprecated; + // Using the fully custom message syntax: + // ^suspicious$,,> should be avoided if possible. original(); // warning: function 'original' is deprecated; 'replacement' should be used instead. + suspicious(); // warning: function 'suspicious' should be avoided if possible. ::std::original(); // no-warning original_function(); // no-warning -If the regular expression contains the character `:`, it is matched against the -qualified name (i.e. ``std::original``), otherwise the regex is matched against the unqualified name (``original``). -If the regular expression starts with `::` (or `^::`), it is matched against the -fully qualified name (``::std::original``). +If the regular expression contains the character ``:``, it is matched against the +qualified name (i.e. ``std::original``), otherwise the regex is matched against +the unqualified name (``original``). If the regular expression starts with ``::`` +(or ``^::``), it is matched against the fully qualified name +(``::std::original``). + +One of the use cases for fully custom messages is suggesting compiler options +and warning flags: + +.. code:: c + + // bugprone-unsafe-functions.CustomFunctions: + // ^memcpy$,,>is recommended to have compiler hardening using '_FORTIFY_SOURCE'; + // ^printf$,,>is recommended to have the '-Werror=format-security' compiler warning flag; + + memcpy(dest, src, 999'999); // warning: function 'memcpy' is recommended to have compiler hardening using '_FORTIFY_SOURCE' + printf(raw_str); // warning: function 'printf' is recommended to have the '-Werror=format-security' compiler warning flag .. note:: - Fully qualified names can contain template parameters on certain C++ classes, but not on C++ functions. - Type aliases are resolved before matching. + Fully qualified names can contain template parameters on certain C++ classes, + but not on C++ functions. Type aliases are resolved before matching. As an example, the member function ``open`` in the class ``std::ifstream`` has a fully qualified name of ``::std::basic_ifstream::open``. - The example could also be matched with the regex ``::std::basic_ifstream<[^>]*>::open``, which matches all potential - template parameters, but does not match nested template classes. + The example could also be matched with the regex + ``::std::basic_ifstream<[^>]*>::open``, which matches all potential template + parameters, but does not match nested template classes. Options ------- diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/unsafe-functions-custom.c b/clang-tools-extra/test/clang-tidy/checkers/bugprone/unsafe-functions-custom.c index 7fd71ec2f2e7b..7eaf015f06aa2 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/unsafe-functions-custom.c +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/unsafe-functions-custom.c @@ -1,5 +1,5 @@ // RUN: %check_clang_tidy -check-suffix=NON-STRICT-REGEX %s bugprone-unsafe-functions %t --\ -// RUN: -config="{CheckOptions: {bugprone-unsafe-functions.CustomFunctions: '::name_match,replacement,is a qualname match;^::prefix_match,,is matched on qualname prefix'}}" +// RUN: -config="{CheckOptions: {bugprone-unsafe-functions.CustomFunctions: \"::name_match,,>is a qualname match, but with a fully 'custom' message;^::prefix_match,,is matched on qualname prefix\"}}" // RUN: %check_clang_tidy -check-suffix=STRICT-REGEX %s bugprone-unsafe-functions %t --\ // RUN: -config="{CheckOptions: {bugprone-unsafe-functions.CustomFunctions: '^name_match$,replacement,is matched on function name only;^::prefix_match$,,is a full qualname match'}}" @@ -11,14 +11,14 @@ void prefix_match_regex(); void f1() { name_match(); - // CHECK-MESSAGES-NON-STRICT-REGEX: :[[@LINE-1]]:3: warning: function 'name_match' is a qualname match; 'replacement' should be used instead + // CHECK-MESSAGES-NON-STRICT-REGEX: :[[@LINE-1]]:3: warning: function 'name_match' is a qualname match, but with a fully 'custom' message // CHECK-MESSAGES-STRICT-REGEX: :[[@LINE-2]]:3: warning: function 'name_match' is matched on function name only; 'replacement' should be used instead prefix_match(); // CHECK-MESSAGES-NON-STRICT-REGEX: :[[@LINE-1]]:3: warning: function 'prefix_match' is matched on qualname prefix; it should not be used // CHECK-MESSAGES-STRICT-REGEX: :[[@LINE-2]]:3: warning: function 'prefix_match' is a full qualname match; it should not be used name_match_regex(); - // CHECK-MESSAGES-NON-STRICT-REGEX: :[[@LINE-1]]:3: warning: function 'name_match_regex' is a qualname match; 'replacement' should be used instead + // CHECK-MESSAGES-NON-STRICT-REGEX: :[[@LINE-1]]:3: warning: function 'name_match_regex' is a qualname match, but with a fully 'custom' message // no-warning STRICT-REGEX prefix_match_regex(); diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 7459127670cc3..c2da61e4d066a 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -500,6 +500,7 @@ Bug Fixes to Attribute Support - Fixes crashes or missing diagnostics with the `device_kernel` attribute. (#GH161905) - Fix handling of parameter indexes when an attribute is applied to a C++23 explicit object member function. - Fixed several false positives and false negatives in function effect (`nonblocking`) analysis. (#GH166078) (#GH166101) (#GH166110) +- Fix ``cleanup`` attribute by delaying type checks until after the type is deduced. (#GH129631) Bug Fixes to C++ Support ^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 8dfe4bc08c48e..0097476bc0d8d 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -741,6 +741,17 @@ class Attr { // our existing general parsing we need to have a separate flag that // opts an attribute into strict parsing of attribute parameters bit StrictEnumParameters = 0; + // Set to true for attributes which have Sema checks which requires the type + // to be deduced. + // When `IsTypeDependent` is set to true, you should add an `ActOn*Attr` + // function to `Sema.h`. The signature of the function must be: + // `void ActOn*Attr(Decl *, const Attr *);` where the `Decl *` is the + // declaration the attribute will be attached to; its type will have already + // been deduced, and the `Attr *` is the attribute being applied to that + // declaration. This function should handle all type-sensitive semantics for + // the attribute. This function will be automatically called by + // `Sema::CheckAttributesOnDeducedType()`. + bit IsTypeDependent = 0; // Lists language options, one of which is required to be true for the // attribute to be applicable. If empty, no language options are required. list LangOpts = []; @@ -1400,6 +1411,7 @@ def Cleanup : InheritableAttr { let Args = [DeclArgument]; let Subjects = SubjectList<[LocalVar]>; let Documentation = [CleanupDocs]; + let IsTypeDependent = 1; // FIXME: DeclArgument should be reworked to also store the // Expr instead of adding attr specific hacks like the following. // See the discussion in https://github.com/llvm/llvm-project/pull/14023. diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index dbf857afa08c8..47da17e5cfe83 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -5253,6 +5253,18 @@ def HLSLF16ToF32 : LangBuiltin<"HLSL_LANG"> { let Prototype = "void(...)"; } +def HLSLDdxCoarse : LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_elementwise_ddx_coarse"]; + let Attributes = [NoThrow, Const, CustomTypeChecking]; + let Prototype = "void(...)"; +} + +def HLSLDdyCoarse : LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_elementwise_ddy_coarse"]; + let Attributes = [NoThrow, Const, CustomTypeChecking]; + let Prototype = "void(...)"; +} + // Builtins for XRay. def XRayCustomEvent : Builtin { let Spellings = ["__xray_customevent"]; diff --git a/clang/include/clang/Sema/CMakeLists.txt b/clang/include/clang/Sema/CMakeLists.txt index 9077e22c2307c..3f540ea596871 100644 --- a/clang/include/clang/Sema/CMakeLists.txt +++ b/clang/include/clang/Sema/CMakeLists.txt @@ -8,6 +8,11 @@ clang_tablegen(AttrParsedAttrKinds.inc -gen-clang-attr-parsed-attr-kinds SOURCE ../Basic/Attr.td TARGET ClangAttrParsedAttrKinds) +clang_tablegen(AttrIsTypeDependent.inc -gen-clang-attr-is-type-dependent + -I ${CMAKE_CURRENT_SOURCE_DIR}/../../ + SOURCE ../Basic/Attr.td + TARGET ClangAttrIsTypeDependent) + clang_tablegen(AttrSpellingListIndex.inc -gen-clang-attr-spelling-index -I ${CMAKE_CURRENT_SOURCE_DIR}/../../ SOURCE ../Basic/Attr.td diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 6ca182338d6af..fd2a2469142e4 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -4456,6 +4456,10 @@ class Sema final : public SemaBase { NamedDecl *New, Decl *Old, AvailabilityMergeKind AMK = AvailabilityMergeKind::Redeclaration); + /// CheckAttributesOnDeducedType - Calls Sema functions for attributes that + /// requires the type to be deduced. + void CheckAttributesOnDeducedType(Decl *D); + /// MergeTypedefNameDecl - We just parsed a typedef 'New' which has the /// same name and scope as a previous declaration 'Old'. Figure out /// how to resolve this situation, merging decls or emitting @@ -4760,6 +4764,8 @@ class Sema final : public SemaBase { // linkage or not. static bool mightHaveNonExternalLinkage(const DeclaratorDecl *FD); +#include "clang/Sema/AttrIsTypeDependent.inc" + ///@} // @@ -15469,6 +15475,8 @@ class Sema final : public SemaBase { std::optional ActOnEffectExpression(Expr *CondExpr, StringRef AttributeName); + void ActOnCleanupAttr(Decl *D, const Attr *A); + private: /// The implementation of RequireCompleteType bool RequireCompleteTypeImpl(SourceLocation Loc, QualType T, diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp index b6928ce7d9c44..12d9a98915ce3 100644 --- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp +++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp @@ -924,6 +924,24 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, return EmitRuntimeCall( Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID)); } + case Builtin::BI__builtin_hlsl_elementwise_ddx_coarse: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + if (!E->getArg(0)->getType()->hasFloatingRepresentation()) + llvm_unreachable("ddx_coarse operand must have a float representation"); + Intrinsic::ID ID = CGM.getHLSLRuntime().getDdxCoarseIntrinsic(); + return Builder.CreateIntrinsic(/*ReturnType=*/Op0->getType(), ID, + ArrayRef{Op0}, nullptr, + "hlsl.ddx.coarse"); + } + case Builtin::BI__builtin_hlsl_elementwise_ddy_coarse: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + if (!E->getArg(0)->getType()->hasFloatingRepresentation()) + llvm_unreachable("ddy_coarse operand must have a float representation"); + Intrinsic::ID ID = CGM.getHLSLRuntime().getDdyCoarseIntrinsic(); + return Builder.CreateIntrinsic(/*ReturnType=*/Op0->getType(), ID, + ArrayRef{Op0}, nullptr, + "hlsl.ddy.coarse"); + } case Builtin::BI__builtin_get_spirv_spec_constant_bool: case Builtin::BI__builtin_get_spirv_spec_constant_short: case Builtin::BI__builtin_get_spirv_spec_constant_ushort: diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h index 48935584f28a2..e1200c62eccf1 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -163,6 +163,8 @@ class CGHLSLRuntime { GENERATE_HLSL_INTRINSIC_FUNCTION(GroupMemoryBarrierWithGroupSync, group_memory_barrier_with_group_sync) GENERATE_HLSL_INTRINSIC_FUNCTION(GetDimensionsX, resource_getdimensions_x) + GENERATE_HLSL_INTRINSIC_FUNCTION(DdxCoarse, ddx_coarse) + GENERATE_HLSL_INTRINSIC_FUNCTION(DdyCoarse, ddy_coarse) //===----------------------------------------------------------------------===// // End of reserved area for HLSL intrinsic getters. diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 04fd68692d8d8..426fc796ffc20 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -70,6 +70,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" @@ -103,6 +104,7 @@ #include #include #include +#include #include #if LLVM_ON_UNIX #include // getpid @@ -2050,12 +2052,17 @@ void Driver::generateCompilationDiagnostics( InputList Inputs; BuildInputs(C.getDefaultToolChain(), C.getArgs(), Inputs); + ArgStringList IRInputs; for (InputList::iterator it = Inputs.begin(), ie = Inputs.end(); it != ie;) { bool IgnoreInput = false; - // Ignore input from stdin or any inputs that cannot be preprocessed. - // Check type first as not all linker inputs have a value. - if (types::getPreprocessedType(it->first) == types::TY_INVALID) { + // Save IR inputs separately, ignore input from stdin or any other inputs + // that cannot be preprocessed. Check type first as not all linker inputs + // have a value. + if (types::isLLVMIR(it->first)) { + IRInputs.push_back(it->second->getValue()); + IgnoreInput = true; + } else if (types::getPreprocessedType(it->first) == types::TY_INVALID) { IgnoreInput = true; } else if (!strcmp(it->second->getValue(), "-")) { Diag(clang::diag::note_drv_command_failed_diag_msg) @@ -2072,7 +2079,7 @@ void Driver::generateCompilationDiagnostics( } } - if (Inputs.empty()) { + if (Inputs.empty() && IRInputs.empty()) { Diag(clang::diag::note_drv_command_failed_diag_msg) << "Error generating preprocessed source(s) - " "no preprocessable inputs."; @@ -2095,46 +2102,82 @@ void Driver::generateCompilationDiagnostics( return; } - // Construct the list of abstract actions to perform for this compilation. On - // Darwin OSes this uses the driver-driver and builds universal actions. - const ToolChain &TC = C.getDefaultToolChain(); - if (TC.getTriple().isOSBinFormatMachO()) - BuildUniversalActions(C, TC, Inputs); - else - BuildActions(C, C.getArgs(), Inputs, C.getActions()); + // If we only have IR inputs there's no need for preprocessing. + if (!Inputs.empty()) { + // Construct the list of abstract actions to perform for this compilation. + // On Darwin OSes this uses the driver-driver and builds universal actions. + const ToolChain &TC = C.getDefaultToolChain(); + if (TC.getTriple().isOSBinFormatMachO()) + BuildUniversalActions(C, TC, Inputs); + else + BuildActions(C, C.getArgs(), Inputs, C.getActions()); - BuildJobs(C); + BuildJobs(C); - // If there were errors building the compilation, quit now. - if (Trap.hasErrorOccurred()) { - Diag(clang::diag::note_drv_command_failed_diag_msg) - << "Error generating preprocessed source(s)."; - return; - } + // If there were errors building the compilation, quit now. + if (Trap.hasErrorOccurred()) { + Diag(clang::diag::note_drv_command_failed_diag_msg) + << "Error generating preprocessed source(s)."; + return; + } + // Generate preprocessed output. + SmallVector, 4> FailingCommands; + C.ExecuteJobs(C.getJobs(), FailingCommands); - // Generate preprocessed output. - SmallVector, 4> FailingCommands; - C.ExecuteJobs(C.getJobs(), FailingCommands); + // If any of the preprocessing commands failed, clean up and exit. + if (!FailingCommands.empty()) { + Diag(clang::diag::note_drv_command_failed_diag_msg) + << "Error generating preprocessed source(s)."; + return; + } - // If any of the preprocessing commands failed, clean up and exit. - if (!FailingCommands.empty()) { - Diag(clang::diag::note_drv_command_failed_diag_msg) - << "Error generating preprocessed source(s)."; - return; + const ArgStringList &TempFiles = C.getTempFiles(); + if (TempFiles.empty()) { + Diag(clang::diag::note_drv_command_failed_diag_msg) + << "Error generating preprocessed source(s)."; + return; + } } - const ArgStringList &TempFiles = C.getTempFiles(); - if (TempFiles.empty()) { - Diag(clang::diag::note_drv_command_failed_diag_msg) - << "Error generating preprocessed source(s)."; - return; + // Copying filenames due to ownership. + const ArgStringList &Files = C.getTempFiles(); + SmallVector TempFiles(Files.begin(), Files.end()); + + // We'd like to copy the IR input file into our own temp file + // because the build system might try to clean-up after itself. + for (auto const *Input : IRInputs) { + int FD; + llvm::SmallVector Path; + + StringRef extension = llvm::sys::path::extension(Input); + if (!extension.empty()) + extension = extension.drop_front(); + + std::error_code EC = llvm::sys::fs::createTemporaryFile( + llvm::sys::path::stem(Input), extension, FD, Path); + if (EC) { + Diag(clang::diag::note_drv_command_failed_diag_msg) + << "Error generating run script: " << "Failed copying IR input files" + << " " << EC.message(); + return; + } + + EC = llvm::sys::fs::copy_file(Input, FD); + if (EC) { + Diag(clang::diag::note_drv_command_failed_diag_msg) + << "Error generating run script: " << "Failed copying IR input files" + << " " << EC.message(); + return; + } + + TempFiles.push_back(std::string(Path.begin(), Path.end())); } Diag(clang::diag::note_drv_command_failed_diag_msg) << BugReporMsg; SmallString<128> VFS; SmallString<128> ReproCrashFilename; - for (const char *TempFile : TempFiles) { + for (std::string &TempFile : TempFiles) { Diag(clang::diag::note_drv_command_failed_diag_msg) << TempFile; if (Report) Report->TemporaryFiles.push_back(TempFile); @@ -2151,7 +2194,7 @@ void Driver::generateCompilationDiagnostics( } for (const char *TempFile : SavedTemps) - C.addTempFile(TempFile); + TempFiles.push_back(TempFile); // Assume associated files are based off of the first temporary file. CrashReportInfo CrashInfo(TempFiles[0], VFS); diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 5ff7d83946137..77a2c73f0d446 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -1639,6 +1639,8 @@ SanitizerMask ToolChain::getSupportedSanitizers() const { Res |= SanitizerKind::ShadowCallStack; if (getTriple().isAArch64(64)) Res |= SanitizerKind::MemTag; + if (getTriple().isBPF()) + Res |= SanitizerKind::KernelAddress; return Res; } diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h index 2e2703de18cb1..38b95ee90736a 100644 --- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h @@ -2946,5 +2946,73 @@ float4 radians(float4); _HLSL_BUILTIN_ALIAS(__builtin_hlsl_group_memory_barrier_with_group_sync) __attribute__((convergent)) void GroupMemoryBarrierWithGroupSync(void); +//===----------------------------------------------------------------------===// +// ddx_coarse builtin +//===----------------------------------------------------------------------===// + +/// \fn T ddx_coarse(T value) +/// \brief Computes a low precision partial derivative with respect to the +/// screen-space x-coordinate. +/// \param value The input value. +/// +/// The return value is a floating point scalar or vector containing the low +/// prevision partial derivative of the input value. + +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse) +half ddx_coarse(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse) +half2 ddx_coarse(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse) +half3 ddx_coarse(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse) +half4 ddx_coarse(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse) +float ddx_coarse(float); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse) +float2 ddx_coarse(float2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse) +float3 ddx_coarse(float3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse) +float4 ddx_coarse(float4); + +//===----------------------------------------------------------------------===// +// ddy_coarse builtin +//===----------------------------------------------------------------------===// + +/// \fn T ddy_coarse(T value) +/// \brief Computes a low precision partial derivative with respect to the +/// screen-space y-coordinate. +/// \param value The input value. +/// +/// The return value is a floating point scalar or vector containing the low +/// prevision partial derivative of the input value. + +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse) +half ddy_coarse(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse) +half2 ddy_coarse(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse) +half3 ddy_coarse(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse) +half4 ddy_coarse(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse) +float ddy_coarse(float); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse) +float2 ddy_coarse(float2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse) +float3 ddy_coarse(float3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse) +float4 ddy_coarse(float4); + } // namespace hlsl #endif //_HLSL_HLSL_ALIAS_INTRINSICS_H_ diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 25b89d65847ad..b7aecadc86871 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -3355,6 +3355,11 @@ void Sema::mergeDeclAttributes(NamedDecl *New, Decl *Old, if (!foundAny) New->dropAttrs(); } +void Sema::CheckAttributesOnDeducedType(Decl *D) { + for (const Attr *A : D->attrs()) + checkAttrIsTypeDependent(D, A); +} + // Returns the number of added attributes. template static unsigned propagateAttribute(ParmVarDecl *To, const ParmVarDecl *From, @@ -13809,6 +13814,8 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, bool DirectInit) { return; } + this->CheckAttributesOnDeducedType(RealDecl); + // dllimport cannot be used on variable definitions. if (VDecl->hasAttr() && !VDecl->isStaticDataMember()) { Diag(VDecl->getLocation(), diag::err_attribute_dllimport_data_definition); @@ -14300,6 +14307,8 @@ void Sema::ActOnUninitializedDecl(Decl *RealDecl) { DeduceVariableDeclarationType(Var, false, nullptr)) return; + this->CheckAttributesOnDeducedType(RealDecl); + // C++11 [class.static.data]p3: A static data member can be declared with // the constexpr specifier; if so, its declaration shall specify // a brace-or-equal-initializer. diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index a9e7b44ac9d73..bda7aa32a9348 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -3511,16 +3511,6 @@ static void handleCleanupAttr(Sema &S, Decl *D, const ParsedAttr &AL) { return; } - // We're currently more strict than GCC about what function types we accept. - // If this ever proves to be a problem it should be easy to fix. - QualType Ty = S.Context.getPointerType(cast(D)->getType()); - QualType ParamTy = FD->getParamDecl(0)->getType(); - if (!S.IsAssignConvertCompatible(S.CheckAssignmentConstraints( - FD->getParamDecl(0)->getLocation(), ParamTy, Ty))) { - S.Diag(Loc, diag::err_attribute_cleanup_func_arg_incompatible_type) - << NI.getName() << ParamTy << Ty; - return; - } VarDecl *VD = cast(D); // Create a reference to the variable declaration. This is a fake/dummy // reference. @@ -8311,3 +8301,28 @@ void Sema::redelayDiagnostics(DelayedDiagnosticPool &pool) { assert(curPool && "re-emitting in undelayed context not supported"); curPool->steal(pool); } + +void Sema::ActOnCleanupAttr(Decl *D, const Attr *A) { + VarDecl *VD = cast(D); + if (VD->getType()->isDependentType()) + return; + + // Obtains the FunctionDecl that was found when handling the attribute + // earlier. + CleanupAttr *Attr = D->getAttr(); + FunctionDecl *FD = Attr->getFunctionDecl(); + DeclarationNameInfo NI = FD->getNameInfo(); + + // We're currently more strict than GCC about what function types we accept. + // If this ever proves to be a problem it should be easy to fix. + QualType Ty = this->Context.getPointerType(VD->getType()); + QualType ParamTy = FD->getParamDecl(0)->getType(); + if (!this->IsAssignConvertCompatible(this->CheckAssignmentConstraints( + FD->getParamDecl(0)->getLocation(), ParamTy, Ty))) { + this->Diag(Attr->getArgLoc(), + diag::err_attribute_cleanup_func_arg_incompatible_type) + << NI.getName() << ParamTy << Ty; + D->dropAttr(); + return; + } +} diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 2b9b3abbd5360..5555916c2536f 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -3239,7 +3239,9 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { case Builtin::BI__builtin_hlsl_elementwise_degrees: case Builtin::BI__builtin_hlsl_elementwise_radians: case Builtin::BI__builtin_hlsl_elementwise_rsqrt: - case Builtin::BI__builtin_hlsl_elementwise_frac: { + case Builtin::BI__builtin_hlsl_elementwise_frac: + case Builtin::BI__builtin_hlsl_elementwise_ddx_coarse: + case Builtin::BI__builtin_hlsl_elementwise_ddy_coarse: { if (SemaRef.checkArgCount(TheCall, 1)) return true; if (CheckAllArgTypesAreCorrect(&SemaRef, TheCall, diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index 1b6b559c1227b..3a4b2ccc74350 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -1007,6 +1007,15 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs, continue; } + if (auto *A = dyn_cast(TmplAttr)) { + if (!New->hasAttr()) { + auto *NewAttr = A->clone(Context); + NewAttr->setArgLoc(A->getArgLoc()); + New->addAttr(NewAttr); + } + continue; + } + assert(!TmplAttr->isPackExpansion()); if (TmplAttr->isLateParsed() && LateAttrs) { // Late parsed attributes must be instantiated and attached after the diff --git a/clang/test/CIR/CodeGen/call.c b/clang/test/CIR/CodeGen/call.c index d780e37f3d153..99ae4506b1f16 100644 --- a/clang/test/CIR/CodeGen/call.c +++ b/clang/test/CIR/CodeGen/call.c @@ -130,7 +130,7 @@ int f12(void) { // OGCG: %{{.+}} = call i32 @f10(i32 noundef 1) #[[ATTR0:.+]] // OGCG-NEXT: %{{.+}} = call i32 @f11(i32 noundef 2) #[[ATTR1:.+]] -// LLVM: attributes #[[ATTR0]] = { nounwind willreturn memory(read, errnomem: none) } +// LLVM: attributes #[[ATTR0]] = { nounwind willreturn memory(read, errnomem: none, target_mem0: none, target_mem1: none) } // LLVM: attributes #[[ATTR1]] = { nounwind willreturn memory(none) } // OGCG: attributes #[[ATTR0]] = { nounwind willreturn memory(read) } diff --git a/clang/test/CodeGenHLSL/builtins/ddx-coarse-builtin.hlsl b/clang/test/CodeGenHLSL/builtins/ddx-coarse-builtin.hlsl new file mode 100644 index 0000000000000..01216eefadba2 --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/ddx-coarse-builtin.hlsl @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.3-library %s \ +// RUN: -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple spirv-pc-vulkan-compute %s \ +// RUN: -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK-SPIRV + +// CHECK-LABEL: half @_Z19test_f16_ddx_coarseDh +// CHECK: %hlsl.ddx.coarse = call {{.*}} half @llvm.dx.ddx.coarse.f16(half %{{.*}}) +// CHECK: ret half %hlsl.ddx.coarse +// CHECK-LABEL-SPIRV: half @_Z19test_f16_ddx_coarseDh +// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} half @llvm.spv.ddx.coarse.f16(half %{{.*}}) +// CHECK-SPIRV: ret half %hlsl.ddx.coarse +half test_f16_ddx_coarse(half val) { + return __builtin_hlsl_elementwise_ddx_coarse(val); +} + +// CHECK-LABEL: float @_Z19test_f32_ddx_coarsef +// CHECK: %hlsl.ddx.coarse = call {{.*}} float @llvm.dx.ddx.coarse.f32(float %{{.*}}) +// CHECK: ret float %hlsl.ddx.coarse +// CHECK-LABEL-SPIRV: float @_Z19test_f32_ddx_coarsef +// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} float @llvm.spv.ddx.coarse.f32(float %{{.*}}) +// CHECK-SPIRV: ret float %hlsl.ddx.coarse +float test_f32_ddx_coarse(float val) { + return __builtin_hlsl_elementwise_ddx_coarse(val); +} diff --git a/clang/test/CodeGenHLSL/builtins/ddx-coarse.hlsl b/clang/test/CodeGenHLSL/builtins/ddx-coarse.hlsl new file mode 100644 index 0000000000000..c200d4715629e --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/ddx-coarse.hlsl @@ -0,0 +1,86 @@ +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.3-library %s \ +// RUN: -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple spirv-pc-vulkan-compute %s \ +// RUN: -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK-SPIRV + +// CHECK-LABEL: half @_Z19test_f16_ddx_coarseDh +// CHECK: %hlsl.ddx.coarse = call {{.*}} half @llvm.dx.ddx.coarse.f16(half %{{.*}}) +// CHECK: ret half %hlsl.ddx.coarse +// CHECK-LABEL-SPIRV: half @_Z19test_f16_ddx_coarseDh +// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} half @llvm.spv.ddx.coarse.f16(half %{{.*}}) +// CHECK-SPIRV: ret half %hlsl.ddx.coarse +half test_f16_ddx_coarse(half val) { + return ddx_coarse(val); +} + +// CHECK-LABEL: <2 x half> @_Z20test_f16_ddx_coarse2Dv2_Dh +// CHECK: %hlsl.ddx.coarse = call {{.*}} <2 x half> @llvm.dx.ddx.coarse.v2f16(<2 x half> %{{.*}}) +// CHECK: ret <2 x half> %hlsl.ddx.coarse +// CHECK-LABEL-SPIRV: <2 x half> @_Z20test_f16_ddx_coarse2Dv2_Dh +// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} <2 x half> @llvm.spv.ddx.coarse.v2f16(<2 x half> %{{.*}}) +// CHECK-SPIRV: ret <2 x half> %hlsl.ddx.coarse +half2 test_f16_ddx_coarse2(half2 val) { + return ddx_coarse(val); +} + +// CHECK-LABEL: <3 x half> @_Z20test_f16_ddx_coarse3Dv3_Dh +// CHECK: %hlsl.ddx.coarse = call {{.*}} <3 x half> @llvm.dx.ddx.coarse.v3f16(<3 x half> %{{.*}}) +// CHECK: ret <3 x half> %hlsl.ddx.coarse +// CHECK-LABEL-SPIRV: <3 x half> @_Z20test_f16_ddx_coarse3Dv3_Dh +// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} <3 x half> @llvm.spv.ddx.coarse.v3f16(<3 x half> %{{.*}}) +// CHECK-SPIRV: ret <3 x half> %hlsl.ddx.coarse +half3 test_f16_ddx_coarse3(half3 val) { + return ddx_coarse(val); +} + +// CHECK-LABEL: <4 x half> @_Z20test_f16_ddx_coarse4Dv4_Dh +// CHECK: %hlsl.ddx.coarse = call {{.*}} <4 x half> @llvm.dx.ddx.coarse.v4f16(<4 x half> %{{.*}}) +// CHECK: ret <4 x half> %hlsl.ddx.coarse +// CHECK-LABEL-SPIRV: <4 x half> @_Z20test_f16_ddx_coarse4Dv4_Dh +// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} <4 x half> @llvm.spv.ddx.coarse.v4f16(<4 x half> %{{.*}}) +// CHECK-SPIRV: ret <4 x half> %hlsl.ddx.coarse +half4 test_f16_ddx_coarse4(half4 val) { + return ddx_coarse(val); +} + +// CHECK-LABEL: float @_Z19test_f32_ddx_coarsef +// CHECK: %hlsl.ddx.coarse = call {{.*}} float @llvm.dx.ddx.coarse.f32(float %{{.*}}) +// CHECK: ret float %hlsl.ddx.coarse +// CHECK-LABEL-SPIRV: float @_Z19test_f32_ddx_coarsef +// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} float @llvm.spv.ddx.coarse.f32(float %{{.*}}) +// CHECK-SPIRV: ret float %hlsl.ddx.coarse +float test_f32_ddx_coarse(float val) { + return ddx_coarse(val); +} + +// CHECK-LABEL: <2 x float> @_Z20test_f32_ddx_coarse2Dv2_f +// CHECK: %hlsl.ddx.coarse = call {{.*}} <2 x float> @llvm.dx.ddx.coarse.v2f32(<2 x float> %{{.*}}) +// CHECK: ret <2 x float> %hlsl.ddx.coarse +// CHECK-LABEL-SPIRV: <2 x float> @_Z20test_f32_ddx_coarse2Dv2_f +// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} <2 x float> @llvm.spv.ddx.coarse.v2f32(<2 x float> %{{.*}}) +// CHECK-SPIRV: ret <2 x float> %hlsl.ddx.coarse +float2 test_f32_ddx_coarse2(float2 val) { + return ddx_coarse(val); +} + +// CHECK-LABEL: <3 x float> @_Z20test_f32_ddx_coarse3Dv3_f +// CHECK: %hlsl.ddx.coarse = call {{.*}} <3 x float> @llvm.dx.ddx.coarse.v3f32(<3 x float> %{{.*}}) +// CHECK: ret <3 x float> %hlsl.ddx.coarse +// CHECK-LABEL-SPIRV: <3 x float> @_Z20test_f32_ddx_coarse3Dv3_f +// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} <3 x float> @llvm.spv.ddx.coarse.v3f32(<3 x float> %{{.*}}) +// CHECK-SPIRV: ret <3 x float> %hlsl.ddx.coarse +float3 test_f32_ddx_coarse3(float3 val) { + return ddx_coarse(val); +} + +// CHECK-LABEL: <4 x float> @_Z20test_f32_ddx_coarse4Dv4_f +// CHECK: %hlsl.ddx.coarse = call {{.*}} <4 x float> @llvm.dx.ddx.coarse.v4f32(<4 x float> %{{.*}}) +// CHECK: ret <4 x float> %hlsl.ddx.coarse +// CHECK-LABEL-SPIRV: <4 x float> @_Z20test_f32_ddx_coarse4Dv4_f +// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} <4 x float> @llvm.spv.ddx.coarse.v4f32(<4 x float> %{{.*}}) +// CHECK-SPIRV: ret <4 x float> %hlsl.ddx.coarse +float4 test_f32_ddx_coarse4(float4 val) { + return ddx_coarse(val); +} diff --git a/clang/test/CodeGenHLSL/builtins/ddy-coarse-builtin.hlsl b/clang/test/CodeGenHLSL/builtins/ddy-coarse-builtin.hlsl new file mode 100644 index 0000000000000..2967deb75031f --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/ddy-coarse-builtin.hlsl @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.3-library %s \ +// RUN: -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple spirv-pc-vulkan-compute %s \ +// RUN: -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK-SPIRV + +// CHECK-LABEL: half @_Z19test_f16_ddy_coarseDh +// CHECK: %hlsl.ddy.coarse = call {{.*}} half @llvm.dx.ddy.coarse.f16(half %{{.*}}) +// CHECK: ret half %hlsl.ddy.coarse +// CHECK-LABEL-SPIRV: half @_Z19test_f16_ddy_coarseDh +// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} half @llvm.spv.ddy.coarse.f16(half %{{.*}}) +// CHECK-SPIRV: ret half %hlsl.ddy.coarse +half test_f16_ddy_coarse(half val) { + return __builtin_hlsl_elementwise_ddy_coarse(val); +} + +// CHECK-LABEL: float @_Z19test_f32_ddy_coarsef +// CHECK: %hlsl.ddy.coarse = call {{.*}} float @llvm.dx.ddy.coarse.f32(float %{{.*}}) +// CHECK: ret float %hlsl.ddy.coarse +// CHECK-LABEL-SPIRV: float @_Z19test_f32_ddy_coarsef +// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} float @llvm.spv.ddy.coarse.f32(float %{{.*}}) +// CHECK-SPIRV: ret float %hlsl.ddy.coarse +float test_f32_ddy_coarse(float val) { + return __builtin_hlsl_elementwise_ddy_coarse(val); +} diff --git a/clang/test/CodeGenHLSL/builtins/ddy-coarse.hlsl b/clang/test/CodeGenHLSL/builtins/ddy-coarse.hlsl new file mode 100644 index 0000000000000..faa972a1be326 --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/ddy-coarse.hlsl @@ -0,0 +1,86 @@ +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.3-library %s \ +// RUN: -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple spirv-pc-vulkan-compute %s \ +// RUN: -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK-SPIRV + +// CHECK-LABEL: half @_Z19test_f16_ddy_coarseDh +// CHECK: %hlsl.ddy.coarse = call {{.*}} half @llvm.dx.ddy.coarse.f16(half %{{.*}}) +// CHECK: ret half %hlsl.ddy.coarse +// CHECK-LABEL-SPIRV: half @_Z19test_f16_ddy_coarseDh +// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} half @llvm.spv.ddy.coarse.f16(half %{{.*}}) +// CHECK-SPIRV: ret half %hlsl.ddy.coarse +half test_f16_ddy_coarse(half val) { + return ddy_coarse(val); +} + +// CHECK-LABEL: <2 x half> @_Z20test_f16_ddy_coarse2Dv2_Dh +// CHECK: %hlsl.ddy.coarse = call {{.*}} <2 x half> @llvm.dx.ddy.coarse.v2f16(<2 x half> %{{.*}}) +// CHECK: ret <2 x half> %hlsl.ddy.coarse +// CHECK-LABEL-SPIRV: <2 x half> @_Z20test_f16_ddy_coarse2Dv2_Dh +// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} <2 x half> @llvm.spv.ddy.coarse.v2f16(<2 x half> %{{.*}}) +// CHECK-SPIRV: ret <2 x half> %hlsl.ddy.coarse +half2 test_f16_ddy_coarse2(half2 val) { + return ddy_coarse(val); +} + +// CHECK-LABEL: <3 x half> @_Z20test_f16_ddy_coarse3Dv3_Dh +// CHECK: %hlsl.ddy.coarse = call {{.*}} <3 x half> @llvm.dx.ddy.coarse.v3f16(<3 x half> %{{.*}}) +// CHECK: ret <3 x half> %hlsl.ddy.coarse +// CHECK-LABEL-SPIRV: <3 x half> @_Z20test_f16_ddy_coarse3Dv3_Dh +// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} <3 x half> @llvm.spv.ddy.coarse.v3f16(<3 x half> %{{.*}}) +// CHECK-SPIRV: ret <3 x half> %hlsl.ddy.coarse +half3 test_f16_ddy_coarse3(half3 val) { + return ddy_coarse(val); +} + +// CHECK-LABEL: <4 x half> @_Z20test_f16_ddy_coarse4Dv4_Dh +// CHECK: %hlsl.ddy.coarse = call {{.*}} <4 x half> @llvm.dx.ddy.coarse.v4f16(<4 x half> %{{.*}}) +// CHECK: ret <4 x half> %hlsl.ddy.coarse +// CHECK-LABEL-SPIRV: <4 x half> @_Z20test_f16_ddy_coarse4Dv4_Dh +// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} <4 x half> @llvm.spv.ddy.coarse.v4f16(<4 x half> %{{.*}}) +// CHECK-SPIRV: ret <4 x half> %hlsl.ddy.coarse +half4 test_f16_ddy_coarse4(half4 val) { + return ddy_coarse(val); +} + +// CHECK-LABEL: float @_Z19test_f32_ddy_coarsef +// CHECK: %hlsl.ddy.coarse = call {{.*}} float @llvm.dx.ddy.coarse.f32(float %{{.*}}) +// CHECK: ret float %hlsl.ddy.coarse +// CHECK-LABEL-SPIRV: float @_Z19test_f32_ddy_coarsef +// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} float @llvm.spv.ddy.coarse.f32(float %{{.*}}) +// CHECK-SPIRV: ret float %hlsl.ddy.coarse +float test_f32_ddy_coarse(float val) { + return ddy_coarse(val); +} + +// CHECK-LABEL: <2 x float> @_Z20test_f32_ddy_coarse2Dv2_f +// CHECK: %hlsl.ddy.coarse = call {{.*}} <2 x float> @llvm.dx.ddy.coarse.v2f32(<2 x float> %{{.*}}) +// CHECK: ret <2 x float> %hlsl.ddy.coarse +// CHECK-LABEL-SPIRV: <2 x float> @_Z20test_f32_ddy_coarse2Dv2_f +// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} <2 x float> @llvm.spv.ddy.coarse.v2f32(<2 x float> %{{.*}}) +// CHECK-SPIRV: ret <2 x float> %hlsl.ddy.coarse +float2 test_f32_ddy_coarse2(float2 val) { + return ddy_coarse(val); +} + +// CHECK-LABEL: <3 x float> @_Z20test_f32_ddy_coarse3Dv3_f +// CHECK: %hlsl.ddy.coarse = call {{.*}} <3 x float> @llvm.dx.ddy.coarse.v3f32(<3 x float> %{{.*}}) +// CHECK: ret <3 x float> %hlsl.ddy.coarse +// CHECK-LABEL-SPIRV: <3 x float> @_Z20test_f32_ddy_coarse3Dv3_f +// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} <3 x float> @llvm.spv.ddy.coarse.v3f32(<3 x float> %{{.*}}) +// CHECK-SPIRV: ret <3 x float> %hlsl.ddy.coarse +float3 test_f32_ddy_coarse3(float3 val) { + return ddy_coarse(val); +} + +// CHECK-LABEL: <4 x float> @_Z20test_f32_ddy_coarse4Dv4_f +// CHECK: %hlsl.ddy.coarse = call {{.*}} <4 x float> @llvm.dx.ddy.coarse.v4f32(<4 x float> %{{.*}}) +// CHECK: ret <4 x float> %hlsl.ddy.coarse +// CHECK-LABEL-SPIRV: <4 x float> @_Z20test_f32_ddy_coarse4Dv4_f +// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} <4 x float> @llvm.spv.ddy.coarse.v4f32(<4 x float> %{{.*}}) +// CHECK-SPIRV: ret <4 x float> %hlsl.ddy.coarse +float4 test_f32_ddy_coarse4(float4 val) { + return ddy_coarse(val); +} diff --git a/clang/test/Driver/crash-ir-repro.cpp b/clang/test/Driver/crash-ir-repro.cpp new file mode 100644 index 0000000000000..1f31a5ca1bb34 --- /dev/null +++ b/clang/test/Driver/crash-ir-repro.cpp @@ -0,0 +1,15 @@ +// RUN: %clang -S -emit-llvm -o %t.ll %s +// RUN: not %clang -S -DCRASH %s %t.ll 2>&1 | FileCheck %s + +// CHECK: Preprocessed source(s) and associated run script(s) are located at: +// CHECK-NEXT: clang: note: diagnostic msg: {{.*}}.cpp +// CHECK-NEXT: clang: note: diagnostic msg: {{.*}}.ll +// CHECK-NEXT: clang: note: diagnostic msg: {{.*}}.sh + +#ifdef CRASH +#pragma clang __debug parser_crash +#endif + +int main() { + return 0; +} diff --git a/clang/test/Driver/linker-wrapper-hip-amdgcnspirv.c b/clang/test/Driver/linker-wrapper-hip-amdgcnspirv.c new file mode 100644 index 0000000000000..429f7d3b9ee13 --- /dev/null +++ b/clang/test/Driver/linker-wrapper-hip-amdgcnspirv.c @@ -0,0 +1,16 @@ +// RUN: %clang -cc1 %s -triple "spirv64-amd-amdhsa" -emit-llvm-bc -o %t.bc +// RUN: llvm-offload-binary -o %t.out "--image=file=%t.bc,triple=spirv64-amd-amdhsa,arch=amdgcnspirv,kind=hip" +// RUN: clang-linker-wrapper \ +// RUN: "--should-extract=amdgcnspirv" \ +// RUN: "--host-triple=spirv64-amd-amdhsa" \ +// RUN: "--linker-path=clang-offload-bundler" \ +// RUN: "--emit-fatbin-only" \ +// RUN: "-o" "%t.hipfb" \ +// RUN: "%t.out" \ +// RUN: --dry-run \ +// RUN: 2>&1 | FileCheck %s + +// clang-linker-wrapper was previously calling clang-offload-bundler with -targets=...,hip-amdgcn-amd-amdhsa--amdgcnspirv +// This caused the runtime not to recognise the triple for the AMD SPIR-V code. + +// CHECK: {{".*clang-offload-bundler.*"}} {{.*}} -targets={{.*}},hip-spirv64-amd-amdhsa--amdgcnspirv diff --git a/clang/test/Sema/type-dependent-attrs.c b/clang/test/Sema/type-dependent-attrs.c new file mode 100644 index 0000000000000..13068b3f94ad4 --- /dev/null +++ b/clang/test/Sema/type-dependent-attrs.c @@ -0,0 +1,10 @@ +// RUN: %clang_cc1 -std=c23 -fsyntax-only -verify %s + +int open() { return 0; } +void close(typeof(open()) *) {} + +void cleanup_attr() { + int fd_int [[gnu::cleanup(close)]] = open(); + auto fd_auto [[gnu::cleanup(close)]] = open(); + float fd_invalid [[gnu::cleanup(close)]] = open(); // expected-error {{'cleanup' function 'close' parameter has type 'typeof (open()) *' (aka 'int *') which is incompatible with type 'float *'}} +} diff --git a/clang/test/SemaCXX/attr-cleanup.cpp b/clang/test/SemaCXX/attr-cleanup.cpp index 32d10683edebb..6048b4e92ec3f 100644 --- a/clang/test/SemaCXX/attr-cleanup.cpp +++ b/clang/test/SemaCXX/attr-cleanup.cpp @@ -27,3 +27,28 @@ namespace E { int v1 __attribute__((cleanup(c3))); // expected-error {{'c3' is not a single function}} } } + +namespace F { + int open() { return 0; } + void close(decltype(open()) *) {} + + void test1() { + auto fd [[gnu::cleanup(close)]] = open(); + } + + template + void test2() { + Ty fd [[gnu::cleanup(close)]] = open(); + } + + template + void test3() { + Ty fd [[gnu::cleanup(close)]] = open(); // #TEST3_CLEANUP + } + + int main() { + test2(); + test3(); // expected-error@#TEST3_CLEANUP {{'cleanup' function 'close' parameter has type 'decltype(open()) *' (aka 'int *') which is incompatible with type 'float *'}} \ + expected-note {{in instantiation of function template specialization 'F::test3' requested here}} + } +} diff --git a/clang/test/SemaHLSL/BuiltIns/ddx-coarse-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/ddx-coarse-errors.hlsl new file mode 100644 index 0000000000000..ebad1cc6826d8 --- /dev/null +++ b/clang/test/SemaHLSL/BuiltIns/ddx-coarse-errors.hlsl @@ -0,0 +1,22 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -verify +// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-library %s -fnative-half-type -verify + +float no_arg() { + return __builtin_hlsl_elementwise_ddx_coarse(); + // expected-error@-1 {{too few arguments to function call, expected 1, have 0}} +} + +float too_many_args(float val) { + return __builtin_hlsl_elementwise_ddx_coarse(val, val); + // expected-error@-1 {{too many arguments to function call, expected 1, have 2}} +} + +float test_integer_scalar_input(int val) { + return __builtin_hlsl_elementwise_ddx_coarse(val); + // expected-error@-1 {{1st argument must be a scalar or vector of 16 or 32 bit floating-point types (was 'int')}} +} + +double test_double_scalar_input(double val) { + return __builtin_hlsl_elementwise_ddx_coarse(val); + // expected-error@-1 {{1st argument must be a scalar or vector of 16 or 32 bit floating-point types (was 'double')}} +} diff --git a/clang/test/SemaHLSL/BuiltIns/ddy-coarse-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/ddy-coarse-errors.hlsl new file mode 100644 index 0000000000000..9cc23665882c8 --- /dev/null +++ b/clang/test/SemaHLSL/BuiltIns/ddy-coarse-errors.hlsl @@ -0,0 +1,22 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -verify +// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-library %s -fnative-half-type -verify + +float no_arg() { + return __builtin_hlsl_elementwise_ddy_coarse(); + // expected-error@-1 {{too few arguments to function call, expected 1, have 0}} +} + +float too_many_args(float val) { + return __builtin_hlsl_elementwise_ddy_coarse(val, val); + // expected-error@-1 {{too many arguments to function call, expected 1, have 2}} +} + +float test_integer_scalar_input(int val) { + return __builtin_hlsl_elementwise_ddy_coarse(val); + // expected-error@-1 {{1st argument must be a scalar or vector of 16 or 32 bit floating-point types (was 'int')}} +} + +double test_double_scalar_input(double val) { + return __builtin_hlsl_elementwise_ddy_coarse(val); + // expected-error@-1 {{1st argument must be a scalar or vector of 16 or 32 bit floating-point types (was 'double')}} +} diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index bd4b40192c9f2..4a4a43db6ef25 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -439,8 +439,11 @@ fatbinary(ArrayRef> InputFiles, Args.MakeArgString(Twine("-compression-level=") + Arg->getValue())); SmallVector Targets = {"-targets=host-x86_64-unknown-linux-gnu"}; - for (const auto &[File, Arch] : InputFiles) - Targets.push_back(Saver.save("hip-amdgcn-amd-amdhsa--" + Arch)); + for (const auto &[File, Arch] : InputFiles) { + Targets.push_back(Saver.save(Arch == "amdgcnspirv" + ? "hip-spirv64-amd-amdhsa--" + Arch + : "hip-amdgcn-amd-amdhsa--" + Arch)); + } CmdArgs.push_back(Saver.save(llvm::join(Targets, ","))); #ifdef _WIN32 diff --git a/clang/utils/TableGen/ClangAttrEmitter.cpp b/clang/utils/TableGen/ClangAttrEmitter.cpp index e49dcb9b70b0f..bee9a01a3b01a 100644 --- a/clang/utils/TableGen/ClangAttrEmitter.cpp +++ b/clang/utils/TableGen/ClangAttrEmitter.cpp @@ -5045,6 +5045,26 @@ void EmitClangAttrParsedAttrKinds(const RecordKeeper &Records, << "}\n"; } +// Emits Sema calls for type dependent attributes +void EmitClangAttrIsTypeDependent(const RecordKeeper &Records, + raw_ostream &OS) { + emitSourceFileHeader("Attribute is type dependent", OS, Records); + + OS << "void checkAttrIsTypeDependent(Decl *D, const Attr *A) {\n"; + OS << " switch (A->getKind()) {\n"; + OS << " default:\n"; + OS << " break;\n"; + for (const auto *A : Records.getAllDerivedDefinitions("Attr")) { + if (A->getValueAsBit("IsTypeDependent")) { + OS << " case attr::" << A->getName() << ":\n"; + OS << " ActOn" << A->getName() << "Attr(D, A);\n"; + OS << " break;\n"; + } + } + OS << " }\n"; + OS << "}\n"; +} + // Emits the code to dump an attribute. void EmitClangAttrTextNodeDump(const RecordKeeper &Records, raw_ostream &OS) { emitSourceFileHeader("Attribute text node dumper", OS, Records); diff --git a/clang/utils/TableGen/TableGen.cpp b/clang/utils/TableGen/TableGen.cpp index 866040d503646..707ce617cb2d0 100644 --- a/clang/utils/TableGen/TableGen.cpp +++ b/clang/utils/TableGen/TableGen.cpp @@ -43,6 +43,7 @@ enum ActionType { GenClangAttrParsedAttrList, GenClangAttrParsedAttrImpl, GenClangAttrParsedAttrKinds, + GenClangAttrIsTypeDependent, GenClangAttrTextNodeDump, GenClangAttrNodeTraverse, GenClangBasicReader, @@ -179,6 +180,9 @@ cl::opt Action( clEnumValN(GenClangAttrParsedAttrKinds, "gen-clang-attr-parsed-attr-kinds", "Generate a clang parsed attribute kinds"), + clEnumValN(GenClangAttrIsTypeDependent, + "gen-clang-attr-is-type-dependent", + "Generate clang is type dependent attribute code"), clEnumValN(GenClangAttrTextNodeDump, "gen-clang-attr-text-node-dump", "Generate clang attribute text node dumper"), clEnumValN(GenClangAttrNodeTraverse, "gen-clang-attr-node-traverse", @@ -423,6 +427,9 @@ bool ClangTableGenMain(raw_ostream &OS, const RecordKeeper &Records) { case GenClangAttrParsedAttrKinds: EmitClangAttrParsedAttrKinds(Records, OS); break; + case GenClangAttrIsTypeDependent: + EmitClangAttrIsTypeDependent(Records, OS); + break; case GenClangAttrTextNodeDump: EmitClangAttrTextNodeDump(Records, OS); break; diff --git a/clang/utils/TableGen/TableGenBackends.h b/clang/utils/TableGen/TableGenBackends.h index fa49dcd289bc2..058bda3ebd246 100644 --- a/clang/utils/TableGen/TableGenBackends.h +++ b/clang/utils/TableGen/TableGenBackends.h @@ -82,6 +82,8 @@ void EmitClangAttrParsedAttrImpl(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitClangAttrParsedAttrKinds(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitClangAttrIsTypeDependent(const llvm::RecordKeeper &Records, + llvm::raw_ostream &OS); void EmitClangAttrTextNodeDump(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitClangAttrNodeTraverse(const llvm::RecordKeeper &Records, diff --git a/compiler-rt/test/asan/TestCases/Darwin/atos-symbolizer-dyld-root-path.cpp b/compiler-rt/test/asan/TestCases/Darwin/atos-symbolizer-dyld-root-path.cpp index 664471b6987a8..4201d49df4d74 100644 --- a/compiler-rt/test/asan/TestCases/Darwin/atos-symbolizer-dyld-root-path.cpp +++ b/compiler-rt/test/asan/TestCases/Darwin/atos-symbolizer-dyld-root-path.cpp @@ -1,6 +1,7 @@ // Check that when having a DYLD_ROOT_PATH set, the symbolizer still works. // RUN: %clangxx_asan -O0 %s -o %t -// RUN: %env_asan_opts=verbosity=2 DYLD_ROOT_PATH="/" ASAN_SYMBOLIZER_PATH=$(which atos) \ +// RUN: which atos | tr -d '\n' > %t.symbolizer_path +// RUN: %env_asan_opts=verbosity=2 DYLD_ROOT_PATH="/" ASAN_SYMBOLIZER_PATH=%{readfile:%t.symbolizer_path} \ // RUN: not %run %t 2>&1 | FileCheck %s // // Due to a bug in atos, this only works on x86_64. diff --git a/compiler-rt/test/asan/TestCases/Darwin/atos-symbolizer.cpp b/compiler-rt/test/asan/TestCases/Darwin/atos-symbolizer.cpp index bab4e4f3765c2..7487bd4cb40e6 100644 --- a/compiler-rt/test/asan/TestCases/Darwin/atos-symbolizer.cpp +++ b/compiler-rt/test/asan/TestCases/Darwin/atos-symbolizer.cpp @@ -1,7 +1,8 @@ // Check that the `atos` symbolizer works. // RUN: %clangxx_asan -O0 %s -o %t -// RUN: %env_asan_opts=verbosity=2 ASAN_SYMBOLIZER_PATH=$(which atos) not %run %t 2>&1 | FileCheck %s +// RUN: which atos | tr -d '\n' > %t.symbolizer_path +// RUN: %env_asan_opts=verbosity=2 ASAN_SYMBOLIZER_PATH=%{readfile:%t.symbolizer_path} not %run %t 2>&1 | FileCheck %s // Path returned by `which atos` is invalid on iOS. // UNSUPPORTED: ios, i386-darwin diff --git a/compiler-rt/test/asan/TestCases/Darwin/dyld_insert_libraries_reexec.cpp b/compiler-rt/test/asan/TestCases/Darwin/dyld_insert_libraries_reexec.cpp index 0fec18b89411a..145e162a21c0e 100644 --- a/compiler-rt/test/asan/TestCases/Darwin/dyld_insert_libraries_reexec.cpp +++ b/compiler-rt/test/asan/TestCases/Darwin/dyld_insert_libraries_reexec.cpp @@ -4,7 +4,8 @@ // UNSUPPORTED: ios // RUN: rm -rf %t && mkdir -p %t -// RUN: cp `%clang_asan -print-file-name=lib`/darwin/libclang_rt.asan_osx_dynamic.dylib \ +// RUN: %clang_asan -print-file-name=lib | tr -d '\n' > %t.lib_name +// RUN: cp %{readfile:%t.lib_name}/darwin/libclang_rt.asan_osx_dynamic.dylib \ // RUN: %t/libclang_rt.asan_osx_dynamic.dylib // RUN: %clangxx_asan %s -o %t/a.out diff --git a/compiler-rt/test/asan/TestCases/Darwin/dyld_insert_libraries_remove.cpp b/compiler-rt/test/asan/TestCases/Darwin/dyld_insert_libraries_remove.cpp index 0672e064a1904..872848d075eaf 100644 --- a/compiler-rt/test/asan/TestCases/Darwin/dyld_insert_libraries_remove.cpp +++ b/compiler-rt/test/asan/TestCases/Darwin/dyld_insert_libraries_remove.cpp @@ -5,29 +5,27 @@ // UNSUPPORTED: ios // RUN: rm -rf %t && mkdir -p %t -// RUN: cp `%clang_asan -print-file-name=lib`/darwin/libclang_rt.asan_osx_dynamic.dylib \ +// RUN: %clang_asan -print-file-name=lib | tr -d '\n' > %t.lib_name +// RUN: cp %{readfile:%t.lib_name}/darwin/libclang_rt.asan_osx_dynamic.dylib \ // RUN: %t/libclang_rt.asan_osx_dynamic.dylib // RUN: %clangxx_asan %s -o %t/a.out // RUN: %clangxx -DSHARED_LIB %s \ // RUN: -dynamiclib -o %t/dummy-so.dylib -// RUN: ( cd %t && \ -// RUN: DYLD_INSERT_LIBRARIES=@executable_path/libclang_rt.asan_osx_dynamic.dylib:dummy-so.dylib \ -// RUN: %run ./a.out 2>&1 ) | FileCheck %s || exit 1 +// RUN: cd %t +// RUN: env DYLD_INSERT_LIBRARIES=@executable_path/libclang_rt.asan_osx_dynamic.dylib:dummy-so.dylib \ +// RUN: %run ./a.out 2>&1 | FileCheck %s -// RUN: ( cd %t && \ -// RUN: DYLD_INSERT_LIBRARIES=libclang_rt.asan_osx_dynamic.dylib:dummy-so.dylib \ -// RUN: %run ./a.out 2>&1 ) | FileCheck %s || exit 1 +// RUN: env DYLD_INSERT_LIBRARIES=libclang_rt.asan_osx_dynamic.dylib:dummy-so.dylib \ +// RUN: %run ./a.out 2>&1 | FileCheck %s -// RUN: ( cd %t && \ -// RUN: %env_asan_opts=strip_env=0 \ -// RUN: DYLD_INSERT_LIBRARIES=libclang_rt.asan_osx_dynamic.dylib:dummy-so.dylib \ -// RUN: %run ./a.out 2>&1 ) | FileCheck %s --check-prefix=CHECK-KEEP || exit 1 +// RUN: %env_asan_opts=strip_env=0 \ +// RUN: DYLD_INSERT_LIBRARIES=libclang_rt.asan_osx_dynamic.dylib:dummy-so.dylib \ +// RUN: %run ./a.out 2>&1 | FileCheck %s --check-prefix=CHECK-KEEP -// RUN: ( cd %t && \ -// RUN: DYLD_INSERT_LIBRARIES=%t/libclang_rt.asan_osx_dynamic.dylib:dummy-so.dylib \ -// RUN: %run ./a.out 2>&1 ) | FileCheck %s || exit 1 +// RUN: env DYLD_INSERT_LIBRARIES=%t/libclang_rt.asan_osx_dynamic.dylib:dummy-so.dylib \ +// RUN: %run ./a.out 2>&1 | FileCheck %s #if !defined(SHARED_LIB) #include diff --git a/compiler-rt/test/asan/TestCases/Darwin/init_for_dlopen.cpp b/compiler-rt/test/asan/TestCases/Darwin/init_for_dlopen.cpp index 3bf8e99703a08..9bb652cc79438 100644 --- a/compiler-rt/test/asan/TestCases/Darwin/init_for_dlopen.cpp +++ b/compiler-rt/test/asan/TestCases/Darwin/init_for_dlopen.cpp @@ -5,7 +5,7 @@ // - By default the lit config sets this but we don't want this // test to implicitly depend on this. // - It avoids requiring `--crash` to be passed to `not`. -// RUN: APPLE_ASAN_INIT_FOR_DLOPEN=0 %env_asan_opts=abort_on_error=0 not \ +// RUN: %env_asan_opts=abort_on_error=0 APPLE_ASAN_INIT_FOR_DLOPEN=0 not \ // RUN: %run %t %shared_libasan 2>&1 | \ // RUN: FileCheck -check-prefix=CHECK-DL-OPEN-FAIL %s // RUN: env -u APPLE_ASAN_INIT_FOR_DLOPEN %env_asan_opts=abort_on_error=0 not \ diff --git a/compiler-rt/test/asan/TestCases/Darwin/malloc_zone-protected.cpp b/compiler-rt/test/asan/TestCases/Darwin/malloc_zone-protected.cpp index 125b544724d3f..ac3c5898f271a 100644 --- a/compiler-rt/test/asan/TestCases/Darwin/malloc_zone-protected.cpp +++ b/compiler-rt/test/asan/TestCases/Darwin/malloc_zone-protected.cpp @@ -3,8 +3,7 @@ #include // RUN: %clangxx_asan %s -o %t -// RUN: ASAN_OPTIONS="abort_on_error=1" not --crash %run %t 2>&1 | FileCheck %s - +// RUN: env ASAN_OPTIONS="abort_on_error=1" not --crash %run %t 2>&1 | FileCheck %s void *pwn(malloc_zone_t *unused_zone, size_t unused_size) { printf("PWNED\n"); diff --git a/compiler-rt/test/asan_abi/TestCases/Darwin/llvm_interface_symbols.cpp b/compiler-rt/test/asan_abi/TestCases/Darwin/llvm_interface_symbols.cpp index 5da18aa971d43..ba7b5e5815bd6 100644 --- a/compiler-rt/test/asan_abi/TestCases/Darwin/llvm_interface_symbols.cpp +++ b/compiler-rt/test/asan_abi/TestCases/Darwin/llvm_interface_symbols.cpp @@ -24,7 +24,8 @@ // RUN: diff %t.imports-sorted %t.exports-sorted // Ensure that there is no dynamic dylib linked. -// RUN: otool -L %t | (! grep -q "dynamic.dylib") +// RUN: otool -L %t > %t.libs +// RUN: not grep -q "dynamic.dylib" < %t.libs // UNSUPPORTED: ios diff --git a/compiler-rt/test/lit.common.cfg.py b/compiler-rt/test/lit.common.cfg.py index 3f7dd8e402b78..ea22fb0babc46 100644 --- a/compiler-rt/test/lit.common.cfg.py +++ b/compiler-rt/test/lit.common.cfg.py @@ -875,7 +875,7 @@ def is_windows_lto_supported(): config.substitutions.append( ( "%ld_flags_rpath_so" + postfix, - "-install_name @rpath/`basename %dynamiclib{}`".format(postfix), + "-install_name @rpath/%base_dynamiclib{}".format(postfix), ) ) elif config.target_os in ("FreeBSD", "NetBSD", "OpenBSD"): @@ -908,6 +908,9 @@ def is_windows_lto_supported(): config.substitutions.append( ("%dynamiclib" + postfix, "%t.dir/%xdynamiclib_filename" + postfix) ) + config.substitutions.append( + ("%base_dynamiclib" + postfix, "%xdynamiclib_filename" + postfix) + ) config.substitutions.append( ( "%xdynamiclib_filename" + postfix, diff --git a/compiler-rt/test/rtsan/Darwin/dlopen.cpp b/compiler-rt/test/rtsan/Darwin/dlopen.cpp index 1aabe5cb6e580..435a4353b7026 100644 --- a/compiler-rt/test/rtsan/Darwin/dlopen.cpp +++ b/compiler-rt/test/rtsan/Darwin/dlopen.cpp @@ -8,18 +8,19 @@ // RUN: %clangxx -fsanitize=realtime %s -o %t.so -shared -DSHARED_LIB // RUN: %clangxx %s -o %t -// RUN: RTSAN_DYLIB_PATH=`%clangxx -fsanitize=realtime %s -### 2>&1 \ +// RUN: %clangxx -fsanitize=realtime %s -### 2>&1 \ // RUN: | grep "libclang_rt.rtsan_osx_dynamic.dylib" \ -// RUN: | sed -e 's/.*"\(.*libclang_rt.rtsan_osx_dynamic.dylib\)".*/\1/'` +// RUN: | sed -e 's/.*"\(.*libclang_rt.rtsan_osx_dynamic.dylib\)".*/\1/' \ +// RUN: | tr -d '\n' > %t.rtsan_dylib_path // Launching a non-instrumented binary that dlopen's an instrumented library should fail. // RUN: not %run %t %t.so 2>&1 | FileCheck %s --check-prefix=CHECK-FAIL // Launching a non-instrumented binary with an explicit DYLD_INSERT_LIBRARIES should work. -// RUN: DYLD_INSERT_LIBRARIES=$RTSAN_DYLIB_PATH %run %t %t.so 2>&1 | FileCheck %s +// RUN: env DYLD_INSERT_LIBRARIES="%{readfile:%t.rtsan_dylib_path}" %run %t %t.so 2>&1 | FileCheck %s // Launching an instrumented binary with the DYLD_INSERT_LIBRARIES env variable has no error // RUN: %clangxx -fsanitize=realtime %s -o %t -// RUN: DYLD_INSERT_LIBRARIES=$RTSAN_DYLIB_PATH %run %t %t.so 2>&1 | FileCheck %s --check-prefix=CHECK-INSTRUMENTED +// RUN: env DYLD_INSERT_LIBRARIES="%{readfile:%t.rtsan_dylib_path}" %run %t %t.so 2>&1 | FileCheck %s --check-prefix=CHECK-INSTRUMENTED #include #include diff --git a/compiler-rt/test/tsan/Darwin/dlopen.cpp b/compiler-rt/test/tsan/Darwin/dlopen.cpp index 3d12b815f9c25..2ab052f1c0c26 100644 --- a/compiler-rt/test/tsan/Darwin/dlopen.cpp +++ b/compiler-rt/test/tsan/Darwin/dlopen.cpp @@ -9,14 +9,15 @@ // RUN: %clangxx_tsan %s -o %t.so -shared -DSHARED_LIB // RUN: %clangxx_tsan -fno-sanitize=thread %s -o %t -// RUN: TSAN_DYLIB_PATH=`%clangxx_tsan %s -### 2>&1 \ +// RUN: %clangxx_tsan %s -### 2>&1 \ // RUN: | grep "libclang_rt.tsan_osx_dynamic.dylib" \ -// RUN: | sed -e 's/.*"\(.*libclang_rt.tsan_osx_dynamic.dylib\)".*/\1/'` +// RUN: | sed -e 's/.*"\(.*libclang_rt.tsan_osx_dynamic.dylib\)".*/\1/' \ +// RUN: | tr -d '\n' > %t.tsan_dylib_path // Launching a non-instrumented binary that dlopen's an instrumented library should fail. // RUN: not %run %t %t.so 2>&1 | FileCheck %s --check-prefix=CHECK-FAIL // Launching a non-instrumented binary with an explicit DYLD_INSERT_LIBRARIES should work. -// RUN: DYLD_INSERT_LIBRARIES=$TSAN_DYLIB_PATH %run %t %t.so 2>&1 | FileCheck %s +// RUN: env DYLD_INSERT_LIBRARIES="%{readfile:%t.tsan_dylib_path}" %run %t %t.so 2>&1 | FileCheck %s #include #include diff --git a/compiler-rt/test/tsan/Darwin/external-ignore-noninstrumented.cpp b/compiler-rt/test/tsan/Darwin/external-ignore-noninstrumented.cpp index 916b0b893fc0d..cfa46e0f0a213 100644 --- a/compiler-rt/test/tsan/Darwin/external-ignore-noninstrumented.cpp +++ b/compiler-rt/test/tsan/Darwin/external-ignore-noninstrumented.cpp @@ -1,8 +1,10 @@ +// RUN: basename %t-lib.dylib | tr -d '\n' > %t.basename // RUN: %clangxx_tsan -shared %p/external-lib.cpp -fno-sanitize=thread -DUSE_TSAN_CALLBACKS \ -// RUN: -o %t-lib.dylib -install_name @rpath/`basename %t-lib.dylib` +// RUN: -o %t-lib.dylib -install_name @rpath/%{readfile:%t.basename} +// RUN: basename %t-module.dylib | tr -d '\n' > %t.basename // RUN: %clangxx_tsan -shared %p/external-noninstrumented-module.cpp %t-lib.dylib -fno-sanitize=thread \ -// RUN: -o %t-module.dylib -install_name @rpath/`basename %t-module.dylib` +// RUN: -o %t-module.dylib -install_name @rpath/%{readfile:%t.basename} // RUN: %clangxx_tsan %s %t-module.dylib -o %t // RUN: %run %t 2>&1 | FileCheck %s diff --git a/compiler-rt/test/tsan/Darwin/external.cpp b/compiler-rt/test/tsan/Darwin/external.cpp index bf189eb1d6b5b..52fae36f0e1f4 100644 --- a/compiler-rt/test/tsan/Darwin/external.cpp +++ b/compiler-rt/test/tsan/Darwin/external.cpp @@ -1,14 +1,17 @@ +// RUN: basename %t-lib-instrumented.dylib | tr -d '\n' > %t.basename // RUN: %clangxx_tsan %p/external-lib.cpp -shared \ // RUN: -o %t-lib-instrumented.dylib \ -// RUN: -install_name @rpath/`basename %t-lib-instrumented.dylib` +// RUN: -install_name @rpath/%{readfile:%t.basename} +// RUN: basename %t-lib-noninstrumented.dylib | tr -d '\n' > %t.basename // RUN: %clangxx_tsan %p/external-lib.cpp -shared -fno-sanitize=thread \ // RUN: -o %t-lib-noninstrumented.dylib \ -// RUN: -install_name @rpath/`basename %t-lib-noninstrumented.dylib` +// RUN: -install_name @rpath/%{readfile:%t.basename} +// RUN: basename %t-lib-noninstrumented-callbacks.dylib | tr -d '\n' > %t.basename // RUN: %clangxx_tsan %p/external-lib.cpp -shared -fno-sanitize=thread -DUSE_TSAN_CALLBACKS \ // RUN: -o %t-lib-noninstrumented-callbacks.dylib \ -// RUN: -install_name @rpath/`basename %t-lib-noninstrumented-callbacks.dylib` +// RUN: -install_name @rpath/%{readfile:%t.basename} // RUN: %clangxx_tsan %s %t-lib-instrumented.dylib -o %t-lib-instrumented // RUN: %clangxx_tsan %s %t-lib-noninstrumented.dylib -o %t-lib-noninstrumented diff --git a/compiler-rt/test/tsan/Darwin/malloc-stack-logging.cpp b/compiler-rt/test/tsan/Darwin/malloc-stack-logging.cpp index 8d9c2122d0e6c..0a96e346f8012 100644 --- a/compiler-rt/test/tsan/Darwin/malloc-stack-logging.cpp +++ b/compiler-rt/test/tsan/Darwin/malloc-stack-logging.cpp @@ -4,7 +4,7 @@ // use syscalls directly) to make sure other interceptors aren't called. // RUN: %clangxx_tsan -O1 %s -o %t -// RUN: MallocStackLogging=1 %run %t 2>&1 | FileCheck %s +// RUN: env MallocStackLogging=1 %run %t 2>&1 | FileCheck %s #include #include #include diff --git a/flang/include/flang/Parser/openmp-utils.h b/flang/include/flang/Parser/openmp-utils.h index 8fa4a84aff06d..36556f8dd7f4a 100644 --- a/flang/include/flang/Parser/openmp-utils.h +++ b/flang/include/flang/Parser/openmp-utils.h @@ -137,6 +137,8 @@ const T *GetFirstArgument(const OmpDirectiveSpecification &spec) { const BlockConstruct *GetFortranBlockConstruct( const ExecutionPartConstruct &epc); +const Block &GetInnermostExecPart(const Block &block); +bool IsStrictlyStructuredBlock(const Block &block); const OmpCombinerExpression *GetCombinerExpr( const OmpReductionSpecifier &rspec); diff --git a/flang/include/flang/Semantics/openmp-utils.h b/flang/include/flang/Semantics/openmp-utils.h index 14a4f0e93bda5..f5739ab16d643 100644 --- a/flang/include/flang/Semantics/openmp-utils.h +++ b/flang/include/flang/Semantics/openmp-utils.h @@ -97,8 +97,6 @@ const SomeExpr *HasStorageOverlap( const SomeExpr &base, llvm::ArrayRef exprs); bool IsAssignment(const parser::ActionStmt *x); bool IsPointerAssignment(const evaluate::Assignment &x); -const parser::Block &GetInnermostExecPart(const parser::Block &block); -bool IsStrictlyStructuredBlock(const parser::Block &block); } // namespace omp } // namespace Fortran::semantics diff --git a/flang/lib/Parser/openmp-utils.cpp b/flang/lib/Parser/openmp-utils.cpp index b9d3763cdd06d..2424828293c73 100644 --- a/flang/lib/Parser/openmp-utils.cpp +++ b/flang/lib/Parser/openmp-utils.cpp @@ -93,6 +93,34 @@ const BlockConstruct *GetFortranBlockConstruct( return nullptr; } +/// parser::Block is a list of executable constructs, parser::BlockConstruct +/// is Fortran's BLOCK/ENDBLOCK construct. +/// Strip the outermost BlockConstructs, return the reference to the Block +/// in the executable part of the innermost of the stripped constructs. +/// Specifically, if the given `block` has a single entry (it's a list), and +/// the entry is a BlockConstruct, get the Block contained within. Repeat +/// this step as many times as possible. +const Block &GetInnermostExecPart(const Block &block) { + const Block *iter{&block}; + while (iter->size() == 1) { + const ExecutionPartConstruct &ep{iter->front()}; + if (auto *bc{GetFortranBlockConstruct(ep)}) { + iter = &std::get(bc->t); + } else { + break; + } + } + return *iter; +} + +bool IsStrictlyStructuredBlock(const Block &block) { + if (block.size() == 1) { + return GetFortranBlockConstruct(block.front()) != nullptr; + } else { + return false; + } +} + const OmpCombinerExpression *GetCombinerExpr( const OmpReductionSpecifier &rspec) { return addr_if(std::get>(rspec.t)); diff --git a/flang/lib/Semantics/check-omp-atomic.cpp b/flang/lib/Semantics/check-omp-atomic.cpp index ec03e6fe2d920..b9e34ca6e74df 100644 --- a/flang/lib/Semantics/check-omp-atomic.cpp +++ b/flang/lib/Semantics/check-omp-atomic.cpp @@ -19,6 +19,7 @@ #include "flang/Evaluate/rewrite.h" #include "flang/Evaluate/tools.h" #include "flang/Parser/char-block.h" +#include "flang/Parser/openmp-utils.h" #include "flang/Parser/parse-tree.h" #include "flang/Semantics/openmp-utils.h" #include "flang/Semantics/symbol.h" @@ -41,6 +42,7 @@ namespace Fortran::semantics { +using namespace Fortran::parser::omp; using namespace Fortran::semantics::omp; namespace operation = Fortran::evaluate::operation; diff --git a/flang/lib/Semantics/openmp-utils.cpp b/flang/lib/Semantics/openmp-utils.cpp index 4a40d6eec17bb..18a37d64a3b5a 100644 --- a/flang/lib/Semantics/openmp-utils.cpp +++ b/flang/lib/Semantics/openmp-utils.cpp @@ -496,32 +496,4 @@ bool IsPointerAssignment(const evaluate::Assignment &x) { return std::holds_alternative(x.u) || std::holds_alternative(x.u); } - -/// parser::Block is a list of executable constructs, parser::BlockConstruct -/// is Fortran's BLOCK/ENDBLOCK construct. -/// Strip the outermost BlockConstructs, return the reference to the Block -/// in the executable part of the innermost of the stripped constructs. -/// Specifically, if the given `block` has a single entry (it's a list), and -/// the entry is a BlockConstruct, get the Block contained within. Repeat -/// this step as many times as possible. -const parser::Block &GetInnermostExecPart(const parser::Block &block) { - const parser::Block *iter{&block}; - while (iter->size() == 1) { - const parser::ExecutionPartConstruct &ep{iter->front()}; - if (auto *bc{GetFortranBlockConstruct(ep)}) { - iter = &std::get(bc->t); - } else { - break; - } - } - return *iter; -} - -bool IsStrictlyStructuredBlock(const parser::Block &block) { - if (block.size() == 1) { - return GetFortranBlockConstruct(block.front()) != nullptr; - } else { - return false; - } -} } // namespace Fortran::semantics::omp diff --git a/flang/test/Lower/PowerPC/ppc-vec-load-elem-order.f90 b/flang/test/Lower/PowerPC/ppc-vec-load-elem-order.f90 index 355fd6c3a742a..b17c3f1bdc4e7 100644 --- a/flang/test/Lower/PowerPC/ppc-vec-load-elem-order.f90 +++ b/flang/test/Lower/PowerPC/ppc-vec-load-elem-order.f90 @@ -394,7 +394,7 @@ subroutine vec_xl_testi8a(arg1, arg2, res) vector(integer(1)) :: res res = vec_xl(arg1, arg2) - + ! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]] ! LLVMIR: %[[ld:.*]] = load <16 x i8>, ptr %[[addr]], align 1 @@ -481,7 +481,7 @@ subroutine vec_xl_be_testi8a(arg1, arg2, res) vector(integer(1)) :: res res = vec_xl_be(arg1, arg2) - + ! LLVMIR: %4 = load i8, ptr %0, align 1 ! LLVMIR: %5 = getelementptr i8, ptr %1, i8 %4 ! LLVMIR: %6 = load <16 x i8>, ptr %5, align 1 diff --git a/flang/test/Lower/PowerPC/ppc-vec-sel.f90 b/flang/test/Lower/PowerPC/ppc-vec-sel.f90 index c3de8ba9c1444..93641d1461a99 100644 --- a/flang/test/Lower/PowerPC/ppc-vec-sel.f90 +++ b/flang/test/Lower/PowerPC/ppc-vec-sel.f90 @@ -136,7 +136,7 @@ subroutine vec_sel_testu8(arg1, arg2, arg3) vector(unsigned(8)) :: arg1, arg2, r vector(unsigned(8)) :: arg3 r = vec_sel(arg1, arg2, arg3) - + ! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 ! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 diff --git a/flang/test/Lower/PowerPC/ppc-vec-store-elem-order.f90 b/flang/test/Lower/PowerPC/ppc-vec-store-elem-order.f90 index caf6d5463a833..947c8b1c7eb2c 100644 --- a/flang/test/Lower/PowerPC/ppc-vec-store-elem-order.f90 +++ b/flang/test/Lower/PowerPC/ppc-vec-store-elem-order.f90 @@ -14,7 +14,7 @@ subroutine vec_st_test(arg1, arg2, arg3) ! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %0, align 16 ! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[arg2]] -! LLVMIR: %[[bc:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32> +! LLVMIR: %[[bc:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32> ! LLVMIR: %[[shf:.*]] = shufflevector <4 x i32> %[[bc]], <4 x i32> undef, <4 x i32> ! LLVMIR: call void @llvm.ppc.altivec.stvx(<4 x i32> %[[shf]], ptr %[[addr]]) end subroutine vec_st_test @@ -28,7 +28,7 @@ subroutine vec_ste_test(arg1, arg2, arg3) integer(4) :: arg2 real(4) :: arg3 call vec_ste(arg1, arg2, arg3) - + ! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %0, align 16 ! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4 ! LLVMIR: %[[addr]] = getelementptr i8, ptr %2, i32 %[[arg2]] diff --git a/flang/test/Lower/PowerPC/ppc-vec-store.f90 b/flang/test/Lower/PowerPC/ppc-vec-store.f90 index c25cc8b07cf79..1c3ab9638f117 100644 --- a/flang/test/Lower/PowerPC/ppc-vec-store.f90 +++ b/flang/test/Lower/PowerPC/ppc-vec-store.f90 @@ -300,7 +300,7 @@ subroutine vec_xst_test_vr4i2r4(arg1, arg2, arg3) real(4) :: arg3 call vec_xst(arg1, arg2, arg3) - + ! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16 ! LLVMIR: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %{{.*}}, i16 %[[arg2]] @@ -432,7 +432,7 @@ subroutine vec_xst_be_test_vi4i4vai4(arg1, arg2, arg3, i) ! LLVMIR: %[[iadd:.*]] = add nsw i64 %[[imul2]], 0 ! LLVMIR: %[[gep1:.*]] = getelementptr <4 x i32>, ptr %2, i64 %[[iadd]] ! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16 -! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4 +! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4 ! LLVMIR: %[[gep2:.*]] = getelementptr i8, ptr %[[gep1]], i32 %[[arg2]] ! LLVMIR: %[[src:.*]] = shufflevector <4 x i32> %[[arg1]], <4 x i32> undef, <4 x i32> ! LLVMIR: store <4 x i32> %[[src]], ptr %[[gep2]], align 16 @@ -449,7 +449,7 @@ subroutine vec_xstd2_test_vr4i2r4(arg1, arg2, arg3) real(4) :: arg3 call vec_xstd2(arg1, arg2, arg3) - + ! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16 ! LLVMIR: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %{{.*}}, i16 %[[arg2]] @@ -509,7 +509,7 @@ subroutine vec_xstd2_test_vi4i4vai4(arg1, arg2, arg3, i) ! LLVMIR: %[[iadd:.*]] = add nsw i64 %[[imul2]], 0 ! LLVMIR: %[[gep1:.*]] = getelementptr <4 x i32>, ptr %2, i64 %[[iadd]] ! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16 -! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4 +! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4 ! LLVMIR: %[[gep2:.*]] = getelementptr i8, ptr %[[gep1]], i32 %[[arg2]] ! LLVMIR: %[[src:.*]] = bitcast <4 x i32> %[[arg1]] to <2 x i64> ! LLVMIR: store <2 x i64> %[[src]], ptr %[[gep2]], align 16 @@ -526,7 +526,7 @@ subroutine vec_xstw4_test_vr4i2r4(arg1, arg2, arg3) real(4) :: arg3 call vec_xstw4(arg1, arg2, arg3) - + ! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16 ! LLVMIR: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %{{.*}}, i16 %[[arg2]] @@ -584,7 +584,7 @@ subroutine vec_xstw4_test_vi4i4vai4(arg1, arg2, arg3, i) ! LLVMIR: %[[iadd:.*]] = add nsw i64 %[[imul2]], 0 ! LLVMIR: %[[gep1:.*]] = getelementptr <4 x i32>, ptr %2, i64 %[[iadd]] ! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16 -! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4 +! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4 ! LLVMIR: %[[gep2:.*]] = getelementptr i8, ptr %[[gep1]], i32 %[[arg2]] ! LLVMIR: store <4 x i32> %[[arg1]], ptr %[[gep2]], align 16 end subroutine vec_xstw4_test_vi4i4vai4 diff --git a/flang/test/Lower/allocatable-assignment.f90 b/flang/test/Lower/allocatable-assignment.f90 index 3c220232104a5..b6b2f7b6c77b9 100644 --- a/flang/test/Lower/allocatable-assignment.f90 +++ b/flang/test/Lower/allocatable-assignment.f90 @@ -283,14 +283,14 @@ subroutine test_dyn_char(x, n, c) ! CHECK: hlfir.assign %[[VAL_8]]#0 to %[[VAL_14]]#0 realloc keep_lhs_len : !fir.box>>, !fir.ref>>>> subroutine test_derived_with_init(x, y) - type t + type t integer, allocatable :: a(:) - end type - type(t), allocatable :: x - type(t) :: y + end type + type(t), allocatable :: x + type(t) :: y ! The allocatable component of `x` need to be initialized ! during the automatic allocation (setting its rank and allocation - ! status) before it is assigned with the component of `y` + ! status) before it is assigned with the component of `y` x = y end subroutine ! CHECK-LABEL: func.func @_QMalloc_assignPtest_derived_with_init( @@ -357,7 +357,7 @@ end function elt ! real :: y(2, 3) = reshape([1,2,3,4,5,6], [2,3]) ! real, allocatable :: x (:, :) ! allocate(x(2,2)) -! call test_with_lbounds(x, y) +! call test_with_lbounds(x, y) ! print *, x(10, 20) ! print *, x !end diff --git a/flang/test/Lower/allocatable-globals.f90 b/flang/test/Lower/allocatable-globals.f90 index 9d386688f8881..8b7420ab32391 100644 --- a/flang/test/Lower/allocatable-globals.f90 +++ b/flang/test/Lower/allocatable-globals.f90 @@ -12,7 +12,7 @@ module mod_allocatables character(10), allocatable :: c(:) end module - + ! CHECK-LABEL: func @_QPtest_mod_allocatables() subroutine test_mod_allocatables() use mod_allocatables, only: c diff --git a/flang/test/Lower/allocatable-polymorphic.f90 b/flang/test/Lower/allocatable-polymorphic.f90 index 27cdf2839767d..d528fd8e546ff 100644 --- a/flang/test/Lower/allocatable-polymorphic.f90 +++ b/flang/test/Lower/allocatable-polymorphic.f90 @@ -460,7 +460,7 @@ subroutine test_allocate_with_mold() ! CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X]](%{{.*}}) {uniq_name = "_QMpolyFtest_allocate_with_moldEx"} : (!fir.ref,c:i32}>>>, !fir.shape<1>) -> (!fir.ref,c:i32}>>>, !fir.ref,c:i32}>>>) ! CHECK: %[[EMBOX_X:.*]] = fir.embox %[[X_DECL]]#0(%{{.*}}) : (!fir.ref,c:i32}>>>, !fir.shape<1>) -> !fir.box,c:i32}>>> -! CHECK: %[[RANK:.*]] = arith.constant 1 : i32 +! CHECK: %[[RANK:.*]] = arith.constant 1 : i32 ! CHECK: %[[P_BOX_NONE:.*]] = fir.convert %[[P_DECL]]#0 : (!fir.ref>>>>) -> !fir.ref> ! CHECK: %[[X_BOX_NONE:.*]] = fir.convert %[[EMBOX_X]] : (!fir.box,c:i32}>>>) -> !fir.box ! CHECK: fir.call @_FortranAPointerApplyMold(%[[P_BOX_NONE]], %[[X_BOX_NONE]], %[[RANK]]) {{.*}} : (!fir.ref>, !fir.box, i32) -> () @@ -614,10 +614,10 @@ program test_alloc ! LLVM: %[[TYPE_CODE:.*]] = load i8, ptr %[[TYPE_CODE_GEP]] ! LLVM-NEXT: %[[EXT_TYPE_CODE:.*]] = sext i8 %[[TYPE_CODE]] to i32 ! LLVM: %{{.*}} = insertvalue { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } undef, i64 %[[ELEM_SIZE]], 1 -! LLVM: %[[TRUNC_TYPE_CODE:.*]] = trunc i32 %[[EXT_TYPE_CODE]] to i8 +! LLVM: %[[TRUNC_TYPE_CODE:.*]] = trunc i32 %[[EXT_TYPE_CODE]] to i8 ! LLVM: %{{.*}} = insertvalue { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %{{.*}}, i8 %[[TRUNC_TYPE_CODE]], 4 ! LLVM: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %{{.*}}, ptr %[[TMP:.*]] -! LLVM: call void %{{.*}}(ptr %{{.*}}) +! LLVM: call void %{{.*}}(ptr %{{.*}}) ! LLVM: call void @llvm.memcpy.p0.p0.i32 ! LLVM: %[[GEP_TDESC_C2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.*}}, i32 0, i32 7 @@ -628,7 +628,7 @@ program test_alloc ! LLVM: %[[TYPE_CODE:.*]] = load i8, ptr %[[TYPE_CODE_GEP]] ! LLVM-NEXT: %[[EXT_TYPE_CODE:.*]] = sext i8 %[[TYPE_CODE]] to i32 ! LLVM: %{{.*}} = insertvalue { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } undef, i64 %[[ELEM_SIZE]], 1 -! LLVM: %[[TRUNC_TYPE_CODE:.*]] = trunc i32 %[[EXT_TYPE_CODE]] to i8 +! LLVM: %[[TRUNC_TYPE_CODE:.*]] = trunc i32 %[[EXT_TYPE_CODE]] to i8 ! LLVM: %{{.*}} = insertvalue { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %{{.*}}, i8 %[[TRUNC_TYPE_CODE]], 4 ! LLVM: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %{{.*}}, ptr %{{.*}} ! LLVM: call void %{{.*}}(ptr %{{.*}}) diff --git a/flang/test/Lower/allocated.f90 b/flang/test/Lower/allocated.f90 index 6e8420fc7d79a..11e856fd67bad 100644 --- a/flang/test/Lower/allocated.f90 +++ b/flang/test/Lower/allocated.f90 @@ -15,4 +15,3 @@ subroutine allocated_test(scalar, array) ! CHECK: cmpi ne, %[[addrToInt1]], %c0{{.*}} print *, allocated(array) end subroutine - \ No newline at end of file diff --git a/flang/test/Lower/array-elemental-calls-2.f90 b/flang/test/Lower/array-elemental-calls-2.f90 index 2674b07dece17..60c9257a19822 100644 --- a/flang/test/Lower/array-elemental-calls-2.f90 +++ b/flang/test/Lower/array-elemental-calls-2.f90 @@ -172,7 +172,7 @@ subroutine check_parentheses_logical() subroutine check_parentheses_derived(a) type t integer :: i - end type + end type interface integer elemental function elem_func_derived(x) import :: t diff --git a/flang/test/Lower/array-elemental-calls.f90 b/flang/test/Lower/array-elemental-calls.f90 index 853807bcb3e6c..93d2979ec9383 100644 --- a/flang/test/Lower/array-elemental-calls.f90 +++ b/flang/test/Lower/array-elemental-calls.f90 @@ -57,7 +57,7 @@ elemental impure integer function impure_func(j) integer, intent(in) :: j end function end interface - + i = 42 + pure_func(j) i = 42 + impure_func(j) end subroutine diff --git a/flang/test/Lower/array-expression-assumed-size.f90 b/flang/test/Lower/array-expression-assumed-size.f90 index a498148d07fc7..b51dc00c20e28 100644 --- a/flang/test/Lower/array-expression-assumed-size.f90 +++ b/flang/test/Lower/array-expression-assumed-size.f90 @@ -16,8 +16,8 @@ end subroutine assumed_size_forall_test ! CHECK-LABEL: func @_QPassumed_size_test( ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>{{.*}}) { -! CHECK: %[[VAL_1A:.*]] = fir.convert %c10{{.*}} : (i64) -> index -! CHECK: %[[VAL_1B:.*]] = arith.cmpi sgt, %[[VAL_1A]], %c0{{.*}} : index +! CHECK: %[[VAL_1A:.*]] = fir.convert %c10{{.*}} : (i64) -> index +! CHECK: %[[VAL_1B:.*]] = arith.cmpi sgt, %[[VAL_1A]], %c0{{.*}} : index ! CHECK: %[[VAL_1:.*]] = arith.select %[[VAL_1B]], %[[VAL_1A]], %c0{{.*}} : index ! CHECK: %[[VAL_2:.*]] = fir.assumed_size_extent : index ! CHECK: %[[VAL_3:.*]] = arith.constant 1 : index @@ -79,8 +79,8 @@ end subroutine assumed_size_forall_test ! CHECK-LABEL: func @_QPassumed_size_forall_test( ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>{{.*}}) { ! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {adapt.valuebyref, bindc_name = "i"} -! CHECK: %[[VAL_2A:.*]] = fir.convert %c10{{.*}} : (i64) -> index -! CHECK: %[[VAL_2B:.*]] = arith.cmpi sgt, %[[VAL_2A]], %c0{{.*}} : index +! CHECK: %[[VAL_2A:.*]] = fir.convert %c10{{.*}} : (i64) -> index +! CHECK: %[[VAL_2B:.*]] = arith.cmpi sgt, %[[VAL_2A]], %c0{{.*}} : index ! CHECK: %[[VAL_2:.*]] = arith.select %[[VAL_2B]], %[[VAL_2A]], %c0{{.*}} : index ! CHECK: %[[VAL_3:.*]] = fir.assumed_size_extent : index ! CHECK: %[[VAL_4:.*]] = arith.constant 2 : i32 diff --git a/flang/test/Lower/array-substring.f90 b/flang/test/Lower/array-substring.f90 index 7544fbb989627..0ede04f0bb2f8 100644 --- a/flang/test/Lower/array-substring.f90 +++ b/flang/test/Lower/array-substring.f90 @@ -46,5 +46,5 @@ function test(C) logical :: test(1) character*12 C(1) - test = C(1:1)(1:8) == (/'ABCDabcd'/) + test = C(1:1)(1:8) == (/'ABCDabcd'/) end function test diff --git a/flang/test/Lower/array-wide-char.f90 b/flang/test/Lower/array-wide-char.f90 index 8bad280d0f056..44fcd45519d85 100644 --- a/flang/test/Lower/array-wide-char.f90 +++ b/flang/test/Lower/array-wide-char.f90 @@ -2,7 +2,7 @@ character(LEN=128, KIND=4), PARAMETER :: conarr(3) = & [ character(128,4) :: "now is the time", "for all good men to come", & - "to the aid of the country" ] + "to the aid of the country" ] character(LEN=10, KIND=4) :: arr(3) = & [ character(10,4) :: "good buddy", "best buddy", " " ] call action_on_char4(conarr) diff --git a/flang/test/Lower/array.f90 b/flang/test/Lower/array.f90 index 710175739b3a8..cd12d7f851e67 100644 --- a/flang/test/Lower/array.f90 +++ b/flang/test/Lower/array.f90 @@ -93,7 +93,7 @@ subroutine s(i,j,k,ii,jj,kk,a1,a2,a3,a4,a5,a6,a7) ! CHECK: fir.coordinate_of %[[a7]], %[[t7]] : ! CHECK-LABEL: EndIoStatement print *, a7(kk, jj, ii) - + end subroutine s ! CHECK-LABEL: range diff --git a/flang/test/Lower/forall-pointer-assignment.f90 b/flang/test/Lower/forall-pointer-assignment.f90 index d89fb3ed5cb57..62184a77addf5 100644 --- a/flang/test/Lower/forall-pointer-assignment.f90 +++ b/flang/test/Lower/forall-pointer-assignment.f90 @@ -1,4 +1,4 @@ -! Test lower of FORALL pointer assignment +! Test lower of FORALL pointer assignment ! RUN: bbc -emit-fir %s -o - | FileCheck %s diff --git a/flang/test/Lower/forall/forall-2.f90 b/flang/test/Lower/forall/forall-2.f90 index cdafb4f3d49e7..c6a20f5859497 100644 --- a/flang/test/Lower/forall/forall-2.f90 +++ b/flang/test/Lower/forall/forall-2.f90 @@ -16,7 +16,7 @@ subroutine implied_iters_allocatable(thing, a1) end type t type(t) :: thing(:) integer :: i - + forall (i=5:13) ! commenting out this test for the moment (hits assert) ! thing(i)%arr = a1 @@ -32,7 +32,7 @@ subroutine conflicting_allocatable(thing, lo, hi) end type t type(t) :: thing(:) integer :: i - + forall (i = lo:hi) ! commenting out this test for the moment (hits assert) ! thing(i)%arr = thing(hi-i)%arr diff --git a/flang/test/Lower/forall/forall-ranked.f90 b/flang/test/Lower/forall/forall-ranked.f90 index 9e56be926e78e..f508c67468212 100644 --- a/flang/test/Lower/forall/forall-ranked.f90 +++ b/flang/test/Lower/forall/forall-ranked.f90 @@ -68,7 +68,7 @@ end function f integer :: arr(11) end type t type(t) :: a(10,10) - + forall (i=1:5) a(i,:)%arr(i+4) = f(i) end forall diff --git a/flang/test/Lower/forall/forall-where-2.f90 b/flang/test/Lower/forall/forall-where-2.f90 index c075508bef561..85aab87559c3c 100644 --- a/flang/test/Lower/forall/forall-where-2.f90 +++ b/flang/test/Lower/forall/forall-where-2.f90 @@ -6,7 +6,7 @@ ! Test a FORALL construct with a nested WHERE construct where the mask ! contains temporary array expressions. -subroutine test_nested_forall_where_with_temp_in_mask(a,b) +subroutine test_nested_forall_where_with_temp_in_mask(a,b) interface function temp_foo(i, j) integer :: i, j @@ -28,10 +28,10 @@ function temp_foo(i, j) ! CHECK: func @_QPtest_nested_forall_where_with_temp_in_mask({{.*}}) { ! CHECK: %[[tempResultBox:.*]] = fir.alloca !fir.box>> {bindc_name = ".result"} - ! Where condition pre-evaluation + ! Where condition pre-evaluation ! CHECK: fir.do_loop {{.*}} { ! CHECK: fir.do_loop {{.*}} { - ! Evaluation of mask for iteration (i,j) into ragged array temp + ! Evaluation of mask for iteration (i,j) into ragged array temp ! CHECK: %[[tempResult:.*]] = fir.call @_QPtemp_foo ! CHECK: fir.save_result %[[tempResult]] to %[[tempResultBox]] : !fir.box>>, !fir.ref>>> ! CHECK: fir.if {{.*}} { @@ -52,7 +52,7 @@ function temp_foo(i, j) ! CHECK: fir.do_loop {{.*}} { ! Array assignment at iteration (i, j) ! CHECK: fir.do_loop {{.*}} { -! CHECK: fir.if {{.*}} { +! CHECK: fir.if {{.*}} { ! CHECK: arith.divf ! CHECK: } else { ! CHECK: } @@ -64,7 +64,7 @@ function temp_foo(i, j) ! CHECK: fir.do_loop {{.*}} { ! Array assignment at iteration (i, j) ! CHECK: fir.do_loop {{.*}} { -! CHECK: fir.if {{.*}} { +! CHECK: fir.if {{.*}} { ! CHECK: } else { ! CHECK: arith.negf ! CHECK: } diff --git a/flang/test/Lower/forall/forall-where.f90 b/flang/test/Lower/forall/forall-where.f90 index 54ff2bd4c3f16..3202edbaec808 100644 --- a/flang/test/Lower/forall/forall-where.f90 +++ b/flang/test/Lower/forall/forall-where.f90 @@ -6,7 +6,7 @@ ! This has both an explicit and implicit iteration space. The WHERE construct ! makes the assignments conditional and the where mask evaluation must happen ! prior to evaluating the array assignment statement. -subroutine test_nested_forall_where(a,b) +subroutine test_nested_forall_where(a,b) type t real data(100) end type t diff --git a/libcxx/include/__config b/libcxx/include/__config index 8f461599ffd5b..d79ace0cbb896 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -546,6 +546,12 @@ typedef __char32_t char32_t; # define _LIBCPP_DEPRECATED_(m) # endif +# if defined(__DEPRECATED) && __DEPRECATED && !defined(_LIBCPP_DISABLE_DEPRECATION_WARNINGS) +# define _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS 1 +# else +# define _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS 0 +# endif + # if !defined(_LIBCPP_CXX03_LANG) # define _LIBCPP_DEPRECATED_IN_CXX11 _LIBCPP_DEPRECATED # else diff --git a/libcxx/include/ccomplex b/libcxx/include/ccomplex index ee7e088aac54d..c1cb039f83a5e 100644 --- a/libcxx/include/ccomplex +++ b/libcxx/include/ccomplex @@ -26,18 +26,10 @@ # pragma GCC system_header # endif -# if _LIBCPP_STD_VER >= 20 - -using __standard_header_ccomplex - _LIBCPP_DEPRECATED_("removed in C++20. Include instead.") _LIBCPP_NODEBUG = void; -using __use_standard_header_ccomplex _LIBCPP_NODEBUG = __standard_header_ccomplex; - -# elif _LIBCPP_STD_VER >= 17 - -using __standard_header_ccomplex _LIBCPP_DEPRECATED_("Include instead.") _LIBCPP_NODEBUG = void; -using __use_standard_header_ccomplex _LIBCPP_NODEBUG = __standard_header_ccomplex; - +# if _LIBCPP_STD_VER >= 17 && !__building_module(std) && _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS +# warning is deprecated in C++17 and removed in C++20. Include instead. # endif + #endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS) #endif // _LIBCPP_CCOMPLEX diff --git a/libcxx/include/ciso646 b/libcxx/include/ciso646 index 34164362dc10d..d9eae41291024 100644 --- a/libcxx/include/ciso646 +++ b/libcxx/include/ciso646 @@ -24,13 +24,10 @@ # pragma GCC system_header # endif -# if _LIBCPP_STD_VER >= 20 - -using __standard_header_ciso646 - _LIBCPP_DEPRECATED_("removed in C++20. Include instead.") _LIBCPP_NODEBUG = void; -using __use_standard_header_ciso646 _LIBCPP_NODEBUG = __standard_header_ciso646; - +# if _LIBCPP_STD_VER >= 20 && !__building_module(std) && _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS +# warning is removed in C++20. Include instead. # endif + #endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS) #endif // _LIBCPP_CISO646 diff --git a/libcxx/include/cstdalign b/libcxx/include/cstdalign index 7f8dd1e1fbaf8..7aa8cc81ad14c 100644 --- a/libcxx/include/cstdalign +++ b/libcxx/include/cstdalign @@ -43,17 +43,10 @@ Macros: # undef __alignof_is_defined # define __alignof_is_defined 1 -# if _LIBCPP_STD_VER >= 20 - -using __standard_header_cstdalign _LIBCPP_DEPRECATED_("removed in C++20.") _LIBCPP_NODEBUG = void; -using __use_standard_header_cstdalign _LIBCPP_NODEBUG = __standard_header_cstdalign; - -# elif _LIBCPP_STD_VER >= 17 - -using __standard_header_cstdalign _LIBCPP_DEPRECATED _LIBCPP_NODEBUG = void; -using __use_standard_header_cstdalign _LIBCPP_NODEBUG = __standard_header_cstdalign; - +# if _LIBCPP_STD_VER >= 17 && !__building_module(std) && _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS +# warning is deprecated in C++17 and removed in C++20. # endif + #endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS) #endif // _LIBCPP_CSTDALIGN diff --git a/libcxx/include/cstdbool b/libcxx/include/cstdbool index a432d5f08b9ae..805a287bd7627 100644 --- a/libcxx/include/cstdbool +++ b/libcxx/include/cstdbool @@ -31,17 +31,10 @@ Macros: # undef __bool_true_false_are_defined # define __bool_true_false_are_defined 1 -# if _LIBCPP_STD_VER >= 20 - -using __standard_header_cstdbool _LIBCPP_DEPRECATED_("removed in C++20.") _LIBCPP_NODEBUG = void; -using __use_standard_header_cstdbool _LIBCPP_NODEBUG = __standard_header_cstdbool; - -# elif _LIBCPP_STD_VER >= 17 - -using __standard_header_cstdbool _LIBCPP_DEPRECATED _LIBCPP_NODEBUG = void; -using __use_standard_header_cstdbool _LIBCPP_NODEBUG = __standard_header_cstdbool; - +# if _LIBCPP_STD_VER >= 17 && !__building_module(std) && _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS +# warning is deprecated in C++17 and removed in C++20. # endif + #endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS) #endif // _LIBCPP_CSTDBOOL diff --git a/libcxx/include/ctgmath b/libcxx/include/ctgmath index db0786f1e2c46..13b7a96e4d8fc 100644 --- a/libcxx/include/ctgmath +++ b/libcxx/include/ctgmath @@ -28,17 +28,8 @@ # pragma GCC system_header # endif -# if _LIBCPP_STD_VER >= 20 - -using __standard_header_ctgmath - _LIBCPP_DEPRECATED_("removed in C++20. Include and instead.") _LIBCPP_NODEBUG = void; -using __use_standard_header_ctgmath _LIBCPP_NODEBUG = __standard_header_ctgmath; - -# elif _LIBCPP_STD_VER >= 17 - -using __standard_header_ctgmath _LIBCPP_DEPRECATED_("Include and instead.") _LIBCPP_NODEBUG = void; -using __use_standard_header_ctgmath _LIBCPP_NODEBUG = __standard_header_ctgmath; - +# if _LIBCPP_STD_VER >= 17 && !__building_module(std) && _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS +# warning is deprecated in C++17 and removed in C++20. Include and instead. # endif #endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS) diff --git a/libcxx/test/libcxx/transitive_includes.gen.py b/libcxx/test/libcxx/transitive_includes.gen.py index 6ed35af7e275e..2b643e1f2ad48 100644 --- a/libcxx/test/libcxx/transitive_includes.gen.py +++ b/libcxx/test/libcxx/transitive_includes.gen.py @@ -89,7 +89,7 @@ // UNSUPPORTED: LIBCXX-FREEBSD-FIXME // RUN: mkdir %t -// RUN: %{{cxx}} %s %{{flags}} %{{compile_flags}} --trace-includes -fshow-skipped-includes --preprocess > /dev/null 2> %t/trace-includes.txt +// RUN: %{{cxx}} %s %{{flags}} %{{compile_flags}} -Wno-deprecated --trace-includes -fshow-skipped-includes --preprocess > /dev/null 2> %t/trace-includes.txt // RUN: %{{python}} %{{libcxx-dir}}/test/libcxx/transitive_includes/to_csv.py %t/trace-includes.txt > %t/actual_transitive_includes.csv // RUN: cat %{{libcxx-dir}}/test/libcxx/transitive_includes/%{{cxx_std}}.csv | awk '/^{escaped_header} / {{ print }}' > %t/expected_transitive_includes.csv // RUN: diff -w %t/expected_transitive_includes.csv %t/actual_transitive_includes.csv diff --git a/libcxx/test/std/depr/depr.cpp.headers/ccomplex.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/ccomplex.verify.cpp index 0eaf82ce5cef0..8df89d0ba9206 100644 --- a/libcxx/test/std/depr/depr.cpp.headers/ccomplex.verify.cpp +++ b/libcxx/test/std/depr/depr.cpp.headers/ccomplex.verify.cpp @@ -14,12 +14,6 @@ // UNSUPPORTED: c++03, c++11, c++14 // UNSUPPORTED: clang-modules-build -#include "test_macros.h" - #include -#if TEST_STD_VER >= 20 -// expected-warning@ccomplex:* {{'__standard_header_ccomplex' is deprecated: removed in C++20. Include instead.}} -#else -// expected-warning@ccomplex:* {{'__standard_header_ccomplex' is deprecated: Include instead.}} -#endif +// expected-warning@ccomplex:* {{ is deprecated in C++17 and removed in C++20. Include instead.}} diff --git a/libcxx/test/std/depr/depr.cpp.headers/ciso646.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/ciso646.verify.cpp index 04acd10081548..32b57033331c8 100644 --- a/libcxx/test/std/depr/depr.cpp.headers/ciso646.verify.cpp +++ b/libcxx/test/std/depr/depr.cpp.headers/ciso646.verify.cpp @@ -15,4 +15,5 @@ // UNSUPPORTED: clang-modules-build #include -// expected-warning@ciso646:* {{'__standard_header_ciso646' is deprecated: removed in C++20. Include instead.}} + +// expected-warning@ciso646:* {{ is removed in C++20. Include instead.}} diff --git a/libcxx/test/std/depr/depr.cpp.headers/cstdalign.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/cstdalign.verify.cpp index dc9f1af55b3f1..23a7709a9d658 100644 --- a/libcxx/test/std/depr/depr.cpp.headers/cstdalign.verify.cpp +++ b/libcxx/test/std/depr/depr.cpp.headers/cstdalign.verify.cpp @@ -14,12 +14,6 @@ // UNSUPPORTED: c++03, c++11, c++14 // UNSUPPORTED: clang-modules-build -#include "test_macros.h" - #include -#if TEST_STD_VER >= 20 -// expected-warning@cstdalign:* {{'__standard_header_cstdalign' is deprecated: removed in C++20.}} -#else -// expected-warning@cstdalign:* {{'__standard_header_cstdalign' is deprecated}} -#endif +// expected-warning@cstdalign:* {{ is deprecated in C++17 and removed in C++20.}} diff --git a/libcxx/test/std/depr/depr.cpp.headers/cstdbool.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/cstdbool.verify.cpp index eddefe14d35ea..c2c0f03c52d3c 100644 --- a/libcxx/test/std/depr/depr.cpp.headers/cstdbool.verify.cpp +++ b/libcxx/test/std/depr/depr.cpp.headers/cstdbool.verify.cpp @@ -14,12 +14,6 @@ // UNSUPPORTED: c++03, c++11, c++14 // UNSUPPORTED: clang-modules-build -#include "test_macros.h" - #include -#if TEST_STD_VER >= 20 -// expected-warning@cstdbool:* {{'__standard_header_cstdbool' is deprecated: removed in C++20.}} -#else -// expected-warning@cstdbool:* {{'__standard_header_cstdbool' is deprecated}} -#endif +// expected-warning@cstdbool:* {{ is deprecated in C++17 and removed in C++20.}} diff --git a/libcxx/test/std/depr/depr.cpp.headers/ctgmath.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/ctgmath.verify.cpp index 097ab1643d15a..4f5564915443d 100644 --- a/libcxx/test/std/depr/depr.cpp.headers/ctgmath.verify.cpp +++ b/libcxx/test/std/depr/depr.cpp.headers/ctgmath.verify.cpp @@ -14,12 +14,6 @@ // UNSUPPORTED: c++03, c++11, c++14 // UNSUPPORTED: clang-modules-build -#include "test_macros.h" - #include -#if TEST_STD_VER >= 20 -// expected-warning@ctgmath:* {{'__standard_header_ctgmath' is deprecated: removed in C++20. Include and instead.}} -#else -// expected-warning@ctgmath:* {{'__standard_header_ctgmath' is deprecated: Include and instead.}} -#endif +// expected-warning@ctgmath:* {{ is deprecated in C++17 and removed in C++20. Include and instead.}} diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/make_from_tuple.verify.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/make_from_tuple.verify.cpp index 12d778408d5ec..e58e760a5ce81 100644 --- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/make_from_tuple.verify.cpp +++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/make_from_tuple.verify.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// REQUIRES: std-at-least-c++23 +// REQUIRES: std-at-least-c++26 // @@ -21,11 +21,6 @@ void test() { // expected-error@*:* {{static assertion failed}} - // Turns to an error since C++26 (Disallow Binding a Returned Glvalue to a Temporary https://wg21.link/P2748R5). -#if TEST_STD_VER >= 26 // expected-error@tuple:* {{returning reference to local temporary object}} -#else - // expected-warning@tuple:* {{returning reference to local temporary object}} -#endif std::ignore = std::make_from_tuple(std::tuple{}); } diff --git a/libcxx/utils/libcxx/test/format.py b/libcxx/utils/libcxx/test/format.py index 975209c273f8c..76e9115295b99 100644 --- a/libcxx/utils/libcxx/test/format.py +++ b/libcxx/utils/libcxx/test/format.py @@ -99,7 +99,7 @@ def parseScript(test, preamble): substitutions.append( ( "%{verify}", - "%{cxx} %s %{flags} %{compile_flags} -fsyntax-only -Wno-error -Xclang -verify -Xclang -verify-ignore-unexpected=note -ferror-limit=0", + "%{cxx} %s %{flags} %{compile_flags} -U_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER -fsyntax-only -Wno-error -Xclang -verify -Xclang -verify-ignore-unexpected=note -ferror-limit=0", ) ) substitutions.append(("%{run}", "%{exec} %t.exe")) diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp index 2a97df4785ecb..b0dc797292511 100644 --- a/lld/ELF/Arch/AArch64.cpp +++ b/lld/ELF/Arch/AArch64.cpp @@ -762,7 +762,7 @@ void AArch64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, relocateNoSym(loc, R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC, val); break; default: - llvm_unreachable("unsupported relocation for TLS GD to LE relaxation"); + llvm_unreachable("unsupported relocation for TLS GD to IE relaxation"); } } diff --git a/lldb/include/lldb/Utility/RegisterValue.h b/lldb/include/lldb/Utility/RegisterValue.h index 49aaf68be17fc..baf984cbcb052 100644 --- a/lldb/include/lldb/Utility/RegisterValue.h +++ b/lldb/include/lldb/Utility/RegisterValue.h @@ -46,7 +46,8 @@ class RegisterValue { eTypeUInt16, eTypeUInt32, eTypeUInt64, - eTypeUInt128, + eTypeUIntN, /// < This value is used when the (integer) register is larger + /// than 64-bits. eTypeFloat, eTypeDouble, eTypeLongDouble, @@ -69,7 +70,7 @@ class RegisterValue { m_scalar = inst; } - explicit RegisterValue(llvm::APInt inst) : m_type(eTypeUInt128) { + explicit RegisterValue(llvm::APInt inst) : m_type(eTypeUIntN) { m_scalar = llvm::APInt(std::move(inst)); } @@ -178,7 +179,7 @@ class RegisterValue { } void operator=(llvm::APInt uint) { - m_type = eTypeUInt128; + m_type = eTypeUIntN; m_scalar = llvm::APInt(std::move(uint)); } @@ -217,8 +218,8 @@ class RegisterValue { m_scalar = uint; } - void SetUInt128(llvm::APInt uint) { - m_type = eTypeUInt128; + void SetUIntN(llvm::APInt uint) { + m_type = eTypeUIntN; m_scalar = std::move(uint); } diff --git a/lldb/source/Utility/DataExtractor.cpp b/lldb/source/Utility/DataExtractor.cpp index e9be0cba81f0c..a9aea168acf41 100644 --- a/lldb/source/Utility/DataExtractor.cpp +++ b/lldb/source/Utility/DataExtractor.cpp @@ -662,10 +662,6 @@ size_t DataExtractor::ExtractBytes(offset_t offset, offset_t length, const uint8_t *src = PeekData(offset, length); if (src) { if (dst_byte_order != GetByteOrder()) { - // Validate that only a word- or register-sized dst is byte swapped - assert(length == 1 || length == 2 || length == 4 || length == 8 || - length == 10 || length == 16 || length == 32); - for (uint32_t i = 0; i < length; ++i) (static_cast(dst))[i] = src[length - i - 1]; } else diff --git a/lldb/source/Utility/RegisterValue.cpp b/lldb/source/Utility/RegisterValue.cpp index 8b2af4e3d4f0e..c28c9e2d4d106 100644 --- a/lldb/source/Utility/RegisterValue.cpp +++ b/lldb/source/Utility/RegisterValue.cpp @@ -127,7 +127,7 @@ bool RegisterValue::GetScalarValue(Scalar &scalar) const { case eTypeUInt16: case eTypeUInt32: case eTypeUInt64: - case eTypeUInt128: + case eTypeUIntN: case eTypeFloat: case eTypeDouble: case eTypeLongDouble: @@ -180,8 +180,6 @@ Status RegisterValue::SetValueFromData(const RegisterInfo ®_info, if (src_len > reg_info.byte_size) src_len = reg_info.byte_size; - type128 int128; - m_type = eTypeInvalid; switch (reg_info.encoding) { case eEncodingInvalid: @@ -196,17 +194,15 @@ Status RegisterValue::SetValueFromData(const RegisterInfo ®_info, SetUInt32(src.GetMaxU32(&src_offset, src_len)); else if (reg_info.byte_size <= 8) SetUInt64(src.GetMaxU64(&src_offset, src_len)); - else if (reg_info.byte_size <= 16) { - uint64_t data1 = src.GetU64(&src_offset); - uint64_t data2 = src.GetU64(&src_offset); - if (src.GetByteOrder() == eByteOrderLittle) { - int128.x[0] = data1; - int128.x[1] = data2; - } else { - int128.x[0] = data2; - int128.x[1] = data1; - } - SetUInt128(llvm::APInt(128, int128.x)); + else { + std::vector native_endian_src(src_len, 0); + src.ExtractBytes(src_offset, src_len, + llvm::sys::IsLittleEndianHost ? eByteOrderLittle + : eByteOrderBig, + native_endian_src.data()); + llvm::APInt uint = llvm::APInt::getZero(src_len * 8); + llvm::LoadIntFromMemory(uint, native_endian_src.data(), src_len); + SetUIntN(uint); } break; case eEncodingIEEE754: @@ -442,7 +438,7 @@ bool RegisterValue::SignExtend(uint32_t sign_bitpos) { case eTypeUInt16: case eTypeUInt32: case eTypeUInt64: - case eTypeUInt128: + case eTypeUIntN: return m_scalar.SignExtend(sign_bitpos); case eTypeFloat: case eTypeDouble: @@ -465,7 +461,7 @@ bool RegisterValue::CopyValue(const RegisterValue &rhs) { case eTypeUInt16: case eTypeUInt32: case eTypeUInt64: - case eTypeUInt128: + case eTypeUIntN: case eTypeFloat: case eTypeDouble: case eTypeLongDouble: @@ -581,7 +577,7 @@ llvm::APInt RegisterValue::GetAsUInt128(const llvm::APInt &fail_value, case eTypeUInt16: case eTypeUInt32: case eTypeUInt64: - case eTypeUInt128: + case eTypeUIntN: case eTypeFloat: case eTypeDouble: case eTypeLongDouble: @@ -616,7 +612,7 @@ float RegisterValue::GetAsFloat(float fail_value, bool *success_ptr) const { break; case eTypeUInt32: case eTypeUInt64: - case eTypeUInt128: + case eTypeUIntN: case eTypeFloat: case eTypeDouble: case eTypeLongDouble: @@ -636,7 +632,7 @@ double RegisterValue::GetAsDouble(double fail_value, bool *success_ptr) const { case eTypeUInt32: case eTypeUInt64: - case eTypeUInt128: + case eTypeUIntN: case eTypeFloat: case eTypeDouble: case eTypeLongDouble: @@ -657,7 +653,7 @@ long double RegisterValue::GetAsLongDouble(long double fail_value, case eTypeUInt32: case eTypeUInt64: - case eTypeUInt128: + case eTypeUIntN: case eTypeFloat: case eTypeDouble: case eTypeLongDouble: @@ -676,7 +672,7 @@ const void *RegisterValue::GetBytes() const { case eTypeUInt16: case eTypeUInt32: case eTypeUInt64: - case eTypeUInt128: + case eTypeUIntN: case eTypeFloat: case eTypeDouble: case eTypeLongDouble: @@ -698,7 +694,7 @@ uint32_t RegisterValue::GetByteSize() const { return 2; case eTypeUInt32: case eTypeUInt64: - case eTypeUInt128: + case eTypeUIntN: case eTypeFloat: case eTypeDouble: case eTypeLongDouble: @@ -721,7 +717,7 @@ bool RegisterValue::SetUInt(uint64_t uint, uint32_t byte_size) { } else if (byte_size <= 8) { SetUInt64(uint); } else if (byte_size <= 16) { - SetUInt128(llvm::APInt(128, uint)); + SetUIntN(llvm::APInt(128, uint)); } else return false; return true; @@ -749,7 +745,7 @@ bool RegisterValue::operator==(const RegisterValue &rhs) const { case eTypeUInt16: case eTypeUInt32: case eTypeUInt64: - case eTypeUInt128: + case eTypeUIntN: case eTypeFloat: case eTypeDouble: case eTypeLongDouble: @@ -774,7 +770,7 @@ bool RegisterValue::ClearBit(uint32_t bit) { case eTypeUInt16: case eTypeUInt32: case eTypeUInt64: - case eTypeUInt128: + case eTypeUIntN: if (bit < (GetByteSize() * 8)) { return m_scalar.ClearBit(bit); } @@ -814,7 +810,7 @@ bool RegisterValue::SetBit(uint32_t bit) { case eTypeUInt16: case eTypeUInt32: case eTypeUInt64: - case eTypeUInt128: + case eTypeUIntN: if (bit < (GetByteSize() * 8)) { return m_scalar.SetBit(bit); } diff --git a/lldb/test/Shell/lldb-server/TestErrorMessages.test b/lldb/test/Shell/lldb-server/TestGdbserverErrorMessages.test similarity index 100% rename from lldb/test/Shell/lldb-server/TestErrorMessages.test rename to lldb/test/Shell/lldb-server/TestGdbserverErrorMessages.test diff --git a/lldb/test/Shell/lldb-server/TestPlatformErrorMessages.test b/lldb/test/Shell/lldb-server/TestPlatformErrorMessages.test new file mode 100644 index 0000000000000..7d3b37aa5fc39 --- /dev/null +++ b/lldb/test/Shell/lldb-server/TestPlatformErrorMessages.test @@ -0,0 +1,25 @@ +RUN: %platformserver 2>&1 | FileCheck --check-prefixes=NO_LISTEN,ALL %s +NO_LISTEN: error: either --listen or --child-platform-fd is required + +RUN: %lldb-server platform --listen 2>&1 | FileCheck --check-prefixes=LISTEN_MISSING,ALL %s +LISTEN_MISSING: error: --listen: missing argument + +RUN: %lldb-server p --bogus 2>&1 | FileCheck --check-prefixes=BOGUS,ALL %s +BOGUS: error: unknown argument '--bogus' + +RUN: %platformserver --gdbserver-port 2>&1 | FileCheck --check-prefixes=GDBPORT_MISSING,ALL %s +GDBPORT_MISSING: error: --gdbserver-port: missing argument + +RUN: %platformserver --gdbserver-port notanumber --listen :1234 2>&1 | FileCheck --check-prefixes=GDBPORT_INVALID %s +GDBPORT_INVALID: error: invalid --gdbserver-port value + +RUN: %platformserver --socket-file 2>&1 | FileCheck --check-prefixes=SOCKETFILE_MISSING,ALL %s +SOCKETFILE_MISSING: error: --socket-file: missing argument + +RUN: %platformserver --log-file 2>&1 | FileCheck --check-prefixes=LOGFILE_MISSING,ALL %s +LOGFILE_MISSING: error: --log-file: missing argument + +RUN: %platformserver --log-channels 2>&1 | FileCheck --check-prefixes=LOGCHANNELS_MISSING,ALL %s +LOGCHANNELS_MISSING: error: --log-channels: missing argument + +ALL: Use 'lldb-server{{(\.exe)?}} {{p|platform}} --help' for a complete list of options. diff --git a/lldb/test/Shell/lldb-server/TestPlatformHelp.test b/lldb/test/Shell/lldb-server/TestPlatformHelp.test new file mode 100644 index 0000000000000..c5ced8a318100 --- /dev/null +++ b/lldb/test/Shell/lldb-server/TestPlatformHelp.test @@ -0,0 +1,40 @@ +RUN: %platformserver --help 2>&1 | FileCheck %s +RUN: %platformserver -h 2>&1 | FileCheck %s +RUN: %lldb-server p --help 2>&1 | FileCheck %s +RUN: %lldb-server p -h 2>&1 | FileCheck %s +RUN: %lldb-server platform --help 2>&1 | FileCheck %s +RUN: %lldb-server platform -h 2>&1 | FileCheck %s + +CHECK: OVERVIEW: lldb-server{{(\.exe)?}} platform + +CHECK: USAGE: lldb-server{{(\.exe)?}} {{p|platform}} [options] --listen <[host]:port> {{\[}}[--] program args...] + +CHECK: CONNECTION OPTIONS: +CHECK: --gdbserver-port +CHECK-SAME: Short form: -P +CHECK: --listen <[host]:port> +CHECK-SAME: Short form: -L +CHECK: --socket-file +CHECK-SAME: Short form: -f + +CHECK: GENERAL OPTIONS: +CHECK: --help +CHECK: --log-channels +CHECK: Short form: -c +CHECK: --log-file +CHECK-SAME: Short form: -l +CHECK: --server + +CHECK: OPTIONS: +CHECK: -- program args + +CHECK: DESCRIPTION +CHECK: Acts as a platform server for remote debugging + +CHECK: EXAMPLES +CHECK: # Listen on port 1234, exit after first connection +CHECK: lldb-server{{(\.exe)?}} platform --listen tcp://0.0.0.0:1234 +CHECK: # Listen on port 5555, accept multiple connections +CHECK: lldb-server{{(\.exe)?}} platform --server --listen tcp://localhost:5555 +CHECK: # Listen on Unix domain socket +CHECK: lldb-server{{(\.exe)?}} platform --listen unix:///tmp/lldb-server.sock diff --git a/lldb/tools/lldb-server/CMakeLists.txt b/lldb/tools/lldb-server/CMakeLists.txt index 1d8dc72a3f872..fb55c64936121 100644 --- a/lldb/tools/lldb-server/CMakeLists.txt +++ b/lldb/tools/lldb-server/CMakeLists.txt @@ -2,6 +2,10 @@ set(LLVM_TARGET_DEFINITIONS LLGSOptions.td) tablegen(LLVM LLGSOptions.inc -gen-opt-parser-defs) add_public_tablegen_target(LLGSOptionsTableGen) +set(LLVM_TARGET_DEFINITIONS PlatformOptions.td) +tablegen(LLVM PlatformOptions.inc -gen-opt-parser-defs) +add_public_tablegen_target(PlatformOptionsTableGen) + set(LLDB_PLUGINS) if(CMAKE_SYSTEM_NAME MATCHES "Linux|Android") @@ -67,6 +71,7 @@ add_lldb_tool(lldb-server add_dependencies(lldb-server LLGSOptionsTableGen + PlatformOptionsTableGen ${tablegen_deps} ) target_include_directories(lldb-server PRIVATE "${LLDB_SOURCE_DIR}/source") diff --git a/lldb/tools/lldb-server/PlatformOptions.td b/lldb/tools/lldb-server/PlatformOptions.td new file mode 100644 index 0000000000000..eedd1d8c35343 --- /dev/null +++ b/lldb/tools/lldb-server/PlatformOptions.td @@ -0,0 +1,75 @@ +include "llvm/Option/OptParser.td" + +class F: Flag<["--", "-"], name>; +class R prefixes, string name> + : Option; + +multiclass SJ { + def NAME: Separate<["--", "-"], name>, + HelpText; + def NAME # _eq: Joined<["--", "-"], name # "=">, + Alias(NAME)>; +} + +def grp_connect : OptionGroup<"connection">, HelpText<"CONNECTION OPTIONS">; + +defm listen: SJ<"listen", "Host and port to listen on. Format: [host]:port or protocol://[host]:port (e.g., tcp://localhost:1234, unix:///path/to/socket). Short form: -L">, + MetaVarName<"<[host]:port>">, + Group; +def: Separate<["-"], "L">, Alias, + Group; + +defm socket_file: SJ<"socket-file", "Write listening socket information (port number for TCP or path for Unix domain sockets) to the specified file. Short form: -f">, + MetaVarName<"">, + Group; +def: Separate<["-"], "f">, Alias, + Group; + +defm gdbserver_port: SJ<"gdbserver-port", "Port to use for spawned gdbserver instances. If 0 or unspecified, a port will be chosen automatically. Short form: -P">, + MetaVarName<"">, + Group; +def: Separate<["-"], "P">, Alias, + Group; + +defm child_platform_fd: SJ<"child-platform-fd", "File descriptor for communication with parent platform process (internal use only).">, + MetaVarName<"">, + Group, + Flags<[HelpHidden]>; + +def grp_general : OptionGroup<"general options">, HelpText<"GENERAL OPTIONS">; + +def server: F<"server">, + HelpText<"Run in server mode, accepting multiple client connections sequentially. Without this flag, the server exits after handling the first connection.">, + Group; + +defm log_channels: SJ<"log-channels", "Channels to log. A colon-separated list of entries. Each entry starts with a channel followed by a space-separated list of categories. Common channels: lldb, gdb-remote, platform, process. Short form: -c">, + MetaVarName<"">, + Group; +def: Separate<["-"], "c">, Alias, + Group; + +defm log_file: SJ<"log-file", "Destination file to log to. If empty, log to stderr. Short form: -l">, + MetaVarName<"">, + Group; +def: Separate<["-"], "l">, Alias, + Group; + +def debug: F<"debug">, + HelpText<"(Unused, kept for backward compatibility)">, + Group, + Flags<[HelpHidden]>; + +def verbose: F<"verbose">, + HelpText<"(Unused, kept for backward compatibility)">, + Group, + Flags<[HelpHidden]>; + +def help: F<"help">, + HelpText<"Display this help message and exit.">, + Group; +def: Flag<["-"], "h">, Alias, + Group; + +def REM : R<["--"], "">, + HelpText<"Arguments to pass to launched gdbserver instances.">, + MetaVarName<"program args">; diff --git a/lldb/tools/lldb-server/lldb-platform.cpp b/lldb/tools/lldb-server/lldb-platform.cpp index 0bd928507ba89..59b1eb419bc2b 100644 --- a/lldb/tools/lldb-server/lldb-platform.cpp +++ b/lldb/tools/lldb-server/lldb-platform.cpp @@ -21,6 +21,9 @@ #include #include +#include "llvm/Option/ArgList.h" +#include "llvm/Option/OptTable.h" +#include "llvm/Option/Option.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/ScopedPrinter.h" #include "llvm/Support/WithColor.h" @@ -56,22 +59,69 @@ using namespace llvm; // of target CPUs. For now, let's just use 100. static const int backlog = 100; static const int socket_error = -1; -static int g_debug = 0; -static int g_verbose = 0; -static int g_server = 0; - -// option descriptors for getopt_long_only() -static struct option g_long_options[] = { - {"debug", no_argument, &g_debug, 1}, - {"verbose", no_argument, &g_verbose, 1}, - {"log-file", required_argument, nullptr, 'l'}, - {"log-channels", required_argument, nullptr, 'c'}, - {"listen", required_argument, nullptr, 'L'}, - {"gdbserver-port", required_argument, nullptr, 'P'}, - {"socket-file", required_argument, nullptr, 'f'}, - {"server", no_argument, &g_server, 1}, - {"child-platform-fd", required_argument, nullptr, 2}, - {nullptr, 0, nullptr, 0}}; + +namespace { +using namespace llvm::opt; + +enum ID { + OPT_INVALID = 0, // This is not an option ID. +#define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__), +#include "PlatformOptions.inc" +#undef OPTION +}; + +#define OPTTABLE_STR_TABLE_CODE +#include "PlatformOptions.inc" +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "PlatformOptions.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE + +static constexpr opt::OptTable::Info InfoTable[] = { +#define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), +#include "PlatformOptions.inc" +#undef OPTION +}; + +class PlatformOptTable : public opt::GenericOptTable { +public: + PlatformOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {} + + void PrintHelp(llvm::StringRef Name) { + std::string Usage = + (Name + " [options] --listen <[host]:port> [[--] program args...]") + .str(); + + std::string Title = "lldb-server platform"; + + OptTable::printHelp(llvm::outs(), Usage.c_str(), Title.c_str()); + + llvm::outs() << R"( +DESCRIPTION + Acts as a platform server for remote debugging. When LLDB clients connect, + the platform server handles platform operations (file transfers, process + launching) and spawns debug server instances (lldb-server gdbserver) to + handle actual debugging sessions. + + By default, the server exits after handling one connection. Use --server + to keep running and accept multiple connections sequentially. + +EXAMPLES + # Listen on port 1234, exit after first connection + lldb-server platform --listen tcp://0.0.0.0:1234 + + # Listen on port 5555, accept multiple connections + lldb-server platform --server --listen tcp://localhost:5555 + + # Listen on Unix domain socket + lldb-server platform --listen unix:///tmp/lldb-server.sock + +)"; + } +}; +} // namespace #if defined(__APPLE__) #define LOW_PORT (IPPORT_RESERVED) @@ -97,12 +147,11 @@ static void signal_handler(int signo) { } #endif -static void display_usage(const char *progname, const char *subcommand) { - fprintf(stderr, "Usage:\n %s %s [--log-file log-file-name] [--log-channels " - "log-channel-list] [--port-file port-file-path] --server " - "--listen port\n", - progname, subcommand); - exit(0); +static void display_usage(PlatformOptTable &Opts, const char *progname, + const char *subcommand) { + std::string Name = + (llvm::sys::path::filename(progname) + " " + subcommand).str(); + Opts.PrintHelp(Name); } static Status parse_listen_host_port(Socket::SocketProtocol &protocol, @@ -261,7 +310,8 @@ static Status spawn_process(const char *progname, const FileSpec &prog, const Socket *conn_socket, uint16_t gdb_port, const lldb_private::Args &args, const std::string &log_file, - const StringRef log_channels, MainLoop &main_loop) { + const StringRef log_channels, MainLoop &main_loop, + bool multi_client) { Status error; SharedSocket shared_socket(conn_socket, error); if (error.Fail()) @@ -297,9 +347,12 @@ static Status spawn_process(const char *progname, const FileSpec &prog, launch_info.SetLaunchInSeparateProcessGroup(false); - if (g_server) + // Set up process monitor callback based on whether we're in server mode. + if (multi_client) + // In server mode: empty callback (don't terminate when child exits). launch_info.SetMonitorProcessCallback([](lldb::pid_t, int, int) {}); else + // In single-client mode: terminate main loop when child exits. launch_info.SetMonitorProcessCallback([&main_loop](lldb::pid_t, int, int) { main_loop.AddPendingCallback( [](MainLoopBase &loop) { loop.RequestTermination(); }); @@ -371,107 +424,101 @@ int main_platform(int argc, char *argv[]) { signal(SIGPIPE, SIG_IGN); signal(SIGHUP, signal_handler); #endif - int long_option_index = 0; - Status error; - std::string listen_host_port; - int ch; - std::string log_file; - StringRef - log_channels; // e.g. "lldb process threads:gdb-remote default:linux all" + // Special handling for 'help' as first argument. + if (argc > 0 && strcmp(argv[0], "help") == 0) { + PlatformOptTable Opts; + display_usage(Opts, progname, subcommand); + return EXIT_SUCCESS; + } + Status error; shared_fd_t fd = SharedSocket::kInvalidFD; - uint16_t gdbserver_port = 0; - FileSpec socket_file; - bool show_usage = false; - int option_error = 0; - std::string short_options(OptionParser::GetShortOptionString(g_long_options)); + PlatformOptTable Opts; + BumpPtrAllocator Alloc; + StringSaver Saver(Alloc); + bool HasError = false; -#if __GLIBC__ - optind = 0; -#else - optreset = 1; - optind = 1; -#endif + opt::InputArgList Args = + Opts.parseArgs(argc, argv, OPT_UNKNOWN, Saver, [&](llvm::StringRef Msg) { + WithColor::error() << Msg << "\n"; + HasError = true; + }); - while ((ch = getopt_long_only(argc, argv, short_options.c_str(), - g_long_options, &long_option_index)) != -1) { - switch (ch) { - case 0: // Any optional that auto set themselves will return 0 - break; + std::string Name = + (llvm::sys::path::filename(progname) + " " + subcommand).str(); + std::string HelpText = + "Use '" + Name + " --help' for a complete list of options.\n"; - case 'L': - listen_host_port.append(optarg); - break; + if (HasError) { + llvm::errs() << HelpText; + return EXIT_FAILURE; + } - case 'l': // Set Log File - if (optarg && optarg[0]) - log_file.assign(optarg); - break; + if (Args.hasArg(OPT_help)) { + display_usage(Opts, progname, subcommand); + return EXIT_SUCCESS; + } - case 'c': // Log Channels - if (optarg && optarg[0]) - log_channels = StringRef(optarg); - break; + // Parse arguments. + std::string listen_host_port = Args.getLastArgValue(OPT_listen).str(); + std::string log_file = Args.getLastArgValue(OPT_log_file).str(); + StringRef log_channels = Args.getLastArgValue(OPT_log_channels); + bool multi_client = Args.hasArg(OPT_server); + [[maybe_unused]] bool debug = Args.hasArg(OPT_debug); + [[maybe_unused]] bool verbose = Args.hasArg(OPT_verbose); + + if (Args.hasArg(OPT_socket_file)) { + socket_file.SetFile(Args.getLastArgValue(OPT_socket_file), + FileSpec::Style::native); + } - case 'f': // Socket file - if (optarg && optarg[0]) - socket_file.SetFile(optarg, FileSpec::Style::native); - break; + if (Args.hasArg(OPT_gdbserver_port)) { + if (!llvm::to_integer(Args.getLastArgValue(OPT_gdbserver_port), + gdbserver_port)) { + WithColor::error() << "invalid --gdbserver-port value\n"; + return EXIT_FAILURE; + } + } - case 'P': - case 'm': - case 'M': { - uint16_t portnum; - if (!llvm::to_integer(optarg, portnum)) { - WithColor::error() << "invalid port number string " << optarg << "\n"; - option_error = 2; - break; - } - // Note the condition gdbserver_port > HIGH_PORT is valid in case of using - // --child-platform-fd. Check gdbserver_port later. - if (ch == 'P') - gdbserver_port = portnum; - else if (gdbserver_port == 0) - gdbserver_port = portnum; - } break; - - case 2: { - uint64_t _fd; - if (!llvm::to_integer(optarg, _fd)) { - WithColor::error() << "invalid fd " << optarg << "\n"; - option_error = 6; - } else - fd = (shared_fd_t)_fd; - } break; - - case 'h': /* fall-through is intentional */ - case '?': - show_usage = true; - break; + if (Args.hasArg(OPT_child_platform_fd)) { + uint64_t _fd; + if (!llvm::to_integer(Args.getLastArgValue(OPT_child_platform_fd), _fd)) { + WithColor::error() << "invalid --child-platform-fd value\n"; + return EXIT_FAILURE; } + fd = (shared_fd_t)_fd; } if (!LLDBServerUtilities::SetupLogging(log_file, log_channels, 0)) return -1; // Print usage and exit if no listening port is specified. - if (listen_host_port.empty() && fd == SharedSocket::kInvalidFD) - show_usage = true; + if (listen_host_port.empty() && fd == SharedSocket::kInvalidFD) { + WithColor::error() << "either --listen or --child-platform-fd is required\n" + << HelpText; + return EXIT_FAILURE; + } - if (show_usage || option_error) { - display_usage(progname, subcommand); - exit(option_error); + // Get remaining arguments for inferior. + std::vector Inputs; + for (opt::Arg *Arg : Args.filtered(OPT_INPUT)) + Inputs.push_back(Arg->getValue()); + if (opt::Arg *Arg = Args.getLastArg(OPT_REM)) { + for (const char *Val : Arg->getValues()) + Inputs.push_back(Val); } - // Skip any options we consumed with getopt_long_only. - argc -= optind; - argv += optind; lldb_private::Args inferior_arguments; - inferior_arguments.SetArguments(argc, const_cast(argv)); + if (!Inputs.empty()) { + std::vector args_ptrs; + for (const auto &Input : Inputs) + args_ptrs.push_back(Input.data()); + inferior_arguments.SetArguments(args_ptrs.size(), args_ptrs.data()); + } FileSpec debugserver_path = GetDebugserverPath(); if (!debugserver_path) { @@ -514,7 +561,7 @@ int main_platform(int argc, char *argv[]) { platform.SetConnection( std::make_unique(std::move(socket))); client_handle(platform, inferior_arguments); - return 0; + return EXIT_SUCCESS; } if (gdbserver_port != 0 && @@ -522,7 +569,7 @@ int main_platform(int argc, char *argv[]) { WithColor::error() << llvm::formatv("Port number {0} is not in the " "valid user port range of {1} - {2}\n", gdbserver_port, LOW_PORT, HIGH_PORT); - return 1; + return EXIT_FAILURE; } Socket::SocketProtocol protocol = Socket::ProtocolUnixDomain; @@ -559,7 +606,7 @@ int main_platform(int argc, char *argv[]) { if (error.Fail()) { fprintf(stderr, "failed to write socket id to %s: %s\n", socket_file.GetPath().c_str(), error.AsCString()); - return 1; + return EXIT_FAILURE; } } @@ -577,22 +624,22 @@ int main_platform(int argc, char *argv[]) { llvm::Expected> platform_handles = platform_sock->Accept( main_loop, [progname, gdbserver_port, &inferior_arguments, log_file, - log_channels, &main_loop, + log_channels, &main_loop, multi_client, &platform_handles](std::unique_ptr sock_up) { printf("Connection established.\n"); Status error = spawn_process( progname, HostInfo::GetProgramFileSpec(), sock_up.get(), gdbserver_port, inferior_arguments, log_file, log_channels, - main_loop); + main_loop, multi_client); if (error.Fail()) { Log *log = GetLog(LLDBLog::Platform); LLDB_LOGF(log, "spawn_process failed: %s", error.AsCString()); WithColor::error() << "spawn_process failed: " << error.AsCString() << "\n"; - if (!g_server) + if (!multi_client) main_loop.RequestTermination(); } - if (!g_server) + if (!multi_client) platform_handles->clear(); }); if (!platform_handles) { @@ -616,5 +663,5 @@ int main_platform(int argc, char *argv[]) { fprintf(stderr, "lldb-server exiting...\n"); - return 0; + return EXIT_SUCCESS; } diff --git a/lldb/unittests/Utility/RegisterValueTest.cpp b/lldb/unittests/Utility/RegisterValueTest.cpp index 6239dbe21634a..7b27e841cbec5 100644 --- a/lldb/unittests/Utility/RegisterValueTest.cpp +++ b/lldb/unittests/Utility/RegisterValueTest.cpp @@ -57,13 +57,12 @@ TEST(RegisterValueTest, GetScalarValue) { APInt(128, 0x7766554433221100))); } -static const Scalar etalon128(APInt(128, 0xffeeddccbbaa9988ull) << 64 | - APInt(128, 0x7766554433221100ull)); - -void TestSetValueFromData128(void *src, const lldb::ByteOrder endianness) { - RegisterInfo ri{"uint128_register", +void TestSetValueFromData(const Scalar &etalon, void *src, size_t src_byte_size, + const lldb::ByteOrder endianness, + const RegisterValue::Type register_value_type) { + RegisterInfo ri{"test", nullptr, - 16, + static_cast(src_byte_size), 0, lldb::Encoding::eEncodingUint, lldb::Format::eFormatDefault, @@ -71,26 +70,289 @@ void TestSetValueFromData128(void *src, const lldb::ByteOrder endianness) { nullptr, nullptr, nullptr}; - DataExtractor src_extractor(src, 16, endianness, 8); + DataExtractor src_extractor(src, src_byte_size, endianness, 8); RegisterValue rv; EXPECT_TRUE(rv.SetValueFromData(ri, src_extractor, 0, false).Success()); Scalar s; EXPECT_TRUE(rv.GetScalarValue(s)); - EXPECT_EQ(s, etalon128); + EXPECT_EQ(rv.GetType(), register_value_type); + EXPECT_EQ(s, etalon); +} + +static const Scalar etalon7(APInt(32, 0x0000007F)); + +TEST(RegisterValueTest, SetValueFromData_7_le) { + uint8_t src[] = {0x7F}; + TestSetValueFromData(etalon7, src, 1, lldb::ByteOrder::eByteOrderLittle, + RegisterValue::eTypeUInt8); +} + +TEST(RegisterValueTest, SetValueFromData_7_be) { + uint8_t src[] = {0x7F}; + TestSetValueFromData(etalon7, src, 1, lldb::ByteOrder::eByteOrderBig, + RegisterValue::eTypeUInt8); +} + +static const Scalar etalon8(APInt(32, 0x000000FE)); + +TEST(RegisterValueTest, SetValueFromData_8_le) { + uint8_t src[] = {0xFE}; + TestSetValueFromData(etalon8, src, 1, lldb::ByteOrder::eByteOrderLittle, + RegisterValue::eTypeUInt8); +} + +TEST(RegisterValueTest, SetValueFromData_8_be) { + uint8_t src[] = {0xFE}; + TestSetValueFromData(etalon8, src, 1, lldb::ByteOrder::eByteOrderBig, + RegisterValue::eTypeUInt8); +} + +static const Scalar etalon9(APInt(32, 0x000001FE)); + +TEST(RegisterValueTest, SetValueFromData_9_le) { + uint8_t src[] = {0xFE, 0x01}; + TestSetValueFromData(etalon9, src, 2, lldb::ByteOrder::eByteOrderLittle, + RegisterValue::eTypeUInt16); +} + +TEST(RegisterValueTest, SetValueFromData_9_be) { + uint8_t src[] = {0x01, 0xFE}; + TestSetValueFromData(etalon9, src, 2, lldb::ByteOrder::eByteOrderBig, + RegisterValue::eTypeUInt16); +} + +static const Scalar etalon15(APInt(32, 0x00007FED)); + +TEST(RegisterValueTest, SetValueFromData_15_le) { + uint8_t src[] = {0xED, 0x7F}; + TestSetValueFromData(etalon15, src, 2, lldb::ByteOrder::eByteOrderLittle, + RegisterValue::eTypeUInt16); +} + +TEST(RegisterValueTest, SetValueFromData_15_be) { + uint8_t src[] = {0x7F, 0xED}; + TestSetValueFromData(etalon15, src, 2, lldb::ByteOrder::eByteOrderBig, + RegisterValue::eTypeUInt16); +} + +static const Scalar etalon16(APInt(32, 0x0000FEDC)); + +TEST(RegisterValueTest, SetValueFromData_16_le) { + uint8_t src[] = {0xDC, 0xFE}; + TestSetValueFromData(etalon16, src, 2, lldb::ByteOrder::eByteOrderLittle, + RegisterValue::eTypeUInt16); +} + +TEST(RegisterValueTest, SetValueFromData_16_be) { + uint8_t src[] = {0xFE, 0xDC}; + TestSetValueFromData(etalon16, src, 2, lldb::ByteOrder::eByteOrderBig, + RegisterValue::eTypeUInt16); +} + +static const Scalar etalon17(APInt(32, 0x0001FEDC)); + +TEST(RegisterValueTest, SetValueFromData_17_le) { + uint8_t src[] = {0xDC, 0xFE, 0x01}; + TestSetValueFromData(etalon17, src, 3, lldb::ByteOrder::eByteOrderLittle, + RegisterValue::eTypeUInt32); +} + +TEST(RegisterValueTest, SetValueFromData_17_be) { + uint8_t src[] = {0x01, 0xFE, 0xDC}; + TestSetValueFromData(etalon17, src, 3, lldb::ByteOrder::eByteOrderBig, + RegisterValue::eTypeUInt32); +} + +static const Scalar etalon24(APInt(32, 0x00FEDCBA)); + +TEST(RegisterValueTest, SetValueFromData_24_le) { + uint8_t src[] = {0xBA, 0xDC, 0xFE}; + TestSetValueFromData(etalon24, src, 3, lldb::ByteOrder::eByteOrderLittle, + RegisterValue::eTypeUInt32); +} + +TEST(RegisterValueTest, SetValueFromData_24_be) { + uint8_t src[] = {0xFE, 0xDC, 0xBA}; + TestSetValueFromData(etalon24, src, 3, lldb::ByteOrder::eByteOrderBig, + RegisterValue::eTypeUInt32); +} + +static const Scalar etalon31(APInt(32, 0x7EDCBA98)); + +TEST(RegisterValueTest, SetValueFromData_31_le) { + uint8_t src[] = {0x98, 0xBA, 0xDC, 0x7E}; + TestSetValueFromData(etalon31, src, 4, lldb::ByteOrder::eByteOrderLittle, + RegisterValue::eTypeUInt32); +} + +TEST(RegisterValueTest, SetValueFromData_31_be) { + uint8_t src[] = {0x7E, 0xDC, 0xBA, 0x98}; + TestSetValueFromData(etalon31, src, 4, lldb::ByteOrder::eByteOrderBig, + RegisterValue::eTypeUInt32); +} + +static const Scalar etalon32(APInt(32, 0xFEDCBA98)); + +TEST(RegisterValueTest, SetValueFromData_32_le) { + uint8_t src[] = {0x98, 0xBA, 0xDC, 0xFE}; + TestSetValueFromData(etalon32, src, 4, lldb::ByteOrder::eByteOrderLittle, + RegisterValue::eTypeUInt32); } -// Test that the "RegisterValue::SetValueFromData" method works correctly -// with 128-bit little-endian data that represents an integer. +TEST(RegisterValueTest, SetValueFromData_32_be) { + uint8_t src[] = {0xFE, 0xDC, 0xBA, 0x98}; + TestSetValueFromData(etalon32, src, 4, lldb::ByteOrder::eByteOrderBig, + RegisterValue::eTypeUInt32); +} + +static const Scalar etalon33(APInt(64, 0x00000001FEDCBA98)); + +TEST(RegisterValueTest, SetValueFromData_33_le) { + uint8_t src[] = {0x98, 0xBA, 0xDC, 0xFE, 0x01}; + TestSetValueFromData(etalon33, src, 5, lldb::ByteOrder::eByteOrderLittle, + RegisterValue::eTypeUInt64); +} + +TEST(RegisterValueTest, SetValueFromData_33_be) { + uint8_t src[] = {0x01, 0xFE, 0xDC, 0xBA, 0x98}; + TestSetValueFromData(etalon33, src, 5, lldb::ByteOrder::eByteOrderBig, + RegisterValue::eTypeUInt64); +} + +static const Scalar etalon40(APInt(64, 0x000000FEDCBA9876)); + +TEST(RegisterValueTest, SetValueFromData_40_le) { + uint8_t src[] = {0x76, 0x98, 0xBA, 0xDC, 0xFE}; + TestSetValueFromData(etalon40, src, 5, lldb::ByteOrder::eByteOrderLittle, + RegisterValue::eTypeUInt64); +} + +TEST(RegisterValueTest, SetValueFromData_40_be) { + uint8_t src[] = {0xFE, 0xDC, 0xBA, 0x98, 0x76}; + TestSetValueFromData(etalon40, src, 5, lldb::ByteOrder::eByteOrderBig, + RegisterValue::eTypeUInt64); +} + +static const Scalar etalon63(APInt(64, 0x7EDCBA9876543210)); + +TEST(RegisterValueTest, SetValueFromData_63_le) { + uint8_t src[] = {0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC, 0x7E}; + TestSetValueFromData(etalon63, src, 8, lldb::ByteOrder::eByteOrderLittle, + RegisterValue::eTypeUInt64); +} + +TEST(RegisterValueTest, SetValueFromData_63_be) { + uint8_t src[] = {0x7E, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10}; + TestSetValueFromData(etalon63, src, 8, lldb::ByteOrder::eByteOrderBig, + RegisterValue::eTypeUInt64); +} + +static const Scalar etalon64(APInt(64, 0xFEDCBA9876543210)); + +TEST(RegisterValueTest, SetValueFromData_64_le) { + uint8_t src[] = {0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC, 0xFE}; + TestSetValueFromData(etalon64, src, 8, lldb::ByteOrder::eByteOrderLittle, + RegisterValue::eTypeUInt64); +} + +TEST(RegisterValueTest, SetValueFromData_64_be) { + uint8_t src[] = {0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10}; + TestSetValueFromData(etalon64, src, 8, lldb::ByteOrder::eByteOrderBig, + RegisterValue::eTypeUInt64); +} + +static const Scalar etalon65(APInt(72, 0x0000000000000001ull) << 1 * 64 | + APInt(72, 0x0706050403020100ull) << 0 * 64); + +TEST(RegisterValueTest, SetValueFromData_65_le) { + uint8_t src[] = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x01}; + TestSetValueFromData(etalon65, src, 9, lldb::ByteOrder::eByteOrderLittle, + RegisterValue::eTypeUIntN); +} + +TEST(RegisterValueTest, SetValueFromData_65_be) { + uint8_t src[] = {0x01, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00}; + TestSetValueFromData(etalon65, src, 9, lldb::ByteOrder::eByteOrderBig, + RegisterValue::eTypeUIntN); +} + +static const Scalar etalon127(APInt(128, 0x7f0e0d0c0b0a0908ull) << 1 * 64 | + APInt(128, 0x0706050403020100ull) << 0 * 64); + +TEST(RegisterValueTest, SetValueFromData_127_le) { + uint8_t src[] = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x7f}; + TestSetValueFromData(etalon127, src, 16, lldb::ByteOrder::eByteOrderLittle, + RegisterValue::eTypeUIntN); +} + +TEST(RegisterValueTest, SetValueFromData_127_be) { + uint8_t src[] = {0x7f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, + 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00}; + TestSetValueFromData(etalon127, src, 16, lldb::ByteOrder::eByteOrderBig, + RegisterValue::eTypeUIntN); +} + +static const Scalar etalon128(APInt(128, 0x0f0e0d0c0b0a0908ull) << 1 * 64 | + APInt(128, 0x0706050403020100ull) << 0 * 64); + TEST(RegisterValueTest, SetValueFromData_128_le) { - uint8_t src[] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, - 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff}; - TestSetValueFromData128(src, lldb::ByteOrder::eByteOrderLittle); + uint8_t src[] = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f}; + TestSetValueFromData(etalon128, src, 16, lldb::ByteOrder::eByteOrderLittle, + RegisterValue::eTypeUIntN); } -// Test that the "RegisterValue::SetValueFromData" method works correctly -// with 128-bit big-endian data that represents an integer. TEST(RegisterValueTest, SetValueFromData_128_be) { - uint8_t src[] = {0xff, 0xee, 0xdd, 0xcc, 0xbb, 0xaa, 0x99, 0x88, - 0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11, 0x00}; - TestSetValueFromData128(src, lldb::ByteOrder::eByteOrderBig); + uint8_t src[] = {0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, + 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00}; + TestSetValueFromData(etalon128, src, 16, lldb::ByteOrder::eByteOrderBig, + RegisterValue::eTypeUIntN); +} + +static const Scalar etalon256(APInt(256, 0x1f1e1d1c1b1a1918ull) << 3 * 64 | + APInt(256, 0x1716151413121110ull) << 2 * 64 | + APInt(256, 0x0f0e0d0c0b0a0908ull) << 1 * 64 | + APInt(256, 0x0706050403020100ull) << 0 * 64); + +TEST(RegisterValueTest, SetValueFromData_256_le) { + uint8_t src[] = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f}; + TestSetValueFromData(etalon256, src, 32, lldb::ByteOrder::eByteOrderLittle, + RegisterValue::eTypeUIntN); +} + +TEST(RegisterValueTest, SetValueFromData_256_be) { + uint8_t src[] = {0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, + 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, + 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, + 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00}; + TestSetValueFromData(etalon256, src, 32, lldb::ByteOrder::eByteOrderBig, + RegisterValue::eTypeUIntN); +} + +static const Scalar etalon257(APInt(512, 0x0000000000000001ull) << 4 * 64 | + APInt(512, 0x1f1e1d1c1b1a1918ull) << 3 * 64 | + APInt(512, 0x1716151413121110ull) << 2 * 64 | + APInt(512, 0x0f0e0d0c0b0a0908ull) << 1 * 64 | + APInt(512, 0x0706050403020100ull) << 0 * 64); + +TEST(RegisterValueTest, SetValueFromData_257_le) { + uint8_t src[] = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, + 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, + 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, + 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x01}; + TestSetValueFromData(etalon257, src, 33, lldb::ByteOrder::eByteOrderLittle, + RegisterValue::eTypeUIntN); +} + +TEST(RegisterValueTest, SetValueFromData_257_be) { + uint8_t src[] = {0x01, 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, + 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, + 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, + 0x05, 0x04, 0x03, 0x02, 0x01, 0x00}; + TestSetValueFromData(etalon257, src, 33, lldb::ByteOrder::eByteOrderBig, + RegisterValue::eTypeUIntN); } diff --git a/llvm/cmake/modules/TableGen.cmake b/llvm/cmake/modules/TableGen.cmake index 9a2e73a1e3718..84c03cd6432ed 100644 --- a/llvm/cmake/modules/TableGen.cmake +++ b/llvm/cmake/modules/TableGen.cmake @@ -66,6 +66,16 @@ function(tablegen project ofn) list(APPEND LLVM_TABLEGEN_FLAGS "-omit-comments") endif() + set(EXTRA_OUTPUTS) + if("-gen-register-info" IN_LIST ARGN) + cmake_path(GET ofn STEM OUTPUT_BASENAME) + list(APPEND EXTRA_OUTPUTS + ${CMAKE_CURRENT_BINARY_DIR}/${OUTPUT_BASENAME}Enums.inc + ${CMAKE_CURRENT_BINARY_DIR}/${OUTPUT_BASENAME}Header.inc + ${CMAKE_CURRENT_BINARY_DIR}/${OUTPUT_BASENAME}MCDesc.inc + ${CMAKE_CURRENT_BINARY_DIR}/${OUTPUT_BASENAME}TargetDesc.inc) + endif() + # MSVC can't support long string literals ("long" > 65534 bytes)[1], so if there's # a possibility of generated tables being consumed by MSVC, generate arrays of # char literals, instead. If we're cross-compiling, then conservatively assume @@ -126,7 +136,7 @@ function(tablegen project ofn) set(LLVM_TABLEGEN_JOB_POOL "") endif() - add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn} + add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn} ${EXTRA_OUTPUTS} COMMAND ${tablegen_exe} ${ARG_UNPARSED_ARGUMENTS} ${tblgen_includes} ${LLVM_TABLEGEN_FLAGS} diff --git a/llvm/docs/TableGen/BackEnds.rst b/llvm/docs/TableGen/BackEnds.rst index 7f571378860b2..1e3cb8783df16 100644 --- a/llvm/docs/TableGen/BackEnds.rst +++ b/llvm/docs/TableGen/BackEnds.rst @@ -355,6 +355,13 @@ ClangAttrParsedAttrKinds ``AttributeList::getKind`` function, mapping a string (and syntax) to a parsed attribute ``AttributeList::Kind`` enumeration. +ClangAttrIsTypeDependent +------------------------ + +**Purpose**: Creates ``AttrIsTypeDependent.inc``, which is used to implement the +``Sema::CheckAttributesOnDeducedType`` function, mapping an attribute kind to a +Sema function if it exists. + ClangAttrDump ------------- diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h index fdb3b84b73a1f..7e73cc1957c05 100644 --- a/llvm/include/llvm/ADT/APInt.h +++ b/llvm/include/llvm/ADT/APInt.h @@ -2440,6 +2440,27 @@ LLVM_ABI APInt fshl(const APInt &Hi, const APInt &Lo, const APInt &Shift); /// (4) fshr(i8 255, i8 0, i8 9) = fshr(i8 255, i8 0, i8 1) // 9 % 8 LLVM_ABI APInt fshr(const APInt &Hi, const APInt &Lo, const APInt &Shift); +/// Perform a carry-less multiply, also known as XOR multiplication, and return +/// low-bits. All arguments and result have the same bitwidth. +/// +/// Examples: +/// (1) clmul(i4 1, i4 2) = 2 +/// (2) clmul(i4 5, i4 6) = 14 +/// (3) clmul(i4 -4, i4 2) = -8 +/// (4) clmul(i4 -4, i4 -5) = 4 +LLVM_ABI APInt clmul(const APInt &LHS, const APInt &RHS); + +/// Perform a reversed carry-less multiply. +/// +/// clmulr(a, b) = bitreverse(clmul(bitreverse(a), bitreverse(b))) +LLVM_ABI APInt clmulr(const APInt &LHS, const APInt &RHS); + +/// Perform a carry-less multiply, and return high-bits. All arguments and +/// result have the same bitwidth. +/// +/// clmulh(a, b) = clmulr(a, b) >> 1 +LLVM_ABI APInt clmulh(const APInt &LHS, const APInt &RHS); + } // namespace APIntOps // See friend declaration above. This additional declaration is required in diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index cec7d09f494d6..4c932c523e423 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -3492,6 +3492,13 @@ class LLVM_ABI TargetLoweringBase { return MathUsed && (VT.isSimple() || !isOperationExpand(Opcode, VT)); } + // Return true if the target wants to optimize the mul overflow intrinsic + // for the given \p VT. + virtual bool shouldOptimizeMulOverflowWithZeroHighBits(LLVMContext &Context, + EVT VT) const { + return false; + } + // Return true if it is profitable to use a scalar input to a BUILD_VECTOR // even if the vector itself has multiple uses. virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const { diff --git a/llvm/include/llvm/IR/Constant.h b/llvm/include/llvm/IR/Constant.h index 0be1fc172ebd4..e8ce453559ed7 100644 --- a/llvm/include/llvm/IR/Constant.h +++ b/llvm/include/llvm/IR/Constant.h @@ -79,6 +79,9 @@ class Constant : public User { /// Return true if the value is the smallest signed value. LLVM_ABI bool isMinSignedValue() const; + /// Return true if the value is the largest signed value. + LLVM_ABI bool isMaxSignedValue() const; + /// Return true if this is a finite and non-zero floating-point scalar /// constant or a fixed width vector constant with all finite and non-zero /// elements. diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index d7db935ee07f1..5a4cc776b26a5 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -170,6 +170,8 @@ def int_dx_splitdouble : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0> [LLVMScalarOrSameVectorWidth<0, llvm_double_ty>], [IntrNoMem]>; def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; def int_dx_discard : DefaultAttrsIntrinsic<[], [llvm_i1_ty], []>; +def int_dx_ddx_coarse : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; +def int_dx_ddy_coarse : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; def int_dx_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>; def int_dx_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>; def int_dx_firstbitlow : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>; diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index f39c6cda2c579..2f7c25550a0cc 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -134,6 +134,8 @@ def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty] def int_spv_group_memory_barrier_with_group_sync : DefaultAttrsIntrinsic<[], [], [IntrConvergent]>; def int_spv_discard : DefaultAttrsIntrinsic<[], [], []>; + def int_spv_ddx_coarse : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; + def int_spv_ddy_coarse : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; def int_spv_uclamp : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; def int_spv_sclamp : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; def int_spv_nclamp : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; diff --git a/llvm/include/llvm/TableGen/Main.h b/llvm/include/llvm/TableGen/Main.h index bafce3a463acc..daede9f5a46f0 100644 --- a/llvm/include/llvm/TableGen/Main.h +++ b/llvm/include/llvm/TableGen/Main.h @@ -14,7 +14,6 @@ #define LLVM_TABLEGEN_MAIN_H #include "llvm/Support/CommandLine.h" -#include #include namespace llvm { @@ -30,18 +29,17 @@ struct TableGenOutputFiles { }; /// Returns true on error, false otherwise. -using TableGenMainFn = bool(raw_ostream &OS, const RecordKeeper &Records); +using TableGenMainFn = + function_ref; /// Perform the action using Records, and store output in OutFiles. /// Returns true on error, false otherwise. -using MultiFileTableGenMainFn = bool(TableGenOutputFiles &OutFiles, - const RecordKeeper &Records); +using MultiFileTableGenMainFn = function_ref; -int TableGenMain(const char *argv0, - std::function MainFn = nullptr); +int TableGenMain(const char *argv0, TableGenMainFn MainFn = nullptr); -int TableGenMain(const char *argv0, - std::function MainFn = nullptr); +int TableGenMain(const char *argv0, MultiFileTableGenMainFn MainFn = nullptr); /// Controls emitting large character arrays as strings or character arrays. /// Typically set to false when building with MSVC. diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 6f44713bd22cd..8968f6b934d77 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -41,6 +41,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicsAArch64.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Statepoint.h" @@ -6676,6 +6677,62 @@ static MinMaxOptResult OptimizeConstMinMax(const Constant *RHSConst, return MinMaxOptResult::CannotOptimize; } +static Value *simplifySVEIntReduction(Intrinsic::ID IID, Type *ReturnType, + Value *Op0, Value *Op1) { + Constant *C0 = dyn_cast(Op0); + Constant *C1 = dyn_cast(Op1); + unsigned Width = ReturnType->getPrimitiveSizeInBits(); + + // All false predicate or reduction of neutral values ==> neutral result. + switch (IID) { + case Intrinsic::aarch64_sve_eorv: + case Intrinsic::aarch64_sve_orv: + case Intrinsic::aarch64_sve_saddv: + case Intrinsic::aarch64_sve_uaddv: + case Intrinsic::aarch64_sve_umaxv: + if ((C0 && C0->isNullValue()) || (C1 && C1->isNullValue())) + return ConstantInt::get(ReturnType, 0); + break; + case Intrinsic::aarch64_sve_andv: + case Intrinsic::aarch64_sve_uminv: + if ((C0 && C0->isNullValue()) || (C1 && C1->isAllOnesValue())) + return ConstantInt::get(ReturnType, APInt::getMaxValue(Width)); + break; + case Intrinsic::aarch64_sve_smaxv: + if ((C0 && C0->isNullValue()) || (C1 && C1->isMinSignedValue())) + return ConstantInt::get(ReturnType, APInt::getSignedMinValue(Width)); + break; + case Intrinsic::aarch64_sve_sminv: + if ((C0 && C0->isNullValue()) || (C1 && C1->isMaxSignedValue())) + return ConstantInt::get(ReturnType, APInt::getSignedMaxValue(Width)); + break; + } + + switch (IID) { + case Intrinsic::aarch64_sve_andv: + case Intrinsic::aarch64_sve_orv: + case Intrinsic::aarch64_sve_smaxv: + case Intrinsic::aarch64_sve_sminv: + case Intrinsic::aarch64_sve_umaxv: + case Intrinsic::aarch64_sve_uminv: + // sve_reduce_##(all, splat(X)) ==> X + if (C0 && C0->isAllOnesValue()) { + if (Value *SplatVal = getSplatValue(Op1)) { + assert(SplatVal->getType() == ReturnType && "Unexpected result type!"); + return SplatVal; + } + } + break; + case Intrinsic::aarch64_sve_eorv: + // sve_reduce_xor(all, splat(X)) ==> 0 + if (C0 && C0->isAllOnesValue()) + return ConstantInt::get(ReturnType, 0); + break; + } + + return nullptr; +} + Value *llvm::simplifyBinaryIntrinsic(Intrinsic::ID IID, Type *ReturnType, Value *Op0, Value *Op1, const SimplifyQuery &Q, @@ -7037,6 +7094,17 @@ Value *llvm::simplifyBinaryIntrinsic(Intrinsic::ID IID, Type *ReturnType, break; } + + case Intrinsic::aarch64_sve_andv: + case Intrinsic::aarch64_sve_eorv: + case Intrinsic::aarch64_sve_orv: + case Intrinsic::aarch64_sve_saddv: + case Intrinsic::aarch64_sve_smaxv: + case Intrinsic::aarch64_sve_sminv: + case Intrinsic::aarch64_sve_uaddv: + case Intrinsic::aarch64_sve_umaxv: + case Intrinsic::aarch64_sve_uminv: + return simplifySVEIntReduction(IID, ReturnType, Op0, Op1); default: break; } diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 921462e28a467..799234a0b491d 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -315,11 +315,10 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) { return error(NT.second.second, "use of undefined type '%" + Twine(NT.first) + "'"); - for (StringMap >::iterator I = - NamedTypes.begin(), E = NamedTypes.end(); I != E; ++I) - if (I->second.second.isValid()) - return error(I->second.second, - "use of undefined type named '" + I->getKey() + "'"); + for (const auto &[Name, TypeInfo] : NamedTypes) + if (TypeInfo.second.isValid()) + return error(TypeInfo.second, + "use of undefined type named '" + Name + "'"); if (!ForwardRefComdats.empty()) return error(ForwardRefComdats.begin()->second, diff --git a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp index f497c574ee75d..36d0d35d024cc 100644 --- a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -616,9 +616,8 @@ void ValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) { /// EnumerateValueSymbolTable - Insert all of the values in the specified symbol /// table into the values table. void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) { - for (ValueSymbolTable::const_iterator VI = VST.begin(), VE = VST.end(); - VI != VE; ++VI) - EnumerateValue(VI->getValue()); + for (const auto &VI : VST) + EnumerateValue(VI.getValue()); } /// Insert all of the values referenced by named metadata in the specified diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index b6dd174f9be80..587c1372b19cb 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -431,6 +431,8 @@ class CodeGenPrepare { bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Type *AccessTy, unsigned AddrSpace); bool optimizeGatherScatterInst(Instruction *MemoryInst, Value *Ptr); + bool optimizeMulWithOverflow(Instruction *I, bool IsSigned, + ModifyDT &ModifiedDT); bool optimizeInlineAsmInst(CallInst *CS); bool optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT); bool optimizeExt(Instruction *&I); @@ -2797,6 +2799,10 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) { } } return false; + case Intrinsic::umul_with_overflow: + return optimizeMulWithOverflow(II, /*IsSigned=*/false, ModifiedDT); + case Intrinsic::smul_with_overflow: + return optimizeMulWithOverflow(II, /*IsSigned=*/true, ModifiedDT); } SmallVector PtrOps; @@ -6391,6 +6397,182 @@ bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst, return true; } +// This is a helper for CodeGenPrepare::optimizeMulWithOverflow. +// Check the pattern we are interested in where there are maximum 2 uses +// of the intrinsic which are the extract instructions. +static bool matchOverflowPattern(Instruction *&I, ExtractValueInst *&MulExtract, + ExtractValueInst *&OverflowExtract) { + // Bail out if it's more than 2 users: + if (I->hasNUsesOrMore(3)) + return false; + + for (User *U : I->users()) { + auto *Extract = dyn_cast(U); + if (!Extract || Extract->getNumIndices() != 1) + return false; + + unsigned Index = Extract->getIndices()[0]; + if (Index == 0) + MulExtract = Extract; + else if (Index == 1) + OverflowExtract = Extract; + else + return false; + } + return true; +} + +// Rewrite the mul_with_overflow intrinsic by checking if both of the +// operands' value ranges are within the legal type. If so, we can optimize the +// multiplication algorithm. This code is supposed to be written during the step +// of type legalization, but given that we need to reconstruct the IR which is +// not doable there, we do it here. +// The IR after the optimization will look like: +// entry: +// if signed: +// ( (lhs_lo>>BW-1) ^ lhs_hi) || ( (rhs_lo>>BW-1) ^ rhs_hi) ? overflow, +// overflow_no +// else: +// (lhs_hi != 0) || (rhs_hi != 0) ? overflow, overflow_no +// overflow_no: +// overflow: +// overflow.res: +// \returns true if optimization was applied +// TODO: This optimization can be further improved to optimize branching on +// overflow where the 'overflow_no' BB can branch directly to the false +// successor of overflow, but that would add additional complexity so we leave +// it for future work. +bool CodeGenPrepare::optimizeMulWithOverflow(Instruction *I, bool IsSigned, + ModifyDT &ModifiedDT) { + // Check if target supports this optimization. + if (!TLI->shouldOptimizeMulOverflowWithZeroHighBits( + I->getContext(), + TLI->getValueType(*DL, I->getType()->getContainedType(0)))) + return false; + + ExtractValueInst *MulExtract = nullptr, *OverflowExtract = nullptr; + if (!matchOverflowPattern(I, MulExtract, OverflowExtract)) + return false; + + // Keep track of the instruction to stop reoptimizing it again. + InsertedInsts.insert(I); + + Value *LHS = I->getOperand(0); + Value *RHS = I->getOperand(1); + Type *Ty = LHS->getType(); + unsigned VTHalfBitWidth = Ty->getScalarSizeInBits() / 2; + Type *LegalTy = Ty->getWithNewBitWidth(VTHalfBitWidth); + + // New BBs: + BasicBlock *OverflowEntryBB = + I->getParent()->splitBasicBlock(I, "", /*Before*/ true); + OverflowEntryBB->takeName(I->getParent()); + // Keep the 'br' instruction that is generated as a result of the split to be + // erased/replaced later. + Instruction *OldTerminator = OverflowEntryBB->getTerminator(); + BasicBlock *NoOverflowBB = + BasicBlock::Create(I->getContext(), "overflow.no", I->getFunction()); + NoOverflowBB->moveAfter(OverflowEntryBB); + BasicBlock *OverflowBB = + BasicBlock::Create(I->getContext(), "overflow", I->getFunction()); + OverflowBB->moveAfter(NoOverflowBB); + + // BB overflow.entry: + IRBuilder<> Builder(OverflowEntryBB); + // Extract low and high halves of LHS: + Value *LoLHS = Builder.CreateTrunc(LHS, LegalTy, "lo.lhs"); + Value *HiLHS = Builder.CreateLShr(LHS, VTHalfBitWidth, "lhs.lsr"); + HiLHS = Builder.CreateTrunc(HiLHS, LegalTy, "hi.lhs"); + + // Extract low and high halves of RHS: + Value *LoRHS = Builder.CreateTrunc(RHS, LegalTy, "lo.rhs"); + Value *HiRHS = Builder.CreateLShr(RHS, VTHalfBitWidth, "rhs.lsr"); + HiRHS = Builder.CreateTrunc(HiRHS, LegalTy, "hi.rhs"); + + Value *IsAnyBitTrue; + if (IsSigned) { + Value *SignLoLHS = + Builder.CreateAShr(LoLHS, VTHalfBitWidth - 1, "sign.lo.lhs"); + Value *SignLoRHS = + Builder.CreateAShr(LoRHS, VTHalfBitWidth - 1, "sign.lo.rhs"); + Value *XorLHS = Builder.CreateXor(HiLHS, SignLoLHS); + Value *XorRHS = Builder.CreateXor(HiRHS, SignLoRHS); + Value *Or = Builder.CreateOr(XorLHS, XorRHS, "or.lhs.rhs"); + IsAnyBitTrue = Builder.CreateCmp(ICmpInst::ICMP_NE, Or, + ConstantInt::getNullValue(Or->getType())); + } else { + Value *CmpLHS = Builder.CreateCmp(ICmpInst::ICMP_NE, HiLHS, + ConstantInt::getNullValue(LegalTy)); + Value *CmpRHS = Builder.CreateCmp(ICmpInst::ICMP_NE, HiRHS, + ConstantInt::getNullValue(LegalTy)); + IsAnyBitTrue = Builder.CreateOr(CmpLHS, CmpRHS, "or.lhs.rhs"); + } + Builder.CreateCondBr(IsAnyBitTrue, OverflowBB, NoOverflowBB); + + // BB overflow.no: + Builder.SetInsertPoint(NoOverflowBB); + Value *ExtLoLHS, *ExtLoRHS; + if (IsSigned) { + ExtLoLHS = Builder.CreateSExt(LoLHS, Ty, "lo.lhs.ext"); + ExtLoRHS = Builder.CreateSExt(LoRHS, Ty, "lo.rhs.ext"); + } else { + ExtLoLHS = Builder.CreateZExt(LoLHS, Ty, "lo.lhs.ext"); + ExtLoRHS = Builder.CreateZExt(LoRHS, Ty, "lo.rhs.ext"); + } + + Value *Mul = Builder.CreateMul(ExtLoLHS, ExtLoRHS, "mul.overflow.no"); + + // Create the 'overflow.res' BB to merge the results of + // the two paths: + BasicBlock *OverflowResBB = I->getParent(); + OverflowResBB->setName("overflow.res"); + + // BB overflow.no: jump to overflow.res BB + Builder.CreateBr(OverflowResBB); + // No we don't need the old terminator in overflow.entry BB, erase it: + OldTerminator->eraseFromParent(); + + // BB overflow.res: + Builder.SetInsertPoint(OverflowResBB, OverflowResBB->getFirstInsertionPt()); + // Create PHI nodes to merge results from no.overflow BB and overflow BB to + // replace the extract instructions. + PHINode *OverflowResPHI = Builder.CreatePHI(Ty, 2), + *OverflowFlagPHI = + Builder.CreatePHI(IntegerType::getInt1Ty(I->getContext()), 2); + + // Add the incoming values from no.overflow BB and later from overflow BB. + OverflowResPHI->addIncoming(Mul, NoOverflowBB); + OverflowFlagPHI->addIncoming(ConstantInt::getFalse(I->getContext()), + NoOverflowBB); + + // Replace all users of MulExtract and OverflowExtract to use the PHI nodes. + if (MulExtract) { + MulExtract->replaceAllUsesWith(OverflowResPHI); + MulExtract->eraseFromParent(); + } + if (OverflowExtract) { + OverflowExtract->replaceAllUsesWith(OverflowFlagPHI); + OverflowExtract->eraseFromParent(); + } + + // Remove the intrinsic from parent (overflow.res BB) as it will be part of + // overflow BB + I->removeFromParent(); + // BB overflow: + I->insertInto(OverflowBB, OverflowBB->end()); + Builder.SetInsertPoint(OverflowBB, OverflowBB->end()); + Value *MulOverflow = Builder.CreateExtractValue(I, {0}, "mul.overflow"); + Value *OverflowFlag = Builder.CreateExtractValue(I, {1}, "overflow.flag"); + Builder.CreateBr(OverflowResBB); + + // Add The Extracted values to the PHINodes in the overflow.res BB. + OverflowResPHI->addIncoming(MulOverflow, OverflowBB); + OverflowFlagPHI->addIncoming(OverflowFlag, OverflowBB); + + ModifiedDT = ModifyDT::ModifyBBDT; + return true; +} + /// If there are any memory operands, use OptimizeMemoryInst to sink their /// address computing into the block when possible / profitable. bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) { diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp index c1fb8b6d78ff8..ecba323f8d6bf 100644 --- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp @@ -247,6 +247,7 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known, for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) { const MachineOperand &Src = MI.getOperand(Idx); Register SrcReg = Src.getReg(); + LLT SrcTy = MRI.getType(SrcReg); // Look through trivial copies and phis but don't look through trivial // copies or phis of the form `%1:(s32) = OP %0:gpr32`, known-bits // analysis is currently unable to determine the bit width of a @@ -255,9 +256,15 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known, // We can't use NoSubRegister by name as it's defined by each target but // it's always defined to be 0 by tablegen. if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ && - MRI.getType(SrcReg).isValid()) { + SrcTy.isValid()) { + // In case we're forwarding from a vector register to a non-vector + // register we need to update the demanded elements to reflect this + // before recursing. + APInt NowDemandedElts = SrcTy.isFixedVector() && !DstTy.isFixedVector() + ? APInt::getAllOnes(SrcTy.getNumElements()) + : DemandedElts; // Known to be APInt(1, 1) // For COPYs we don't do anything, don't increase the depth. - computeKnownBitsImpl(SrcReg, Known2, DemandedElts, + computeKnownBitsImpl(SrcReg, Known2, NowDemandedElts, Depth + (Opcode != TargetOpcode::COPY)); Known2 = Known2.anyextOrTrunc(BitWidth); Known = Known.intersectWith(Known2); diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index cacb292acee18..ba28e4dda3313 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -3439,6 +3439,18 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { Observer.changedInstr(MI); return Legalized; } + case TargetOpcode::G_LROUND: + case TargetOpcode::G_LLROUND: + Observer.changingInstr(MI); + + if (TypeIdx == 0) + widenScalarDst(MI, WideTy); + else + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT); + + Observer.changedInstr(MI); + return Legalized; + case TargetOpcode::G_INTTOPTR: if (TypeIdx != 1) return UnableToLegalize; diff --git a/llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp b/llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp index bca820fa807c8..4acc064dbc212 100644 --- a/llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp +++ b/llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp @@ -64,7 +64,6 @@ dwarf::CFIProgram DWARFCFIState::convert(MCCFIInstruction Directive) { /* CodeAlignmentFactor */ 1, /* DataAlignmentFactor */ 1, Context->getTargetTriple().getArch()); - auto MaybeCurrentRow = getCurrentUnwindRow(); switch (Directive.getOperation()) { case MCCFIInstruction::OpSameValue: CFIP.addInstruction(dwarf::DW_CFA_same_value, Directive.getRegister()); diff --git a/llvm/lib/ExecutionEngine/Orc/Debugging/CMakeLists.txt b/llvm/lib/ExecutionEngine/Orc/Debugging/CMakeLists.txt index ab287c7af60be..6be59b0890c44 100644 --- a/llvm/lib/ExecutionEngine/Orc/Debugging/CMakeLists.txt +++ b/llvm/lib/ExecutionEngine/Orc/Debugging/CMakeLists.txt @@ -22,6 +22,7 @@ add_llvm_component_library(LLVMOrcDebugging BinaryFormat DebugInfoDWARF JITLink + Object OrcJIT OrcShared Support diff --git a/llvm/lib/ExecutionEngine/Orc/Debugging/ELFDebugObjectPlugin.cpp b/llvm/lib/ExecutionEngine/Orc/Debugging/ELFDebugObjectPlugin.cpp index 9f556b0d07a8b..653645ff03f15 100644 --- a/llvm/lib/ExecutionEngine/Orc/Debugging/ELFDebugObjectPlugin.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Debugging/ELFDebugObjectPlugin.cpp @@ -1,4 +1,4 @@ -//===------- ELFDebugObjectPlugin.cpp - JITLink debug objects ---------===// +//===--------- ELFDebugObjectPlugin.cpp - JITLink debug objects -----------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp index cbce8bd736102..a3aa5e9571657 100644 --- a/llvm/lib/IR/Constants.cpp +++ b/llvm/lib/IR/Constants.cpp @@ -183,6 +183,23 @@ bool Constant::isMinSignedValue() const { return false; } +bool Constant::isMaxSignedValue() const { + // Check for INT_MAX integers + if (const ConstantInt *CI = dyn_cast(this)) + return CI->isMaxValue(/*isSigned=*/true); + + // Check for FP which are bitcasted from INT_MAX integers + if (const ConstantFP *CFP = dyn_cast(this)) + return CFP->getValueAPF().bitcastToAPInt().isMaxSignedValue(); + + // Check for splats of INT_MAX values. + if (getType()->isVectorTy()) + if (const auto *SplatVal = getSplatValue()) + return SplatVal->isMaxSignedValue(); + + return false; +} + bool Constant::isNotMinSignedValue() const { // Check for INT_MIN integers if (const ConstantInt *CI = dyn_cast(this)) diff --git a/llvm/lib/Support/APInt.cpp b/llvm/lib/Support/APInt.cpp index f6fd5f9ddd633..673cd867f0e45 100644 --- a/llvm/lib/Support/APInt.cpp +++ b/llvm/lib/Support/APInt.cpp @@ -15,10 +15,10 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/Sequence.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/bit.h" -#include "llvm/Config/llvm-config.h" #include "llvm/Support/Alignment.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -3187,3 +3187,23 @@ APInt llvm::APIntOps::fshr(const APInt &Hi, const APInt &Lo, return Lo; return Hi.shl(Hi.getBitWidth() - ShiftAmt) | Lo.lshr(ShiftAmt); } + +APInt llvm::APIntOps::clmul(const APInt &LHS, const APInt &RHS) { + assert(LHS.getBitWidth() == RHS.getBitWidth()); + unsigned BW = LHS.getBitWidth(); + APInt Result(BW, 0); + for (unsigned I : seq(BW)) + if (RHS[I]) + Result ^= LHS.shl(I); + return Result; +} + +APInt llvm::APIntOps::clmulr(const APInt &LHS, const APInt &RHS) { + assert(LHS.getBitWidth() == RHS.getBitWidth()); + return clmul(LHS.reverseBits(), RHS.reverseBits()).reverseBits(); +} + +APInt llvm::APIntOps::clmulh(const APInt &LHS, const APInt &RHS) { + assert(LHS.getBitWidth() == RHS.getBitWidth()); + return clmulr(LHS, RHS).lshr(1); +} diff --git a/llvm/lib/Support/Parallel.cpp b/llvm/lib/Support/Parallel.cpp index 8e0c724accb36..ab220b8f2ceba 100644 --- a/llvm/lib/Support/Parallel.cpp +++ b/llvm/lib/Support/Parallel.cpp @@ -193,16 +193,7 @@ class ThreadPoolExecutor : public Executor { JobserverClient *TheJobserver = nullptr; }; -// A global raw pointer to the executor. Lifetime is managed by the -// objects created within createExecutor(). -static Executor *TheExec = nullptr; -static std::once_flag Flag; - -// This function will be called exactly once to create the executor. -// It contains the necessary platform-specific logic. Since functions -// called by std::call_once cannot return value, we have to set the -// executor as a global variable. -void createExecutor() { +Executor *Executor::getDefaultExecutor() { #ifdef _WIN32 // The ManagedStatic enables the ThreadPoolExecutor to be stopped via // llvm_shutdown() which allows a "clean" fast exit, e.g. via _exit(). This @@ -226,22 +217,16 @@ void createExecutor() { ThreadPoolExecutor::Deleter> ManagedExec; static std::unique_ptr Exec(&(*ManagedExec)); - TheExec = Exec.get(); + return Exec.get(); #else // ManagedStatic is not desired on other platforms. When `Exec` is destroyed // by llvm_shutdown(), worker threads will clean up and invoke TLS // destructors. This can lead to race conditions if other threads attempt to // access TLS objects that have already been destroyed. static ThreadPoolExecutor Exec(strategy); - TheExec = &Exec; + return &Exec; #endif } - -Executor *Executor::getDefaultExecutor() { - // Use std::call_once to lazily and safely initialize the executor. - std::call_once(Flag, createExecutor); - return TheExec; -} } // namespace } // namespace detail diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp index 3330b70cdc2e1..939e9c6bf5d2f 100644 --- a/llvm/lib/TableGen/Main.cpp +++ b/llvm/lib/TableGen/Main.cpp @@ -127,8 +127,7 @@ static int WriteOutput(const TGParser &Parser, const char *argv0, return 0; } -int llvm::TableGenMain(const char *argv0, - std::function MainFn) { +int llvm::TableGenMain(const char *argv0, MultiFileTableGenMainFn MainFn) { RecordKeeper Records; TGTimer &Timer = Records.getTimer(); @@ -209,8 +208,7 @@ int llvm::TableGenMain(const char *argv0, return 0; } -int llvm::TableGenMain(const char *argv0, - std::function MainFn) { +int llvm::TableGenMain(const char *argv0, TableGenMainFn MainFn) { return TableGenMain(argv0, [&MainFn](TableGenOutputFiles &OutFiles, const RecordKeeper &Records) { std::string S; diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index f1db05dda4e40..08466667c0fa5 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -4403,43 +4403,46 @@ bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) { bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert) { - if (auto CNode = dyn_cast(N)) { - uint64_t ImmVal = CNode->getZExtValue(); - SDLoc DL(N); - - if (Invert) - ImmVal = ~ImmVal; + uint64_t ImmVal; + if (auto CI = dyn_cast(N)) + ImmVal = CI->getZExtValue(); + else if (auto CFP = dyn_cast(N)) + ImmVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); + else + return false; - // Shift mask depending on type size. - switch (VT.SimpleTy) { - case MVT::i8: - ImmVal &= 0xFF; - ImmVal |= ImmVal << 8; - ImmVal |= ImmVal << 16; - ImmVal |= ImmVal << 32; - break; - case MVT::i16: - ImmVal &= 0xFFFF; - ImmVal |= ImmVal << 16; - ImmVal |= ImmVal << 32; - break; - case MVT::i32: - ImmVal &= 0xFFFFFFFF; - ImmVal |= ImmVal << 32; - break; - case MVT::i64: - break; - default: - llvm_unreachable("Unexpected type"); - } + if (Invert) + ImmVal = ~ImmVal; - uint64_t encoding; - if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) { - Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64); - return true; - } + // Shift mask depending on type size. + switch (VT.SimpleTy) { + case MVT::i8: + ImmVal &= 0xFF; + ImmVal |= ImmVal << 8; + ImmVal |= ImmVal << 16; + ImmVal |= ImmVal << 32; + break; + case MVT::i16: + ImmVal &= 0xFFFF; + ImmVal |= ImmVal << 16; + ImmVal |= ImmVal << 32; + break; + case MVT::i32: + ImmVal &= 0xFFFFFFFF; + ImmVal |= ImmVal << 32; + break; + case MVT::i64: + break; + default: + llvm_unreachable("Unexpected type"); } - return false; + + uint64_t encoding; + if (!AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) + return false; + + Imm = CurDAG->getTargetConstant(encoding, SDLoc(N), MVT::i64); + return true; } // SVE shift intrinsics allow shift amounts larger than the element's bitwidth. diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 42567883b2594..8f41f230b5521 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -3886,22 +3886,30 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, /// \param MustBeFirst Set to true if this subtree needs to be negated and we /// cannot do the negation naturally. We are required to /// emit the subtree first in this case. +/// \param PreferFirst Set to true if processing this subtree first may +/// result in more efficient code. /// \param WillNegate Is true if are called when the result of this /// subexpression must be negated. This happens when the /// outer expression is an OR. We can use this fact to know /// that we have a double negation (or (or ...) ...) that /// can be implemented for free. -static bool canEmitConjunction(const SDValue Val, bool &CanNegate, - bool &MustBeFirst, bool WillNegate, +static bool canEmitConjunction(SelectionDAG &DAG, const SDValue Val, + bool &CanNegate, bool &MustBeFirst, + bool &PreferFirst, bool WillNegate, unsigned Depth = 0) { if (!Val.hasOneUse()) return false; unsigned Opcode = Val->getOpcode(); if (Opcode == ISD::SETCC) { - if (Val->getOperand(0).getValueType() == MVT::f128) + EVT VT = Val->getOperand(0).getValueType(); + if (VT == MVT::f128) return false; CanNegate = true; MustBeFirst = false; + // Designate this operation as a preferred first operation if the result + // of a SUB operation can be reused. + PreferFirst = DAG.doesNodeExist(ISD::SUB, DAG.getVTList(VT), + {Val->getOperand(0), Val->getOperand(1)}); return true; } // Protect against exponential runtime and stack overflow. @@ -3913,11 +3921,15 @@ static bool canEmitConjunction(const SDValue Val, bool &CanNegate, SDValue O1 = Val->getOperand(1); bool CanNegateL; bool MustBeFirstL; - if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1)) + bool PreferFirstL; + if (!canEmitConjunction(DAG, O0, CanNegateL, MustBeFirstL, PreferFirstL, + IsOR, Depth + 1)) return false; bool CanNegateR; bool MustBeFirstR; - if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1)) + bool PreferFirstR; + if (!canEmitConjunction(DAG, O1, CanNegateR, MustBeFirstR, PreferFirstR, + IsOR, Depth + 1)) return false; if (MustBeFirstL && MustBeFirstR) @@ -3940,6 +3952,7 @@ static bool canEmitConjunction(const SDValue Val, bool &CanNegate, CanNegate = false; MustBeFirst = MustBeFirstL || MustBeFirstR; } + PreferFirst = PreferFirstL || PreferFirstR; return true; } return false; @@ -4001,19 +4014,25 @@ static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val, SDValue LHS = Val->getOperand(0); bool CanNegateL; bool MustBeFirstL; - bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR); + bool PreferFirstL; + bool ValidL = canEmitConjunction(DAG, LHS, CanNegateL, MustBeFirstL, + PreferFirstL, IsOR); assert(ValidL && "Valid conjunction/disjunction tree"); (void)ValidL; SDValue RHS = Val->getOperand(1); bool CanNegateR; bool MustBeFirstR; - bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR); + bool PreferFirstR; + bool ValidR = canEmitConjunction(DAG, RHS, CanNegateR, MustBeFirstR, + PreferFirstR, IsOR); assert(ValidR && "Valid conjunction/disjunction tree"); (void)ValidR; - // Swap sub-tree that must come first to the right side. - if (MustBeFirstL) { + bool ShouldFirstL = PreferFirstL && !PreferFirstR && !MustBeFirstR; + + // Swap sub-tree that must or should come first to the right side. + if (MustBeFirstL || ShouldFirstL) { assert(!MustBeFirstR && "Valid conjunction/disjunction tree"); std::swap(LHS, RHS); std::swap(CanNegateL, CanNegateR); @@ -4069,7 +4088,9 @@ static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC) { bool DummyCanNegate; bool DummyMustBeFirst; - if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false)) + bool DummyPreferFirst; + if (!canEmitConjunction(DAG, Val, DummyCanNegate, DummyMustBeFirst, + DummyPreferFirst, false)) return SDValue(); return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL); @@ -18851,6 +18872,15 @@ bool AArch64TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, return (Index == 0 || Index == ResVT.getVectorMinNumElements()); } +bool AArch64TargetLowering::shouldOptimizeMulOverflowWithZeroHighBits( + LLVMContext &Context, EVT VT) const { + if (getTypeAction(Context, VT) != TypeExpandInteger) + return false; + + EVT LegalTy = EVT::getIntegerVT(Context, VT.getSizeInBits() / 2); + return getTypeAction(Context, LegalTy) == TargetLowering::TypeLegal; +} + /// Turn vector tests of the signbit in the form of: /// xor (sra X, elt_size(X)-1), -1 /// into: diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 70bfae717fb76..be198e54cbcbf 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -333,6 +333,11 @@ class AArch64TargetLowering : public TargetLowering { return TargetLowering::shouldFormOverflowOp(Opcode, VT, true); } + // Return true if the target wants to optimize the mul overflow intrinsic + // for the given \p VT. + bool shouldOptimizeMulOverflowWithZeroHighBits(LLVMContext &Context, + EVT VT) const override; + Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const override; Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index c8c21c4822ffe..e99b3f8ff07e0 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -989,7 +989,7 @@ let Predicates = [HasSVE_or_SME] in { (DUP_ZR_D (MOVi64imm (bitcast_fpimm_to_i64 f64:$val)))>; // Duplicate FP immediate into all vector elements - let AddedComplexity = 2 in { + let AddedComplexity = 3 in { def : Pat<(nxv8f16 (splat_vector fpimm16:$imm8)), (FDUP_ZI_H fpimm16:$imm8)>; def : Pat<(nxv4f16 (splat_vector fpimm16:$imm8)), diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index a88817c9d2d19..fdf69b04bf676 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -449,10 +449,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .minScalar(0, s32) .libcallFor({{s32, s32}, {s64, s32}, {s128, s32}}); - // TODO: Libcall support for s128. - // TODO: s16 should be legal with full FP16 support. getActionDefinitionsBuilder({G_LROUND, G_LLROUND}) - .legalFor({{s64, s32}, {s64, s64}}); + .legalFor({{s64, s32}, {s64, s64}}) + .legalFor(HasFP16, {{s64, s16}}) + .minScalar(0, s64) + .minScalar(1, s32) + .libcallFor({{s64, s128}}); // TODO: Custom legalization for mismatched types. getActionDefinitionsBuilder(G_FCOPYSIGN) diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 1664f4ad0c8fa..1e771e1fb9403 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -347,6 +347,11 @@ def SVELogicalImm16Pat : ComplexPattern", def SVELogicalImm32Pat : ComplexPattern", []>; def SVELogicalImm64Pat : ComplexPattern", []>; +def SVELogicalFPImm16Pat : ComplexPattern", []>; +def SVELogicalFPImm32Pat : ComplexPattern", []>; +def SVELogicalFPImm64Pat : ComplexPattern", []>; +def SVELogicalBFPImmPat : ComplexPattern", []>; + def SVELogicalImm8NotPat : ComplexPattern", []>; def SVELogicalImm16NotPat : ComplexPattern", []>; def SVELogicalImm32NotPat : ComplexPattern", []>; @@ -2160,6 +2165,26 @@ multiclass sve_int_dup_mask_imm { (!cast(NAME) i64:$imm)>; def : Pat<(nxv2i64 (splat_vector (i64 (SVELogicalImm64Pat i64:$imm)))), (!cast(NAME) i64:$imm)>; + + def : Pat<(nxv8f16 (splat_vector (f16 (SVELogicalFPImm16Pat i64:$imm)))), + (!cast(NAME) i64:$imm)>; + def : Pat<(nxv4f16 (splat_vector (f16 (SVELogicalFPImm16Pat i64:$imm)))), + (!cast(NAME) i64:$imm)>; + def : Pat<(nxv2f16 (splat_vector (f16 (SVELogicalFPImm16Pat i64:$imm)))), + (!cast(NAME) i64:$imm)>; + def : Pat<(nxv4f32 (splat_vector (f32 (SVELogicalFPImm32Pat i64:$imm)))), + (!cast(NAME) i64:$imm)>; + def : Pat<(nxv2f32 (splat_vector (f32 (SVELogicalFPImm32Pat i64:$imm)))), + (!cast(NAME) i64:$imm)>; + def : Pat<(nxv2f64 (splat_vector (f64 (SVELogicalFPImm64Pat i64:$imm)))), + (!cast(NAME) i64:$imm)>; + + def : Pat<(nxv8bf16 (splat_vector (bf16 (SVELogicalBFPImmPat i64:$imm)))), + (!cast(NAME) i64:$imm)>; + def : Pat<(nxv4bf16 (splat_vector (bf16 (SVELogicalBFPImmPat i64:$imm)))), + (!cast(NAME) i64:$imm)>; + def : Pat<(nxv2bf16 (splat_vector (bf16 (SVELogicalBFPImmPat i64:$imm)))), + (!cast(NAME) i64:$imm)>; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index b81a08de383d9..e36c57ad59bfd 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -960,6 +960,14 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat) .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat); + addRulesForGOpcs({G_IS_FPCLASS}) + .Any({{DivS1, S16}, {{Vcc}, {Vgpr16}}}) + .Any({{UniS1, S16}, {{UniInVcc}, {Vgpr16}}}) + .Any({{DivS1, S32}, {{Vcc}, {Vgpr32}}}) + .Any({{UniS1, S32}, {{UniInVcc}, {Vgpr32}}}) + .Any({{DivS1, S64}, {{Vcc}, {Vgpr64}}}) + .Any({{UniS1, S64}, {{UniInVcc}, {Vgpr64}}}); + using namespace Intrinsic; addRulesForIOpcs({amdgcn_s_getpc}).Any({{UniS64, _}, {{Sgpr64}, {None}}}); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp index 9af812960542c..b7078825928be 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp @@ -314,9 +314,7 @@ class SplitGraph { #endif bool empty() const { return Nodes.empty(); } - const iterator_range nodes() const { - return {Nodes.begin(), Nodes.end()}; - } + iterator_range nodes() const { return Nodes; } const Node &getNode(unsigned ID) const { return *Nodes[ID]; } unsigned getNumNodes() const { return Nodes.size(); } diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 7a2f84a2f73eb..29d22f27a2d8e 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -1502,9 +1502,8 @@ static bool shouldRunLdsBranchVmemWARHazardFixup(const MachineFunction &MF, bool HasVmem = false; for (auto &MBB : MF) { for (auto &MI : MBB) { - HasLds |= SIInstrInfo::isDS(MI); - HasVmem |= (SIInstrInfo::isVMEM(MI) && !SIInstrInfo::isFLAT(MI)) || - SIInstrInfo::isSegmentSpecificFLAT(MI); + HasLds |= SIInstrInfo::isDS(MI) || SIInstrInfo::isLDSDMA(MI); + HasVmem |= SIInstrInfo::isVMEM(MI); if (HasLds && HasVmem) return true; } @@ -1526,10 +1525,9 @@ bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) { assert(!ST.hasExtendedWaitCounts()); auto IsHazardInst = [](const MachineInstr &MI) { - if (SIInstrInfo::isDS(MI)) + if (SIInstrInfo::isDS(MI) || SIInstrInfo::isLDSDMA(MI)) return 1; - if ((SIInstrInfo::isVMEM(MI) && !SIInstrInfo::isFLAT(MI)) || - SIInstrInfo::isSegmentSpecificFLAT(MI)) + if (SIInstrInfo::isVMEM(MI)) return 2; return 0; }; diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 847b7af5a9b11..26b5e5a22386e 100644 --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -3965,31 +3965,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) { return; // Other cases are autogenerated. break; - case ARMISD::WLSSETUP: { - SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopSetup, dl, MVT::i32, - N->getOperand(0)); - ReplaceUses(N, New); - CurDAG->RemoveDeadNode(N); - return; - } - case ARMISD::WLS: { - SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other, - N->getOperand(1), N->getOperand(2), - N->getOperand(0)); - ReplaceUses(N, New); - CurDAG->RemoveDeadNode(N); - return; - } - case ARMISD::LE: { - SDValue Ops[] = { N->getOperand(1), - N->getOperand(2), - N->getOperand(0) }; - unsigned Opc = ARM::t2LoopEnd; - SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); - ReplaceUses(N, New); - CurDAG->RemoveDeadNode(N); - return; - } case ARMISD::LDRD: { if (Subtarget->isThumb2()) break; // TableGen handles isel in this case. @@ -4043,17 +4018,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) { CurDAG->RemoveDeadNode(N); return; } - case ARMISD::LOOP_DEC: { - SDValue Ops[] = { N->getOperand(1), - N->getOperand(2), - N->getOperand(0) }; - SDNode *Dec = - CurDAG->getMachineNode(ARM::t2LoopDec, dl, - CurDAG->getVTList(MVT::i32, MVT::Other), Ops); - ReplaceUses(N, Dec); - CurDAG->RemoveDeadNode(N); - return; - } case ARMISD::BRCOND: { // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc) diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td index c229c8e4491df..911d7ebfba141 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -5581,6 +5581,20 @@ class t2LOL let Predicates = [IsThumb2, HasV8_1MMainline, HasLOB]; } +def arm_wlssetup + : SDNode<"ARMISD::WLSSETUP", + SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisSameAs<1, 0>]>>; + +def arm_wls : SDNode<"ARMISD::WLS", + SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>, + [SDNPHasChain]>; + +def arm_loop_dec : SDNode<"ARMISD::LOOP_DEC", SDTIntBinOp, [SDNPHasChain]>; + +def arm_le : SDNode<"ARMISD::LE", + SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>, + [SDNPHasChain]>; + let isNotDuplicable = 1 in { def t2WLS : t2LOL<(outs GPRlr:$LR), (ins rGPR:$Rn, wlslabel_u11:$label), @@ -5651,15 +5665,17 @@ def t2DoLoopStartTP : // valid after reg alloc, as it should be lowered during MVETPAndVPTOptimisations // into a t2WhileLoopStartLR (or expanded). def t2WhileLoopSetup : - t2PseudoInst<(outs GPRlr:$lr), (ins rGPR:$tc), 4, IIC_Br, []>; + t2PseudoInst<(outs GPRlr:$lr), (ins rGPR:$tc), 4, IIC_Br, + [(set i32:$lr, (arm_wlssetup i32:$tc))]>; // A pseudo to represent the decrement in a low overhead loop. A t2LoopDec and // t2LoopEnd together represent a LE instruction. Ideally these are converted // to a t2LoopEndDec which is lowered as a single instruction. let hasSideEffects = 0 in def t2LoopDec : - t2PseudoInst<(outs GPRlr:$Rm), (ins GPRlr:$Rn, imm0_7:$size), - 4, IIC_Br, []>, Sched<[WriteBr]>; + t2PseudoInst<(outs GPRlr:$Rm), (ins GPRlr:$Rn, imm0_7:$size), 4, IIC_Br, + [(set i32:$Rm, (arm_loop_dec i32:$Rn, timm:$size))]>, + Sched<[WriteBr]>; let isBranch = 1, isTerminator = 1, hasSideEffects = 1, Defs = [CPSR] in { // The branch in a t2WhileLoopSetup/t2WhileLoopStart pair, eventually turned @@ -5667,8 +5683,8 @@ let isBranch = 1, isTerminator = 1, hasSideEffects = 1, Defs = [CPSR] in { def t2WhileLoopStart : t2PseudoInst<(outs), (ins GPRlr:$tc, brtarget:$target), - 4, IIC_Br, []>, - Sched<[WriteBr]>; + 4, IIC_Br, [(arm_wls i32:$tc, bb:$target)]>, + Sched<[WriteBr]>; // WhileLoopStartLR that sets up LR and branches on zero, equivalent to WLS. It // is lowered in the ARMLowOverheadLoops pass providing the branches are within @@ -5690,8 +5706,9 @@ def t2WhileLoopStartTP : // t2LoopEnd - the branch half of a t2LoopDec/t2LoopEnd pair. def t2LoopEnd : - t2PseudoInst<(outs), (ins GPRlr:$tc, brtarget:$target), - 8, IIC_Br, []>, Sched<[WriteBr]>; + t2PseudoInst<(outs), (ins GPRlr:$tc, brtarget:$target), + 8, IIC_Br, [(arm_le i32:$tc, bb:$target)]>, + Sched<[WriteBr]>; // The combination of a t2LoopDec and t2LoopEnd, performing both the LR // decrement and branch as a single instruction. Is lowered to a LE or diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td index 67437f6969b27..8b2866260e9c9 100644 --- a/llvm/lib/Target/DirectX/DXIL.td +++ b/llvm/lib/Target/DirectX/DXIL.td @@ -930,6 +930,24 @@ def Discard : DXILOp<82, discard> { let stages = [Stages]; } +def DerivCoarseX : DXILOp<83, unary> { + let Doc = "computes the rate of change per stamp in x direction"; + let intrinsics = [IntrinSelect]; + let arguments = [OverloadTy]; + let result = OverloadTy; + let overloads = [Overloads]; + let stages = [Stages]; +} + +def DerivCoarseY : DXILOp<84, unary> { + let Doc = "computes the rate of change per stamp in y direction"; + let intrinsics = [IntrinSelect]; + let arguments = [OverloadTy]; + let result = OverloadTy; + let overloads = [Overloads]; + let stages = [Stages]; +} + def ThreadId : DXILOp<93, threadId> { let Doc = "Reads the thread ID"; let intrinsics = [IntrinSelect]; diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp index 6cacbf6564db2..a755dd522969d 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp +++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp @@ -64,6 +64,8 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable( case Intrinsic::dx_wave_reduce_usum: case Intrinsic::dx_imad: case Intrinsic::dx_umad: + case Intrinsic::dx_ddx_coarse: + case Intrinsic::dx_ddy_coarse: return true; default: return false; diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp index 47022b3f89a8b..76fd834fd7219 100644 --- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp @@ -1697,11 +1697,16 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVType(unsigned BitWidth, MachineIRBuilder MIRBuilder(DepMBB, DepMBB.getFirstNonPHI()); const MachineInstr *NewMI = createOpType(MIRBuilder, [&](MachineIRBuilder &MIRBuilder) { - return BuildMI(MIRBuilder.getMBB(), *MIRBuilder.getInsertPt(), - MIRBuilder.getDL(), TII.get(SPIRVOPcode)) - .addDef(createTypeVReg(CurMF->getRegInfo())) - .addImm(BitWidth) - .addImm(0); + auto NewTypeMI = BuildMI(MIRBuilder.getMBB(), *MIRBuilder.getInsertPt(), + MIRBuilder.getDL(), TII.get(SPIRVOPcode)) + .addDef(createTypeVReg(CurMF->getRegInfo())) + .addImm(BitWidth); + // Don't add Encoding to FP type + if (!Ty->isFloatTy()) { + return NewTypeMI.addImm(0); + } else { + return NewTypeMI; + } }); add(Ty, false, NewMI); return finishCreatingSPIRVType(Ty, NewMI); diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index fc87288a4a212..0653b4eb9dfe2 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -328,6 +328,8 @@ class SPIRVInstructionSelector : public InstructionSelector { MachineInstr &I) const; bool selectFrexp(Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const; + bool selectDpdCoarse(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I, const unsigned DPdOpCode) const; // Utilities std::pair buildI32Constant(uint32_t Val, MachineInstr &I, @@ -371,6 +373,7 @@ class SPIRVInstructionSelector : public InstructionSelector { bool loadHandleBeforePosition(Register &HandleReg, const SPIRVType *ResType, GIntrinsic &HandleDef, MachineInstr &Pos) const; void decorateUsesAsNonUniform(Register &NonUniformReg) const; + void errorIfInstrOutsideShader(MachineInstr &I) const; }; bool sampledTypeIsSignedInteger(const llvm::Type *HandleType) { @@ -3140,6 +3143,58 @@ bool SPIRVInstructionSelector::wrapIntoSpecConstantOp( return Result; } +bool SPIRVInstructionSelector::selectDpdCoarse(Register ResVReg, + const SPIRVType *ResType, + MachineInstr &I, + const unsigned DPdOpCode) const { + // TODO: This should check specifically for Fragment Execution Model, but STI + // doesn't provide that information yet. See #167562 + errorIfInstrOutsideShader(I); + + // If the arg/result types are half then we need to wrap the instr in + // conversions to float + // This case occurs because a half arg/result is legal in HLSL but not spirv. + Register SrcReg = I.getOperand(2).getReg(); + SPIRVType *SrcType = GR.getSPIRVTypeForVReg(SrcReg); + unsigned BitWidth = std::min(GR.getScalarOrVectorBitWidth(SrcType), + GR.getScalarOrVectorBitWidth(ResType)); + if (BitWidth == 32) + return BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(DPdOpCode)) + .addDef(ResVReg) + .addUse(GR.getSPIRVTypeID(ResType)) + .addUse(I.getOperand(2).getReg()); + + MachineIRBuilder MIRBuilder(I); + unsigned componentCount = GR.getScalarOrVectorComponentCount(SrcType); + SPIRVType *F32ConvertTy = GR.getOrCreateSPIRVFloatType(32, I, TII); + if (componentCount != 1) + F32ConvertTy = GR.getOrCreateSPIRVVectorType(F32ConvertTy, componentCount, + MIRBuilder, false); + + const TargetRegisterClass *RegClass = GR.getRegClass(SrcType); + Register ConvertToVReg = MRI->createVirtualRegister(RegClass); + Register DpdOpVReg = MRI->createVirtualRegister(RegClass); + + bool Result = + BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpFConvert)) + .addDef(ConvertToVReg) + .addUse(GR.getSPIRVTypeID(F32ConvertTy)) + .addUse(SrcReg) + .constrainAllUses(TII, TRI, RBI); + Result &= BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(DPdOpCode)) + .addDef(DpdOpVReg) + .addUse(GR.getSPIRVTypeID(F32ConvertTy)) + .addUse(ConvertToVReg) + .constrainAllUses(TII, TRI, RBI); + Result &= + BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpFConvert)) + .addDef(ResVReg) + .addUse(GR.getSPIRVTypeID(ResType)) + .addUse(DpdOpVReg) + .constrainAllUses(TII, TRI, RBI); + return Result; +} + bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const { @@ -3528,7 +3583,12 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg, case Intrinsic::spv_unpackhalf2x16: { return selectExtInst(ResVReg, ResType, I, GL::UnpackHalf2x16); } - + case Intrinsic::spv_ddx_coarse: { + return selectDpdCoarse(ResVReg, ResType, I, SPIRV::OpDPdxCoarse); + } + case Intrinsic::spv_ddy_coarse: { + return selectDpdCoarse(ResVReg, ResType, I, SPIRV::OpDPdyCoarse); + } default: { std::string DiagMsg; raw_string_ostream OS(DiagMsg); @@ -4694,6 +4754,17 @@ bool SPIRVInstructionSelector::loadHandleBeforePosition( .constrainAllUses(TII, TRI, RBI); } +void SPIRVInstructionSelector::errorIfInstrOutsideShader( + MachineInstr &I) const { + if (!STI.isShader()) { + std::string DiagMsg; + raw_string_ostream OS(DiagMsg); + I.print(OS, true, false, false, false); + DiagMsg += " is only supported in shaders.\n"; + report_fatal_error(DiagMsg.c_str(), false); + } +} + namespace llvm { InstructionSelector * createSPIRVInstructionSelector(const SPIRVTargetMachine &TM, diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp index b8cd9c1358f00..bd754d17694b8 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp @@ -934,7 +934,8 @@ void RequirementHandler::initAvailableCapabilitiesForVulkan( Capability::UniformBufferArrayDynamicIndexing, Capability::SampledImageArrayDynamicIndexing, Capability::StorageBufferArrayDynamicIndexing, - Capability::StorageImageArrayDynamicIndexing}); + Capability::StorageImageArrayDynamicIndexing, + Capability::DerivativeControl}); // Became core in Vulkan 1.2 if (ST.isAtLeastSPIRVVer(VersionTuple(1, 5))) { @@ -2148,6 +2149,12 @@ void addInstrRequirements(const MachineInstr &MI, } break; } + case SPIRV::OpDPdxCoarse: + case SPIRV::OpDPdyCoarse: { + Reqs.addCapability(SPIRV::Capability::DerivativeControl); + break; + } + default: break; } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 621f1868d3311..864e5dc67682c 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -54688,11 +54688,14 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG, KnownBits KnownAmt = DAG.computeKnownBits(ShAmt); // Check the shift amount is byte aligned. // Check the truncation doesn't use any shifted in (zero) top bits. - // Check the shift amount doesn't depend on the original load. + // Check the shift amount doesn't depend on the original load chain. if (KnownAmt.countMinTrailingZeros() >= 3 && KnownAmt.getMaxValue().ule(SrcVT.getSizeInBits() - VT.getSizeInBits()) && - !Ld->isPredecessorOf(ShAmt.getNode())) { + none_of(Ld->uses(), [&ShAmt](SDUse &Use) { + return Use.getResNo() == 1 && + Use.getUser()->isPredecessorOf(ShAmt.getNode()); + })) { EVT PtrVT = Ld->getBasePtr().getValueType(); SDValue PtrBitOfs = DAG.getZExtOrTrunc(ShAmt, DL, PtrVT); SDValue PtrByteOfs = diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 9572f9d702e1b..e7dc366b13798 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1027,10 +1027,9 @@ static Value *canonicalizeSaturatedSubtract(const ICmpInst *ICI, return Result; } -static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal, - InstCombiner::BuilderTy &Builder) { - if (!Cmp->hasOneUse()) - return nullptr; +static Value * +canonicalizeSaturatedAddUnsigned(ICmpInst *Cmp, Value *TVal, Value *FVal, + InstCombiner::BuilderTy &Builder) { // Match unsigned saturated add with constant. Value *Cmp0 = Cmp->getOperand(0); @@ -1130,6 +1129,94 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal, return nullptr; } +static Value *canonicalizeSaturatedAddSigned(ICmpInst *Cmp, Value *TVal, + Value *FVal, + InstCombiner::BuilderTy &Builder) { + // Match saturated add with constant. + Value *Cmp0 = Cmp->getOperand(0); + Value *Cmp1 = Cmp->getOperand(1); + ICmpInst::Predicate Pred = Cmp->getPredicate(); + Value *X; + const APInt *C; + + // Canonicalize INT_MAX to true value of the select. + if (match(FVal, m_MaxSignedValue())) { + std::swap(TVal, FVal); + Pred = CmpInst::getInversePredicate(Pred); + } + + if (!match(TVal, m_MaxSignedValue())) + return nullptr; + + // sge maximum signed value is canonicalized to eq maximum signed value and + // requires special handling (a == INT_MAX) ? INT_MAX : a + 1 -> sadd.sat(a, + // 1) + if (Pred == ICmpInst::ICMP_EQ) { + if (match(FVal, m_Add(m_Specific(Cmp0), m_One())) && Cmp1 == TVal) { + return Builder.CreateBinaryIntrinsic( + Intrinsic::sadd_sat, Cmp0, ConstantInt::get(Cmp0->getType(), 1)); + } + return nullptr; + } + + // (X > Y) ? INT_MAX : (X + C) --> sadd.sat(X, C) + // (X >= Y) ? INT_MAX : (X + C) --> sadd.sat(X, C) + // where Y is INT_MAX - C or INT_MAX - C - 1, and C > 0 + if ((Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) && + match(FVal, m_Add(m_Specific(Cmp0), m_StrictlyPositive(C)))) { + APInt IntMax = + APInt::getSignedMaxValue(Cmp1->getType()->getScalarSizeInBits()); + + // For SGE, try to flip to SGT to normalize the comparison constant. + if (Pred == ICmpInst::ICMP_SGE) { + if (auto Flipped = getFlippedStrictnessPredicateAndConstant( + Pred, cast(Cmp1))) { + Pred = Flipped->first; + Cmp1 = Flipped->second; + } + } + + // Check the pattern: X > INT_MAX - C or X > INT_MAX - C - 1 + if (Pred == ICmpInst::ICMP_SGT && + (match(Cmp1, m_SpecificIntAllowPoison(IntMax - *C)) || + match(Cmp1, m_SpecificIntAllowPoison(IntMax - *C - 1)))) + return Builder.CreateBinaryIntrinsic( + Intrinsic::sadd_sat, Cmp0, ConstantInt::get(Cmp0->getType(), *C)); + } + + // Canonicalize predicate to less-than or less-or-equal-than. + if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) { + std::swap(Cmp0, Cmp1); + Pred = CmpInst::getSwappedPredicate(Pred); + } + + if (Pred != ICmpInst::ICMP_SLT && Pred != ICmpInst::ICMP_SLE) + return nullptr; + + if (match(Cmp0, m_NSWSub(m_MaxSignedValue(), m_Value(X))) && + match(FVal, m_c_Add(m_Specific(X), m_Specific(Cmp1)))) { + // (INT_MAX - X s< Y) ? INT_MAX : (X + Y) --> sadd.sat(X, Y) + // (INT_MAX - X s< Y) ? INT_MAX : (Y + X) --> sadd.sat(X, Y) + return Builder.CreateBinaryIntrinsic(Intrinsic::sadd_sat, X, Cmp1); + } + + return nullptr; +} + +static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal, + InstCombiner::BuilderTy &Builder) { + if (!Cmp->hasOneUse()) + return nullptr; + + if (Value *V = canonicalizeSaturatedAddUnsigned(Cmp, TVal, FVal, Builder)) + return V; + + if (Value *V = canonicalizeSaturatedAddSigned(Cmp, TVal, FVal, Builder)) + return V; + + return nullptr; +} + /// Try to match patterns with select and subtract as absolute difference. static Value *foldAbsDiff(ICmpInst *Cmp, Value *TVal, Value *FVal, InstCombiner::BuilderTy &Builder) { diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 7c364f86fb0e8..c9f249a8733ac 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" @@ -248,6 +249,11 @@ static cl::opt "platforms that support this"), cl::Hidden, cl::init(true)); +static cl::opt + ClShadowAddrSpace("asan-shadow-addr-space", + cl::desc("Address space for pointers to the shadow map"), + cl::Hidden, cl::init(0)); + static cl::opt ClWithIfuncSuppressRemat( "asan-with-ifunc-suppress-remat", cl::desc("Suppress rematerialization of dynamic shadow address by passing " @@ -436,6 +442,15 @@ static cl::opt ClOverrideDestructorKind( "Use global destructors")), cl::init(AsanDtorKind::Invalid), cl::Hidden); +static SmallSet SrcAddrSpaces; +static cl::list ClAddrSpaces( + "asan-instrument-address-spaces", + cl::desc("Only instrument variables in the specified address spaces."), + cl::Hidden, cl::CommaSeparated, cl::ZeroOrMore, + cl::callback([](const unsigned &AddrSpace) { + SrcAddrSpaces.insert(AddrSpace); + })); + // Debug flags. static cl::opt ClDebug("asan-debug", cl::desc("debug"), cl::Hidden, @@ -503,6 +518,7 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, bool IsAMDGPU = TargetTriple.isAMDGPU(); bool IsHaiku = TargetTriple.isOSHaiku(); bool IsWasm = TargetTriple.isWasm(); + bool IsBPF = TargetTriple.isBPF(); ShadowMapping Mapping; @@ -579,6 +595,8 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, else if (IsHaiku && IsX86_64) Mapping.Offset = (kSmallX86_64ShadowOffsetBase & (kSmallX86_64ShadowOffsetAlignMask << Mapping.Scale)); + else if (IsBPF) + Mapping.Offset = kDynamicShadowSentinel; else Mapping.Offset = kDefaultShadowOffset64; } @@ -1355,11 +1373,25 @@ static bool GlobalWasGeneratedByCompiler(GlobalVariable *G) { static bool isUnsupportedAMDGPUAddrspace(Value *Addr) { Type *PtrTy = cast(Addr->getType()->getScalarType()); unsigned int AddrSpace = PtrTy->getPointerAddressSpace(); + // Globals in address space 1 and 4 are supported for AMDGPU. if (AddrSpace == 3 || AddrSpace == 5) return true; return false; } +static bool isSupportedAddrspace(const Triple &TargetTriple, Value *Addr) { + Type *PtrTy = cast(Addr->getType()->getScalarType()); + unsigned int AddrSpace = PtrTy->getPointerAddressSpace(); + + if (!SrcAddrSpaces.empty()) + return SrcAddrSpaces.count(AddrSpace); + + if (TargetTriple.isAMDGPU()) + return !isUnsupportedAMDGPUAddrspace(Addr); + + return AddrSpace == 0; +} + Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) { // Shadow >> scale Shadow = IRB.CreateLShr(Shadow, Mapping.Scale); @@ -1423,10 +1455,9 @@ bool AddressSanitizer::isInterestingAlloca(const AllocaInst &AI) { } bool AddressSanitizer::ignoreAccess(Instruction *Inst, Value *Ptr) { - // Instrument accesses from different address spaces only for AMDGPU. - Type *PtrTy = cast(Ptr->getType()->getScalarType()); - if (PtrTy->getPointerAddressSpace() != 0 && - !(TargetTriple.isAMDGPU() && !isUnsupportedAMDGPUAddrspace(Ptr))) + // Check whether the target supports sanitizing the address space + // of the pointer. + if (!isSupportedAddrspace(TargetTriple, Ptr)) return true; // Ignore swifterror addresses. @@ -1942,7 +1973,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, Type *ShadowTy = IntegerType::get(*C, std::max(8U, TypeStoreSize >> Mapping.Scale)); - Type *ShadowPtrTy = PointerType::get(*C, 0); + Type *ShadowPtrTy = PointerType::get(*C, ClShadowAddrSpace); Value *ShadowPtr = memToShadow(AddrLong, IRB); const uint64_t ShadowAlign = std::max(Alignment.valueOrOne().value() >> Mapping.Scale, 1); @@ -2089,9 +2120,7 @@ bool ModuleAddressSanitizer::shouldInstrumentGlobal(GlobalVariable *G) const { return false; if (!Ty->isSized()) return false; if (!G->hasInitializer()) return false; - // Globals in address space 1 and 4 are supported for AMDGPU. - if (G->getAddressSpace() && - !(TargetTriple.isAMDGPU() && !isUnsupportedAMDGPUAddrspace(G))) + if (!isSupportedAddrspace(TargetTriple, G)) return false; if (GlobalWasGeneratedByCompiler(G)) return false; // Our own globals. // Two problems with thread-locals: diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index f533a47150a7b..741392247c0d6 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -152,11 +152,12 @@ class VPBuilder { /// its underlying Instruction. VPInstruction *createNaryOp(unsigned Opcode, ArrayRef Operands, Instruction *Inst = nullptr, + const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {}, DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "") { VPInstruction *NewVPInst = tryInsertInstruction( - new VPInstruction(Opcode, Operands, {}, MD, DL, Name)); + new VPInstruction(Opcode, Operands, Flags, MD, DL, Name)); NewVPInst->setUnderlyingValue(Inst); return NewVPInst; } @@ -329,7 +330,7 @@ class VPBuilder { else if (Opcode == Instruction::ZExt) Flags = VPIRFlags::NonNegFlagsTy(false); return tryInsertInstruction( - new VPWidenCastRecipe(Opcode, Op, ResultTy, Flags)); + new VPWidenCastRecipe(Opcode, Op, ResultTy, nullptr, Flags)); } VPScalarIVStepsRecipe * diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 356d759b94799..c680b6fca84cd 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7750,7 +7750,7 @@ VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(VPInstruction *VPI, }, Range); if (ShouldUseVectorIntrinsic) - return new VPWidenIntrinsicRecipe(*CI, ID, Ops, CI->getType(), *VPI, + return new VPWidenIntrinsicRecipe(*CI, ID, Ops, CI->getType(), *VPI, *VPI, VPI->getDebugLoc()); Function *Variant = nullptr; @@ -7804,7 +7804,8 @@ VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(VPInstruction *VPI, } Ops.push_back(VPI->getOperand(VPI->getNumOperands() - 1)); - return new VPWidenCallRecipe(CI, Variant, Ops, VPI->getDebugLoc()); + return new VPWidenCallRecipe(CI, Variant, Ops, *VPI, *VPI, + VPI->getDebugLoc()); } return nullptr; @@ -7842,7 +7843,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(VPInstruction *VPI) { auto *SafeRHS = Builder.createSelect(Mask, Ops[1], One, VPI->getDebugLoc()); Ops[1] = SafeRHS; - return new VPWidenRecipe(*I, Ops, *VPI, VPI->getDebugLoc()); + return new VPWidenRecipe(*I, Ops, *VPI, *VPI, VPI->getDebugLoc()); } [[fallthrough]]; } @@ -7888,7 +7889,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(VPInstruction *VPI) { // For other binops, the legacy cost model only checks the second operand. NewOps[1] = GetConstantViaSCEV(NewOps[1]); } - return new VPWidenRecipe(*I, NewOps, *VPI, VPI->getDebugLoc()); + return new VPWidenRecipe(*I, NewOps, *VPI, *VPI, VPI->getDebugLoc()); } case Instruction::ExtractValue: { SmallVector NewOps(VPI->operands()); @@ -7896,7 +7897,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(VPInstruction *VPI) { assert(EVI->getNumIndices() == 1 && "Expected one extractvalue index"); unsigned Idx = EVI->getIndices()[0]; NewOps.push_back(Plan.getConstantInt(32, Idx)); - return new VPWidenRecipe(*I, NewOps, *VPI, VPI->getDebugLoc()); + return new VPWidenRecipe(*I, NewOps, *VPI, *VPI, VPI->getDebugLoc()); } }; } @@ -7981,7 +7982,8 @@ VPReplicateRecipe *VPRecipeBuilder::handleReplication(VPInstruction *VPI, (Range.Start.isScalable() && isa(I))) && "Should not predicate a uniform recipe"); auto *Recipe = - new VPReplicateRecipe(I, VPI->operands(), IsUniform, BlockInMask, *VPI); + new VPReplicateRecipe(I, VPI->operands(), IsUniform, BlockInMask, *VPI, + *VPI, VPI->getDebugLoc()); return Recipe; } @@ -8231,17 +8233,19 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R, return nullptr; if (VPI->getOpcode() == Instruction::GetElementPtr) - return new VPWidenGEPRecipe(cast(Instr), R->operands()); + return new VPWidenGEPRecipe(cast(Instr), R->operands(), + *VPI, VPI->getDebugLoc()); if (VPI->getOpcode() == Instruction::Select) - return new VPWidenSelectRecipe(*cast(Instr), R->operands(), - *VPI); + return new VPWidenSelectRecipe(cast(Instr), R->operands(), *VPI, + *VPI, VPI->getDebugLoc()); if (Instruction::isCast(VPI->getOpcode())) { - auto *CastR = cast(R); auto *CI = cast(Instr); + auto *CastR = cast(VPI); return new VPWidenCastRecipe(CI->getOpcode(), VPI->getOperand(0), - CastR->getResultType(), *CI, *VPI); + CastR->getResultType(), CI, *VPI, *VPI, + VPI->getDebugLoc()); } return tryToWiden(VPI); @@ -8269,8 +8273,8 @@ VPRecipeBuilder::tryToCreatePartialReduction(VPInstruction *Reduction, SmallVector Ops; Ops.push_back(Plan.getOrAddLiveIn(Zero)); Ops.push_back(BinOp); - BinOp = new VPWidenRecipe(*ReductionI, Ops, VPIRMetadata(), - ReductionI->getDebugLoc()); + BinOp = new VPWidenRecipe(*ReductionI, Ops, VPIRFlags(*ReductionI), + VPIRMetadata(), ReductionI->getDebugLoc()); Builder.insert(BinOp->getDefiningRecipe()); ReductionOpcode = Instruction::Add; } @@ -8454,9 +8458,10 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( Legal->isInvariantAddressOfReduction(SI->getPointerOperand())) { // Only create recipe for the final invariant store of the reduction. if (Legal->isInvariantStoreOfReduction(SI)) { + auto *VPI = cast(SingleDef); auto *Recipe = new VPReplicateRecipe( - SI, R.operands(), true /* IsUniform */, nullptr /*Mask*/, - *cast(SingleDef)); + SI, R.operands(), true /* IsUniform */, nullptr /*Mask*/, *VPI, + *VPI, VPI->getDebugLoc()); Recipe->insertBefore(*MiddleVPBB, MBIP); } R.eraseFromParent(); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index fc29ab0c84093..fedbcfb6bd32a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -882,14 +882,6 @@ class VPIRFlags { /// A pure-virtual common base class for recipes defining a single VPValue and /// using IR flags. struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags { - VPRecipeWithIRFlags(const unsigned char SC, ArrayRef Operands, - DebugLoc DL = DebugLoc::getUnknown()) - : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags() {} - - VPRecipeWithIRFlags(const unsigned char SC, ArrayRef Operands, - Instruction &I) - : VPSingleDefRecipe(SC, Operands, &I, I.getDebugLoc()), VPIRFlags(I) {} - VPRecipeWithIRFlags(const unsigned char SC, ArrayRef Operands, const VPIRFlags &Flags, DebugLoc DL = DebugLoc::getUnknown()) @@ -1474,9 +1466,12 @@ class LLVM_ABI_FOR_TEST VPWidenRecipe : public VPRecipeWithIRFlags, VPIRMetadata(Metadata), Opcode(Opcode) {} VPWidenRecipe(Instruction &I, ArrayRef Operands, - const VPIRMetadata &Metadata, DebugLoc DL) - : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I), - VPIRMetadata(Metadata), Opcode(I.getOpcode()) {} + const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {}, + DebugLoc DL = {}) + : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, Flags, DL), + VPIRMetadata(Metadata), Opcode(I.getOpcode()) { + setUnderlyingValue(&I); + } ~VPWidenRecipe() override = default; @@ -1517,30 +1512,22 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { public: VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, - CastInst &UI, const VPIRMetadata &Metadata) - : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), - VPIRMetadata(Metadata), Opcode(Opcode), ResultTy(ResultTy) { - assert(UI.getOpcode() == Opcode && - "opcode of underlying cast doesn't match"); - } - VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, - const VPIRFlags &Flags = {}, + CastInst *CI = nullptr, const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {}, DebugLoc DL = DebugLoc::getUnknown()) : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, Flags, DL), VPIRMetadata(Metadata), Opcode(Opcode), ResultTy(ResultTy) { assert(flagsValidForOpcode(Opcode) && "Set flags not supported for the provided opcode"); + setUnderlyingValue(CI); } ~VPWidenCastRecipe() override = default; VPWidenCastRecipe *clone() override { - auto *New = new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy, *this, - *this, getDebugLoc()); - if (auto *UV = getUnderlyingValue()) - New->setUnderlyingValue(UV); - return New; + return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy, + cast_or_null(getUnderlyingValue()), + *this, *this, getDebugLoc()); } VP_CLASSOF_IMPL(VPDef::VPWidenCastSC) @@ -1585,13 +1572,17 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { public: VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef CallArguments, Type *Ty, + const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {}, DebugLoc DL = DebugLoc::getUnknown()) - : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, CI), + : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, Flags, + DL), VPIRMetadata(MD), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty), MayReadFromMemory(CI.mayReadFromMemory()), MayWriteToMemory(CI.mayWriteToMemory()), - MayHaveSideEffects(CI.mayHaveSideEffects()) {} + MayHaveSideEffects(CI.mayHaveSideEffects()) { + setUnderlyingValue(&CI); + } VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef CallArguments, Type *Ty, @@ -1617,7 +1608,7 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { VPWidenIntrinsicRecipe *clone() override { if (Value *CI = getUnderlyingValue()) return new VPWidenIntrinsicRecipe(*cast(CI), VectorIntrinsicID, - operands(), ResultTy, *this, + operands(), ResultTy, *this, *this, getDebugLoc()); return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(), ResultTy, *this, *this, getDebugLoc()); @@ -1671,10 +1662,11 @@ class LLVM_ABI_FOR_TEST VPWidenCallRecipe : public VPRecipeWithIRFlags, public: VPWidenCallRecipe(Value *UV, Function *Variant, ArrayRef CallArguments, - DebugLoc DL = DebugLoc::getUnknown()) - : VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments, - *cast(UV)), - VPIRMetadata(*cast(UV)), Variant(Variant) { + const VPIRFlags &Flags = {}, + const VPIRMetadata &Metadata = {}, DebugLoc DL = {}) + : VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments, Flags, DL), + VPIRMetadata(Metadata), Variant(Variant) { + setUnderlyingValue(UV); assert( isa(getOperand(getNumOperands() - 1)->getLiveInIRValue()) && "last operand must be the called function"); @@ -1684,7 +1676,7 @@ class LLVM_ABI_FOR_TEST VPWidenCallRecipe : public VPRecipeWithIRFlags, VPWidenCallRecipe *clone() override { return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(), - getDebugLoc()); + *this, *this, getDebugLoc()); } VP_CLASSOF_IMPL(VPDef::VPWidenCallSC) @@ -1761,16 +1753,19 @@ class VPHistogramRecipe : public VPRecipeBase { /// instruction. struct LLVM_ABI_FOR_TEST VPWidenSelectRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { - VPWidenSelectRecipe(SelectInst &I, ArrayRef Operands, - const VPIRMetadata &MD = {}) - : VPRecipeWithIRFlags(VPDef::VPWidenSelectSC, Operands, I), - VPIRMetadata(MD) {} + VPWidenSelectRecipe(SelectInst *SI, ArrayRef Operands, + const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {}, + DebugLoc DL = {}) + : VPRecipeWithIRFlags(VPDef::VPWidenSelectSC, Operands, Flags, DL), + VPIRMetadata(MD) { + setUnderlyingValue(SI); + } ~VPWidenSelectRecipe() override = default; VPWidenSelectRecipe *clone() override { - return new VPWidenSelectRecipe(*cast(getUnderlyingInstr()), - operands(), *this); + return new VPWidenSelectRecipe(cast(getUnderlyingInstr()), + operands(), *this, *this, getDebugLoc()); } VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC) @@ -1822,9 +1817,12 @@ class LLVM_ABI_FOR_TEST VPWidenGEPRecipe : public VPRecipeWithIRFlags { } public: - VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef Operands) - : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP), + VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef Operands, + const VPIRFlags &Flags = {}, + DebugLoc DL = DebugLoc::getUnknown()) + : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, Flags, DL), SourceElementTy(GEP->getSourceElementType()) { + setUnderlyingValue(GEP); SmallVector> Metadata; (void)Metadata; getMetadataToPropagate(GEP, Metadata); @@ -1835,7 +1833,7 @@ class LLVM_ABI_FOR_TEST VPWidenGEPRecipe : public VPRecipeWithIRFlags { VPWidenGEPRecipe *clone() override { return new VPWidenGEPRecipe(cast(getUnderlyingInstr()), - operands()); + operands(), *this, getDebugLoc()); } VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC) @@ -2929,10 +2927,12 @@ class LLVM_ABI_FOR_TEST VPReplicateRecipe : public VPRecipeWithIRFlags, public: VPReplicateRecipe(Instruction *I, ArrayRef Operands, bool IsSingleScalar, VPValue *Mask = nullptr, - VPIRMetadata Metadata = {}) - : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I), + const VPIRFlags &Flags = {}, VPIRMetadata Metadata = {}, + DebugLoc DL = DebugLoc::getUnknown()) + : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, Flags, DL), VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar), IsPredicated(Mask) { + setUnderlyingValue(I); if (Mask) addOperand(Mask); } @@ -2940,9 +2940,9 @@ class LLVM_ABI_FOR_TEST VPReplicateRecipe : public VPRecipeWithIRFlags, ~VPReplicateRecipe() override = default; VPReplicateRecipe *clone() override { - auto *Copy = - new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsSingleScalar, - isPredicated() ? getMask() : nullptr, *this); + auto *Copy = new VPReplicateRecipe( + getUnderlyingInstr(), operands(), IsSingleScalar, + isPredicated() ? getMask() : nullptr, *this, *this, getDebugLoc()); Copy->transferFlags(*this); return Copy; } diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index 612202d049774..dbbde1cafa9f2 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -190,7 +190,7 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB, // recipes. if (Br->isConditional()) { VPValue *Cond = getOrCreateVPOperand(Br->getCondition()); - VPIRBuilder.createNaryOp(VPInstruction::BranchOnCond, {Cond}, Inst, + VPIRBuilder.createNaryOp(VPInstruction::BranchOnCond, {Cond}, Inst, {}, VPIRMetadata(*Inst), Inst->getDebugLoc()); } @@ -205,7 +205,7 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB, SmallVector Ops = {getOrCreateVPOperand(SI->getCondition())}; for (auto Case : SI->cases()) Ops.push_back(getOrCreateVPOperand(Case.getCaseValue())); - VPIRBuilder.createNaryOp(Instruction::Switch, Ops, Inst, + VPIRBuilder.createNaryOp(Instruction::Switch, Ops, Inst, {}, VPIRMetadata(*Inst), Inst->getDebugLoc()); continue; } @@ -255,13 +255,14 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB, if (auto *CI = dyn_cast(Inst)) { NewR = VPIRBuilder.createScalarCast(CI->getOpcode(), VPOperands[0], CI->getType(), CI->getDebugLoc(), - {}, MD); + VPIRFlags(*CI), MD); NewR->setUnderlyingValue(CI); } else { // Build VPInstruction for any arbitrary Instruction without specific // representation in VPlan. - NewR = VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst, MD, - Inst->getDebugLoc()); + NewR = + VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst, + VPIRFlags(*Inst), MD, Inst->getDebugLoc()); } } diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index fca6554ad77c6..ef36e29aaa5c4 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -2056,24 +2056,26 @@ bool VPIRFlags::flagsValidForOpcode(unsigned Opcode) const { switch (OpType) { case OperationType::OverflowingBinOp: return Opcode == Instruction::Add || Opcode == Instruction::Sub || - Opcode == Instruction::Mul || + Opcode == Instruction::Mul || Opcode == Instruction::Shl || Opcode == VPInstruction::VPInstruction::CanonicalIVIncrementForPart; case OperationType::Trunc: return Opcode == Instruction::Trunc; case OperationType::DisjointOp: return Opcode == Instruction::Or; case OperationType::PossiblyExactOp: - return Opcode == Instruction::AShr; + return Opcode == Instruction::AShr || Opcode == Instruction::LShr || + Opcode == Instruction::UDiv || Opcode == Instruction::SDiv; case OperationType::GEPOp: return Opcode == Instruction::GetElementPtr || Opcode == VPInstruction::PtrAdd || Opcode == VPInstruction::WidePtrAdd; case OperationType::FPMathOp: - return Opcode == Instruction::FAdd || Opcode == Instruction::FMul || - Opcode == Instruction::FSub || Opcode == Instruction::FNeg || - Opcode == Instruction::FDiv || Opcode == Instruction::FRem || - Opcode == Instruction::FPExt || Opcode == Instruction::FPTrunc || - Opcode == Instruction::FCmp || Opcode == Instruction::Select || + return Opcode == Instruction::Call || Opcode == Instruction::FAdd || + Opcode == Instruction::FMul || Opcode == Instruction::FSub || + Opcode == Instruction::FNeg || Opcode == Instruction::FDiv || + Opcode == Instruction::FRem || Opcode == Instruction::FPExt || + Opcode == Instruction::FPTrunc || Opcode == Instruction::FCmp || + Opcode == Instruction::Select || Opcode == VPInstruction::WideIVStep || Opcode == VPInstruction::ReductionStartVector || Opcode == VPInstruction::ComputeReductionResult; diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 26563242de283..25557f1d5d651 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -104,24 +104,26 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes( nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/, *VPI, Ingredient.getDebugLoc()); } else if (GetElementPtrInst *GEP = dyn_cast(Inst)) { - NewRecipe = new VPWidenGEPRecipe(GEP, Ingredient.operands()); + NewRecipe = new VPWidenGEPRecipe(GEP, Ingredient.operands(), *VPI, + Ingredient.getDebugLoc()); } else if (CallInst *CI = dyn_cast(Inst)) { Intrinsic::ID VectorID = getVectorIntrinsicIDForCall(CI, &TLI); if (VectorID == Intrinsic::not_intrinsic) return false; NewRecipe = new VPWidenIntrinsicRecipe( *CI, getVectorIntrinsicIDForCall(CI, &TLI), - drop_end(Ingredient.operands()), CI->getType(), *VPI, - CI->getDebugLoc()); + drop_end(Ingredient.operands()), CI->getType(), VPIRFlags(*CI), + *VPI, CI->getDebugLoc()); } else if (SelectInst *SI = dyn_cast(Inst)) { - NewRecipe = new VPWidenSelectRecipe(*SI, Ingredient.operands(), *VPI); + NewRecipe = new VPWidenSelectRecipe(SI, Ingredient.operands(), *VPI, + *VPI, Ingredient.getDebugLoc()); } else if (auto *CI = dyn_cast(Inst)) { - NewRecipe = - new VPWidenCastRecipe(CI->getOpcode(), Ingredient.getOperand(0), - CI->getType(), *CI, *VPI); + NewRecipe = new VPWidenCastRecipe( + CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI, + VPIRFlags(*CI), VPIRMetadata(*CI)); } else { NewRecipe = new VPWidenRecipe(*Inst, Ingredient.operands(), *VPI, - Ingredient.getDebugLoc()); + *VPI, Ingredient.getDebugLoc()); } } @@ -226,7 +228,8 @@ static bool sinkScalarOperands(VPlan &Plan) { // then cloning should be sufficient here. Instruction *I = SinkCandidate->getUnderlyingInstr(); Clone = new VPReplicateRecipe(I, SinkCandidate->operands(), true, - nullptr /*Mask*/, *SinkCandidateRepR); + nullptr /*Mask*/, *SinkCandidateRepR, + *SinkCandidateRepR); // TODO: add ".cloned" suffix to name of Clone's VPValue. } else { Clone = SinkCandidate->clone(); @@ -385,7 +388,8 @@ static VPRegionBlock *createReplicateRegion(VPReplicateRecipe *PredRecipe, // mask but in the replicate region. auto *RecipeWithoutMask = new VPReplicateRecipe( PredRecipe->getUnderlyingInstr(), drop_end(PredRecipe->operands()), - PredRecipe->isSingleScalar(), nullptr /*Mask*/, *PredRecipe); + PredRecipe->isSingleScalar(), nullptr /*Mask*/, *PredRecipe, *PredRecipe, + PredRecipe->getDebugLoc()); auto *Pred = Plan.createVPBasicBlock(Twine(RegionName) + ".if", RecipeWithoutMask); @@ -691,7 +695,7 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) { // analysis. auto Users = collectUsersRecursively(PhiR); for (VPUser *U : reverse(Users)) { - auto *Def = dyn_cast(U); + auto *Def = dyn_cast(U); auto *RepR = dyn_cast(U); // Skip recipes that shouldn't be narrowed. if (!Def || !isa(Def) || @@ -704,7 +708,8 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) { continue; auto *Clone = new VPReplicateRecipe(Def->getUnderlyingInstr(), - Def->operands(), /*IsUniform*/ true); + Def->operands(), /*IsUniform*/ true, + /*Mask*/ nullptr, /*Flags*/ *Def); Clone->insertAfter(Def); Def->replaceAllUsesWith(Clone); } @@ -1423,12 +1428,13 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) { if (RepR && (RepR->isSingleScalar() || RepR->isPredicated())) continue; - auto *RepOrWidenR = cast(&R); + auto *RepOrWidenR = cast(&R); if (RepR && isa(RepR->getUnderlyingInstr()) && vputils::isSingleScalar(RepR->getOperand(1))) { auto *Clone = new VPReplicateRecipe( RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(), - true /*IsSingleScalar*/, nullptr /*Mask*/, *RepR /*Metadata*/); + true /*IsSingleScalar*/, nullptr /*Mask*/, *RepR /*Flags*/, + *RepR /*Metadata*/, RepR->getDebugLoc()); Clone->insertBefore(RepOrWidenR); unsigned ExtractOpc = vputils::isUniformAcrossVFsAndUFs(RepR->getOperand(1)) @@ -1469,9 +1475,9 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) { })) continue; - auto *Clone = new VPReplicateRecipe(RepOrWidenR->getUnderlyingInstr(), - RepOrWidenR->operands(), - true /*IsSingleScalar*/); + auto *Clone = new VPReplicateRecipe( + RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(), + true /*IsSingleScalar*/, nullptr, *RepOrWidenR); Clone->insertBefore(RepOrWidenR); RepOrWidenR->replaceAllUsesWith(Clone); if (isDeadRecipe(*RepOrWidenR)) @@ -3824,15 +3830,15 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red, Ext0->getOpcode() == Ext1->getOpcode() && IsMulAccValidAndClampRange(Mul, Ext0, Ext1, Ext) && Mul->hasOneUse()) { auto *NewExt0 = new VPWidenCastRecipe( - Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(), *Ext0, - *Ext0, Ext0->getDebugLoc()); + Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(), nullptr, + *Ext0, *Ext0, Ext0->getDebugLoc()); NewExt0->insertBefore(Ext0); VPWidenCastRecipe *NewExt1 = NewExt0; if (Ext0 != Ext1) { NewExt1 = new VPWidenCastRecipe(Ext1->getOpcode(), Ext1->getOperand(0), - Ext->getResultType(), *Ext1, *Ext1, - Ext1->getDebugLoc()); + Ext->getResultType(), nullptr, *Ext1, + *Ext1, Ext1->getDebugLoc()); NewExt1->insertBefore(Ext1); } Mul->setOperand(0, NewExt0); @@ -4353,7 +4359,7 @@ narrowInterleaveGroupOp(VPValue *V, SmallPtrSetImpl &NarrowedOps) { // process one original iteration. auto *N = new VPReplicateRecipe(&WideLoad->getIngredient(), {PtrOp}, /*IsUniform*/ true, - /*Mask*/ nullptr, *WideLoad); + /*Mask*/ nullptr, {}, *WideLoad); N->insertBefore(WideLoad); NarrowedOps.insert(N); return N; diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp index d4b8b72beb942..d76d2ed5f1c76 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp @@ -518,9 +518,9 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy, // TODO: have cloning of replicate recipes also provide the desired result // coupled with setting its operands to NewOps (deriving IsSingleScalar and // Mask from the operands?) - New = - new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps, - /*IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR); + New = new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps, + /*IsSingleScalar=*/true, /*Mask=*/nullptr, + *RepR, *RepR, RepR->getDebugLoc()); } else { assert(isa(DefR) && "DefR must be a VPReplicateRecipe or VPInstruction"); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-copy-vector-crash.ll b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-copy-vector-crash.ll new file mode 100644 index 0000000000000..f15253682c336 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-copy-vector-crash.ll @@ -0,0 +1,56 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -global-isel -o - %s | FileCheck %s + +target triple = "aarch64-unknown-unknown" + +; Check we don't crash here when computing known bits. + +define <4 x i32> @test(<8 x i16> %in, i1 %continue) { +; CHECK-LABEL: test: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: mov w12, wzr +; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: mov w9, #2 // =0x2 +; CHECK-NEXT: mov w10, #0 // =0x0 +; CHECK-NEXT: .LBB0_1: // %loop +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: mov w11, w12 +; CHECK-NEXT: mov w12, w12 +; CHECK-NEXT: str q0, [sp] +; CHECK-NEXT: and x12, x12, #0x7 +; CHECK-NEXT: umull x12, w12, w9 +; CHECK-NEXT: ldrb w12, [x8, x12] +; CHECK-NEXT: cmp w12, #0 +; CHECK-NEXT: cset w12, eq +; CHECK-NEXT: fmov s1, w12 +; CHECK-NEXT: mov v1.b[1], w10 +; CHECK-NEXT: mov v1.b[2], w10 +; CHECK-NEXT: mov v1.b[3], w10 +; CHECK-NEXT: fmov w12, s1 +; CHECK-NEXT: tbz w0, #0, .LBB0_1 +; CHECK-NEXT: // %bb.2: // %exit +; CHECK-NEXT: fmov s0, w11 +; CHECK-NEXT: mov v0.s[1], wzr +; CHECK-NEXT: mov v0.s[2], wzr +; CHECK-NEXT: mov v0.s[3], wzr +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret +entry: + br label %loop + +exit: + %result = insertelement <4 x i32> zeroinitializer, i32 %index, i64 0 + ret <4 x i32> %result + +loop: + %index = phi i32 [ 0, %entry ], [ %insert.bitcast, %loop ] + %extracted = extractelement <8 x i16> %in, i32 %index + %masked = and i16 %extracted, 255 + %maskedIsZero = icmp eq i16 %masked, 0 + %maskedIsZero.zext = zext i1 %maskedIsZero to i8 + %insert = insertelement <4 x i8> zeroinitializer, i8 %maskedIsZero.zext, i64 0 + %insert.bitcast = bitcast <4 x i8> %insert to i32 + br i1 %continue, label %exit, label %loop +} diff --git a/llvm/test/CodeGen/AArch64/ccmp-cse.ll b/llvm/test/CodeGen/AArch64/ccmp-cse.ll new file mode 100644 index 0000000000000..657498172a04c --- /dev/null +++ b/llvm/test/CodeGen/AArch64/ccmp-cse.ll @@ -0,0 +1,139 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s + +define i64 @test_single_or(i64 %unrelated, i64 %x, i64 %y) nounwind { +; CHECK-LABEL: test_single_or: +; CHECK: // %bb.0: +; CHECK-NEXT: subs x8, x2, x1 +; CHECK-NEXT: ccmp x2, x0, #2, hs +; CHECK-NEXT: csel x0, xzr, x8, hi +; CHECK-NEXT: ret + %cmp.match = icmp ult i64 %y, %x + %cmp.nomatch = icmp ugt i64 %y, %unrelated + %or.cond = or i1 %cmp.match, %cmp.nomatch + %sub.reuse = sub nuw i64 %y, %x + %res = select i1 %or.cond, i64 0, i64 %sub.reuse + ret i64 %res +} + +define i64 @test_two_ors(i64 %unrelated, i64 %x, i64 %y) nounwind { +; CHECK-LABEL: test_two_ors: +; CHECK: // %bb.0: +; CHECK-NEXT: subs x8, x2, x1 +; CHECK-NEXT: ccmp x0, x1, #0, hs +; CHECK-NEXT: ccmp x2, x0, #2, hs +; CHECK-NEXT: csel x0, xzr, x8, hi +; CHECK-NEXT: ret + %cmp.match = icmp ult i64 %y, %x + %cmp.nomatch1 = icmp ult i64 %unrelated, %x + %cmp.nomatch2 = icmp ugt i64 %y, %unrelated + %or.nomatch = or i1 %cmp.nomatch1, %cmp.nomatch2 + %or.cond = or i1 %cmp.match, %or.nomatch + %sub.reuse = sub nuw i64 %y, %x + %res = select i1 %or.cond, i64 0, i64 %sub.reuse + ret i64 %res +} + +define i64 @test_two_ors_commuted(i64 %unrelated, i64 %x, i64 %y) nounwind { +; CHECK-LABEL: test_two_ors_commuted: +; CHECK: // %bb.0: +; CHECK-NEXT: subs x8, x2, x1 +; CHECK-NEXT: ccmp x0, x1, #0, hs +; CHECK-NEXT: ccmp x2, x0, #2, hs +; CHECK-NEXT: csel x0, xzr, x8, hi +; CHECK-NEXT: ret + %cmp.match = icmp ult i64 %y, %x + %cmp.nomatch1 = icmp ult i64 %unrelated, %x + %cmp.nomatch2 = icmp ugt i64 %y, %unrelated + %or.nomatch = or i1 %cmp.nomatch1, %cmp.nomatch2 + %or.cond = or i1 %or.nomatch, %cmp.match + %sub.reuse = sub nuw i64 %y, %x + %res = select i1 %or.cond, i64 0, i64 %sub.reuse + ret i64 %res +} + +define i64 @test_single_and(i64 %unrelated, i64 %x, i64 %y) nounwind { +; CHECK-LABEL: test_single_and: +; CHECK: // %bb.0: +; CHECK-NEXT: subs x8, x2, x1 +; CHECK-NEXT: ccmp x2, x0, #0, lo +; CHECK-NEXT: csel x0, xzr, x8, hi +; CHECK-NEXT: ret + %cmp.match = icmp ult i64 %y, %x + %cmp.nomatch = icmp ugt i64 %y, %unrelated + %and.cond = and i1 %cmp.match, %cmp.nomatch + %sub.reuse = sub nuw i64 %y, %x + %res = select i1 %and.cond, i64 0, i64 %sub.reuse + ret i64 %res +} + +define i64 @test_single_or_sub_commuted(i64 %unrelated, i64 %x, i64 %y) nounwind { +; CHECK-LABEL: test_single_or_sub_commuted: +; CHECK: // %bb.0: +; CHECK-NEXT: subs x8, x1, x2 +; CHECK-NEXT: ccmp x2, x0, #2, ls +; CHECK-NEXT: csel x0, xzr, x8, hi +; CHECK-NEXT: ret + %cmp.match = icmp ult i64 %y, %x + %cmp.nomatch = icmp ugt i64 %y, %unrelated + %or.cond = or i1 %cmp.match, %cmp.nomatch + %sub.reuse = sub nuw i64 %x, %y + %res = select i1 %or.cond, i64 0, i64 %sub.reuse + ret i64 %res +} + +; Negative test: We must negate the or operation, hence this must come first. +define i64 @test_mustbefirst_overrides_preferfirst_negative(i64 %unrelated, i64 %x, i64 %y) nounwind { +; CHECK-LABEL: test_mustbefirst_overrides_preferfirst_negative: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp x2, x0 +; CHECK-NEXT: sub x8, x2, x1 +; CHECK-NEXT: ccmp x0, x1, #0, ls +; CHECK-NEXT: ccmp x2, x1, #2, lo +; CHECK-NEXT: csel x0, xzr, x8, lo +; CHECK-NEXT: ret + %cmp.match = icmp ult i64 %y, %x + %cmp.nomatch1 = icmp ult i64 %unrelated, %x + %cmp.nomatch2 = icmp ugt i64 %y, %unrelated + %or.nomatch = or i1 %cmp.nomatch1, %cmp.nomatch2 + %and.cond = and i1 %or.nomatch, %cmp.match + %sub.reuse = sub nuw i64 %y, %x + %res = select i1 %and.cond, i64 0, i64 %sub.reuse + ret i64 %res +} + +; Negative test: There is no analogue of SUBS for floating point. +define float @test_negative_float(float %unrelated, float %x, float %y) nounwind { +; CHECK-LABEL: test_negative_float: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp s2, s0 +; CHECK-NEXT: fsub s0, s2, s1 +; CHECK-NEXT: movi d3, #0000000000000000 +; CHECK-NEXT: fccmp s2, s1, #8, le +; CHECK-NEXT: fcsel s0, s3, s0, mi +; CHECK-NEXT: ret + %cmp.nomatch1 = fcmp olt float %y, %x + %cmp.nomatch2 = fcmp ogt float %y, %unrelated + %or.cond = or i1 %cmp.nomatch1, %cmp.nomatch2 + %sub.noreuse = fsub float %y, %x + %res = select i1 %or.cond, float 0.0, float %sub.noreuse + ret float %res +} + +; Negative test: If both operands match a sub, do not reorder them. +define i64 @test_prefer_right_negative(i64 %x, i64 %y, i64 %z) nounwind { +; CHECK-LABEL: test_prefer_right_negative: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp x2, x0 +; CHECK-NEXT: ccmp x2, x1, #0, ls +; CHECK-NEXT: csel x8, x0, x1, lo +; CHECK-NEXT: sub x0, x2, x8 +; CHECK-NEXT: ret + %cmp.match1 = icmp ult i64 %z, %y + %cmp.match2 = icmp ugt i64 %z, %x + %or.cond = or i1 %cmp.match1, %cmp.match2 + %sub.reuse1 = sub nuw i64 %z, %y + %sub.reuse2 = sub nuw i64 %z, %x + %res = select i1 %or.cond, i64 %sub.reuse2, i64 %sub.reuse1 + ret i64 %res +} diff --git a/llvm/test/CodeGen/AArch64/i128-math.ll b/llvm/test/CodeGen/AArch64/i128-math.ll index 9e1c0c1b115ab..12ae241dda4bd 100644 --- a/llvm/test/CodeGen/AArch64/i128-math.ll +++ b/llvm/test/CodeGen/AArch64/i128-math.ll @@ -262,20 +262,28 @@ define i128 @u128_mul(i128 %x, i128 %y) { define { i128, i8 } @u128_checked_mul(i128 %x, i128 %y) { ; CHECK-LABEL: u128_checked_mul: ; CHECK: // %bb.0: +; CHECK-NEXT: orr x8, x1, x3 +; CHECK-NEXT: cbz x8, .LBB17_2 +; CHECK-NEXT: // %bb.1: // %overflow ; CHECK-NEXT: mul x9, x3, x0 ; CHECK-NEXT: cmp x1, #0 ; CHECK-NEXT: ccmp x3, #0, #4, ne -; CHECK-NEXT: umulh x8, x1, x2 -; CHECK-NEXT: umulh x10, x3, x0 +; CHECK-NEXT: umulh x10, x1, x2 +; CHECK-NEXT: umulh x8, x3, x0 ; CHECK-NEXT: madd x9, x1, x2, x9 -; CHECK-NEXT: ccmp xzr, x8, #0, eq -; CHECK-NEXT: umulh x11, x0, x2 ; CHECK-NEXT: ccmp xzr, x10, #0, eq +; CHECK-NEXT: umulh x11, x0, x2 +; CHECK-NEXT: ccmp xzr, x8, #0, eq ; CHECK-NEXT: mul x0, x0, x2 ; CHECK-NEXT: cset w8, ne ; CHECK-NEXT: adds x1, x11, x9 ; CHECK-NEXT: csinc w8, w8, wzr, lo ; CHECK-NEXT: eor w2, w8, #0x1 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB17_2: // %overflow.no +; CHECK-NEXT: umulh x1, x0, x2 +; CHECK-NEXT: mul x0, x0, x2 +; CHECK-NEXT: eor w2, w8, #0x1 ; CHECK-NEXT: ret %1 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y) %2 = extractvalue { i128, i1 } %1, 0 @@ -290,19 +298,27 @@ define { i128, i8 } @u128_checked_mul(i128 %x, i128 %y) { define { i128, i8 } @u128_overflowing_mul(i128 %x, i128 %y) { ; CHECK-LABEL: u128_overflowing_mul: ; CHECK: // %bb.0: +; CHECK-NEXT: orr x8, x1, x3 +; CHECK-NEXT: cbz x8, .LBB18_2 +; CHECK-NEXT: // %bb.1: // %overflow ; CHECK-NEXT: mul x9, x3, x0 ; CHECK-NEXT: cmp x1, #0 ; CHECK-NEXT: ccmp x3, #0, #4, ne -; CHECK-NEXT: umulh x8, x1, x2 -; CHECK-NEXT: umulh x10, x3, x0 +; CHECK-NEXT: umulh x10, x1, x2 +; CHECK-NEXT: umulh x8, x3, x0 ; CHECK-NEXT: madd x9, x1, x2, x9 -; CHECK-NEXT: ccmp xzr, x8, #0, eq -; CHECK-NEXT: umulh x11, x0, x2 ; CHECK-NEXT: ccmp xzr, x10, #0, eq +; CHECK-NEXT: umulh x11, x0, x2 +; CHECK-NEXT: ccmp xzr, x8, #0, eq ; CHECK-NEXT: mul x0, x0, x2 ; CHECK-NEXT: cset w8, ne ; CHECK-NEXT: adds x1, x11, x9 ; CHECK-NEXT: csinc w2, w8, wzr, lo +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB18_2: // %overflow.no +; CHECK-NEXT: umulh x1, x0, x2 +; CHECK-NEXT: mul x0, x0, x2 +; CHECK-NEXT: mov w2, wzr ; CHECK-NEXT: ret %1 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y) %2 = extractvalue { i128, i1 } %1, 0 @@ -316,19 +332,28 @@ define { i128, i8 } @u128_overflowing_mul(i128 %x, i128 %y) { define i128 @u128_saturating_mul(i128 %x, i128 %y) { ; CHECK-LABEL: u128_saturating_mul: ; CHECK: // %bb.0: -; CHECK-NEXT: mul x9, x3, x0 +; CHECK-NEXT: orr x8, x1, x3 +; CHECK-NEXT: cbz x8, .LBB19_2 +; CHECK-NEXT: // %bb.1: // %overflow +; CHECK-NEXT: mul x8, x3, x0 ; CHECK-NEXT: cmp x1, #0 ; CHECK-NEXT: ccmp x3, #0, #4, ne -; CHECK-NEXT: umulh x8, x1, x2 -; CHECK-NEXT: umulh x10, x3, x0 -; CHECK-NEXT: madd x9, x1, x2, x9 -; CHECK-NEXT: ccmp xzr, x8, #0, eq -; CHECK-NEXT: umulh x11, x0, x2 +; CHECK-NEXT: umulh x10, x1, x2 +; CHECK-NEXT: umulh x9, x3, x0 +; CHECK-NEXT: madd x11, x1, x2, x8 ; CHECK-NEXT: ccmp xzr, x10, #0, eq +; CHECK-NEXT: umulh x12, x0, x2 +; CHECK-NEXT: ccmp xzr, x9, #0, eq ; CHECK-NEXT: mul x8, x0, x2 ; CHECK-NEXT: cset w10, ne -; CHECK-NEXT: adds x9, x11, x9 +; CHECK-NEXT: adds x9, x12, x11 ; CHECK-NEXT: csinc w10, w10, wzr, lo +; CHECK-NEXT: b .LBB19_3 +; CHECK-NEXT: .LBB19_2: // %overflow.no +; CHECK-NEXT: umulh x9, x0, x2 +; CHECK-NEXT: mov w10, wzr +; CHECK-NEXT: mul x8, x0, x2 +; CHECK-NEXT: .LBB19_3: // %overflow.res ; CHECK-NEXT: cmp w10, #0 ; CHECK-NEXT: csinv x0, x8, xzr, eq ; CHECK-NEXT: csinv x1, x9, xzr, eq @@ -355,6 +380,11 @@ define i128 @i128_mul(i128 %x, i128 %y) { define { i128, i8 } @i128_checked_mul(i128 %x, i128 %y) { ; CHECK-LABEL: i128_checked_mul: ; CHECK: // %bb.0: +; CHECK-NEXT: eor x8, x3, x2, asr #63 +; CHECK-NEXT: eor x9, x1, x0, asr #63 +; CHECK-NEXT: orr x8, x9, x8 +; CHECK-NEXT: cbz x8, .LBB21_2 +; CHECK-NEXT: // %bb.1: // %overflow ; CHECK-NEXT: asr x9, x1, #63 ; CHECK-NEXT: umulh x10, x0, x2 ; CHECK-NEXT: asr x13, x3, #63 @@ -364,24 +394,30 @@ define { i128, i8 } @i128_checked_mul(i128 %x, i128 %y) { ; CHECK-NEXT: adds x10, x11, x10 ; CHECK-NEXT: mul x14, x0, x3 ; CHECK-NEXT: umulh x12, x0, x3 -; CHECK-NEXT: adc x9, x8, x9 +; CHECK-NEXT: adc x8, x8, x9 +; CHECK-NEXT: mov x9, x1 ; CHECK-NEXT: mul x13, x0, x13 -; CHECK-NEXT: adds x8, x14, x10 -; CHECK-NEXT: mul x15, x1, x3 -; CHECK-NEXT: smulh x10, x1, x3 -; CHECK-NEXT: mov x1, x8 -; CHECK-NEXT: adc x11, x12, x13 -; CHECK-NEXT: asr x12, x9, #63 -; CHECK-NEXT: asr x13, x11, #63 -; CHECK-NEXT: adds x9, x9, x11 ; CHECK-NEXT: asr x11, x8, #63 +; CHECK-NEXT: mul x15, x1, x3 +; CHECK-NEXT: adds x1, x14, x10 +; CHECK-NEXT: smulh x9, x9, x3 +; CHECK-NEXT: adc x10, x12, x13 +; CHECK-NEXT: asr x12, x10, #63 +; CHECK-NEXT: adds x8, x8, x10 +; CHECK-NEXT: asr x10, x1, #63 ; CHECK-NEXT: mul x0, x0, x2 -; CHECK-NEXT: adc x12, x12, x13 -; CHECK-NEXT: adds x9, x15, x9 -; CHECK-NEXT: adc x10, x10, x12 -; CHECK-NEXT: cmp x9, x11 -; CHECK-NEXT: ccmp x10, x11, #0, eq -; CHECK-NEXT: cset w2, eq +; CHECK-NEXT: adc x11, x11, x12 +; CHECK-NEXT: adds x8, x15, x8 +; CHECK-NEXT: adc x9, x9, x11 +; CHECK-NEXT: cmp x8, x10 +; CHECK-NEXT: ccmp x9, x10, #0, eq +; CHECK-NEXT: cset w8, ne +; CHECK-NEXT: eor w2, w8, #0x1 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB21_2: // %overflow.no +; CHECK-NEXT: smulh x1, x0, x2 +; CHECK-NEXT: mul x0, x0, x2 +; CHECK-NEXT: eor w2, w8, #0x1 ; CHECK-NEXT: ret %1 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y) %2 = extractvalue { i128, i1 } %1, 0 @@ -396,6 +432,11 @@ define { i128, i8 } @i128_checked_mul(i128 %x, i128 %y) { define { i128, i8 } @i128_overflowing_mul(i128 %x, i128 %y) { ; CHECK-LABEL: i128_overflowing_mul: ; CHECK: // %bb.0: +; CHECK-NEXT: eor x8, x3, x2, asr #63 +; CHECK-NEXT: eor x9, x1, x0, asr #63 +; CHECK-NEXT: orr x8, x9, x8 +; CHECK-NEXT: cbz x8, .LBB22_2 +; CHECK-NEXT: // %bb.1: // %overflow ; CHECK-NEXT: asr x9, x1, #63 ; CHECK-NEXT: umulh x10, x0, x2 ; CHECK-NEXT: asr x13, x3, #63 @@ -405,24 +446,29 @@ define { i128, i8 } @i128_overflowing_mul(i128 %x, i128 %y) { ; CHECK-NEXT: adds x10, x11, x10 ; CHECK-NEXT: mul x14, x0, x3 ; CHECK-NEXT: umulh x12, x0, x3 -; CHECK-NEXT: adc x9, x8, x9 +; CHECK-NEXT: adc x8, x8, x9 +; CHECK-NEXT: mov x9, x1 ; CHECK-NEXT: mul x13, x0, x13 -; CHECK-NEXT: adds x8, x14, x10 -; CHECK-NEXT: mul x15, x1, x3 -; CHECK-NEXT: smulh x10, x1, x3 -; CHECK-NEXT: mov x1, x8 -; CHECK-NEXT: adc x11, x12, x13 -; CHECK-NEXT: asr x12, x9, #63 -; CHECK-NEXT: asr x13, x11, #63 -; CHECK-NEXT: adds x9, x9, x11 ; CHECK-NEXT: asr x11, x8, #63 +; CHECK-NEXT: mul x15, x1, x3 +; CHECK-NEXT: adds x1, x14, x10 +; CHECK-NEXT: smulh x9, x9, x3 +; CHECK-NEXT: adc x10, x12, x13 +; CHECK-NEXT: asr x12, x10, #63 +; CHECK-NEXT: adds x8, x8, x10 +; CHECK-NEXT: asr x10, x1, #63 ; CHECK-NEXT: mul x0, x0, x2 -; CHECK-NEXT: adc x12, x12, x13 -; CHECK-NEXT: adds x9, x15, x9 -; CHECK-NEXT: adc x10, x10, x12 -; CHECK-NEXT: cmp x9, x11 -; CHECK-NEXT: ccmp x10, x11, #0, eq +; CHECK-NEXT: adc x11, x11, x12 +; CHECK-NEXT: adds x8, x15, x8 +; CHECK-NEXT: adc x9, x9, x11 +; CHECK-NEXT: cmp x8, x10 +; CHECK-NEXT: ccmp x9, x10, #0, eq ; CHECK-NEXT: cset w2, ne +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB22_2: // %overflow.no +; CHECK-NEXT: smulh x1, x0, x2 +; CHECK-NEXT: mul x0, x0, x2 +; CHECK-NEXT: mov w2, wzr ; CHECK-NEXT: ret %1 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y) %2 = extractvalue { i128, i1 } %1, 0 @@ -436,6 +482,11 @@ define { i128, i8 } @i128_overflowing_mul(i128 %x, i128 %y) { define i128 @i128_saturating_mul(i128 %x, i128 %y) { ; CHECK-LABEL: i128_saturating_mul: ; CHECK: // %bb.0: +; CHECK-NEXT: eor x8, x3, x2, asr #63 +; CHECK-NEXT: eor x9, x1, x0, asr #63 +; CHECK-NEXT: orr x8, x9, x8 +; CHECK-NEXT: cbz x8, .LBB23_2 +; CHECK-NEXT: // %bb.1: // %overflow ; CHECK-NEXT: asr x9, x1, #63 ; CHECK-NEXT: umulh x10, x0, x2 ; CHECK-NEXT: asr x13, x3, #63 @@ -445,29 +496,35 @@ define i128 @i128_saturating_mul(i128 %x, i128 %y) { ; CHECK-NEXT: adds x10, x11, x10 ; CHECK-NEXT: mul x14, x0, x3 ; CHECK-NEXT: umulh x12, x0, x3 -; CHECK-NEXT: adc x8, x8, x9 +; CHECK-NEXT: adc x9, x8, x9 ; CHECK-NEXT: mul x13, x0, x13 -; CHECK-NEXT: adds x9, x14, x10 -; CHECK-NEXT: mul x11, x1, x3 -; CHECK-NEXT: adc x10, x12, x13 -; CHECK-NEXT: smulh x12, x1, x3 -; CHECK-NEXT: asr x13, x8, #63 -; CHECK-NEXT: asr x14, x10, #63 -; CHECK-NEXT: adds x8, x8, x10 -; CHECK-NEXT: adc x10, x13, x14 -; CHECK-NEXT: adds x8, x11, x8 -; CHECK-NEXT: asr x11, x9, #63 -; CHECK-NEXT: mul x13, x0, x2 -; CHECK-NEXT: adc x10, x12, x10 -; CHECK-NEXT: eor x12, x3, x1 -; CHECK-NEXT: eor x8, x8, x11 -; CHECK-NEXT: eor x10, x10, x11 -; CHECK-NEXT: asr x11, x12, #63 -; CHECK-NEXT: orr x8, x8, x10 -; CHECK-NEXT: eor x10, x11, #0x7fffffffffffffff -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: csinv x0, x13, x11, eq -; CHECK-NEXT: csel x1, x10, x9, ne +; CHECK-NEXT: adds x8, x14, x10 +; CHECK-NEXT: mul x15, x1, x3 +; CHECK-NEXT: asr x14, x8, #63 +; CHECK-NEXT: smulh x10, x1, x3 +; CHECK-NEXT: adc x11, x12, x13 +; CHECK-NEXT: asr x12, x9, #63 +; CHECK-NEXT: asr x13, x11, #63 +; CHECK-NEXT: adds x11, x9, x11 +; CHECK-NEXT: mul x9, x0, x2 +; CHECK-NEXT: adc x12, x12, x13 +; CHECK-NEXT: adds x11, x15, x11 +; CHECK-NEXT: adc x10, x10, x12 +; CHECK-NEXT: cmp x11, x14 +; CHECK-NEXT: ccmp x10, x14, #0, eq +; CHECK-NEXT: cset w10, ne +; CHECK-NEXT: b .LBB23_3 +; CHECK-NEXT: .LBB23_2: // %overflow.no +; CHECK-NEXT: smulh x8, x0, x2 +; CHECK-NEXT: mov w10, wzr +; CHECK-NEXT: mul x9, x0, x2 +; CHECK-NEXT: .LBB23_3: // %overflow.res +; CHECK-NEXT: eor x11, x3, x1 +; CHECK-NEXT: cmp w10, #0 +; CHECK-NEXT: asr x11, x11, #63 +; CHECK-NEXT: eor x12, x11, #0x7fffffffffffffff +; CHECK-NEXT: csinv x0, x9, x11, eq +; CHECK-NEXT: csel x1, x12, x8, ne ; CHECK-NEXT: ret %1 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y) %2 = extractvalue { i128, i1 } %1, 0 diff --git a/llvm/test/CodeGen/AArch64/i128_with_overflow.ll b/llvm/test/CodeGen/AArch64/i128_with_overflow.ll index 9924b7c63f763..3d90e094a5747 100644 --- a/llvm/test/CodeGen/AArch64/i128_with_overflow.ll +++ b/llvm/test/CodeGen/AArch64/i128_with_overflow.ll @@ -224,21 +224,29 @@ cleanup: define i128 @test_umul_i128(i128 noundef %x, i128 noundef %y) { ; CHECK-LABEL: test_umul_i128: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: orr x8, x1, x3 +; CHECK-NEXT: cbz x8, .LBB4_2 +; CHECK-NEXT: // %bb.1: // %overflow ; CHECK-NEXT: mul x9, x3, x0 ; CHECK-NEXT: cmp x1, #0 ; CHECK-NEXT: ccmp x3, #0, #4, ne -; CHECK-NEXT: umulh x8, x1, x2 -; CHECK-NEXT: umulh x10, x3, x0 +; CHECK-NEXT: umulh x10, x1, x2 +; CHECK-NEXT: umulh x8, x3, x0 ; CHECK-NEXT: madd x9, x1, x2, x9 -; CHECK-NEXT: ccmp xzr, x8, #0, eq -; CHECK-NEXT: umulh x11, x0, x2 ; CHECK-NEXT: ccmp xzr, x10, #0, eq +; CHECK-NEXT: umulh x11, x0, x2 +; CHECK-NEXT: ccmp xzr, x8, #0, eq +; CHECK-NEXT: mul x0, x0, x2 ; CHECK-NEXT: cset w8, ne ; CHECK-NEXT: adds x1, x11, x9 ; CHECK-NEXT: csinc w8, w8, wzr, lo -; CHECK-NEXT: cmp w8, #1 -; CHECK-NEXT: b.ne .LBB4_2 -; CHECK-NEXT: // %bb.1: // %if.then +; CHECK-NEXT: cbnz w8, .LBB4_3 +; CHECK-NEXT: b .LBB4_4 +; CHECK-NEXT: .LBB4_2: // %overflow.no +; CHECK-NEXT: umulh x1, x0, x2 +; CHECK-NEXT: mul x0, x0, x2 +; CHECK-NEXT: cbz w8, .LBB4_4 +; CHECK-NEXT: .LBB4_3: // %if.then ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w30, -16 @@ -247,9 +255,7 @@ define i128 @test_umul_i128(i128 noundef %x, i128 noundef %y) { ; CHECK-NEXT: sxtw x0, w0 ; CHECK-NEXT: asr x1, x0, #63 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB4_2: // %if.end -; CHECK-NEXT: mul x0, x0, x2 +; CHECK-NEXT: .LBB4_4: // %cleanup ; CHECK-NEXT: ret entry: %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y) @@ -273,34 +279,40 @@ cleanup: define i128 @test_smul_i128(i128 noundef %x, i128 noundef %y) { ; CHECK-LABEL: test_smul_i128: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: asr x10, x1, #63 -; CHECK-NEXT: umulh x11, x0, x2 -; CHECK-NEXT: asr x14, x3, #63 -; CHECK-NEXT: mov x8, x1 -; CHECK-NEXT: mul x12, x1, x2 -; CHECK-NEXT: umulh x9, x1, x2 -; CHECK-NEXT: mul x10, x10, x2 -; CHECK-NEXT: adds x11, x12, x11 -; CHECK-NEXT: mul x15, x0, x3 -; CHECK-NEXT: umulh x13, x0, x3 -; CHECK-NEXT: adc x9, x9, x10 -; CHECK-NEXT: mul x14, x0, x14 -; CHECK-NEXT: mul x16, x1, x3 -; CHECK-NEXT: adds x1, x15, x11 -; CHECK-NEXT: asr x11, x9, #63 -; CHECK-NEXT: smulh x8, x8, x3 -; CHECK-NEXT: adc x10, x13, x14 -; CHECK-NEXT: asr x12, x10, #63 -; CHECK-NEXT: adds x9, x9, x10 -; CHECK-NEXT: adc x10, x11, x12 -; CHECK-NEXT: adds x9, x16, x9 -; CHECK-NEXT: asr x11, x1, #63 -; CHECK-NEXT: adc x8, x8, x10 -; CHECK-NEXT: eor x8, x8, x11 -; CHECK-NEXT: eor x9, x9, x11 +; CHECK-NEXT: eor x8, x3, x2, asr #63 +; CHECK-NEXT: eor x9, x1, x0, asr #63 ; CHECK-NEXT: orr x8, x9, x8 -; CHECK-NEXT: cbz x8, .LBB5_2 -; CHECK-NEXT: // %bb.1: // %if.then +; CHECK-NEXT: cbz x8, .LBB5_4 +; CHECK-NEXT: // %bb.1: // %overflow +; CHECK-NEXT: asr x9, x1, #63 +; CHECK-NEXT: umulh x10, x0, x2 +; CHECK-NEXT: asr x13, x3, #63 +; CHECK-NEXT: mul x11, x1, x2 +; CHECK-NEXT: umulh x8, x1, x2 +; CHECK-NEXT: mul x9, x9, x2 +; CHECK-NEXT: adds x10, x11, x10 +; CHECK-NEXT: mul x14, x0, x3 +; CHECK-NEXT: umulh x12, x0, x3 +; CHECK-NEXT: adc x8, x8, x9 +; CHECK-NEXT: mov x9, x1 +; CHECK-NEXT: mul x13, x0, x13 +; CHECK-NEXT: asr x11, x8, #63 +; CHECK-NEXT: mul x15, x1, x3 +; CHECK-NEXT: adds x1, x14, x10 +; CHECK-NEXT: smulh x9, x9, x3 +; CHECK-NEXT: adc x10, x12, x13 +; CHECK-NEXT: asr x12, x10, #63 +; CHECK-NEXT: adds x8, x8, x10 +; CHECK-NEXT: asr x10, x1, #63 +; CHECK-NEXT: mul x0, x0, x2 +; CHECK-NEXT: adc x11, x11, x12 +; CHECK-NEXT: adds x8, x15, x8 +; CHECK-NEXT: adc x9, x9, x11 +; CHECK-NEXT: cmp x8, x10 +; CHECK-NEXT: ccmp x9, x10, #0, eq +; CHECK-NEXT: cset w8, ne +; CHECK-NEXT: cbz w8, .LBB5_3 +; CHECK-NEXT: .LBB5_2: // %if.then ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w30, -16 @@ -309,10 +321,13 @@ define i128 @test_smul_i128(i128 noundef %x, i128 noundef %y) { ; CHECK-NEXT: sxtw x0, w0 ; CHECK-NEXT: asr x1, x0, #63 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .LBB5_3: // %cleanup ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB5_2: // %if.end +; CHECK-NEXT: .LBB5_4: // %overflow.no +; CHECK-NEXT: smulh x1, x0, x2 ; CHECK-NEXT: mul x0, x0, x2 -; CHECK-NEXT: ret +; CHECK-NEXT: cbnz w8, .LBB5_2 +; CHECK-NEXT: b .LBB5_3 entry: %0 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y) %1 = extractvalue { i128, i1 } %0, 1 diff --git a/llvm/test/CodeGen/AArch64/llround-conv-fp16.ll b/llvm/test/CodeGen/AArch64/llround-conv-fp16.ll index cb042757a4a42..3a4be1bda7cd6 100644 --- a/llvm/test/CodeGen/AArch64/llround-conv-fp16.ll +++ b/llvm/test/CodeGen/AArch64/llround-conv-fp16.ll @@ -1,12 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK-NOFP16 ; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK-FP16 -; RUN: llc < %s -mtriple=aarch64 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK-NOFP16,CHECK-GI -; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK-FP16,CHECK-GI - -; CHECK-GI: warning: Instruction selection used fallback path for testmhhs -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for testmhws -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for testmhxs +; RUN: llc < %s -mtriple=aarch64 -global-isel | FileCheck %s --check-prefixes=CHECK-NOFP16 +; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel | FileCheck %s --check-prefixes=CHECK-FP16 define i16 @testmhhs(half %x) { ; CHECK-NOFP16-LABEL: testmhhs: diff --git a/llvm/test/CodeGen/AArch64/llround-conv.ll b/llvm/test/CodeGen/AArch64/llround-conv.ll index 4cc089804ce97..bdee73076347a 100644 --- a/llvm/test/CodeGen/AArch64/llround-conv.ll +++ b/llvm/test/CodeGen/AArch64/llround-conv.ll @@ -1,9 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 ; RUN: llc < %s -mtriple=aarch64 -mattr=+neon | FileCheck %s -; RUN: llc < %s -mtriple=aarch64 -mattr=+neon -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI - -; CHECK-GI: warning: Instruction selection used fallback path for testmswl -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for testmsll +; RUN: llc < %s -mtriple=aarch64 -mattr=+neon -global-isel | FileCheck %s define i32 @testmsws(float %x) { ; CHECK-LABEL: testmsws: diff --git a/llvm/test/CodeGen/AArch64/lround-conv-fp16.ll b/llvm/test/CodeGen/AArch64/lround-conv-fp16.ll index a29dea0eb9f9f..0b18f220067ca 100644 --- a/llvm/test/CodeGen/AArch64/lround-conv-fp16.ll +++ b/llvm/test/CodeGen/AArch64/lround-conv-fp16.ll @@ -1,12 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK-NOFP16 ; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK-FP16 -; RUN: llc < %s -mtriple=aarch64 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK-NOFP16,CHECK-GI -; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK-FP16,CHECK-GI - -; CHECK-GI: warning: Instruction selection used fallback path for testmhhs -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for testmhws -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for testmhxs +; RUN: llc < %s -mtriple=aarch64 -global-isel | FileCheck %s --check-prefixes=CHECK-NOFP16 +; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel | FileCheck %s --check-prefixes=CHECK-FP16 define i16 @testmhhs(half %x) { ; CHECK-NOFP16-LABEL: testmhhs: diff --git a/llvm/test/CodeGen/AArch64/lround-conv.ll b/llvm/test/CodeGen/AArch64/lround-conv.ll index 0bf82b538e70c..4b1782457cc10 100644 --- a/llvm/test/CodeGen/AArch64/lround-conv.ll +++ b/llvm/test/CodeGen/AArch64/lround-conv.ll @@ -1,9 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 ; RUN: llc < %s -mtriple=aarch64 -mattr=+neon | FileCheck %s -; RUN: llc < %s -mtriple=aarch64 -mattr=+neon -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI - -; CHECK-GI: warning: Instruction selection used fallback path for testmswl -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for testmsll +; RUN: llc < %s -mtriple=aarch64 -mattr=+neon -global-isel | FileCheck %s define i32 @testmsws(float %x) { ; CHECK-LABEL: testmsws: diff --git a/llvm/test/CodeGen/AArch64/mul-i128-overflow.ll b/llvm/test/CodeGen/AArch64/mul-i128-overflow.ll new file mode 100644 index 0000000000000..7b60f81539aa8 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/mul-i128-overflow.ll @@ -0,0 +1,261 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=aarch64 -o - %s | FileCheck %s + + +declare i32 @error() + +define i128 @test1(i128 noundef %x, i128 noundef %y) { +; CHECK-LABEL: test1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: eor x8, x3, x2, asr #63 +; CHECK-NEXT: eor x9, x1, x0, asr #63 +; CHECK-NEXT: orr x8, x9, x8 +; CHECK-NEXT: cbz x8, .LBB0_4 +; CHECK-NEXT: // %bb.1: // %overflow +; CHECK-NEXT: asr x9, x1, #63 +; CHECK-NEXT: umulh x10, x0, x2 +; CHECK-NEXT: asr x13, x3, #63 +; CHECK-NEXT: mul x11, x1, x2 +; CHECK-NEXT: umulh x8, x1, x2 +; CHECK-NEXT: mul x9, x9, x2 +; CHECK-NEXT: adds x10, x11, x10 +; CHECK-NEXT: mul x14, x0, x3 +; CHECK-NEXT: umulh x12, x0, x3 +; CHECK-NEXT: adc x8, x8, x9 +; CHECK-NEXT: mov x9, x1 +; CHECK-NEXT: mul x13, x0, x13 +; CHECK-NEXT: asr x11, x8, #63 +; CHECK-NEXT: mul x15, x1, x3 +; CHECK-NEXT: adds x1, x14, x10 +; CHECK-NEXT: smulh x9, x9, x3 +; CHECK-NEXT: adc x10, x12, x13 +; CHECK-NEXT: asr x12, x10, #63 +; CHECK-NEXT: adds x8, x8, x10 +; CHECK-NEXT: asr x10, x1, #63 +; CHECK-NEXT: mul x0, x0, x2 +; CHECK-NEXT: adc x11, x11, x12 +; CHECK-NEXT: adds x8, x15, x8 +; CHECK-NEXT: adc x9, x9, x11 +; CHECK-NEXT: cmp x8, x10 +; CHECK-NEXT: ccmp x9, x10, #0, eq +; CHECK-NEXT: cset w8, ne +; CHECK-NEXT: cbz w8, .LBB0_3 +; CHECK-NEXT: .LBB0_2: // %if.then +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl error +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sxtw x0, w0 +; CHECK-NEXT: asr x1, x0, #63 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .LBB0_3: // %cleanup +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_4: // %overflow.no +; CHECK-NEXT: smulh x1, x0, x2 +; CHECK-NEXT: mul x0, x0, x2 +; CHECK-NEXT: cbnz w8, .LBB0_2 +; CHECK-NEXT: b .LBB0_3 +entry: + %0 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y) + %1 = extractvalue { i128, i1 } %0, 1 + br i1 %1, label %if.then, label %if.end + +if.then: + %call = tail call i32 @error() + %conv1 = sext i32 %call to i128 + br label %cleanup + +if.end: + %2 = extractvalue { i128, i1 } %0, 0 + br label %cleanup + +cleanup: + %retval.0 = phi i128 [ %conv1, %if.then ], [ %2, %if.end ] + ret i128 %retval.0 +} + +define i128 @test2(i128 noundef %x, i128 noundef %y, ptr %out) { +; CHECK-LABEL: test2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: eor x8, x3, x2, asr #63 +; CHECK-NEXT: eor x9, x1, x0, asr #63 +; CHECK-NEXT: orr x8, x9, x8 +; CHECK-NEXT: cbz x8, .LBB1_4 +; CHECK-NEXT: // %bb.1: // %overflow +; CHECK-NEXT: asr x9, x1, #63 +; CHECK-NEXT: umulh x10, x0, x2 +; CHECK-NEXT: asr x13, x3, #63 +; CHECK-NEXT: mul x11, x1, x2 +; CHECK-NEXT: umulh x8, x1, x2 +; CHECK-NEXT: mul x9, x9, x2 +; CHECK-NEXT: adds x10, x11, x10 +; CHECK-NEXT: mul x14, x0, x3 +; CHECK-NEXT: umulh x12, x0, x3 +; CHECK-NEXT: adc x8, x8, x9 +; CHECK-NEXT: mov x9, x1 +; CHECK-NEXT: mul x13, x0, x13 +; CHECK-NEXT: asr x11, x8, #63 +; CHECK-NEXT: mul x15, x1, x3 +; CHECK-NEXT: adds x1, x14, x10 +; CHECK-NEXT: smulh x9, x9, x3 +; CHECK-NEXT: adc x10, x12, x13 +; CHECK-NEXT: asr x12, x10, #63 +; CHECK-NEXT: adds x8, x8, x10 +; CHECK-NEXT: asr x10, x1, #63 +; CHECK-NEXT: mul x0, x0, x2 +; CHECK-NEXT: adc x11, x11, x12 +; CHECK-NEXT: adds x8, x15, x8 +; CHECK-NEXT: adc x9, x9, x11 +; CHECK-NEXT: cmp x8, x10 +; CHECK-NEXT: ccmp x9, x10, #0, eq +; CHECK-NEXT: cset w8, ne +; CHECK-NEXT: stp x0, x1, [x4] +; CHECK-NEXT: cbz w8, .LBB1_3 +; CHECK-NEXT: .LBB1_2: // %if.then +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl error +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sxtw x0, w0 +; CHECK-NEXT: asr x1, x0, #63 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .LBB1_3: // %cleanup +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB1_4: // %overflow.no +; CHECK-NEXT: smulh x1, x0, x2 +; CHECK-NEXT: mul x0, x0, x2 +; CHECK-NEXT: stp x0, x1, [x4] +; CHECK-NEXT: cbnz w8, .LBB1_2 +; CHECK-NEXT: b .LBB1_3 +entry: + %0 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y) + %1 = extractvalue { i128, i1 } %0, 0 + store i128 %1, ptr %out + %2 = extractvalue { i128, i1 } %0, 1 + br i1 %2, label %if.then, label %cleanup + +if.then: + %call = tail call i32 @error() + %conv1 = sext i32 %call to i128 + br label %cleanup + +cleanup: + %retval.0 = phi i128 [ %conv1, %if.then ], [ %1, %entry ] + ret i128 %retval.0 +} + +define i128 @test3(i128 noundef %x, i128 noundef %y, ptr %out) { +; CHECK-LABEL: test3: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: orr x8, x1, x3 +; CHECK-NEXT: cbz x8, .LBB2_3 +; CHECK-NEXT: // %bb.1: // %overflow +; CHECK-NEXT: mul x8, x3, x0 +; CHECK-NEXT: cmp x1, #0 +; CHECK-NEXT: ccmp x3, #0, #4, ne +; CHECK-NEXT: umulh x10, x1, x2 +; CHECK-NEXT: umulh x9, x3, x0 +; CHECK-NEXT: madd x11, x1, x2, x8 +; CHECK-NEXT: ccmp xzr, x10, #0, eq +; CHECK-NEXT: umulh x12, x0, x2 +; CHECK-NEXT: ccmp xzr, x9, #0, eq +; CHECK-NEXT: mul x8, x0, x2 +; CHECK-NEXT: cset w10, ne +; CHECK-NEXT: adds x9, x12, x11 +; CHECK-NEXT: csinc w10, w10, wzr, lo +; CHECK-NEXT: stp x8, x9, [x4] +; CHECK-NEXT: cbnz w10, .LBB2_4 +; CHECK-NEXT: .LBB2_2: +; CHECK-NEXT: mov x1, xzr +; CHECK-NEXT: mov w0, #1 // =0x1 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB2_3: // %overflow.no +; CHECK-NEXT: umulh x9, x0, x2 +; CHECK-NEXT: mov w10, wzr +; CHECK-NEXT: mul x8, x0, x2 +; CHECK-NEXT: stp x8, x9, [x4] +; CHECK-NEXT: cbz w10, .LBB2_2 +; CHECK-NEXT: .LBB2_4: // %if.then +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl error +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sxtw x0, w0 +; CHECK-NEXT: asr x1, x0, #63 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y) + %1 = extractvalue { i128, i1 } %0, 0 + store i128 %1, ptr %out + %2 = extractvalue { i128, i1 } %0, 1 + br i1 %2, label %if.then, label %cleanup + +if.then: + %call = tail call i32 @error() + %conv1 = sext i32 %call to i128 + br label %cleanup + +cleanup: + %retval.0 = phi i128 [ %conv1, %if.then ], [ 1, %entry ] + ret i128 %retval.0 +} + +define i128 @test4(i128 noundef %x, i128 noundef %y, i128 %out) { +; CHECK-LABEL: test4: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: orr x8, x1, x3 +; CHECK-NEXT: cbz x8, .LBB3_2 +; CHECK-NEXT: // %bb.1: // %overflow +; CHECK-NEXT: mul x8, x3, x0 +; CHECK-NEXT: cmp x1, #0 +; CHECK-NEXT: ccmp x3, #0, #4, ne +; CHECK-NEXT: umulh x10, x1, x2 +; CHECK-NEXT: umulh x9, x3, x0 +; CHECK-NEXT: madd x11, x1, x2, x8 +; CHECK-NEXT: ccmp xzr, x10, #0, eq +; CHECK-NEXT: umulh x12, x0, x2 +; CHECK-NEXT: ccmp xzr, x9, #0, eq +; CHECK-NEXT: mul x8, x0, x2 +; CHECK-NEXT: cset w10, ne +; CHECK-NEXT: adds x9, x12, x11 +; CHECK-NEXT: csinc w10, w10, wzr, lo +; CHECK-NEXT: b .LBB3_3 +; CHECK-NEXT: .LBB3_2: // %overflow.no +; CHECK-NEXT: umulh x9, x0, x2 +; CHECK-NEXT: mov w10, wzr +; CHECK-NEXT: mul x8, x0, x2 +; CHECK-NEXT: .LBB3_3: // %overflow.res +; CHECK-NEXT: adds x0, x8, x4 +; CHECK-NEXT: adc x1, x9, x5 +; CHECK-NEXT: cbz w10, .LBB3_5 +; CHECK-NEXT: // %bb.4: // %if.then +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl error +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sxtw x0, w0 +; CHECK-NEXT: asr x1, x0, #63 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .LBB3_5: // %cleanup +; CHECK-NEXT: ret +entry: + %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y) + %1 = extractvalue { i128, i1 } %0, 0 + %res = add i128 %1, %out + %2 = extractvalue { i128, i1 } %0, 1 + br i1 %2, label %if.then, label %cleanup + +if.then: + %call = tail call i32 @error() + %conv1 = sext i32 %call to i128 + br label %cleanup + +cleanup: + %retval.0 = phi i128 [ %conv1, %if.then ], [ %res, %entry ] + ret i128 %retval.0 +} diff --git a/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll b/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll index 16e8feb0dc5bb..fc3e018f2ec7a 100644 --- a/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll +++ b/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll @@ -632,7 +632,6 @@ define @fsub_sel_fmul_negzero_nxv8bf16( @fsub_sel_fmul_negzero_nxv8bf16( @fsub_sel_fmul_negzero_nxv8bf16( @fadd_sel_fmul_d_negzero( %a, define @fsub_sel_fmul_h_negzero( %a, %b, %c, %mask) { ; CHECK-LABEL: fsub_sel_fmul_h_negzero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32768 // =0x8000 +; CHECK-NEXT: dupm z3.h, #0x8000 ; CHECK-NEXT: fmul z1.h, z1.h, z2.h -; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: sel z1.h, p0, z1.h, z2.h +; CHECK-NEXT: sel z1.h, p0, z1.h, z3.h ; CHECK-NEXT: fsub z0.h, z0.h, z1.h ; CHECK-NEXT: ret %fmul = fmul %b, %c @@ -1150,10 +1149,9 @@ define @fsub_sel_fmul_h_negzero( %a, @fsub_sel_fmul_s_negzero( %a, %b, %c, %mask) { ; CHECK-LABEL: fsub_sel_fmul_s_negzero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-2147483648 // =0x80000000 +; CHECK-NEXT: mov z3.s, #0x80000000 ; CHECK-NEXT: fmul z1.s, z1.s, z2.s -; CHECK-NEXT: mov z2.s, w8 -; CHECK-NEXT: sel z1.s, p0, z1.s, z2.s +; CHECK-NEXT: sel z1.s, p0, z1.s, z3.s ; CHECK-NEXT: fsub z0.s, z0.s, z1.s ; CHECK-NEXT: ret %fmul = fmul %b, %c @@ -1166,10 +1164,9 @@ define @fsub_sel_fmul_s_negzero( %a, @fsub_sel_fmul_d_negzero( %a, %b, %c, %mask) { ; CHECK-LABEL: fsub_sel_fmul_d_negzero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 +; CHECK-NEXT: mov z3.d, #0x8000000000000000 ; CHECK-NEXT: fmul z1.d, z1.d, z2.d -; CHECK-NEXT: mov z2.d, x8 -; CHECK-NEXT: sel z1.d, p0, z1.d, z2.d +; CHECK-NEXT: sel z1.d, p0, z1.d, z3.d ; CHECK-NEXT: fsub z0.d, z0.d, z1.d ; CHECK-NEXT: ret %fmul = fmul %b, %c diff --git a/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll b/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll index 8750867c56731..1223ae1c0cbdd 100644 --- a/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll +++ b/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll @@ -51,10 +51,9 @@ define half @fadda_nxv6f16( %v, half %s) { ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: mov w8, #32768 // =0x8000 +; CHECK-NEXT: dupm z2.h, #0x8000 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: str z0, [sp] -; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: fmov s0, s1 ; CHECK-NEXT: st1h { z2.d }, p0, [sp, #3, mul vl] ; CHECK-NEXT: ptrue p0.h @@ -77,12 +76,11 @@ define half @fadda_nxv10f16( %v, half %s) { ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: // kill: def $h2 killed $h2 def $z2 -; CHECK-NEXT: mov w8, #32768 // =0x8000 ; CHECK-NEXT: str z1, [sp] +; CHECK-NEXT: addvl x8, sp, #1 ; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: fadda h2, p0, h2, z0.h -; CHECK-NEXT: mov z0.h, w8 -; CHECK-NEXT: addvl x8, sp, #1 +; CHECK-NEXT: dupm z0.h, #0x8000 ; CHECK-NEXT: st1h { z0.d }, p1, [sp, #1, mul vl] ; CHECK-NEXT: ldr z1, [sp] ; CHECK-NEXT: str z1, [sp, #1, mul vl] @@ -105,11 +103,10 @@ define half @fadda_nxv12f16( %v, half %s) { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: // kill: def $h2 killed $h2 def $z2 -; CHECK-NEXT: mov w8, #32768 // =0x8000 +; CHECK-NEXT: uunpklo z1.s, z1.h ; CHECK-NEXT: fadda h2, p0, h2, z0.h -; CHECK-NEXT: uunpklo z0.s, z1.h -; CHECK-NEXT: mov z1.h, w8 -; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h +; CHECK-NEXT: dupm z0.h, #0x8000 +; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h ; CHECK-NEXT: fadda h2, p0, h2, z0.h ; CHECK-NEXT: fmov s0, s2 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll b/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll index 4ae7ac7b292e9..897ade00320db 100644 --- a/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll +++ b/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll @@ -454,18 +454,17 @@ declare @llvm.fptosi.sat.nxv4f16.nxv4i64() define @test_signed_v2f16_v2i32( %f) { ; CHECK-LABEL: test_signed_v2f16_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #64511 // =0xfbff +; CHECK-NEXT: mov z1.h, #-1025 // =0xfffffffffffffbff ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff -; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: mov z2.d, #0xffffffff80000000 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z1.d, #0xffffffff80000000 -; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z2.h -; CHECK-NEXT: mov z2.d, #0x7fffffff +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: fcvtzs z2.d, p1/m, z0.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z1.h +; CHECK-NEXT: mov z1.d, #0x7fffffff ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h -; CHECK-NEXT: fcvtzs z1.d, p1/m, z0.h -; CHECK-NEXT: sel z0.d, p2, z2.d, z1.d +; CHECK-NEXT: sel z0.d, p1, z1.d, z2.d ; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv2f16.nxv2i32( %f) @@ -475,18 +474,17 @@ define @test_signed_v2f16_v2i32( %f) { define @test_signed_v4f16_v4i32( %f) { ; CHECK-LABEL: test_signed_v4f16_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #64511 // =0xfbff +; CHECK-NEXT: mov z1.h, #-1025 // =0xfffffffffffffbff ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff -; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: mov z2.s, #0x80000000 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z1.s, #0x80000000 -; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z2.h -; CHECK-NEXT: mov z2.s, #0x7fffffff +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: fcvtzs z2.s, p1/m, z0.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z1.h +; CHECK-NEXT: mov z1.s, #0x7fffffff ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h -; CHECK-NEXT: fcvtzs z1.s, p1/m, z0.h -; CHECK-NEXT: sel z0.s, p2, z2.s, z1.s +; CHECK-NEXT: sel z0.s, p1, z1.s, z2.s ; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv4f16.nxv4i32( %f) @@ -496,26 +494,25 @@ define @test_signed_v4f16_v4i32( %f) { define @test_signed_v8f16_v8i32( %f) { ; CHECK-LABEL: test_signed_v8f16_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #64511 // =0xfbff -; CHECK-NEXT: uunpklo z1.s, z0.h +; CHECK-NEXT: mov z1.h, #-1025 // =0xfffffffffffffbff +; CHECK-NEXT: uunpklo z2.s, z0.h +; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: uunpkhi z0.s, z0.h -; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: mov z3.s, #0x80000000 ; CHECK-NEXT: mov z4.s, #0x80000000 ; CHECK-NEXT: mov z5.h, w8 -; CHECK-NEXT: fcmge p1.h, p0/z, z1.h, z2.h -; CHECK-NEXT: fcmge p2.h, p0/z, z0.h, z2.h -; CHECK-NEXT: mov z2.s, #0x7fffffff +; CHECK-NEXT: fcmge p1.h, p0/z, z2.h, z1.h +; CHECK-NEXT: fcmge p2.h, p0/z, z0.h, z1.h +; CHECK-NEXT: mov z1.s, #0x7fffffff ; CHECK-NEXT: fcmgt p3.h, p0/z, z0.h, z5.h -; CHECK-NEXT: fcvtzs z3.s, p1/m, z1.h -; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z5.h +; CHECK-NEXT: fcvtzs z3.s, p1/m, z2.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z2.h, z5.h ; CHECK-NEXT: fcvtzs z4.s, p2/m, z0.h -; CHECK-NEXT: fcmuo p2.h, p0/z, z1.h, z1.h +; CHECK-NEXT: fcmuo p2.h, p0/z, z2.h, z2.h ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h -; CHECK-NEXT: sel z0.s, p1, z2.s, z3.s -; CHECK-NEXT: sel z1.s, p3, z2.s, z4.s +; CHECK-NEXT: sel z0.s, p1, z1.s, z3.s +; CHECK-NEXT: sel z1.s, p3, z1.s, z4.s ; CHECK-NEXT: mov z0.s, p2/m, #0 // =0x0 ; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0 ; CHECK-NEXT: ret @@ -526,18 +523,17 @@ define @test_signed_v8f16_v8i32( %f) { define @test_signed_v4f16_v4i16( %f) { ; CHECK-LABEL: test_signed_v4f16_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #63488 // =0xf800 +; CHECK-NEXT: dupm z1.h, #0xf800 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z2.s, #-32768 // =0xffffffffffff8000 -; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #30719 // =0x77ff +; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z1.h, w8 -; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z1.s, #32767 // =0x7fff -; CHECK-NEXT: fcvtzs z2.s, p1/m, z0.h +; CHECK-NEXT: mov z1.s, #-32768 // =0xffffffffffff8000 +; CHECK-NEXT: fcvtzs z1.s, p1/m, z0.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h +; CHECK-NEXT: mov z2.s, #32767 // =0x7fff ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h -; CHECK-NEXT: sel z0.s, p2, z1.s, z2.s +; CHECK-NEXT: sel z0.s, p1, z2.s, z1.s ; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv4f16.nxv4i16( %f) @@ -547,18 +543,17 @@ define @test_signed_v4f16_v4i16( %f) { define @test_signed_v8f16_v8i16( %f) { ; CHECK-LABEL: test_signed_v8f16_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #63488 // =0xf800 +; CHECK-NEXT: dupm z1.h, #0xf800 ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z2.h, #-32768 // =0xffffffffffff8000 -; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #30719 // =0x77ff +; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z1.h, w8 -; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z1.h, #32767 // =0x7fff -; CHECK-NEXT: fcvtzs z2.h, p1/m, z0.h +; CHECK-NEXT: mov z1.h, #-32768 // =0xffffffffffff8000 +; CHECK-NEXT: fcvtzs z1.h, p1/m, z0.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h +; CHECK-NEXT: mov z2.h, #32767 // =0x7fff ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h -; CHECK-NEXT: sel z0.h, p2, z1.h, z2.h +; CHECK-NEXT: sel z0.h, p1, z2.h, z1.h ; CHECK-NEXT: mov z0.h, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv8f16.nxv8i16( %f) @@ -568,18 +563,17 @@ define @test_signed_v8f16_v8i16( %f) { define @test_signed_v2f16_v2i64( %f) { ; CHECK-LABEL: test_signed_v2f16_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #64511 // =0xfbff +; CHECK-NEXT: mov z1.h, #-1025 // =0xfffffffffffffbff ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff -; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: mov z2.d, #0x8000000000000000 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z1.d, #0x8000000000000000 -; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z2.h -; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: fcvtzs z2.d, p1/m, z0.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z1.h +; CHECK-NEXT: mov z1.d, #0x7fffffffffffffff ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h -; CHECK-NEXT: fcvtzs z1.d, p1/m, z0.h -; CHECK-NEXT: sel z0.d, p2, z2.d, z1.d +; CHECK-NEXT: sel z0.d, p1, z1.d, z2.d ; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv2f16.nxv2i64( %f) @@ -589,26 +583,25 @@ define @test_signed_v2f16_v2i64( %f) { define @test_signed_v4f16_v4i64( %f) { ; CHECK-LABEL: test_signed_v4f16_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #64511 // =0xfbff -; CHECK-NEXT: uunpklo z1.d, z0.s +; CHECK-NEXT: mov z1.h, #-1025 // =0xfffffffffffffbff +; CHECK-NEXT: uunpklo z2.d, z0.s +; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: uunpkhi z0.d, z0.s -; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: mov z3.d, #0x8000000000000000 ; CHECK-NEXT: mov z4.d, #0x8000000000000000 ; CHECK-NEXT: mov z5.h, w8 -; CHECK-NEXT: fcmge p1.h, p0/z, z1.h, z2.h -; CHECK-NEXT: fcmge p2.h, p0/z, z0.h, z2.h -; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff +; CHECK-NEXT: fcmge p1.h, p0/z, z2.h, z1.h +; CHECK-NEXT: fcmge p2.h, p0/z, z0.h, z1.h +; CHECK-NEXT: mov z1.d, #0x7fffffffffffffff ; CHECK-NEXT: fcmgt p3.h, p0/z, z0.h, z5.h -; CHECK-NEXT: fcvtzs z3.d, p1/m, z1.h -; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z5.h +; CHECK-NEXT: fcvtzs z3.d, p1/m, z2.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z2.h, z5.h ; CHECK-NEXT: fcvtzs z4.d, p2/m, z0.h -; CHECK-NEXT: fcmuo p2.h, p0/z, z1.h, z1.h +; CHECK-NEXT: fcmuo p2.h, p0/z, z2.h, z2.h ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h -; CHECK-NEXT: sel z0.d, p1, z2.d, z3.d -; CHECK-NEXT: sel z1.d, p3, z2.d, z4.d +; CHECK-NEXT: sel z0.d, p1, z1.d, z3.d +; CHECK-NEXT: sel z1.d, p3, z1.d, z4.d ; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0 ; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-llrint.ll b/llvm/test/CodeGen/AArch64/sve-llrint.ll index f964d70e0a05c..c2bb0c81ab405 100644 --- a/llvm/test/CodeGen/AArch64/sve-llrint.ll +++ b/llvm/test/CodeGen/AArch64/sve-llrint.ll @@ -5,9 +5,8 @@ define @llrint_v1i64_v1f16( %x) { ; CHECK-LABEL: llrint_v1i64_v1f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov w8, #64511 // =0xfbff +; CHECK-NEXT: mov z1.h, #-1025 // =0xfffffffffffffbff ; CHECK-NEXT: mov z2.d, #0x8000000000000000 -; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: frintx z0.h, p0/m, z0.h ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h @@ -28,9 +27,8 @@ define @llrint_v1i64_v2f16( %x) { ; CHECK-LABEL: llrint_v1i64_v2f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov w8, #64511 // =0xfbff +; CHECK-NEXT: mov z1.h, #-1025 // =0xfffffffffffffbff ; CHECK-NEXT: mov z2.d, #0x8000000000000000 -; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: frintx z0.h, p0/m, z0.h ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h @@ -52,10 +50,9 @@ define @llrint_v4i64_v4f16( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: uunpklo z1.d, z0.s ; CHECK-NEXT: uunpkhi z0.d, z0.s -; CHECK-NEXT: mov w8, #64511 // =0xfbff -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z2.h, #-1025 // =0xfffffffffffffbff ; CHECK-NEXT: mov z3.d, #0x8000000000000000 ; CHECK-NEXT: mov z4.d, #0x8000000000000000 ; CHECK-NEXT: mov z5.d, #0x7fffffffffffffff @@ -92,10 +89,9 @@ define @llrint_v8i64_v8f16( %x) { ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: uunpklo z1.s, z0.h ; CHECK-NEXT: uunpkhi z0.s, z0.h -; CHECK-NEXT: mov w8, #64511 // =0xfbff -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z4.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z4.h, #-1025 // =0xfffffffffffffbff ; CHECK-NEXT: mov z5.d, #0x8000000000000000 ; CHECK-NEXT: mov z6.d, #0x8000000000000000 ; CHECK-NEXT: mov z7.d, #0x8000000000000000 @@ -162,12 +158,13 @@ define @llrint_v16i64_v16f16( %x) { ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: uunpklo z2.s, z0.h ; CHECK-NEXT: uunpkhi z3.s, z0.h -; CHECK-NEXT: mov w8, #64511 // =0xfbff +; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: uunpklo z7.s, z1.h ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z0.h, #-1025 // =0xfffffffffffffbff ; CHECK-NEXT: uunpkhi z1.s, z1.h -; CHECK-NEXT: mov z0.d, #0x8000000000000000 ; CHECK-NEXT: mov z5.d, #0x8000000000000000 +; CHECK-NEXT: mov z29.h, w8 ; CHECK-NEXT: mov z31.d, #0x8000000000000000 ; CHECK-NEXT: uunpklo z4.d, z2.s ; CHECK-NEXT: uunpklo z24.d, z3.s @@ -175,10 +172,8 @@ define @llrint_v16i64_v16f16( %x) { ; CHECK-NEXT: uunpkhi z6.d, z2.s ; CHECK-NEXT: uunpklo z26.d, z7.s ; CHECK-NEXT: uunpkhi z7.d, z7.s -; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: mov w8, #31743 // =0x7bff +; CHECK-NEXT: mov z2.d, #0x8000000000000000 ; CHECK-NEXT: uunpklo z30.d, z1.s -; CHECK-NEXT: mov z29.h, w8 ; CHECK-NEXT: mov z3.d, #0x8000000000000000 ; CHECK-NEXT: uunpkhi z1.d, z1.s ; CHECK-NEXT: movprfx z27, z4 @@ -191,17 +186,17 @@ define @llrint_v16i64_v16f16( %x) { ; CHECK-NEXT: frintx z26.h, p0/m, z26.h ; CHECK-NEXT: frintx z7.h, p0/m, z7.h ; CHECK-NEXT: mov z6.d, #0x8000000000000000 -; CHECK-NEXT: fcmge p1.h, p0/z, z27.h, z2.h -; CHECK-NEXT: fcmge p3.h, p0/z, z24.h, z2.h -; CHECK-NEXT: fcmge p4.h, p0/z, z25.h, z2.h -; CHECK-NEXT: fcmge p2.h, p0/z, z28.h, z2.h -; CHECK-NEXT: fcmge p5.h, p0/z, z26.h, z2.h -; CHECK-NEXT: fcvtzs z0.d, p1/m, z27.h +; CHECK-NEXT: fcmge p1.h, p0/z, z27.h, z0.h +; CHECK-NEXT: fcmge p3.h, p0/z, z24.h, z0.h +; CHECK-NEXT: fcmge p4.h, p0/z, z25.h, z0.h +; CHECK-NEXT: fcmge p2.h, p0/z, z28.h, z0.h +; CHECK-NEXT: fcmge p5.h, p0/z, z26.h, z0.h +; CHECK-NEXT: fcvtzs z2.d, p1/m, z27.h ; CHECK-NEXT: fcvtzs z4.d, p3/m, z24.h ; CHECK-NEXT: fcvtzs z5.d, p4/m, z25.h ; CHECK-NEXT: fcmgt p3.h, p0/z, z27.h, z29.h ; CHECK-NEXT: fcvtzs z3.d, p2/m, z28.h -; CHECK-NEXT: fcmge p4.h, p0/z, z7.h, z2.h +; CHECK-NEXT: fcmge p4.h, p0/z, z7.h, z0.h ; CHECK-NEXT: fcvtzs z6.d, p5/m, z26.h ; CHECK-NEXT: fcmuo p1.h, p0/z, z27.h, z27.h ; CHECK-NEXT: movprfx z27, z30 @@ -212,7 +207,7 @@ define @llrint_v16i64_v16f16( %x) { ; CHECK-NEXT: fcmuo p2.h, p0/z, z28.h, z28.h ; CHECK-NEXT: mov z28.d, #0x8000000000000000 ; CHECK-NEXT: fcvtzs z31.d, p4/m, z7.h -; CHECK-NEXT: fcmge p4.h, p0/z, z27.h, z2.h +; CHECK-NEXT: fcmge p4.h, p0/z, z27.h, z0.h ; CHECK-NEXT: fcmgt p6.h, p0/z, z24.h, z29.h ; CHECK-NEXT: fcmuo p7.h, p0/z, z24.h, z24.h ; CHECK-NEXT: mov z24.d, #0x7fffffffffffffff @@ -221,31 +216,31 @@ define @llrint_v16i64_v16f16( %x) { ; CHECK-NEXT: fcmuo p10.h, p0/z, z25.h, z25.h ; CHECK-NEXT: mov z25.d, #0x8000000000000000 ; CHECK-NEXT: sel z1.d, p5, z24.d, z3.d -; CHECK-NEXT: mov z0.d, p3/m, z24.d ; CHECK-NEXT: sel z3.d, p8, z24.d, z5.d -; CHECK-NEXT: fcmge p4.h, p0/z, z30.h, z2.h +; CHECK-NEXT: fcmge p4.h, p0/z, z30.h, z0.h +; CHECK-NEXT: sel z0.d, p3, z24.d, z2.d ; CHECK-NEXT: sel z2.d, p6, z24.d, z4.d -; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0 ; CHECK-NEXT: mov z3.d, p10/m, #0 // =0x0 ; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Reload +; CHECK-NEXT: fcmgt p9.h, p0/z, z26.h, z29.h ; CHECK-NEXT: mov z2.d, p7/m, #0 // =0x0 ; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Reload -; CHECK-NEXT: fcmgt p9.h, p0/z, z26.h, z29.h ; CHECK-NEXT: fcvtzs z25.d, p4/m, z30.h +; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 ; CHECK-NEXT: fcmgt p5.h, p0/z, z7.h, z29.h ; CHECK-NEXT: fcmgt p6.h, p0/z, z27.h, z29.h -; CHECK-NEXT: fcmgt p4.h, p0/z, z30.h, z29.h ; CHECK-NEXT: sel z4.d, p9, z24.d, z6.d +; CHECK-NEXT: fcmgt p4.h, p0/z, z30.h, z29.h ; CHECK-NEXT: fcmuo p8.h, p0/z, z7.h, z7.h ; CHECK-NEXT: sel z5.d, p5, z24.d, z31.d ; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload ; CHECK-NEXT: sel z6.d, p6, z24.d, z28.d ; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Reload ; CHECK-NEXT: fcmuo p9.h, p0/z, z27.h, z27.h +; CHECK-NEXT: fcmuo p3.h, p0/z, z26.h, z26.h ; CHECK-NEXT: sel z7.d, p4, z24.d, z25.d ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload -; CHECK-NEXT: fcmuo p3.h, p0/z, z26.h, z26.h ; CHECK-NEXT: mov z5.d, p8/m, #0 // =0x0 ; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Reload ; CHECK-NEXT: fcmuo p0.h, p0/z, z30.h, z30.h @@ -302,48 +297,47 @@ define @llrint_v32i64_v32f16( %x) { ; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x48, 0x1e, 0x22, 0x40, 0x1c // $d14 @ cfa - 56 * VG - 16 ; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x40, 0x1e, 0x22, 0x40, 0x1c // $d15 @ cfa - 64 * VG - 16 ; CHECK-NEXT: uunpklo z4.s, z0.h -; CHECK-NEXT: uunpkhi z5.s, z0.h -; CHECK-NEXT: mov w9, #64511 // =0xfbff -; CHECK-NEXT: uunpklo z6.s, z1.h -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: uunpkhi z28.s, z1.h -; CHECK-NEXT: mov z30.h, w9 +; CHECK-NEXT: uunpkhi z0.s, z0.h ; CHECK-NEXT: mov w9, #31743 // =0x7bff +; CHECK-NEXT: uunpklo z5.s, z1.h +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z28.h, #-1025 // =0xfffffffffffffbff +; CHECK-NEXT: uunpkhi z29.s, z1.h +; CHECK-NEXT: mov z7.d, #0x8000000000000000 ; CHECK-NEXT: uunpklo z13.s, z2.h ; CHECK-NEXT: mov z9.d, #0x8000000000000000 ; CHECK-NEXT: uunpkhi z14.s, z2.h ; CHECK-NEXT: uunpkhi z17.s, z3.h -; CHECK-NEXT: uunpklo z7.d, z4.s +; CHECK-NEXT: uunpklo z6.d, z4.s ; CHECK-NEXT: uunpkhi z4.d, z4.s -; CHECK-NEXT: uunpklo z27.d, z5.s -; CHECK-NEXT: uunpklo z31.d, z6.s -; CHECK-NEXT: uunpkhi z8.d, z6.s -; CHECK-NEXT: uunpkhi z29.d, z5.s -; CHECK-NEXT: uunpkhi z11.d, z28.s -; CHECK-NEXT: uunpklo z10.d, z28.s +; CHECK-NEXT: uunpklo z27.d, z0.s +; CHECK-NEXT: uunpklo z31.d, z5.s +; CHECK-NEXT: uunpkhi z8.d, z5.s +; CHECK-NEXT: uunpkhi z30.d, z0.s +; CHECK-NEXT: uunpkhi z11.d, z29.s +; CHECK-NEXT: uunpklo z10.d, z29.s ; CHECK-NEXT: uunpklo z15.s, z3.h ; CHECK-NEXT: uunpklo z16.d, z14.s ; CHECK-NEXT: uunpkhi z14.d, z14.s ; CHECK-NEXT: mov z24.d, #0x8000000000000000 -; CHECK-NEXT: movprfx z1, z7 -; CHECK-NEXT: frintx z1.h, p0/m, z7.h ; CHECK-NEXT: movprfx z5, z27 ; CHECK-NEXT: frintx z5.h, p0/m, z27.h +; CHECK-NEXT: movprfx z1, z6 +; CHECK-NEXT: frintx z1.h, p0/m, z6.h ; CHECK-NEXT: frintx z4.h, p0/m, z4.h ; CHECK-NEXT: movprfx z12, z31 ; CHECK-NEXT: frintx z12.h, p0/m, z31.h ; CHECK-NEXT: movprfx z27, z8 ; CHECK-NEXT: frintx z27.h, p0/m, z8.h -; CHECK-NEXT: movprfx z6, z29 -; CHECK-NEXT: frintx z6.h, p0/m, z29.h +; CHECK-NEXT: movprfx z6, z30 +; CHECK-NEXT: frintx z6.h, p0/m, z30.h ; CHECK-NEXT: movprfx z31, z10 ; CHECK-NEXT: frintx z31.h, p0/m, z10.h -; CHECK-NEXT: mov z7.d, #0x8000000000000000 ; CHECK-NEXT: mov z8.d, #0x8000000000000000 +; CHECK-NEXT: frintx z11.h, p0/m, z11.h ; CHECK-NEXT: movprfx z3, z16 ; CHECK-NEXT: frintx z3.h, p0/m, z16.h -; CHECK-NEXT: frintx z11.h, p0/m, z11.h -; CHECK-NEXT: mov z29.h, w9 +; CHECK-NEXT: mov z30.h, w9 ; CHECK-NEXT: uunpklo z10.d, z13.s ; CHECK-NEXT: uunpkhi z13.d, z13.s ; CHECK-NEXT: uunpkhi z20.d, z15.s @@ -354,124 +348,124 @@ define @llrint_v32i64_v32f16( %x) { ; CHECK-NEXT: uunpklo z15.d, z15.s ; CHECK-NEXT: mov z2.d, #0x8000000000000000 ; CHECK-NEXT: mov z21.d, #0x8000000000000000 +; CHECK-NEXT: frintx z10.h, p0/m, z10.h ; CHECK-NEXT: mov z26.d, #0x8000000000000000 -; CHECK-NEXT: mov z28.d, #0x7fffffffffffffff +; CHECK-NEXT: mov z29.d, #0x7fffffffffffffff ; CHECK-NEXT: movprfx z19, z13 ; CHECK-NEXT: frintx z19.h, p0/m, z13.h ; CHECK-NEXT: movprfx z13, z14 ; CHECK-NEXT: frintx z13.h, p0/m, z14.h -; CHECK-NEXT: frintx z10.h, p0/m, z10.h ; CHECK-NEXT: frintx z16.h, p0/m, z16.h ; CHECK-NEXT: mov z22.d, #0x8000000000000000 ; CHECK-NEXT: mov z23.d, #0x8000000000000000 -; CHECK-NEXT: frintx z15.h, p0/m, z15.h ; CHECK-NEXT: mov z14.d, #0x8000000000000000 -; CHECK-NEXT: fcmge p4.h, p0/z, z4.h, z30.h -; CHECK-NEXT: fcmge p2.h, p0/z, z12.h, z30.h -; CHECK-NEXT: fcmgt p9.h, p0/z, z12.h, z29.h +; CHECK-NEXT: frintx z15.h, p0/m, z15.h +; CHECK-NEXT: fcmge p4.h, p0/z, z4.h, z28.h +; CHECK-NEXT: fcmge p2.h, p0/z, z12.h, z28.h +; CHECK-NEXT: fcmgt p9.h, p0/z, z12.h, z30.h ; CHECK-NEXT: fcmuo p8.h, p0/z, z12.h, z12.h ; CHECK-NEXT: fcvtzs z7.d, p4/m, z4.h ; CHECK-NEXT: fcvtzs z8.d, p2/m, z12.h ; CHECK-NEXT: mov z12.d, #0x8000000000000000 -; CHECK-NEXT: fcmge p4.h, p0/z, z27.h, z30.h +; CHECK-NEXT: fcmge p4.h, p0/z, z27.h, z28.h ; CHECK-NEXT: fcmuo p10.h, p0/z, z11.h, z11.h -; CHECK-NEXT: fcmge p3.h, p0/z, z5.h, z30.h -; CHECK-NEXT: mov z8.d, p9/m, z28.d +; CHECK-NEXT: fcmge p3.h, p0/z, z5.h, z28.h +; CHECK-NEXT: mov z8.d, p9/m, z29.d ; CHECK-NEXT: fcvtzs z9.d, p4/m, z27.h -; CHECK-NEXT: fcmge p4.h, p0/z, z11.h, z30.h +; CHECK-NEXT: fcmge p4.h, p0/z, z11.h, z28.h ; CHECK-NEXT: fcvtzs z24.d, p3/m, z5.h ; CHECK-NEXT: mov z8.d, p8/m, #0 // =0x0 -; CHECK-NEXT: fcmge p1.h, p0/z, z6.h, z30.h -; CHECK-NEXT: fcmge p5.h, p0/z, z1.h, z30.h +; CHECK-NEXT: fcmge p1.h, p0/z, z6.h, z28.h +; CHECK-NEXT: fcmge p5.h, p0/z, z1.h, z28.h ; CHECK-NEXT: str z8, [x8, #4, mul vl] ; CHECK-NEXT: fcvtzs z12.d, p4/m, z11.h -; CHECK-NEXT: fcmgt p4.h, p0/z, z11.h, z29.h +; CHECK-NEXT: fcmgt p4.h, p0/z, z11.h, z30.h ; CHECK-NEXT: uunpkhi z11.d, z17.s ; CHECK-NEXT: movprfx z17, z20 ; CHECK-NEXT: frintx z17.h, p0/m, z20.h ; CHECK-NEXT: fcvtzs z25.d, p1/m, z6.h ; CHECK-NEXT: mov z20.d, #0x8000000000000000 ; CHECK-NEXT: fcvtzs z0.d, p5/m, z1.h -; CHECK-NEXT: fcmge p6.h, p0/z, z10.h, z30.h +; CHECK-NEXT: fcmge p6.h, p0/z, z10.h, z28.h ; CHECK-NEXT: frintx z11.h, p0/m, z11.h -; CHECK-NEXT: fcmge p3.h, p0/z, z31.h, z30.h -; CHECK-NEXT: fcmge p1.h, p0/z, z13.h, z30.h +; CHECK-NEXT: fcmge p3.h, p0/z, z31.h, z28.h +; CHECK-NEXT: fcmge p1.h, p0/z, z13.h, z28.h ; CHECK-NEXT: fcvtzs z18.d, p6/m, z10.h -; CHECK-NEXT: fcmgt p11.h, p0/z, z10.h, z29.h -; CHECK-NEXT: fcmge p5.h, p0/z, z11.h, z30.h +; CHECK-NEXT: fcmgt p11.h, p0/z, z10.h, z30.h +; CHECK-NEXT: fcmge p5.h, p0/z, z11.h, z28.h ; CHECK-NEXT: fcvtzs z2.d, p3/m, z31.h ; CHECK-NEXT: fcvtzs z21.d, p1/m, z13.h -; CHECK-NEXT: fcmge p2.h, p0/z, z17.h, z30.h -; CHECK-NEXT: fcmge p3.h, p0/z, z16.h, z30.h +; CHECK-NEXT: fcmge p2.h, p0/z, z17.h, z28.h +; CHECK-NEXT: fcmge p3.h, p0/z, z16.h, z28.h ; CHECK-NEXT: fcmuo p1.h, p0/z, z10.h, z10.h -; CHECK-NEXT: sel z10.d, p4, z28.d, z12.d -; CHECK-NEXT: sel z12.d, p11, z28.d, z18.d +; CHECK-NEXT: sel z10.d, p4, z29.d, z12.d +; CHECK-NEXT: sel z12.d, p11, z29.d, z18.d ; CHECK-NEXT: fcvtzs z26.d, p5/m, z11.h ; CHECK-NEXT: fcvtzs z22.d, p2/m, z17.h -; CHECK-NEXT: fcmgt p4.h, p0/z, z11.h, z29.h +; CHECK-NEXT: fcmgt p4.h, p0/z, z11.h, z30.h ; CHECK-NEXT: fcvtzs z23.d, p3/m, z16.h ; CHECK-NEXT: mov z10.d, p10/m, #0 // =0x0 ; CHECK-NEXT: mov z12.d, p1/m, #0 // =0x0 -; CHECK-NEXT: fcmge p6.h, p0/z, z19.h, z30.h +; CHECK-NEXT: fcmge p6.h, p0/z, z19.h, z28.h ; CHECK-NEXT: str z10, [x8, #7, mul vl] -; CHECK-NEXT: fcmge p7.h, p0/z, z3.h, z30.h +; CHECK-NEXT: fcmge p7.h, p0/z, z3.h, z28.h ; CHECK-NEXT: str z12, [x8, #8, mul vl] -; CHECK-NEXT: mov z26.d, p4/m, z28.d -; CHECK-NEXT: fcmge p2.h, p0/z, z15.h, z30.h -; CHECK-NEXT: mov z30.d, #0x8000000000000000 +; CHECK-NEXT: mov z26.d, p4/m, z29.d +; CHECK-NEXT: fcmge p2.h, p0/z, z15.h, z28.h +; CHECK-NEXT: mov z28.d, #0x8000000000000000 ; CHECK-NEXT: fcvtzs z14.d, p6/m, z19.h -; CHECK-NEXT: fcmgt p5.h, p0/z, z16.h, z29.h -; CHECK-NEXT: fcmgt p3.h, p0/z, z17.h, z29.h +; CHECK-NEXT: fcmgt p5.h, p0/z, z16.h, z30.h +; CHECK-NEXT: fcmgt p3.h, p0/z, z17.h, z30.h ; CHECK-NEXT: fcvtzs z20.d, p7/m, z3.h -; CHECK-NEXT: fcvtzs z30.d, p2/m, z15.h +; CHECK-NEXT: fcvtzs z28.d, p2/m, z15.h ; CHECK-NEXT: fcmuo p1.h, p0/z, z11.h, z11.h ; CHECK-NEXT: fcmuo p2.h, p0/z, z16.h, z16.h -; CHECK-NEXT: sel z11.d, p5, z28.d, z23.d -; CHECK-NEXT: sel z16.d, p3, z28.d, z22.d -; CHECK-NEXT: fcmgt p4.h, p0/z, z19.h, z29.h -; CHECK-NEXT: fcmgt p3.h, p0/z, z15.h, z29.h +; CHECK-NEXT: sel z11.d, p5, z29.d, z23.d +; CHECK-NEXT: sel z16.d, p3, z29.d, z22.d +; CHECK-NEXT: fcmgt p4.h, p0/z, z19.h, z30.h +; CHECK-NEXT: fcmgt p3.h, p0/z, z15.h, z30.h ; CHECK-NEXT: mov z26.d, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z11.d, p2/m, #0 // =0x0 -; CHECK-NEXT: fcmgt p1.h, p0/z, z13.h, z29.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z13.h, z30.h ; CHECK-NEXT: fcmuo p6.h, p0/z, z17.h, z17.h ; CHECK-NEXT: str z26, [x8, #15, mul vl] -; CHECK-NEXT: sel z26.d, p4, z28.d, z14.d +; CHECK-NEXT: sel z26.d, p4, z29.d, z14.d ; CHECK-NEXT: str z11, [x8, #14, mul vl] -; CHECK-NEXT: mov z30.d, p3/m, z28.d -; CHECK-NEXT: fcmgt p2.h, p0/z, z3.h, z29.h +; CHECK-NEXT: mov z28.d, p3/m, z29.d +; CHECK-NEXT: fcmgt p2.h, p0/z, z3.h, z30.h ; CHECK-NEXT: fcmuo p4.h, p0/z, z13.h, z13.h ; CHECK-NEXT: fcmuo p3.h, p0/z, z3.h, z3.h -; CHECK-NEXT: sel z3.d, p1, z28.d, z21.d +; CHECK-NEXT: sel z3.d, p1, z29.d, z21.d ; CHECK-NEXT: mov z16.d, p6/m, #0 // =0x0 -; CHECK-NEXT: fcmgt p12.h, p0/z, z27.h, z29.h -; CHECK-NEXT: sel z11.d, p2, z28.d, z20.d +; CHECK-NEXT: fcmgt p12.h, p0/z, z27.h, z30.h +; CHECK-NEXT: sel z11.d, p2, z29.d, z20.d ; CHECK-NEXT: str z16, [x8, #13, mul vl] ; CHECK-NEXT: mov z3.d, p4/m, #0 // =0x0 ; CHECK-NEXT: fcmuo p6.h, p0/z, z15.h, z15.h -; CHECK-NEXT: fcmgt p1.h, p0/z, z4.h, z29.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z4.h, z30.h ; CHECK-NEXT: mov z11.d, p3/m, #0 // =0x0 -; CHECK-NEXT: mov z9.d, p12/m, z28.d +; CHECK-NEXT: mov z9.d, p12/m, z29.d ; CHECK-NEXT: str z3, [x8, #11, mul vl] ; CHECK-NEXT: fcmuo p5.h, p0/z, z19.h, z19.h -; CHECK-NEXT: fcmgt p2.h, p0/z, z5.h, z29.h +; CHECK-NEXT: fcmgt p2.h, p0/z, z5.h, z30.h ; CHECK-NEXT: str z11, [x8, #10, mul vl] -; CHECK-NEXT: mov z30.d, p6/m, #0 // =0x0 -; CHECK-NEXT: sel z3.d, p1, z28.d, z7.d -; CHECK-NEXT: fcmgt p4.h, p0/z, z6.h, z29.h +; CHECK-NEXT: mov z28.d, p6/m, #0 // =0x0 +; CHECK-NEXT: sel z3.d, p1, z29.d, z7.d +; CHECK-NEXT: fcmgt p4.h, p0/z, z6.h, z30.h ; CHECK-NEXT: fcmuo p3.h, p0/z, z27.h, z27.h -; CHECK-NEXT: str z30, [x8, #12, mul vl] +; CHECK-NEXT: str z28, [x8, #12, mul vl] ; CHECK-NEXT: mov z26.d, p5/m, #0 // =0x0 -; CHECK-NEXT: sel z7.d, p2, z28.d, z24.d -; CHECK-NEXT: fcmgt p6.h, p0/z, z31.h, z29.h -; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z29.h +; CHECK-NEXT: sel z7.d, p2, z29.d, z24.d +; CHECK-NEXT: fcmgt p6.h, p0/z, z31.h, z30.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z30.h ; CHECK-NEXT: str z26, [x8, #9, mul vl] -; CHECK-NEXT: sel z24.d, p4, z28.d, z25.d +; CHECK-NEXT: sel z24.d, p4, z29.d, z25.d ; CHECK-NEXT: mov z9.d, p3/m, #0 // =0x0 ; CHECK-NEXT: fcmuo p5.h, p0/z, z31.h, z31.h ; CHECK-NEXT: fcmuo p2.h, p0/z, z6.h, z6.h -; CHECK-NEXT: mov z2.d, p6/m, z28.d +; CHECK-NEXT: mov z2.d, p6/m, z29.d ; CHECK-NEXT: str z9, [x8, #5, mul vl] -; CHECK-NEXT: mov z0.d, p1/m, z28.d +; CHECK-NEXT: mov z0.d, p1/m, z29.d ; CHECK-NEXT: fcmuo p3.h, p0/z, z5.h, z5.h ; CHECK-NEXT: fcmuo p4.h, p0/z, z4.h, z4.h ; CHECK-NEXT: mov z2.d, p5/m, #0 // =0x0 diff --git a/llvm/test/CodeGen/AArch64/sve-lrint.ll b/llvm/test/CodeGen/AArch64/sve-lrint.ll index f517e7fe8dc16..f1224d30d53cc 100644 --- a/llvm/test/CodeGen/AArch64/sve-lrint.ll +++ b/llvm/test/CodeGen/AArch64/sve-lrint.ll @@ -6,9 +6,8 @@ define @lrint_v1f16( %x) { ; CHECK-LABEL: lrint_v1f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov w8, #64511 // =0xfbff +; CHECK-NEXT: mov z1.h, #-1025 // =0xfffffffffffffbff ; CHECK-NEXT: mov z2.d, #0x8000000000000000 -; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: frintx z0.h, p0/m, z0.h ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h @@ -29,9 +28,8 @@ define @lrint_v2f16( %x) { ; CHECK-LABEL: lrint_v2f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov w8, #64511 // =0xfbff +; CHECK-NEXT: mov z1.h, #-1025 // =0xfffffffffffffbff ; CHECK-NEXT: mov z2.d, #0x8000000000000000 -; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: frintx z0.h, p0/m, z0.h ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h @@ -53,10 +51,9 @@ define @lrint_v4f16( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: uunpklo z1.d, z0.s ; CHECK-NEXT: uunpkhi z0.d, z0.s -; CHECK-NEXT: mov w8, #64511 // =0xfbff -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z2.h, #-1025 // =0xfffffffffffffbff ; CHECK-NEXT: mov z3.d, #0x8000000000000000 ; CHECK-NEXT: mov z4.d, #0x8000000000000000 ; CHECK-NEXT: mov z5.d, #0x7fffffffffffffff @@ -93,10 +90,9 @@ define @lrint_v8f16( %x) { ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: uunpklo z1.s, z0.h ; CHECK-NEXT: uunpkhi z0.s, z0.h -; CHECK-NEXT: mov w8, #64511 // =0xfbff -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z4.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z4.h, #-1025 // =0xfffffffffffffbff ; CHECK-NEXT: mov z5.d, #0x8000000000000000 ; CHECK-NEXT: mov z6.d, #0x8000000000000000 ; CHECK-NEXT: mov z7.d, #0x8000000000000000 @@ -163,12 +159,13 @@ define @lrint_v16f16( %x) { ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: uunpklo z2.s, z0.h ; CHECK-NEXT: uunpkhi z3.s, z0.h -; CHECK-NEXT: mov w8, #64511 // =0xfbff +; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: uunpklo z7.s, z1.h ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z0.h, #-1025 // =0xfffffffffffffbff ; CHECK-NEXT: uunpkhi z1.s, z1.h -; CHECK-NEXT: mov z0.d, #0x8000000000000000 ; CHECK-NEXT: mov z5.d, #0x8000000000000000 +; CHECK-NEXT: mov z29.h, w8 ; CHECK-NEXT: mov z31.d, #0x8000000000000000 ; CHECK-NEXT: uunpklo z4.d, z2.s ; CHECK-NEXT: uunpklo z24.d, z3.s @@ -176,10 +173,8 @@ define @lrint_v16f16( %x) { ; CHECK-NEXT: uunpkhi z6.d, z2.s ; CHECK-NEXT: uunpklo z26.d, z7.s ; CHECK-NEXT: uunpkhi z7.d, z7.s -; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: mov w8, #31743 // =0x7bff +; CHECK-NEXT: mov z2.d, #0x8000000000000000 ; CHECK-NEXT: uunpklo z30.d, z1.s -; CHECK-NEXT: mov z29.h, w8 ; CHECK-NEXT: mov z3.d, #0x8000000000000000 ; CHECK-NEXT: uunpkhi z1.d, z1.s ; CHECK-NEXT: movprfx z27, z4 @@ -192,17 +187,17 @@ define @lrint_v16f16( %x) { ; CHECK-NEXT: frintx z26.h, p0/m, z26.h ; CHECK-NEXT: frintx z7.h, p0/m, z7.h ; CHECK-NEXT: mov z6.d, #0x8000000000000000 -; CHECK-NEXT: fcmge p1.h, p0/z, z27.h, z2.h -; CHECK-NEXT: fcmge p3.h, p0/z, z24.h, z2.h -; CHECK-NEXT: fcmge p4.h, p0/z, z25.h, z2.h -; CHECK-NEXT: fcmge p2.h, p0/z, z28.h, z2.h -; CHECK-NEXT: fcmge p5.h, p0/z, z26.h, z2.h -; CHECK-NEXT: fcvtzs z0.d, p1/m, z27.h +; CHECK-NEXT: fcmge p1.h, p0/z, z27.h, z0.h +; CHECK-NEXT: fcmge p3.h, p0/z, z24.h, z0.h +; CHECK-NEXT: fcmge p4.h, p0/z, z25.h, z0.h +; CHECK-NEXT: fcmge p2.h, p0/z, z28.h, z0.h +; CHECK-NEXT: fcmge p5.h, p0/z, z26.h, z0.h +; CHECK-NEXT: fcvtzs z2.d, p1/m, z27.h ; CHECK-NEXT: fcvtzs z4.d, p3/m, z24.h ; CHECK-NEXT: fcvtzs z5.d, p4/m, z25.h ; CHECK-NEXT: fcmgt p3.h, p0/z, z27.h, z29.h ; CHECK-NEXT: fcvtzs z3.d, p2/m, z28.h -; CHECK-NEXT: fcmge p4.h, p0/z, z7.h, z2.h +; CHECK-NEXT: fcmge p4.h, p0/z, z7.h, z0.h ; CHECK-NEXT: fcvtzs z6.d, p5/m, z26.h ; CHECK-NEXT: fcmuo p1.h, p0/z, z27.h, z27.h ; CHECK-NEXT: movprfx z27, z30 @@ -213,7 +208,7 @@ define @lrint_v16f16( %x) { ; CHECK-NEXT: fcmuo p2.h, p0/z, z28.h, z28.h ; CHECK-NEXT: mov z28.d, #0x8000000000000000 ; CHECK-NEXT: fcvtzs z31.d, p4/m, z7.h -; CHECK-NEXT: fcmge p4.h, p0/z, z27.h, z2.h +; CHECK-NEXT: fcmge p4.h, p0/z, z27.h, z0.h ; CHECK-NEXT: fcmgt p6.h, p0/z, z24.h, z29.h ; CHECK-NEXT: fcmuo p7.h, p0/z, z24.h, z24.h ; CHECK-NEXT: mov z24.d, #0x7fffffffffffffff @@ -222,31 +217,31 @@ define @lrint_v16f16( %x) { ; CHECK-NEXT: fcmuo p10.h, p0/z, z25.h, z25.h ; CHECK-NEXT: mov z25.d, #0x8000000000000000 ; CHECK-NEXT: sel z1.d, p5, z24.d, z3.d -; CHECK-NEXT: mov z0.d, p3/m, z24.d ; CHECK-NEXT: sel z3.d, p8, z24.d, z5.d -; CHECK-NEXT: fcmge p4.h, p0/z, z30.h, z2.h +; CHECK-NEXT: fcmge p4.h, p0/z, z30.h, z0.h +; CHECK-NEXT: sel z0.d, p3, z24.d, z2.d ; CHECK-NEXT: sel z2.d, p6, z24.d, z4.d -; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0 ; CHECK-NEXT: mov z3.d, p10/m, #0 // =0x0 ; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Reload +; CHECK-NEXT: fcmgt p9.h, p0/z, z26.h, z29.h ; CHECK-NEXT: mov z2.d, p7/m, #0 // =0x0 ; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Reload -; CHECK-NEXT: fcmgt p9.h, p0/z, z26.h, z29.h ; CHECK-NEXT: fcvtzs z25.d, p4/m, z30.h +; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 ; CHECK-NEXT: fcmgt p5.h, p0/z, z7.h, z29.h ; CHECK-NEXT: fcmgt p6.h, p0/z, z27.h, z29.h -; CHECK-NEXT: fcmgt p4.h, p0/z, z30.h, z29.h ; CHECK-NEXT: sel z4.d, p9, z24.d, z6.d +; CHECK-NEXT: fcmgt p4.h, p0/z, z30.h, z29.h ; CHECK-NEXT: fcmuo p8.h, p0/z, z7.h, z7.h ; CHECK-NEXT: sel z5.d, p5, z24.d, z31.d ; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload ; CHECK-NEXT: sel z6.d, p6, z24.d, z28.d ; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Reload ; CHECK-NEXT: fcmuo p9.h, p0/z, z27.h, z27.h +; CHECK-NEXT: fcmuo p3.h, p0/z, z26.h, z26.h ; CHECK-NEXT: sel z7.d, p4, z24.d, z25.d ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload -; CHECK-NEXT: fcmuo p3.h, p0/z, z26.h, z26.h ; CHECK-NEXT: mov z5.d, p8/m, #0 // =0x0 ; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Reload ; CHECK-NEXT: fcmuo p0.h, p0/z, z30.h, z30.h @@ -303,48 +298,47 @@ define @lrint_v32f16( %x) { ; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x48, 0x1e, 0x22, 0x40, 0x1c // $d14 @ cfa - 56 * VG - 16 ; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x40, 0x1e, 0x22, 0x40, 0x1c // $d15 @ cfa - 64 * VG - 16 ; CHECK-NEXT: uunpklo z4.s, z0.h -; CHECK-NEXT: uunpkhi z5.s, z0.h -; CHECK-NEXT: mov w9, #64511 // =0xfbff -; CHECK-NEXT: uunpklo z6.s, z1.h -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: uunpkhi z28.s, z1.h -; CHECK-NEXT: mov z30.h, w9 +; CHECK-NEXT: uunpkhi z0.s, z0.h ; CHECK-NEXT: mov w9, #31743 // =0x7bff +; CHECK-NEXT: uunpklo z5.s, z1.h +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z28.h, #-1025 // =0xfffffffffffffbff +; CHECK-NEXT: uunpkhi z29.s, z1.h +; CHECK-NEXT: mov z7.d, #0x8000000000000000 ; CHECK-NEXT: uunpklo z13.s, z2.h ; CHECK-NEXT: mov z9.d, #0x8000000000000000 ; CHECK-NEXT: uunpkhi z14.s, z2.h ; CHECK-NEXT: uunpkhi z17.s, z3.h -; CHECK-NEXT: uunpklo z7.d, z4.s +; CHECK-NEXT: uunpklo z6.d, z4.s ; CHECK-NEXT: uunpkhi z4.d, z4.s -; CHECK-NEXT: uunpklo z27.d, z5.s -; CHECK-NEXT: uunpklo z31.d, z6.s -; CHECK-NEXT: uunpkhi z8.d, z6.s -; CHECK-NEXT: uunpkhi z29.d, z5.s -; CHECK-NEXT: uunpkhi z11.d, z28.s -; CHECK-NEXT: uunpklo z10.d, z28.s +; CHECK-NEXT: uunpklo z27.d, z0.s +; CHECK-NEXT: uunpklo z31.d, z5.s +; CHECK-NEXT: uunpkhi z8.d, z5.s +; CHECK-NEXT: uunpkhi z30.d, z0.s +; CHECK-NEXT: uunpkhi z11.d, z29.s +; CHECK-NEXT: uunpklo z10.d, z29.s ; CHECK-NEXT: uunpklo z15.s, z3.h ; CHECK-NEXT: uunpklo z16.d, z14.s ; CHECK-NEXT: uunpkhi z14.d, z14.s ; CHECK-NEXT: mov z24.d, #0x8000000000000000 -; CHECK-NEXT: movprfx z1, z7 -; CHECK-NEXT: frintx z1.h, p0/m, z7.h ; CHECK-NEXT: movprfx z5, z27 ; CHECK-NEXT: frintx z5.h, p0/m, z27.h +; CHECK-NEXT: movprfx z1, z6 +; CHECK-NEXT: frintx z1.h, p0/m, z6.h ; CHECK-NEXT: frintx z4.h, p0/m, z4.h ; CHECK-NEXT: movprfx z12, z31 ; CHECK-NEXT: frintx z12.h, p0/m, z31.h ; CHECK-NEXT: movprfx z27, z8 ; CHECK-NEXT: frintx z27.h, p0/m, z8.h -; CHECK-NEXT: movprfx z6, z29 -; CHECK-NEXT: frintx z6.h, p0/m, z29.h +; CHECK-NEXT: movprfx z6, z30 +; CHECK-NEXT: frintx z6.h, p0/m, z30.h ; CHECK-NEXT: movprfx z31, z10 ; CHECK-NEXT: frintx z31.h, p0/m, z10.h -; CHECK-NEXT: mov z7.d, #0x8000000000000000 ; CHECK-NEXT: mov z8.d, #0x8000000000000000 +; CHECK-NEXT: frintx z11.h, p0/m, z11.h ; CHECK-NEXT: movprfx z3, z16 ; CHECK-NEXT: frintx z3.h, p0/m, z16.h -; CHECK-NEXT: frintx z11.h, p0/m, z11.h -; CHECK-NEXT: mov z29.h, w9 +; CHECK-NEXT: mov z30.h, w9 ; CHECK-NEXT: uunpklo z10.d, z13.s ; CHECK-NEXT: uunpkhi z13.d, z13.s ; CHECK-NEXT: uunpkhi z20.d, z15.s @@ -355,124 +349,124 @@ define @lrint_v32f16( %x) { ; CHECK-NEXT: uunpklo z15.d, z15.s ; CHECK-NEXT: mov z2.d, #0x8000000000000000 ; CHECK-NEXT: mov z21.d, #0x8000000000000000 +; CHECK-NEXT: frintx z10.h, p0/m, z10.h ; CHECK-NEXT: mov z26.d, #0x8000000000000000 -; CHECK-NEXT: mov z28.d, #0x7fffffffffffffff +; CHECK-NEXT: mov z29.d, #0x7fffffffffffffff ; CHECK-NEXT: movprfx z19, z13 ; CHECK-NEXT: frintx z19.h, p0/m, z13.h ; CHECK-NEXT: movprfx z13, z14 ; CHECK-NEXT: frintx z13.h, p0/m, z14.h -; CHECK-NEXT: frintx z10.h, p0/m, z10.h ; CHECK-NEXT: frintx z16.h, p0/m, z16.h ; CHECK-NEXT: mov z22.d, #0x8000000000000000 ; CHECK-NEXT: mov z23.d, #0x8000000000000000 -; CHECK-NEXT: frintx z15.h, p0/m, z15.h ; CHECK-NEXT: mov z14.d, #0x8000000000000000 -; CHECK-NEXT: fcmge p4.h, p0/z, z4.h, z30.h -; CHECK-NEXT: fcmge p2.h, p0/z, z12.h, z30.h -; CHECK-NEXT: fcmgt p9.h, p0/z, z12.h, z29.h +; CHECK-NEXT: frintx z15.h, p0/m, z15.h +; CHECK-NEXT: fcmge p4.h, p0/z, z4.h, z28.h +; CHECK-NEXT: fcmge p2.h, p0/z, z12.h, z28.h +; CHECK-NEXT: fcmgt p9.h, p0/z, z12.h, z30.h ; CHECK-NEXT: fcmuo p8.h, p0/z, z12.h, z12.h ; CHECK-NEXT: fcvtzs z7.d, p4/m, z4.h ; CHECK-NEXT: fcvtzs z8.d, p2/m, z12.h ; CHECK-NEXT: mov z12.d, #0x8000000000000000 -; CHECK-NEXT: fcmge p4.h, p0/z, z27.h, z30.h +; CHECK-NEXT: fcmge p4.h, p0/z, z27.h, z28.h ; CHECK-NEXT: fcmuo p10.h, p0/z, z11.h, z11.h -; CHECK-NEXT: fcmge p3.h, p0/z, z5.h, z30.h -; CHECK-NEXT: mov z8.d, p9/m, z28.d +; CHECK-NEXT: fcmge p3.h, p0/z, z5.h, z28.h +; CHECK-NEXT: mov z8.d, p9/m, z29.d ; CHECK-NEXT: fcvtzs z9.d, p4/m, z27.h -; CHECK-NEXT: fcmge p4.h, p0/z, z11.h, z30.h +; CHECK-NEXT: fcmge p4.h, p0/z, z11.h, z28.h ; CHECK-NEXT: fcvtzs z24.d, p3/m, z5.h ; CHECK-NEXT: mov z8.d, p8/m, #0 // =0x0 -; CHECK-NEXT: fcmge p1.h, p0/z, z6.h, z30.h -; CHECK-NEXT: fcmge p5.h, p0/z, z1.h, z30.h +; CHECK-NEXT: fcmge p1.h, p0/z, z6.h, z28.h +; CHECK-NEXT: fcmge p5.h, p0/z, z1.h, z28.h ; CHECK-NEXT: str z8, [x8, #4, mul vl] ; CHECK-NEXT: fcvtzs z12.d, p4/m, z11.h -; CHECK-NEXT: fcmgt p4.h, p0/z, z11.h, z29.h +; CHECK-NEXT: fcmgt p4.h, p0/z, z11.h, z30.h ; CHECK-NEXT: uunpkhi z11.d, z17.s ; CHECK-NEXT: movprfx z17, z20 ; CHECK-NEXT: frintx z17.h, p0/m, z20.h ; CHECK-NEXT: fcvtzs z25.d, p1/m, z6.h ; CHECK-NEXT: mov z20.d, #0x8000000000000000 ; CHECK-NEXT: fcvtzs z0.d, p5/m, z1.h -; CHECK-NEXT: fcmge p6.h, p0/z, z10.h, z30.h +; CHECK-NEXT: fcmge p6.h, p0/z, z10.h, z28.h ; CHECK-NEXT: frintx z11.h, p0/m, z11.h -; CHECK-NEXT: fcmge p3.h, p0/z, z31.h, z30.h -; CHECK-NEXT: fcmge p1.h, p0/z, z13.h, z30.h +; CHECK-NEXT: fcmge p3.h, p0/z, z31.h, z28.h +; CHECK-NEXT: fcmge p1.h, p0/z, z13.h, z28.h ; CHECK-NEXT: fcvtzs z18.d, p6/m, z10.h -; CHECK-NEXT: fcmgt p11.h, p0/z, z10.h, z29.h -; CHECK-NEXT: fcmge p5.h, p0/z, z11.h, z30.h +; CHECK-NEXT: fcmgt p11.h, p0/z, z10.h, z30.h +; CHECK-NEXT: fcmge p5.h, p0/z, z11.h, z28.h ; CHECK-NEXT: fcvtzs z2.d, p3/m, z31.h ; CHECK-NEXT: fcvtzs z21.d, p1/m, z13.h -; CHECK-NEXT: fcmge p2.h, p0/z, z17.h, z30.h -; CHECK-NEXT: fcmge p3.h, p0/z, z16.h, z30.h +; CHECK-NEXT: fcmge p2.h, p0/z, z17.h, z28.h +; CHECK-NEXT: fcmge p3.h, p0/z, z16.h, z28.h ; CHECK-NEXT: fcmuo p1.h, p0/z, z10.h, z10.h -; CHECK-NEXT: sel z10.d, p4, z28.d, z12.d -; CHECK-NEXT: sel z12.d, p11, z28.d, z18.d +; CHECK-NEXT: sel z10.d, p4, z29.d, z12.d +; CHECK-NEXT: sel z12.d, p11, z29.d, z18.d ; CHECK-NEXT: fcvtzs z26.d, p5/m, z11.h ; CHECK-NEXT: fcvtzs z22.d, p2/m, z17.h -; CHECK-NEXT: fcmgt p4.h, p0/z, z11.h, z29.h +; CHECK-NEXT: fcmgt p4.h, p0/z, z11.h, z30.h ; CHECK-NEXT: fcvtzs z23.d, p3/m, z16.h ; CHECK-NEXT: mov z10.d, p10/m, #0 // =0x0 ; CHECK-NEXT: mov z12.d, p1/m, #0 // =0x0 -; CHECK-NEXT: fcmge p6.h, p0/z, z19.h, z30.h +; CHECK-NEXT: fcmge p6.h, p0/z, z19.h, z28.h ; CHECK-NEXT: str z10, [x8, #7, mul vl] -; CHECK-NEXT: fcmge p7.h, p0/z, z3.h, z30.h +; CHECK-NEXT: fcmge p7.h, p0/z, z3.h, z28.h ; CHECK-NEXT: str z12, [x8, #8, mul vl] -; CHECK-NEXT: mov z26.d, p4/m, z28.d -; CHECK-NEXT: fcmge p2.h, p0/z, z15.h, z30.h -; CHECK-NEXT: mov z30.d, #0x8000000000000000 +; CHECK-NEXT: mov z26.d, p4/m, z29.d +; CHECK-NEXT: fcmge p2.h, p0/z, z15.h, z28.h +; CHECK-NEXT: mov z28.d, #0x8000000000000000 ; CHECK-NEXT: fcvtzs z14.d, p6/m, z19.h -; CHECK-NEXT: fcmgt p5.h, p0/z, z16.h, z29.h -; CHECK-NEXT: fcmgt p3.h, p0/z, z17.h, z29.h +; CHECK-NEXT: fcmgt p5.h, p0/z, z16.h, z30.h +; CHECK-NEXT: fcmgt p3.h, p0/z, z17.h, z30.h ; CHECK-NEXT: fcvtzs z20.d, p7/m, z3.h -; CHECK-NEXT: fcvtzs z30.d, p2/m, z15.h +; CHECK-NEXT: fcvtzs z28.d, p2/m, z15.h ; CHECK-NEXT: fcmuo p1.h, p0/z, z11.h, z11.h ; CHECK-NEXT: fcmuo p2.h, p0/z, z16.h, z16.h -; CHECK-NEXT: sel z11.d, p5, z28.d, z23.d -; CHECK-NEXT: sel z16.d, p3, z28.d, z22.d -; CHECK-NEXT: fcmgt p4.h, p0/z, z19.h, z29.h -; CHECK-NEXT: fcmgt p3.h, p0/z, z15.h, z29.h +; CHECK-NEXT: sel z11.d, p5, z29.d, z23.d +; CHECK-NEXT: sel z16.d, p3, z29.d, z22.d +; CHECK-NEXT: fcmgt p4.h, p0/z, z19.h, z30.h +; CHECK-NEXT: fcmgt p3.h, p0/z, z15.h, z30.h ; CHECK-NEXT: mov z26.d, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z11.d, p2/m, #0 // =0x0 -; CHECK-NEXT: fcmgt p1.h, p0/z, z13.h, z29.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z13.h, z30.h ; CHECK-NEXT: fcmuo p6.h, p0/z, z17.h, z17.h ; CHECK-NEXT: str z26, [x8, #15, mul vl] -; CHECK-NEXT: sel z26.d, p4, z28.d, z14.d +; CHECK-NEXT: sel z26.d, p4, z29.d, z14.d ; CHECK-NEXT: str z11, [x8, #14, mul vl] -; CHECK-NEXT: mov z30.d, p3/m, z28.d -; CHECK-NEXT: fcmgt p2.h, p0/z, z3.h, z29.h +; CHECK-NEXT: mov z28.d, p3/m, z29.d +; CHECK-NEXT: fcmgt p2.h, p0/z, z3.h, z30.h ; CHECK-NEXT: fcmuo p4.h, p0/z, z13.h, z13.h ; CHECK-NEXT: fcmuo p3.h, p0/z, z3.h, z3.h -; CHECK-NEXT: sel z3.d, p1, z28.d, z21.d +; CHECK-NEXT: sel z3.d, p1, z29.d, z21.d ; CHECK-NEXT: mov z16.d, p6/m, #0 // =0x0 -; CHECK-NEXT: fcmgt p12.h, p0/z, z27.h, z29.h -; CHECK-NEXT: sel z11.d, p2, z28.d, z20.d +; CHECK-NEXT: fcmgt p12.h, p0/z, z27.h, z30.h +; CHECK-NEXT: sel z11.d, p2, z29.d, z20.d ; CHECK-NEXT: str z16, [x8, #13, mul vl] ; CHECK-NEXT: mov z3.d, p4/m, #0 // =0x0 ; CHECK-NEXT: fcmuo p6.h, p0/z, z15.h, z15.h -; CHECK-NEXT: fcmgt p1.h, p0/z, z4.h, z29.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z4.h, z30.h ; CHECK-NEXT: mov z11.d, p3/m, #0 // =0x0 -; CHECK-NEXT: mov z9.d, p12/m, z28.d +; CHECK-NEXT: mov z9.d, p12/m, z29.d ; CHECK-NEXT: str z3, [x8, #11, mul vl] ; CHECK-NEXT: fcmuo p5.h, p0/z, z19.h, z19.h -; CHECK-NEXT: fcmgt p2.h, p0/z, z5.h, z29.h +; CHECK-NEXT: fcmgt p2.h, p0/z, z5.h, z30.h ; CHECK-NEXT: str z11, [x8, #10, mul vl] -; CHECK-NEXT: mov z30.d, p6/m, #0 // =0x0 -; CHECK-NEXT: sel z3.d, p1, z28.d, z7.d -; CHECK-NEXT: fcmgt p4.h, p0/z, z6.h, z29.h +; CHECK-NEXT: mov z28.d, p6/m, #0 // =0x0 +; CHECK-NEXT: sel z3.d, p1, z29.d, z7.d +; CHECK-NEXT: fcmgt p4.h, p0/z, z6.h, z30.h ; CHECK-NEXT: fcmuo p3.h, p0/z, z27.h, z27.h -; CHECK-NEXT: str z30, [x8, #12, mul vl] +; CHECK-NEXT: str z28, [x8, #12, mul vl] ; CHECK-NEXT: mov z26.d, p5/m, #0 // =0x0 -; CHECK-NEXT: sel z7.d, p2, z28.d, z24.d -; CHECK-NEXT: fcmgt p6.h, p0/z, z31.h, z29.h -; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z29.h +; CHECK-NEXT: sel z7.d, p2, z29.d, z24.d +; CHECK-NEXT: fcmgt p6.h, p0/z, z31.h, z30.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z30.h ; CHECK-NEXT: str z26, [x8, #9, mul vl] -; CHECK-NEXT: sel z24.d, p4, z28.d, z25.d +; CHECK-NEXT: sel z24.d, p4, z29.d, z25.d ; CHECK-NEXT: mov z9.d, p3/m, #0 // =0x0 ; CHECK-NEXT: fcmuo p5.h, p0/z, z31.h, z31.h ; CHECK-NEXT: fcmuo p2.h, p0/z, z6.h, z6.h -; CHECK-NEXT: mov z2.d, p6/m, z28.d +; CHECK-NEXT: mov z2.d, p6/m, z29.d ; CHECK-NEXT: str z9, [x8, #5, mul vl] -; CHECK-NEXT: mov z0.d, p1/m, z28.d +; CHECK-NEXT: mov z0.d, p1/m, z29.d ; CHECK-NEXT: fcmuo p3.h, p0/z, z5.h, z5.h ; CHECK-NEXT: fcmuo p4.h, p0/z, z4.h, z4.h ; CHECK-NEXT: mov z2.d, p5/m, #0 // =0x0 diff --git a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll index 5cca5539048b5..1ceaa5ad27734 100644 --- a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll +++ b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll @@ -509,6 +509,294 @@ define @splat_nxv2bf16_imm() { ret splat(bfloat 1.0) } +define @splat_nzero_nxv2f16() { +; CHECK-LABEL: splat_nzero_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: dupm z0.h, #0x8000 +; CHECK-NEXT: ret + ret splat (half -0.0) +} + +define @splat_nzero_nxv4f16() { +; CHECK-LABEL: splat_nzero_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: dupm z0.h, #0x8000 +; CHECK-NEXT: ret + ret splat (half -0.0) +} + +define @splat_nzero_nxv8f16() { +; CHECK-LABEL: splat_nzero_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: dupm z0.h, #0x8000 +; CHECK-NEXT: ret + ret splat (half -0.0) +} + +define @splat_nzero_nxv2f32() { +; CHECK-LABEL: splat_nzero_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, #0x80000000 +; CHECK-NEXT: ret + ret splat (float -0.0) +} + +define @splat_nzero_nxv4f32() { +; CHECK-LABEL: splat_nzero_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, #0x80000000 +; CHECK-NEXT: ret + ret splat (float -0.0) +} + +define @splat_nzero_nxv2f64() { +; CHECK-LABEL: splat_nzero_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.d, #0x8000000000000000 +; CHECK-NEXT: ret + ret splat (double -0.0) +} + +define @splat_nzero_nxv2bf16() { +; CHECK-LABEL: splat_nzero_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: dupm z0.h, #0x8000 +; CHECK-NEXT: ret + ret splat (bfloat -0.0) +} + +define @splat_nzero_nxv4bf16() { +; CHECK-LABEL: splat_nzero_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: dupm z0.h, #0x8000 +; CHECK-NEXT: ret + ret splat (bfloat -0.0) +} + +define @splat_nzero_nxv8bf16() { +; CHECK-LABEL: splat_nzero_nxv8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: dupm z0.h, #0x8000 +; CHECK-NEXT: ret + ret splat (bfloat -0.0) +} + +define @splat_pinf_nxv2f16() { +; CHECK-LABEL: splat_pinf_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: dupm z0.h, #0x7c00 +; CHECK-NEXT: ret + ret splat (half 0x7FF0000000000000) +} + +define @splat_pinf_nxv4f16() { +; CHECK-LABEL: splat_pinf_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: dupm z0.h, #0x7c00 +; CHECK-NEXT: ret + ret splat (half 0x7FF0000000000000) +} + +define @splat_pinf_nxv8f16() { +; CHECK-LABEL: splat_pinf_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: dupm z0.h, #0x7c00 +; CHECK-NEXT: ret + ret splat (half 0x7FF0000000000000) +} + +define @splat_pinf_nxv2f32() { +; CHECK-LABEL: splat_pinf_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, #0x7f800000 +; CHECK-NEXT: ret + ret splat (float 0x7FF0000000000000) +} + +define @splat_pinf_nxv4f32() { +; CHECK-LABEL: splat_pinf_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, #0x7f800000 +; CHECK-NEXT: ret + ret splat (float 0x7FF0000000000000) +} + +define @splat_pinf_nxv2f64() { +; CHECK-LABEL: splat_pinf_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.d, #0x7ff0000000000000 +; CHECK-NEXT: ret + ret splat (double 0x7FF0000000000000) +} + +define @splat_pinf_nxv2bf16() { +; CHECK-LABEL: splat_pinf_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.h, #32640 // =0x7f80 +; CHECK-NEXT: ret + ret splat (bfloat 0x7FF0000000000000) +} + +define @splat_pinf_nxv4bf16() { +; CHECK-LABEL: splat_pinf_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.h, #32640 // =0x7f80 +; CHECK-NEXT: ret + ret splat (bfloat 0x7FF0000000000000) +} + +define @splat_pinf_nxv8bf16() { +; CHECK-LABEL: splat_pinf_nxv8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.h, #32640 // =0x7f80 +; CHECK-NEXT: ret + ret splat (bfloat 0x7FF0000000000000) +} + +define @splat_ninf_nxv2f16() { +; CHECK-LABEL: splat_ninf_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: dupm z0.h, #0xfc00 +; CHECK-NEXT: ret + ret splat (half 0xFFF0000000000000) +} + +define @splat_ninf_nxv4f16() { +; CHECK-LABEL: splat_ninf_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: dupm z0.h, #0xfc00 +; CHECK-NEXT: ret + ret splat (half 0xFFF0000000000000) +} + +define @splat_ninf_nxv8f16() { +; CHECK-LABEL: splat_ninf_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: dupm z0.h, #0xfc00 +; CHECK-NEXT: ret + ret splat (half 0xFFF0000000000000) +} + +define @splat_ninf_nxv2f32() { +; CHECK-LABEL: splat_ninf_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, #0xff800000 +; CHECK-NEXT: ret + ret splat (float 0xFFF0000000000000) +} + +define @splat_ninf_nxv4f32() { +; CHECK-LABEL: splat_ninf_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, #0xff800000 +; CHECK-NEXT: ret + ret splat (float 0xFFF0000000000000) +} + +define @splat_ninf_nxv2f64() { +; CHECK-LABEL: splat_ninf_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.d, #0xfff0000000000000 +; CHECK-NEXT: ret + ret splat (double 0xFFF0000000000000) +} + +define @splat_ninf_nxv2bf16() { +; CHECK-LABEL: splat_ninf_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: dupm z0.h, #0xff80 +; CHECK-NEXT: ret + ret splat (bfloat 0xFFF0000000000000) +} + +define @splat_ninf_nxv4bf16() { +; CHECK-LABEL: splat_ninf_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: dupm z0.h, #0xff80 +; CHECK-NEXT: ret + ret splat (bfloat 0xFFF0000000000000) +} + +define @splat_ninf_nxv8bf16() { +; CHECK-LABEL: splat_ninf_nxv8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: dupm z0.h, #0xff80 +; CHECK-NEXT: ret + ret splat (bfloat 0xFFF0000000000000) +} + +define @splat_nan_nxv2f16() { +; CHECK-LABEL: splat_nan_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: dupm z0.h, #0x7e00 +; CHECK-NEXT: ret + ret splat (half 0x7FF8000000000000) +} + +define @splat_nan_nxv4f16() { +; CHECK-LABEL: splat_nan_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: dupm z0.h, #0x7e00 +; CHECK-NEXT: ret + ret splat (half 0x7FF8000000000000) +} + +define @splat_nan_nxv8f16() { +; CHECK-LABEL: splat_nan_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: dupm z0.h, #0x7e00 +; CHECK-NEXT: ret + ret splat (half 0x7FF8000000000000) +} + +define @splat_nan_nxv2f32() { +; CHECK-LABEL: splat_nan_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, #0x7fc00000 +; CHECK-NEXT: ret + ret splat (float 0x7FF8000000000000) +} + +define @splat_nan_nxv4f32() { +; CHECK-LABEL: splat_nan_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, #0x7fc00000 +; CHECK-NEXT: ret + ret splat (float 0x7FF8000000000000) +} + +define @splat_nan_nxv2f64() { +; CHECK-LABEL: splat_nan_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.d, #0x7ff8000000000000 +; CHECK-NEXT: ret + ret splat (double 0x7FF8000000000000) +} + +define @splat_nan_nxv2bf16() { +; CHECK-LABEL: splat_nan_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.h, #32704 // =0x7fc0 +; CHECK-NEXT: ret + ret splat (bfloat 0x7FF8000000000000) +} + +define @splat_nan_nxv4bf16() { +; CHECK-LABEL: splat_nan_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.h, #32704 // =0x7fc0 +; CHECK-NEXT: ret + ret splat (bfloat 0x7FF8000000000000) +} + +define @splat_nan_nxv8bf16() { +; CHECK-LABEL: splat_nan_nxv8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.h, #32704 // =0x7fc0 +; CHECK-NEXT: ret + ret splat (bfloat 0x7FF8000000000000) +} + define @splat_nxv4i32_fold( %x) { ; CHECK-LABEL: splat_nxv4i32_fold: ; CHECK: // %bb.0: @@ -581,8 +869,8 @@ define @splat_nxv2f64_imm_out_of_range() { ; CHECK-LABEL: splat_nxv2f64_imm_out_of_range: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: adrp x8, .LCPI60_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI60_0 +; CHECK-NEXT: adrp x8, .LCPI96_0 +; CHECK-NEXT: add x8, x8, :lo12:.LCPI96_0 ; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x8] ; CHECK-NEXT: ret ret splat(double 3.33) diff --git a/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll b/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll index 6b5b3d6d436cb..b04029c273ae2 100644 --- a/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll @@ -338,8 +338,7 @@ ret %sel define @sel_merge_nxv8f16_negative_zero( %p, %in) { ; CHECK-LABEL: sel_merge_nxv8f16_negative_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32768 // =0x8000 -; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: dupm z1.h, #0x8000 ; CHECK-NEXT: mov z0.h, p0/m, z1.h ; CHECK-NEXT: ret %sel = select %p, splat (half -0.0), %in @@ -349,8 +348,7 @@ ret %sel define @sel_merge_nx4f16_negative_zero( %p, %in) { ; CHECK-LABEL: sel_merge_nx4f16_negative_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32768 // =0x8000 -; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: dupm z1.h, #0x8000 ; CHECK-NEXT: mov z0.s, p0/m, z1.s ; CHECK-NEXT: ret %sel = select %p, splat (half -0.0), %in @@ -360,8 +358,7 @@ ret %sel define @sel_merge_nx2f16_negative_zero( %p, %in) { ; CHECK-LABEL: sel_merge_nx2f16_negative_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32768 // =0x8000 -; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: dupm z1.h, #0x8000 ; CHECK-NEXT: mov z0.d, p0/m, z1.d ; CHECK-NEXT: ret %sel = select %p, splat (half -0.0), %in @@ -371,8 +368,7 @@ ret %sel define @sel_merge_nx4f32_negative_zero( %p, %in) { ; CHECK-LABEL: sel_merge_nx4f32_negative_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-2147483648 // =0x80000000 -; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: mov z1.s, #0x80000000 ; CHECK-NEXT: mov z0.s, p0/m, z1.s ; CHECK-NEXT: ret %sel = select %p, splat (float -0.0), %in @@ -382,8 +378,7 @@ ret %sel define @sel_merge_nx2f32_negative_zero( %p, %in) { ; CHECK-LABEL: sel_merge_nx2f32_negative_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-2147483648 // =0x80000000 -; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: mov z1.s, #0x80000000 ; CHECK-NEXT: mov z0.d, p0/m, z1.d ; CHECK-NEXT: ret %sel = select %p, splat (float -0.0), %in @@ -393,8 +388,7 @@ ret %sel define @sel_merge_nx2f64_negative_zero( %p, %in) { ; CHECK-LABEL: sel_merge_nx2f64_negative_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 -; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: mov z1.d, #0x8000000000000000 ; CHECK-NEXT: mov z0.d, p0/m, z1.d ; CHECK-NEXT: ret %sel = select %p, splat (double -0.0), %in diff --git a/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll index edfd80b4f2706..ace0c83e63c7c 100644 --- a/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll +++ b/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll @@ -4,20 +4,28 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; AARCH-LABEL: muloti_test: ; AARCH: // %bb.0: // %start +; AARCH-NEXT: orr x8, x1, x3 +; AARCH-NEXT: cbz x8, .LBB0_2 +; AARCH-NEXT: // %bb.1: // %overflow ; AARCH-NEXT: mul x9, x3, x0 ; AARCH-NEXT: cmp x1, #0 ; AARCH-NEXT: ccmp x3, #0, #4, ne -; AARCH-NEXT: umulh x8, x1, x2 -; AARCH-NEXT: umulh x10, x3, x0 +; AARCH-NEXT: umulh x10, x1, x2 +; AARCH-NEXT: umulh x8, x3, x0 ; AARCH-NEXT: madd x9, x1, x2, x9 -; AARCH-NEXT: ccmp xzr, x8, #0, eq -; AARCH-NEXT: umulh x11, x0, x2 ; AARCH-NEXT: ccmp xzr, x10, #0, eq +; AARCH-NEXT: umulh x11, x0, x2 +; AARCH-NEXT: ccmp xzr, x8, #0, eq ; AARCH-NEXT: mul x0, x0, x2 ; AARCH-NEXT: cset w8, ne ; AARCH-NEXT: adds x1, x11, x9 ; AARCH-NEXT: csinc w2, w8, wzr, lo ; AARCH-NEXT: ret +; AARCH-NEXT: .LBB0_2: // %overflow.no +; AARCH-NEXT: umulh x1, x0, x2 +; AARCH-NEXT: mul x0, x0, x2 +; AARCH-NEXT: mov w2, wzr +; AARCH-NEXT: ret start: %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2 %1 = extractvalue { i128, i1 } %0, 0 @@ -35,45 +43,56 @@ start: define i128 @__muloti4(i128 %0, i128 %1, ptr nocapture nonnull writeonly align 4 %2) #2 { ; AARCH-LABEL: __muloti4: ; AARCH: // %bb.0: // %Entry -; AARCH-NEXT: asr x11, x1, #63 -; AARCH-NEXT: asr x9, x3, #63 -; AARCH-NEXT: umulh x12, x0, x2 -; AARCH-NEXT: mov x8, x1 +; AARCH-NEXT: eor x8, x3, x2, asr #63 +; AARCH-NEXT: eor x9, x1, x0, asr #63 ; AARCH-NEXT: str wzr, [x4] -; AARCH-NEXT: mul x13, x1, x2 -; AARCH-NEXT: umulh x10, x1, x2 -; AARCH-NEXT: mul x11, x11, x2 -; AARCH-NEXT: adds x12, x13, x12 -; AARCH-NEXT: mul x15, x0, x3 -; AARCH-NEXT: umulh x14, x0, x3 -; AARCH-NEXT: adc x10, x10, x11 -; AARCH-NEXT: mul x9, x0, x9 -; AARCH-NEXT: mul x16, x1, x3 -; AARCH-NEXT: adds x1, x15, x12 -; AARCH-NEXT: asr x12, x10, #63 -; AARCH-NEXT: smulh x11, x8, x3 -; AARCH-NEXT: adc x9, x14, x9 -; AARCH-NEXT: asr x13, x9, #63 -; AARCH-NEXT: adds x9, x10, x9 -; AARCH-NEXT: asr x10, x1, #63 +; AARCH-NEXT: orr x8, x9, x8 +; AARCH-NEXT: cbz x8, .LBB1_2 +; AARCH-NEXT: // %bb.1: // %overflow +; AARCH-NEXT: asr x9, x1, #63 +; AARCH-NEXT: umulh x10, x0, x2 +; AARCH-NEXT: asr x13, x3, #63 +; AARCH-NEXT: mul x11, x1, x2 +; AARCH-NEXT: umulh x8, x1, x2 +; AARCH-NEXT: mul x9, x9, x2 +; AARCH-NEXT: adds x10, x11, x10 +; AARCH-NEXT: mul x14, x0, x3 +; AARCH-NEXT: umulh x12, x0, x3 +; AARCH-NEXT: adc x9, x8, x9 +; AARCH-NEXT: mul x13, x0, x13 +; AARCH-NEXT: adds x8, x14, x10 +; AARCH-NEXT: mul x15, x1, x3 +; AARCH-NEXT: smulh x10, x1, x3 +; AARCH-NEXT: adc x11, x12, x13 +; AARCH-NEXT: asr x12, x9, #63 +; AARCH-NEXT: asr x13, x11, #63 +; AARCH-NEXT: adds x9, x9, x11 +; AARCH-NEXT: asr x11, x8, #63 ; AARCH-NEXT: mul x0, x0, x2 ; AARCH-NEXT: adc x12, x12, x13 -; AARCH-NEXT: adds x9, x16, x9 -; AARCH-NEXT: adc x11, x11, x12 -; AARCH-NEXT: cmp x9, x10 -; AARCH-NEXT: ccmp x11, x10, #0, eq +; AARCH-NEXT: adds x9, x15, x9 +; AARCH-NEXT: adc x10, x10, x12 +; AARCH-NEXT: cmp x9, x11 +; AARCH-NEXT: ccmp x10, x11, #0, eq ; AARCH-NEXT: cset w9, ne -; AARCH-NEXT: tbz x8, #63, .LBB1_2 -; AARCH-NEXT: // %bb.1: // %Entry -; AARCH-NEXT: eor x8, x3, #0x8000000000000000 -; AARCH-NEXT: orr x8, x2, x8 -; AARCH-NEXT: cbz x8, .LBB1_3 -; AARCH-NEXT: .LBB1_2: // %Else2 -; AARCH-NEXT: cbz w9, .LBB1_4 -; AARCH-NEXT: .LBB1_3: // %Then7 -; AARCH-NEXT: mov w8, #1 // =0x1 -; AARCH-NEXT: str w8, [x4] -; AARCH-NEXT: .LBB1_4: // %Block9 +; AARCH-NEXT: tbnz x1, #63, .LBB1_3 +; AARCH-NEXT: b .LBB1_4 +; AARCH-NEXT: .LBB1_2: // %overflow.no +; AARCH-NEXT: smulh x8, x0, x2 +; AARCH-NEXT: mov w9, wzr +; AARCH-NEXT: mul x0, x0, x2 +; AARCH-NEXT: tbz x1, #63, .LBB1_4 +; AARCH-NEXT: .LBB1_3: // %overflow.res +; AARCH-NEXT: eor x10, x3, #0x8000000000000000 +; AARCH-NEXT: orr x10, x2, x10 +; AARCH-NEXT: cbz x10, .LBB1_5 +; AARCH-NEXT: .LBB1_4: // %Else2 +; AARCH-NEXT: cbz w9, .LBB1_6 +; AARCH-NEXT: .LBB1_5: // %Then7 +; AARCH-NEXT: mov w9, #1 // =0x1 +; AARCH-NEXT: str w9, [x4] +; AARCH-NEXT: .LBB1_6: // %Block9 +; AARCH-NEXT: mov x1, x8 ; AARCH-NEXT: ret Entry: store i32 0, ptr %2, align 4 diff --git a/llvm/test/CodeGen/AMDGPU/lds-branch-vmem-hazard.mir b/llvm/test/CodeGen/AMDGPU/lds-branch-vmem-hazard.mir index 86e657093b5b2..ab4077d8f5b68 100644 --- a/llvm/test/CodeGen/AMDGPU/lds-branch-vmem-hazard.mir +++ b/llvm/test/CodeGen/AMDGPU/lds-branch-vmem-hazard.mir @@ -269,11 +269,12 @@ body: | S_ENDPGM 0 ... -# GCN-LABEL: name: no_hazard_lds_branch_flat +# GCN-LABEL: name: hazard_lds_branch_flat # GCN: bb.1: +# GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 # GCN-NEXT: FLAT_LOAD_DWORD --- -name: no_hazard_lds_branch_flat +name: hazard_lds_branch_flat body: | bb.0: successors: %bb.1 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll index dd2cffd7bd161..dd19ba17bb292 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll @@ -1,16 +1,19 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx704 < %s | FileCheck --check-prefixes=GFX7CHECK,GFX7SELDAG %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx704 < %s | FileCheck --check-prefixes=GFX7CHECK,GFX7GLISEL %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx704 < %s | FileCheck --check-prefixes=GFX7CHECK,GFX7GLISEL %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx803 < %s | FileCheck --check-prefixes=GFX8CHECK,GFX8SELDAG %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx803 < %s | FileCheck --check-prefixes=GFX8CHECK,GFX8GLISEL %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx803 < %s | FileCheck --check-prefixes=GFX8CHECK,GFX8GLISEL %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck --check-prefixes=GFX9CHECK,GFX9SELDAG %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck --check-prefixes=GFX9CHECK,GFX9GLISEL %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck --check-prefixes=GFX9CHECK,GFX9GLISEL %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10SELDAG %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10GLISEL %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10GLISEL %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11SELDAG,GFX11SELDAG-TRUE16 %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11SELDAG,GFX11SELDAG-FAKE16 %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL,GFX11GLISEL-TRUE16 %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL,GFX11GLISEL-FAKE16 %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL,GFX11GLISEL-TRUE16 %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL,GFX11GLISEL-FAKE16 %s + +; FIXME: There are code size regressions in GlobalISel due to use of SGPRs and +; moving those SGPRs into VGPRs. define amdgpu_kernel void @sgpr_isnan_f16(ptr addrspace(1) %out, half %x) { ; GFX7SELDAG-LABEL: sgpr_isnan_f16: @@ -34,48 +37,98 @@ define amdgpu_kernel void @sgpr_isnan_f16(ptr addrspace(1) %out, half %x) { ; GFX7GLISEL-NEXT: s_mov_b32 s2, -1 ; GFX7GLISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX7GLISEL-NEXT: s_and_b32 s3, s3, 0x7fff +; GFX7GLISEL-NEXT: s_and_b32 s3, 0xffff, s3 ; GFX7GLISEL-NEXT: s_cmpk_gt_u32 s3, 0x7c00 -; GFX7GLISEL-NEXT: s_cselect_b32 s3, 1, 0 -; GFX7GLISEL-NEXT: s_bfe_i32 s3, s3, 0x10000 +; GFX7GLISEL-NEXT: s_cselect_b32 s3, -1, 0 ; GFX7GLISEL-NEXT: v_mov_b32_e32 v0, s3 ; GFX7GLISEL-NEXT: s_mov_b32 s3, 0xf000 ; GFX7GLISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7GLISEL-NEXT: s_endpgm ; -; GFX8CHECK-LABEL: sgpr_isnan_f16: -; GFX8CHECK: ; %bb.0: -; GFX8CHECK-NEXT: s_load_dword s2, s[4:5], 0x2c -; GFX8CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8CHECK-NEXT: s_waitcnt lgkmcnt(0) -; GFX8CHECK-NEXT: v_cmp_class_f16_e64 s[2:3], s2, 3 -; GFX8CHECK-NEXT: v_mov_b32_e32 v0, s0 -; GFX8CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[2:3] -; GFX8CHECK-NEXT: v_mov_b32_e32 v1, s1 -; GFX8CHECK-NEXT: flat_store_dword v[0:1], v2 -; GFX8CHECK-NEXT: s_endpgm -; -; GFX9CHECK-LABEL: sgpr_isnan_f16: -; GFX9CHECK: ; %bb.0: -; GFX9CHECK-NEXT: s_load_dword s2, s[4:5], 0x2c -; GFX9CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9CHECK-NEXT: v_mov_b32_e32 v0, 0 -; GFX9CHECK-NEXT: s_waitcnt lgkmcnt(0) -; GFX9CHECK-NEXT: v_cmp_class_f16_e64 s[2:3], s2, 3 -; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[2:3] -; GFX9CHECK-NEXT: global_store_dword v0, v1, s[0:1] -; GFX9CHECK-NEXT: s_endpgm -; -; GFX10CHECK-LABEL: sgpr_isnan_f16: -; GFX10CHECK: ; %bb.0: -; GFX10CHECK-NEXT: s_clause 0x1 -; GFX10CHECK-NEXT: s_load_dword s2, s[4:5], 0x2c -; GFX10CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10CHECK-NEXT: v_mov_b32_e32 v0, 0 -; GFX10CHECK-NEXT: s_waitcnt lgkmcnt(0) -; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s2, s2, 3 -; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 -; GFX10CHECK-NEXT: global_store_dword v0, v1, s[0:1] -; GFX10CHECK-NEXT: s_endpgm +; GFX8SELDAG-LABEL: sgpr_isnan_f16: +; GFX8SELDAG: ; %bb.0: +; GFX8SELDAG-NEXT: s_load_dword s2, s[4:5], 0x2c +; GFX8SELDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8SELDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX8SELDAG-NEXT: v_cmp_class_f16_e64 s[2:3], s2, 3 +; GFX8SELDAG-NEXT: v_mov_b32_e32 v0, s0 +; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[2:3] +; GFX8SELDAG-NEXT: v_mov_b32_e32 v1, s1 +; GFX8SELDAG-NEXT: flat_store_dword v[0:1], v2 +; GFX8SELDAG-NEXT: s_endpgm +; +; GFX8GLISEL-LABEL: sgpr_isnan_f16: +; GFX8GLISEL: ; %bb.0: +; GFX8GLISEL-NEXT: s_load_dword s2, s[4:5], 0x2c +; GFX8GLISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX8GLISEL-NEXT: v_cmp_class_f16_e64 s[2:3], s2, 3 +; GFX8GLISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX8GLISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX8GLISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX8GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX8GLISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GFX8GLISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX8GLISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX8GLISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX8GLISEL-NEXT: flat_store_dword v[0:1], v2 +; GFX8GLISEL-NEXT: s_endpgm +; +; GFX9SELDAG-LABEL: sgpr_isnan_f16: +; GFX9SELDAG: ; %bb.0: +; GFX9SELDAG-NEXT: s_load_dword s2, s[4:5], 0x2c +; GFX9SELDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9SELDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX9SELDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX9SELDAG-NEXT: v_cmp_class_f16_e64 s[2:3], s2, 3 +; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[2:3] +; GFX9SELDAG-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9SELDAG-NEXT: s_endpgm +; +; GFX9GLISEL-LABEL: sgpr_isnan_f16: +; GFX9GLISEL: ; %bb.0: +; GFX9GLISEL-NEXT: s_load_dword s2, s[4:5], 0x2c +; GFX9GLISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9GLISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX9GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9GLISEL-NEXT: v_cmp_class_f16_e64 s[2:3], s2, 3 +; GFX9GLISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX9GLISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX9GLISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX9GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX9GLISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GFX9GLISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX9GLISEL-NEXT: global_store_dword v1, v0, s[0:1] +; GFX9GLISEL-NEXT: s_endpgm +; +; GFX10SELDAG-LABEL: sgpr_isnan_f16: +; GFX10SELDAG: ; %bb.0: +; GFX10SELDAG-NEXT: s_clause 0x1 +; GFX10SELDAG-NEXT: s_load_dword s2, s[4:5], 0x2c +; GFX10SELDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX10SELDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX10SELDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX10SELDAG-NEXT: v_cmp_class_f16_e64 s2, s2, 3 +; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 +; GFX10SELDAG-NEXT: global_store_dword v0, v1, s[0:1] +; GFX10SELDAG-NEXT: s_endpgm +; +; GFX10GLISEL-LABEL: sgpr_isnan_f16: +; GFX10GLISEL: ; %bb.0: +; GFX10GLISEL-NEXT: s_load_dword s0, s[4:5], 0x2c +; GFX10GLISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX10GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10GLISEL-NEXT: v_cmp_class_f16_e64 s2, s0, 3 +; GFX10GLISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX10GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX10GLISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX10GLISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX10GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX10GLISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GFX10GLISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX10GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10GLISEL-NEXT: global_store_dword v1, v0, s[0:1] +; GFX10GLISEL-NEXT: s_endpgm ; ; GFX11SELDAG-TRUE16-LABEL: sgpr_isnan_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: @@ -103,26 +156,36 @@ define amdgpu_kernel void @sgpr_isnan_f16(ptr addrspace(1) %out, half %x) { ; ; GFX11GLISEL-TRUE16-LABEL: sgpr_isnan_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: -; GFX11GLISEL-TRUE16-NEXT: s_clause 0x1 -; GFX11GLISEL-TRUE16-NEXT: s_load_b32 s2, s[4:5], 0x2c -; GFX11GLISEL-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11GLISEL-TRUE16-NEXT: s_load_b32 s0, s[4:5], 0x2c ; GFX11GLISEL-TRUE16-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 0 ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, s2, v0.l -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, s0, v0.l +; GFX11GLISEL-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11GLISEL-TRUE16-NEXT: s_cmp_lg_u32 vcc_lo, 0 +; GFX11GLISEL-TRUE16-NEXT: s_cselect_b32 s2, 1, 0 +; GFX11GLISEL-TRUE16-NEXT: s_and_b32 s2, s2, 1 +; GFX11GLISEL-TRUE16-NEXT: s_cmp_lg_u32 s2, 0 +; GFX11GLISEL-TRUE16-NEXT: s_cselect_b32 s2, -1, 0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v0, s2 +; GFX11GLISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11GLISEL-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11GLISEL-TRUE16-NEXT: s_endpgm ; ; GFX11GLISEL-FAKE16-LABEL: sgpr_isnan_f16: ; GFX11GLISEL-FAKE16: ; %bb.0: -; GFX11GLISEL-FAKE16-NEXT: s_clause 0x1 -; GFX11GLISEL-FAKE16-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11GLISEL-FAKE16-NEXT: s_load_b32 s0, s[4:5], 0x2c +; GFX11GLISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX11GLISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11GLISEL-FAKE16-NEXT: v_cmp_class_f16_e64 s2, s0, 3 ; GFX11GLISEL-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11GLISEL-FAKE16-NEXT: v_mov_b32_e32 v0, 0 +; GFX11GLISEL-FAKE16-NEXT: s_cmp_lg_u32 s2, 0 +; GFX11GLISEL-FAKE16-NEXT: s_cselect_b32 s2, 1, 0 +; GFX11GLISEL-FAKE16-NEXT: s_and_b32 s2, s2, 1 +; GFX11GLISEL-FAKE16-NEXT: s_cmp_lg_u32 s2, 0 +; GFX11GLISEL-FAKE16-NEXT: s_cselect_b32 s2, -1, 0 +; GFX11GLISEL-FAKE16-NEXT: v_mov_b32_e32 v0, s2 ; GFX11GLISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11GLISEL-FAKE16-NEXT: v_cmp_class_f16_e64 s2, s2, 3 -; GFX11GLISEL-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 -; GFX11GLISEL-FAKE16-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX11GLISEL-FAKE16-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11GLISEL-FAKE16-NEXT: s_endpgm %result = call i1 @llvm.is.fpclass.f16(half %x, i32 3) %sext = sext i1 %result to i32 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll index 4f5432a202058..0a9fe10874c38 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll @@ -1,14 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx704 < %s | FileCheck --check-prefixes=GFX7CHECK,GFX7SELDAG %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx704 < %s | FileCheck --check-prefixes=GFX7CHECK,GFX7GLISEL %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx704 < %s | FileCheck --check-prefixes=GFX7CHECK,GFX7GLISEL %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx803 < %s | FileCheck --check-prefixes=GFX8CHECK,GFX8SELDAG %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx803 < %s | FileCheck --check-prefixes=GFX8CHECK,GFX8GLISEL %s -; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck --check-prefixes=GFX9CHECK %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck --check-prefixes=GFX9CHECK %s -; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK %s -; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefix=GFX11CHECK %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefix=GFX11CHECK %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx803 < %s | FileCheck --check-prefixes=GFX8CHECK,GFX8GLISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck --check-prefixes=GFX9CHECK,GFX9SELDAG %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck --check-prefixes=GFX9CHECK,GFX9GLISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10SELDAG %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10GLISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11SELDAG %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL %s + +; FIXME: There are code size regressions in GlobalISel due to use of SGPRs and +; moving those SGPRs into VGPRs. define amdgpu_kernel void @sgpr_isnan_f32(ptr addrspace(1) %out, float %x) { ; GFX7SELDAG-LABEL: sgpr_isnan_f32: @@ -30,58 +33,132 @@ define amdgpu_kernel void @sgpr_isnan_f32(ptr addrspace(1) %out, float %x) { ; GFX7GLISEL-NEXT: s_mov_b32 s2, -1 ; GFX7GLISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX7GLISEL-NEXT: v_cmp_class_f32_e64 s[4:5], s3, 3 -; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5] +; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[4:5] +; GFX7GLISEL-NEXT: s_cselect_b32 s3, 1, 0 +; GFX7GLISEL-NEXT: s_and_b32 s3, s3, 1 +; GFX7GLISEL-NEXT: s_cmp_lg_u32 s3, 0 +; GFX7GLISEL-NEXT: s_cselect_b32 s3, -1, 0 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v0, s3 ; GFX7GLISEL-NEXT: s_mov_b32 s3, 0xf000 ; GFX7GLISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7GLISEL-NEXT: s_endpgm ; -; GFX8CHECK-LABEL: sgpr_isnan_f32: -; GFX8CHECK: ; %bb.0: -; GFX8CHECK-NEXT: s_load_dword s2, s[4:5], 0x2c -; GFX8CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8CHECK-NEXT: s_waitcnt lgkmcnt(0) -; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[2:3], s2, 3 -; GFX8CHECK-NEXT: v_mov_b32_e32 v0, s0 -; GFX8CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[2:3] -; GFX8CHECK-NEXT: v_mov_b32_e32 v1, s1 -; GFX8CHECK-NEXT: flat_store_dword v[0:1], v2 -; GFX8CHECK-NEXT: s_endpgm -; -; GFX9CHECK-LABEL: sgpr_isnan_f32: -; GFX9CHECK: ; %bb.0: -; GFX9CHECK-NEXT: s_load_dword s2, s[4:5], 0x2c -; GFX9CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9CHECK-NEXT: v_mov_b32_e32 v0, 0 -; GFX9CHECK-NEXT: s_waitcnt lgkmcnt(0) -; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[2:3], s2, 3 -; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[2:3] -; GFX9CHECK-NEXT: global_store_dword v0, v1, s[0:1] -; GFX9CHECK-NEXT: s_endpgm -; -; GFX10CHECK-LABEL: sgpr_isnan_f32: -; GFX10CHECK: ; %bb.0: -; GFX10CHECK-NEXT: s_clause 0x1 -; GFX10CHECK-NEXT: s_load_dword s2, s[4:5], 0x2c -; GFX10CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10CHECK-NEXT: v_mov_b32_e32 v0, 0 -; GFX10CHECK-NEXT: s_waitcnt lgkmcnt(0) -; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s2, s2, 3 -; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 -; GFX10CHECK-NEXT: global_store_dword v0, v1, s[0:1] -; GFX10CHECK-NEXT: s_endpgm -; -; GFX11CHECK-LABEL: sgpr_isnan_f32: -; GFX11CHECK: ; %bb.0: -; GFX11CHECK-NEXT: s_clause 0x1 -; GFX11CHECK-NEXT: s_load_b32 s2, s[4:5], 0x2c -; GFX11CHECK-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11CHECK-NEXT: v_mov_b32_e32 v0, 0 -; GFX11CHECK-NEXT: s_waitcnt lgkmcnt(0) -; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s2, s2, 3 -; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 -; GFX11CHECK-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX11CHECK-NEXT: s_endpgm +; GFX8SELDAG-LABEL: sgpr_isnan_f32: +; GFX8SELDAG: ; %bb.0: +; GFX8SELDAG-NEXT: s_load_dword s2, s[4:5], 0x2c +; GFX8SELDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8SELDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX8SELDAG-NEXT: v_cmp_class_f32_e64 s[2:3], s2, 3 +; GFX8SELDAG-NEXT: v_mov_b32_e32 v0, s0 +; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[2:3] +; GFX8SELDAG-NEXT: v_mov_b32_e32 v1, s1 +; GFX8SELDAG-NEXT: flat_store_dword v[0:1], v2 +; GFX8SELDAG-NEXT: s_endpgm +; +; GFX8GLISEL-LABEL: sgpr_isnan_f32: +; GFX8GLISEL: ; %bb.0: +; GFX8GLISEL-NEXT: s_load_dword s2, s[4:5], 0x2c +; GFX8GLISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX8GLISEL-NEXT: v_cmp_class_f32_e64 s[2:3], s2, 3 +; GFX8GLISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX8GLISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX8GLISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX8GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX8GLISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GFX8GLISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX8GLISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX8GLISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX8GLISEL-NEXT: flat_store_dword v[0:1], v2 +; GFX8GLISEL-NEXT: s_endpgm +; +; GFX9SELDAG-LABEL: sgpr_isnan_f32: +; GFX9SELDAG: ; %bb.0: +; GFX9SELDAG-NEXT: s_load_dword s2, s[4:5], 0x2c +; GFX9SELDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9SELDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX9SELDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX9SELDAG-NEXT: v_cmp_class_f32_e64 s[2:3], s2, 3 +; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[2:3] +; GFX9SELDAG-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9SELDAG-NEXT: s_endpgm +; +; GFX9GLISEL-LABEL: sgpr_isnan_f32: +; GFX9GLISEL: ; %bb.0: +; GFX9GLISEL-NEXT: s_load_dword s2, s[4:5], 0x2c +; GFX9GLISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9GLISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX9GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9GLISEL-NEXT: v_cmp_class_f32_e64 s[2:3], s2, 3 +; GFX9GLISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX9GLISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX9GLISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX9GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX9GLISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GFX9GLISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX9GLISEL-NEXT: global_store_dword v1, v0, s[0:1] +; GFX9GLISEL-NEXT: s_endpgm +; +; GFX10SELDAG-LABEL: sgpr_isnan_f32: +; GFX10SELDAG: ; %bb.0: +; GFX10SELDAG-NEXT: s_clause 0x1 +; GFX10SELDAG-NEXT: s_load_dword s2, s[4:5], 0x2c +; GFX10SELDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX10SELDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX10SELDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX10SELDAG-NEXT: v_cmp_class_f32_e64 s2, s2, 3 +; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 +; GFX10SELDAG-NEXT: global_store_dword v0, v1, s[0:1] +; GFX10SELDAG-NEXT: s_endpgm +; +; GFX10GLISEL-LABEL: sgpr_isnan_f32: +; GFX10GLISEL: ; %bb.0: +; GFX10GLISEL-NEXT: s_load_dword s0, s[4:5], 0x2c +; GFX10GLISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX10GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10GLISEL-NEXT: v_cmp_class_f32_e64 s2, s0, 3 +; GFX10GLISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX10GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX10GLISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX10GLISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX10GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX10GLISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GFX10GLISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX10GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10GLISEL-NEXT: global_store_dword v1, v0, s[0:1] +; GFX10GLISEL-NEXT: s_endpgm +; +; GFX11SELDAG-LABEL: sgpr_isnan_f32: +; GFX11SELDAG: ; %bb.0: +; GFX11SELDAG-NEXT: s_clause 0x1 +; GFX11SELDAG-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11SELDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11SELDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX11SELDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11SELDAG-NEXT: v_cmp_class_f32_e64 s2, s2, 3 +; GFX11SELDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 +; GFX11SELDAG-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX11SELDAG-NEXT: s_endpgm +; +; GFX11GLISEL-LABEL: sgpr_isnan_f32: +; GFX11GLISEL: ; %bb.0: +; GFX11GLISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c +; GFX11GLISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX11GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11GLISEL-NEXT: v_cmp_class_f32_e64 s2, s0, 3 +; GFX11GLISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX11GLISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX11GLISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11GLISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX11GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX11GLISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GFX11GLISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11GLISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX11GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11GLISEL-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11GLISEL-NEXT: s_endpgm %result = call i1 @llvm.is.fpclass.f32(float %x, i32 3) ; nan %sext = sext i1 %result to i32 store i32 %sext, ptr addrspace(1) %out, align 4 @@ -106,9 +183,14 @@ define amdgpu_kernel void @sgpr_isnan_f64(ptr addrspace(1) %out, double %x) { ; GFX7GLISEL: ; %bb.0: ; GFX7GLISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 ; GFX7GLISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX7GLISEL-NEXT: v_cmp_class_f64_e64 s[2:3], s[2:3], 3 -; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[2:3] +; GFX7GLISEL-NEXT: v_cmp_class_f64_e64 s[4:5], s[2:3], 3 ; GFX7GLISEL-NEXT: s_mov_b32 s2, -1 +; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[4:5] +; GFX7GLISEL-NEXT: s_cselect_b32 s3, 1, 0 +; GFX7GLISEL-NEXT: s_and_b32 s3, s3, 1 +; GFX7GLISEL-NEXT: s_cmp_lg_u32 s3, 0 +; GFX7GLISEL-NEXT: s_cselect_b32 s3, -1, 0 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v0, s3 ; GFX7GLISEL-NEXT: s_mov_b32 s3, 0xf000 ; GFX7GLISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7GLISEL-NEXT: s_endpgm @@ -131,40 +213,92 @@ define amdgpu_kernel void @sgpr_isnan_f64(ptr addrspace(1) %out, double %x) { ; GFX8GLISEL-NEXT: v_cmp_class_f64_e64 s[2:3], s[2:3], 3 ; GFX8GLISEL-NEXT: v_mov_b32_e32 v0, s0 ; GFX8GLISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GLISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[2:3] +; GFX8GLISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX8GLISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX8GLISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX8GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX8GLISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GFX8GLISEL-NEXT: v_mov_b32_e32 v2, s2 ; GFX8GLISEL-NEXT: flat_store_dword v[0:1], v2 ; GFX8GLISEL-NEXT: s_endpgm ; -; GFX9CHECK-LABEL: sgpr_isnan_f64: -; GFX9CHECK: ; %bb.0: -; GFX9CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX9CHECK-NEXT: v_mov_b32_e32 v0, 0 -; GFX9CHECK-NEXT: s_waitcnt lgkmcnt(0) -; GFX9CHECK-NEXT: v_cmp_class_f64_e64 s[2:3], s[2:3], 3 -; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[2:3] -; GFX9CHECK-NEXT: global_store_dword v0, v1, s[0:1] -; GFX9CHECK-NEXT: s_endpgm -; -; GFX10CHECK-LABEL: sgpr_isnan_f64: -; GFX10CHECK: ; %bb.0: -; GFX10CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX10CHECK-NEXT: v_mov_b32_e32 v0, 0 -; GFX10CHECK-NEXT: s_waitcnt lgkmcnt(0) -; GFX10CHECK-NEXT: v_cmp_class_f64_e64 s2, s[2:3], 3 -; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 -; GFX10CHECK-NEXT: global_store_dword v0, v1, s[0:1] -; GFX10CHECK-NEXT: s_endpgm -; -; GFX11CHECK-LABEL: sgpr_isnan_f64: -; GFX11CHECK: ; %bb.0: -; GFX11CHECK-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11CHECK-NEXT: v_mov_b32_e32 v0, 0 -; GFX11CHECK-NEXT: s_waitcnt lgkmcnt(0) -; GFX11CHECK-NEXT: v_cmp_class_f64_e64 s2, s[2:3], 3 -; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 -; GFX11CHECK-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX11CHECK-NEXT: s_endpgm +; GFX9SELDAG-LABEL: sgpr_isnan_f64: +; GFX9SELDAG: ; %bb.0: +; GFX9SELDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX9SELDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX9SELDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX9SELDAG-NEXT: v_cmp_class_f64_e64 s[2:3], s[2:3], 3 +; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[2:3] +; GFX9SELDAG-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9SELDAG-NEXT: s_endpgm +; +; GFX9GLISEL-LABEL: sgpr_isnan_f64: +; GFX9GLISEL: ; %bb.0: +; GFX9GLISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX9GLISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX9GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9GLISEL-NEXT: v_cmp_class_f64_e64 s[2:3], s[2:3], 3 +; GFX9GLISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX9GLISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX9GLISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX9GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX9GLISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GFX9GLISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX9GLISEL-NEXT: global_store_dword v1, v0, s[0:1] +; GFX9GLISEL-NEXT: s_endpgm +; +; GFX10SELDAG-LABEL: sgpr_isnan_f64: +; GFX10SELDAG: ; %bb.0: +; GFX10SELDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX10SELDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX10SELDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX10SELDAG-NEXT: v_cmp_class_f64_e64 s2, s[2:3], 3 +; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 +; GFX10SELDAG-NEXT: global_store_dword v0, v1, s[0:1] +; GFX10SELDAG-NEXT: s_endpgm +; +; GFX10GLISEL-LABEL: sgpr_isnan_f64: +; GFX10GLISEL: ; %bb.0: +; GFX10GLISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX10GLISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX10GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10GLISEL-NEXT: v_cmp_class_f64_e64 s2, s[2:3], 3 +; GFX10GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX10GLISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX10GLISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX10GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX10GLISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GFX10GLISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX10GLISEL-NEXT: global_store_dword v1, v0, s[0:1] +; GFX10GLISEL-NEXT: s_endpgm +; +; GFX11SELDAG-LABEL: sgpr_isnan_f64: +; GFX11SELDAG: ; %bb.0: +; GFX11SELDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11SELDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX11SELDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11SELDAG-NEXT: v_cmp_class_f64_e64 s2, s[2:3], 3 +; GFX11SELDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 +; GFX11SELDAG-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX11SELDAG-NEXT: s_endpgm +; +; GFX11GLISEL-LABEL: sgpr_isnan_f64: +; GFX11GLISEL: ; %bb.0: +; GFX11GLISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11GLISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX11GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11GLISEL-NEXT: v_cmp_class_f64_e64 s2, s[2:3], 3 +; GFX11GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX11GLISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX11GLISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11GLISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX11GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX11GLISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GFX11GLISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11GLISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX11GLISEL-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11GLISEL-NEXT: s_endpgm %result = call i1 @llvm.is.fpclass.f64(double %x, i32 3) ; nan %sext = sext i1 %result to i32 store i32 %sext, ptr addrspace(1) %out, align 4 diff --git a/llvm/test/CodeGen/DirectX/ddx_coarse-errors.ll b/llvm/test/CodeGen/DirectX/ddx_coarse-errors.ll new file mode 100644 index 0000000000000..0679eec31cec1 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/ddx_coarse-errors.ll @@ -0,0 +1,15 @@ +; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s + +; DXIL operation ddx.coarse does not support double overload type +; CHECK: in function ddx.coarse +; CHECK-SAME: Cannot create DerivCoarseX operation: Invalid overload type + +; Function Attrs: noinline nounwind optnone +define noundef double @ddx.coarse_double(double noundef %a) #0 { +entry: + %a.addr = alloca double, align 8 + store double %a, ptr %a.addr, align 8 + %0 = load double, ptr %a.addr, align 8 + %dx.ddx.coarse = call double @llvm.dx.ddx.coarse.f64(double %0) + ret double %dx.ddx.coarse +} diff --git a/llvm/test/CodeGen/DirectX/ddx_coarse.ll b/llvm/test/CodeGen/DirectX/ddx_coarse.ll new file mode 100644 index 0000000000000..f6ea031273263 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/ddx_coarse.ll @@ -0,0 +1,40 @@ +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s + +; Make sure dxil operation function calls for ddx_coarse are generated for half/float and matching vectors + +define noundef half @deriv_coarse_x_half(half noundef %a) { +; CHECK: call half @dx.op.unary.f16(i32 83, half %{{.*}}) +entry: + %dx.ddx.coarse = call half @llvm.dx.ddx.coarse.f16(half %a) + ret half %dx.ddx.coarse +} + +define noundef float @deriv_coarse_x_float(float noundef %a) { +; CHECK: call float @dx.op.unary.f32(i32 83, float %{{.*}}) +entry: + %dx.ddx.coarse = call float @llvm.dx.ddx.coarse.f32(float %a) + ret float %dx.ddx.coarse +} + +define noundef <4 x float> @deriv_coarse_x_float4(<4 x float> noundef %a) { +; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 +; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 83, float [[ee0]]) +; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 +; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 83, float [[ee1]]) +; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 +; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 83, float [[ee2]]) +; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 +; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 83, float [[ee3]]) +; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0 +; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 +; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 +; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3 +; CHECK: ret <4 x float> %{{.*}} +entry: + %dx.ddx.coarse = call <4 x float> @llvm.dx.ddx.coarse.v4f32(<4 x float> %a) + ret <4 x float> %dx.ddx.coarse +} + +declare half @llvm.dx.ddx.coarse.f16(half) +declare float @llvm.dx.ddx.coarse.f32(float) +declare <4 x float> @llvm.dx.ddx.coarse.v4f32(<4 x float>) diff --git a/llvm/test/CodeGen/DirectX/ddy_coarse-errors.ll b/llvm/test/CodeGen/DirectX/ddy_coarse-errors.ll new file mode 100644 index 0000000000000..df8e3eb0f7e0b --- /dev/null +++ b/llvm/test/CodeGen/DirectX/ddy_coarse-errors.ll @@ -0,0 +1,15 @@ +; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s + +; DXIL operation ddy.coarse does not support double overload type +; CHECK: in function ddy.coarse +; CHECK-SAME: Cannot create DerivCoarseY operation: Invalid overload type + +; Function Attrs: noinline nounwind optnone +define noundef double @ddy.coarse_double(double noundef %a) #0 { +entry: + %a.addr = alloca double, align 8 + store double %a, ptr %a.addr, align 8 + %0 = load double, ptr %a.addr, align 8 + %dx.ddy.coarse = call double @llvm.dx.ddy.coarse.f64(double %0) + ret double %dx.ddy.coarse +} diff --git a/llvm/test/CodeGen/DirectX/ddy_coarse.ll b/llvm/test/CodeGen/DirectX/ddy_coarse.ll new file mode 100644 index 0000000000000..e3337022e1b01 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/ddy_coarse.ll @@ -0,0 +1,40 @@ +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s + +; Make sure dxil operation function calls for ddy_coarse are generated for half/float and matching vectors + +define noundef half @deriv_coarse_y_half(half noundef %a) { +; CHECK: call half @dx.op.unary.f16(i32 84, half %{{.*}}) +entry: + %dx.ddy.coarse = call half @llvm.dx.ddy.coarse.f16(half %a) + ret half %dx.ddy.coarse +} + +define noundef float @deriv_coarse_y_float(float noundef %a) { +; CHECK: call float @dx.op.unary.f32(i32 84, float %{{.*}}) +entry: + %dx.ddy.coarse = call float @llvm.dx.ddy.coarse.f32(float %a) + ret float %dx.ddy.coarse +} + +define noundef <4 x float> @deriv_coarse_y_float4(<4 x float> noundef %a) { +; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 +; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 84, float [[ee0]]) +; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 +; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 84, float [[ee1]]) +; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 +; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 84, float [[ee2]]) +; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 +; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 84, float [[ee3]]) +; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0 +; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 +; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 +; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3 +; CHECK: ret <4 x float> %{{.*}} +entry: + %dx.ddy.coarse = call <4 x float> @llvm.dx.ddy.coarse.v4f32(<4 x float> %a) + ret <4 x float> %dx.ddy.coarse +} + +declare half @llvm.dx.ddy.coarse.f16(half) +declare float @llvm.dx.ddy.coarse.f32(float) +declare <4 x float> @llvm.dx.ddy.coarse.v4f32(<4 x float>) diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/ddx_coarse.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/ddx_coarse.ll new file mode 100644 index 0000000000000..478acb53701ea --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/ddx_coarse.ll @@ -0,0 +1,47 @@ +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-vulkan %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val --target-env spv1.4 %} + +; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32 +; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16 + +; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4 +; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4 + +define noundef float @ddx_coarse_float(float noundef %a) { +entry: +; CHECK: %[[#float_32_arg:]] = OpFunctionParameter %[[#float_32]] +; CHECK: %[[#]] = OpDPdxCoarse %[[#float_32]] %[[#float_32_arg]] + %elt.ddx.coarse = call float @llvm.spv.ddx.coarse.f32(float %a) + ret float %elt.ddx.coarse +} + +define noundef half @ddx_coarse_half(half noundef %a) { +entry: +; CHECK: %[[#float_16_arg:]] = OpFunctionParameter %[[#float_16]] +; CHECK: %[[#converted:]] = OpFConvert %[[#float_32:]] %[[#float_16_arg]] +; CHECK: %[[#coarse:]] = OpDPdxCoarse %[[#float_32]] %[[#converted]] +; CHECK: %[[#]] = OpFConvert %[[#float_16]] %[[#coarse]] + %elt.ddx.coarse = call half @llvm.spv.ddx.coarse.f16(half %a) + ret half %elt.ddx.coarse +} + +define noundef <4 x float> @ddx_coarse_float_vector(<4 x float> noundef %a) { +entry: +; CHECK: %[[#vec4_float_32_arg:]] = OpFunctionParameter %[[#vec4_float_32]] +; CHECK: %[[#]] = OpDPdxCoarse %[[#vec4_float_32]] %[[#vec4_float_32_arg]] + %elt.ddx.coarse = call <4 x float> @llvm.spv.ddx.coarse.v4f32(<4 x float> %a) + ret <4 x float> %elt.ddx.coarse +} + +define noundef <4 x half> @ddx_coarse_half_vector(<4 x half> noundef %a) { +entry: +; CHECK: %[[#vec4_float_16_arg:]] = OpFunctionParameter %[[#vec4_float_16]] +; CHECK: %[[#converted:]] = OpFConvert %[[#vec4_float_32:]] %[[#vec4_float_16_arg]] +; CHECK: %[[#coarse:]] = OpDPdxCoarse %[[#vec4_float_32]] %[[#converted]] +; CHECK: %[[#]] = OpFConvert %[[#vec4_float_16]] %[[#coarse]] + %elt.ddx.coarse = call <4 x half> @llvm.spv.ddx.coarse.v4f16(<4 x half> %a) + ret <4 x half> %elt.ddx.coarse +} + +declare float @llvm.spv.ddx.coarse.f32(float) +declare half @llvm.spv.ddx.coarse.f16(half) diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/ddy_coarse.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/ddy_coarse.ll new file mode 100644 index 0000000000000..8ad67cb644aa7 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/ddy_coarse.ll @@ -0,0 +1,47 @@ +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-vulkan %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val --target-env spv1.4 %} + +; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32 +; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16 + +; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4 +; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4 + +define noundef float @ddy_coarse_float(float noundef %a) { +entry: +; CHECK: %[[#float_32_arg:]] = OpFunctionParameter %[[#float_32]] +; CHECK: %[[#]] = OpDPdyCoarse %[[#float_32]] %[[#float_32_arg]] + %elt.ddy.coarse = call float @llvm.spv.ddy.coarse.f32(float %a) + ret float %elt.ddy.coarse +} + +define noundef half @ddy_coarse_half(half noundef %a) { +entry: +; CHECK: %[[#float_16_arg:]] = OpFunctionParameter %[[#float_16]] +; CHECK: %[[#converted:]] = OpFConvert %[[#float_32:]] %[[#float_16_arg]] +; CHECK: %[[#coarse:]] = OpDPdyCoarse %[[#float_32]] %[[#converted]] +; CHECK: %[[#]] = OpFConvert %[[#float_16]] %[[#coarse]] + %elt.ddy.coarse = call half @llvm.spv.ddy.coarse.f16(half %a) + ret half %elt.ddy.coarse +} + +define noundef <4 x float> @ddy_coarse_float_vector(<4 x float> noundef %a) { +entry: +; CHECK: %[[#vec4_float_32_arg:]] = OpFunctionParameter %[[#vec4_float_32]] +; CHECK: %[[#]] = OpDPdyCoarse %[[#vec4_float_32]] %[[#vec4_float_32_arg]] + %elt.ddy.coarse = call <4 x float> @llvm.spv.ddy.coarse.v4f32(<4 x float> %a) + ret <4 x float> %elt.ddy.coarse +} + +define noundef <4 x half> @ddy_coarse_half_vector(<4 x half> noundef %a) { +entry: +; CHECK: %[[#vec4_float_16_arg:]] = OpFunctionParameter %[[#vec4_float_16]] +; CHECK: %[[#converted:]] = OpFConvert %[[#vec4_float_32:]] %[[#vec4_float_16_arg]] +; CHECK: %[[#coarse:]] = OpDPdyCoarse %[[#vec4_float_32]] %[[#converted]] +; CHECK: %[[#]] = OpFConvert %[[#vec4_float_16]] %[[#coarse]] + %elt.ddy.coarse = call <4 x half> @llvm.spv.ddy.coarse.v4f16(<4 x half> %a) + ret <4 x half> %elt.ddy.coarse +} + +declare float @llvm.spv.ddy.coarse.f32(float) +declare half @llvm.spv.ddy.coarse.f16(half) diff --git a/llvm/test/CodeGen/SPIRV/opencl/ddx_coarse-error.ll b/llvm/test/CodeGen/SPIRV/opencl/ddx_coarse-error.ll new file mode 100644 index 0000000000000..e93c1d1ba4d36 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/opencl/ddx_coarse-error.ll @@ -0,0 +1,12 @@ +; RUN: not llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o /dev/null 2>&1 | FileCheck %s +; RUN: not llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown %s -o /dev/null 2>&1 | FileCheck %s + +; CHECK: LLVM ERROR: %{{.*}} = G_INTRINSIC intrinsic(@llvm.spv.ddx.coarse), %{{.*}} is only supported in shaders. + +define noundef float @ddx_coarse(float noundef %a) { +entry: + %spv.ddx.coarse = call float @llvm.spv.ddx.coarse.f32(float %a) + ret float %spv.ddx.coarse +} + +declare float @llvm.spv.ddx.coarse.f32(float) diff --git a/llvm/test/CodeGen/SPIRV/opencl/ddy_coarse-error.ll b/llvm/test/CodeGen/SPIRV/opencl/ddy_coarse-error.ll new file mode 100644 index 0000000000000..aa71a395d8680 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/opencl/ddy_coarse-error.ll @@ -0,0 +1,12 @@ +; RUN: not llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o /dev/null 2>&1 | FileCheck %s +; RUN: not llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown %s -o /dev/null 2>&1 | FileCheck %s + +; CHECK: LLVM ERROR: %{{.*}} = G_INTRINSIC intrinsic(@llvm.spv.ddy.coarse), %{{.*}} is only supported in shaders. + +define noundef float @ddy_coarse(float noundef %a) { +entry: + %spv.ddy.coarse = call float @llvm.spv.ddy.coarse.f32(float %a) + ret float %spv.ddy.coarse +} + +declare float @llvm.spv.ddy.coarse.f32(float) diff --git a/llvm/test/CodeGen/X86/bittest-big-integer.ll b/llvm/test/CodeGen/X86/bittest-big-integer.ll index b85a20b9d6b6e..023fb5065b892 100644 --- a/llvm/test/CodeGen/X86/bittest-big-integer.ll +++ b/llvm/test/CodeGen/X86/bittest-big-integer.ll @@ -1877,85 +1877,56 @@ define i32 @blsr_u512(ptr %word) nounwind { ; SSE: # %bb.0: ; SSE-NEXT: pushq %r15 ; SSE-NEXT: pushq %r14 -; SSE-NEXT: pushq %r12 ; SSE-NEXT: pushq %rbx -; SSE-NEXT: pushq %rax -; SSE-NEXT: movq 56(%rdi), %rcx -; SSE-NEXT: movq 48(%rdi), %rdx -; SSE-NEXT: movq 40(%rdi), %rsi -; SSE-NEXT: movq 32(%rdi), %r11 +; SSE-NEXT: movq 48(%rdi), %r11 +; SSE-NEXT: movq 40(%rdi), %r9 ; SSE-NEXT: movq 24(%rdi), %r8 -; SSE-NEXT: movq 16(%rdi), %r9 -; SSE-NEXT: movq (%rdi), %rax -; SSE-NEXT: movq 8(%rdi), %r10 -; SSE-NEXT: rep bsfq %rax, %rbx -; SSE-NEXT: rep bsfq %r10, %r14 -; SSE-NEXT: addq $64, %r14 -; SSE-NEXT: testq %rax, %rax -; SSE-NEXT: cmovneq %rbx, %r14 -; SSE-NEXT: rep bsfq %r9, %r15 -; SSE-NEXT: rep bsfq %r8, %rbx +; SSE-NEXT: movq 16(%rdi), %rdx +; SSE-NEXT: movq (%rdi), %rcx +; SSE-NEXT: movq 8(%rdi), %rsi +; SSE-NEXT: rep bsfq %rcx, %rax +; SSE-NEXT: rep bsfq %rsi, %rbx ; SSE-NEXT: addq $64, %rbx -; SSE-NEXT: testq %r9, %r9 -; SSE-NEXT: cmovneq %r15, %rbx -; SSE-NEXT: subq $-128, %rbx -; SSE-NEXT: movq %rax, %r15 -; SSE-NEXT: movq %rax, %r12 -; SSE-NEXT: orq %r10, %r12 -; SSE-NEXT: cmovneq %r14, %rbx -; SSE-NEXT: rep bsfq %r11, %r12 -; SSE-NEXT: rep bsfq %rsi, %r14 -; SSE-NEXT: addq $64, %r14 -; SSE-NEXT: testq %r11, %r11 -; SSE-NEXT: cmovneq %r12, %r14 -; SSE-NEXT: xorps %xmm0, %xmm0 -; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movq %rax, -{{[0-9]+}}(%rsp) -; SSE-NEXT: rep bsfq %rdx, %r12 +; SSE-NEXT: testq %rcx, %rcx +; SSE-NEXT: cmovneq %rax, %rbx +; SSE-NEXT: rep bsfq %rdx, %rax +; SSE-NEXT: rep bsfq %r8, %r10 +; SSE-NEXT: addq $64, %r10 +; SSE-NEXT: testq %rdx, %rdx +; SSE-NEXT: cmovneq %rax, %r10 +; SSE-NEXT: movq 32(%rdi), %r14 +; SSE-NEXT: subq $-128, %r10 +; SSE-NEXT: movq %rcx, %rax +; SSE-NEXT: orq %rsi, %rax +; SSE-NEXT: cmovneq %rbx, %r10 +; SSE-NEXT: rep bsfq %r14, %rax +; SSE-NEXT: rep bsfq %r9, %rbx +; SSE-NEXT: addq $64, %rbx +; SSE-NEXT: testq %r14, %r14 +; SSE-NEXT: cmovneq %rax, %rbx +; SSE-NEXT: rep bsfq %r11, %r15 ; SSE-NEXT: movl $64, %eax -; SSE-NEXT: rep bsfq %rcx, %rax +; SSE-NEXT: rep bsfq 56(%rdi), %rax ; SSE-NEXT: addq $64, %rax -; SSE-NEXT: testq %rdx, %rdx -; SSE-NEXT: cmovneq %r12, %rax +; SSE-NEXT: testq %r11, %r11 +; SSE-NEXT: cmovneq %r15, %rax ; SSE-NEXT: subq $-128, %rax -; SSE-NEXT: movq %r11, -{{[0-9]+}}(%rsp) -; SSE-NEXT: orq %rsi, %r11 -; SSE-NEXT: cmovneq %r14, %rax -; SSE-NEXT: addq $256, %rax # imm = 0x100 -; SSE-NEXT: movq %r10, -{{[0-9]+}}(%rsp) -; SSE-NEXT: orq %r8, %r10 -; SSE-NEXT: orq %r9, %r15 -; SSE-NEXT: orq %r10, %r15 +; SSE-NEXT: orq %r9, %r14 ; SSE-NEXT: cmovneq %rbx, %rax -; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movq %r8, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movq %r9, -{{[0-9]+}}(%rsp) +; SSE-NEXT: addq $256, %rax # imm = 0x100 +; SSE-NEXT: orq %r8, %rsi +; SSE-NEXT: orq %rdx, %rcx +; SSE-NEXT: orq %rsi, %rcx +; SSE-NEXT: cmovneq %r10, %rax +; SSE-NEXT: movl $-2, %edx +; SSE-NEXT: movl %eax, %ecx +; SSE-NEXT: roll %cl, %edx ; SSE-NEXT: movl %eax, %ecx -; SSE-NEXT: andl $32, %ecx -; SSE-NEXT: movl %eax, %edx -; SSE-NEXT: andl $480, %edx # imm = 0x1E0 -; SSE-NEXT: shrl $3, %edx -; SSE-NEXT: movl %edx, %esi -; SSE-NEXT: andl $-8, %esi -; SSE-NEXT: movq -128(%rsp,%rsi), %r8 -; SSE-NEXT: shrq %cl, %r8 -; SSE-NEXT: movl -120(%rsp,%rsi), %esi -; SSE-NEXT: addl %esi, %esi -; SSE-NEXT: notl %ecx -; SSE-NEXT: # kill: def $cl killed $cl killed $ecx -; SSE-NEXT: shlq %cl, %rsi -; SSE-NEXT: orl %r8d, %esi -; SSE-NEXT: btrl %eax, %esi -; SSE-NEXT: movl %esi, (%rdi,%rdx) +; SSE-NEXT: shrl $3, %ecx +; SSE-NEXT: andl $60, %ecx +; SSE-NEXT: andl %edx, (%rdi,%rcx) ; SSE-NEXT: # kill: def $eax killed $eax killed $rax -; SSE-NEXT: addq $8, %rsp ; SSE-NEXT: popq %rbx -; SSE-NEXT: popq %r12 ; SSE-NEXT: popq %r14 ; SSE-NEXT: popq %r15 ; SSE-NEXT: retq @@ -1964,133 +1935,86 @@ define i32 @blsr_u512(ptr %word) nounwind { ; AVX2: # %bb.0: ; AVX2-NEXT: pushq %r15 ; AVX2-NEXT: pushq %r14 -; AVX2-NEXT: pushq %r13 -; AVX2-NEXT: pushq %r12 ; AVX2-NEXT: pushq %rbx -; AVX2-NEXT: movq 56(%rdi), %rcx -; AVX2-NEXT: movq 40(%rdi), %rdx -; AVX2-NEXT: movq 32(%rdi), %r11 -; AVX2-NEXT: movq 24(%rdi), %rsi -; AVX2-NEXT: movq 16(%rdi), %r8 -; AVX2-NEXT: movq (%rdi), %r9 -; AVX2-NEXT: movq 8(%rdi), %r10 -; AVX2-NEXT: xorl %ebx, %ebx -; AVX2-NEXT: tzcntq %r9, %rbx -; AVX2-NEXT: tzcntq %r10, %rax -; AVX2-NEXT: addq $64, %rax -; AVX2-NEXT: testq %r9, %r9 -; AVX2-NEXT: cmovneq %rbx, %rax -; AVX2-NEXT: xorl %r14d, %r14d -; AVX2-NEXT: tzcntq %r8, %r14 +; AVX2-NEXT: movq 40(%rdi), %r9 +; AVX2-NEXT: movq 32(%rdi), %r10 +; AVX2-NEXT: movq 24(%rdi), %r8 +; AVX2-NEXT: movq 16(%rdi), %rdx +; AVX2-NEXT: movq (%rdi), %rcx +; AVX2-NEXT: movq 8(%rdi), %rsi +; AVX2-NEXT: tzcntq %rcx, %rax ; AVX2-NEXT: xorl %ebx, %ebx ; AVX2-NEXT: tzcntq %rsi, %rbx ; AVX2-NEXT: addq $64, %rbx -; AVX2-NEXT: testq %r8, %r8 -; AVX2-NEXT: cmovneq %r14, %rbx -; AVX2-NEXT: subq $-128, %rbx -; AVX2-NEXT: movq %r9, %r14 -; AVX2-NEXT: movq %r9, %r15 -; AVX2-NEXT: orq %r10, %r15 +; AVX2-NEXT: testq %rcx, %rcx ; AVX2-NEXT: cmovneq %rax, %rbx ; AVX2-NEXT: xorl %eax, %eax -; AVX2-NEXT: tzcntq %r11, %rax -; AVX2-NEXT: xorl %r12d, %r12d -; AVX2-NEXT: tzcntq %rdx, %r12 -; AVX2-NEXT: addq $64, %r12 -; AVX2-NEXT: testq %r11, %r11 -; AVX2-NEXT: cmovneq %rax, %r12 -; AVX2-NEXT: movq 48(%rdi), %r15 -; AVX2-NEXT: xorl %r13d, %r13d -; AVX2-NEXT: tzcntq %r15, %r13 +; AVX2-NEXT: tzcntq %rdx, %rax +; AVX2-NEXT: tzcntq %r8, %r11 +; AVX2-NEXT: addq $64, %r11 +; AVX2-NEXT: testq %rdx, %rdx +; AVX2-NEXT: cmovneq %rax, %r11 +; AVX2-NEXT: subq $-128, %r11 +; AVX2-NEXT: movq %rcx, %rax +; AVX2-NEXT: orq %rsi, %rax +; AVX2-NEXT: cmovneq %rbx, %r11 ; AVX2-NEXT: xorl %eax, %eax -; AVX2-NEXT: tzcntq %rcx, %rax +; AVX2-NEXT: tzcntq %r10, %rax +; AVX2-NEXT: xorl %ebx, %ebx +; AVX2-NEXT: tzcntq %r9, %rbx +; AVX2-NEXT: addq $64, %rbx +; AVX2-NEXT: testq %r10, %r10 +; AVX2-NEXT: cmovneq %rax, %rbx +; AVX2-NEXT: movq 48(%rdi), %r14 +; AVX2-NEXT: xorl %r15d, %r15d +; AVX2-NEXT: tzcntq %r14, %r15 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq 56(%rdi), %rax ; AVX2-NEXT: addq $64, %rax -; AVX2-NEXT: testq %r15, %r15 -; AVX2-NEXT: cmovneq %r13, %rax +; AVX2-NEXT: testq %r14, %r14 +; AVX2-NEXT: cmovneq %r15, %rax ; AVX2-NEXT: subq $-128, %rax -; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX2-NEXT: movq %r11, -{{[0-9]+}}(%rsp) -; AVX2-NEXT: orq %rdx, %r11 -; AVX2-NEXT: cmovneq %r12, %rax -; AVX2-NEXT: addq $256, %rax # imm = 0x100 -; AVX2-NEXT: movq %r10, -{{[0-9]+}}(%rsp) -; AVX2-NEXT: orq %rsi, %r10 -; AVX2-NEXT: orq %r8, %r14 -; AVX2-NEXT: orq %r10, %r14 +; AVX2-NEXT: orq %r9, %r10 ; AVX2-NEXT: cmovneq %rbx, %rax -; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX2-NEXT: movq %r9, -{{[0-9]+}}(%rsp) -; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) -; AVX2-NEXT: movq %r15, -{{[0-9]+}}(%rsp) -; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) -; AVX2-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) -; AVX2-NEXT: movq %r8, -{{[0-9]+}}(%rsp) +; AVX2-NEXT: addq $256, %rax # imm = 0x100 +; AVX2-NEXT: orq %r8, %rsi +; AVX2-NEXT: orq %rdx, %rcx +; AVX2-NEXT: orq %rsi, %rcx +; AVX2-NEXT: cmovneq %r11, %rax +; AVX2-NEXT: movl $-2, %edx +; AVX2-NEXT: movl %eax, %ecx +; AVX2-NEXT: roll %cl, %edx ; AVX2-NEXT: movl %eax, %ecx -; AVX2-NEXT: andl $32, %ecx -; AVX2-NEXT: movl %eax, %edx -; AVX2-NEXT: andl $480, %edx # imm = 0x1E0 -; AVX2-NEXT: shrl $3, %edx -; AVX2-NEXT: movl %edx, %esi -; AVX2-NEXT: andl $-8, %esi -; AVX2-NEXT: shrxq %rcx, -128(%rsp,%rsi), %r8 -; AVX2-NEXT: notl %ecx -; AVX2-NEXT: movl -120(%rsp,%rsi), %esi -; AVX2-NEXT: addl %esi, %esi -; AVX2-NEXT: shlxq %rcx, %rsi, %rcx -; AVX2-NEXT: orl %r8d, %ecx -; AVX2-NEXT: btrl %eax, %ecx -; AVX2-NEXT: movl %ecx, (%rdi,%rdx) +; AVX2-NEXT: shrl $3, %ecx +; AVX2-NEXT: andl $60, %ecx +; AVX2-NEXT: andl %edx, (%rdi,%rcx) ; AVX2-NEXT: # kill: def $eax killed $eax killed $rax ; AVX2-NEXT: popq %rbx -; AVX2-NEXT: popq %r12 -; AVX2-NEXT: popq %r13 ; AVX2-NEXT: popq %r14 ; AVX2-NEXT: popq %r15 -; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; ; AVX512-LABEL: blsr_u512: ; AVX512: # %bb.0: -; AVX512-NEXT: pushq %rax -; AVX512-NEXT: vmovups (%rdi), %ymm0 -; AVX512-NEXT: vmovups 32(%rdi), %ymm1 -; AVX512-NEXT: vmovdqu64 (%rdi), %zmm2 -; AVX512-NEXT: vpternlogd {{.*#+}} zmm3 = -1 -; AVX512-NEXT: vpaddq %zmm3, %zmm2, %zmm3 -; AVX512-NEXT: vpandnq %zmm3, %zmm2, %zmm3 -; AVX512-NEXT: vplzcntq %zmm3, %zmm3 -; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm4 = [64,128,192,256,320,384,448,512] -; AVX512-NEXT: vpsubq %zmm3, %zmm4, %zmm3 -; AVX512-NEXT: vptestmq %zmm2, %zmm2, %k1 -; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm2 = [512,512,512,512,512,512,512,512] -; AVX512-NEXT: vpcompressq %zmm3, %zmm2 {%k1} -; AVX512-NEXT: vmovq %xmm2, %rax -; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512-NEXT: vmovdqu %ymm2, -{{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovdqu %ymm2, -{{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovdqu64 (%rdi), %zmm0 +; AVX512-NEXT: vpternlogd {{.*#+}} zmm1 = -1 +; AVX512-NEXT: vpaddq %zmm1, %zmm0, %zmm1 +; AVX512-NEXT: vpandnq %zmm1, %zmm0, %zmm1 +; AVX512-NEXT: vplzcntq %zmm1, %zmm1 +; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm2 = [64,128,192,256,320,384,448,512] +; AVX512-NEXT: vpsubq %zmm1, %zmm2, %zmm1 +; AVX512-NEXT: vptestmq %zmm0, %zmm0, %k1 +; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm0 = [512,512,512,512,512,512,512,512] +; AVX512-NEXT: vpcompressq %zmm1, %zmm0 {%k1} +; AVX512-NEXT: vmovq %xmm0, %rax +; AVX512-NEXT: movl $-2, %edx +; AVX512-NEXT: movl %eax, %ecx +; AVX512-NEXT: roll %cl, %edx ; AVX512-NEXT: movl %eax, %ecx -; AVX512-NEXT: andl $32, %ecx -; AVX512-NEXT: movl %ecx, %edx -; AVX512-NEXT: notl %edx -; AVX512-NEXT: movl %eax, %esi -; AVX512-NEXT: shrl $3, %esi -; AVX512-NEXT: movl %esi, %r8d -; AVX512-NEXT: andl $56, %r8d -; AVX512-NEXT: movl -120(%rsp,%r8), %r9d -; AVX512-NEXT: addl %r9d, %r9d -; AVX512-NEXT: shlxq %rdx, %r9, %rdx ; AVX512-NEXT: shrl $3, %ecx -; AVX512-NEXT: addq %rsp, %r8 -; AVX512-NEXT: addq $-128, %r8 -; AVX512-NEXT: orl (%rcx,%r8), %edx -; AVX512-NEXT: btrl %eax, %edx -; AVX512-NEXT: andl $60, %esi -; AVX512-NEXT: movl %edx, (%rdi,%rsi) +; AVX512-NEXT: andl $60, %ecx +; AVX512-NEXT: andl %edx, (%rdi,%rcx) ; AVX512-NEXT: # kill: def $eax killed $eax killed $rax -; AVX512-NEXT: popq %rcx ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %ld = load i512, ptr %word diff --git a/llvm/test/Transforms/InstCombine/canonicalize-const-to-bop.ll b/llvm/test/Transforms/InstCombine/canonicalize-const-to-bop.ll index b3093a92624ae..f0e40f4ede161 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-const-to-bop.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-const-to-bop.ll @@ -123,8 +123,7 @@ define i8 @udiv_slt_exact(i8 %x) { define i8 @canonicalize_icmp_operands(i8 %x) { ; CHECK-LABEL: define i8 @canonicalize_icmp_operands( ; CHECK-SAME: i8 [[X:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smin.i8(i8 [[X]], i8 119) -; CHECK-NEXT: [[S:%.*]] = add nsw i8 [[TMP1]], 8 +; CHECK-NEXT: [[S:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X]], i8 8) ; CHECK-NEXT: ret i8 [[S]] ; %add = add nsw i8 %x, 8 diff --git a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll index cfd679c0cc592..c0ad5818e448a 100644 --- a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll +++ b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll @@ -2351,3 +2351,323 @@ define i8 @fold_add_umax_to_usub_multiuse(i8 %a) { } declare void @usei8(i8) + +define i8 @sadd_sat_uge_int_max(i8 %x, i8 %y) { +; CHECK-LABEL: @sadd_sat_uge_int_max( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], 127 +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]] +; CHECK-NEXT: ret i8 [[R]] +; + %cmp = icmp sge i8 %x, 127 + %add = add i8 %x, %y + %r = select i1 %cmp, i8 127, i8 %add + ret i8 %r +} + +define i8 @sadd_sat_ugt_int_max(i8 %x, i8 %y) { +; CHECK-LABEL: @sadd_sat_ugt_int_max( +; CHECK-NEXT: [[R:%.*]] = add i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i8 [[R]] +; + %cmp = icmp sgt i8 %x, 127 + %add = add i8 %x, %y + %r = select i1 %cmp, i8 127, i8 %add + ret i8 %r +} + +define i8 @sadd_sat_eq_int_max(i8 %x) { +; CHECK-LABEL: @sadd_sat_eq_int_max( +; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X:%.*]], i8 1) +; CHECK-NEXT: ret i8 [[R]] +; + %cmp = icmp eq i8 %x, 127 + %add = add i8 %x, 1 + %r = select i1 %cmp, i8 127, i8 %add + ret i8 %r +} + +define i8 @sadd_sat_constant(i8 %x) { +; CHECK-LABEL: @sadd_sat_constant( +; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X:%.*]], i8 10) +; CHECK-NEXT: ret i8 [[R]] +; + %cmp = icmp sge i8 %x, 118 + %add = add i8 %x, 10 + %r = select i1 %cmp, i8 127, i8 %add + ret i8 %r +} + +define i8 @sadd_sat_negative_no_fold(i8 %x, i8 %y) { +; CHECK-LABEL: @sadd_sat_negative_no_fold( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], 127 +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]] +; CHECK-NEXT: ret i8 [[R]] +; + %cmp = icmp sge i8 %x, 127 + %add = add i8 %x, %y + %r = select i1 %cmp, i8 127, i8 %add + ret i8 %r +} + +define i8 @sadd_sat_wrong_predicate(i8 %x, i8 %y) { +; CHECK-LABEL: @sadd_sat_wrong_predicate( +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i8 [[X:%.*]], 127 +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[Y:%.*]], 127 +; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP_NOT]], i8 [[ADD]], i8 127 +; CHECK-NEXT: ret i8 [[R]] +; + %cmp = icmp slt i8 %x, 127 + %add = add i8 %x, %y + %r = select i1 %cmp, i8 127, i8 %add + ret i8 %r +} + +define i8 @sadd_sat_wrong_constant(i8 %x, i8 %y) { +; CHECK-LABEL: @sadd_sat_wrong_constant( +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[X:%.*]], 125 +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]] +; CHECK-NEXT: ret i8 [[R]] +; + %cmp = icmp sge i8 %x, 126 + %add = add i8 %x, %y + %r = select i1 %cmp, i8 127, i8 %add + ret i8 %r +} + +define <2 x i8> @sadd_sat_vector(<2 x i8> %x, <2 x i8> %y) { +; CHECK-LABEL: @sadd_sat_vector( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 127) +; CHECK-NEXT: [[ADD:%.*]] = add <2 x i8> [[X]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[CMP]], <2 x i8> splat (i8 127), <2 x i8> [[ADD]] +; CHECK-NEXT: ret <2 x i8> [[R]] +; + %cmp = icmp sge <2 x i8> %x, + %add = add <2 x i8> %x, %y + %r = select <2 x i1> %cmp, <2 x i8> , <2 x i8> %add + ret <2 x i8> %r +} + +define <2 x i8> @sadd_sat_vector_constant(<2 x i8> %x) { +; CHECK-LABEL: @sadd_sat_vector_constant( +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i8> @llvm.smin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> ) +; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[TMP1]], +; CHECK-NEXT: ret <2 x i8> [[R]] +; + %cmp = icmp sge <2 x i8> %x, + %add = add <2 x i8> %x, + %r = select <2 x i1> %cmp, <2 x i8> , <2 x i8> %add + ret <2 x i8> %r +} + +define i8 @sadd_sat_int_max_minus_x(i8 %x, i8 %y) { +; CHECK-LABEL: @sadd_sat_int_max_minus_x( +; CHECK-NEXT: [[SUB:%.*]] = sub i8 127, [[X:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[SUB]], [[Y:%.*]] +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]] +; CHECK-NEXT: ret i8 [[R]] +; + %sub = sub i8 127, %x + %cmp = icmp slt i8 %sub, %y + %add = add i8 %x, %y + %r = select i1 %cmp, i8 127, i8 %add + ret i8 %r +} + +define i8 @sadd_sat_int_max_minus_x_commuted(i8 %x, i8 %y) { +; CHECK-LABEL: @sadd_sat_int_max_minus_x_commuted( +; CHECK-NEXT: [[SUB:%.*]] = sub i8 127, [[X:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[Y:%.*]], [[SUB]] +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]] +; CHECK-NEXT: ret i8 [[R]] +; + %sub = sub i8 127, %x + %cmp = icmp sgt i8 %y, %sub + %add = add i8 %x, %y + %r = select i1 %cmp, i8 127, i8 %add + ret i8 %r +} + +define i8 @sadd_sat_int_max_minus_x_nonstrict(i8 %x, i8 %y) { +; CHECK-LABEL: @sadd_sat_int_max_minus_x_nonstrict( +; CHECK-NEXT: [[SUB:%.*]] = sub i8 127, [[X:%.*]] +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp sgt i8 [[SUB]], [[Y:%.*]] +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP_NOT]], i8 [[ADD]], i8 127 +; CHECK-NEXT: ret i8 [[R]] +; + %sub = sub i8 127, %x + %cmp = icmp sle i8 %sub, %y + %add = add i8 %x, %y + %r = select i1 %cmp, i8 127, i8 %add + ret i8 %r +} + +define i8 @sadd_sat_int_max_minus_x_commuted_nonstrict(i8 %x, i8 %y) { +; CHECK-LABEL: @sadd_sat_int_max_minus_x_commuted_nonstrict( +; CHECK-NEXT: [[SUB:%.*]] = sub i8 127, [[X:%.*]] +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp slt i8 [[Y:%.*]], [[SUB]] +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP_NOT]], i8 [[ADD]], i8 127 +; CHECK-NEXT: ret i8 [[R]] +; + %sub = sub i8 127, %x + %cmp = icmp sge i8 %y, %sub + %add = add i8 %x, %y + %r = select i1 %cmp, i8 127, i8 %add + ret i8 %r +} + +define i8 @sadd_sat_int_max_minus_x_wrong_constant(i8 %x, i8 %y) { +; CHECK-LABEL: @sadd_sat_int_max_minus_x_wrong_constant( +; CHECK-NEXT: [[SUB:%.*]] = sub i8 126, [[X:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[SUB]], [[Y:%.*]] +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]] +; CHECK-NEXT: ret i8 [[R]] +; + %sub = sub i8 126, %x + %cmp = icmp slt i8 %sub, %y + %add = add i8 %x, %y + %r = select i1 %cmp, i8 127, i8 %add + ret i8 %r +} + +define i8 @sadd_sat_int_max_minus_x_wrong_predicate(i8 %x, i8 %y) { +; CHECK-LABEL: @sadd_sat_int_max_minus_x_wrong_predicate( +; CHECK-NEXT: [[SUB:%.*]] = sub i8 127, [[X:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[SUB]], [[Y:%.*]] +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]] +; CHECK-NEXT: ret i8 [[R]] +; + %sub = sub i8 127, %x + %cmp = icmp sgt i8 %sub, %y + %add = add i8 %x, %y + %r = select i1 %cmp, i8 127, i8 %add + ret i8 %r +} + +define <2 x i8> @sadd_sat_int_max_minus_x_vector(<2 x i8> %x, <2 x i8> %y) { +; CHECK-LABEL: @sadd_sat_int_max_minus_x_vector( +; CHECK-NEXT: [[SUB:%.*]] = sub <2 x i8> splat (i8 127), [[X:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[SUB]], [[Y:%.*]] +; CHECK-NEXT: [[ADD:%.*]] = add <2 x i8> [[X]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[CMP]], <2 x i8> splat (i8 127), <2 x i8> [[ADD]] +; CHECK-NEXT: ret <2 x i8> [[R]] +; + %sub = sub <2 x i8> , %x + %cmp = icmp slt <2 x i8> %sub, %y + %add = add <2 x i8> %x, %y + %r = select <2 x i1> %cmp, <2 x i8> , <2 x i8> %add + ret <2 x i8> %r +} + +define i8 @sadd_sat_commuted_select(i8 %x, i8 %y) { +; CHECK-LABEL: @sadd_sat_commuted_select( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], 127 +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[Y:%.*]], 127 +; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i8 [[ADD]], i8 127 +; CHECK-NEXT: ret i8 [[R]] +; + %cmp = icmp sge i8 %x, 127 + %add = add i8 %x, %y + %r = select i1 %cmp, i8 %add, i8 127 + ret i8 %r +} + +define i8 @sadd_sat_commuted_add(i8 %x, i8 %y) { +; CHECK-LABEL: @sadd_sat_commuted_add( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], 127 +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[Y:%.*]], [[X]] +; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]] +; CHECK-NEXT: ret i8 [[R]] +; + %cmp = icmp sge i8 %x, 127 + %add = add i8 %y, %x + %r = select i1 %cmp, i8 127, i8 %add + ret i8 %r +} + +define i8 @sadd_sat_commuted_both(i8 %x, i8 %y) { +; CHECK-LABEL: @sadd_sat_commuted_both( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], 127 +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[Y:%.*]], 127 +; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i8 [[ADD]], i8 127 +; CHECK-NEXT: ret i8 [[R]] +; + %cmp = icmp sge i8 %x, 127 + %add = add i8 %y, %x + %r = select i1 %cmp, i8 %add, i8 127 + ret i8 %r +} + +define i8 @sadd_sat_int_max_minus_x_nsw_slt(i8 %x, i8 %y) { +; CHECK-LABEL: @sadd_sat_int_max_minus_x_nsw_slt( +; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: ret i8 [[R]] +; + %sub = sub nsw i8 127, %x + %cmp = icmp slt i8 %sub, %y + %add = add i8 %x, %y + %r = select i1 %cmp, i8 127, i8 %add + ret i8 %r +} + +define i8 @sadd_sat_int_max_minus_x_nsw_sge_commuted(i8 %x, i8 %y) { +; CHECK-LABEL: @sadd_sat_int_max_minus_x_nsw_sge_commuted( +; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: ret i8 [[R]] +; + %sub = sub nsw i8 127, %x + %cmp = icmp sge i8 %y, %sub + %add = add i8 %x, %y + %r = select i1 %cmp, i8 127, i8 %add + ret i8 %r +} + +define i8 @sadd_sat_int_max_minus_x_no_nsw_neg(i8 %x, i8 %y) { +; CHECK-LABEL: @sadd_sat_int_max_minus_x_no_nsw_neg( +; CHECK-NEXT: [[SUB:%.*]] = sub i8 127, [[X:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[SUB]], [[Y:%.*]] +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]] +; CHECK-NEXT: ret i8 [[R]] +; + %sub = sub i8 127, %x + %cmp = icmp slt i8 %sub, %y + %add = add i8 %x, %y + %r = select i1 %cmp, i8 127, i8 %add + ret i8 %r +} + +define i8 @neg_no_nsw(i8 %x, i8 %y) { +; CHECK-LABEL: @neg_no_nsw( +; CHECK-NEXT: [[ADD:%.*]] = sub i8 127, [[Y:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[X:%.*]], [[ADD]] +; CHECK-NEXT: [[D:%.*]] = add i8 [[X]], [[Y]] +; CHECK-NEXT: [[S:%.*]] = select i1 [[CMP]], i8 127, i8 [[D]] +; CHECK-NEXT: ret i8 [[S]] +; + %add = sub i8 127, %y + %cmp = icmp sgt i8 %x, %add + %d = add i8 %x, %y + %s = select i1 %cmp, i8 127, i8 %d + ret i8 %s +} + +define i8 @neg_neg_constant(i8 %x, i8 %y) { +; CHECK-LABEL: @neg_neg_constant( +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smin.i8(i8 [[X:%.*]], i8 -1) +; CHECK-NEXT: [[S:%.*]] = and i8 [[TMP1]], 127 +; CHECK-NEXT: ret i8 [[S]] +; + %cmp = icmp sgt i8 %x, -2 + %d = add i8 %x, -128 + %s = select i1 %cmp, i8 127, i8 %d + ret i8 %s +} diff --git a/llvm/test/Transforms/InstSimplify/AArch64/aarch64-sve-reductions.ll b/llvm/test/Transforms/InstSimplify/AArch64/aarch64-sve-reductions.ll new file mode 100644 index 0000000000000..a54d6044d04b1 --- /dev/null +++ b/llvm/test/Transforms/InstSimplify/AArch64/aarch64-sve-reductions.ll @@ -0,0 +1,912 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S -passes=instsimplify < %s | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +; +; ANDV +; + +define i8 @andv_i8_no_active( %a) #0 { +; CHECK-LABEL: define i8 @andv_i8_no_active( +; CHECK-SAME: [[A:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: ret i8 -1 +; + %out = call i8 @llvm.aarch64.sve.andv.nxv16i8( zeroinitializer, %a) + ret i8 %out +} + +define i8 @andv_i8_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i8 @andv_i8_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 -1 +; + %out = call i8 @llvm.aarch64.sve.andv.nxv16i8( %pg, splat(i8 -1)) + ret i8 %out +} + +define i8 @andv_i8_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i8 @andv_i8_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i8 @llvm.aarch64.sve.andv.nxv16i8( [[PG]], zeroinitializer) +; CHECK-NEXT: ret i8 [[OUT]] +; + %out = call i8 @llvm.aarch64.sve.andv.nxv16i8( %pg, zeroinitializer) + ret i8 %out +} + +define i8 @andv_i8_all_active_splat(i8 %a) #0 { +; CHECK-LABEL: define i8 @andv_i8_all_active_splat( +; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 [[A]] +; + %a.insert = insertelement poison, i8 %a, i8 0 + %a.splat = shufflevector %a.insert, poison, zeroinitializer + %out = call i8 @llvm.aarch64.sve.andv.nxv16i8( splat (i1 true), %a.splat) + ret i8 %out +} + +define i16 @andv_i16_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i16 @andv_i16_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i16 -1 +; + %out = call i16 @llvm.aarch64.sve.andv.nxv8i16( %pg, splat(i16 -1)) + ret i16 %out +} + +define i16 @andv_i16_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i16 @andv_i16_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i16 @llvm.aarch64.sve.andv.nxv8i16( [[PG]], zeroinitializer) +; CHECK-NEXT: ret i16 [[OUT]] +; + %out = call i16 @llvm.aarch64.sve.andv.nxv8i16( %pg, zeroinitializer) + ret i16 %out +} + +define i32 @andv_i32_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i32 @andv_i32_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i32 -1 +; + %out = call i32 @llvm.aarch64.sve.andv.nxv4i32( %pg, splat(i32 -1)) + ret i32 %out +} + +define i32 @andv_i32_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i32 @andv_i32_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i32 @llvm.aarch64.sve.andv.nxv4i32( [[PG]], zeroinitializer) +; CHECK-NEXT: ret i32 [[OUT]] +; + %out = call i32 @llvm.aarch64.sve.andv.nxv4i32( %pg, zeroinitializer) + ret i32 %out +} + +define i64 @andv_i64_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @andv_i64_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 -1 +; + %out = call i64 @llvm.aarch64.sve.andv.nxv2i64( %pg, splat(i64 -1)) + ret i64 %out +} + +define i64 @andv_i64_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @andv_i64_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.andv.nxv2i64( [[PG]], zeroinitializer) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.andv.nxv2i64( %pg, zeroinitializer) + ret i64 %out +} + +; +; EORV +; + +define i8 @eorv_i8_no_active( %a) #0 { +; CHECK-LABEL: define i8 @eorv_i8_no_active( +; CHECK-SAME: [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 0 +; + %out = call i8 @llvm.aarch64.sve.eorv.nxv16i8( zeroinitializer, %a) + ret i8 %out +} + +define i8 @eorv_i8_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i8 @eorv_i8_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 0 +; + %out = call i8 @llvm.aarch64.sve.eorv.nxv16i8( %pg, zeroinitializer) + ret i8 %out +} + +define i8 @eorv_i8_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i8 @eorv_i8_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i8 @llvm.aarch64.sve.eorv.nxv16i8( [[PG]], splat (i8 1)) +; CHECK-NEXT: ret i8 [[OUT]] +; + %out = call i8 @llvm.aarch64.sve.eorv.nxv16i8( %pg, splat(i8 1)) + ret i8 %out +} + +define i8 @eorv_i8_all_active_splat(i8 %a) #0 { +; CHECK-LABEL: define i8 @eorv_i8_all_active_splat( +; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 0 +; + %a.insert = insertelement poison, i8 %a, i8 0 + %a.splat = shufflevector %a.insert, poison, zeroinitializer + %out = call i8 @llvm.aarch64.sve.eorv.nxv16i8( splat (i1 true), %a.splat) + ret i8 %out +} + +define i16 @eorv_i16_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i16 @eorv_i16_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i16 0 +; + %out = call i16 @llvm.aarch64.sve.eorv.nxv8i16( %pg, zeroinitializer) + ret i16 %out +} + +define i16 @eorv_i16_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i16 @eorv_i16_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i16 @llvm.aarch64.sve.eorv.nxv8i16( [[PG]], splat (i16 1)) +; CHECK-NEXT: ret i16 [[OUT]] +; + %out = call i16 @llvm.aarch64.sve.eorv.nxv8i16( %pg, splat(i16 1)) + ret i16 %out +} + +define i32 @eorv_i32_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i32 @eorv_i32_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i32 0 +; + %out = call i32 @llvm.aarch64.sve.eorv.nxv4i32( %pg, zeroinitializer) + ret i32 %out +} + +define i32 @eorv_i32_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i32 @eorv_i32_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i32 @llvm.aarch64.sve.eorv.nxv4i32( [[PG]], splat (i32 1)) +; CHECK-NEXT: ret i32 [[OUT]] +; + %out = call i32 @llvm.aarch64.sve.eorv.nxv4i32( %pg, splat(i32 1)) + ret i32 %out +} + +define i64 @eorv_i64_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @eorv_i64_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 0 +; + %out = call i64 @llvm.aarch64.sve.eorv.nxv2i64( %pg, zeroinitializer) + ret i64 %out +} + +define i64 @eorv_i64_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @eorv_i64_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.eorv.nxv2i64( [[PG]], splat (i64 1)) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.eorv.nxv2i64( %pg, splat(i64 1)) + ret i64 %out +} + +; +; ORV +; + +define i8 @orv_i8_no_active( %a) #0 { +; CHECK-LABEL: define i8 @orv_i8_no_active( +; CHECK-SAME: [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 0 +; + %out = call i8 @llvm.aarch64.sve.orv.nxv16i8( zeroinitializer, %a) + ret i8 %out +} + +define i8 @orv_i8_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i8 @orv_i8_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 0 +; + %out = call i8 @llvm.aarch64.sve.orv.nxv16i8( %pg, zeroinitializer) + ret i8 %out +} + +define i8 @orv_i8_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i8 @orv_i8_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i8 @llvm.aarch64.sve.orv.nxv16i8( [[PG]], splat (i8 1)) +; CHECK-NEXT: ret i8 [[OUT]] +; + %out = call i8 @llvm.aarch64.sve.orv.nxv16i8( %pg, splat(i8 1)) + ret i8 %out +} + +define i8 @orv_i8_all_active_splat(i8 %a) #0 { +; CHECK-LABEL: define i8 @orv_i8_all_active_splat( +; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 [[A]] +; + %a.insert = insertelement poison, i8 %a, i8 0 + %a.splat = shufflevector %a.insert, poison, zeroinitializer + %out = call i8 @llvm.aarch64.sve.orv.nxv16i8( splat (i1 true), %a.splat) + ret i8 %out +} + +define i16 @orv_i16_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i16 @orv_i16_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i16 0 +; + %out = call i16 @llvm.aarch64.sve.orv.nxv8i16( %pg, zeroinitializer) + ret i16 %out +} + +define i16 @orv_i16_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i16 @orv_i16_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i16 @llvm.aarch64.sve.orv.nxv8i16( [[PG]], splat (i16 1)) +; CHECK-NEXT: ret i16 [[OUT]] +; + %out = call i16 @llvm.aarch64.sve.orv.nxv8i16( %pg, splat(i16 1)) + ret i16 %out +} + +define i32 @orv_i32_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i32 @orv_i32_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i32 0 +; + %out = call i32 @llvm.aarch64.sve.orv.nxv4i32( %pg, zeroinitializer) + ret i32 %out +} + +define i32 @orv_i32_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i32 @orv_i32_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i32 @llvm.aarch64.sve.orv.nxv4i32( [[PG]], splat (i32 1)) +; CHECK-NEXT: ret i32 [[OUT]] +; + %out = call i32 @llvm.aarch64.sve.orv.nxv4i32( %pg, splat(i32 1)) + ret i32 %out +} + +define i64 @orv_i64_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @orv_i64_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 0 +; + %out = call i64 @llvm.aarch64.sve.orv.nxv2i64( %pg, zeroinitializer) + ret i64 %out +} + +define i64 @orv_i64_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @orv_i64_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.orv.nxv2i64( [[PG]], splat (i64 1)) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.orv.nxv2i64( %pg, splat(i64 1)) + ret i64 %out +} + +; +; SADDV +; + +define i64 @saddv_i8_no_active( %a) #0 { +; CHECK-LABEL: define i64 @saddv_i8_no_active( +; CHECK-SAME: [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 0 +; + %out = call i64 @llvm.aarch64.sve.saddv.nxv16i8( zeroinitializer, %a) + ret i64 %out +} + +define i64 @saddv_i8_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @saddv_i8_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 0 +; + %out = call i64 @llvm.aarch64.sve.saddv.nxv16i8( %pg, zeroinitializer) + ret i64 %out +} + +define i64 @saddv_i8_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @saddv_i8_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.saddv.nxv16i8( [[PG]], splat (i8 1)) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.saddv.nxv16i8( %pg, splat(i8 1)) + ret i64 %out +} + +define i64 @saddv_i8_all_active_splat(i8 %a) #0 { +; CHECK-LABEL: define i64 @saddv_i8_all_active_splat( +; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[A_INSERT:%.*]] = insertelement poison, i8 [[A]], i8 0 +; CHECK-NEXT: [[A_SPLAT:%.*]] = shufflevector [[A_INSERT]], poison, zeroinitializer +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.saddv.nxv16i8( splat (i1 true), [[A_SPLAT]]) +; CHECK-NEXT: ret i64 [[OUT]] +; + %a.insert = insertelement poison, i8 %a, i8 0 + %a.splat = shufflevector %a.insert, poison, zeroinitializer + %out = call i64 @llvm.aarch64.sve.saddv.nxv16i8( splat (i1 true), %a.splat) + ret i64 %out +} + +define i64 @saddv_i16_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @saddv_i16_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 0 +; + %out = call i64 @llvm.aarch64.sve.saddv.nxv8i16( %pg, zeroinitializer) + ret i64 %out +} + +define i64 @saddv_i16_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @saddv_i16_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.saddv.nxv8i16( [[PG]], splat (i16 1)) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.saddv.nxv8i16( %pg, splat(i16 1)) + ret i64 %out +} + +define i64 @saddv_i32_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @saddv_i32_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 0 +; + %out = call i64 @llvm.aarch64.sve.saddv.nxv4i32( %pg, zeroinitializer) + ret i64 %out +} + +define i64 @saddv_i32_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @saddv_i32_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.saddv.nxv4i32( [[PG]], splat (i32 1)) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.saddv.nxv4i32( %pg, splat(i32 1)) + ret i64 %out +} + +define i64 @saddv_i64_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @saddv_i64_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 0 +; + %out = call i64 @llvm.aarch64.sve.saddv.nxv2i64( %pg, zeroinitializer) + ret i64 %out +} + +define i64 @saddv_i64_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @saddv_i64_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.saddv.nxv2i64( [[PG]], splat (i64 1)) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.saddv.nxv2i64( %pg, splat(i64 1)) + ret i64 %out +} + +; +; SMAXV +; + +define i8 @smaxv_i8_no_active( %a) #0 { +; CHECK-LABEL: define i8 @smaxv_i8_no_active( +; CHECK-SAME: [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 -128 +; + %out = call i8 @llvm.aarch64.sve.smaxv.nxv16i8( zeroinitializer, %a) + ret i8 %out +} + +define i8 @smaxv_i8_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i8 @smaxv_i8_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 -128 +; + %out = call i8 @llvm.aarch64.sve.smaxv.nxv16i8( %pg, splat(i8 -128)) + ret i8 %out +} + +define i8 @smaxv_i8_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i8 @smaxv_i8_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i8 @llvm.aarch64.sve.smaxv.nxv16i8( [[PG]], zeroinitializer) +; CHECK-NEXT: ret i8 [[OUT]] +; + %out = call i8 @llvm.aarch64.sve.smaxv.nxv16i8( %pg, zeroinitializer) + ret i8 %out +} + +define i8 @smaxv_i8_all_active_splat(i8 %a) #0 { +; CHECK-LABEL: define i8 @smaxv_i8_all_active_splat( +; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 [[A]] +; + %a.insert = insertelement poison, i8 %a, i8 0 + %a.splat = shufflevector %a.insert, poison, zeroinitializer + %out = call i8 @llvm.aarch64.sve.smaxv.nxv16i8( splat (i1 true), %a.splat) + ret i8 %out +} + +define i16 @smaxv_i16_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i16 @smaxv_i16_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i16 -32768 +; + %out = call i16 @llvm.aarch64.sve.smaxv.nxv8i16( %pg, splat(i16 -32768)) + ret i16 %out +} + +define i16 @smaxv_i16_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i16 @smaxv_i16_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i16 @llvm.aarch64.sve.smaxv.nxv8i16( [[PG]], zeroinitializer) +; CHECK-NEXT: ret i16 [[OUT]] +; + %out = call i16 @llvm.aarch64.sve.smaxv.nxv8i16( %pg, zeroinitializer) + ret i16 %out +} + +define i32 @smaxv_i32_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i32 @smaxv_i32_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i32 -2147483648 +; + %out = call i32 @llvm.aarch64.sve.smaxv.nxv4i32( %pg, splat(i32 -2147483648)) + ret i32 %out +} + +define i32 @smaxv_i32_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i32 @smaxv_i32_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i32 @llvm.aarch64.sve.smaxv.nxv4i32( [[PG]], zeroinitializer) +; CHECK-NEXT: ret i32 [[OUT]] +; + %out = call i32 @llvm.aarch64.sve.smaxv.nxv4i32( %pg, zeroinitializer) + ret i32 %out +} + +define i64 @smaxv_i64_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @smaxv_i64_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 -9223372036854775808 +; + %out = call i64 @llvm.aarch64.sve.smaxv.nxv2i64( %pg, splat(i64 -9223372036854775808)) + ret i64 %out +} + +define i64 @smaxv_i64_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @smaxv_i64_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.smaxv.nxv2i64( [[PG]], zeroinitializer) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.smaxv.nxv2i64( %pg, zeroinitializer) + ret i64 %out +} + +; +; SMINV +; + +define i8 @sminv_i8_no_active( %a) #0 { +; CHECK-LABEL: define i8 @sminv_i8_no_active( +; CHECK-SAME: [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 127 +; + %out = call i8 @llvm.aarch64.sve.sminv.nxv16i8( zeroinitializer, %a) + ret i8 %out +} + +define i8 @sminv_i8_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i8 @sminv_i8_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 127 +; + %out = call i8 @llvm.aarch64.sve.sminv.nxv16i8( %pg, splat(i8 127)) + ret i8 %out +} + +define i8 @sminv_i8_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i8 @sminv_i8_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i8 @llvm.aarch64.sve.sminv.nxv16i8( [[PG]], zeroinitializer) +; CHECK-NEXT: ret i8 [[OUT]] +; + %out = call i8 @llvm.aarch64.sve.sminv.nxv16i8( %pg, zeroinitializer) + ret i8 %out +} + +define i8 @sminv_i8_all_active_splat(i8 %a) #0 { +; CHECK-LABEL: define i8 @sminv_i8_all_active_splat( +; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 [[A]] +; + %a.insert = insertelement poison, i8 %a, i8 0 + %a.splat = shufflevector %a.insert, poison, zeroinitializer + %out = call i8 @llvm.aarch64.sve.sminv.nxv16i8( splat (i1 true), %a.splat) + ret i8 %out +} + +define i16 @sminv_i16_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i16 @sminv_i16_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i16 32767 +; + %out = call i16 @llvm.aarch64.sve.sminv.nxv8i16( %pg, splat(i16 32767)) + ret i16 %out +} + +define i16 @sminv_i16_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i16 @sminv_i16_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i16 @llvm.aarch64.sve.sminv.nxv8i16( [[PG]], zeroinitializer) +; CHECK-NEXT: ret i16 [[OUT]] +; + %out = call i16 @llvm.aarch64.sve.sminv.nxv8i16( %pg, zeroinitializer) + ret i16 %out +} + +define i32 @sminv_i32_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i32 @sminv_i32_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i32 2147483647 +; + %out = call i32 @llvm.aarch64.sve.sminv.nxv4i32( %pg, splat(i32 2147483647)) + ret i32 %out +} + +define i32 @sminv_i32_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i32 @sminv_i32_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i32 @llvm.aarch64.sve.sminv.nxv4i32( [[PG]], zeroinitializer) +; CHECK-NEXT: ret i32 [[OUT]] +; + %out = call i32 @llvm.aarch64.sve.sminv.nxv4i32( %pg, zeroinitializer) + ret i32 %out +} + +define i64 @sminv_i64_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @sminv_i64_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 9223372036854775807 +; + %out = call i64 @llvm.aarch64.sve.sminv.nxv2i64( %pg, splat(i64 9223372036854775807)) + ret i64 %out +} + +define i64 @sminv_i64_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @sminv_i64_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.sminv.nxv2i64( [[PG]], zeroinitializer) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.sminv.nxv2i64( %pg, zeroinitializer) + ret i64 %out +} + +; +; UADDV +; + +define i64 @uaddv_i8_no_active( %a) #0 { +; CHECK-LABEL: define i64 @uaddv_i8_no_active( +; CHECK-SAME: [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 0 +; + %out = call i64 @llvm.aarch64.sve.uaddv.nxv16i8( zeroinitializer, %a) + ret i64 %out +} + +define i64 @uaddv_i8_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @uaddv_i8_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 0 +; + %out = call i64 @llvm.aarch64.sve.uaddv.nxv16i8( %pg, zeroinitializer) + ret i64 %out +} + +define i64 @uaddv_i8_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @uaddv_i8_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.uaddv.nxv16i8( [[PG]], splat (i8 1)) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.uaddv.nxv16i8( %pg, splat(i8 1)) + ret i64 %out +} + +define i64 @uaddv_i8_all_active_splat(i8 %a) #0 { +; CHECK-LABEL: define i64 @uaddv_i8_all_active_splat( +; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[A_INSERT:%.*]] = insertelement poison, i8 [[A]], i8 0 +; CHECK-NEXT: [[A_SPLAT:%.*]] = shufflevector [[A_INSERT]], poison, zeroinitializer +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.uaddv.nxv16i8( splat (i1 true), [[A_SPLAT]]) +; CHECK-NEXT: ret i64 [[OUT]] +; + %a.insert = insertelement poison, i8 %a, i8 0 + %a.splat = shufflevector %a.insert, poison, zeroinitializer + %out = call i64 @llvm.aarch64.sve.uaddv.nxv16i8( splat (i1 true), %a.splat) + ret i64 %out +} + +define i64 @uaddv_i16_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @uaddv_i16_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 0 +; + %out = call i64 @llvm.aarch64.sve.uaddv.nxv8i16( %pg, zeroinitializer) + ret i64 %out +} + +define i64 @uaddv_i16_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @uaddv_i16_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.uaddv.nxv8i16( [[PG]], splat (i16 1)) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.uaddv.nxv8i16( %pg, splat(i16 1)) + ret i64 %out +} + +define i64 @uaddv_i32_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @uaddv_i32_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 0 +; + %out = call i64 @llvm.aarch64.sve.uaddv.nxv4i32( %pg, zeroinitializer) + ret i64 %out +} + +define i64 @uaddv_i32_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @uaddv_i32_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.uaddv.nxv4i32( [[PG]], splat (i32 1)) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.uaddv.nxv4i32( %pg, splat(i32 1)) + ret i64 %out +} + +define i64 @uaddv_i64_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @uaddv_i64_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 0 +; + %out = call i64 @llvm.aarch64.sve.uaddv.nxv2i64( %pg, zeroinitializer) + ret i64 %out +} + +define i64 @uaddv_i64_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @uaddv_i64_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.uaddv.nxv2i64( [[PG]], splat (i64 1)) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.uaddv.nxv2i64( %pg, splat(i64 1)) + ret i64 %out +} + +; +; UMAXV +; + +define i8 @umaxv_i8_no_active( %a) #0 { +; CHECK-LABEL: define i8 @umaxv_i8_no_active( +; CHECK-SAME: [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 0 +; + %out = call i8 @llvm.aarch64.sve.umaxv.nxv16i8( zeroinitializer, %a) + ret i8 %out +} + +define i8 @umaxv_i8_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i8 @umaxv_i8_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 0 +; + %out = call i8 @llvm.aarch64.sve.umaxv.nxv16i8( %pg, zeroinitializer) + ret i8 %out +} + +define i8 @umaxv_i8_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i8 @umaxv_i8_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i8 @llvm.aarch64.sve.umaxv.nxv16i8( [[PG]], splat (i8 1)) +; CHECK-NEXT: ret i8 [[OUT]] +; + %out = call i8 @llvm.aarch64.sve.umaxv.nxv16i8( %pg, splat(i8 1)) + ret i8 %out +} + +define i8 @umaxv_i8_all_active_splat(i8 %a) #0 { +; CHECK-LABEL: define i8 @umaxv_i8_all_active_splat( +; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 [[A]] +; + %a.insert = insertelement poison, i8 %a, i8 0 + %a.splat = shufflevector %a.insert, poison, zeroinitializer + %out = call i8 @llvm.aarch64.sve.umaxv.nxv16i8( splat (i1 true), %a.splat) + ret i8 %out +} + +define i16 @umaxv_i16_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i16 @umaxv_i16_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i16 0 +; + %out = call i16 @llvm.aarch64.sve.umaxv.nxv8i16( %pg, zeroinitializer) + ret i16 %out +} + +define i16 @umaxv_i16_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i16 @umaxv_i16_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i16 @llvm.aarch64.sve.umaxv.nxv8i16( [[PG]], splat (i16 1)) +; CHECK-NEXT: ret i16 [[OUT]] +; + %out = call i16 @llvm.aarch64.sve.umaxv.nxv8i16( %pg, splat(i16 1)) + ret i16 %out +} + +define i32 @umaxv_i32_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i32 @umaxv_i32_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i32 0 +; + %out = call i32 @llvm.aarch64.sve.umaxv.nxv4i32( %pg, zeroinitializer) + ret i32 %out +} + +define i32 @umaxv_i32_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i32 @umaxv_i32_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i32 @llvm.aarch64.sve.umaxv.nxv4i32( [[PG]], splat (i32 1)) +; CHECK-NEXT: ret i32 [[OUT]] +; + %out = call i32 @llvm.aarch64.sve.umaxv.nxv4i32( %pg, splat(i32 1)) + ret i32 %out +} + +define i64 @umaxv_i64_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @umaxv_i64_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 0 +; + %out = call i64 @llvm.aarch64.sve.umaxv.nxv2i64( %pg, zeroinitializer) + ret i64 %out +} + +define i64 @umaxv_i64_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @umaxv_i64_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.umaxv.nxv2i64( [[PG]], splat (i64 1)) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.umaxv.nxv2i64( %pg, splat(i64 1)) + ret i64 %out +} + +; +; UMINV +; + +define i8 @uminv_i8_no_active( %a) #0 { +; CHECK-LABEL: define i8 @uminv_i8_no_active( +; CHECK-SAME: [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 -1 +; + %out = call i8 @llvm.aarch64.sve.uminv.nxv16i8( zeroinitializer, %a) + ret i8 %out +} + +define i8 @uminv_i8_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i8 @uminv_i8_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 -1 +; + %out = call i8 @llvm.aarch64.sve.uminv.nxv16i8( %pg, splat(i8 -1)) + ret i8 %out +} + +define i8 @uminv_i8_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i8 @uminv_i8_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i8 @llvm.aarch64.sve.uminv.nxv16i8( [[PG]], zeroinitializer) +; CHECK-NEXT: ret i8 [[OUT]] +; + %out = call i8 @llvm.aarch64.sve.uminv.nxv16i8( %pg, zeroinitializer) + ret i8 %out +} + +define i8 @uminv_i8_all_active_splat(i8 %a) #0 { +; CHECK-LABEL: define i8 @uminv_i8_all_active_splat( +; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 [[A]] +; + %a.insert = insertelement poison, i8 %a, i8 0 + %a.splat = shufflevector %a.insert, poison, zeroinitializer + %out = call i8 @llvm.aarch64.sve.uminv.nxv16i8( splat (i1 true), %a.splat) + ret i8 %out +} + +define i16 @uminv_i16_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i16 @uminv_i16_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i16 -1 +; + %out = call i16 @llvm.aarch64.sve.uminv.nxv8i16( %pg, splat(i16 -1)) + ret i16 %out +} + +define i16 @uminv_i16_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i16 @uminv_i16_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i16 @llvm.aarch64.sve.uminv.nxv8i16( [[PG]], zeroinitializer) +; CHECK-NEXT: ret i16 [[OUT]] +; + %out = call i16 @llvm.aarch64.sve.uminv.nxv8i16( %pg, zeroinitializer) + ret i16 %out +} + +define i32 @uminv_i32_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i32 @uminv_i32_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i32 -1 +; + %out = call i32 @llvm.aarch64.sve.uminv.nxv4i32( %pg, splat(i32 -1)) + ret i32 %out +} + +define i32 @uminv_i32_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i32 @uminv_i32_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i32 @llvm.aarch64.sve.uminv.nxv4i32( [[PG]], zeroinitializer) +; CHECK-NEXT: ret i32 [[OUT]] +; + %out = call i32 @llvm.aarch64.sve.uminv.nxv4i32( %pg, zeroinitializer) + ret i32 %out +} + +define i64 @uminv_i64_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @uminv_i64_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 -1 +; + %out = call i64 @llvm.aarch64.sve.uminv.nxv2i64( %pg, splat(i64 -1)) + ret i64 %out +} + +define i64 @uminv_i64_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @uminv_i64_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.uminv.nxv2i64( [[PG]], zeroinitializer) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.uminv.nxv2i64( %pg, zeroinitializer) + ret i64 %out +} + +attributes #0 = { "target-features"="+sve" } diff --git a/llvm/test/Transforms/InstSimplify/AArch64/lit.local.cfg b/llvm/test/Transforms/InstSimplify/AArch64/lit.local.cfg new file mode 100644 index 0000000000000..10d4a0e953ed4 --- /dev/null +++ b/llvm/test/Transforms/InstSimplify/AArch64/lit.local.cfg @@ -0,0 +1,2 @@ +if not "AArch64" in config.root.targets: + config.unsupported = True diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll index 20676f3702294..10c265519952b 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll @@ -14,23 +14,23 @@ define void @foo(i64 %n) { ; CHECK-EMPTY: ; CHECK-NEXT: outer.header: ; CHECK-NEXT: EMIT-SCALAR ir<%outer.iv> = phi [ ir<%outer.iv.next>, outer.latch ], [ ir<0>, ir-bb ] -; CHECK-NEXT: EMIT ir<%gep.1> = getelementptr ir<@arr2>, ir<0>, ir<%outer.iv> +; CHECK-NEXT: EMIT ir<%gep.1> = getelementptr inbounds ir<@arr2>, ir<0>, ir<%outer.iv> ; CHECK-NEXT: EMIT store ir<%outer.iv>, ir<%gep.1> -; CHECK-NEXT: EMIT ir<%add> = add ir<%outer.iv>, ir<%n> +; CHECK-NEXT: EMIT ir<%add> = add nsw ir<%outer.iv>, ir<%n> ; CHECK-NEXT: Successor(s): inner ; CHECK-EMPTY: ; CHECK-NEXT: inner: ; CHECK-NEXT: EMIT-SCALAR ir<%inner.iv> = phi [ ir<%inner.iv.next>, inner ], [ ir<0>, outer.header ] -; CHECK-NEXT: EMIT ir<%gep.2> = getelementptr ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv> +; CHECK-NEXT: EMIT ir<%gep.2> = getelementptr inbounds ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv> ; CHECK-NEXT: EMIT store ir<%add>, ir<%gep.2> -; CHECK-NEXT: EMIT ir<%inner.iv.next> = add ir<%inner.iv>, ir<1> -; CHECK-NEXT: EMIT ir<%inner.ec> = icmp ir<%inner.iv.next>, ir<8> +; CHECK-NEXT: EMIT ir<%inner.iv.next> = add nuw nsw ir<%inner.iv>, ir<1> +; CHECK-NEXT: EMIT ir<%inner.ec> = icmp eq ir<%inner.iv.next>, ir<8> ; CHECK-NEXT: EMIT branch-on-cond ir<%inner.ec> ; CHECK-NEXT: Successor(s): outer.latch, inner ; CHECK-EMPTY: ; CHECK-NEXT: outer.latch: -; CHECK-NEXT: EMIT ir<%outer.iv.next> = add ir<%outer.iv>, ir<1> -; CHECK-NEXT: EMIT ir<%outer.ec> = icmp ir<%outer.iv.next>, ir<8> +; CHECK-NEXT: EMIT ir<%outer.iv.next> = add nuw nsw ir<%outer.iv>, ir<1> +; CHECK-NEXT: EMIT ir<%outer.ec> = icmp eq ir<%outer.iv.next>, ir<8> ; CHECK-NEXT: EMIT branch-on-cond ir<%outer.ec> ; CHECK-NEXT: Successor(s): ir-bb, outer.header ; CHECK-EMPTY: diff --git a/llvm/unittests/ADT/APIntTest.cpp b/llvm/unittests/ADT/APIntTest.cpp index ca9f9f17ee112..4cb537da72e87 100644 --- a/llvm/unittests/ADT/APIntTest.cpp +++ b/llvm/unittests/ADT/APIntTest.cpp @@ -3823,4 +3823,87 @@ TEST(APIntTest, Fshr) { -8193); } +TEST(APIntTest, clmul) { + EXPECT_EQ(APIntOps::clmul(APInt(4, 1), APInt(4, 2)).getZExtValue(), 2U); + EXPECT_EQ(APIntOps::clmul(APInt(4, 5), APInt(4, 6)).getZExtValue(), 14U); + EXPECT_EQ(APIntOps::clmul(APInt(4, -4, /*isSigned*/ true), + APInt(4, 2, /*isSigned*/ false)) + .getSExtValue(), + -8); + EXPECT_EQ(APIntOps::clmul(APInt(4, -4, /*isSigned*/ true), + APInt(4, -5, /*isSigned*/ true)) + .getSExtValue(), + 4); + EXPECT_EQ(APIntOps::clmul(APInt(8, 0), APInt(8, 255)).getZExtValue(), 0U); + EXPECT_EQ(APIntOps::clmul(APInt(8, 15), APInt(8, 15)).getZExtValue(), 85U); + EXPECT_EQ(APIntOps::clmul(APInt(8, 1), APInt(8, 2)).getZExtValue(), 2U); + EXPECT_EQ(APIntOps::clmul(APInt(64, 0, /*isSigned*/ true), + APInt(64, 9223372036854775807, /*isSigned*/ true)) + .getSExtValue(), + 0); + EXPECT_EQ(APIntOps::clmul(APInt(64, 1, /*isSigned*/ true), + APInt(64, 2, /*isSigned*/ true)) + .getSExtValue(), + 2); + EXPECT_EQ(APIntOps::clmul(APInt(16, -2, /*isSigned*/ true), + APInt(16, -1, /*isSigned*/ true)) + .getSExtValue(), + -21846); +} + +TEST(APIntTest, clmulr) { + EXPECT_EQ(APIntOps::clmulr(APInt(4, 1), APInt(4, 2)).getZExtValue(), 0U); + EXPECT_EQ(APIntOps::clmulr(APInt(4, 5), APInt(4, 6)).getZExtValue(), 3U); + EXPECT_EQ(APIntOps::clmulr(APInt(4, -4, /*isSigned*/ true), + APInt(4, 2, /*isSigned*/ false)) + .getSExtValue(), + 3); + EXPECT_EQ(APIntOps::clmulr(APInt(4, -4, /*isSigned*/ true), + APInt(4, -5, /*isSigned*/ true)) + .getSExtValue(), + -2); + EXPECT_EQ(APIntOps::clmulr(APInt(8, 0), APInt(8, 255)).getZExtValue(), 0U); + EXPECT_EQ(APIntOps::clmulr(APInt(8, 15), APInt(8, 15)).getZExtValue(), 0U); + EXPECT_EQ(APIntOps::clmulr(APInt(8, 1), APInt(8, 2)).getZExtValue(), 0U); + EXPECT_EQ(APIntOps::clmulr(APInt(64, 0, /*isSigned*/ true), + APInt(64, 9223372036854775807, /*isSigned*/ true)) + .getSExtValue(), + 0); + EXPECT_EQ(APIntOps::clmulr(APInt(64, 1, /*isSigned*/ true), + APInt(64, 2, /*isSigned*/ true)) + .getSExtValue(), + 0); + EXPECT_EQ(APIntOps::clmulr(APInt(16, -2, /*isSigned*/ true), + APInt(16, -1, /*isSigned*/ true)) + .getSExtValue(), + -21845); +} + +TEST(APIntTest, clmulh) { + EXPECT_EQ(APIntOps::clmulh(APInt(4, 1), APInt(4, 2)).getZExtValue(), 0U); + EXPECT_EQ(APIntOps::clmulh(APInt(4, 5), APInt(4, 6)).getZExtValue(), 1U); + EXPECT_EQ(APIntOps::clmulh(APInt(4, -4, /*isSigned*/ true), + APInt(4, 2, /*isSigned*/ false)) + .getSExtValue(), + 1); + EXPECT_EQ(APIntOps::clmulh(APInt(4, -4, /*isSigned*/ true), + APInt(4, -5, /*isSigned*/ true)) + .getSExtValue(), + 7); + EXPECT_EQ(APIntOps::clmulh(APInt(8, 0), APInt(8, 255)).getZExtValue(), 0U); + EXPECT_EQ(APIntOps::clmulh(APInt(8, 15), APInt(8, 15)).getZExtValue(), 0U); + EXPECT_EQ(APIntOps::clmulh(APInt(8, 1), APInt(8, 2)).getZExtValue(), 0U); + EXPECT_EQ(APIntOps::clmulh(APInt(64, 0, /*isSigned*/ true), + APInt(64, 9223372036854775807, /*isSigned*/ true)) + .getSExtValue(), + 0); + EXPECT_EQ(APIntOps::clmulh(APInt(64, 1, /*isSigned*/ true), + APInt(64, 2, /*isSigned*/ true)) + .getSExtValue(), + 0); + EXPECT_EQ(APIntOps::clmulh(APInt(16, -2, /*isSigned*/ true), + APInt(16, -1, /*isSigned*/ true)) + .getSExtValue(), + 21845); +} } // end anonymous namespace diff --git a/llvm/unittests/Support/JobserverTest.cpp b/llvm/unittests/Support/JobserverTest.cpp index d27445897db0a..1917145704608 100644 --- a/llvm/unittests/Support/JobserverTest.cpp +++ b/llvm/unittests/Support/JobserverTest.cpp @@ -15,6 +15,7 @@ #include "llvm/Config/llvm-config.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Parallel.h" +#include "llvm/Support/Program.h" #include "llvm/Support/ThreadPool.h" #include "llvm/Support/raw_ostream.h" #include "gtest/gtest.h" @@ -40,8 +41,14 @@ using namespace llvm; +// Provided by the unit test main to locate the current test binary. +extern const char *TestMainArgv0; + namespace { +// Unique anchor whose address helps locate the current test binary. +static int JobserverTestAnchor = 0; + // RAII helper to set an environment variable for the duration of a test. class ScopedEnvironment { std::string Name; @@ -382,51 +389,93 @@ TEST_F(JobserverStrategyTest, ThreadPoolConcurrencyIsLimited) { EXPECT_EQ(CompletedTasks, NumTasks); } -TEST_F(JobserverStrategyTest, ParallelForIsLimited) { +// Parent-side driver that spawns a fresh process to run the child test which +// validates that parallelFor respects the jobserver limit when it is the first +// user of the default executor in that process. +TEST_F(JobserverStrategyTest, ParallelForIsLimited_Subprocess) { + // Mark child execution. + setenv("LLVM_JOBSERVER_TEST_CHILD", "1", 1); + + // Find the current test binary and build args to run only the child test. + std::string Executable = + sys::fs::getMainExecutable(TestMainArgv0, &JobserverTestAnchor); + ASSERT_FALSE(Executable.empty()) << "Failed to get main executable path"; + SmallVector Args{Executable, + "--gtest_filter=JobserverStrategyTest." + "ParallelForIsLimited_SubprocessChild"}; + + std::string Error; + bool ExecFailed = false; + int RC = sys::ExecuteAndWait(Executable, Args, std::nullopt, {}, 0, 0, &Error, + &ExecFailed); + unsetenv("LLVM_JOBSERVER_TEST_CHILD"); + ASSERT_FALSE(ExecFailed) << Error; + ASSERT_EQ(RC, 0) << "Executable failed with exit code " << RC; +} + +// Child-side test: create FIFO and make-proxy in this process, set the +// jobserver strategy, and then run parallelFor. +TEST_F(JobserverStrategyTest, ParallelForIsLimited_SubprocessChild) { + if (!getenv("LLVM_JOBSERVER_TEST_CHILD")) + GTEST_SKIP() << "Not running in child mode"; + // This test verifies that llvm::parallelFor respects the jobserver limit. const int NumExplicitJobs = 3; const int ConcurrencyLimit = NumExplicitJobs + 1; // +1 implicit const int NumTasks = 20; - LLVM_DEBUG(dbgs() << "Calling startMakeProxy with " << NumExplicitJobs - << " jobs.\n"); startMakeProxy(NumExplicitJobs); - LLVM_DEBUG(dbgs() << "MakeProxy is running.\n"); - // Set the global strategy. parallelFor will use this. + // Set the global strategy before any default executor is created. parallel::strategy = jobserver_concurrency(); std::atomic ActiveTasks{0}; std::atomic MaxActiveTasks{0}; - parallelFor(0, NumTasks, [&](int i) { + parallelFor(0, NumTasks, [&]([[maybe_unused]] int i) { int CurrentActive = ++ActiveTasks; - LLVM_DEBUG(dbgs() << "Task " << i << ": Active tasks: " << CurrentActive - << "\n"); int OldMax = MaxActiveTasks.load(); while (CurrentActive > OldMax) MaxActiveTasks.compare_exchange_weak(OldMax, CurrentActive); - std::this_thread::sleep_for(std::chrono::milliseconds(20)); --ActiveTasks; }); - LLVM_DEBUG(dbgs() << "ParallelFor finished. Max active tasks was " - << MaxActiveTasks << ".\n"); EXPECT_LE(MaxActiveTasks, ConcurrencyLimit); } -TEST_F(JobserverStrategyTest, ParallelSortIsLimited) { - // This test serves as an integration test to ensure parallelSort completes - // correctly when running under the jobserver strategy. It doesn't directly - // measure concurrency but verifies correctness. +// Parent-side driver for parallelSort child test. +TEST_F(JobserverStrategyTest, ParallelSortIsLimited_Subprocess) { + setenv("LLVM_JOBSERVER_TEST_CHILD", "1", 1); + + std::string Executable = + sys::fs::getMainExecutable(TestMainArgv0, &JobserverTestAnchor); + ASSERT_FALSE(Executable.empty()) << "Failed to get main executable path"; + SmallVector Args{Executable, + "--gtest_filter=JobserverStrategyTest." + "ParallelSortIsLimited_SubprocessChild"}; + + std::string Error; + bool ExecFailed = false; + int RC = sys::ExecuteAndWait(Executable, Args, std::nullopt, {}, 0, 0, &Error, + &ExecFailed); + unsetenv("LLVM_JOBSERVER_TEST_CHILD"); + ASSERT_FALSE(ExecFailed) << Error; + ASSERT_EQ(RC, 0) << "Executable failed with exit code " << RC; +} + +// Child-side test: ensure parallelSort runs and completes correctly under the +// jobserver strategy when it owns default executor initialization. +TEST_F(JobserverStrategyTest, ParallelSortIsLimited_SubprocessChild) { + if (!getenv("LLVM_JOBSERVER_TEST_CHILD")) + GTEST_SKIP() << "Not running in child mode"; + const int NumExplicitJobs = 3; startMakeProxy(NumExplicitJobs); parallel::strategy = jobserver_concurrency(); std::vector V(1024); - // Fill with random data std::mt19937 randEngine; std::uniform_int_distribution dist; for (int &i : V) diff --git a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp index b99d656c5c50f..5742df2aa3c53 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp @@ -139,12 +139,12 @@ compound=true "vector.body:\l" + " EMIT vp\<%2\> = CANONICAL-INDUCTION ir\<0\>, vp\<%index.next\>\l" + " EMIT-SCALAR ir\<%indvars.iv\> = phi [ ir\<0\>, vector.ph ], [ ir\<%indvars.iv.next\>, vector.body ]\l" + - " EMIT ir\<%arr.idx\> = getelementptr ir\<%A\>, ir\<%indvars.iv\>\l" + + " EMIT ir\<%arr.idx\> = getelementptr inbounds ir\<%A\>, ir\<%indvars.iv\>\l" + " EMIT ir\<%l1\> = load ir\<%arr.idx\>\l" + " EMIT ir\<%res\> = add ir\<%l1\>, ir\<10\>\l" + " EMIT store ir\<%res\>, ir\<%arr.idx\>\l" + " EMIT ir\<%indvars.iv.next\> = add ir\<%indvars.iv\>, ir\<1\>\l" + - " EMIT ir\<%exitcond\> = icmp ir\<%indvars.iv.next\>, ir\<%N\>\l" + + " EMIT ir\<%exitcond\> = icmp ne ir\<%indvars.iv.next\>, ir\<%N\>\l" + " EMIT vp\<%3\> = not ir\<%exitcond\>\l" + " EMIT vp\<%index.next\> = add nuw vp\<%2\>, vp\<%0\>\l" + " EMIT branch-on-count vp\<%index.next\>, vp\<%1\>\l" + @@ -305,9 +305,9 @@ compound=true "vector.body:\l" + " EMIT vp\<%2\> = CANONICAL-INDUCTION ir\<0\>, vp\<%index.next\>\l" + " EMIT-SCALAR ir\<%iv\> = phi [ ir\<0\>, vector.ph ], [ ir\<%iv.next\>, loop.latch ]\l" + - " EMIT ir\<%arr.idx\> = getelementptr ir\<%A\>, ir\<%iv\>\l" + + " EMIT ir\<%arr.idx\> = getelementptr inbounds ir\<%A\>, ir\<%iv\>\l" + " EMIT ir\<%l1\> = load ir\<%arr.idx\>\l" + - " EMIT ir\<%c\> = icmp ir\<%l1\>, ir\<0\>\l" + + " EMIT ir\<%c\> = icmp eq ir\<%l1\>, ir\<0\>\l" + "Successor(s): loop.latch\l" ] N4 -> N6 [ label=""] @@ -316,7 +316,7 @@ compound=true " EMIT ir\<%res\> = add ir\<%l1\>, ir\<10\>\l" + " EMIT store ir\<%res\>, ir\<%arr.idx\>\l" + " EMIT ir\<%iv.next\> = add ir\<%iv\>, ir\<1\>\l" + - " EMIT ir\<%exitcond\> = icmp ir\<%iv.next\>, ir\<%N\>\l" + + " EMIT ir\<%exitcond\> = icmp ne ir\<%iv.next\>, ir\<%N\>\l" + " EMIT vp\<%3\> = not ir\<%exitcond\>\l" + " EMIT vp\<%index.next\> = add nuw vp\<%2\>, vp\<%0\>\l" + " EMIT branch-on-count vp\<%index.next\>, vp\<%1\>\l" + diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp index 3842ba235ead3..63776b78a2088 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -1009,7 +1009,7 @@ TEST_F(VPRecipeTest, CastVPWidenRecipeToVPUser) { SmallVector Args; Args.push_back(Op1); Args.push_back(Op2); - VPWidenRecipe WidenR(*AI, Args, VPIRMetadata(), DebugLoc()); + VPWidenRecipe WidenR(*AI, Args); checkVPRecipeCastImpl(&WidenR); delete AI; @@ -1053,7 +1053,7 @@ TEST_F(VPRecipeTest, CastVPWidenSelectRecipeToVPUserAndVPDef) { Args.push_back(Op1); Args.push_back(Op2); Args.push_back(Op3); - VPWidenSelectRecipe WidenSelectR(*SelectI, + VPWidenSelectRecipe WidenSelectR(SelectI, make_range(Args.begin(), Args.end())); checkVPRecipeCastImpl( @@ -1093,7 +1093,7 @@ TEST_F(VPRecipeTest, CastVPWidenCastRecipeToVPUser) { IntegerType *Int64 = IntegerType::get(C, 64); auto *Cast = CastInst::CreateZExtOrBitCast(PoisonValue::get(Int32), Int64); VPValue *Op1 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1)); - VPWidenCastRecipe Recipe(Instruction::ZExt, Op1, Int64, *Cast, {}); + VPWidenCastRecipe Recipe(Instruction::ZExt, Op1, Int64, Cast); checkVPRecipeCastImpl(&Recipe); delete Cast; @@ -1264,7 +1264,7 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) { SmallVector Args; Args.push_back(Op1); Args.push_back(Op2); - VPWidenRecipe Recipe(*AI, Args, VPIRMetadata(), DebugLoc()); + VPWidenRecipe Recipe(*AI, Args); EXPECT_FALSE(Recipe.mayHaveSideEffects()); EXPECT_FALSE(Recipe.mayReadFromMemory()); EXPECT_FALSE(Recipe.mayWriteToMemory()); @@ -1283,7 +1283,7 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) { Args.push_back(Op1); Args.push_back(Op2); Args.push_back(Op3); - VPWidenSelectRecipe Recipe(*SelectI, make_range(Args.begin(), Args.end())); + VPWidenSelectRecipe Recipe(SelectI, make_range(Args.begin(), Args.end())); EXPECT_FALSE(Recipe.mayHaveSideEffects()); EXPECT_FALSE(Recipe.mayReadFromMemory()); EXPECT_FALSE(Recipe.mayWriteToMemory()); @@ -1412,7 +1412,7 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) { Args.push_back(Op1); Args.push_back(Op2); Args.push_back(CalledFn); - VPWidenCallRecipe Recipe(Call, TheFn, Args); + VPWidenCallRecipe Recipe(Call, TheFn, Args, VPIRFlags(), VPIRMetadata()); EXPECT_FALSE(Recipe.mayHaveSideEffects()); EXPECT_FALSE(Recipe.mayReadFromMemory()); EXPECT_FALSE(Recipe.mayWriteToMemory()); @@ -1468,8 +1468,7 @@ TEST_F(VPRecipeTest, dumpRecipeInPlan) { VPValue *ExtVPV2 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2)); Args.push_back(ExtVPV1); Args.push_back(ExtVPV2); - VPWidenRecipe *WidenR = - new VPWidenRecipe(*AI, Args, VPIRMetadata(), DebugLoc()); + VPWidenRecipe *WidenR = new VPWidenRecipe(*AI, Args); VPBB1->appendRecipe(WidenR); { diff --git a/llvm/utils/TableGen/Basic/TableGen.cpp b/llvm/utils/TableGen/Basic/TableGen.cpp index b79ae93dab4f7..a655cbbc16096 100644 --- a/llvm/utils/TableGen/Basic/TableGen.cpp +++ b/llvm/utils/TableGen/Basic/TableGen.cpp @@ -73,7 +73,7 @@ int tblgen_main(int argc, char **argv) { InitLLVM X(argc, argv); cl::ParseCommandLineOptions(argc, argv); - std::function MainFn = nullptr; + MultiFileTableGenMainFn MainFn = nullptr; return TableGenMain(argv[0], MainFn); } diff --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp index ef7b13e8940f8..3486a7a7fb08c 100644 --- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp +++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp @@ -1878,6 +1878,8 @@ TableGenOutputFiles RegisterInfoEmitter::run(StringRef FilenamePrefix) { if (RegisterInfoDebug) debugDump(errs()); + // The suffixes should be in sync with the tablegen function in + // llvm/cmake/modules/TableGen.cmake. return {Main, {{"Enums.inc", Enums}, {"MCDesc.inc", MCDesc}, diff --git a/mlir/docs/Dialects/NVVM/_index.md b/mlir/docs/Dialects/NVVM/_index.md new file mode 100644 index 0000000000000..f4832f76f86ad --- /dev/null +++ b/mlir/docs/Dialects/NVVM/_index.md @@ -0,0 +1,84 @@ +# NVVM Dialect + +The NVVM dialect is MLIR's LLVM-IR-based, NVIDIA-specific backend dialect. It +models NVVM intrinsics and public ISA functionality and introduces NVIDIA +extensions to the MLIR/LLVM type system and address spaces (e.g., global, +shared, and cluster memory), enabling faithful lowering of GPU kernels to the +NVPTX toolchain. While a NVVM op usually maps to a single LLVM IR intrinsic, +the NVVM dialect uses type polymorphism and other attributes so that a single +NVVM op can map to different LLVM intrinsics. + +## Scope and Capabilities + +The dialect covers core GPU features such as thread/block builtins, barriers +and atomics, warp-level collectives (e.g., shuffle/vote), matrix/tensor core +operations (e.g., `mma.sync`, `wgmma`), tensor memory accelerator (TMA) +operations, asynchronous copies (`cp.async`, bulk/tensor variants) with memory +barriers, cache and prefetch controls, and NVVM-specific attributes and enums +(e.g., FP rounding modes, memory scopes, and MMA types/layouts). + +## Placement in the Lowering Pipeline + +NVVM sits below target-agnostic dialects like `gpu` and NVIDIA's `nvgpu`. +Typical pipelines convert `gpu`/`nvgpu` ops into NVVM using +`-convert-gpu-to-nvvm` and `-convert-nvgpu-to-nvvm`, then translate into LLVM +for final code generation via NVPTX backend. + +## Target Configuration and Serialization + +NVVM provides a `#nvvm.target` attribute to describe the GPU target (SM, +features, and flags). In conjunction with `gpu` serialization (e.g., +`gpu-module-to-binary`), this enables producing architecture-specific GPU +binaries (such as CUBIN) from nested GPU modules. + +## Inline PTX + +When an intrinsic is unavailable or a performance-critical sequence must be +expressed directly, NVVM provides an `nvvm.inline_ptx` op to embed PTX inline +as a last-resort escape hatch, with explicit operands and results. + +## Memory Spaces + +The NVVM dialect introduces the following memory spaces, each with distinct +scopes and lifetimes: + +| Memory Space | Address Space | Scope | +|-------------------|---------------|----------------------| +| `generic` | 0 | All threads | +| `global` | 1 | All threads (device) | +| `shared` | 3 | Thread block (CTA) | +| `constant` | 4 | All threads | +| `local` | 5 | Single thread | +| `tensor` | 6 | Thread block (CTA) | +| `shared_cluster` | 7 | Thread block cluster | + +### Memory Space Details + +- **generic**: Can point to any memory space; requires runtime resolution of + actual address space. Use when pointer origin is unknown at compile time. + Performance varies based on the underlying memory space. +- **global**: Accessible by all threads across all blocks; persists across + kernel launches. Highest latency but largest capacity (device memory). Best + for large data and inter-kernel communication. +- **shared**: Shared within a thread block (CTA); very fast on-chip memory for + cooperation between threads in the same block. Limited capacity. Ideal for + block-level collaboration, caching, and reducing global memory traffic. +- **constant**: Read-only memory cached per SM. Size typically limited to 64KB. + Best for read-only data and uniform values accessed by all threads. +- **local**: Private to each thread. Use for per-thread private data and + automatic variables that don't fit in registers. +- **tensor**: Special memory space for tensor core operations. Used by + `tcgen05` instructions on SM 100+ for tensor input/output operations. +- **shared_cluster**: Distributed shared memory across thread blocks within a + cluster (SM 90+). Enables collaboration beyond single-block scope with fast + access across cluster threads. + + +## Non-Goals + +NVVM is not a place for convenience or "wrapper" ops. It is not intended to +introduce high-level ops that expand into multiple unrelated NVVM intrinsics or +that lower to no intrinsic at all. Such abstractions belong in higher-level +dialects (e.g., `nvgpu`, `gpu`, or project-specific dialects). The design +intent is a thin, predictable, low-level surface with near-mechanical lowering +to NVVM/LLVM IR. \ No newline at end of file diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index 87c73c4587485..524b9f820f290 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -37,84 +37,6 @@ def LLVM_PointerSharedCluster : LLVM_PointerInAddressSpace<7>; //===----------------------------------------------------------------------===// def NVVM_Dialect : Dialect { - let summary = "The NVVM dialect that models NVIDIA's public ISA"; - - let description = [{ - The NVVM dialect is MLIR's LLVM-IR-based, NVIDIA-specific backend dialect. It - models NVVM intrinsics and public ISA functionality and introduces NVIDIA - extensions to the MLIR/LLVM type system and address spaces (e.g., global, - shared, and cluster memory), enabling faithful lowering of GPU kernels to the - NVPTX toolchain. While a NVVM op usually maps to a single LLVM IR intrinsic, - the NVVM dialect uses type polymorphism and other attributes so that a single - NVVM op can map to different LLVM intrinsics. - - **Scope and capabilities:** The dialect covers core GPU features such as - thread/block builtins, barriers and atomics, warp-level collectives (e.g., - shuffle/vote), matrix/tensor core operations (e.g., `mma.sync`, `wgmma`), - tensor memory accelerator (TMA) operations, asynchronous copies (`cp.async`, - bulk/tensor variants) with memory barriers, cache and prefetch controls, and - NVVM-specific attributes and enums (e.g., FP rounding modes, memory scopes, - and MMA types/layouts). - - **Non-goals:** NVVM is not a place for convenience or “wrapper” ops. It is - not intended to introduce high-level ops that expand into multiple unrelated - NVVM intrinsics or that lower to no intrinsic at all. Such abstractions belong - in higher-level dialects (e.g., `nvgpu`, `gpu`, or project-specific dialects). - The design intent is a thin, predictable, low-level surface with - near-mechanical lowering to NVVM/LLVM IR. - - **Placement in the lowering pipeline:** NVVM sits below target-agnostic - dialects like `gpu` and NVIDIA's `nvgpu`. Typical pipelines convert - `gpu`/`nvgpu` ops into NVVM using `-convert-gpu-to-nvvm` and - `-convert-nvgpu-to-nvvm`, then translate into LLVM for final code - generation via NVPTX backend. - - **Target configuration and serialization:** NVVM provides a `#nvvm.target` - attribute to describe the GPU target (SM, features, and flags). In - conjunction with `gpu` serialization (e.g., `gpu-module-to-binary`), this - enables producing architecture-specific GPU binaries (such as CUBIN) from - nested GPU modules. - - **Inline PTX:** When an intrinsic is unavailable or a performance-critical - sequence must be expressed directly, NVVM provides an `nvvm.inline_ptx` op to - embed PTX inline as a last-resort escape hatch, with explicit operands and - results. - - - **Memory Spaces:** The NVVM dialect introduces the following memory spaces, - each with distinct scopes and lifetimes: -``` - | Memory Space | Address Space | Scope | Lifetime | - |-------------------|---------------|----------------------|-------------------| - | `generic` | 0 | All threads | Context-dependent | - | `global` | 1 | All threads (device) | Application | - | `shared` | 3 | Thread block (CTA) | Kernel execution | - | `constant` | 4 | All threads (RO) | Application | - | `local` | 5 | Single thread | Kernel execution | - | `tensor` | 6 | Thread block (CTA) | Kernel execution | - | `shared_cluster` | 7 | Thread block cluster | Kernel execution | -``` - **Memory Space Details:** - - **generic**: Can point to any memory space; requires runtime resolution of - actual address space. Use when pointer origin is unknown at compile time. - Performance varies based on the underlying memory space. - - **global**: Accessible by all threads across all blocks; persists across - kernel launches. Highest latency but largest capacity (device memory). Best - for large data and inter-kernel communication. - - **shared**: Shared within a thread block (CTA); very fast on-chip memory for - cooperation between threads in the same block. Limited capacity. Ideal for - block-level collaboration, caching, and reducing global memory traffic. - - **constant**: Read-only memory cached per SM. Size typically limited to - 64KB. Best for read-only data and uniform values accessed by all threads. - - **local**: Private to each thread. Use for per-thread private data and - automatic variables that don't fit in registers. - - **tensor**: Special memory space for tensor core operations. Used by - `tcgen05` instructions on SM 100+ for tensor input/output operations. - - **shared_cluster**: Distributed shared memory across thread blocks within - a cluster (SM 90+). Enables collaboration beyond single-block scope with - fast access across cluster threads. - }]; - let name = "nvvm"; let cppNamespace = "::mlir::NVVM"; let dependentDialects = ["LLVM::LLVMDialect"]; diff --git a/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.td b/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.td index 14b00b04ccc18..420e58192b8fd 100644 --- a/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.td @@ -166,4 +166,27 @@ def TosaAttachTarget : Pass<"tosa-attach-target", "ModuleOp"> { ]; } +def TosaNarrowI64ToI32Pass : Pass<"tosa-narrow-i64-to-i32", "func::FuncOp"> { + let summary = "Narrow I64 TOSA operations to I32"; + let description = [{ + This pass narrows TOSA operations with 64-bit integer tensor types to + 32-bit integer tensor types. This can be useful for backends that do not + support the EXT-INT64 extension of TOSA. + }]; + + let options = [ + Option<"aggressiveRewrite", "aggressive-rewrite", "bool", "false", + "If enabled, all TOSA operations are rewritten, regardless or whether the narrowing" + "is safe. This option may lead to data loss if not used carefully.">, + Option<"convertFunctionBoundaries", "convert-function-boundaries", "bool", "false", + "If enabled, the pass will convert function I/O types as well. Otherwise casts will" + "be inserted at the I/O boundaries."> + ]; + + let dependentDialects = [ + "func::FuncDialect", + "tosa::TosaDialect", + ]; +} + #endif // MLIR_DIALECT_TOSA_TRANSFORMS_PASSES diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp index edc6565f44f00..b9a5e7d7f6eac 100644 --- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp +++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp @@ -1738,15 +1738,11 @@ LogicalResult ScaledExtPacked816OpLowering::matchAndRewrite( auto sourceType = cast(op.getSource().getType()); auto srcElemType = cast(sourceType.getElementType()); unsigned bitWidth = srcElemType.getWidth(); - int32_t scaleSel = - getScaleSel(blockSize, bitWidth, firstScaleLane, firstScaleByte); auto targetType = cast(op.getResult().getType()); auto destElemType = cast(targetType.getElementType()); - IntegerType i32 = rewriter.getI32Type(); - Value castedScale = - LLVM::BitcastOp::create(rewriter, loc, i32, adaptor.getScale()); + IntegerType i32 = rewriter.getI32Type(); Value source = adaptor.getSource(); Type llvmResultType = typeConverter->convertType(op.getResult().getType()); Type packedType = nullptr; @@ -1767,15 +1763,19 @@ LogicalResult ScaledExtPacked816OpLowering::matchAndRewrite( return rewriter.notifyMatchFailure(op, "type conversion failed"); } - Value castedSource = - LLVM::BitcastOp::create(rewriter, loc, packedType, source); - std::optional maybeIntrinsic = scaledExtPacked816ToIntrinsic(srcElemType, destElemType); if (!maybeIntrinsic.has_value()) return op.emitOpError( "no intrinsic matching packed scaled conversion on the given chipset"); + int32_t scaleSel = + getScaleSel(blockSize, bitWidth, firstScaleLane, firstScaleByte); + Value castedScale = + LLVM::BitcastOp::create(rewriter, loc, i32, adaptor.getScale()); + Value castedSource = + LLVM::BitcastOp::create(rewriter, loc, packedType, source); + OperationState loweredOp(loc, *maybeIntrinsic); loweredOp.addTypes({llvmResultType}); loweredOp.addOperands({castedSource, castedScale}); diff --git a/mlir/lib/Dialect/Tosa/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Tosa/Transforms/CMakeLists.txt index 41b338d6e7189..76e9ddd5b2304 100644 --- a/mlir/lib/Dialect/Tosa/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Tosa/Transforms/CMakeLists.txt @@ -12,6 +12,7 @@ add_mlir_dialect_library(MLIRTosaTransforms TosaTypeConverters.cpp TosaProfileCompliance.cpp TosaValidation.cpp + TosaNarrowI64ToI32.cpp ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Tosa/Transforms @@ -21,6 +22,7 @@ add_mlir_dialect_library(MLIRTosaTransforms LINK_LIBS PUBLIC MLIRFuncDialect + MLIRFuncTransformOps MLIRPass MLIRTosaDialect MLIRTransformUtils diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaNarrowI64ToI32.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaNarrowI64ToI32.cpp new file mode 100644 index 0000000000000..ddaf7d8a5e033 --- /dev/null +++ b/mlir/lib/Dialect/Tosa/Transforms/TosaNarrowI64ToI32.cpp @@ -0,0 +1,310 @@ +//===- TosaNarrowI64ToI32.cpp ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass narrows TOSA operations with 64-bit integer tensor types to +// 32-bit integer tensor types. This can be useful for backends that do not +// support the EXT-INT64 extension of TOSA. The pass has two options: +// +// - aggressive-rewrite - If enabled, all TOSA operations are rewritten, +// regardless or whether the narrowing is safe. This option may lead to +// data loss if not used carefully. +// - convert-function-boundaries - If enabled, the pass will convert function +// I/O types as well. Otherwise casts will be inserted at the I/O +// boundaries. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Tosa/Transforms/Passes.h" + +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/Func/Transforms/FuncConversions.h" +#include "mlir/IR/Verifier.h" +#include "mlir/Pass/Pass.h" + +namespace mlir { +namespace tosa { +#define GEN_PASS_DEF_TOSANARROWI64TOI32PASS +#include "mlir/Dialect/Tosa/Transforms/Passes.h.inc" +} // namespace tosa +} // namespace mlir + +using namespace mlir; +using namespace mlir::tosa; + +namespace { + +LogicalResult convertGenericOp(Operation *op, ValueRange operands, + ConversionPatternRewriter &rewriter, + const TypeConverter *typeConverter) { + // Convert types of results + SmallVector newResults; + if (failed(typeConverter->convertTypes(op->getResultTypes(), newResults))) + return failure(); + + // Create a new operation state + OperationState state(op->getLoc(), op->getName().getStringRef(), operands, + newResults, {}, op->getSuccessors()); + + for (const NamedAttribute &namedAttribute : op->getAttrs()) { + const Attribute attribute = namedAttribute.getValue(); + + // Convert integer attribute type + if (const auto intAttr = dyn_cast(attribute)) { + const std::optional convertedAttribute = + typeConverter->convertTypeAttribute(intAttr.getType(), attribute); + state.addAttribute(namedAttribute.getName(), convertedAttribute.value()); + continue; + } + + if (const auto typeAttr = dyn_cast(attribute)) { + Type type = typeAttr.getValue(); + const std::optional convertedAttribute = + typeConverter->convertTypeAttribute(type, attribute); + if (!convertedAttribute) + return rewriter.notifyMatchFailure(op, + "Failed to convert type attribute."); + state.addAttribute(namedAttribute.getName(), convertedAttribute.value()); + continue; + } + + if (const auto denseElementsAttr = dyn_cast(attribute)) { + const Type type = denseElementsAttr.getType(); + const std::optional convertedAttribute = + typeConverter->convertTypeAttribute(type, denseElementsAttr); + if (!convertedAttribute) + return rewriter.notifyMatchFailure( + op, "Failed to convert dense elements attribute."); + state.addAttribute(namedAttribute.getName(), convertedAttribute.value()); + continue; + } + + state.addAttribute(namedAttribute.getName(), attribute); + } + + for (Region ®ion : op->getRegions()) { + Region *newRegion = state.addRegion(); + rewriter.inlineRegionBefore(region, *newRegion, newRegion->begin()); + if (failed(rewriter.convertRegionTypes(newRegion, *typeConverter))) + return failure(); + } + + Operation *newOp = rewriter.create(state); + rewriter.replaceOp(op, newOp->getResults()); + return success(); +} + +// =========================== +// Aggressive rewrite patterns +// =========================== + +class ConvertGenericOp : public ConversionPattern { +public: + ConvertGenericOp(TypeConverter &typeConverter, MLIRContext *context) + : ConversionPattern(typeConverter, MatchAnyOpTypeTag{}, 0, context) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + if (!isa(op)) + return rewriter.notifyMatchFailure( + op, + "Support for operations other than TOSA has not been implemented."); + + return convertGenericOp(op, operands, rewriter, typeConverter); + } +}; + +// =============================== +// Bounds checked rewrite patterns +// =============================== + +class ConvertArgMaxOpWithBoundsChecking + : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(tosa::ArgMaxOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const final { + // Output type can be narrowed based on the size of the axis dimension + const int32_t axis = op.getAxis(); + const auto inputType = dyn_cast(adaptor.getInput().getType()); + if (!inputType || !inputType.isStaticDim(axis)) + return rewriter.notifyMatchFailure( + op, "Requires a static axis dimension for bounds checking."); + const int64_t axisDim = inputType.getDimSize(axis); + if (axisDim >= std::numeric_limits::max()) + return rewriter.notifyMatchFailure( + op, "Axis dimension is too large to narrow safely."); + + const Type resultType = op.getOutput().getType(); + const Type newResultType = typeConverter->convertType(resultType); + rewriter.replaceOpWithNewOp(op, newResultType, + adaptor.getInput(), axis); + return success(); + } +}; + +class ConvertCastOpWithBoundsChecking + : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(tosa::CastOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const final { + const auto inputType = dyn_cast(adaptor.getInput().getType()); + const auto resultType = dyn_cast(op.getResult().getType()); + if (!inputType || !resultType) + return failure(); + + const auto elementInputIntType = + dyn_cast(inputType.getElementType()); + const auto elementResultIntType = + dyn_cast(resultType.getElementType()); + if (elementInputIntType && elementResultIntType && + elementInputIntType.getWidth() > elementResultIntType.getWidth()) + return rewriter.notifyMatchFailure( + op, "Narrowing cast may lead to data loss."); + + rewriter.replaceOpWithNewOp( + op, typeConverter->convertType(resultType), adaptor.getInput()); + return success(); + } +}; + +template +class ConvertTypedOp : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(OpTy op, typename OpTy::Adaptor adaptor, + ConversionPatternRewriter &rewriter) const final { + return convertGenericOp(op, adaptor.getOperands(), rewriter, + this->getTypeConverter()); + } +}; + +struct TosaNarrowI64ToI32 + : public tosa::impl::TosaNarrowI64ToI32PassBase { +public: + explicit TosaNarrowI64ToI32() = default; + explicit TosaNarrowI64ToI32(const TosaNarrowI64ToI32PassOptions &options) + : TosaNarrowI64ToI32() { + this->aggressiveRewrite = options.aggressiveRewrite; + this->convertFunctionBoundaries = options.convertFunctionBoundaries; + } + + void runOnOperation() override { + MLIRContext *context = &getContext(); + + TypeConverter typeConverter; + typeConverter.addConversion([](Type type) -> Type { return type; }); + typeConverter.addConversion([](IntegerType type) -> Type { + if (!type.isInteger(64)) + return type; + return IntegerType::get(type.getContext(), 32); + }); + typeConverter.addConversion( + [&typeConverter](RankedTensorType type) -> Type { + const Type elementType = type.getElementType(); + if (!elementType.isInteger(64)) + return type; + return RankedTensorType::get(type.getShape(), + typeConverter.convertType(elementType)); + }); + + const auto materializeCast = [](OpBuilder &builder, Type resultType, + ValueRange inputs, Location loc) -> Value { + if (inputs.size() != 1) + return Value(); + return tosa::CastOp::create(builder, loc, resultType, inputs.front()); + }; + typeConverter.addSourceMaterialization(materializeCast); + typeConverter.addTargetMaterialization(materializeCast); + + typeConverter.addTypeAttributeConversion( + [](IntegerType type, IntegerAttr attribute) -> Attribute { + const APInt value = attribute.getValue().truncSSat(32); + return IntegerAttr::get(IntegerType::get(type.getContext(), 32), + value); + }); + typeConverter.addTypeAttributeConversion( + [&typeConverter](ShapedType type, + DenseIntElementsAttr attr) -> Attribute { + const ShapedType newType = + cast(typeConverter.convertType(type)); + const auto oldElementType = cast(type.getElementType()); + const auto newElementType = + cast(newType.getElementType()); + if (oldElementType.getWidth() == newElementType.getWidth()) + return attr; + + DenseElementsAttr mapped = + attr.mapValues(newElementType, [&](const APInt &v) { + return v.truncSSat(newElementType.getWidth()); + }); + return mapped; + }); + + ConversionTarget target(*context); + target.addDynamicallyLegalDialect( + [&typeConverter](Operation *op) { + return typeConverter.isLegal(op->getResultTypes()) && + typeConverter.isLegal(op->getOperandTypes()); + }); + if (convertFunctionBoundaries) { + target.addDynamicallyLegalOp( + [&typeConverter](func::FuncOp op) { + return typeConverter.isSignatureLegal(op.getFunctionType()) && + typeConverter.isLegal(&op.getBody()); + }); + target.addDynamicallyLegalOp([](func::ReturnOp op) { + const FunctionType funcType = + op->getParentOfType().getFunctionType(); + return llvm::equal(op.getOperandTypes(), funcType.getResults()); + }); + } else { + target.addDynamicallyLegalOp( + [](func::FuncOp op) { return true; }); + target.addDynamicallyLegalOp( + [](func::ReturnOp op) { return true; }); + } + + RewritePatternSet patterns(context); + if (convertFunctionBoundaries) { + populateFunctionOpInterfaceTypeConversionPattern( + patterns, typeConverter); + populateReturnOpTypeConversionPattern(patterns, typeConverter); + } + if (aggressiveRewrite) { + patterns.add(typeConverter, context); + } else { + // Tensor + patterns.add(typeConverter, context); + // Data layout + patterns.add>(typeConverter, context); + patterns.add>(typeConverter, context); + patterns.add>(typeConverter, context); + patterns.add>(typeConverter, context); + patterns.add>(typeConverter, context); + patterns.add>(typeConverter, context); + patterns.add>(typeConverter, context); + patterns.add>(typeConverter, context); + // Type conversion + patterns.add(typeConverter, context); + // Controlflow + patterns.add>(typeConverter, context); + patterns.add>(typeConverter, context); + } + + if (failed( + applyFullConversion(getOperation(), target, std::move(patterns)))) + signalPassFailure(); + } +}; + +} // namespace diff --git a/mlir/python/mlir/dialects/gpu/__init__.py b/mlir/python/mlir/dialects/gpu/__init__.py index 2fbcbb059f87a..d15643ca700e4 100644 --- a/mlir/python/mlir/dialects/gpu/__init__.py +++ b/mlir/python/mlir/dialects/gpu/__init__.py @@ -49,13 +49,13 @@ class GPUFuncOp(GPUFuncOp): FUNCTION_TYPE_ATTR_NAME = "function_type" SYM_NAME_ATTR_NAME = "sym_name" - ARGUMENT_ATTR_NAME = "arg_attrs" - RESULT_ATTR_NAME = "res_attrs" def __init__( self, function_type: Union[FunctionType, TypeAttr], sym_name: Optional[Union[str, StringAttr]] = None, + arg_attrs: Optional[Sequence[dict]] = None, + res_attrs: Optional[Sequence[dict]] = None, kernel: Optional[bool] = None, workgroup_attrib_attrs: Optional[Sequence[dict]] = None, private_attrib_attrs: Optional[Sequence[dict]] = None, @@ -88,6 +88,8 @@ def __init__( ) super().__init__( function_type, + arg_attrs=arg_attrs, + res_attrs=res_attrs, workgroup_attrib_attrs=workgroup_attrib_attrs, private_attrib_attrs=private_attrib_attrs, loc=loc, diff --git a/mlir/test/Dialect/Tosa/tosa-narrow-i64-to-i32-aggressive.mlir b/mlir/test/Dialect/Tosa/tosa-narrow-i64-to-i32-aggressive.mlir new file mode 100644 index 0000000000000..1a36177a37033 --- /dev/null +++ b/mlir/test/Dialect/Tosa/tosa-narrow-i64-to-i32-aggressive.mlir @@ -0,0 +1,81 @@ +// RUN: mlir-opt -split-input-file -verify-diagnostics -tosa-narrow-i64-to-i32="aggressive-rewrite=1" %s | FileCheck %s --allow-unused-prefixes --check-prefixes=COMMON,DEFAULT +// RUN: mlir-opt -split-input-file -verify-diagnostics -tosa-narrow-i64-to-i32="aggressive-rewrite=1 convert-function-boundaries=1" %s | FileCheck %s --allow-unused-prefixes --check-prefixes=COMMON,FUNCBOUND + +// CHECK-LABEL: test_i64_argmax_large_axis_dim +func.func @test_i64_argmax_large_axis_dim(%arg0: tensor<1x513x513x2147483650xi8>) -> tensor<1x513x513xi64> { + // DEFAULT: tosa.argmax %arg0 {axis = 3 : i32} : (tensor<1x513x513x2147483650xi8>) -> tensor<1x513x513xi32> + %0 = tosa.argmax %arg0 {axis = 3 : i32} : (tensor<1x513x513x2147483650xi8>) -> tensor<1x513x513xi64> + return %0 : tensor<1x513x513xi64> +} + +// ----- + +// CHECK-LABEL: test_convert_input_parameters +// DEFAULT: %[[IN:.*]]: tensor<1x513x513x3xi64> +// FUNCBOUND: %[[IN:.*]]: tensor<1x513x513x3xi32> +func.func @test_convert_input_parameters(%arg0: tensor<1x513x513x3xi64>) -> tensor<1x513x513x3xf32> { + // DEFAULT: %[[FUNC_BOUND_CAST:.*]] = tosa.cast %[[IN]] : (tensor<1x513x513x3xi64>) -> tensor<1x513x513x3xi32> + // DEFAULT: %[[CAST1:.*]] = tosa.cast %[[FUNC_BOUND_CAST]] : (tensor<1x513x513x3xi32>) -> tensor<1x513x513x3xi32> + // FUNCBOUND: %[[CAST1:.*]] = tosa.cast %[[IN]] : (tensor<1x513x513x3xi32>) -> tensor<1x513x513x3xi32> + %0 = tosa.cast %arg0 : (tensor<1x513x513x3xi64>) -> tensor<1x513x513x3xi32> + + // COMMON: %[[CAST2:.*]] = tosa.cast %[[CAST1]] : (tensor<1x513x513x3xi32>) -> tensor<1x513x513x3xf32> + %1 = tosa.cast %0 : (tensor<1x513x513x3xi32>) -> tensor<1x513x513x3xf32> + return %1 : tensor<1x513x513x3xf32> +} + +// ----- + +// CHECK-LABEL: test_add +// DEFAULT: %[[IN0:.*]]: tensor<13x21x1xi64>, %[[IN1:.*]]: tensor<13x21x3xi64> +// FUNCBOUND: %[[IN0:.*]]: tensor<13x21x1xi32>, %[[IN1:.*]]: tensor<13x21x3xi32> +func.func @test_add(%arg0: tensor<13x21x1xi64>, %arg1: tensor<13x21x3xi64>) -> tensor<13x21x3xi64> { + // DEFAULT-DAG: %[[FUNC_BOUND_CAST0:.*]] = tosa.cast %[[IN0]] : (tensor<13x21x1xi64>) -> tensor<13x21x1xi32> + // DEFAULT-DAG: %[[FUNC_BOUND_CAST1:.*]] = tosa.cast %[[IN1]] : (tensor<13x21x3xi64>) -> tensor<13x21x3xi32> + // DEFAULT: %[[ADD:.*]] = tosa.add %[[FUNC_BOUND_CAST0]], %[[FUNC_BOUND_CAST1]] : (tensor<13x21x1xi32>, tensor<13x21x3xi32>) -> tensor<13x21x3xi32> + // DEFAULT: %[[CAST:.*]] = tosa.cast %[[ADD]] : (tensor<13x21x3xi32>) -> tensor<13x21x3xi64> + // DEFAULT: return %[[CAST]] : tensor<13x21x3xi64> + // FUNCBOUND: %[[ADD:.*]] = tosa.add %[[IN0]], %[[IN1]] : (tensor<13x21x1xi32>, tensor<13x21x3xi32>) -> tensor<13x21x3xi32> + // FUNCBOUND: return %[[ADD]] : tensor<13x21x3xi32> + %0 = tosa.add %arg0, %arg1 : (tensor<13x21x1xi64>, tensor<13x21x3xi64>) -> tensor<13x21x3xi64> + return %0 : tensor<13x21x3xi64> +} + +// ----- + +// CHECK-LABEL: test_regions +// DEFAULT: %[[IN0:.*]]: tensor, %[[IN1:.*]]: tensor +func.func @test_regions(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { + // DEFAULT-DAG: %[[CAST0:.*]] = tosa.cast %[[IN0]] : (tensor) -> tensor + // DEFAULT-DAG: %[[CAST1:.*]] = tosa.cast %[[IN1]] : (tensor) -> tensor + // COMMON: %[[IF_RESULT:.*]] = tosa.cond_if + %0 = tosa.cond_if %arg2 : tensor -> (tensor) { + // DEFAULT: %[[ADD:.*]] = tosa.add %[[CAST0]], %[[CAST1]] : (tensor, tensor) -> tensor + // FUNCBOUND: %[[ADD:.*]] = tosa.add %[[IN0]], %[[IN1]] : (tensor, tensor) -> tensor + %1 = tosa.add %arg0, %arg1 : (tensor, tensor) -> tensor + // COMMON: tosa.yield %[[ADD]] : tensor + tosa.yield %1 : tensor + } else { + // DEFAULT: %[[SUB:.*]] = tosa.sub %[[CAST0]], %[[CAST1]] : (tensor, tensor) -> tensor + // FUNCBOUND: %[[SUB:.*]] = tosa.sub %[[IN0]], %[[IN1]] : (tensor, tensor) -> tensor + %1 = tosa.sub %arg0, %arg1 : (tensor, tensor) -> tensor + // COMMON: tosa.yield %[[SUB]] : tensor + tosa.yield %1 : tensor + } + // DEFAULT: %[[OUT:.*]] = tosa.cast %[[IF_RESULT]] : (tensor) -> tensor + // DEFAULT: return %[[OUT]] : tensor + // FUNCBOUND: return %[[IF_RESULT]] : tensor + return %0 : tensor +} + +// ----- + +// CHECK-LABEL: test_const +func.func @test_const() -> tensor<2xi64> { + // COMMON: %[[CONST:.*]] = "tosa.const"() <{values = dense<[1, 2]> : tensor<2xi32>}> : () -> tensor<2xi32> + %0 = "tosa.const"() <{values = dense<[1, 2]> : tensor<2xi64>}> : () -> tensor<2xi64> + // DEFAULT: %[[OUT:.*]] = tosa.cast %[[CONST]] : (tensor<2xi32>) -> tensor<2xi64> + // DEFAULT: return %[[OUT]] : tensor<2xi64> + // FUNCBOUND: return %[[CONST]] : tensor<2xi32> + return %0 : tensor<2xi64> +} diff --git a/mlir/test/Dialect/Tosa/tosa-narrow-i64-to-i32.mlir b/mlir/test/Dialect/Tosa/tosa-narrow-i64-to-i32.mlir new file mode 100644 index 0000000000000..a14483fcdd7b0 --- /dev/null +++ b/mlir/test/Dialect/Tosa/tosa-narrow-i64-to-i32.mlir @@ -0,0 +1,162 @@ +// RUN: mlir-opt -split-input-file -verify-diagnostics -tosa-narrow-i64-to-i32="convert-function-boundaries=0" %s | FileCheck %s --allow-unused-prefixes --check-prefixes=COMMON,DEFAULT +// RUN: mlir-opt -split-input-file -verify-diagnostics -tosa-narrow-i64-to-i32="convert-function-boundaries=1" %s | FileCheck %s --allow-unused-prefixes --check-prefixes=COMMON,FUNCBOUND + +// ----- + +// CHECK-LABEL: test_i64_argmax +func.func @test_i64_argmax(%arg0: tensor<1x513x513x19xi8>) -> tensor<1x513x513xi64> { + // COMMON: %[[ARGMAX:.*]] = tosa.argmax %arg0 {axis = 3 : i32} : (tensor<1x513x513x19xi8>) -> tensor<1x513x513xi32> + %0 = tosa.argmax %arg0 {axis = 3 : i32} : (tensor<1x513x513x19xi8>) -> tensor<1x513x513xi64> + + // DEFAULT: %[[CAST:.*]] = tosa.cast %[[ARGMAX]] : (tensor<1x513x513xi32>) -> tensor<1x513x513xi64> + // FUNCBOUND: return %[[ARGMAX]] : tensor<1x513x513xi32> + return %0 : tensor<1x513x513xi64> +} + +// ----- + +// CHECK-LABEL: test_i64_argmax_cast +func.func @test_i64_argmax_cast(%arg0: tensor<1x513x513x19xi8>) -> tensor<1x513x513xf32> { + // COMMON: %[[ARGMAX:.*]] = tosa.argmax %arg0 {axis = 3 : i32} : (tensor<1x513x513x19xi8>) -> tensor<1x513x513xi32> + %0 = tosa.argmax %arg0 {axis = 3 : i32} : (tensor<1x513x513x19xi8>) -> tensor<1x513x513xi64> + // COMMON: tosa.cast %[[ARGMAX]] : (tensor<1x513x513xi32>) -> tensor<1x513x513xf32> + %1 = tosa.cast %0 : (tensor<1x513x513xi64>) -> tensor<1x513x513xf32> + return %1 : tensor<1x513x513xf32> +} + +// ----- + +// CHECK-LABEL: test_i64_argmax_large_axis_dim +func.func @test_i64_argmax_large_axis_dim(%arg0: tensor<1x513x513x2147483650xi8>) -> tensor<1x513x513xi64> { + // expected-error @+1 {{failed to legalize operation 'tosa.argmax'}} + %0 = tosa.argmax %arg0 {axis = 3 : i32} : (tensor<1x513x513x2147483650xi8>) -> tensor<1x513x513xi64> + return %0 : tensor<1x513x513xi64> +} + +// ----- + +// CHECK-LABEL: test_add +func.func @test_add(%arg0: tensor<13x21x1xi64>, %arg1: tensor<13x21x3xi64>) -> tensor<13x21x3xi64> { + // expected-error @+1 {{failed to legalize operation 'tosa.add'}} + %0 = tosa.add %arg0, %arg1 : (tensor<13x21x1xi64>, tensor<13x21x3xi64>) -> tensor<13x21x3xi64> + return %0 : tensor<13x21x3xi64> +} + +// ----- + +// CHECK-LABEL: test_regions +func.func @test_regions(%arg0: tensor<1x2xi32>, %arg1: tensor<1xi32>, %arg2: tensor) -> tensor<1xi32> { + // COMMON: %[[IF_RESULT:.*]] = tosa.cond_if %arg2 : tensor -> tensor<1xi32> + %0 = tosa.cond_if %arg2 : tensor -> tensor<1xi32> { + // COMMON: %[[ARGMAX:.*]] = tosa.argmax %arg0 {axis = 1 : i32} : (tensor<1x2xi32>) -> tensor<1xi32> + %1 = tosa.argmax %arg0 {axis = 1 : i32} : (tensor<1x2xi32>) -> tensor<1xi64> + // COMMON: %[[CAST:.*]] = tosa.cast %[[ARGMAX]] : (tensor<1xi32>) -> tensor<1xi32> + %2 = tosa.cast %1 : (tensor<1xi64>) -> tensor<1xi32> + // COMMON: tosa.yield %[[CAST]] : tensor<1xi32> + tosa.yield %2 : tensor<1xi32> + } else { + tosa.yield %arg1 : tensor<1xi32> + } + // COMMON: return %[[IF_RESULT]] : tensor<1xi32> + return %0 : tensor<1xi32> +} + +// ----- + +// CHECK-LABEL: test_concat +func.func @test_concat(%arg0: tensor<13x21x3xi64>, %arg1: tensor<13x21x3xi64>) -> tensor<26x21x3xi64> { + // COMMON: tosa.concat %{{.*}}, %{{.*}} {axis = 0 : i32} : (tensor<13x21x3xi32>, tensor<13x21x3xi32>) -> tensor<26x21x3xi32> + %0 = tosa.concat %arg0, %arg1 {axis = 0 : i32} : (tensor<13x21x3xi64>, tensor<13x21x3xi64>) -> tensor<26x21x3xi64> + return %0 : tensor<26x21x3xi64> +} + +// ----- + +// CHECK-LABEL: test_pad +func.func @test_pad(%arg0: tensor<13x21x3xi64>, %arg1: tensor<1xi64>) -> tensor<15x23x5xi64> { + %padding = tosa.const_shape {values = dense<1> : tensor<6xindex>} : () -> !tosa.shape<6> + // COMMON: tosa.pad %{{.*}}, %{{.*}}, %{{.*}} : (tensor<13x21x3xi32>, !tosa.shape<6>, tensor<1xi32>) -> tensor<15x23x5xi32> + %1 = tosa.pad %arg0, %padding, %arg1 : (tensor<13x21x3xi64>, !tosa.shape<6>, tensor<1xi64>) -> tensor<15x23x5xi64> + return %1 : tensor<15x23x5xi64> +} + +// ----- + +// CHECK-LABEL: test_reshape +func.func @test_reshape(%arg0: tensor<13x21x3xi64>) -> tensor<1x819xi64> { + %1 = tosa.const_shape {values = dense<[1, 819]> : tensor<2xindex>} : () -> !tosa.shape<2> + // COMMON: tosa.reshape %{{.*}}, %{{.*}} : (tensor<13x21x3xi32>, !tosa.shape<2>) -> tensor<1x819xi32> + %0 = tosa.reshape %arg0, %1 : (tensor<13x21x3xi64>, !tosa.shape<2>) -> tensor<1x819xi64> + return %0 : tensor<1x819xi64> +} + +// ----- + +// CHECK-LABEL: test_reverse +func.func @test_reverse(%arg0: tensor<13x21x3xi64>) -> tensor<13x21x3xi64> { + // COMMON: tosa.reverse %{{.*}} {axis = 0 : i32} : (tensor<13x21x3xi32>) -> tensor<13x21x3xi32> + %0 = tosa.reverse %arg0 {axis = 0 : i32} : (tensor<13x21x3xi64>) -> tensor<13x21x3xi64> + return %0 : tensor<13x21x3xi64> +} + +// ----- + +// CHECK-LABEL: test_slice +func.func @test_slice(%arg0: tensor<13x21x3xi64>) -> tensor<4x11x1xi64> { + %0 = tosa.const_shape {values = dense<[4, 11, 1]> : tensor<3xindex>} : () -> !tosa.shape<3> + %1 = tosa.const_shape {values = dense<[6, 8, 0]> : tensor<3xindex>} : () -> !tosa.shape<3> + // COMMON: tosa.slice %{{.*}}, %{{.*}}, %{{.*}} : (tensor<13x21x3xi32>, !tosa.shape<3>, !tosa.shape<3>) -> tensor<4x11x1xi32> + %2 = tosa.slice %arg0, %0, %1 : (tensor<13x21x3xi64>, !tosa.shape<3>, !tosa.shape<3>) -> tensor<4x11x1xi64> + return %2 : tensor<4x11x1xi64> +} + +// ----- + +// CHECK-LABEL: test_tile +func.func @test_tile(%arg0: tensor<13x21x3xi64>) -> tensor<39x21x6xi64> { + %cst = tosa.const_shape { values = dense<[3, 1, 2]> : tensor<3xindex> } : () -> !tosa.shape<3> + // COMMON: tosa.tile %{{.*}}, %{{.*}} : (tensor<13x21x3xi32>, !tosa.shape<3>) -> tensor<39x21x6xi32> + %0 = tosa.tile %arg0, %cst: (tensor<13x21x3xi64>, !tosa.shape<3>) -> tensor<39x21x6xi64> + return %0 : tensor<39x21x6xi64> +} + +// ----- + +// CHECK-LABEL: transpose +func.func @test_transpose(%arg0: tensor<13x21x3xi64>) -> tensor<3x13x21xi64> { + // COMMON: tosa.transpose %{{.*}} {perms = array} : (tensor<13x21x3xi32>) -> tensor<3x13x21xi32> + %1 = tosa.transpose %arg0 {perms = array} : (tensor<13x21x3xi64>) -> tensor<3x13x21xi64> + return %1 : tensor<3x13x21xi64> +} + +// ----- + +// CHECK-LABEL: test_transition_to_i64 +func.func @test_transition_to_i64(%arg0: tensor<1xi32>) -> tensor<1xi64> { + // COMMON: %[[CAST:.*]] = tosa.cast %arg0 : (tensor<1xi32>) -> tensor<1xi32> + %0 = tosa.cast %arg0 : (tensor<1xi32>) -> tensor<1xi64> + // COMMON: %[[IDENTITY1:.*]] = tosa.identity %[[CAST]] : (tensor<1xi32>) -> tensor<1xi32> + %1 = tosa.identity %0 : (tensor<1xi64>) -> tensor<1xi64> + // COMMON: %[[IDENTITY2:.*]] = tosa.identity %[[IDENTITY1]] : (tensor<1xi32>) -> tensor<1xi32> + %2 = tosa.identity %1 : (tensor<1xi64>) -> tensor<1xi64> + // DEFAULT: %[[OUT_CAST:.*]] = tosa.cast %[[IDENTITY2]] : (tensor<1xi32>) -> tensor<1xi64> + // DEFAULT: return %[[OUT_CAST]] : tensor<1xi64> + // FUNCBOUND: return %[[IDENTITY2]] : tensor<1xi32> + return %2 : tensor<1xi64> +} + +// ----- + +// CHECK-LABEL: test_transition_from_i64 +func.func @test_transition_from_i64(%arg0: tensor<1xi64>) -> tensor<1xi32> { + // DEFAULT: %[[CAST:.*]] = tosa.cast %arg0 : (tensor<1xi64>) -> tensor<1xi32> + // DEFAULT: %[[IDENTITY1:.*]] = tosa.identity %[[CAST]] : (tensor<1xi32>) -> tensor<1xi32> + // FUNCBOUND: %[[IDENTITY1:.*]] = tosa.identity %arg0 : (tensor<1xi32>) -> tensor<1xi32> + %0 = tosa.identity %arg0 : (tensor<1xi64>) -> tensor<1xi64> + // COMMON: %[[IDENTITY2:.*]] = tosa.identity %[[IDENTITY1]] : (tensor<1xi32>) -> tensor<1xi32> + %1 = tosa.identity %0 : (tensor<1xi64>) -> tensor<1xi64> + // COMMON: %[[OUT_CAST:.*]] = tosa.cast %[[IDENTITY2]] : (tensor<1xi32>) -> tensor<1xi32> + %2 = tosa.cast %1 : (tensor<1xi64>) -> tensor<1xi32> + // COMMON: return %[[OUT_CAST]] : tensor<1xi32> + return %2 : tensor<1xi32> +} diff --git a/mlir/test/python/dialects/gpu/dialect.py b/mlir/test/python/dialects/gpu/dialect.py index 3945c99c41091..1a009b7dfa30d 100644 --- a/mlir/test/python/dialects/gpu/dialect.py +++ b/mlir/test/python/dialects/gpu/dialect.py @@ -133,9 +133,10 @@ def builder(func: gpu.GPUFuncOp) -> None: ), func.known_grid_size func = gpu.GPUFuncOp( - func_type, + ir.FunctionType.get(inputs=[T.index()], results=[]), sym_name="non_kernel_func", body_builder=builder, + arg_attrs=[{"gpu.some_attribute": ir.StringAttr.get("foo")}], ) assert not func.is_kernel assert func.known_block_size is None @@ -154,10 +155,11 @@ def builder(func: gpu.GPUFuncOp) -> None: # CHECK: %[[VAL_0:.*]] = gpu.global_id x # CHECK: gpu.return # CHECK: } - # CHECK: gpu.func @non_kernel_func() { - # CHECK: %[[VAL_0:.*]] = gpu.global_id x - # CHECK: gpu.return - # CHECK: } + # CHECK: gpu.func @non_kernel_func( + # CHECK-SAME: %[[ARG0:.*]]: index {gpu.some_attribute = "foo"}) { + # CHECK: %[[GLOBAL_ID_0:.*]] = gpu.global_id x + # CHECK: gpu.return + # CHECK: } # CHECK-LABEL: testGPULaunchFuncOp diff --git a/utils/bazel/MODULE.bazel b/utils/bazel/MODULE.bazel new file mode 100644 index 0000000000000..d061487acf4d7 --- /dev/null +++ b/utils/bazel/MODULE.bazel @@ -0,0 +1,38 @@ +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +"""bzlmod configuration for llvm-project""" +module(name = "llvm-project-overlay") + +bazel_dep(name = "apple_support", version = "1.24.1", repo_name = "build_bazel_apple_support") +bazel_dep(name = "bazel_skylib", version = "1.8.2") +bazel_dep(name = "platforms", version = "1.0.0") +bazel_dep(name = "rules_android", version = "0.6.6") +bazel_dep(name = "rules_cc", version = "0.2.11") +bazel_dep(name = "rules_foreign_cc", version = "0.15.1") +bazel_dep(name = "rules_python", version = "1.6.3") +bazel_dep(name = "rules_shell", version = "0.6.1") + +llvm_repos_extension = use_extension(":extensions.bzl", "llvm_repos_extension") + +use_repo( + llvm_repos_extension, + "llvm-raw", + "llvm_zlib", + "vulkan_headers", + "vulkan_sdk_setup", + "gmp", + "mpfr", + "mpc", + "pfm", + "llvm_zstd", + "pybind11", + "pyyaml", + "robin_map", + "nanobind", +) + +llvm_configure = use_repo_rule("@llvm-raw//utils/bazel:configure.bzl", "llvm_configure") + +llvm_configure(name = "llvm-project") diff --git a/utils/bazel/MODULE.bazel.lock b/utils/bazel/MODULE.bazel.lock new file mode 100644 index 0000000000000..64de258401e91 --- /dev/null +++ b/utils/bazel/MODULE.bazel.lock @@ -0,0 +1,490 @@ +{ + "lockFileVersion": 16, + "registryFileHashes": { + "https://bcr.bazel.build/bazel_registry.json": "8a28e4aff06ee60aed2a8c281907fb8bcbf3b753c91fb5a5c57da3215d5b3497", + "https://bcr.bazel.build/modules/abseil-cpp/20210324.2/MODULE.bazel": "7cd0312e064fde87c8d1cd79ba06c876bd23630c83466e9500321be55c96ace2", + "https://bcr.bazel.build/modules/abseil-cpp/20211102.0/MODULE.bazel": "70390338f7a5106231d20620712f7cccb659cd0e9d073d1991c038eb9fc57589", + "https://bcr.bazel.build/modules/abseil-cpp/20230125.1/MODULE.bazel": "89047429cb0207707b2dface14ba7f8df85273d484c2572755be4bab7ce9c3a0", + "https://bcr.bazel.build/modules/abseil-cpp/20230802.0.bcr.1/MODULE.bazel": "1c8cec495288dccd14fdae6e3f95f772c1c91857047a098fad772034264cc8cb", + "https://bcr.bazel.build/modules/abseil-cpp/20230802.0/MODULE.bazel": "d253ae36a8bd9ee3c5955384096ccb6baf16a1b1e93e858370da0a3b94f77c16", + "https://bcr.bazel.build/modules/abseil-cpp/20230802.1/MODULE.bazel": "fa92e2eb41a04df73cdabeec37107316f7e5272650f81d6cc096418fe647b915", + "https://bcr.bazel.build/modules/abseil-cpp/20240116.1/MODULE.bazel": "37bcdb4440fbb61df6a1c296ae01b327f19e9bb521f9b8e26ec854b6f97309ed", + "https://bcr.bazel.build/modules/abseil-cpp/20240116.2/MODULE.bazel": "73939767a4686cd9a520d16af5ab440071ed75cec1a876bf2fcfaf1f71987a16", + "https://bcr.bazel.build/modules/abseil-cpp/20250127.0/MODULE.bazel": "d1086e248cda6576862b4b3fe9ad76a214e08c189af5b42557a6e1888812c5d5", + "https://bcr.bazel.build/modules/abseil-cpp/20250127.0/source.json": "1b996859f840d8efc7c720efc61dcf2a84b1261cb3974cbbe9b6666ebf567775", + "https://bcr.bazel.build/modules/abseil-py/2.1.0/MODULE.bazel": "5ebe5bf853769c65707e5c28f216798f7a4b1042015e6a36e6d03094d94bec8a", + "https://bcr.bazel.build/modules/abseil-py/2.1.0/source.json": "0e8fc4f088ce07099c1cd6594c20c7ddbb48b4b3c0849b7d94ba94be88ff042b", + "https://bcr.bazel.build/modules/apple_support/1.11.1/MODULE.bazel": "1843d7cd8a58369a444fc6000e7304425fba600ff641592161d9f15b179fb896", + "https://bcr.bazel.build/modules/apple_support/1.15.1/MODULE.bazel": "a0556fefca0b1bb2de8567b8827518f94db6a6e7e7d632b4c48dc5f865bc7c85", + "https://bcr.bazel.build/modules/apple_support/1.24.1/MODULE.bazel": "f46e8ddad60aef170ee92b2f3d00ef66c147ceafea68b6877cb45bd91737f5f8", + "https://bcr.bazel.build/modules/apple_support/1.24.1/source.json": "cf725267cbacc5f028ef13bb77e7f2c2e0066923a4dab1025e4a0511b1ed258a", + "https://bcr.bazel.build/modules/bazel_features/1.1.0/MODULE.bazel": "cfd42ff3b815a5f39554d97182657f8c4b9719568eb7fded2b9135f084bf760b", + "https://bcr.bazel.build/modules/bazel_features/1.1.1/MODULE.bazel": "27b8c79ef57efe08efccbd9dd6ef70d61b4798320b8d3c134fd571f78963dbcd", + "https://bcr.bazel.build/modules/bazel_features/1.11.0/MODULE.bazel": "f9382337dd5a474c3b7d334c2f83e50b6eaedc284253334cf823044a26de03e8", + "https://bcr.bazel.build/modules/bazel_features/1.13.0/MODULE.bazel": "c14c33c7c3c730612bdbe14ebbb5e61936b6f11322ea95a6e91cd1ba962f94df", + "https://bcr.bazel.build/modules/bazel_features/1.15.0/MODULE.bazel": "d38ff6e517149dc509406aca0db3ad1efdd890a85e049585b7234d04238e2a4d", + "https://bcr.bazel.build/modules/bazel_features/1.17.0/MODULE.bazel": "039de32d21b816b47bd42c778e0454217e9c9caac4a3cf8e15c7231ee3ddee4d", + "https://bcr.bazel.build/modules/bazel_features/1.18.0/MODULE.bazel": "1be0ae2557ab3a72a57aeb31b29be347bcdc5d2b1eb1e70f39e3851a7e97041a", + "https://bcr.bazel.build/modules/bazel_features/1.19.0/MODULE.bazel": "59adcdf28230d220f0067b1f435b8537dd033bfff8db21335ef9217919c7fb58", + "https://bcr.bazel.build/modules/bazel_features/1.21.0/MODULE.bazel": "675642261665d8eea09989aa3b8afb5c37627f1be178382c320d1b46afba5e3b", + "https://bcr.bazel.build/modules/bazel_features/1.23.0/MODULE.bazel": "fd1ac84bc4e97a5a0816b7fd7d4d4f6d837b0047cf4cbd81652d616af3a6591a", + "https://bcr.bazel.build/modules/bazel_features/1.27.0/MODULE.bazel": "621eeee06c4458a9121d1f104efb80f39d34deff4984e778359c60eaf1a8cb65", + "https://bcr.bazel.build/modules/bazel_features/1.28.0/MODULE.bazel": "4b4200e6cbf8fa335b2c3f43e1d6ef3e240319c33d43d60cc0fbd4b87ece299d", + "https://bcr.bazel.build/modules/bazel_features/1.3.0/MODULE.bazel": "cdcafe83ec318cda34e02948e81d790aab8df7a929cec6f6969f13a489ccecd9", + "https://bcr.bazel.build/modules/bazel_features/1.30.0/MODULE.bazel": "a14b62d05969a293b80257e72e597c2da7f717e1e69fa8b339703ed6731bec87", + "https://bcr.bazel.build/modules/bazel_features/1.30.0/source.json": "b07e17f067fe4f69f90b03b36ef1e08fe0d1f3cac254c1241a1818773e3423bc", + "https://bcr.bazel.build/modules/bazel_features/1.4.1/MODULE.bazel": "e45b6bb2350aff3e442ae1111c555e27eac1d915e77775f6fdc4b351b758b5d7", + "https://bcr.bazel.build/modules/bazel_features/1.9.1/MODULE.bazel": "8f679097876a9b609ad1f60249c49d68bfab783dd9be012faf9d82547b14815a", + "https://bcr.bazel.build/modules/bazel_skylib/1.0.3/MODULE.bazel": "bcb0fd896384802d1ad283b4e4eb4d718eebd8cb820b0a2c3a347fb971afd9d8", + "https://bcr.bazel.build/modules/bazel_skylib/1.1.1/MODULE.bazel": "1add3e7d93ff2e6998f9e118022c84d163917d912f5afafb3058e3d2f1545b5e", + "https://bcr.bazel.build/modules/bazel_skylib/1.2.0/MODULE.bazel": "44fe84260e454ed94ad326352a698422dbe372b21a1ac9f3eab76eb531223686", + "https://bcr.bazel.build/modules/bazel_skylib/1.2.1/MODULE.bazel": "f35baf9da0efe45fa3da1696ae906eea3d615ad41e2e3def4aeb4e8bc0ef9a7a", + "https://bcr.bazel.build/modules/bazel_skylib/1.3.0/MODULE.bazel": "20228b92868bf5cfc41bda7afc8a8ba2a543201851de39d990ec957b513579c5", + "https://bcr.bazel.build/modules/bazel_skylib/1.4.1/MODULE.bazel": "a0dcb779424be33100dcae821e9e27e4f2901d9dfd5333efe5ac6a8d7ab75e1d", + "https://bcr.bazel.build/modules/bazel_skylib/1.4.2/MODULE.bazel": "3bd40978e7a1fac911d5989e6b09d8f64921865a45822d8b09e815eaa726a651", + "https://bcr.bazel.build/modules/bazel_skylib/1.5.0/MODULE.bazel": "32880f5e2945ce6a03d1fbd588e9198c0a959bb42297b2cfaf1685b7bc32e138", + "https://bcr.bazel.build/modules/bazel_skylib/1.6.1/MODULE.bazel": "8fdee2dbaace6c252131c00e1de4b165dc65af02ea278476187765e1a617b917", + "https://bcr.bazel.build/modules/bazel_skylib/1.7.0/MODULE.bazel": "0db596f4563de7938de764cc8deeabec291f55e8ec15299718b93c4423e9796d", + "https://bcr.bazel.build/modules/bazel_skylib/1.7.1/MODULE.bazel": "3120d80c5861aa616222ec015332e5f8d3171e062e3e804a2a0253e1be26e59b", + "https://bcr.bazel.build/modules/bazel_skylib/1.8.1/MODULE.bazel": "88ade7293becda963e0e3ea33e7d54d3425127e0a326e0d17da085a5f1f03ff6", + "https://bcr.bazel.build/modules/bazel_skylib/1.8.2/MODULE.bazel": "69ad6927098316848b34a9142bcc975e018ba27f08c4ff403f50c1b6e646ca67", + "https://bcr.bazel.build/modules/bazel_skylib/1.8.2/source.json": "34a3c8bcf233b835eb74be9d628899bb32999d3e0eadef1947a0a562a2b16ffb", + "https://bcr.bazel.build/modules/bazel_worker_api/0.0.1/MODULE.bazel": "02a13b77321773b2042e70ee5e4c5e099c8ddee4cf2da9cd420442c36938d4bd", + "https://bcr.bazel.build/modules/bazel_worker_api/0.0.4/MODULE.bazel": "460aa12d01231a80cce03c548287b433b321d205b0028ae596728c35e5ee442e", + "https://bcr.bazel.build/modules/bazel_worker_api/0.0.4/source.json": "d353c410d47a8b65d09fa98e83d57ebec257a2c2b9c6e42d6fda1cb25e5464a5", + "https://bcr.bazel.build/modules/bazel_worker_java/0.0.4/MODULE.bazel": "82494a01018bb7ef06d4a17ec4cd7a758721f10eb8b6c820a818e70d669500db", + "https://bcr.bazel.build/modules/bazel_worker_java/0.0.4/source.json": "a2d30458fd86cf022c2b6331e652526fa08e17573b2f5034a9dbcacdf9c2583c", + "https://bcr.bazel.build/modules/buildozer/7.1.2/MODULE.bazel": "2e8dd40ede9c454042645fd8d8d0cd1527966aa5c919de86661e62953cd73d84", + "https://bcr.bazel.build/modules/buildozer/7.1.2/source.json": "c9028a501d2db85793a6996205c8de120944f50a0d570438fcae0457a5f9d1f8", + "https://bcr.bazel.build/modules/gazelle/0.32.0/MODULE.bazel": "b499f58a5d0d3537f3cf5b76d8ada18242f64ec474d8391247438bf04f58c7b8", + "https://bcr.bazel.build/modules/gazelle/0.33.0/MODULE.bazel": "a13a0f279b462b784fb8dd52a4074526c4a2afe70e114c7d09066097a46b3350", + "https://bcr.bazel.build/modules/gazelle/0.34.0/MODULE.bazel": "abdd8ce4d70978933209db92e436deb3a8b737859e9354fb5fd11fb5c2004c8a", + "https://bcr.bazel.build/modules/gazelle/0.36.0/MODULE.bazel": "e375d5d6e9a6ca59b0cb38b0540bc9a05b6aa926d322f2de268ad267a2ee74c0", + "https://bcr.bazel.build/modules/gazelle/0.40.0/MODULE.bazel": "42ba5378ebe845fca43989a53186ab436d956db498acde790685fe0e8f9c6146", + "https://bcr.bazel.build/modules/gazelle/0.40.0/source.json": "1e5ef6e4d8b9b6836d93273c781e78ff829ea2e077afef7a57298040fa4f010a", + "https://bcr.bazel.build/modules/google_benchmark/1.8.2/MODULE.bazel": "a70cf1bba851000ba93b58ae2f6d76490a9feb74192e57ab8e8ff13c34ec50cb", + "https://bcr.bazel.build/modules/googletest/1.11.0/MODULE.bazel": "3a83f095183f66345ca86aa13c58b59f9f94a2f81999c093d4eeaa2d262d12f4", + "https://bcr.bazel.build/modules/googletest/1.14.0.bcr.1/MODULE.bazel": "22c31a561553727960057361aa33bf20fb2e98584bc4fec007906e27053f80c6", + "https://bcr.bazel.build/modules/googletest/1.14.0/MODULE.bazel": "cfbcbf3e6eac06ef9d85900f64424708cc08687d1b527f0ef65aa7517af8118f", + "https://bcr.bazel.build/modules/googletest/1.15.2/MODULE.bazel": "6de1edc1d26cafb0ea1a6ab3f4d4192d91a312fd2d360b63adaa213cd00b2108", + "https://bcr.bazel.build/modules/googletest/1.15.2/source.json": "dbdda654dcb3a0d7a8bc5d0ac5fc7e150b58c2a986025ae5bc634bb2cb61f470", + "https://bcr.bazel.build/modules/jsoncpp/1.9.5/MODULE.bazel": "31271aedc59e815656f5736f282bb7509a97c7ecb43e927ac1a37966e0578075", + "https://bcr.bazel.build/modules/jsoncpp/1.9.6/MODULE.bazel": "2f8d20d3b7d54143213c4dfc3d98225c42de7d666011528dc8fe91591e2e17b0", + "https://bcr.bazel.build/modules/jsoncpp/1.9.6/source.json": "a04756d367a2126c3541682864ecec52f92cdee80a35735a3cb249ce015ca000", + "https://bcr.bazel.build/modules/libpfm/4.11.0/MODULE.bazel": "45061ff025b301940f1e30d2c16bea596c25b176c8b6b3087e92615adbd52902", + "https://bcr.bazel.build/modules/nlohmann_json/3.6.1/MODULE.bazel": "6f7b417dcc794d9add9e556673ad25cb3ba835224290f4f848f8e2db1e1fca74", + "https://bcr.bazel.build/modules/nlohmann_json/3.6.1/source.json": "f448c6e8963fdfa7eb831457df83ad63d3d6355018f6574fb017e8169deb43a9", + "https://bcr.bazel.build/modules/platforms/0.0.10/MODULE.bazel": "8cb8efaf200bdeb2150d93e162c40f388529a25852b332cec879373771e48ed5", + "https://bcr.bazel.build/modules/platforms/0.0.11/MODULE.bazel": "0daefc49732e227caa8bfa834d65dc52e8cc18a2faf80df25e8caea151a9413f", + "https://bcr.bazel.build/modules/platforms/0.0.4/MODULE.bazel": "9b328e31ee156f53f3c416a64f8491f7eb731742655a47c9eec4703a71644aee", + "https://bcr.bazel.build/modules/platforms/0.0.5/MODULE.bazel": "5733b54ea419d5eaf7997054bb55f6a1d0b5ff8aedf0176fef9eea44f3acda37", + "https://bcr.bazel.build/modules/platforms/0.0.6/MODULE.bazel": "ad6eeef431dc52aefd2d77ed20a4b353f8ebf0f4ecdd26a807d2da5aa8cd0615", + "https://bcr.bazel.build/modules/platforms/0.0.7/MODULE.bazel": "72fd4a0ede9ee5c021f6a8dd92b503e089f46c227ba2813ff183b71616034814", + "https://bcr.bazel.build/modules/platforms/0.0.8/MODULE.bazel": "9f142c03e348f6d263719f5074b21ef3adf0b139ee4c5133e2aa35664da9eb2d", + "https://bcr.bazel.build/modules/platforms/0.0.9/MODULE.bazel": "4a87a60c927b56ddd67db50c89acaa62f4ce2a1d2149ccb63ffd871d5ce29ebc", + "https://bcr.bazel.build/modules/platforms/1.0.0/MODULE.bazel": "f05feb42b48f1b3c225e4ccf351f367be0371411a803198ec34a389fb22aa580", + "https://bcr.bazel.build/modules/platforms/1.0.0/source.json": "f4ff1fd412e0246fd38c82328eb209130ead81d62dcd5a9e40910f867f733d96", + "https://bcr.bazel.build/modules/protobuf/21.7/MODULE.bazel": "a5a29bb89544f9b97edce05642fac225a808b5b7be74038ea3640fae2f8e66a7", + "https://bcr.bazel.build/modules/protobuf/23.1/MODULE.bazel": "88b393b3eb4101d18129e5db51847cd40a5517a53e81216144a8c32dfeeca52a", + "https://bcr.bazel.build/modules/protobuf/24.4/MODULE.bazel": "7bc7ce5f2abf36b3b7b7c8218d3acdebb9426aeb35c2257c96445756f970eb12", + "https://bcr.bazel.build/modules/protobuf/27.0/MODULE.bazel": "7873b60be88844a0a1d8f80b9d5d20cfbd8495a689b8763e76c6372998d3f64c", + "https://bcr.bazel.build/modules/protobuf/27.1/MODULE.bazel": "703a7b614728bb06647f965264967a8ef1c39e09e8f167b3ca0bb1fd80449c0d", + "https://bcr.bazel.build/modules/protobuf/27.2/MODULE.bazel": "32450b50673882e4c8c3d10a83f3bc82161b213ed2f80d17e38bece8f165c295", + "https://bcr.bazel.build/modules/protobuf/29.0-rc2/MODULE.bazel": "6241d35983510143049943fc0d57937937122baf1b287862f9dc8590fc4c37df", + "https://bcr.bazel.build/modules/protobuf/29.0-rc3/MODULE.bazel": "33c2dfa286578573afc55a7acaea3cada4122b9631007c594bf0729f41c8de92", + "https://bcr.bazel.build/modules/protobuf/29.0/MODULE.bazel": "319dc8bf4c679ff87e71b1ccfb5a6e90a6dbc4693501d471f48662ac46d04e4e", + "https://bcr.bazel.build/modules/protobuf/3.19.0/MODULE.bazel": "6b5fbb433f760a99a22b18b6850ed5784ef0e9928a72668b66e4d7ccd47db9b0", + "https://bcr.bazel.build/modules/protobuf/3.19.2/MODULE.bazel": "532ffe5f2186b69fdde039efe6df13ba726ff338c6bc82275ad433013fa10573", + "https://bcr.bazel.build/modules/protobuf/3.19.6/MODULE.bazel": "9233edc5e1f2ee276a60de3eaa47ac4132302ef9643238f23128fea53ea12858", + "https://bcr.bazel.build/modules/protobuf/31.1/MODULE.bazel": "379a389bb330b7b8c1cdf331cc90bf3e13de5614799b3b52cdb7c6f389f6b38e", + "https://bcr.bazel.build/modules/protobuf/31.1/source.json": "25af5d0219da0c0fc4d1191a24ce438e6ca7f49d2e1a94f354efeba6ef10426f", + "https://bcr.bazel.build/modules/pybind11_bazel/2.11.1/MODULE.bazel": "88af1c246226d87e65be78ed49ecd1e6f5e98648558c14ce99176da041dc378e", + "https://bcr.bazel.build/modules/pybind11_bazel/2.12.0/MODULE.bazel": "e6f4c20442eaa7c90d7190d8dc539d0ab422f95c65a57cc59562170c58ae3d34", + "https://bcr.bazel.build/modules/pybind11_bazel/2.12.0/source.json": "6900fdc8a9e95866b8c0d4ad4aba4d4236317b5c1cd04c502df3f0d33afed680", + "https://bcr.bazel.build/modules/re2/2023-09-01/MODULE.bazel": "cb3d511531b16cfc78a225a9e2136007a48cf8a677e4264baeab57fe78a80206", + "https://bcr.bazel.build/modules/re2/2024-07-02.bcr.1/MODULE.bazel": "b4963dda9b31080be1905ef085ecd7dd6cd47c05c79b9cdf83ade83ab2ab271a", + "https://bcr.bazel.build/modules/re2/2024-07-02.bcr.1/source.json": "2ff292be6ef3340325ce8a045ecc326e92cbfab47c7cbab4bd85d28971b97ac4", + "https://bcr.bazel.build/modules/re2/2024-07-02/MODULE.bazel": "0eadc4395959969297cbcf31a249ff457f2f1d456228c67719480205aa306daa", + "https://bcr.bazel.build/modules/rules_android/0.1.1/MODULE.bazel": "48809ab0091b07ad0182defb787c4c5328bd3a278938415c00a7b69b50c4d3a8", + "https://bcr.bazel.build/modules/rules_android/0.6.6/MODULE.bazel": "b0fb569752aab65ab1a9db0a8f6cfaf5aa1754965e17e95dcf0e4d88e192a68d", + "https://bcr.bazel.build/modules/rules_android/0.6.6/source.json": "a9d8dc2d5a102dc03269a94acc886a4cab82cdcb9ccbc77b0f665d6d17a6ae09", + "https://bcr.bazel.build/modules/rules_apple/3.16.0/MODULE.bazel": "0d1caf0b8375942ce98ea944be754a18874041e4e0459401d925577624d3a54a", + "https://bcr.bazel.build/modules/rules_apple/3.16.0/source.json": "d8b5fe461272018cc07cfafce11fe369c7525330804c37eec5a82f84cd475366", + "https://bcr.bazel.build/modules/rules_cc/0.0.1/MODULE.bazel": "cb2aa0747f84c6c3a78dad4e2049c154f08ab9d166b1273835a8174940365647", + "https://bcr.bazel.build/modules/rules_cc/0.0.10/MODULE.bazel": "ec1705118f7eaedd6e118508d3d26deba2a4e76476ada7e0e3965211be012002", + "https://bcr.bazel.build/modules/rules_cc/0.0.13/MODULE.bazel": "0e8529ed7b323dad0775ff924d2ae5af7640b23553dfcd4d34344c7e7a867191", + "https://bcr.bazel.build/modules/rules_cc/0.0.14/MODULE.bazel": "5e343a3aac88b8d7af3b1b6d2093b55c347b8eefc2e7d1442f7a02dc8fea48ac", + "https://bcr.bazel.build/modules/rules_cc/0.0.15/MODULE.bazel": "6704c35f7b4a72502ee81f61bf88706b54f06b3cbe5558ac17e2e14666cd5dcc", + "https://bcr.bazel.build/modules/rules_cc/0.0.16/MODULE.bazel": "7661303b8fc1b4d7f532e54e9d6565771fea666fbdf839e0a86affcd02defe87", + "https://bcr.bazel.build/modules/rules_cc/0.0.17/MODULE.bazel": "2ae1d8f4238ec67d7185d8861cb0a2cdf4bc608697c331b95bf990e69b62e64a", + "https://bcr.bazel.build/modules/rules_cc/0.0.2/MODULE.bazel": "6915987c90970493ab97393024c156ea8fb9f3bea953b2f3ec05c34f19b5695c", + "https://bcr.bazel.build/modules/rules_cc/0.0.6/MODULE.bazel": "abf360251023dfe3efcef65ab9d56beefa8394d4176dd29529750e1c57eaa33f", + "https://bcr.bazel.build/modules/rules_cc/0.0.8/MODULE.bazel": "964c85c82cfeb6f3855e6a07054fdb159aced38e99a5eecf7bce9d53990afa3e", + "https://bcr.bazel.build/modules/rules_cc/0.0.9/MODULE.bazel": "836e76439f354b89afe6a911a7adf59a6b2518fafb174483ad78a2a2fde7b1c5", + "https://bcr.bazel.build/modules/rules_cc/0.1.1/MODULE.bazel": "2f0222a6f229f0bf44cd711dc13c858dad98c62d52bd51d8fc3a764a83125513", + "https://bcr.bazel.build/modules/rules_cc/0.2.11/MODULE.bazel": "e94f24f065bf2191dba2dace951814378b66a94bb3bcc48077492fe0508059b5", + "https://bcr.bazel.build/modules/rules_cc/0.2.11/source.json": "4d555dc20c9c135b21b2e403cf0ce8393fb65711b2305979ce053df4ee3e78de", + "https://bcr.bazel.build/modules/rules_cc/0.2.8/MODULE.bazel": "f1df20f0bf22c28192a794f29b501ee2018fa37a3862a1a2132ae2940a23a642", + "https://bcr.bazel.build/modules/rules_foreign_cc/0.15.1/MODULE.bazel": "c2c60d26c79fda484acb95cdbec46e89d6b28b4845cb277160ce1e0c8622bb88", + "https://bcr.bazel.build/modules/rules_foreign_cc/0.15.1/source.json": "a161811a63ba8a859086da3b7ff3ad04f2e9c255d7727b41087103fc0eb22f55", + "https://bcr.bazel.build/modules/rules_foreign_cc/0.9.0/MODULE.bazel": "c9e8c682bf75b0e7c704166d79b599f93b72cfca5ad7477df596947891feeef6", + "https://bcr.bazel.build/modules/rules_fuzzing/0.5.2/MODULE.bazel": "40c97d1144356f52905566c55811f13b299453a14ac7769dfba2ac38192337a8", + "https://bcr.bazel.build/modules/rules_go/0.41.0/MODULE.bazel": "55861d8e8bb0e62cbd2896f60ff303f62ffcb0eddb74ecb0e5c0cbe36fc292c8", + "https://bcr.bazel.build/modules/rules_go/0.42.0/MODULE.bazel": "8cfa875b9aa8c6fce2b2e5925e73c1388173ea3c32a0db4d2b4804b453c14270", + "https://bcr.bazel.build/modules/rules_go/0.46.0/MODULE.bazel": "3477df8bdcc49e698b9d25f734c4f3a9f5931ff34ee48a2c662be168f5f2d3fd", + "https://bcr.bazel.build/modules/rules_go/0.50.1/MODULE.bazel": "b91a308dc5782bb0a8021ad4330c81fea5bda77f96b9e4c117b9b9c8f6665ee0", + "https://bcr.bazel.build/modules/rules_go/0.51.0-rc2/MODULE.bazel": "edfc3a9cea7bedb0eaaff37b0d7817c1a4bf72b3c615580b0ffcee6c52690fd4", + "https://bcr.bazel.build/modules/rules_go/0.51.0-rc2/source.json": "6b5cd0b3da2bd0e6949580851db990a04af0a285f072b9a0f059424457cd8cc9", + "https://bcr.bazel.build/modules/rules_java/4.0.0/MODULE.bazel": "5a78a7ae82cd1a33cef56dc578c7d2a46ed0dca12643ee45edbb8417899e6f74", + "https://bcr.bazel.build/modules/rules_java/5.3.5/MODULE.bazel": "a4ec4f2db570171e3e5eb753276ee4b389bae16b96207e9d3230895c99644b86", + "https://bcr.bazel.build/modules/rules_java/6.0.0/MODULE.bazel": "8a43b7df601a7ec1af61d79345c17b31ea1fedc6711fd4abfd013ea612978e39", + "https://bcr.bazel.build/modules/rules_java/6.3.0/MODULE.bazel": "a97c7678c19f236a956ad260d59c86e10a463badb7eb2eda787490f4c969b963", + "https://bcr.bazel.build/modules/rules_java/6.4.0/MODULE.bazel": "e986a9fe25aeaa84ac17ca093ef13a4637f6107375f64667a15999f77db6c8f6", + "https://bcr.bazel.build/modules/rules_java/6.5.2/MODULE.bazel": "1d440d262d0e08453fa0c4d8f699ba81609ed0e9a9a0f02cd10b3e7942e61e31", + "https://bcr.bazel.build/modules/rules_java/7.1.0/MODULE.bazel": "30d9135a2b6561c761bd67bd4990da591e6bdc128790ce3e7afd6a3558b2fb64", + "https://bcr.bazel.build/modules/rules_java/7.10.0/MODULE.bazel": "530c3beb3067e870561739f1144329a21c851ff771cd752a49e06e3dc9c2e71a", + "https://bcr.bazel.build/modules/rules_java/7.12.2/MODULE.bazel": "579c505165ee757a4280ef83cda0150eea193eed3bef50b1004ba88b99da6de6", + "https://bcr.bazel.build/modules/rules_java/7.2.0/MODULE.bazel": "06c0334c9be61e6cef2c8c84a7800cef502063269a5af25ceb100b192453d4ab", + "https://bcr.bazel.build/modules/rules_java/7.3.2/MODULE.bazel": "50dece891cfdf1741ea230d001aa9c14398062f2b7c066470accace78e412bc2", + "https://bcr.bazel.build/modules/rules_java/7.4.0/MODULE.bazel": "a592852f8a3dd539e82ee6542013bf2cadfc4c6946be8941e189d224500a8934", + "https://bcr.bazel.build/modules/rules_java/7.6.1/MODULE.bazel": "2f14b7e8a1aa2f67ae92bc69d1ec0fa8d9f827c4e17ff5e5f02e91caa3b2d0fe", + "https://bcr.bazel.build/modules/rules_java/8.13.0/MODULE.bazel": "0444ebf737d144cf2bb2ccb368e7f1cce735264285f2a3711785827c1686625e", + "https://bcr.bazel.build/modules/rules_java/8.13.0/source.json": "4605c0f676b87dd9d1fabd4d743b71f04d97503bd1a79aad53f87399fb5396de", + "https://bcr.bazel.build/modules/rules_java/8.3.2/MODULE.bazel": "7336d5511ad5af0b8615fdc7477535a2e4e723a357b6713af439fe8cf0195017", + "https://bcr.bazel.build/modules/rules_java/8.5.1/MODULE.bazel": "d8a9e38cc5228881f7055a6079f6f7821a073df3744d441978e7a43e20226939", + "https://bcr.bazel.build/modules/rules_java/8.6.0/MODULE.bazel": "9c064c434606d75a086f15ade5edb514308cccd1544c2b2a89bbac4310e41c71", + "https://bcr.bazel.build/modules/rules_java/8.6.1/MODULE.bazel": "f4808e2ab5b0197f094cabce9f4b006a27766beb6a9975931da07099560ca9c2", + "https://bcr.bazel.build/modules/rules_jvm_external/4.4.2/MODULE.bazel": "a56b85e418c83eb1839819f0b515c431010160383306d13ec21959ac412d2fe7", + "https://bcr.bazel.build/modules/rules_jvm_external/5.1/MODULE.bazel": "33f6f999e03183f7d088c9be518a63467dfd0be94a11d0055fe2d210f89aa909", + "https://bcr.bazel.build/modules/rules_jvm_external/5.2/MODULE.bazel": "d9351ba35217ad0de03816ef3ed63f89d411349353077348a45348b096615036", + "https://bcr.bazel.build/modules/rules_jvm_external/5.3/MODULE.bazel": "bf93870767689637164657731849fb887ad086739bd5d360d90007a581d5527d", + "https://bcr.bazel.build/modules/rules_jvm_external/6.1/MODULE.bazel": "75b5fec090dbd46cf9b7d8ea08cf84a0472d92ba3585b476f44c326eda8059c4", + "https://bcr.bazel.build/modules/rules_jvm_external/6.2/MODULE.bazel": "36a6e52487a855f33cb960724eb56547fa87e2c98a0474c3acad94339d7f8e99", + "https://bcr.bazel.build/modules/rules_jvm_external/6.3/MODULE.bazel": "c998e060b85f71e00de5ec552019347c8bca255062c990ac02d051bb80a38df0", + "https://bcr.bazel.build/modules/rules_jvm_external/6.6/MODULE.bazel": "153042249c7060536dc95b6bb9f9bb8063b8a0b0cb7acdb381bddbc2374aed55", + "https://bcr.bazel.build/modules/rules_jvm_external/6.7/MODULE.bazel": "e717beabc4d091ecb2c803c2d341b88590e9116b8bf7947915eeb33aab4f96dd", + "https://bcr.bazel.build/modules/rules_jvm_external/6.7/source.json": "5426f412d0a7fc6b611643376c7e4a82dec991491b9ce5cb1cfdd25fe2e92be4", + "https://bcr.bazel.build/modules/rules_kotlin/1.9.0/MODULE.bazel": "ef85697305025e5a61f395d4eaede272a5393cee479ace6686dba707de804d59", + "https://bcr.bazel.build/modules/rules_kotlin/1.9.5/MODULE.bazel": "043a16a572f610558ec2030db3ff0c9938574e7dd9f58bded1bb07c0192ef025", + "https://bcr.bazel.build/modules/rules_kotlin/1.9.6/MODULE.bazel": "d269a01a18ee74d0335450b10f62c9ed81f2321d7958a2934e44272fe82dcef3", + "https://bcr.bazel.build/modules/rules_kotlin/1.9.6/source.json": "2faa4794364282db7c06600b7e5e34867a564ae91bda7cae7c29c64e9466b7d5", + "https://bcr.bazel.build/modules/rules_license/0.0.3/MODULE.bazel": "627e9ab0247f7d1e05736b59dbb1b6871373de5ad31c3011880b4133cafd4bd0", + "https://bcr.bazel.build/modules/rules_license/0.0.7/MODULE.bazel": "088fbeb0b6a419005b89cf93fe62d9517c0a2b8bb56af3244af65ecfe37e7d5d", + "https://bcr.bazel.build/modules/rules_license/1.0.0/MODULE.bazel": "a7fda60eefdf3d8c827262ba499957e4df06f659330bbe6cdbdb975b768bb65c", + "https://bcr.bazel.build/modules/rules_license/1.0.0/source.json": "a52c89e54cc311196e478f8382df91c15f7a2bfdf4c6cd0e2675cc2ff0b56efb", + "https://bcr.bazel.build/modules/rules_pkg/0.7.0/MODULE.bazel": "df99f03fc7934a4737122518bb87e667e62d780b610910f0447665a7e2be62dc", + "https://bcr.bazel.build/modules/rules_pkg/1.0.1/MODULE.bazel": "5b1df97dbc29623bccdf2b0dcd0f5cb08e2f2c9050aab1092fd39a41e82686ff", + "https://bcr.bazel.build/modules/rules_pkg/1.0.1/source.json": "bd82e5d7b9ce2d31e380dd9f50c111d678c3bdaca190cb76b0e1c71b05e1ba8a", + "https://bcr.bazel.build/modules/rules_proto/4.0.0/MODULE.bazel": "a7a7b6ce9bee418c1a760b3d84f83a299ad6952f9903c67f19e4edd964894e06", + "https://bcr.bazel.build/modules/rules_proto/5.3.0-21.7/MODULE.bazel": "e8dff86b0971688790ae75528fe1813f71809b5afd57facb44dad9e8eca631b7", + "https://bcr.bazel.build/modules/rules_proto/6.0.0-rc1/MODULE.bazel": "1e5b502e2e1a9e825eef74476a5a1ee524a92297085015a052510b09a1a09483", + "https://bcr.bazel.build/modules/rules_proto/6.0.0/MODULE.bazel": "b531d7f09f58dce456cd61b4579ce8c86b38544da75184eadaf0a7cb7966453f", + "https://bcr.bazel.build/modules/rules_proto/6.0.2/MODULE.bazel": "ce916b775a62b90b61888052a416ccdda405212b6aaeb39522f7dc53431a5e73", + "https://bcr.bazel.build/modules/rules_proto/7.0.2/MODULE.bazel": "bf81793bd6d2ad89a37a40693e56c61b0ee30f7a7fdbaf3eabbf5f39de47dea2", + "https://bcr.bazel.build/modules/rules_proto/7.0.2/source.json": "1e5e7260ae32ef4f2b52fd1d0de8d03b606a44c91b694d2f1afb1d3b28a48ce1", + "https://bcr.bazel.build/modules/rules_python/0.10.2/MODULE.bazel": "cc82bc96f2997baa545ab3ce73f196d040ffb8756fd2d66125a530031cd90e5f", + "https://bcr.bazel.build/modules/rules_python/0.23.1/MODULE.bazel": "49ffccf0511cb8414de28321f5fcf2a31312b47c40cc21577144b7447f2bf300", + "https://bcr.bazel.build/modules/rules_python/0.25.0/MODULE.bazel": "72f1506841c920a1afec76975b35312410eea3aa7b63267436bfb1dd91d2d382", + "https://bcr.bazel.build/modules/rules_python/0.28.0/MODULE.bazel": "cba2573d870babc976664a912539b320cbaa7114cd3e8f053c720171cde331ed", + "https://bcr.bazel.build/modules/rules_python/0.31.0/MODULE.bazel": "93a43dc47ee570e6ec9f5779b2e64c1476a6ce921c48cc9a1678a91dd5f8fd58", + "https://bcr.bazel.build/modules/rules_python/0.33.2/MODULE.bazel": "3e036c4ad8d804a4dad897d333d8dce200d943df4827cb849840055be8d2e937", + "https://bcr.bazel.build/modules/rules_python/0.37.1/MODULE.bazel": "3faeb2d9fa0a81f8980643ee33f212308f4d93eea4b9ce6f36d0b742e71e9500", + "https://bcr.bazel.build/modules/rules_python/0.37.2/MODULE.bazel": "b5ffde91410745750b6c13be1c5dc4555ef5bc50562af4a89fd77807fdde626a", + "https://bcr.bazel.build/modules/rules_python/0.4.0/MODULE.bazel": "9208ee05fd48bf09ac60ed269791cf17fb343db56c8226a720fbb1cdf467166c", + "https://bcr.bazel.build/modules/rules_python/0.40.0/MODULE.bazel": "9d1a3cd88ed7d8e39583d9ffe56ae8a244f67783ae89b60caafc9f5cf318ada7", + "https://bcr.bazel.build/modules/rules_python/1.0.0/MODULE.bazel": "898a3d999c22caa585eb062b600f88654bf92efb204fa346fb55f6f8edffca43", + "https://bcr.bazel.build/modules/rules_python/1.2.0/MODULE.bazel": "5aeeb48b2a6c19d668b48adf2b8a2b209a6310c230db0ce77450f148a89846e4", + "https://bcr.bazel.build/modules/rules_python/1.6.3/MODULE.bazel": "a7b80c42cb3de5ee2a5fa1abc119684593704fcd2fec83165ebe615dec76574f", + "https://bcr.bazel.build/modules/rules_python/1.6.3/source.json": "f0be74977e5604a6526c8a416cda22985093ff7d5d380d41722d7e44015cc419", + "https://bcr.bazel.build/modules/rules_robolectric/4.14.1.2/MODULE.bazel": "d44fec647d0aeb67b9f3b980cf68ba634976f3ae7ccd6c07d790b59b87a4f251", + "https://bcr.bazel.build/modules/rules_robolectric/4.14.1.2/source.json": "37c10335f2361c337c5c1f34ed36d2da70534c23088062b33a8bdaab68aa9dea", + "https://bcr.bazel.build/modules/rules_shell/0.1.2/MODULE.bazel": "66e4ca3ce084b04af0b9ff05ff14cab4e5df7503973818bb91cbc6cda08d32fc", + "https://bcr.bazel.build/modules/rules_shell/0.2.0/MODULE.bazel": "fda8a652ab3c7d8fee214de05e7a9916d8b28082234e8d2c0094505c5268ed3c", + "https://bcr.bazel.build/modules/rules_shell/0.3.0/MODULE.bazel": "de4402cd12f4cc8fda2354fce179fdb068c0b9ca1ec2d2b17b3e21b24c1a937b", + "https://bcr.bazel.build/modules/rules_shell/0.6.1/MODULE.bazel": "72e76b0eea4e81611ef5452aa82b3da34caca0c8b7b5c0c9584338aa93bae26b", + "https://bcr.bazel.build/modules/rules_shell/0.6.1/source.json": "20ec05cd5e592055e214b2da8ccb283c7f2a421ea0dc2acbf1aa792e11c03d0c", + "https://bcr.bazel.build/modules/rules_swift/1.16.0/MODULE.bazel": "4a09f199545a60d09895e8281362b1ff3bb08bbde69c6fc87aff5b92fcc916ca", + "https://bcr.bazel.build/modules/rules_swift/2.1.1/MODULE.bazel": "494900a80f944fc7aa61500c2073d9729dff0b764f0e89b824eb746959bc1046", + "https://bcr.bazel.build/modules/rules_swift/2.1.1/source.json": "40fc69dfaac64deddbb75bd99cdac55f4427d9ca0afbe408576a65428427a186", + "https://bcr.bazel.build/modules/stardoc/0.5.1/MODULE.bazel": "1a05d92974d0c122f5ccf09291442580317cdd859f07a8655f1db9a60374f9f8", + "https://bcr.bazel.build/modules/stardoc/0.5.3/MODULE.bazel": "c7f6948dae6999bf0db32c1858ae345f112cacf98f174c7a8bb707e41b974f1c", + "https://bcr.bazel.build/modules/stardoc/0.5.6/MODULE.bazel": "c43dabc564990eeab55e25ed61c07a1aadafe9ece96a4efabb3f8bf9063b71ef", + "https://bcr.bazel.build/modules/stardoc/0.6.2/MODULE.bazel": "7060193196395f5dd668eda046ccbeacebfd98efc77fed418dbe2b82ffaa39fd", + "https://bcr.bazel.build/modules/stardoc/0.7.0/MODULE.bazel": "05e3d6d30c099b6770e97da986c53bd31844d7f13d41412480ea265ac9e8079c", + "https://bcr.bazel.build/modules/stardoc/0.7.1/MODULE.bazel": "3548faea4ee5dda5580f9af150e79d0f6aea934fc60c1cc50f4efdd9420759e7", + "https://bcr.bazel.build/modules/stardoc/0.7.2/MODULE.bazel": "fc152419aa2ea0f51c29583fab1e8c99ddefd5b3778421845606ee628629e0e5", + "https://bcr.bazel.build/modules/stardoc/0.7.2/source.json": "58b029e5e901d6802967754adf0a9056747e8176f017cfe3607c0851f4d42216", + "https://bcr.bazel.build/modules/swift_argument_parser/1.3.1.1/MODULE.bazel": "5e463fbfba7b1701d957555ed45097d7f984211330106ccd1352c6e0af0dcf91", + "https://bcr.bazel.build/modules/swift_argument_parser/1.3.1.1/source.json": "32bd87e5f4d7acc57c5b2ff7c325ae3061d5e242c0c4c214ae87e0f1c13e54cb", + "https://bcr.bazel.build/modules/upb/0.0.0-20220923-a547704/MODULE.bazel": "7298990c00040a0e2f121f6c32544bab27d4452f80d9ce51349b1a28f3005c43", + "https://bcr.bazel.build/modules/upb/0.0.0-20230516-61a97ef/MODULE.bazel": "c0df5e35ad55e264160417fd0875932ee3c9dda63d9fccace35ac62f45e1b6f9", + "https://bcr.bazel.build/modules/zlib/1.2.11/MODULE.bazel": "07b389abc85fdbca459b69e2ec656ae5622873af3f845e1c9d80fe179f3effa0", + "https://bcr.bazel.build/modules/zlib/1.2.12/MODULE.bazel": "3b1a8834ada2a883674be8cbd36ede1b6ec481477ada359cd2d3ddc562340b27", + "https://bcr.bazel.build/modules/zlib/1.3.1.bcr.3/MODULE.bazel": "af322bc08976524477c79d1e45e241b6efbeb918c497e8840b8ab116802dda79", + "https://bcr.bazel.build/modules/zlib/1.3.1.bcr.5/MODULE.bazel": "eec517b5bbe5492629466e11dae908d043364302283de25581e3eb944326c4ca", + "https://bcr.bazel.build/modules/zlib/1.3.1.bcr.5/source.json": "22bc55c47af97246cfc093d0acf683a7869377de362b5d1c552c2c2e16b7a806", + "https://bcr.bazel.build/modules/zlib/1.3.1/MODULE.bazel": "751c9940dcfe869f5f7274e1295422a34623555916eb98c174c1e945594bf198" + }, + "selectedYankedVersions": {}, + "moduleExtensions": { + "//:extensions.bzl%llvm_deps_extension": { + "general": { + "bzlTransitiveDigest": "LGeZ4Ibt22AGXloFt/bm3EsBB05m6aTG+WxfH8fJVB4=", + "usagesDigest": "dHBLC1g5cqg/flxcuZRJMp2heDoB4+0/NDd6MutLhGE=", + "recordedFileInputs": {}, + "recordedDirentsInputs": {}, + "envVariables": {}, + "generatedRepoSpecs": { + "llvm-raw": { + "repoRuleId": "@@bazel_tools//tools/build_defs/repo:local.bzl%new_local_repository", + "attributes": { + "build_file_content": "# empty", + "path": "../../" + } + }, + "llvm_zlib": { + "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive", + "attributes": { + "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:zlib-ng.BUILD", + "sha256": "e36bb346c00472a1f9ff2a0a4643e590a254be6379da7cddd9daeb9a7f296731", + "strip_prefix": "zlib-ng-2.0.7", + "urls": [ + "https://github.com/zlib-ng/zlib-ng/archive/refs/tags/2.0.7.zip" + ] + } + }, + "vulkan_headers": { + "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive", + "attributes": { + "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:vulkan_headers.BUILD", + "sha256": "19f491784ef0bc73caff877d11c96a48b946b5a1c805079d9006e3fbaa5c1895", + "strip_prefix": "Vulkan-Headers-9bd3f561bcee3f01d22912de10bb07ce4e23d378", + "urls": [ + "https://github.com/KhronosGroup/Vulkan-Headers/archive/9bd3f561bcee3f01d22912de10bb07ce4e23d378.tar.gz" + ] + } + }, + "vulkan_sdk_setup": { + "repoRuleId": "@@//:vulkan_sdk.bzl%vulkan_sdk_setup", + "attributes": {} + }, + "gmp": { + "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive", + "attributes": { + "urls": [ + "https://gmplib.org/download/gmp/gmp-6.2.1.tar.xz", + "https://ftp.gnu.org/gnu/gmp/gmp-6.2.1.tar.xz" + ], + "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:gmp.BUILD", + "sha256": "fd4829912cddd12f84181c3451cc752be224643e87fac497b69edddadc49b4f2", + "strip_prefix": "gmp-6.2.1" + } + }, + "mpfr": { + "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive", + "attributes": { + "urls": [ + "https://www.mpfr.org/mpfr-current/mpfr-4.2.2.tar.gz" + ], + "sha256": "826cbb24610bd193f36fde172233fb8c009f3f5c2ad99f644d0dea2e16a20e42", + "strip_prefix": "mpfr-4.2.2", + "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:mpfr.BUILD" + } + }, + "mpc": { + "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive", + "attributes": { + "urls": [ + "https://ftp.gnu.org/gnu/mpc/mpc-1.3.1.tar.gz" + ], + "sha256": "ab642492f5cf882b74aa0cb730cd410a81edcdbec895183ce930e706c1c759b8", + "strip_prefix": "mpc-1.3.1", + "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:mpc.BUILD" + } + }, + "pfm": { + "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive", + "attributes": { + "urls": [ + "https://versaweb.dl.sourceforge.net/project/perfmon2/libpfm4/libpfm-4.13.0.tar.gz" + ], + "sha256": "d18b97764c755528c1051d376e33545d0eb60c6ebf85680436813fa5b04cc3d1", + "strip_prefix": "libpfm-4.13.0", + "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:pfm.BUILD" + } + }, + "llvm_zstd": { + "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive", + "attributes": { + "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:zstd.BUILD", + "sha256": "7c42d56fac126929a6a85dbc73ff1db2411d04f104fae9bdea51305663a83fd0", + "strip_prefix": "zstd-1.5.2", + "urls": [ + "https://github.com/facebook/zstd/releases/download/v1.5.2/zstd-1.5.2.tar.gz" + ] + } + }, + "pybind11": { + "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive", + "attributes": { + "url": "https://github.com/pybind/pybind11/archive/v2.10.3.zip", + "sha256": "201966a61dc826f1b1879a24a3317a1ec9214a918c8eb035be2f30c3e9cfbdcb", + "strip_prefix": "pybind11-2.10.3", + "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:pybind.BUILD" + } + }, + "pyyaml": { + "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive", + "attributes": { + "url": "https://github.com/yaml/pyyaml/archive/refs/tags/5.1.zip", + "sha256": "f0a35d7f282a6d6b1a4f3f3965ef5c124e30ed27a0088efb97c0977268fd671f", + "strip_prefix": "pyyaml-5.1/lib3", + "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:pyyaml.BUILD" + } + }, + "robin_map": { + "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive", + "attributes": { + "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:robin_map.BUILD", + "sha256": "a8424ad3b0affd4c57ed26f0f3d8a29604f0e1f2ef2089f497f614b1c94c7236", + "strip_prefix": "robin-map-1.3.0", + "url": "https://github.com/Tessil/robin-map/archive/refs/tags/v1.3.0.tar.gz" + } + }, + "nanobind": { + "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive", + "attributes": { + "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:nanobind.BUILD", + "sha256": "8ce3667dce3e64fc06bfb9b778b6f48731482362fb89a43da156632266cd5a90", + "strip_prefix": "nanobind-2.9.2", + "url": "https://github.com/wjakob/nanobind/archive/refs/tags/v2.9.2.tar.gz" + } + } + }, + "recordedRepoMappingEntries": [ + [ + "", + "bazel_tools", + "bazel_tools" + ] + ] + } + }, + "@@rules_android+//rules/android_sdk_repository:rule.bzl%android_sdk_repository_extension": { + "general": { + "bzlTransitiveDigest": "NAy+0M15JNVEBb8Tny6t7j3lKqTnsAMjoBB6LJ+C370=", + "usagesDigest": "g9Ur6X6qhf9a8MmY9qXU/jFjkyk/aZVBegI0yVMF0z4=", + "recordedFileInputs": {}, + "recordedDirentsInputs": {}, + "envVariables": {}, + "generatedRepoSpecs": { + "androidsdk": { + "repoRuleId": "@@rules_android+//rules/android_sdk_repository:rule.bzl%_android_sdk_repository", + "attributes": {} + } + }, + "recordedRepoMappingEntries": [] + } + }, + "@@rules_kotlin+//src/main/starlark/core/repositories:bzlmod_setup.bzl%rules_kotlin_extensions": { + "general": { + "bzlTransitiveDigest": "sFhcgPbDQehmbD1EOXzX4H1q/CD5df8zwG4kp4jbvr8=", + "usagesDigest": "QI2z8ZUR+mqtbwsf2fLqYdJAkPOHdOV+tF2yVAUgRzw=", + "recordedFileInputs": {}, + "recordedDirentsInputs": {}, + "envVariables": {}, + "generatedRepoSpecs": { + "com_github_jetbrains_kotlin_git": { + "repoRuleId": "@@rules_kotlin+//src/main/starlark/core/repositories:compiler.bzl%kotlin_compiler_git_repository", + "attributes": { + "urls": [ + "https://github.com/JetBrains/kotlin/releases/download/v1.9.23/kotlin-compiler-1.9.23.zip" + ], + "sha256": "93137d3aab9afa9b27cb06a824c2324195c6b6f6179d8a8653f440f5bd58be88" + } + }, + "com_github_jetbrains_kotlin": { + "repoRuleId": "@@rules_kotlin+//src/main/starlark/core/repositories:compiler.bzl%kotlin_capabilities_repository", + "attributes": { + "git_repository_name": "com_github_jetbrains_kotlin_git", + "compiler_version": "1.9.23" + } + }, + "com_github_google_ksp": { + "repoRuleId": "@@rules_kotlin+//src/main/starlark/core/repositories:ksp.bzl%ksp_compiler_plugin_repository", + "attributes": { + "urls": [ + "https://github.com/google/ksp/releases/download/1.9.23-1.0.20/artifacts.zip" + ], + "sha256": "ee0618755913ef7fd6511288a232e8fad24838b9af6ea73972a76e81053c8c2d", + "strip_version": "1.9.23-1.0.20" + } + }, + "com_github_pinterest_ktlint": { + "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_file", + "attributes": { + "sha256": "01b2e0ef893383a50dbeb13970fe7fa3be36ca3e83259e01649945b09d736985", + "urls": [ + "https://github.com/pinterest/ktlint/releases/download/1.3.0/ktlint" + ], + "executable": true + } + }, + "rules_android": { + "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive", + "attributes": { + "sha256": "cd06d15dd8bb59926e4d65f9003bfc20f9da4b2519985c27e190cddc8b7a7806", + "strip_prefix": "rules_android-0.1.1", + "urls": [ + "https://github.com/bazelbuild/rules_android/archive/v0.1.1.zip" + ] + } + } + }, + "recordedRepoMappingEntries": [ + [ + "rules_kotlin+", + "bazel_tools", + "bazel_tools" + ] + ] + } + }, + "@@rules_python+//python/uv:uv.bzl%uv": { + "general": { + "bzlTransitiveDigest": "477hS4MXeJ7LqPNLTqL+1ltraV5lqwOw3tEXWqnJRt8=", + "usagesDigest": "icnInV8HDGrRQf9x8RMfxWfBHgT3OgRlYovS/9POEJw=", + "recordedFileInputs": {}, + "recordedDirentsInputs": {}, + "envVariables": {}, + "generatedRepoSpecs": { + "uv": { + "repoRuleId": "@@rules_python+//python/uv/private:uv_toolchains_repo.bzl%uv_toolchains_repo", + "attributes": { + "toolchain_type": "'@@rules_python+//python/uv:uv_toolchain_type'", + "toolchain_names": [ + "none" + ], + "toolchain_implementations": { + "none": "'@@rules_python+//python:none'" + }, + "toolchain_compatible_with": { + "none": [ + "@platforms//:incompatible" + ] + }, + "toolchain_target_settings": {} + } + } + }, + "recordedRepoMappingEntries": [ + [ + "rules_python+", + "bazel_tools", + "bazel_tools" + ] + ] + } + } + } +} diff --git a/utils/bazel/extensions.bzl b/utils/bazel/extensions.bzl new file mode 100644 index 0000000000000..b0d5871b722a7 --- /dev/null +++ b/utils/bazel/extensions.bzl @@ -0,0 +1,127 @@ +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +"""bzlmod extensions for llvm-project""" + +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") +load("@bazel_tools//tools/build_defs/repo:local.bzl", "new_local_repository") +load(":vulkan_sdk.bzl", "vulkan_sdk_setup") + +def _llvm_repos_extension_impl(module_ctx): + if any([m.is_root and m.name == "llvm-project-overlay" for m in module_ctx.modules]): + new_local_repository( + name = "llvm-raw", + build_file_content = "# empty", + path = "../../", + ) + + http_archive( + name = "llvm_zlib", + build_file = "@llvm-raw//utils/bazel/third_party_build:zlib-ng.BUILD", + sha256 = "e36bb346c00472a1f9ff2a0a4643e590a254be6379da7cddd9daeb9a7f296731", + strip_prefix = "zlib-ng-2.0.7", + urls = [ + "https://github.com/zlib-ng/zlib-ng/archive/refs/tags/2.0.7.zip", + ], + ) + + http_archive( + name = "vulkan_headers", + build_file = "@llvm-raw//utils/bazel/third_party_build:vulkan_headers.BUILD", + sha256 = "19f491784ef0bc73caff877d11c96a48b946b5a1c805079d9006e3fbaa5c1895", + strip_prefix = "Vulkan-Headers-9bd3f561bcee3f01d22912de10bb07ce4e23d378", + urls = [ + "https://github.com/KhronosGroup/Vulkan-Headers/archive/9bd3f561bcee3f01d22912de10bb07ce4e23d378.tar.gz", + ], + ) + + vulkan_sdk_setup(name = "vulkan_sdk_setup") + + http_archive( + name = "gmp", + urls = [ + "https://gmplib.org/download/gmp/gmp-6.2.1.tar.xz", + "https://ftp.gnu.org/gnu/gmp/gmp-6.2.1.tar.xz", + ], + build_file = "@llvm-raw//utils/bazel/third_party_build:gmp.BUILD", + sha256 = "fd4829912cddd12f84181c3451cc752be224643e87fac497b69edddadc49b4f2", + strip_prefix = "gmp-6.2.1", + ) + + http_archive( + name = "mpfr", + urls = [ + "https://www.mpfr.org/mpfr-current/mpfr-4.2.2.tar.gz", + ], + sha256 = "826cbb24610bd193f36fde172233fb8c009f3f5c2ad99f644d0dea2e16a20e42", + strip_prefix = "mpfr-4.2.2", + build_file = "@llvm-raw//utils/bazel/third_party_build:mpfr.BUILD", + ) + + http_archive( + name = "mpc", + urls = [ + "https://ftp.gnu.org/gnu/mpc/mpc-1.3.1.tar.gz", + ], + sha256 = "ab642492f5cf882b74aa0cb730cd410a81edcdbec895183ce930e706c1c759b8", + strip_prefix = "mpc-1.3.1", + build_file = "@llvm-raw//utils/bazel/third_party_build:mpc.BUILD", + ) + + http_archive( + name = "pfm", + urls = [ + "https://versaweb.dl.sourceforge.net/project/perfmon2/libpfm4/libpfm-4.13.0.tar.gz", + ], + sha256 = "d18b97764c755528c1051d376e33545d0eb60c6ebf85680436813fa5b04cc3d1", + strip_prefix = "libpfm-4.13.0", + build_file = "@llvm-raw//utils/bazel/third_party_build:pfm.BUILD", + ) + + http_archive( + name = "llvm_zstd", + build_file = "@llvm-raw//utils/bazel/third_party_build:zstd.BUILD", + sha256 = "7c42d56fac126929a6a85dbc73ff1db2411d04f104fae9bdea51305663a83fd0", + strip_prefix = "zstd-1.5.2", + urls = [ + "https://github.com/facebook/zstd/releases/download/v1.5.2/zstd-1.5.2.tar.gz", + ], + ) + + http_archive( + name = "pybind11", + url = "https://github.com/pybind/pybind11/archive/v2.10.3.zip", + sha256 = "201966a61dc826f1b1879a24a3317a1ec9214a918c8eb035be2f30c3e9cfbdcb", + strip_prefix = "pybind11-2.10.3", + build_file = "@llvm-raw//utils/bazel/third_party_build:pybind.BUILD", + ) + + http_archive( + name = "pyyaml", + url = "https://github.com/yaml/pyyaml/archive/refs/tags/5.1.zip", + sha256 = "f0a35d7f282a6d6b1a4f3f3965ef5c124e30ed27a0088efb97c0977268fd671f", + strip_prefix = "pyyaml-5.1/lib3", + build_file = "@llvm-raw//utils/bazel/third_party_build:pyyaml.BUILD", + ) + + # TODO: bump to robin-map-1.4.0 + http_archive( + name = "robin_map", + build_file = "@llvm-raw//utils/bazel/third_party_build:robin_map.BUILD", + sha256 = "a8424ad3b0affd4c57ed26f0f3d8a29604f0e1f2ef2089f497f614b1c94c7236", + strip_prefix = "robin-map-1.3.0", + url = "https://github.com/Tessil/robin-map/archive/refs/tags/v1.3.0.tar.gz", + ) + + http_archive( + name = "nanobind", + build_file = "@llvm-raw//utils/bazel/third_party_build:nanobind.BUILD", + sha256 = "8ce3667dce3e64fc06bfb9b778b6f48731482362fb89a43da156632266cd5a90", + strip_prefix = "nanobind-2.9.2", + url = "https://github.com/wjakob/nanobind/archive/refs/tags/v2.9.2.tar.gz", + ) + +llvm_repos_extension = module_extension( + implementation = _llvm_repos_extension_impl, +) diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel index deb56dc0957e9..790709bdef05c 100644 --- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel @@ -1025,6 +1025,7 @@ cc_library( gentbl_cc_library( name = "sema_attr_gen", tbl_outs = { + "include/clang/Sema/AttrIsTypeDependent.inc": ["-gen-clang-attr-is-type-dependent"], "include/clang/Sema/AttrParsedAttrImpl.inc": ["-gen-clang-attr-parsed-attr-impl"], "include/clang/Sema/AttrParsedAttrKinds.inc": ["-gen-clang-attr-parsed-attr-kinds"], "include/clang/Sema/AttrSpellingListIndex.inc": ["-gen-clang-attr-spelling-index"], diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel index 635f77215b38f..ddad2f4f7611d 100644 --- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel @@ -4100,6 +4100,7 @@ cc_library( ":DebugInfo", ":DebugInfoDWARF", ":JITLink", + ":Object", ":OrcJIT", ":OrcShared", ":Support",