Skip to content

Commit

Permalink
[ARM] Enable MVE masked loads and stores
Browse files Browse the repository at this point in the history
With the extra optimisations we have done, these should now be fine to
enable by default. Which is what this patch does.

Differential Revision: https://reviews.llvm.org/D70968
  • Loading branch information
davemgreen committed Dec 9, 2019
1 parent 966fac1 commit b1aba03
Show file tree
Hide file tree
Showing 16 changed files with 24 additions and 24 deletions.
2 changes: 1 addition & 1 deletion llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
Expand Up @@ -37,7 +37,7 @@ using namespace llvm;
#define DEBUG_TYPE "armtti"

static cl::opt<bool> EnableMaskedLoadStores(
"enable-arm-maskedldst", cl::Hidden, cl::init(false),
"enable-arm-maskedldst", cl::Hidden, cl::init(true),
cl::desc("Enable the generation of masked loads and stores"));

static cl::opt<bool> DisableLowOverheadLoops(
Expand Down
@@ -1,4 +1,4 @@
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -enable-arm-maskedldst=true -disable-mve-tail-predication=false --verify-machineinstrs %s -o - | FileCheck %s
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -disable-mve-tail-predication=false --verify-machineinstrs %s -o - | FileCheck %s

; CHECK-LABEL: vpsel_mul_reduce_add
; CHECK: dls lr, lr
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp,+fp-armv8d16sp,+fp16,+fpregs,+fullfp16 -enable-arm-maskedldst=true -disable-mve-tail-predication=false %s -o - | FileCheck %s
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp,+fp-armv8d16sp,+fp16,+fpregs,+fullfp16 -disable-mve-tail-predication=false %s -o - | FileCheck %s

define arm_aapcs_vfpcc void @fast_float_mul(float* nocapture %a, float* nocapture readonly %b, float* nocapture readonly %c, i32 %N) {
; CHECK-LABEL: fast_float_mul:
Expand Down
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -disable-mve-tail-predication=false -enable-arm-maskedldst=true %s -o - | FileCheck %s
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -disable-mve-tail-predication=false %s -o - | FileCheck %s

define arm_aapcs_vfpcc i32 @test_acc_scalar_char(i8 zeroext %a, i8* nocapture readonly %b, i32 %N) {
; CHECK-LABEL: test_acc_scalar_char:
Expand Down
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=armv8.1m.main -mattr=+mve -enable-arm-maskedldst=true -disable-mve-tail-predication=false --verify-machineinstrs %s -o - | FileCheck %s
; RUN: llc -mtriple=armv8.1m.main -mattr=+mve -disable-mve-tail-predication=false --verify-machineinstrs %s -o - | FileCheck %s

define dso_local i32 @mul_reduce_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32 %N) {
; CHECK-LABEL: mul_reduce_add:
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/Thumb2/mve-intrinsics/load-store.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -enable-arm-maskedldst -o - %s | FileCheck %s
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s

define arm_aapcs_vfpcc <8 x half> @test_vld1q_f16(half* %base) {
; CHECK-LABEL: test_vld1q_f16:
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/Thumb2/mve-masked-ldst-offset.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE

define i8* @ldrwu32_4(i8* %x, i8* %y, <4 x i32> *%m) {
; CHECK-LABEL: ldrwu32_4:
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/Thumb2/mve-masked-ldst-postinc.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE

define i8* @ldrwu32_4(i8* %x, i8* %y, <4 x i32> *%m) {
; CHECK-LABEL: ldrwu32_4:
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/Thumb2/mve-masked-ldst-preinc.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE

define i8* @ldrwu32_4(i8* %x, i8* %y, <4 x i32> *%m) {
; CHECK-LABEL: ldrwu32_4:
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE

define void @foo_v4i32_v4i32(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i32> *%src) {
; CHECK-LABEL: foo_v4i32_v4i32:
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/Thumb2/mve-masked-load.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE

define arm_aapcs_vfpcc <4 x i32> @masked_v4i32_align4_zero(<4 x i32> *%dest, <4 x i32> %a) {
; CHECK-LE-LABEL: masked_v4i32_align4_zero:
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/Thumb2/mve-masked-store.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE

define arm_aapcs_vfpcc void @masked_v4i32(<4 x i32> *%dest, <4 x i32> %a) {
; CHECK-LE-LABEL: masked_v4i32:
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Transforms/LoopVectorize/ARM/mve-maskedldst.ll
@@ -1,4 +1,4 @@
; RUN: opt -loop-vectorize -enable-arm-maskedldst < %s -S -o - | FileCheck %s
; RUN: opt -loop-vectorize < %s -S -o - | FileCheck %s

target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1-m.main-none-eabi"
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Transforms/LoopVectorize/ARM/mve-shiftcost.ll
@@ -1,5 +1,5 @@
; RUN: opt -loop-vectorize -enable-arm-maskedldst < %s -S -o - | FileCheck %s --check-prefix=CHECK
; RUN: opt -loop-vectorize -enable-arm-maskedldst -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=CHECK-COST
; RUN: opt -loop-vectorize < %s -S -o - | FileCheck %s --check-prefix=CHECK
; RUN: opt -loop-vectorize -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=CHECK-COST

target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m.main-arm-none-eabi"
Expand Down
@@ -1,6 +1,6 @@
; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf \
; RUN: -disable-mve-tail-predication=false -loop-vectorize -S < %s | \
; RUN: FileCheck %s -check-prefixes=CHECK,NO-FOLDING
; RUN: FileCheck %s -check-prefixes=CHECK,PREFER-FOLDING

; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf -mattr=-mve \
; RUN: -disable-mve-tail-predication=false -loop-vectorize \
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Transforms/LoopVectorize/ARM/tail-loop-folding.ll
@@ -1,7 +1,7 @@
; RUN: opt < %s -loop-vectorize -enable-arm-maskedldst -S | \
; RUN: opt < %s -loop-vectorize -S | \
; RUN: FileCheck %s -check-prefixes=COMMON,CHECK

; RUN: opt < %s -loop-vectorize -enable-arm-maskedldst -prefer-predicate-over-epilog -S | \
; RUN: opt < %s -loop-vectorize -prefer-predicate-over-epilog -S | \
; RUN: FileCheck -check-prefixes=COMMON,PREDFLAG %s

target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
Expand Down

0 comments on commit b1aba03

Please sign in to comment.