1,410 changes: 803 additions & 607 deletions llvm/test/CodeGen/X86/masked_memop.ll

Large diffs are not rendered by default.

58 changes: 28 additions & 30 deletions llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
; RUN: opt < %s -O3 -mcpu=corei7-avx -S | FileCheck %s -check-prefix=AVX1
; RUN: opt < %s -O3 -mcpu=core-avx2 -S | FileCheck %s -check-prefix=AVX2
; RUN: opt < %s -O3 -mcpu=corei7-avx -S | FileCheck %s -check-prefix=AVX -check-prefix=AVX1
; RUN: opt < %s -O3 -mcpu=core-avx2 -S | FileCheck %s -check-prefix=AVX -check-prefix=AVX2
; RUN: opt < %s -O3 -mcpu=knl -S | FileCheck %s -check-prefix=AVX512

;AVX1-NOT: llvm.masked

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc_linux"

Expand All @@ -18,12 +16,12 @@ target triple = "x86_64-pc_linux"
; }
;}

;AVX2-LABEL: @foo1
;AVX2: icmp slt <8 x i32> %wide.load, <i32 100, i32 100, i32 100
;AVX2: call <8 x i32> @llvm.masked.load.v8i32
;AVX2: add nsw <8 x i32>
;AVX2: call void @llvm.masked.store.v8i32
;AVX2: ret void
;AVX-LABEL: @foo1
;AVX: icmp slt <8 x i32> %wide.load, <i32 100, i32 100, i32 100
;AVX: call <8 x i32> @llvm.masked.load.v8i32
;AVX: add nsw <8 x i32>
;AVX: call void @llvm.masked.store.v8i32
;AVX: ret void

;AVX512-LABEL: @foo1
;AVX512: icmp slt <16 x i32> %wide.load, <i32 100, i32 100, i32 100
Expand Down Expand Up @@ -102,12 +100,12 @@ for.end: ; preds = %for.cond
; }
;}

;AVX2-LABEL: @foo2
;AVX2: icmp slt <8 x i32> %wide.load, <i32 100, i32 100, i32 100
;AVX2: call <8 x float> @llvm.masked.load.v8f32
;AVX2: fadd <8 x float>
;AVX2: call void @llvm.masked.store.v8f32
;AVX2: ret void
;AVX-LABEL: @foo2
;AVX: icmp slt <8 x i32> %wide.load, <i32 100, i32 100, i32 100
;AVX: call <8 x float> @llvm.masked.load.v8f32
;AVX: fadd <8 x float>
;AVX: call void @llvm.masked.store.v8f32
;AVX: ret void

;AVX512-LABEL: @foo2
;AVX512: icmp slt <16 x i32> %wide.load, <i32 100, i32 100, i32 100
Expand Down Expand Up @@ -187,13 +185,13 @@ for.end: ; preds = %for.cond
; }
;}

;AVX2-LABEL: @foo3
;AVX2: icmp slt <4 x i32> %wide.load, <i32 100, i32 100,
;AVX2: call <4 x double> @llvm.masked.load.v4f64
;AVX2: sitofp <4 x i32> %wide.load to <4 x double>
;AVX2: fadd <4 x double>
;AVX2: call void @llvm.masked.store.v4f64
;AVX2: ret void
;AVX-LABEL: @foo3
;AVX: icmp slt <4 x i32> %wide.load, <i32 100, i32 100,
;AVX: call <4 x double> @llvm.masked.load.v4f64
;AVX: sitofp <4 x i32> %wide.load to <4 x double>
;AVX: fadd <4 x double>
;AVX: call void @llvm.masked.store.v4f64
;AVX: ret void

;AVX512-LABEL: @foo3
;AVX512: icmp slt <8 x i32> %wide.load, <i32 100, i32 100,
Expand Down Expand Up @@ -275,9 +273,9 @@ for.end: ; preds = %for.cond
; }
;}

;AVX2-LABEL: @foo4
;AVX2-NOT: llvm.masked
;AVX2: ret void
;AVX-LABEL: @foo4
;AVX-NOT: llvm.masked
;AVX: ret void

;AVX512-LABEL: @foo4
;AVX512-NOT: llvm.masked
Expand Down Expand Up @@ -349,10 +347,10 @@ for.end: ; preds = %for.cond

; The loop here should not be vectorized because it stores a trapping
; constant expression (the sdiv checked for below).
;AVX2-LABEL: @foo5
;AVX2-NOT: llvm.masked
;AVX2: store i32 sdiv
;AVX2: ret void
;AVX-LABEL: @foo5
;AVX-NOT: llvm.masked
;AVX: store i32 sdiv
;AVX: ret void

;AVX512-LABEL: @foo5
;AVX512-NOT: llvm.masked
Expand Down