60 changes: 30 additions & 30 deletions llvm/test/CodeGen/VE/Scalar/store.ll
@@ -1,117 +1,117 @@
; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s

; Function Attrs: norecurse nounwind readonly
define void @storef128(fp128* nocapture %0, fp128 %1) {
define void @storef128(ptr nocapture %0, fp128 %1) {
; CHECK-LABEL: storef128:
; CHECK: # %bb.0:
; CHECK-NEXT: st %s2, 8(, %s0)
; CHECK-NEXT: st %s3, (, %s0)
; CHECK-NEXT: b.l.t (, %s10)
store fp128 %1, fp128* %0, align 16
store fp128 %1, ptr %0, align 16
ret void
}

; Function Attrs: norecurse nounwind readonly
define void @storef64(double* nocapture %0, double %1) {
define void @storef64(ptr nocapture %0, double %1) {
; CHECK-LABEL: storef64:
; CHECK: # %bb.0:
; CHECK-NEXT: st %s1, (, %s0)
; CHECK-NEXT: b.l.t (, %s10)
store double %1, double* %0, align 16
store double %1, ptr %0, align 16
ret void
}

; Function Attrs: norecurse nounwind readonly
define void @storef32(float* nocapture %0, float %1) {
define void @storef32(ptr nocapture %0, float %1) {
; CHECK-LABEL: storef32:
; CHECK: # %bb.0:
; CHECK-NEXT: stu %s1, (, %s0)
; CHECK-NEXT: b.l.t (, %s10)
store float %1, float* %0, align 16
store float %1, ptr %0, align 16
ret void
}

; Function Attrs: norecurse nounwind readonly
define void @storei128(i128* nocapture %0, i128 %1) {
define void @storei128(ptr nocapture %0, i128 %1) {
; CHECK-LABEL: storei128:
; CHECK: # %bb.0:
; CHECK-NEXT: st %s2, 8(, %s0)
; CHECK-NEXT: st %s1, (, %s0)
; CHECK-NEXT: b.l.t (, %s10)
store i128 %1, i128* %0, align 16
store i128 %1, ptr %0, align 16
ret void
}

; Function Attrs: norecurse nounwind readonly
define void @storei64(i64* nocapture %0, i64 %1) {
define void @storei64(ptr nocapture %0, i64 %1) {
; CHECK-LABEL: storei64:
; CHECK: # %bb.0:
; CHECK-NEXT: st %s1, (, %s0)
; CHECK-NEXT: b.l.t (, %s10)
store i64 %1, i64* %0, align 16
store i64 %1, ptr %0, align 16
ret void
}

; Function Attrs: norecurse nounwind readonly
define void @storei32(i32* nocapture %0, i32 %1) {
define void @storei32(ptr nocapture %0, i32 %1) {
; CHECK-LABEL: storei32:
; CHECK: # %bb.0:
; CHECK-NEXT: stl %s1, (, %s0)
; CHECK-NEXT: b.l.t (, %s10)
store i32 %1, i32* %0, align 16
store i32 %1, ptr %0, align 16
ret void
}

; Function Attrs: norecurse nounwind readonly
define void @storei32tr(i32* nocapture %0, i64 %1) {
define void @storei32tr(ptr nocapture %0, i64 %1) {
; CHECK-LABEL: storei32tr:
; CHECK: # %bb.0:
; CHECK-NEXT: stl %s1, (, %s0)
; CHECK-NEXT: b.l.t (, %s10)
%3 = trunc i64 %1 to i32
store i32 %3, i32* %0, align 16
store i32 %3, ptr %0, align 16
ret void
}

; Function Attrs: norecurse nounwind readonly
define void @storei16(i16* nocapture %0, i16 %1) {
define void @storei16(ptr nocapture %0, i16 %1) {
; CHECK-LABEL: storei16:
; CHECK: # %bb.0:
; CHECK-NEXT: st2b %s1, (, %s0)
; CHECK-NEXT: b.l.t (, %s10)
store i16 %1, i16* %0, align 16
store i16 %1, ptr %0, align 16
ret void
}

; Function Attrs: norecurse nounwind readonly
define void @storei16tr(i16* nocapture %0, i64 %1) {
define void @storei16tr(ptr nocapture %0, i64 %1) {
; CHECK-LABEL: storei16tr:
; CHECK: # %bb.0:
; CHECK-NEXT: st2b %s1, (, %s0)
; CHECK-NEXT: b.l.t (, %s10)
%3 = trunc i64 %1 to i16
store i16 %3, i16* %0, align 16
store i16 %3, ptr %0, align 16
ret void
}

; Function Attrs: norecurse nounwind readonly
define void @storei8(i8* nocapture %0, i8 %1) {
define void @storei8(ptr nocapture %0, i8 %1) {
; CHECK-LABEL: storei8:
; CHECK: # %bb.0:
; CHECK-NEXT: st1b %s1, (, %s0)
; CHECK-NEXT: b.l.t (, %s10)
store i8 %1, i8* %0, align 16
store i8 %1, ptr %0, align 16
ret void
}

; Function Attrs: norecurse nounwind readonly
define void @storei8tr(i8* nocapture %0, i64 %1) {
define void @storei8tr(ptr nocapture %0, i64 %1) {
; CHECK-LABEL: storei8tr:
; CHECK: # %bb.0:
; CHECK-NEXT: st1b %s1, (, %s0)
; CHECK-NEXT: b.l.t (, %s10)
%3 = trunc i64 %1 to i8
store i8 %3, i8* %0, align 16
store i8 %3, ptr %0, align 16
ret void
}

@@ -124,7 +124,7 @@ define void @storef128stk(fp128 %0) {
; CHECK-NEXT: adds.l %s11, 16, %s11
; CHECK-NEXT: b.l.t (, %s10)
%addr = alloca fp128, align 16
store fp128 %0, fp128* %addr, align 16
store fp128 %0, ptr %addr, align 16
ret void
}

@@ -136,7 +136,7 @@ define void @storef64stk(double %0) {
; CHECK-NEXT: adds.l %s11, 16, %s11
; CHECK-NEXT: b.l.t (, %s10)
%addr = alloca double, align 16
store double %0, double* %addr, align 16
store double %0, ptr %addr, align 16
ret void
}

@@ -148,7 +148,7 @@ define void @storef32stk(float %0) {
; CHECK-NEXT: adds.l %s11, 16, %s11
; CHECK-NEXT: b.l.t (, %s10)
%addr = alloca float, align 16
store float %0, float* %addr, align 16
store float %0, ptr %addr, align 16
ret void
}

@@ -161,7 +161,7 @@ define void @storei128stk(i128 %0) {
; CHECK-NEXT: adds.l %s11, 16, %s11
; CHECK-NEXT: b.l.t (, %s10)
%addr = alloca i128, align 16
store i128 %0, i128* %addr, align 16
store i128 %0, ptr %addr, align 16
ret void
}

@@ -173,7 +173,7 @@ define void @storei64stk(i64 %0) {
; CHECK-NEXT: adds.l %s11, 16, %s11
; CHECK-NEXT: b.l.t (, %s10)
%addr = alloca i64, align 16
store i64 %0, i64* %addr, align 16
store i64 %0, ptr %addr, align 16
ret void
}

@@ -185,7 +185,7 @@ define void @storei32stk(i32 %0) {
; CHECK-NEXT: adds.l %s11, 16, %s11
; CHECK-NEXT: b.l.t (, %s10)
%addr = alloca i32, align 16
store i32 %0, i32* %addr, align 16
store i32 %0, ptr %addr, align 16
ret void
}

@@ -197,7 +197,7 @@ define void @storei16stk(i16 %0) {
; CHECK-NEXT: adds.l %s11, 16, %s11
; CHECK-NEXT: b.l.t (, %s10)
%addr = alloca i16, align 16
store i16 %0, i16* %addr, align 16
store i16 %0, ptr %addr, align 16
ret void
}

@@ -209,6 +209,6 @@ define void @storei8stk(i8 %0) {
; CHECK-NEXT: adds.l %s11, 16, %s11
; CHECK-NEXT: b.l.t (, %s10)
%addr = alloca i8, align 16
store i8 %0, i8* %addr, align 16
store i8 %0, ptr %addr, align 16
ret void
}
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/VE/Scalar/store_gv.ll
@@ -19,7 +19,7 @@ define void @storef128com(fp128 %0) {
; CHECK-NEXT: st %s0, 8(, %s2)
; CHECK-NEXT: st %s1, (, %s2)
; CHECK-NEXT: b.l.t (, %s10)
store fp128 %0, fp128* @vf128, align 16
store fp128 %0, ptr @vf128, align 16
ret void
}

@@ -32,7 +32,7 @@ define void @storef64com(double %0) {
; CHECK-NEXT: lea.sl %s1, vf64@hi(, %s1)
; CHECK-NEXT: st %s0, (, %s1)
; CHECK-NEXT: b.l.t (, %s10)
store double %0, double* @vf64, align 8
store double %0, ptr @vf64, align 8
ret void
}

@@ -45,7 +45,7 @@ define void @storef32com(float %0) {
; CHECK-NEXT: lea.sl %s1, vf32@hi(, %s1)
; CHECK-NEXT: stu %s0, (, %s1)
; CHECK-NEXT: b.l.t (, %s10)
store float %0, float* @vf32, align 4
store float %0, ptr @vf32, align 4
ret void
}

@@ -59,7 +59,7 @@ define void @storei128com(i128 %0) {
; CHECK-NEXT: st %s1, 8(, %s2)
; CHECK-NEXT: st %s0, (, %s2)
; CHECK-NEXT: b.l.t (, %s10)
store i128 %0, i128* @vi128, align 16
store i128 %0, ptr @vi128, align 16
ret void
}

@@ -72,7 +72,7 @@ define void @storei64com(i64 %0) {
; CHECK-NEXT: lea.sl %s1, vi64@hi(, %s1)
; CHECK-NEXT: st %s0, (, %s1)
; CHECK-NEXT: b.l.t (, %s10)
store i64 %0, i64* @vi64, align 8
store i64 %0, ptr @vi64, align 8
ret void
}

@@ -85,7 +85,7 @@ define void @storei32com(i32 %0) {
; CHECK-NEXT: lea.sl %s1, vi32@hi(, %s1)
; CHECK-NEXT: stl %s0, (, %s1)
; CHECK-NEXT: b.l.t (, %s10)
store i32 %0, i32* @vi32, align 4
store i32 %0, ptr @vi32, align 4
ret void
}

@@ -98,7 +98,7 @@ define void @storei16com(i16 %0) {
; CHECK-NEXT: lea.sl %s1, vi16@hi(, %s1)
; CHECK-NEXT: st2b %s0, (, %s1)
; CHECK-NEXT: b.l.t (, %s10)
store i16 %0, i16* @vi16, align 2
store i16 %0, ptr @vi16, align 2
ret void
}

@@ -111,6 +111,6 @@ define void @storei8com(i8 %0) {
; CHECK-NEXT: lea.sl %s1, vi8@hi(, %s1)
; CHECK-NEXT: st1b %s0, (, %s1)
; CHECK-NEXT: b.l.t (, %s10)
store i8 %0, i8* @vi8, align 1
store i8 %0, ptr @vi8, align 1
ret void
}
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/VE/Scalar/symbol_relocation_tls.ll
@@ -12,7 +12,7 @@
@y = internal thread_local global i32 0, align 4

; Function Attrs: norecurse nounwind readnone
define nonnull i32* @get_global() {
define nonnull ptr @get_global() {
; GENDYN: lea %s0, (-24)
; GENDYN-NEXT: R_VE_TLS_GD_LO32 x
; GENDYN-NEXT: and %s0, %s0, (32)0
@@ -47,11 +47,11 @@ define nonnull i32* @get_global() {
; GENDYNPIC-NEXT: bsic %s10, (, %s12)
; GENDYNPIC-NEXT: or %s11, 0, %s9
entry:
ret i32* @x
ret ptr @x
}

; Function Attrs: norecurse nounwind readnone
define nonnull i32* @get_local() {
define nonnull ptr @get_local() {
; GENDYN: lea %s0, (-24)
; GENDYN-NEXT: R_VE_TLS_GD_LO32 y
; GENDYN-NEXT: and %s0, %s0, (32)0
@@ -86,7 +86,7 @@ define nonnull i32* @get_local() {
; GENDYNPIC-NEXT: bsic %s10, (, %s12)
; GENDYNPIC-NEXT: or %s11, 0, %s9
entry:
ret i32* @y
ret ptr @y
}

; Function Attrs: norecurse nounwind
@@ -129,7 +129,7 @@ define void @set_global(i32 %v) {
; GENDYNPIC-NEXT: ld %s18, 288(, %s11)
; GENDYNPIC-NEXT: or %s11, 0, %s9
entry:
store i32 %v, i32* @x, align 4
store i32 %v, ptr @x, align 4
ret void
}

@@ -173,6 +173,6 @@ define void @set_local(i32 %v) {
; GENDYNPIC-NEXT: ld %s18, 288(, %s11)
; GENDYNPIC-NEXT: or %s11, 0, %s9
entry:
store i32 %v, i32* @y, align 4
store i32 %v, ptr @y, align 4
ret void
}
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/VE/Scalar/tls.ll
@@ -9,7 +9,7 @@
@y = internal thread_local global i32 0, align 4

; Function Attrs: norecurse nounwind readnone
define nonnull i32* @get_global() {
define nonnull ptr @get_global() {
; GENDYN-LABEL: get_global:
; GENDYN: .LBB{{[0-9]+}}_2:
; GENDYN-NEXT: lea %s0, x@tls_gd_lo(-24)
@@ -46,11 +46,11 @@ define nonnull i32* @get_global() {
; LOCAL-NEXT: adds.l %s0, %s14, %s34
; LOCAL-NEXT: or %s11, 0, %s9
entry:
ret i32* @x
ret ptr @x
}

; Function Attrs: norecurse nounwind readnone
define nonnull i32* @get_local() {
define nonnull ptr @get_local() {
; GENDYN-LABEL: get_local:
; GENDYN: .LBB{{[0-9]+}}_2:
; GENDYN-NEXT: lea %s0, y@tls_gd_lo(-24)
@@ -87,7 +87,7 @@ define nonnull i32* @get_local() {
; LOCAL-NEXT: adds.l %s0, %s14, %s34
; LOCAL-NEXT: or %s11, 0, %s9
entry:
ret i32* @y
ret ptr @y
}

; Function Attrs: norecurse nounwind
@@ -137,7 +137,7 @@ define void @set_global(i32 %v) {
; LOCAL-NEXT: stl %s0, (, %s34)
; LOCAL-NEXT: or %s11, 0, %s9
entry:
store i32 %v, i32* @x, align 4
store i32 %v, ptr @x, align 4
ret void
}

@@ -188,6 +188,6 @@ define void @set_local(i32 %v) {
; LOCAL-NEXT: stl %s0, (, %s34)
; LOCAL-NEXT: or %s11, 0, %s9
entry:
store i32 %v, i32* @y, align 4
store i32 %v, ptr @y, align 4
ret void
}
28 changes: 14 additions & 14 deletions llvm/test/CodeGen/VE/Scalar/truncstore.ll
@@ -1,72 +1,72 @@
; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s

define void @func0(i1 signext %p, i8* %a) {
define void @func0(i1 signext %p, ptr %a) {
; CHECK-LABEL: func0:
; CHECK: # %bb.0:
; CHECK-NEXT: st1b %s0, (, %s1)
; CHECK-NEXT: b.l.t (, %s10)
%p.conv = sext i1 %p to i8
store i8 %p.conv, i8* %a, align 2
store i8 %p.conv, ptr %a, align 2
ret void
}

define void @func1(i8 signext %p, i16* %a) {
define void @func1(i8 signext %p, ptr %a) {
; CHECK-LABEL: func1:
; CHECK: # %bb.0:
; CHECK-NEXT: st2b %s0, (, %s1)
; CHECK-NEXT: b.l.t (, %s10)
%p.conv = sext i8 %p to i16
store i16 %p.conv, i16* %a, align 2
store i16 %p.conv, ptr %a, align 2
ret void
}

define void @func2(i8 signext %p, i32* %a) {
define void @func2(i8 signext %p, ptr %a) {
; CHECK-LABEL: func2:
; CHECK: # %bb.0:
; CHECK-NEXT: stl %s0, (, %s1)
; CHECK-NEXT: b.l.t (, %s10)
%p.conv = sext i8 %p to i32
store i32 %p.conv, i32* %a, align 4
store i32 %p.conv, ptr %a, align 4
ret void
}

define void @func3(i8 signext %p, i64* %a) {
define void @func3(i8 signext %p, ptr %a) {
; CHECK-LABEL: func3:
; CHECK: # %bb.0:
; CHECK-NEXT: st %s0, (, %s1)
; CHECK-NEXT: b.l.t (, %s10)
%p.conv = sext i8 %p to i64
store i64 %p.conv, i64* %a, align 8
store i64 %p.conv, ptr %a, align 8
ret void
}

define void @func5(i16 signext %p, i32* %a) {
define void @func5(i16 signext %p, ptr %a) {
; CHECK-LABEL: func5:
; CHECK: # %bb.0:
; CHECK-NEXT: stl %s0, (, %s1)
; CHECK-NEXT: b.l.t (, %s10)
%p.conv = sext i16 %p to i32
store i32 %p.conv, i32* %a, align 4
store i32 %p.conv, ptr %a, align 4
ret void
}

define void @func6(i16 signext %p, i64* %a) {
define void @func6(i16 signext %p, ptr %a) {
; CHECK-LABEL: func6:
; CHECK: # %bb.0:
; CHECK-NEXT: st %s0, (, %s1)
; CHECK-NEXT: b.l.t (, %s10)
%p.conv = sext i16 %p to i64
store i64 %p.conv, i64* %a, align 8
store i64 %p.conv, ptr %a, align 8
ret void
}

define void @func8(i32 %p, i64* %a) {
define void @func8(i32 %p, ptr %a) {
; CHECK-LABEL: func8:
; CHECK: # %bb.0:
; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
; CHECK-NEXT: st %s0, (, %s1)
; CHECK-NEXT: b.l.t (, %s10)
%p.conv = sext i32 %p to i64
store i64 %p.conv, i64* %a, align 8
store i64 %p.conv, ptr %a, align 8
ret void
}
69 changes: 34 additions & 35 deletions llvm/test/CodeGen/VE/Scalar/va_arg.ll
@@ -43,44 +43,43 @@ define i32 @func_vainout(i32, ...) {
; CHECK: bsic
; CHECK: bsic

%a = alloca i8*, align 8
%a8 = bitcast i8** %a to i8*
call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %a8)
call void @llvm.va_start(i8* nonnull %a8)
%p0 = va_arg i8** %a, i32
%p1 = va_arg i8** %a, i16
%p2 = va_arg i8** %a, i8
%p3 = va_arg i8** %a, i32
%p4 = va_arg i8** %a, i16
%p5 = va_arg i8** %a, i8
%p6 = va_arg i8** %a, float
%p7 = va_arg i8** %a, i8*
%p8 = va_arg i8** %a, i64
%p9 = va_arg i8** %a, double
%p10 = va_arg i8** %a, fp128
%p11 = va_arg i8** %a, double
call void @llvm.va_end(i8* nonnull %a8)
call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %a8)
%pf0 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0), i32 %p0)
%a = alloca ptr, align 8
call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %a)
call void @llvm.va_start(ptr nonnull %a)
%p0 = va_arg ptr %a, i32
%p1 = va_arg ptr %a, i16
%p2 = va_arg ptr %a, i8
%p3 = va_arg ptr %a, i32
%p4 = va_arg ptr %a, i16
%p5 = va_arg ptr %a, i8
%p6 = va_arg ptr %a, float
%p7 = va_arg ptr %a, ptr
%p8 = va_arg ptr %a, i64
%p9 = va_arg ptr %a, double
%p10 = va_arg ptr %a, fp128
%p11 = va_arg ptr %a, double
call void @llvm.va_end(ptr nonnull %a)
call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %a)
%pf0 = call i32 (ptr, ...) @printf(ptr @.str, i32 %p0)
%p1.s32 = sext i16 %p1 to i32
%pf1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.1, i64 0, i64 0), i32 %p1.s32)
%pf1 = call i32 (ptr, ...) @printf(ptr @.str.1, i32 %p1.s32)
%p2.s32 = sext i8 %p2 to i32
%pf2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.2, i64 0, i64 0), i32 %p2.s32)
%pf3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.3, i64 0, i64 0), i32 %p3)
%pf2 = call i32 (ptr, ...) @printf(ptr @.str.2, i32 %p2.s32)
%pf3 = call i32 (ptr, ...) @printf(ptr @.str.3, i32 %p3)
%p4.z32 = zext i16 %p4 to i32
%pf4 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.4, i64 0, i64 0), i32 %p4.z32)
%pf4 = call i32 (ptr, ...) @printf(ptr @.str.4, i32 %p4.z32)
%p5.z32 = zext i8 %p5 to i32
%pf5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.5, i64 0, i64 0), i32 %p5.z32)
%pf6 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.6, i64 0, i64 0), float %p6)
%pf7 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.7, i64 0, i64 0), i8* %p7)
%pf8 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.8, i64 0, i64 0), i64 %p8)
%pf9 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.9, i64 0, i64 0), double %p9)
%pf10 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.10, i64 0, i64 0), fp128 %p10)
%pf11 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.9, i64 0, i64 0), double %p11)
%pf5 = call i32 (ptr, ...) @printf(ptr @.str.5, i32 %p5.z32)
%pf6 = call i32 (ptr, ...) @printf(ptr @.str.6, float %p6)
%pf7 = call i32 (ptr, ...) @printf(ptr @.str.7, ptr %p7)
%pf8 = call i32 (ptr, ...) @printf(ptr @.str.8, i64 %p8)
%pf9 = call i32 (ptr, ...) @printf(ptr @.str.9, double %p9)
%pf10 = call i32 (ptr, ...) @printf(ptr @.str.10, fp128 %p10)
%pf11 = call i32 (ptr, ...) @printf(ptr @.str.9, double %p11)
ret i32 0
}
declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
declare void @llvm.va_start(i8*)
declare void @llvm.va_end(i8*)
declare i32 @printf(i8* nocapture readonly, ...)
declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
declare void @llvm.va_start(ptr)
declare void @llvm.va_end(ptr)
declare i32 @printf(ptr nocapture readonly, ...)
131 changes: 63 additions & 68 deletions llvm/test/CodeGen/VE/Scalar/va_callee.ll
@@ -12,32 +12,31 @@ define i32 @va_func(i32, ...) {
; CHECK: ld %s24, 240(, %s9)
; CHECK: ld %s25, 248(, %s9)

%va = alloca i8*, align 8
%va.i8 = bitcast i8** %va to i8*
call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %va.i8)
call void @llvm.va_start(i8* nonnull %va.i8)
%p1 = va_arg i8** %va, i32
%p2 = va_arg i8** %va, i16
%p3 = va_arg i8** %va, i8
%p4 = va_arg i8** %va, i32
%p5 = va_arg i8** %va, i16
%p6 = va_arg i8** %va, i8
%p7 = va_arg i8** %va, float
%p8 = va_arg i8** %va, i8*
%p9 = va_arg i8** %va, i64
%p10 = va_arg i8** %va, double
call void @llvm.va_end(i8* nonnull %va.i8)
%va = alloca ptr, align 8
call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %va)
call void @llvm.va_start(ptr nonnull %va)
%p1 = va_arg ptr %va, i32
%p2 = va_arg ptr %va, i16
%p3 = va_arg ptr %va, i8
%p4 = va_arg ptr %va, i32
%p5 = va_arg ptr %va, i16
%p6 = va_arg ptr %va, i8
%p7 = va_arg ptr %va, float
%p8 = va_arg ptr %va, ptr
%p9 = va_arg ptr %va, i64
%p10 = va_arg ptr %va, double
call void @llvm.va_end(ptr nonnull %va)
call void @use_i32(i32 %p1)
call void @use_s16(i16 %p2)
call void @use_s8(i8 %p3)
call void @use_i32(i32 %p4)
call void @use_u16(i16 %p5)
call void @use_u8(i8 %p6)
call void @use_float(float %p7)
call void @use_i8p(i8* %p8)
call void @use_i8p(ptr %p8)
call void @use_i64(i64 %p9)
call void @use_double(double %p10)
call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %va.i8)
call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %va)
ret i32 0
}

@@ -53,36 +52,34 @@ define i32 @va_copy0(i32, ...) {
; CHECK: ld %s24,
; CHECK: ld %s25,

%va = alloca i8*, align 8
%va.i8 = bitcast i8** %va to i8*
call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %va.i8)
call void @llvm.va_start(i8* nonnull %va.i8)
%vb = alloca i8*, align 8
%vb.i8 = bitcast i8** %vb to i8*
call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %va.i8)
call void @llvm.va_copy(i8* nonnull %vb.i8, i8* nonnull %va.i8)
call void @llvm.va_end(i8* nonnull %va.i8)
call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %va.i8)
%p1 = va_arg i8** %vb, i32
%p2 = va_arg i8** %vb, i16
%p3 = va_arg i8** %vb, i8
%p4 = va_arg i8** %vb, i32
%p5 = va_arg i8** %vb, i16
%p6 = va_arg i8** %vb, i8
%p7 = va_arg i8** %vb, float
%p8 = va_arg i8** %vb, i8*
%p9 = va_arg i8** %vb, i64
%p10 = va_arg i8** %vb, double
call void @llvm.va_end(i8* nonnull %vb.i8)
call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %vb.i8)
%va = alloca ptr, align 8
call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %va)
call void @llvm.va_start(ptr nonnull %va)
%vb = alloca ptr, align 8
call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %va)
call void @llvm.va_copy(ptr nonnull %vb, ptr nonnull %va)
call void @llvm.va_end(ptr nonnull %va)
call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %va)
%p1 = va_arg ptr %vb, i32
%p2 = va_arg ptr %vb, i16
%p3 = va_arg ptr %vb, i8
%p4 = va_arg ptr %vb, i32
%p5 = va_arg ptr %vb, i16
%p6 = va_arg ptr %vb, i8
%p7 = va_arg ptr %vb, float
%p8 = va_arg ptr %vb, ptr
%p9 = va_arg ptr %vb, i64
%p10 = va_arg ptr %vb, double
call void @llvm.va_end(ptr nonnull %vb)
call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %vb)
call void @use_i32(i32 %p1)
call void @use_s16(i16 %p2)
call void @use_s8(i8 %p3)
call void @use_i32(i32 %p4)
call void @use_u16(i16 %p5)
call void @use_u8(i8 %p6)
call void @use_float(float %p7)
call void @use_i8p(i8* %p8)
call void @use_i8p(ptr %p8)
call void @use_i64(i64 %p9)
call void @use_double(double %p10)
ret i32 0
@@ -100,38 +97,36 @@ define i32 @va_copy8(i32, ...) {
; CHECK: ld %s24,
; CHECK: ld %s25,

%va = alloca i8*, align 8
%va.i8 = bitcast i8** %va to i8*
call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %va.i8)
call void @llvm.va_start(i8* nonnull %va.i8)
%p1 = va_arg i8** %va, i32
%p2 = va_arg i8** %va, i16
%p3 = va_arg i8** %va, i8
%p4 = va_arg i8** %va, i32
%p5 = va_arg i8** %va, i16
%p6 = va_arg i8** %va, i8
%p7 = va_arg i8** %va, float
%va = alloca ptr, align 8
call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %va)
call void @llvm.va_start(ptr nonnull %va)
%p1 = va_arg ptr %va, i32
%p2 = va_arg ptr %va, i16
%p3 = va_arg ptr %va, i8
%p4 = va_arg ptr %va, i32
%p5 = va_arg ptr %va, i16
%p6 = va_arg ptr %va, i8
%p7 = va_arg ptr %va, float

%vc = alloca i8*, align 8
%vc.i8 = bitcast i8** %vc to i8*
call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %va.i8)
call void @llvm.va_copy(i8* nonnull %vc.i8, i8* nonnull %va.i8)
call void @llvm.va_end(i8* nonnull %va.i8)
%p8 = va_arg i8** %vc, i8*
%p9 = va_arg i8** %vc, i64
%p10 = va_arg i8** %vc, double
call void @llvm.va_end(i8* nonnull %vc.i8)
%vc = alloca ptr, align 8
call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %va)
call void @llvm.va_copy(ptr nonnull %vc, ptr nonnull %va)
call void @llvm.va_end(ptr nonnull %va)
%p8 = va_arg ptr %vc, ptr
%p9 = va_arg ptr %vc, i64
%p10 = va_arg ptr %vc, double
call void @llvm.va_end(ptr nonnull %vc)
call void @use_i32(i32 %p1)
call void @use_s16(i16 %p2)
call void @use_s8(i8 %p3)
call void @use_i32(i32 %p4)
call void @use_u16(i16 %p5)
call void @use_u8(i8 %p6)
call void @use_float(float %p7)
call void @use_i8p(i8* %p8)
call void @use_i8p(ptr %p8)
call void @use_i64(i64 %p9)
call void @use_double(double %p10)
call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %va.i8)
call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %va)
ret i32 0
}

@@ -141,12 +136,12 @@ declare void @use_u16(i16 zeroext)
declare void @use_u8(i8 zeroext)
declare void @use_s16(i16 signext)
declare void @use_s8(i8 signext)
declare void @use_i8p(i8*)
declare void @use_i8p(ptr)
declare void @use_float(float)
declare void @use_double(double)

declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
declare void @llvm.va_start(i8*)
declare void @llvm.va_copy(i8*, i8*)
declare void @llvm.va_end(i8*)
declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
declare void @llvm.va_start(ptr)
declare void @llvm.va_copy(ptr, ptr)
declare void @llvm.va_end(ptr)
declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/VE/Scalar/va_caller.ll
@@ -48,6 +48,6 @@ define i32 @caller() {
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: or %s0, 0, (0)1
; CHECK-NEXT: or %s11, 0, %s9
call i32 (i32, ...) @func(i32 0, i16 1, i8 2, i32 3, i16 4, i8 5, float 6.0, i8* null, i64 8, double 9.0, i128 10, fp128 0xLA000000000000000)
call i32 (i32, ...) @func(i32 0, i16 1, i8 2, i32 3, i16 4, i8 5, float 6.0, ptr null, i64 8, double 9.0, i128 10, fp128 0xLA000000000000000)
ret i32 0
}
22 changes: 11 additions & 11 deletions llvm/test/CodeGen/VE/VELIntrinsics/lsv.ll
@@ -6,7 +6,7 @@
;;; We test LSVrr_v and LVSvr instructions.

; Function Attrs: nounwind
define void @lsv_vvss(i8* %0, i64 %1, i32 signext %2) {
define void @lsv_vvss(ptr %0, i64 %1, i32 signext %2) {
; CHECK-LABEL: lsv_vvss:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s3, 256
@@ -16,23 +16,23 @@ define void @lsv_vvss(i8* %0, i64 %1, i32 signext %2) {
; CHECK-NEXT: lsv %v0(%s2), %s1
; CHECK-NEXT: vst %v0, 8, %s0
; CHECK-NEXT: b.l.t (, %s10)
%4 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256)
%4 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, ptr %0, i32 256)
%5 = tail call fast <256 x double> @llvm.ve.vl.lsv.vvss(<256 x double> %4, i32 %2, i64 %1)
tail call void @llvm.ve.vl.vst.vssl(<256 x double> %5, i64 8, i8* %0, i32 256)
tail call void @llvm.ve.vl.vst.vssl(<256 x double> %5, i64 8, ptr %0, i32 256)
ret void
}

; Function Attrs: nounwind readonly
declare <256 x double> @llvm.ve.vl.vld.vssl(i64, i8*, i32)
declare <256 x double> @llvm.ve.vl.vld.vssl(i64, ptr, i32)

; Function Attrs: nounwind readnone
declare <256 x double> @llvm.ve.vl.lsv.vvss(<256 x double>, i32, i64)

; Function Attrs: nounwind writeonly
declare void @llvm.ve.vl.vst.vssl(<256 x double>, i64, i8*, i32)
declare void @llvm.ve.vl.vst.vssl(<256 x double>, i64, ptr, i32)

; Function Attrs: nounwind readonly
define i64 @lvsl_vssl_imm(i8* readonly %0, i32 signext %1) {
define i64 @lvsl_vssl_imm(ptr readonly %0, i32 signext %1) {
; CHECK-LABEL: lvsl_vssl_imm:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s2, 256
@@ -41,7 +41,7 @@ define i64 @lvsl_vssl_imm(i8* readonly %0, i32 signext %1) {
; CHECK-NEXT: and %s0, %s1, (32)0
; CHECK-NEXT: lvs %s0, %v0(%s0)
; CHECK-NEXT: b.l.t (, %s10)
%3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256)
%3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, ptr %0, i32 256)
%4 = tail call i64 @llvm.ve.vl.lvsl.svs(<256 x double> %3, i32 %1)
ret i64 %4
}
@@ -50,7 +50,7 @@ define i64 @lvsl_vssl_imm(i8* readonly %0, i32 signext %1) {
declare i64 @llvm.ve.vl.lvsl.svs(<256 x double>, i32)

; Function Attrs: nounwind readonly
define double @lvsd_vssl_imm(i8* readonly %0, i32 signext %1) {
define double @lvsd_vssl_imm(ptr readonly %0, i32 signext %1) {
; CHECK-LABEL: lvsd_vssl_imm:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s2, 256
@@ -59,7 +59,7 @@ define double @lvsd_vssl_imm(i8* readonly %0, i32 signext %1) {
; CHECK-NEXT: and %s0, %s1, (32)0
; CHECK-NEXT: lvs %s0, %v0(%s0)
; CHECK-NEXT: b.l.t (, %s10)
%3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256)
%3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, ptr %0, i32 256)
%4 = tail call fast double @llvm.ve.vl.lvsd.svs(<256 x double> %3, i32 %1)
ret double %4
}
@@ -68,7 +68,7 @@ define double @lvsd_vssl_imm(i8* readonly %0, i32 signext %1) {
declare double @llvm.ve.vl.lvsd.svs(<256 x double>, i32)

; Function Attrs: nounwind readonly
define float @lvss_vssl_imm(i8* readonly %0, i32 signext %1) {
define float @lvss_vssl_imm(ptr readonly %0, i32 signext %1) {
; CHECK-LABEL: lvss_vssl_imm:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s2, 256
@@ -77,7 +77,7 @@ define float @lvss_vssl_imm(i8* readonly %0, i32 signext %1) {
; CHECK-NEXT: and %s0, %s1, (32)0
; CHECK-NEXT: lvs %s0, %v0(%s0)
; CHECK-NEXT: b.l.t (, %s10)
%3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256)
%3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, ptr %0, i32 256)
%4 = tail call fast float @llvm.ve.vl.lvss.svs(<256 x double> %3, i32 %1)
ret float %4
}
46 changes: 23 additions & 23 deletions llvm/test/CodeGen/VE/VELIntrinsics/lvlgen.ll
@@ -3,14 +3,14 @@
; Test for correct placement of 'lvl' instructions

; Function Attrs: nounwind readonly
declare <256 x double> @llvm.ve.vl.vld.vssl(i64, i8*, i32)
declare void @llvm.ve.vl.vst.vssl(<256 x double>, i64, i8*, i32)
declare <256 x double> @llvm.ve.vl.vld.vssl(i64, ptr, i32)
declare void @llvm.ve.vl.vst.vssl(<256 x double>, i64, ptr, i32)

; Check that the backend can handle constant VL as well as parametric VL
; sources.

; Function Attrs: nounwind
define void @switching_vl(i32 %evl, i32 %evl2, i8* %P, i8* %Q) {
define void @switching_vl(i32 %evl, i32 %evl2, ptr %P, ptr %Q) {
; CHECK-LABEL: switching_vl:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s4, 256
@@ -30,20 +30,20 @@ define void @switching_vl(i32 %evl, i32 %evl2, i8* %P, i8* %Q) {
; CHECK-NEXT: lvl %s0
; CHECK-NEXT: vst %v0, 16, %s3
; CHECK-NEXT: b.l.t (, %s10)
%l0 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %P, i32 256)
tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l0, i64 16, i8* %Q, i32 %evl)
%l1 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 16, i8* %P, i32 128)
tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l1, i64 16, i8* %Q, i32 %evl2)
%l2 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %P, i32 128)
tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l2, i64 16, i8* %Q, i32 %evl)
%l0 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 8, ptr %P, i32 256)
tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l0, i64 16, ptr %Q, i32 %evl)
%l1 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 16, ptr %P, i32 128)
tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l1, i64 16, ptr %Q, i32 %evl2)
%l2 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 8, ptr %P, i32 128)
tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l2, i64 16, ptr %Q, i32 %evl)
ret void
}

; Check that no redundant 'lvl' is inserted when vector length does not change
; in a basic block.

; Function Attrs: nounwind
define void @stable_vl(i32 %evl, i8* %P, i8* %Q) {
define void @stable_vl(i32 %evl, ptr %P, ptr %Q) {
; CHECK-LABEL: stable_vl:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s0, %s0, (32)0
@@ -55,19 +55,19 @@ define void @stable_vl(i32 %evl, i8* %P, i8* %Q) {
; CHECK-NEXT: vld %v0, 8, %s1
; CHECK-NEXT: vst %v0, 16, %s2
; CHECK-NEXT: b.l.t (, %s10)
%l0 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %P, i32 %evl)
tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l0, i64 16, i8* %Q, i32 %evl)
%l1 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 16, i8* %P, i32 %evl)
tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l1, i64 16, i8* %Q, i32 %evl)
%l2 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %P, i32 %evl)
tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l2, i64 16, i8* %Q, i32 %evl)
%l0 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 8, ptr %P, i32 %evl)
tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l0, i64 16, ptr %Q, i32 %evl)
%l1 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 16, ptr %P, i32 %evl)
tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l1, i64 16, ptr %Q, i32 %evl)
%l2 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 8, ptr %P, i32 %evl)
tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l2, i64 16, ptr %Q, i32 %evl)
ret void
}

;;; Check the case we have a call in the middle of vector instructions.

; Function Attrs: nounwind
define void @call_invl(i32 %evl, i8* %P, i8* %Q) {
define void @call_invl(i32 %evl, ptr %P, ptr %Q) {
; CHECK-LABEL: call_invl:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: st %s18, 288(, %s11) # 8-byte Folded Spill
@@ -92,13 +92,13 @@ define void @call_invl(i32 %evl, i8* %P, i8* %Q) {
; CHECK-NEXT: ld %s19, 296(, %s11) # 8-byte Folded Reload
; CHECK-NEXT: ld %s18, 288(, %s11) # 8-byte Folded Reload
; CHECK-NEXT: or %s11, 0, %s9
%l0 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %P, i32 %evl)
tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l0, i64 16, i8* %Q, i32 %evl)
%l0 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 8, ptr %P, i32 %evl)
tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l0, i64 16, ptr %Q, i32 %evl)
call void @fun()
%l1 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 16, i8* %P, i32 %evl)
tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l1, i64 16, i8* %Q, i32 %evl)
%l2 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %P, i32 %evl)
tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l2, i64 16, i8* %Q, i32 %evl)
%l1 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 16, ptr %P, i32 %evl)
tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l1, i64 16, ptr %Q, i32 %evl)
%l2 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 8, ptr %P, i32 %evl)
tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l2, i64 16, ptr %Q, i32 %evl)
ret void
}

Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/VE/VELIntrinsics/lvm.ll
@@ -6,7 +6,7 @@
;;; We test LVMir_m, LVMyir_y, SVMmi, and SVMyi instructions.

; Function Attrs: nounwind readnone
define i64 @lvm_mmss(i8* nocapture readnone %0, i64 %1) {
define i64 @lvm_mmss(ptr nocapture readnone %0, i64 %1) {
; CHECK-LABEL: lvm_mmss:
; CHECK: # %bb.0:
; CHECK-NEXT: lvm %vm1, 3, %s1
@@ -24,7 +24,7 @@ declare <256 x i1> @llvm.ve.vl.lvm.mmss(<256 x i1>, i64, i64)
declare i64 @llvm.ve.vl.svm.sms(<256 x i1>, i64)

; Function Attrs: nounwind readnone
define i64 @lvml_MMss(i8* nocapture readnone %0, i64 %1) {
define i64 @lvml_MMss(ptr nocapture readnone %0, i64 %1) {
; CHECK-LABEL: lvml_MMss:
; CHECK: # %bb.0:
; CHECK-NEXT: lvm %vm2, 1, %s1
19 changes: 8 additions & 11 deletions llvm/test/CodeGen/VE/VELIntrinsics/pack.ll
@@ -6,24 +6,22 @@
;;; We test pack_f32p and pack_f32a pseudo instruction.

; Function Attrs: nounwind readonly
define fastcc i64 @pack_f32p(float* readonly %0, float* readonly %1) {
define fastcc i64 @pack_f32p(ptr readonly %0, ptr readonly %1) {
; CHECK-LABEL: pack_f32p:
; CHECK: # %bb.0:
; CHECK-NEXT: ldu %s0, (, %s0)
; CHECK-NEXT: ldl.zx %s1, (, %s1)
; CHECK-NEXT: or %s0, %s0, %s1
; CHECK-NEXT: b.l.t (, %s10)
%3 = bitcast float* %0 to i8*
%4 = bitcast float* %1 to i8*
%5 = tail call i64 @llvm.ve.vl.pack.f32p(i8* %3, i8* %4)
ret i64 %5
%3 = tail call i64 @llvm.ve.vl.pack.f32p(ptr %0, ptr %1)
ret i64 %3
}

; Function Attrs: nounwind readonly
declare i64 @llvm.ve.vl.pack.f32p(i8*, i8*)
declare i64 @llvm.ve.vl.pack.f32p(ptr, ptr)

; Function Attrs: nounwind readonly
define fastcc i64 @pack_f32a(float* readonly %0) {
define fastcc i64 @pack_f32a(ptr readonly %0) {
; CHECK-LABEL: pack_f32a:
; CHECK: # %bb.0:
; CHECK-NEXT: ldl.zx %s0, (, %s0)
@@ -32,10 +30,9 @@ define fastcc i64 @pack_f32a(float* readonly %0) {
; CHECK-NEXT: lea.sl %s1, 1(, %s1)
; CHECK-NEXT: mulu.l %s0, %s0, %s1
; CHECK-NEXT: b.l.t (, %s10)
%2 = bitcast float* %0 to i8*
%3 = tail call i64 @llvm.ve.vl.pack.f32a(i8* %2)
ret i64 %3
%2 = tail call i64 @llvm.ve.vl.pack.f32a(ptr %0)
ret i64 %2
}

; Function Attrs: nounwind readonly
declare i64 @llvm.ve.vl.pack.f32a(i8*)
declare i64 @llvm.ve.vl.pack.f32a(ptr)
20 changes: 10 additions & 10 deletions llvm/test/CodeGen/VE/VELIntrinsics/pfchv.ll
@@ -6,55 +6,55 @@
;;; We test PFCHVrrl, PFCHVirl, PFCHVNCrrl, and PFCHVNCirl instructions.

; Function Attrs: nounwind
define void @pfchv_vssl(i8* %0, i64 %1) {
define void @pfchv_vssl(ptr %0, i64 %1) {
; CHECK-LABEL: pfchv_vssl:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s2, 256
; CHECK-NEXT: lvl %s2
; CHECK-NEXT: pfchv %s1, %s0
; CHECK-NEXT: b.l.t (, %s10)
tail call void @llvm.ve.vl.pfchv.ssl(i64 %1, i8* %0, i32 256)
tail call void @llvm.ve.vl.pfchv.ssl(i64 %1, ptr %0, i32 256)
ret void
}

; Function Attrs: inaccessiblemem_or_argmemonly nounwind
declare void @llvm.ve.vl.pfchv.ssl(i64, i8*, i32)
declare void @llvm.ve.vl.pfchv.ssl(i64, ptr, i32)

; Function Attrs: nounwind
define void @pfchv_vssl_imm(i8* %0) {
define void @pfchv_vssl_imm(ptr %0) {
; CHECK-LABEL: pfchv_vssl_imm:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s1, 256
; CHECK-NEXT: lvl %s1
; CHECK-NEXT: pfchv 8, %s0
; CHECK-NEXT: b.l.t (, %s10)
tail call void @llvm.ve.vl.pfchv.ssl(i64 8, i8* %0, i32 256)
tail call void @llvm.ve.vl.pfchv.ssl(i64 8, ptr %0, i32 256)
ret void
}

; Function Attrs: nounwind
define void @pfchvnc_vssl(i8* %0, i64 %1) {
define void @pfchvnc_vssl(ptr %0, i64 %1) {
; CHECK-LABEL: pfchvnc_vssl:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s2, 256
; CHECK-NEXT: lvl %s2
; CHECK-NEXT: pfchv.nc %s1, %s0
; CHECK-NEXT: b.l.t (, %s10)
tail call void @llvm.ve.vl.pfchvnc.ssl(i64 %1, i8* %0, i32 256)
tail call void @llvm.ve.vl.pfchvnc.ssl(i64 %1, ptr %0, i32 256)
ret void
}

; Function Attrs: inaccessiblemem_or_argmemonly nounwind
declare void @llvm.ve.vl.pfchvnc.ssl(i64, i8*, i32)
declare void @llvm.ve.vl.pfchvnc.ssl(i64, ptr, i32)

; Function Attrs: nounwind
define void @pfchvnc_vssl_imm(i8* %0) {
define void @pfchvnc_vssl_imm(ptr %0) {
; CHECK-LABEL: pfchvnc_vssl_imm:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s1, 256
; CHECK-NEXT: lvl %s1
; CHECK-NEXT: pfchv.nc 8, %s0
; CHECK-NEXT: b.l.t (, %s10)
tail call void @llvm.ve.vl.pfchvnc.ssl(i64 8, i8* %0, i32 256)
tail call void @llvm.ve.vl.pfchvnc.ssl(i64 8, ptr %0, i32 256)
ret void
}
116 changes: 58 additions & 58 deletions llvm/test/CodeGen/VE/VELIntrinsics/vbrd.ll

Large diffs are not rendered by default.

512 changes: 256 additions & 256 deletions llvm/test/CodeGen/VE/VELIntrinsics/vld.ll

Large diffs are not rendered by default.

40 changes: 20 additions & 20 deletions llvm/test/CodeGen/VE/VELIntrinsics/vmv.ll
@@ -6,7 +6,7 @@
;;; We test VMVivl and VMVivl_v, and VMVivml_v instructions.

; Function Attrs: nounwind
define void @vmv_vsvl(i8* %0, i32 signext %1) {
define void @vmv_vsvl(ptr %0, i32 signext %1) {
; CHECK-LABEL: vmv_vsvl:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s2, 256
@@ -16,23 +16,23 @@ define void @vmv_vsvl(i8* %0, i32 signext %1) {
; CHECK-NEXT: vmv %v0, %s1, %v0
; CHECK-NEXT: vst %v0, 8, %s0
; CHECK-NEXT: b.l.t (, %s10)
%3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256)
%3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, ptr %0, i32 256)
%4 = tail call fast <256 x double> @llvm.ve.vl.vmv.vsvl(i32 %1, <256 x double> %3, i32 256)
tail call void @llvm.ve.vl.vst.vssl(<256 x double> %4, i64 8, i8* %0, i32 256)
tail call void @llvm.ve.vl.vst.vssl(<256 x double> %4, i64 8, ptr %0, i32 256)
ret void
}

; Function Attrs: nounwind readonly
declare <256 x double> @llvm.ve.vl.vld.vssl(i64, i8*, i32)
declare <256 x double> @llvm.ve.vl.vld.vssl(i64, ptr, i32)

; Function Attrs: nounwind readnone
declare <256 x double> @llvm.ve.vl.vmv.vsvl(i32, <256 x double>, i32)

; Function Attrs: nounwind writeonly
declare void @llvm.ve.vl.vst.vssl(<256 x double>, i64, i8*, i32)
declare void @llvm.ve.vl.vst.vssl(<256 x double>, i64, ptr, i32)

; Function Attrs: nounwind
define void @vmv_vsvl_imm(i8* %0) {
define void @vmv_vsvl_imm(ptr %0) {
; CHECK-LABEL: vmv_vsvl_imm:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s1, 256
@@ -41,14 +41,14 @@ define void @vmv_vsvl_imm(i8* %0) {
; CHECK-NEXT: vmv %v0, 31, %v0
; CHECK-NEXT: vst %v0, 8, %s0
; CHECK-NEXT: b.l.t (, %s10)
%2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256)
%2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, ptr %0, i32 256)
%3 = tail call fast <256 x double> @llvm.ve.vl.vmv.vsvl(i32 31, <256 x double> %2, i32 256)
tail call void @llvm.ve.vl.vst.vssl(<256 x double> %3, i64 8, i8* %0, i32 256)
tail call void @llvm.ve.vl.vst.vssl(<256 x double> %3, i64 8, ptr %0, i32 256)
ret void
}

; Function Attrs: nounwind
define void @vmv_vsvvl(i8* %0, i32 signext %1) {
define void @vmv_vsvvl(ptr %0, i32 signext %1) {
; CHECK-LABEL: vmv_vsvvl:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s2, 256
@@ -61,17 +61,17 @@ define void @vmv_vsvvl(i8* %0, i32 signext %1) {
; CHECK-NEXT: lvl %s2
; CHECK-NEXT: vst %v0, 8, %s0
; CHECK-NEXT: b.l.t (, %s10)
%3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256)
%3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, ptr %0, i32 256)
%4 = tail call fast <256 x double> @llvm.ve.vl.vmv.vsvvl(i32 %1, <256 x double> %3, <256 x double> %3, i32 128)
tail call void @llvm.ve.vl.vst.vssl(<256 x double> %4, i64 8, i8* %0, i32 256)
tail call void @llvm.ve.vl.vst.vssl(<256 x double> %4, i64 8, ptr %0, i32 256)
ret void
}

; Function Attrs: nounwind readnone
declare <256 x double> @llvm.ve.vl.vmv.vsvvl(i32, <256 x double>, <256 x double>, i32)

; Function Attrs: nounwind
define void @vmv_vsvvl_imm(i8* %0) {
define void @vmv_vsvvl_imm(ptr %0) {
; CHECK-LABEL: vmv_vsvvl_imm:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s1, 256
@@ -83,14 +83,14 @@ define void @vmv_vsvvl_imm(i8* %0) {
; CHECK-NEXT: lvl %s1
; CHECK-NEXT: vst %v0, 8, %s0
; CHECK-NEXT: b.l.t (, %s10)
%2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256)
%2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, ptr %0, i32 256)
%3 = tail call fast <256 x double> @llvm.ve.vl.vmv.vsvvl(i32 31, <256 x double> %2, <256 x double> %2, i32 128)
tail call void @llvm.ve.vl.vst.vssl(<256 x double> %3, i64 8, i8* %0, i32 256)
tail call void @llvm.ve.vl.vst.vssl(<256 x double> %3, i64 8, ptr %0, i32 256)
ret void
}

; Function Attrs: nounwind
define void @vmv_vsvmvl(i8* %0, i32 signext %1) {
define void @vmv_vsvmvl(ptr %0, i32 signext %1) {
; CHECK-LABEL: vmv_vsvmvl:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s2, 256
@@ -103,17 +103,17 @@ define void @vmv_vsvmvl(i8* %0, i32 signext %1) {
; CHECK-NEXT: lvl %s2
; CHECK-NEXT: vst %v0, 8, %s0
; CHECK-NEXT: b.l.t (, %s10)
%3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256)
%3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, ptr %0, i32 256)
%4 = tail call fast <256 x double> @llvm.ve.vl.vmv.vsvmvl(i32 %1, <256 x double> %3, <256 x i1> undef, <256 x double> %3, i32 128)
tail call void @llvm.ve.vl.vst.vssl(<256 x double> %4, i64 8, i8* %0, i32 256)
tail call void @llvm.ve.vl.vst.vssl(<256 x double> %4, i64 8, ptr %0, i32 256)
ret void
}

; Function Attrs: nounwind readnone
declare <256 x double> @llvm.ve.vl.vmv.vsvmvl(i32, <256 x double>, <256 x i1>, <256 x double>, i32)

; Function Attrs: nounwind
define void @vmv_vsvmvl_imm(i8* %0) {
define void @vmv_vsvmvl_imm(ptr %0) {
; CHECK-LABEL: vmv_vsvmvl_imm:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s1, 256
@@ -125,8 +125,8 @@ define void @vmv_vsvmvl_imm(i8* %0) {
; CHECK-NEXT: lvl %s1
; CHECK-NEXT: vst %v0, 8, %s0
; CHECK-NEXT: b.l.t (, %s10)
%2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256)
%2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, ptr %0, i32 256)
%3 = tail call fast <256 x double> @llvm.ve.vl.vmv.vsvmvl(i32 31, <256 x double> %2, <256 x i1> undef, <256 x double> %2, i32 128)
tail call void @llvm.ve.vl.vst.vssl(<256 x double> %3, i64 8, i8* %0, i32 256)
tail call void @llvm.ve.vl.vst.vssl(<256 x double> %3, i64 8, ptr %0, i32 256)
ret void
}
674 changes: 337 additions & 337 deletions llvm/test/CodeGen/VE/VELIntrinsics/vst.ll

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions llvm/test/CodeGen/VE/Vector/loadvm.ll
@@ -5,7 +5,7 @@
@v512i1 = common dso_local local_unnamed_addr global <512 x i1> zeroinitializer, align 4

; Function Attrs: norecurse nounwind readonly
define fastcc <256 x i1> @loadv256i1(<256 x i1>* nocapture readonly %mp) {
define fastcc <256 x i1> @loadv256i1(ptr nocapture readonly %mp) {
; CHECK-LABEL: loadv256i1:
; CHECK: # %bb.0:
; CHECK-NEXT: ld %s1, (, %s0)
@@ -17,7 +17,7 @@ define fastcc <256 x i1> @loadv256i1(<256 x i1>* nocapture readonly %mp) {
; CHECK-NEXT: lvm %vm1, 2, %s3
; CHECK-NEXT: lvm %vm1, 3, %s0
; CHECK-NEXT: b.l.t (, %s10)
%m = load <256 x i1>, <256 x i1>* %mp, align 16
%m = load <256 x i1>, ptr %mp, align 16
ret <256 x i1> %m
}

@@ -37,12 +37,12 @@ define fastcc <256 x i1> @loadv256i1com() {
; CHECK-NEXT: lvm %vm1, 2, %s3
; CHECK-NEXT: lvm %vm1, 3, %s0
; CHECK-NEXT: b.l.t (, %s10)
%m = load <256 x i1>, <256 x i1>* @v256i1, align 16
%m = load <256 x i1>, ptr @v256i1, align 16
ret <256 x i1> %m
}

; Function Attrs: norecurse nounwind readonly
define fastcc <512 x i1> @loadv512i1(<512 x i1>* nocapture readonly %mp) {
define fastcc <512 x i1> @loadv512i1(ptr nocapture readonly %mp) {
; CHECK-LABEL: loadv512i1:
; CHECK: # %bb.0:
; CHECK-NEXT: ld %s1, (, %s0)
@@ -62,7 +62,7 @@ define fastcc <512 x i1> @loadv512i1(<512 x i1>* nocapture readonly %mp) {
; CHECK-NEXT: lvm %vm2, 2, %s3
; CHECK-NEXT: lvm %vm2, 3, %s0
; CHECK-NEXT: b.l.t (, %s10)
%m = load <512 x i1>, <512 x i1>* %mp, align 16
%m = load <512 x i1>, ptr %mp, align 16
ret <512 x i1> %m
}

@@ -90,7 +90,7 @@ define fastcc <512 x i1> @loadv512i1com() {
; CHECK-NEXT: lvm %vm2, 2, %s3
; CHECK-NEXT: lvm %vm2, 3, %s0
; CHECK-NEXT: b.l.t (, %s10)
%m = load <512 x i1>, <512 x i1>* @v512i1, align 16
%m = load <512 x i1>, ptr @v512i1, align 16
ret <512 x i1> %m
}

20 changes: 10 additions & 10 deletions llvm/test/CodeGen/VE/Vector/loadvr.ll
@@ -4,50 +4,50 @@
@v256i64 = common dso_local local_unnamed_addr global <256 x i64> zeroinitializer, align 16

; Function Attrs: norecurse nounwind readonly
define fastcc <256 x i64> @loadv256i64(<256 x i64>* nocapture readonly) {
define fastcc <256 x i64> @loadv256i64(ptr nocapture readonly) {
; CHECK-LABEL: loadv256i64:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s1, 256
; CHECK-NEXT: lvl %s1
; CHECK-NEXT: vld %v0, 8, %s0
; CHECK-NEXT: b.l.t (, %s10)
%2 = load <256 x i64>, <256 x i64>* %0, align 16
%2 = load <256 x i64>, ptr %0, align 16
ret <256 x i64> %2
}

; Function Attrs: norecurse nounwind readonly
define fastcc <256 x double> @loadv256f64(<256 x double>* nocapture readonly) {
define fastcc <256 x double> @loadv256f64(ptr nocapture readonly) {
; CHECK-LABEL: loadv256f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s1, 256
; CHECK-NEXT: lvl %s1
; CHECK-NEXT: vld %v0, 8, %s0
; CHECK-NEXT: b.l.t (, %s10)
%2 = load <256 x double>, <256 x double>* %0, align 16
%2 = load <256 x double>, ptr %0, align 16
ret <256 x double> %2
}

; Function Attrs: norecurse nounwind readonly
define fastcc <256 x i32> @loadv256i32(<256 x i32>* nocapture readonly) {
define fastcc <256 x i32> @loadv256i32(ptr nocapture readonly) {
; CHECK-LABEL: loadv256i32:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s1, 256
; CHECK-NEXT: lvl %s1
; CHECK-NEXT: vldl.zx %v0, 4, %s0
; CHECK-NEXT: b.l.t (, %s10)
%2 = load <256 x i32>, <256 x i32>* %0, align 16
%2 = load <256 x i32>, ptr %0, align 16
ret <256 x i32> %2
}

; Function Attrs: norecurse nounwind readonly
define fastcc <256 x float> @loadv256f32(<256 x float>* nocapture readonly) {
define fastcc <256 x float> @loadv256f32(ptr nocapture readonly) {
; CHECK-LABEL: loadv256f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s1, 256
; CHECK-NEXT: lvl %s1
; CHECK-NEXT: vldu %v0, 4, %s0
; CHECK-NEXT: b.l.t (, %s10)
%2 = load <256 x float>, <256 x float>* %0, align 16
%2 = load <256 x float>, ptr %0, align 16
ret <256 x float> %2
}

@@ -74,7 +74,7 @@ define fastcc <256 x i64> @loadv256i64stk() {
; CHECK-NEXT: lea %s11, 2048(, %s11)
; CHECK-NEXT: b.l.t (, %s10)
%addr = alloca <256 x i64>, align 16
%1 = load <256 x i64>, <256 x i64>* %addr, align 16
%1 = load <256 x i64>, ptr %addr, align 16
ret <256 x i64> %1
}

@@ -89,6 +89,6 @@ define fastcc <256 x i64> @loadv256i64com() {
; CHECK-NEXT: lvl %s1
; CHECK-NEXT: vld %v0, 8, %s0
; CHECK-NEXT: b.l.t (, %s10)
%1 = load <256 x i64>, <256 x i64>* @v256i64, align 16
%1 = load <256 x i64>, ptr @v256i64, align 16
ret <256 x i64> %1
}
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/VE/Vector/storevm.ll
@@ -5,7 +5,7 @@
@v512i1 = common dso_local local_unnamed_addr global <512 x i1> zeroinitializer, align 4

; Function Attrs: norecurse nounwind readonly
define fastcc void @storev256i1(<256 x i1>* nocapture %mp, <256 x i1> %m) {
define fastcc void @storev256i1(ptr nocapture %mp, <256 x i1> %m) {
; CHECK-LABEL: storev256i1:
; CHECK: # %bb.0:
; CHECK-NEXT: svm %s1, %vm1, 3
@@ -17,7 +17,7 @@ define fastcc void @storev256i1(<256 x i1>* nocapture %mp, <256 x i1> %m) {
; CHECK-NEXT: svm %s1, %vm1, 0
; CHECK-NEXT: st %s1, (, %s0)
; CHECK-NEXT: b.l.t (, %s10)
store <256 x i1> %m, <256 x i1>* %mp, align 16
store <256 x i1> %m, ptr %mp, align 16
ret void
}

@@ -37,12 +37,12 @@ define fastcc void @storev256i1com(<256 x i1> %m) {
; CHECK-NEXT: svm %s0, %vm1, 0
; CHECK-NEXT: st %s0, (, %s1)
; CHECK-NEXT: b.l.t (, %s10)
store <256 x i1> %m, <256 x i1>* @v256i1, align 16
store <256 x i1> %m, ptr @v256i1, align 16
ret void
}

; Function Attrs: norecurse nounwind readonly
define fastcc void @storev512i1(<512 x i1>* nocapture %mp, <512 x i1> %m) {
define fastcc void @storev512i1(ptr nocapture %mp, <512 x i1> %m) {
; CHECK-LABEL: storev512i1:
; CHECK: # %bb.0:
; CHECK-NEXT: svm %s1, %vm2, 3
@@ -62,7 +62,7 @@ define fastcc void @storev512i1(<512 x i1>* nocapture %mp, <512 x i1> %m) {
; CHECK-NEXT: svm %s1, %vm3, 0
; CHECK-NEXT: st %s1, (, %s0)
; CHECK-NEXT: b.l.t (, %s10)
store <512 x i1> %m, <512 x i1>* %mp, align 16
store <512 x i1> %m, ptr %mp, align 16
ret void
}

@@ -90,6 +90,6 @@ define fastcc void @storev512i1com(<512 x i1> %m) {
; CHECK-NEXT: svm %s0, %vm3, 0
; CHECK-NEXT: st %s0, (, %s1)
; CHECK-NEXT: b.l.t (, %s10)
store <512 x i1> %m, <512 x i1>* @v512i1, align 16
store <512 x i1> %m, ptr @v512i1, align 16
ret void
}
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/VE/Vector/storevr.ll
@@ -4,14 +4,14 @@
@v256i64 = common dso_local local_unnamed_addr global <256 x i64> zeroinitializer, align 16

; Function Attrs: norecurse nounwind readonly
define fastcc void @storev256i64(<256 x i64>* nocapture, <256 x i64>) {
define fastcc void @storev256i64(ptr nocapture, <256 x i64>) {
; CHECK-LABEL: storev256i64:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s1, 256
; CHECK-NEXT: lvl %s1
; CHECK-NEXT: vst %v0, 8, %s0
; CHECK-NEXT: b.l.t (, %s10)
store <256 x i64> %1, <256 x i64>* %0, align 16
store <256 x i64> %1, ptr %0, align 16
ret void
}

@@ -38,7 +38,7 @@ define fastcc void @storev256i64stk(<256 x i64>) {
; CHECK-NEXT: lea %s11, 2048(, %s11)
; CHECK-NEXT: b.l.t (, %s10)
%addr = alloca <256 x i64>, align 16
store <256 x i64> %0, <256 x i64>* %addr, align 16
store <256 x i64> %0, ptr %addr, align 16
ret void
}

@@ -53,6 +53,6 @@ define fastcc void @storev256i64com(<256 x i64>) {
; CHECK-NEXT: lvl %s1
; CHECK-NEXT: vst %v0, 8, %s0
; CHECK-NEXT: b.l.t (, %s10)
store <256 x i64> %0, <256 x i64>* @v256i64, align 16
store <256 x i64> %0, ptr @v256i64, align 16
ret void
}
30 changes: 15 additions & 15 deletions llvm/test/CodeGen/VE/Vector/vec_gather.ll
@@ -1,22 +1,22 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s

declare <256 x double> @llvm.masked.gather.v256f64.v256p0f64(<256 x double*> %0, i32 immarg %1, <256 x i1> %2, <256 x double> %3) #0
declare <256 x double> @llvm.masked.gather.v256f64.v256p0(<256 x ptr> %0, i32 immarg %1, <256 x i1> %2, <256 x double> %3) #0

; Function Attrs: nounwind
define fastcc <256 x double> @vec_mgather_v256f64(<256 x double*> %P, <256 x i1> %M) {
define fastcc <256 x double> @vec_mgather_v256f64(<256 x ptr> %P, <256 x i1> %M) {
; CHECK-LABEL: vec_mgather_v256f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s0, 256
; CHECK-NEXT: lvl %s0
; CHECK-NEXT: vgt %v0, %v0, 0, 0
; CHECK-NEXT: b.l.t (, %s10)
%r = call <256 x double> @llvm.masked.gather.v256f64.v256p0f64(<256 x double*> %P, i32 4, <256 x i1> %M, <256 x double> undef)
%r = call <256 x double> @llvm.masked.gather.v256f64.v256p0(<256 x ptr> %P, i32 4, <256 x i1> %M, <256 x double> undef)
ret <256 x double> %r
}

; Function Attrs: nounwind
define fastcc <256 x double> @vec_mgather_pt_v256f64(<256 x double*> %P, <256 x double> %PT, <256 x i1> %M) {
define fastcc <256 x double> @vec_mgather_pt_v256f64(<256 x ptr> %P, <256 x double> %PT, <256 x i1> %M) {
; CHECK-LABEL: vec_mgather_pt_v256f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s0, 256
@@ -28,27 +28,27 @@ define fastcc <256 x double> @vec_mgather_pt_v256f64(<256 x double*> %P, <256 x
; CHECK-NEXT: lvl %s0
; CHECK-NEXT: vmrg %v0, %v1, %v2, %vm0
; CHECK-NEXT: b.l.t (, %s10)
%r = call <256 x double> @llvm.masked.gather.v256f64.v256p0f64(<256 x double*> %P, i32 4, <256 x i1> %M, <256 x double> %PT)
%r = call <256 x double> @llvm.masked.gather.v256f64.v256p0(<256 x ptr> %P, i32 4, <256 x i1> %M, <256 x double> %PT)
ret <256 x double> %r
}


declare <256 x float> @llvm.masked.gather.v256f32.v256p0f32(<256 x float*> %0, i32 immarg %1, <256 x i1> %2, <256 x float> %3) #0
declare <256 x float> @llvm.masked.gather.v256f32.v256p0(<256 x ptr> %0, i32 immarg %1, <256 x i1> %2, <256 x float> %3) #0

; Function Attrs: nounwind
define fastcc <256 x float> @vec_mgather_v256f32(<256 x float*> %P, <256 x i1> %M) {
define fastcc <256 x float> @vec_mgather_v256f32(<256 x ptr> %P, <256 x i1> %M) {
; CHECK-LABEL: vec_mgather_v256f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s0, 256
; CHECK-NEXT: lvl %s0
; CHECK-NEXT: vgtu %v0, %v0, 0, 0
; CHECK-NEXT: b.l.t (, %s10)
%r = call <256 x float> @llvm.masked.gather.v256f32.v256p0f32(<256 x float*> %P, i32 4, <256 x i1> %M, <256 x float> undef)
%r = call <256 x float> @llvm.masked.gather.v256f32.v256p0(<256 x ptr> %P, i32 4, <256 x i1> %M, <256 x float> undef)
ret <256 x float> %r
}

; Function Attrs: nounwind
define fastcc <256 x float> @vec_mgather_pt_v256f32(<256 x float*> %P, <256 x float> %PT, <256 x i1> %M) {
define fastcc <256 x float> @vec_mgather_pt_v256f32(<256 x ptr> %P, <256 x float> %PT, <256 x i1> %M) {
; CHECK-LABEL: vec_mgather_pt_v256f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s0, 256
Expand All @@ -60,27 +60,27 @@ define fastcc <256 x float> @vec_mgather_pt_v256f32(<256 x float*> %P, <256 x fl
; CHECK-NEXT: lvl %s0
; CHECK-NEXT: vmrg %v0, %v1, %v2, %vm0
; CHECK-NEXT: b.l.t (, %s10)
%r = call <256 x float> @llvm.masked.gather.v256f32.v256p0f32(<256 x float*> %P, i32 4, <256 x i1> %M, <256 x float> %PT)
%r = call <256 x float> @llvm.masked.gather.v256f32.v256p0(<256 x ptr> %P, i32 4, <256 x i1> %M, <256 x float> %PT)
ret <256 x float> %r
}


declare <256 x i32> @llvm.masked.gather.v256i32.v256p0i32(<256 x i32*> %0, i32 immarg %1, <256 x i1> %2, <256 x i32> %3) #0
declare <256 x i32> @llvm.masked.gather.v256i32.v256p0(<256 x ptr> %0, i32 immarg %1, <256 x i1> %2, <256 x i32> %3) #0

; Function Attrs: nounwind
define fastcc <256 x i32> @vec_mgather_v256i32(<256 x i32*> %P, <256 x i1> %M) {
define fastcc <256 x i32> @vec_mgather_v256i32(<256 x ptr> %P, <256 x i1> %M) {
; CHECK-LABEL: vec_mgather_v256i32:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s0, 256
; CHECK-NEXT: lvl %s0
; CHECK-NEXT: vgtl.zx %v0, %v0, 0, 0
; CHECK-NEXT: b.l.t (, %s10)
%r = call <256 x i32> @llvm.masked.gather.v256i32.v256p0i32(<256 x i32*> %P, i32 4, <256 x i1> %M, <256 x i32> undef)
%r = call <256 x i32> @llvm.masked.gather.v256i32.v256p0(<256 x ptr> %P, i32 4, <256 x i1> %M, <256 x i32> undef)
ret <256 x i32> %r
}

; Function Attrs: nounwind
define fastcc <256 x i32> @vec_mgather_pt_v256i32(<256 x i32*> %P, <256 x i32> %PT, <256 x i1> %M) {
define fastcc <256 x i32> @vec_mgather_pt_v256i32(<256 x ptr> %P, <256 x i32> %PT, <256 x i1> %M) {
; CHECK-LABEL: vec_mgather_pt_v256i32:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s0, 256
@@ -92,7 +92,7 @@ define fastcc <256 x i32> @vec_mgather_pt_v256i32(<256 x i32*> %P, <256 x i32> %
; CHECK-NEXT: lvl %s0
; CHECK-NEXT: vmrg %v0, %v1, %v2, %vm0
; CHECK-NEXT: b.l.t (, %s10)
%r = call <256 x i32> @llvm.masked.gather.v256i32.v256p0i32(<256 x i32*> %P, i32 4, <256 x i1> %M, <256 x i32> %PT)
%r = call <256 x i32> @llvm.masked.gather.v256i32.v256p0(<256 x ptr> %P, i32 4, <256 x i1> %M, <256 x i32> %PT)
ret <256 x i32> %r
}

40 changes: 20 additions & 20 deletions llvm/test/CodeGen/VE/Vector/vec_load.ll
@@ -1,20 +1,20 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s

declare <128 x double> @llvm.masked.load.v128f64.p0v128f64(<128 x double>* %0, i32 immarg %1, <128 x i1> %2, <128 x double> %3) #0
declare <128 x double> @llvm.masked.load.v128f64.p0(ptr %0, i32 immarg %1, <128 x i1> %2, <128 x double> %3) #0

; TODO: Custom widen by lowering to vvp_load in ReplaceNodeResult
; Function Attrs: nounwind
; define fastcc <128 x double> @vec_mload_v128f64(<128 x double>* %P, <128 x i1> %M) {
; %r = call <128 x double> @llvm.masked.load.v128f64.p0v128f64(<128 x double>* %P, i32 16, <128 x i1> %M, <128 x double> undef)
; define fastcc <128 x double> @vec_mload_v128f64(ptr %P, <128 x i1> %M) {
; %r = call <128 x double> @llvm.masked.load.v128f64.p0(ptr %P, i32 16, <128 x i1> %M, <128 x double> undef)
; ret <128 x double> %r
; }


declare <256 x double> @llvm.masked.load.v256f64.p0v256f64(<256 x double>* %0, i32 immarg %1, <256 x i1> %2, <256 x double> %3) #0
declare <256 x double> @llvm.masked.load.v256f64.p0(ptr %0, i32 immarg %1, <256 x i1> %2, <256 x double> %3) #0

; Function Attrs: nounwind
define fastcc <256 x double> @vec_mload_v256f64(<256 x double>* %P, <256 x i1> %M) {
define fastcc <256 x double> @vec_mload_v256f64(ptr %P, <256 x i1> %M) {
; CHECK-LABEL: vec_mload_v256f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s1, 256
@@ -24,24 +24,24 @@ define fastcc <256 x double> @vec_mload_v256f64(<256 x double>* %P, <256 x i1> %
; CHECK-NEXT: vaddu.l %v0, %s0, %v0, %vm1
; CHECK-NEXT: vgt %v0, %v0, 0, 0, %vm1
; CHECK-NEXT: b.l.t (, %s10)
%r = call <256 x double> @llvm.masked.load.v256f64.p0v256f64(<256 x double>* %P, i32 16, <256 x i1> %M, <256 x double> undef)
%r = call <256 x double> @llvm.masked.load.v256f64.p0(ptr %P, i32 16, <256 x i1> %M, <256 x double> undef)
ret <256 x double> %r
}

; Function Attrs: nounwind
define fastcc <256 x double> @vec_load_v256f64(<256 x double>* %P) {
define fastcc <256 x double> @vec_load_v256f64(ptr %P) {
; CHECK-LABEL: vec_load_v256f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s1, 256
; CHECK-NEXT: lvl %s1
; CHECK-NEXT: vld %v0, 8, %s0
; CHECK-NEXT: b.l.t (, %s10)
%r = load <256 x double>, <256 x double>* %P, align 4
%r = load <256 x double>, ptr %P, align 4
ret <256 x double> %r
}

; Function Attrs: nounwind
define fastcc <256 x double> @vec_mload_pt_v256f64(<256 x double>* %P, <256 x double> %PT, <256 x i1> %M) {
define fastcc <256 x double> @vec_mload_pt_v256f64(ptr %P, <256 x double> %PT, <256 x i1> %M) {
; CHECK-LABEL: vec_mload_pt_v256f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s1, 256
@@ -52,15 +52,15 @@ define fastcc <256 x double> @vec_mload_pt_v256f64(<256 x double>* %P, <256 x do
; CHECK-NEXT: vgt %v1, %v1, 0, 0, %vm1
; CHECK-NEXT: vmrg %v0, %v0, %v1, %vm1
; CHECK-NEXT: b.l.t (, %s10)
%r = call <256 x double> @llvm.masked.load.v256f64.p0v256f64(<256 x double>* %P, i32 16, <256 x i1> %M, <256 x double> %PT)
%r = call <256 x double> @llvm.masked.load.v256f64.p0(ptr %P, i32 16, <256 x i1> %M, <256 x double> %PT)
ret <256 x double> %r
}


declare <256 x float> @llvm.masked.load.v256f32.p0v256f32(<256 x float>* %0, i32 immarg %1, <256 x i1> %2, <256 x float> %3) #0
declare <256 x float> @llvm.masked.load.v256f32.p0(ptr %0, i32 immarg %1, <256 x i1> %2, <256 x float> %3) #0

; Function Attrs: nounwind
define fastcc <256 x float> @vec_mload_v256f32(<256 x float>* %P, <256 x i1> %M) {
define fastcc <256 x float> @vec_mload_v256f32(ptr %P, <256 x i1> %M) {
; CHECK-LABEL: vec_mload_v256f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s1, 256
@@ -70,12 +70,12 @@ define fastcc <256 x float> @vec_mload_v256f32(<256 x float>* %P, <256 x i1> %M)
; CHECK-NEXT: vaddu.l %v0, %s0, %v0, %vm1
; CHECK-NEXT: vgtu %v0, %v0, 0, 0, %vm1
; CHECK-NEXT: b.l.t (, %s10)
%r = call <256 x float> @llvm.masked.load.v256f32.p0v256f32(<256 x float>* %P, i32 16, <256 x i1> %M, <256 x float> undef)
%r = call <256 x float> @llvm.masked.load.v256f32.p0(ptr %P, i32 16, <256 x i1> %M, <256 x float> undef)
ret <256 x float> %r
}

; Function Attrs: nounwind
define fastcc <256 x float> @vec_mload_pt_v256f32(<256 x float>* %P, <256 x float> %PT, <256 x i1> %M) {
define fastcc <256 x float> @vec_mload_pt_v256f32(ptr %P, <256 x float> %PT, <256 x i1> %M) {
; CHECK-LABEL: vec_mload_pt_v256f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s1, 256
@@ -86,15 +86,15 @@ define fastcc <256 x float> @vec_mload_pt_v256f32(<256 x float>* %P, <256 x floa
; CHECK-NEXT: vgtu %v1, %v1, 0, 0, %vm1
; CHECK-NEXT: vmrg %v0, %v0, %v1, %vm1
; CHECK-NEXT: b.l.t (, %s10)
%r = call <256 x float> @llvm.masked.load.v256f32.p0v256f32(<256 x float>* %P, i32 16, <256 x i1> %M, <256 x float> %PT)
%r = call <256 x float> @llvm.masked.load.v256f32.p0(ptr %P, i32 16, <256 x i1> %M, <256 x float> %PT)
ret <256 x float> %r
}


declare <256 x i32> @llvm.masked.load.v256i32.p0v256i32(<256 x i32>* %0, i32 immarg %1, <256 x i1> %2, <256 x i32> %3) #0
declare <256 x i32> @llvm.masked.load.v256i32.p0(ptr %0, i32 immarg %1, <256 x i1> %2, <256 x i32> %3) #0

; Function Attrs: nounwind
define fastcc <256 x i32> @vec_mload_v256i32(<256 x i32>* %P, <256 x i1> %M) {
define fastcc <256 x i32> @vec_mload_v256i32(ptr %P, <256 x i1> %M) {
; CHECK-LABEL: vec_mload_v256i32:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s1, 256
@@ -104,12 +104,12 @@ define fastcc <256 x i32> @vec_mload_v256i32(<256 x i32>* %P, <256 x i1> %M) {
; CHECK-NEXT: vaddu.l %v0, %s0, %v0, %vm1
; CHECK-NEXT: vgtl.zx %v0, %v0, 0, 0, %vm1
; CHECK-NEXT: b.l.t (, %s10)
%r = call <256 x i32> @llvm.masked.load.v256i32.p0v256i32(<256 x i32>* %P, i32 16, <256 x i1> %M, <256 x i32> undef)
%r = call <256 x i32> @llvm.masked.load.v256i32.p0(ptr %P, i32 16, <256 x i1> %M, <256 x i32> undef)
ret <256 x i32> %r
}

; Function Attrs: nounwind
define fastcc <256 x i32> @vec_mload_pt_v256i32(<256 x i32>* %P, <256 x i32> %PT, <256 x i1> %M) {
define fastcc <256 x i32> @vec_mload_pt_v256i32(ptr %P, <256 x i32> %PT, <256 x i1> %M) {
; CHECK-LABEL: vec_mload_pt_v256i32:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s1, 256
@@ -120,7 +120,7 @@ define fastcc <256 x i32> @vec_mload_pt_v256i32(<256 x i32>* %P, <256 x i32> %PT
; CHECK-NEXT: vgtl.zx %v1, %v1, 0, 0, %vm1
; CHECK-NEXT: vmrg %v0, %v0, %v1, %vm1
; CHECK-NEXT: b.l.t (, %s10)
%r = call <256 x i32> @llvm.masked.load.v256i32.p0v256i32(<256 x i32>* %P, i32 16, <256 x i1> %M, <256 x i32> %PT)
%r = call <256 x i32> @llvm.masked.load.v256i32.p0(ptr %P, i32 16, <256 x i1> %M, <256 x i32> %PT)
ret <256 x i32> %r
}

24 changes: 12 additions & 12 deletions llvm/test/CodeGen/VE/Vector/vec_scatter.ll
@@ -1,59 +1,59 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s

declare void @llvm.masked.scatter.v256i64.v256p0i64(<256 x i64>, <256 x i64*>, i32 immarg, <256 x i1>) #0
declare void @llvm.masked.scatter.v256i64.v256p0(<256 x i64>, <256 x ptr>, i32 immarg, <256 x i1>) #0

; Function Attrs: nounwind
define fastcc void @vec_mscatter_v256i64(<256 x i64> %V, <256 x i64*> %P, <256 x i1> %M) {
define fastcc void @vec_mscatter_v256i64(<256 x i64> %V, <256 x ptr> %P, <256 x i1> %M) {
; CHECK-LABEL: vec_mscatter_v256i64:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s0, 256
; CHECK-NEXT: lvl %s0
; CHECK-NEXT: vsc %v0, %v1, 0, 0
; CHECK-NEXT: b.l.t (, %s10)
call void @llvm.masked.scatter.v256i64.v256p0i64(<256 x i64> %V, <256 x i64*> %P, i32 4, <256 x i1> %M)
call void @llvm.masked.scatter.v256i64.v256p0(<256 x i64> %V, <256 x ptr> %P, i32 4, <256 x i1> %M)
ret void
}

declare void @llvm.masked.scatter.v256f64.v256p0f64(<256 x double>, <256 x double*>, i32 immarg, <256 x i1>) #0
declare void @llvm.masked.scatter.v256f64.v256p0(<256 x double>, <256 x ptr>, i32 immarg, <256 x i1>) #0

; Function Attrs: nounwind
define fastcc void @vec_mscatter_v256f64(<256 x double> %V, <256 x double*> %P, <256 x i1> %M) {
define fastcc void @vec_mscatter_v256f64(<256 x double> %V, <256 x ptr> %P, <256 x i1> %M) {
; CHECK-LABEL: vec_mscatter_v256f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s0, 256
; CHECK-NEXT: lvl %s0
; CHECK-NEXT: vsc %v0, %v1, 0, 0
; CHECK-NEXT: b.l.t (, %s10)
call void @llvm.masked.scatter.v256f64.v256p0f64(<256 x double> %V, <256 x double*> %P, i32 4, <256 x i1> %M)
call void @llvm.masked.scatter.v256f64.v256p0(<256 x double> %V, <256 x ptr> %P, i32 4, <256 x i1> %M)
ret void
}

declare void @llvm.masked.scatter.v256f32.v256p0f32(<256 x float>, <256 x float*>, i32 immarg, <256 x i1>) #0
declare void @llvm.masked.scatter.v256f32.v256p0(<256 x float>, <256 x ptr>, i32 immarg, <256 x i1>) #0

; Function Attrs: nounwind
define fastcc void @vec_mscatter_v256f32(<256 x float> %V, <256 x float*> %P, <256 x i1> %M) {
define fastcc void @vec_mscatter_v256f32(<256 x float> %V, <256 x ptr> %P, <256 x i1> %M) {
; CHECK-LABEL: vec_mscatter_v256f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s0, 256
; CHECK-NEXT: lvl %s0
; CHECK-NEXT: vscu %v0, %v1, 0, 0
; CHECK-NEXT: b.l.t (, %s10)
call void @llvm.masked.scatter.v256f32.v256p0f32(<256 x float> %V, <256 x float*> %P, i32 4, <256 x i1> %M)
call void @llvm.masked.scatter.v256f32.v256p0(<256 x float> %V, <256 x ptr> %P, i32 4, <256 x i1> %M)
ret void
}

declare void @llvm.masked.scatter.v256i32.v256p0i32(<256 x i32>, <256 x i32*>, i32 immarg, <256 x i1>) #0
declare void @llvm.masked.scatter.v256i32.v256p0(<256 x i32>, <256 x ptr>, i32 immarg, <256 x i1>) #0

; Function Attrs: nounwind
define fastcc void @vec_mscatter_v256i32(<256 x i32> %V, <256 x i32*> %P, <256 x i1> %M) {
define fastcc void @vec_mscatter_v256i32(<256 x i32> %V, <256 x ptr> %P, <256 x i1> %M) {
; CHECK-LABEL: vec_mscatter_v256i32:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s0, 256
; CHECK-NEXT: lvl %s0
; CHECK-NEXT: vscl %v0, %v1, 0, 0
; CHECK-NEXT: b.l.t (, %s10)
call void @llvm.masked.scatter.v256i32.v256p0i32(<256 x i32> %V, <256 x i32*> %P, i32 4, <256 x i1> %M)
call void @llvm.masked.scatter.v256i32.v256p0(<256 x i32> %V, <256 x ptr> %P, i32 4, <256 x i1> %M)
ret void
}

18 changes: 9 additions & 9 deletions llvm/test/CodeGen/VE/Vector/vec_store.ll
@@ -1,43 +1,43 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s

declare void @llvm.masked.store.v256f64.p0v256f64(<256 x double>, <256 x double>*, i32 immarg, <256 x i1>)
declare void @llvm.masked.store.v256f64.p0(<256 x double>, ptr, i32 immarg, <256 x i1>)

define fastcc void @vec_mstore_v256f64(<256 x double>* %P, <256 x double> %V, <256 x i1> %M) {
define fastcc void @vec_mstore_v256f64(ptr %P, <256 x double> %V, <256 x i1> %M) {
; CHECK-LABEL: vec_mstore_v256f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s1, 256
; CHECK-NEXT: lvl %s1
; CHECK-NEXT: vst %v0, 8, %s0
; CHECK-NEXT: b.l.t (, %s10)
call void @llvm.masked.store.v256f64.p0v256f64(<256 x double> %V, <256 x double>* %P, i32 16, <256 x i1> %M)
call void @llvm.masked.store.v256f64.p0(<256 x double> %V, ptr %P, i32 16, <256 x i1> %M)
ret void
}


declare void @llvm.masked.store.v256f32.p0v256f32(<256 x float>, <256 x float>*, i32 immarg, <256 x i1>)
declare void @llvm.masked.store.v256f32.p0(<256 x float>, ptr, i32 immarg, <256 x i1>)

define fastcc void @vec_mstore_v256f32(<256 x float>* %P, <256 x float> %V, <256 x i1> %M) {
define fastcc void @vec_mstore_v256f32(ptr %P, <256 x float> %V, <256 x i1> %M) {
; CHECK-LABEL: vec_mstore_v256f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s1, 256
; CHECK-NEXT: lvl %s1
; CHECK-NEXT: vstu %v0, 4, %s0
; CHECK-NEXT: b.l.t (, %s10)
call void @llvm.masked.store.v256f32.p0v256f32(<256 x float> %V, <256 x float>* %P, i32 16, <256 x i1> %M)
call void @llvm.masked.store.v256f32.p0(<256 x float> %V, ptr %P, i32 16, <256 x i1> %M)
ret void
}


declare void @llvm.masked.store.v256i32.p0v256i32(<256 x i32>, <256 x i32>*, i32 immarg, <256 x i1>)
declare void @llvm.masked.store.v256i32.p0(<256 x i32>, ptr, i32 immarg, <256 x i1>)

define fastcc void @vec_mstore_v256i32(<256 x i32>* %P, <256 x i32> %V, <256 x i1> %M) {
define fastcc void @vec_mstore_v256i32(ptr %P, <256 x i32> %V, <256 x i1> %M) {
; CHECK-LABEL: vec_mstore_v256i32:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s1, 256
; CHECK-NEXT: lvl %s1
; CHECK-NEXT: vstl %v0, 4, %s0
; CHECK-NEXT: b.l.t (, %s10)
call void @llvm.masked.store.v256i32.p0v256i32(<256 x i32> %V, <256 x i32>* %P, i32 16, <256 x i1> %M)
call void @llvm.masked.store.v256i32.p0(<256 x i32> %V, ptr %P, i32 16, <256 x i1> %M)
ret void
}
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/VE/Vector/vp_gather.ll
@@ -1,58 +1,58 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s

declare <256 x i64> @llvm.vp.gather.v256i64.v256p0i64(<256 x i64*>, <256 x i1>, i32)
declare <256 x i64> @llvm.vp.gather.v256i64.v256p0(<256 x ptr>, <256 x i1>, i32)

; Function Attrs: nounwind
define fastcc <256 x i64> @vp_gather_v256i64(<256 x i64*> %P, <256 x i1> %M, i32 %avl) {
define fastcc <256 x i64> @vp_gather_v256i64(<256 x ptr> %P, <256 x i1> %M, i32 %avl) {
; CHECK-LABEL: vp_gather_v256i64:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lvl %s0
; CHECK-NEXT: vgt %v0, %v0, 0, 0, %vm1
; CHECK-NEXT: b.l.t (, %s10)
%r = call <256 x i64> @llvm.vp.gather.v256i64.v256p0i64(<256 x i64*> %P, <256 x i1> %M, i32 %avl)
%r = call <256 x i64> @llvm.vp.gather.v256i64.v256p0(<256 x ptr> %P, <256 x i1> %M, i32 %avl)
ret <256 x i64> %r
}

declare <256 x double> @llvm.vp.gather.v256f64.v256p0f64(<256 x double*>, <256 x i1>, i32)
declare <256 x double> @llvm.vp.gather.v256f64.v256p0(<256 x ptr>, <256 x i1>, i32)

; Function Attrs: nounwind
define fastcc <256 x double> @vp_gather_v256f64(<256 x double*> %P, <256 x i1> %M, i32 %avl) {
define fastcc <256 x double> @vp_gather_v256f64(<256 x ptr> %P, <256 x i1> %M, i32 %avl) {
; CHECK-LABEL: vp_gather_v256f64:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lvl %s0
; CHECK-NEXT: vgt %v0, %v0, 0, 0, %vm1
; CHECK-NEXT: b.l.t (, %s10)
%r = call <256 x double> @llvm.vp.gather.v256f64.v256p0f64(<256 x double*> %P, <256 x i1> %M, i32 %avl)
%r = call <256 x double> @llvm.vp.gather.v256f64.v256p0(<256 x ptr> %P, <256 x i1> %M, i32 %avl)
ret <256 x double> %r
}

declare <256 x float> @llvm.vp.gather.v256f32.v256p0f32(<256 x float*>, <256 x i1>, i32)
declare <256 x float> @llvm.vp.gather.v256f32.v256p0(<256 x ptr>, <256 x i1>, i32)

; Function Attrs: nounwind
define fastcc <256 x float> @vp_gather_v256f32(<256 x float*> %P, <256 x i1> %M, i32 %avl) {
define fastcc <256 x float> @vp_gather_v256f32(<256 x ptr> %P, <256 x i1> %M, i32 %avl) {
; CHECK-LABEL: vp_gather_v256f32:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lvl %s0
; CHECK-NEXT: vgtu %v0, %v0, 0, 0, %vm1
; CHECK-NEXT: b.l.t (, %s10)
%r = call <256 x float> @llvm.vp.gather.v256f32.v256p0f32(<256 x float*> %P, <256 x i1> %M, i32 %avl)
%r = call <256 x float> @llvm.vp.gather.v256f32.v256p0(<256 x ptr> %P, <256 x i1> %M, i32 %avl)
ret <256 x float> %r
}

declare <256 x i32> @llvm.vp.gather.v256i32.v256p0i32(<256 x i32*>, <256 x i1>, i32)
declare <256 x i32> @llvm.vp.gather.v256i32.v256p0(<256 x ptr>, <256 x i1>, i32)

; Function Attrs: nounwind
define fastcc <256 x i32> @vp_gather_v256i32(<256 x i32*> %P, <256 x i1> %M, i32 %avl) {
define fastcc <256 x i32> @vp_gather_v256i32(<256 x ptr> %P, <256 x i1> %M, i32 %avl) {
; CHECK-LABEL: vp_gather_v256i32:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lvl %s0
; CHECK-NEXT: vgtl.zx %v0, %v0, 0, 0, %vm1
; CHECK-NEXT: b.l.t (, %s10)
%r = call <256 x i32> @llvm.vp.gather.v256i32.v256p0i32(<256 x i32*> %P, <256 x i1> %M, i32 %avl)
%r = call <256 x i32> @llvm.vp.gather.v256i32.v256p0(<256 x ptr> %P, <256 x i1> %M, i32 %avl)
ret <256 x i32> %r
}
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/VE/Vector/vp_scatter.ll
@@ -1,59 +1,59 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s

declare void @llvm.vp.scatter.v256i64.v256p0i64(<256 x i64>, <256 x i64*>, <256 x i1>, i32 %avl)
declare void @llvm.vp.scatter.v256i64.v256p0(<256 x i64>, <256 x ptr>, <256 x i1>, i32 %avl)

; Function Attrs: nounwind
define fastcc void @vp_mscatter_v256i64(<256 x i64> %V, <256 x i64*> %P, <256 x i1> %M, i32 %avl) {
define fastcc void @vp_mscatter_v256i64(<256 x i64> %V, <256 x ptr> %P, <256 x i1> %M, i32 %avl) {
; CHECK-LABEL: vp_mscatter_v256i64:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lvl %s0
; CHECK-NEXT: vsc %v0, %v1, 0, 0, %vm1
; CHECK-NEXT: b.l.t (, %s10)
call void @llvm.vp.scatter.v256i64.v256p0i64(<256 x i64> %V, <256 x i64*> %P, <256 x i1> %M, i32 %avl)
call void @llvm.vp.scatter.v256i64.v256p0(<256 x i64> %V, <256 x ptr> %P, <256 x i1> %M, i32 %avl)
ret void
}

declare void @llvm.vp.scatter.v256f64.v256p0f64(<256 x double>, <256 x double*>, <256 x i1>, i32 %avl)
declare void @llvm.vp.scatter.v256f64.v256p0(<256 x double>, <256 x ptr>, <256 x i1>, i32 %avl)

; Function Attrs: nounwind
define fastcc void @vp_mscatter_v256f64(<256 x double> %V, <256 x double*> %P, <256 x i1> %M, i32 %avl) {
define fastcc void @vp_mscatter_v256f64(<256 x double> %V, <256 x ptr> %P, <256 x i1> %M, i32 %avl) {
; CHECK-LABEL: vp_mscatter_v256f64:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lvl %s0
; CHECK-NEXT: vsc %v0, %v1, 0, 0, %vm1
; CHECK-NEXT: b.l.t (, %s10)
call void @llvm.vp.scatter.v256f64.v256p0f64(<256 x double> %V, <256 x double*> %P, <256 x i1> %M, i32 %avl)
call void @llvm.vp.scatter.v256f64.v256p0(<256 x double> %V, <256 x ptr> %P, <256 x i1> %M, i32 %avl)
ret void
}

declare void @llvm.vp.scatter.v256f32.v256p0f32(<256 x float>, <256 x float*>, <256 x i1>, i32 %avl)
declare void @llvm.vp.scatter.v256f32.v256p0(<256 x float>, <256 x ptr>, <256 x i1>, i32 %avl)

; Function Attrs: nounwind
define fastcc void @vp_mscatter_v256f32(<256 x float> %V, <256 x float*> %P, <256 x i1> %M, i32 %avl) {
define fastcc void @vp_mscatter_v256f32(<256 x float> %V, <256 x ptr> %P, <256 x i1> %M, i32 %avl) {
; CHECK-LABEL: vp_mscatter_v256f32:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lvl %s0
; CHECK-NEXT: vscu %v0, %v1, 0, 0, %vm1
; CHECK-NEXT: b.l.t (, %s10)
call void @llvm.vp.scatter.v256f32.v256p0f32(<256 x float> %V, <256 x float*> %P, <256 x i1> %M, i32 %avl)
call void @llvm.vp.scatter.v256f32.v256p0(<256 x float> %V, <256 x ptr> %P, <256 x i1> %M, i32 %avl)
ret void
}

declare void @llvm.vp.scatter.v256i32.v256p0i32(<256 x i32>, <256 x i32*>, <256 x i1>, i32 %avl)
declare void @llvm.vp.scatter.v256i32.v256p0(<256 x i32>, <256 x ptr>, <256 x i1>, i32 %avl)

; Function Attrs: nounwind
define fastcc void @vp_mscatter_v256i32(<256 x i32> %V, <256 x i32*> %P, <256 x i1> %M, i32 %avl) {
define fastcc void @vp_mscatter_v256i32(<256 x i32> %V, <256 x ptr> %P, <256 x i1> %M, i32 %avl) {
; CHECK-LABEL: vp_mscatter_v256i32:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lvl %s0
; CHECK-NEXT: vscl %v0, %v1, 0, 0, %vm1
; CHECK-NEXT: b.l.t (, %s10)
call void @llvm.vp.scatter.v256i32.v256p0i32(<256 x i32> %V, <256 x i32*> %P, <256 x i1> %M, i32 %avl)
call void @llvm.vp.scatter.v256i32.v256p0(<256 x i32> %V, <256 x ptr> %P, <256 x i1> %M, i32 %avl)
ret void
}

56 changes: 28 additions & 28 deletions llvm/test/CodeGen/VE/Vector/vp_strided_load.ll
@@ -1,9 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s

declare <256 x float> @llvm.experimental.vp.strided.load.v256f32.p0f32.i64(float* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
declare <256 x float> @llvm.experimental.vp.strided.load.v256f32.p0.i64(ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)

define fastcc <256 x float> @vp_strided_load_v256f32_rrm(float* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
define fastcc <256 x float> @vp_strided_load_v256f32_rrm(ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256f32_rrm:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s2, %s2, (32)0
@@ -13,11 +13,11 @@ define fastcc <256 x float> @vp_strided_load_v256f32_rrm(float* %ptr, i64 %strid
; CHECK-NEXT: vaddu.l %v0, %s0, %v0, %vm1
; CHECK-NEXT: vgtu %v0, %v0, 0, 0, %vm1
; CHECK-NEXT: b.l.t (, %s10)
%r = call <256 x float> @llvm.experimental.vp.strided.load.v256f32.p0f32.i64(float* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
%r = call <256 x float> @llvm.experimental.vp.strided.load.v256f32.p0.i64(ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
ret <256 x float> %r
}

define fastcc <256 x float> @vp_strided_load_v256f32_rr(float* %ptr, i64 %stride, i32 %evl) {
define fastcc <256 x float> @vp_strided_load_v256f32_rr(ptr %ptr, i64 %stride, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256f32_rr:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s2, %s2, (32)0
@@ -26,11 +26,11 @@ define fastcc <256 x float> @vp_strided_load_v256f32_rr(float* %ptr, i64 %stride
; CHECK-NEXT: b.l.t (, %s10)
%one = insertelement <256 x i1> undef, i1 1, i32 0
%allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
%r = call <256 x float> @llvm.experimental.vp.strided.load.v256f32.p0f32.i64(float* %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
%r = call <256 x float> @llvm.experimental.vp.strided.load.v256f32.p0.i64(ptr %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
ret <256 x float> %r
}

define fastcc <256 x float> @vp_strided_load_v256f32_ri(float* %ptr, i32 %evl) {
define fastcc <256 x float> @vp_strided_load_v256f32_ri(ptr %ptr, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256f32_ri:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s1, %s1, (32)0
@@ -39,13 +39,13 @@ define fastcc <256 x float> @vp_strided_load_v256f32_ri(float* %ptr, i32 %evl) {
; CHECK-NEXT: b.l.t (, %s10)
%one = insertelement <256 x i1> undef, i1 1, i32 0
%allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
%r = call <256 x float> @llvm.experimental.vp.strided.load.v256f32.p0f32.i64(float* %ptr, i64 24, <256 x i1> %allones, i32 %evl)
%r = call <256 x float> @llvm.experimental.vp.strided.load.v256f32.p0.i64(ptr %ptr, i64 24, <256 x i1> %allones, i32 %evl)
ret <256 x float> %r
}

declare <256 x i32> @llvm.experimental.vp.strided.load.v256i32.p0i32.i64(i32* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
declare <256 x i32> @llvm.experimental.vp.strided.load.v256i32.p0.i64(ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)

define fastcc <256 x i32> @vp_strided_load_v256i32_rrm(i32* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
define fastcc <256 x i32> @vp_strided_load_v256i32_rrm(ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256i32_rrm:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s2, %s2, (32)0
@@ -55,11 +55,11 @@ define fastcc <256 x i32> @vp_strided_load_v256i32_rrm(i32* %ptr, i64 %stride, <
; CHECK-NEXT: vaddu.l %v0, %s0, %v0, %vm1
; CHECK-NEXT: vgtl.zx %v0, %v0, 0, 0, %vm1
; CHECK-NEXT: b.l.t (, %s10)
%r = call <256 x i32> @llvm.experimental.vp.strided.load.v256i32.p0i32.i64(i32* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
%r = call <256 x i32> @llvm.experimental.vp.strided.load.v256i32.p0.i64(ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
ret <256 x i32> %r
}

define fastcc <256 x i32> @vp_strided_load_v256i32_rr(i32* %ptr, i64 %stride, i32 %evl) {
define fastcc <256 x i32> @vp_strided_load_v256i32_rr(ptr %ptr, i64 %stride, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256i32_rr:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s2, %s2, (32)0
@@ -68,11 +68,11 @@ define fastcc <256 x i32> @vp_strided_load_v256i32_rr(i32* %ptr, i64 %stride, i3
; CHECK-NEXT: b.l.t (, %s10)
%one = insertelement <256 x i1> undef, i1 1, i32 0
%allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
%r = call <256 x i32> @llvm.experimental.vp.strided.load.v256i32.p0i32.i64(i32* %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
%r = call <256 x i32> @llvm.experimental.vp.strided.load.v256i32.p0.i64(ptr %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
ret <256 x i32> %r
}

define fastcc <256 x i32> @vp_strided_load_v256i32_ri(i32* %ptr, i32 %evl) {
define fastcc <256 x i32> @vp_strided_load_v256i32_ri(ptr %ptr, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256i32_ri:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s1, %s1, (32)0
@@ -81,13 +81,13 @@ define fastcc <256 x i32> @vp_strided_load_v256i32_ri(i32* %ptr, i32 %evl) {
; CHECK-NEXT: b.l.t (, %s10)
%one = insertelement <256 x i1> undef, i1 1, i32 0
%allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
%r = call <256 x i32> @llvm.experimental.vp.strided.load.v256i32.p0i32.i64(i32* %ptr, i64 24, <256 x i1> %allones, i32 %evl)
%r = call <256 x i32> @llvm.experimental.vp.strided.load.v256i32.p0.i64(ptr %ptr, i64 24, <256 x i1> %allones, i32 %evl)
ret <256 x i32> %r
}

declare <256 x double> @llvm.experimental.vp.strided.load.v256f64.p0f64.i64(double* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
declare <256 x double> @llvm.experimental.vp.strided.load.v256f64.p0.i64(ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)

define fastcc <256 x double> @vp_strided_load_v256f64_rrm(double* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
define fastcc <256 x double> @vp_strided_load_v256f64_rrm(ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256f64_rrm:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s2, %s2, (32)0
@@ -97,11 +97,11 @@ define fastcc <256 x double> @vp_strided_load_v256f64_rrm(double* %ptr, i64 %str
; CHECK-NEXT: vaddu.l %v0, %s0, %v0, %vm1
; CHECK-NEXT: vgt %v0, %v0, 0, 0, %vm1
; CHECK-NEXT: b.l.t (, %s10)
%r = call <256 x double> @llvm.experimental.vp.strided.load.v256f64.p0f64.i64(double* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
%r = call <256 x double> @llvm.experimental.vp.strided.load.v256f64.p0.i64(ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
ret <256 x double> %r
}

define fastcc <256 x double> @vp_strided_load_v256f64_rr(double* %ptr, i64 %stride, i32 %evl) {
define fastcc <256 x double> @vp_strided_load_v256f64_rr(ptr %ptr, i64 %stride, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256f64_rr:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s2, %s2, (32)0
@@ -110,11 +110,11 @@ define fastcc <256 x double> @vp_strided_load_v256f64_rr(double* %ptr, i64 %stri
; CHECK-NEXT: b.l.t (, %s10)
%one = insertelement <256 x i1> undef, i1 1, i32 0
%allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
%r = call <256 x double> @llvm.experimental.vp.strided.load.v256f64.p0f64.i64(double* %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
%r = call <256 x double> @llvm.experimental.vp.strided.load.v256f64.p0.i64(ptr %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
ret <256 x double> %r
}

define fastcc <256 x double> @vp_strided_load_v256f64_ri(double* %ptr, i32 %evl) {
define fastcc <256 x double> @vp_strided_load_v256f64_ri(ptr %ptr, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256f64_ri:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s1, %s1, (32)0
@@ -123,13 +123,13 @@ define fastcc <256 x double> @vp_strided_load_v256f64_ri(double* %ptr, i32 %evl)
; CHECK-NEXT: b.l.t (, %s10)
%one = insertelement <256 x i1> undef, i1 1, i32 0
%allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
%r = call <256 x double> @llvm.experimental.vp.strided.load.v256f64.p0f64.i64(double* %ptr, i64 24, <256 x i1> %allones, i32 %evl)
%r = call <256 x double> @llvm.experimental.vp.strided.load.v256f64.p0.i64(ptr %ptr, i64 24, <256 x i1> %allones, i32 %evl)
ret <256 x double> %r
}

declare <256 x i64> @llvm.experimental.vp.strided.load.v256i64.p0i64.i64(i64* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
declare <256 x i64> @llvm.experimental.vp.strided.load.v256i64.p0.i64(ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)

define fastcc <256 x i64> @vp_strided_load_v256i64_rrm(i64* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
define fastcc <256 x i64> @vp_strided_load_v256i64_rrm(ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256i64_rrm:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s2, %s2, (32)0
@@ -139,11 +139,11 @@ define fastcc <256 x i64> @vp_strided_load_v256i64_rrm(i64* %ptr, i64 %stride, <
; CHECK-NEXT: vaddu.l %v0, %s0, %v0, %vm1
; CHECK-NEXT: vgt %v0, %v0, 0, 0, %vm1
; CHECK-NEXT: b.l.t (, %s10)
%r = call <256 x i64> @llvm.experimental.vp.strided.load.v256i64.p0i64.i64(i64* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
%r = call <256 x i64> @llvm.experimental.vp.strided.load.v256i64.p0.i64(ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
ret <256 x i64> %r
}

define fastcc <256 x i64> @vp_strided_load_v256i64_rr(i64* %ptr, i64 %stride, i32 %evl) {
define fastcc <256 x i64> @vp_strided_load_v256i64_rr(ptr %ptr, i64 %stride, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256i64_rr:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s2, %s2, (32)0
@@ -152,11 +152,11 @@ define fastcc <256 x i64> @vp_strided_load_v256i64_rr(i64* %ptr, i64 %stride, i3
; CHECK-NEXT: b.l.t (, %s10)
%one = insertelement <256 x i1> undef, i1 1, i32 0
%allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
%r = call <256 x i64> @llvm.experimental.vp.strided.load.v256i64.p0i64.i64(i64* %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
%r = call <256 x i64> @llvm.experimental.vp.strided.load.v256i64.p0.i64(ptr %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
ret <256 x i64> %r
}

define fastcc <256 x i64> @vp_strided_load_v256i64_ri(i64* %ptr, i32 %evl) {
define fastcc <256 x i64> @vp_strided_load_v256i64_ri(ptr %ptr, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256i64_ri:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s1, %s1, (32)0
@@ -165,6 +165,6 @@ define fastcc <256 x i64> @vp_strided_load_v256i64_ri(i64* %ptr, i32 %evl) {
; CHECK-NEXT: b.l.t (, %s10)
%one = insertelement <256 x i1> undef, i1 1, i32 0
%allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
%r = call <256 x i64> @llvm.experimental.vp.strided.load.v256i64.p0i64.i64(i64* %ptr, i64 24, <256 x i1> %allones, i32 %evl)
%r = call <256 x i64> @llvm.experimental.vp.strided.load.v256i64.p0.i64(ptr %ptr, i64 24, <256 x i1> %allones, i32 %evl)
ret <256 x i64> %r
}
56 changes: 28 additions & 28 deletions llvm/test/CodeGen/VE/Vector/vp_strided_store.ll
@@ -1,20 +1,20 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s

declare void @llvm.experimental.vp.strided.store.v256f32.p0f32.i64(<256 x float> %val, float* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
declare void @llvm.experimental.vp.strided.store.v256f32.p0.i64(<256 x float> %val, ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)

define fastcc void @vp_strided_store_v256f32_rrm(<256 x float> %val, float* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
define fastcc void @vp_strided_store_v256f32_rrm(<256 x float> %val, ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
; CHECK-LABEL: vp_strided_store_v256f32_rrm:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s2, %s2, (32)0
; CHECK-NEXT: lvl %s2
; CHECK-NEXT: vstu %v0, %s1, %s0, %vm1
; CHECK-NEXT: b.l.t (, %s10)
call void @llvm.experimental.vp.strided.store.v256f32.p0f32.i64(<256 x float> %val, float* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
call void @llvm.experimental.vp.strided.store.v256f32.p0.i64(<256 x float> %val, ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
ret void
}

define fastcc void @vp_strided_store_v256f32_rr(<256 x float> %val, float* %ptr, i64 %stride, i32 %evl) {
define fastcc void @vp_strided_store_v256f32_rr(<256 x float> %val, ptr %ptr, i64 %stride, i32 %evl) {
; CHECK-LABEL: vp_strided_store_v256f32_rr:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s2, %s2, (32)0
@@ -23,11 +23,11 @@ define fastcc void @vp_strided_store_v256f32_rr(<256 x float> %val, float* %ptr,
; CHECK-NEXT: b.l.t (, %s10)
%one = insertelement <256 x i1> undef, i1 1, i32 0
%allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
call void @llvm.experimental.vp.strided.store.v256f32.p0f32.i64(<256 x float> %val, float* %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
call void @llvm.experimental.vp.strided.store.v256f32.p0.i64(<256 x float> %val, ptr %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
ret void
}

define fastcc void @vp_strided_store_v256f32_ri(<256 x float> %val, float* %ptr, i32 %evl) {
define fastcc void @vp_strided_store_v256f32_ri(<256 x float> %val, ptr %ptr, i32 %evl) {
; CHECK-LABEL: vp_strided_store_v256f32_ri:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s1, %s1, (32)0
@@ -36,24 +36,24 @@ define fastcc void @vp_strided_store_v256f32_ri(<256 x float> %val, float* %ptr,
; CHECK-NEXT: b.l.t (, %s10)
%one = insertelement <256 x i1> undef, i1 1, i32 0
%allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
call void @llvm.experimental.vp.strided.store.v256f32.p0f32.i64(<256 x float> %val, float* %ptr, i64 24, <256 x i1> %allones, i32 %evl)
call void @llvm.experimental.vp.strided.store.v256f32.p0.i64(<256 x float> %val, ptr %ptr, i64 24, <256 x i1> %allones, i32 %evl)
ret void
}

declare void @llvm.experimental.vp.strided.store.v256i32.p0i32.i64(<256 x i32> %val, i32* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
declare void @llvm.experimental.vp.strided.store.v256i32.p0.i64(<256 x i32> %val, ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)

define fastcc void @vp_strided_store_v256i32_rrm(<256 x i32> %val, i32* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
define fastcc void @vp_strided_store_v256i32_rrm(<256 x i32> %val, ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
; CHECK-LABEL: vp_strided_store_v256i32_rrm:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s2, %s2, (32)0
; CHECK-NEXT: lvl %s2
; CHECK-NEXT: vstl %v0, %s1, %s0, %vm1
; CHECK-NEXT: b.l.t (, %s10)
call void @llvm.experimental.vp.strided.store.v256i32.p0i32.i64(<256 x i32> %val, i32* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
call void @llvm.experimental.vp.strided.store.v256i32.p0.i64(<256 x i32> %val, ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
ret void
}

define fastcc void @vp_strided_store_v256i32_rr(<256 x i32> %val, i32* %ptr, i64 %stride, i32 %evl) {
define fastcc void @vp_strided_store_v256i32_rr(<256 x i32> %val, ptr %ptr, i64 %stride, i32 %evl) {
; CHECK-LABEL: vp_strided_store_v256i32_rr:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s2, %s2, (32)0
@@ -62,11 +62,11 @@ define fastcc void @vp_strided_store_v256i32_rr(<256 x i32> %val, i32* %ptr, i64
; CHECK-NEXT: b.l.t (, %s10)
%one = insertelement <256 x i1> undef, i1 1, i32 0
%allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
call void @llvm.experimental.vp.strided.store.v256i32.p0i32.i64(<256 x i32> %val, i32* %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
call void @llvm.experimental.vp.strided.store.v256i32.p0.i64(<256 x i32> %val, ptr %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
ret void
}

define fastcc void @vp_strided_store_v256i32_ri(<256 x i32> %val, i32* %ptr, i32 %evl) {
define fastcc void @vp_strided_store_v256i32_ri(<256 x i32> %val, ptr %ptr, i32 %evl) {
; CHECK-LABEL: vp_strided_store_v256i32_ri:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s1, %s1, (32)0
@@ -75,24 +75,24 @@ define fastcc void @vp_strided_store_v256i32_ri(<256 x i32> %val, i32* %ptr, i32
; CHECK-NEXT: b.l.t (, %s10)
%one = insertelement <256 x i1> undef, i1 1, i32 0
%allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
call void @llvm.experimental.vp.strided.store.v256i32.p0i32.i64(<256 x i32> %val, i32* %ptr, i64 24, <256 x i1> %allones, i32 %evl)
call void @llvm.experimental.vp.strided.store.v256i32.p0.i64(<256 x i32> %val, ptr %ptr, i64 24, <256 x i1> %allones, i32 %evl)
ret void
}

declare void @llvm.experimental.vp.strided.store.v256f64.p0f64.i64(<256 x double> %val, double* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
declare void @llvm.experimental.vp.strided.store.v256f64.p0.i64(<256 x double> %val, ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)

define fastcc void @vp_strided_store_v256f64_rrm(<256 x double> %val, double* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
define fastcc void @vp_strided_store_v256f64_rrm(<256 x double> %val, ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
; CHECK-LABEL: vp_strided_store_v256f64_rrm:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s2, %s2, (32)0
; CHECK-NEXT: lvl %s2
; CHECK-NEXT: vst %v0, %s1, %s0, %vm1
; CHECK-NEXT: b.l.t (, %s10)
call void @llvm.experimental.vp.strided.store.v256f64.p0f64.i64(<256 x double> %val, double* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
call void @llvm.experimental.vp.strided.store.v256f64.p0.i64(<256 x double> %val, ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
ret void
}

define fastcc void @vp_strided_store_v256f64_rr(<256 x double> %val, double* %ptr, i64 %stride, i32 %evl) {
define fastcc void @vp_strided_store_v256f64_rr(<256 x double> %val, ptr %ptr, i64 %stride, i32 %evl) {
; CHECK-LABEL: vp_strided_store_v256f64_rr:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s2, %s2, (32)0
@@ -101,11 +101,11 @@ define fastcc void @vp_strided_store_v256f64_rr(<256 x double> %val, double* %pt
; CHECK-NEXT: b.l.t (, %s10)
%one = insertelement <256 x i1> undef, i1 1, i32 0
%allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
call void @llvm.experimental.vp.strided.store.v256f64.p0f64.i64(<256 x double> %val, double* %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
call void @llvm.experimental.vp.strided.store.v256f64.p0.i64(<256 x double> %val, ptr %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
ret void
}

define fastcc void @vp_strided_store_v256f64_ri(<256 x double> %val, double* %ptr, i32 %evl) {
define fastcc void @vp_strided_store_v256f64_ri(<256 x double> %val, ptr %ptr, i32 %evl) {
; CHECK-LABEL: vp_strided_store_v256f64_ri:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s1, %s1, (32)0
@@ -114,24 +114,24 @@ define fastcc void @vp_strided_store_v256f64_ri(<256 x double> %val, double* %pt
; CHECK-NEXT: b.l.t (, %s10)
%one = insertelement <256 x i1> undef, i1 1, i32 0
%allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
call void @llvm.experimental.vp.strided.store.v256f64.p0f64.i64(<256 x double> %val, double* %ptr, i64 24, <256 x i1> %allones, i32 %evl)
call void @llvm.experimental.vp.strided.store.v256f64.p0.i64(<256 x double> %val, ptr %ptr, i64 24, <256 x i1> %allones, i32 %evl)
ret void
}

declare void @llvm.experimental.vp.strided.store.v256i64.p0i64.i64(<256 x i64> %val, i64* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
declare void @llvm.experimental.vp.strided.store.v256i64.p0.i64(<256 x i64> %val, ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)

define fastcc void @vp_strided_store_v256i64_rrm(<256 x i64> %val, i64* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
define fastcc void @vp_strided_store_v256i64_rrm(<256 x i64> %val, ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
; CHECK-LABEL: vp_strided_store_v256i64_rrm:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s2, %s2, (32)0
; CHECK-NEXT: lvl %s2
; CHECK-NEXT: vst %v0, %s1, %s0, %vm1
; CHECK-NEXT: b.l.t (, %s10)
call void @llvm.experimental.vp.strided.store.v256i64.p0i64.i64(<256 x i64> %val, i64* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
call void @llvm.experimental.vp.strided.store.v256i64.p0.i64(<256 x i64> %val, ptr %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
ret void
}

define fastcc void @vp_strided_store_v256i64_rr(<256 x i64> %val, i64* %ptr, i64 %stride, i32 %evl) {
define fastcc void @vp_strided_store_v256i64_rr(<256 x i64> %val, ptr %ptr, i64 %stride, i32 %evl) {
; CHECK-LABEL: vp_strided_store_v256i64_rr:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s2, %s2, (32)0
@@ -140,11 +140,11 @@ define fastcc void @vp_strided_store_v256i64_rr(<256 x i64> %val, i64* %ptr, i64
; CHECK-NEXT: b.l.t (, %s10)
%one = insertelement <256 x i1> undef, i1 1, i32 0
%allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
call void @llvm.experimental.vp.strided.store.v256i64.p0i64.i64(<256 x i64> %val, i64* %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
call void @llvm.experimental.vp.strided.store.v256i64.p0.i64(<256 x i64> %val, ptr %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
ret void
}

define fastcc void @vp_strided_store_v256i64_ri(<256 x i64> %val, i64* %ptr, i32 %evl) {
define fastcc void @vp_strided_store_v256i64_ri(<256 x i64> %val, ptr %ptr, i32 %evl) {
; CHECK-LABEL: vp_strided_store_v256i64_ri:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s1, %s1, (32)0
@@ -153,6 +153,6 @@ define fastcc void @vp_strided_store_v256i64_ri(<256 x i64> %val, i64* %ptr, i32
; CHECK-NEXT: b.l.t (, %s10)
%one = insertelement <256 x i1> undef, i1 1, i32 0
%allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
call void @llvm.experimental.vp.strided.store.v256i64.p0i64.i64(<256 x i64> %val, i64* %ptr, i64 24, <256 x i1> %allones, i32 %evl)
call void @llvm.experimental.vp.strided.store.v256i64.p0.i64(<256 x i64> %val, ptr %ptr, i64 24, <256 x i1> %allones, i32 %evl)
ret void
}