64 changes: 28 additions & 36 deletions llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-loadstore.ll
@@ -3,107 +3,99 @@

target triple = "aarch64-unknown-linux-gnu"

define <vscale x 4 x i32> @combine_ld1(i32* %ptr) #0 {
define <vscale x 4 x i32> @combine_ld1(ptr %ptr) #0 {
; CHECK-LABEL: @combine_ld1(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[PTR:%.*]] to <vscale x 4 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], align 16, !annotation !0
; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 4 x i32>, ptr [[PTR:%.*]], align 16, !annotation !0
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %1, i32* %ptr), !annotation !0
%2 = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %1, ptr %ptr), !annotation !0
ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @combine_ld1_casted_predicate(i32* %ptr) #0 {
define <vscale x 4 x i32> @combine_ld1_casted_predicate(ptr %ptr) #0 {
; CHECK-LABEL: @combine_ld1_casted_predicate(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[PTR:%.*]] to <vscale x 4 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], align 16, !annotation !0
; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 4 x i32>, ptr [[PTR:%.*]], align 16, !annotation !0
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %1)
%3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %2)
%4 = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %3, i32* %ptr), !annotation !0
%4 = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %3, ptr %ptr), !annotation !0
ret <vscale x 4 x i32> %4
}

define <vscale x 4 x i32> @combine_ld1_masked(i32* %ptr) #0 {
define <vscale x 4 x i32> @combine_ld1_masked(ptr %ptr) #0 {
; CHECK-LABEL: @combine_ld1_masked(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16)
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[PTR:%.*]] to <vscale x 4 x i32>*
; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP2]], i32 1, <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> zeroinitializer), !annotation !0
; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[PTR:%.*]], i32 1, <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> zeroinitializer), !annotation !0
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16)
%2 = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %1, i32* %ptr), !annotation !0
%2 = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %1, ptr %ptr), !annotation !0
ret <vscale x 4 x i32> %2
}

define <vscale x 8 x i16> @combine_ld1_masked_casted_predicate(i16* %ptr) #0 {
define <vscale x 8 x i16> @combine_ld1_masked_casted_predicate(ptr %ptr) #0 {
; CHECK-LABEL: @combine_ld1_masked_casted_predicate(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[PTR:%.*]] to <vscale x 8 x i16>*
; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[TMP4]], i32 1, <vscale x 8 x i1> [[TMP3]], <vscale x 8 x i16> zeroinitializer), !annotation !0
; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr [[PTR:%.*]], i32 1, <vscale x 8 x i1> [[TMP3]], <vscale x 8 x i16> zeroinitializer), !annotation !0
; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP5]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1)
%3 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %2)
%4 = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1.nxv8i16(<vscale x 8 x i1> %3, i16* %ptr), !annotation !0
%4 = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1.nxv8i16(<vscale x 8 x i1> %3, ptr %ptr), !annotation !0
ret <vscale x 8 x i16> %4
}

define void @combine_st1(<vscale x 4 x i32> %vec, i32* %ptr) #0 {
define void @combine_st1(<vscale x 4 x i32> %vec, ptr %ptr) #0 {
; CHECK-LABEL: @combine_st1(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[PTR:%.*]] to <vscale x 4 x i32>*
; CHECK-NEXT: store <vscale x 4 x i32> [[VEC:%.*]], <vscale x 4 x i32>* [[TMP1]], align 16, !annotation !0
; CHECK-NEXT: store <vscale x 4 x i32> [[VEC:%.*]], ptr [[PTR:%.*]], align 16, !annotation !0
; CHECK-NEXT: ret void
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> %vec, <vscale x 4 x i1> %1, i32* %ptr), !annotation !0
call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> %vec, <vscale x 4 x i1> %1, ptr %ptr), !annotation !0
ret void
}

define void @combine_st1_casted_predicate(<vscale x 4 x i32> %vec, i32* %ptr) #0 {
define void @combine_st1_casted_predicate(<vscale x 4 x i32> %vec, ptr %ptr) #0 {
; CHECK-LABEL: @combine_st1_casted_predicate(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[PTR:%.*]] to <vscale x 4 x i32>*
; CHECK-NEXT: store <vscale x 4 x i32> [[VEC:%.*]], <vscale x 4 x i32>* [[TMP1]], align 16, !annotation !0
; CHECK-NEXT: store <vscale x 4 x i32> [[VEC:%.*]], ptr [[PTR:%.*]], align 16, !annotation !0
; CHECK-NEXT: ret void
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %1)
%3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %2)
call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> %vec, <vscale x 4 x i1> %3, i32* %ptr), !annotation !0
call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> %vec, <vscale x 4 x i1> %3, ptr %ptr), !annotation !0
ret void
}

define void @combine_st1_masked(<vscale x 4 x i32> %vec, i32* %ptr) #0 {
define void @combine_st1_masked(<vscale x 4 x i32> %vec, ptr %ptr) #0 {
; CHECK-LABEL: @combine_st1_masked(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16)
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[PTR:%.*]] to <vscale x 4 x i32>*
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[VEC:%.*]], <vscale x 4 x i32>* [[TMP2]], i32 1, <vscale x 4 x i1> [[TMP1]]), !annotation !0
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[VEC:%.*]], ptr [[PTR:%.*]], i32 1, <vscale x 4 x i1> [[TMP1]]), !annotation !0
; CHECK-NEXT: ret void
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16)
call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> %vec, <vscale x 4 x i1> %1, i32* %ptr), !annotation !0
call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> %vec, <vscale x 4 x i1> %1, ptr %ptr), !annotation !0
ret void
}

define void @combine_st1_masked_casted_predicate(<vscale x 8 x i16> %vec, i16* %ptr) #0 {
define void @combine_st1_masked_casted_predicate(<vscale x 8 x i16> %vec, ptr %ptr) #0 {
; CHECK-LABEL: @combine_st1_masked_casted_predicate(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[PTR:%.*]] to <vscale x 8 x i16>*
; CHECK-NEXT: call void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16> [[VEC:%.*]], <vscale x 8 x i16>* [[TMP4]], i32 1, <vscale x 8 x i1> [[TMP3]]), !annotation !0
; CHECK-NEXT: call void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16> [[VEC:%.*]], ptr [[PTR:%.*]], i32 1, <vscale x 8 x i1> [[TMP3]]), !annotation !0
; CHECK-NEXT: ret void
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1)
%3 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %2)
call void @llvm.aarch64.sve.st1.nxv8i16(<vscale x 8 x i16> %vec, <vscale x 8 x i1> %3, i16* %ptr), !annotation !0
call void @llvm.aarch64.sve.st1.nxv8i16(<vscale x 8 x i16> %vec, <vscale x 8 x i1> %3, ptr %ptr), !annotation !0
ret void
}

@@ -113,15 +105,15 @@ declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1>, i32*)
declare <vscale x 8 x i16> @llvm.aarch64.sve.ld1.nxv8i16(<vscale x 8 x i1>, i16*)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1>, ptr)
declare <vscale x 8 x i16> @llvm.aarch64.sve.ld1.nxv8i16(<vscale x 8 x i1>, ptr)

declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)

declare void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
declare void @llvm.aarch64.sve.st1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
declare void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, ptr)
declare void @llvm.aarch64.sve.st1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, ptr)

attributes #0 = { "target-features"="+sve" }
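For readers skimming the diff: these tests pin down two behaviors of the SVE load/store combine. When the governing predicate is ptrue with pattern 31 (all lanes active), the intrinsic folds to an ordinary vector load or store; any narrower pattern can only reach the masked load/store form. A minimal before/after sketch in opaque-pointer IR (hypothetical function names, not part of the test file):

declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1>, ptr)

define <vscale x 4 x i32> @sketch_before(ptr %p) {
  ; predicate covers every lane (pattern 31 = all elements)
  %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %v = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %pg, ptr %p)
  ret <vscale x 4 x i32> %v
}

define <vscale x 4 x i32> @sketch_after(ptr %p) {
  ; the predicate is provably all-true, so the intrinsic becomes a plain load
  %v = load <vscale x 4 x i32>, ptr %p, align 16
  ret <vscale x 4 x i32> %v
}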

36 changes: 18 additions & 18 deletions llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics-gfx8.ll
@@ -12,97 +12,97 @@ declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float,
declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

define amdgpu_kernel void @image_sample_a16_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
define amdgpu_kernel void @image_sample_a16_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
; CHECK-LABEL: @image_sample_a16_1d(
; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float [[S32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
%s32 = fpext half %s to float
%res = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
store <4 x float> %res, <4 x float> addrspace(1)* %out
store <4 x float> %res, ptr addrspace(1) %out
ret void
}

define amdgpu_kernel void @image_sample_a16_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
define amdgpu_kernel void @image_sample_a16_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
; CHECK-LABEL: @image_sample_a16_2d(
; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float
; CHECK-NEXT: [[T32:%.*]] = fpext half [[T:%.*]] to float
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float [[S32]], float [[T32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
%s32 = fpext half %s to float
%t32 = fpext half %t to float
%res = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
store <4 x float> %res, <4 x float> addrspace(1)* %out
store <4 x float> %res, ptr addrspace(1) %out
ret void
}

define amdgpu_kernel void @image_sample_a16_3d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) {
define amdgpu_kernel void @image_sample_a16_3d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) {
; CHECK-LABEL: @image_sample_a16_3d(
; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float
; CHECK-NEXT: [[T32:%.*]] = fpext half [[T:%.*]] to float
; CHECK-NEXT: [[R32:%.*]] = fpext half [[R:%.*]] to float
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float [[S32]], float [[T32]], float [[R32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
%s32 = fpext half %s to float
%t32 = fpext half %t to float
%r32 = fpext half %r to float
%res = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s32, float %t32, float %r32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
store <4 x float> %res, <4 x float> addrspace(1)* %out
store <4 x float> %res, ptr addrspace(1) %out
ret void
}

define amdgpu_kernel void @image_sample_a16_cube(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
define amdgpu_kernel void @image_sample_a16_cube(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
;
; CHECK-LABEL: @image_sample_a16_cube(
; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float
; CHECK-NEXT: [[T32:%.*]] = fpext half [[T:%.*]] to float
; CHECK-NEXT: [[FACE32:%.*]] = fpext half [[FACE:%.*]] to float
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float [[S32]], float [[T32]], float [[FACE32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
%s32 = fpext half %s to float
%t32 = fpext half %t to float
%face32 = fpext half %face to float
%res = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %s32, float %t32, float %face32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
store <4 x float> %res, <4 x float> addrspace(1)* %out
store <4 x float> %res, ptr addrspace(1) %out
ret void
}

define amdgpu_kernel void @image_sample_a16_1darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) {
define amdgpu_kernel void @image_sample_a16_1darray(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) {
; CHECK-LABEL: @image_sample_a16_1darray(
; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float
; CHECK-NEXT: [[SLICE32:%.*]] = fpext half [[SLICE:%.*]] to float
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float [[S32]], float [[SLICE32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
%s32 = fpext half %s to float
%slice32 = fpext half %slice to float
%res = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float %s32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
store <4 x float> %res, <4 x float> addrspace(1)* %out
store <4 x float> %res, ptr addrspace(1) %out
ret void
}

define amdgpu_kernel void @image_sample_a16_2darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
define amdgpu_kernel void @image_sample_a16_2darray(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
; CHECK-LABEL: @image_sample_a16_2darray(
; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float
; CHECK-NEXT: [[T32:%.*]] = fpext half [[T:%.*]] to float
; CHECK-NEXT: [[SLICE32:%.*]] = fpext half [[SLICE:%.*]] to float
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float [[S32]], float [[T32]], float [[SLICE32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
%s32 = fpext half %s to float
%t32 = fpext half %t to float
%slice32 = fpext half %slice to float
%res = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
store <4 x float> %res, <4 x float> addrspace(1)* %out
store <4 x float> %res, ptr addrspace(1) %out
ret void
}
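Context for the gfx8 run line: this target lacks 16-bit (a16) image addressing, so the combine must leave the fpext chains and the f32-coordinate sample intrinsics untouched, which is exactly what the CHECK lines assert. On a16-capable targets (gfx9 and later) the same input would instead be expected to shrink to an f16-coordinate sample along these lines (a sketch, not taken from this file):

; %res = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15,
;            half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)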
1,106 changes: 553 additions & 553 deletions llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll

Large diffs are not rendered by default.

232 changes: 116 additions & 116 deletions llvm/test/Transforms/InstCombine/AMDGPU/ldexp.ll

Large diffs are not rendered by default.

76 changes: 29 additions & 47 deletions llvm/test/Transforms/InstCombine/ARM/strcmp.ll
@@ -9,33 +9,31 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
@bell = constant [5 x i8] c"bell\00"
@null = constant [1 x i8] zeroinitializer

declare i32 @strcmp(i8*, i8*)
declare i32 @strcmp(ptr, ptr)

; strcmp("", x) -> -*x
define arm_aapcscc i32 @test1(i8* %str2) {
define arm_aapcscc i32 @test1(ptr %str2) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: [[STRCMPLOAD:%.*]] = load i8, i8* [[STR2:%.*]], align 1
; CHECK-NEXT: [[STRCMPLOAD:%.*]] = load i8, ptr [[STR2:%.*]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[STRCMPLOAD]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = sub nsw i32 0, [[TMP1]]
; CHECK-NEXT: ret i32 [[TMP2]]
;

%str1 = getelementptr inbounds [1 x i8], [1 x i8]* @null, i32 0, i32 0
%temp1 = call arm_apcscc i32 @strcmp(i8* %str1, i8* %str2)
%temp1 = call arm_apcscc i32 @strcmp(ptr @null, ptr %str2)
ret i32 %temp1

}

; strcmp(x, "") -> *x
define arm_aapcscc i32 @test2(i8* %str1) {
define arm_aapcscc i32 @test2(ptr %str1) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: [[STRCMPLOAD:%.*]] = load i8, i8* [[STR1:%.*]], align 1
; CHECK-NEXT: [[STRCMPLOAD:%.*]] = load i8, ptr [[STR1:%.*]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[STRCMPLOAD]] to i32
; CHECK-NEXT: ret i32 [[TMP1]]
;

%str2 = getelementptr inbounds [1 x i8], [1 x i8]* @null, i32 0, i32 0
%temp1 = call arm_aapcscc i32 @strcmp(i8* %str1, i8* %str2)
%temp1 = call arm_aapcscc i32 @strcmp(ptr %str1, ptr @null)
ret i32 %temp1
}

@@ -45,9 +43,7 @@ define arm_aapcscc i32 @test3() {
; CHECK-NEXT: ret i32 -1
;

%str1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
%str2 = getelementptr inbounds [6 x i8], [6 x i8]* @hello, i32 0, i32 0
%temp1 = call arm_aapcscc i32 @strcmp(i8* %str1, i8* %str2)
%temp1 = call arm_aapcscc i32 @strcmp(ptr @hell, ptr @hello)
ret i32 %temp1
}

@@ -56,64 +52,57 @@ define arm_aapcscc i32 @test4() {
; CHECK-NEXT: ret i32 1
;

%str1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
%str2 = getelementptr inbounds [1 x i8], [1 x i8]* @null, i32 0, i32 0
%temp1 = call arm_aapcscc i32 @strcmp(i8* %str1, i8* %str2)
%temp1 = call arm_aapcscc i32 @strcmp(ptr @hell, ptr @null)
ret i32 %temp1
}

; strcmp(x, y) -> memcmp(x, y, <known length>)
; (This transform is rather difficult to trigger in a useful manner)
define arm_aapcscc i32 @test5(i1 %b) {
; CHECK-LABEL: @test5(
; CHECK-NEXT: [[STR2:%.*]] = select i1 [[B:%.*]], i8* getelementptr inbounds ([5 x i8], [5 x i8]* @hell, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @bell, i32 0, i32 0)
; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(5) getelementptr inbounds ([6 x i8], [6 x i8]* @hello, i32 0, i32 0), i8* noundef nonnull dereferenceable(5) [[STR2]], i32 5)
; CHECK-NEXT: [[STR2:%.*]] = select i1 [[B:%.*]], ptr @hell, ptr @bell
; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(ptr noundef nonnull dereferenceable(5) @hello, ptr noundef nonnull dereferenceable(5) [[STR2]], i32 5)
; CHECK-NEXT: ret i32 [[MEMCMP]]
;

%str1 = getelementptr inbounds [6 x i8], [6 x i8]* @hello, i32 0, i32 0
%temp1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
%temp2 = getelementptr inbounds [5 x i8], [5 x i8]* @bell, i32 0, i32 0
%str2 = select i1 %b, i8* %temp1, i8* %temp2
%temp3 = call arm_aapcscc i32 @strcmp(i8* %str1, i8* %str2)
%str2 = select i1 %b, ptr @hell, ptr @bell
%temp3 = call arm_aapcscc i32 @strcmp(ptr @hello, ptr %str2)
ret i32 %temp3
}

; strcmp(x,x) -> 0
define arm_aapcscc i32 @test6(i8* %str) {
define arm_aapcscc i32 @test6(ptr %str) {
; CHECK-LABEL: @test6(
; CHECK-NEXT: ret i32 0
;

%temp1 = call arm_aapcscc i32 @strcmp(i8* %str, i8* %str)
%temp1 = call arm_aapcscc i32 @strcmp(ptr %str, ptr %str)
ret i32 %temp1
}

; strcmp("", x) -> -*x
define arm_aapcs_vfpcc i32 @test1_vfp(i8* %str2) {
define arm_aapcs_vfpcc i32 @test1_vfp(ptr %str2) {
; CHECK-LABEL: @test1_vfp(
; CHECK-NEXT: [[STRCMPLOAD:%.*]] = load i8, i8* [[STR2:%.*]], align 1
; CHECK-NEXT: [[STRCMPLOAD:%.*]] = load i8, ptr [[STR2:%.*]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[STRCMPLOAD]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = sub nsw i32 0, [[TMP1]]
; CHECK-NEXT: ret i32 [[TMP2]]
;

%str1 = getelementptr inbounds [1 x i8], [1 x i8]* @null, i32 0, i32 0
%temp1 = call arm_aapcs_vfpcc i32 @strcmp(i8* %str1, i8* %str2)
%temp1 = call arm_aapcs_vfpcc i32 @strcmp(ptr @null, ptr %str2)
ret i32 %temp1

}

; strcmp(x, "") -> *x
define arm_aapcs_vfpcc i32 @test2_vfp(i8* %str1) {
define arm_aapcs_vfpcc i32 @test2_vfp(ptr %str1) {
; CHECK-LABEL: @test2_vfp(
; CHECK-NEXT: [[STRCMPLOAD:%.*]] = load i8, i8* [[STR1:%.*]], align 1
; CHECK-NEXT: [[STRCMPLOAD:%.*]] = load i8, ptr [[STR1:%.*]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[STRCMPLOAD]] to i32
; CHECK-NEXT: ret i32 [[TMP1]]
;

%str2 = getelementptr inbounds [1 x i8], [1 x i8]* @null, i32 0, i32 0
%temp1 = call arm_aapcs_vfpcc i32 @strcmp(i8* %str1, i8* %str2)
%temp1 = call arm_aapcs_vfpcc i32 @strcmp(ptr %str1, ptr @null)
ret i32 %temp1
}

@@ -123,9 +112,7 @@ define arm_aapcs_vfpcc i32 @test3_vfp() {
; CHECK-NEXT: ret i32 -1
;

%str1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
%str2 = getelementptr inbounds [6 x i8], [6 x i8]* @hello, i32 0, i32 0
%temp1 = call arm_aapcs_vfpcc i32 @strcmp(i8* %str1, i8* %str2)
%temp1 = call arm_aapcs_vfpcc i32 @strcmp(ptr @hell, ptr @hello)
ret i32 %temp1
}

@@ -134,35 +121,30 @@ define arm_aapcs_vfpcc i32 @test4_vfp() {
; CHECK-NEXT: ret i32 1
;

%str1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
%str2 = getelementptr inbounds [1 x i8], [1 x i8]* @null, i32 0, i32 0
%temp1 = call arm_aapcs_vfpcc i32 @strcmp(i8* %str1, i8* %str2)
%temp1 = call arm_aapcs_vfpcc i32 @strcmp(ptr @hell, ptr @null)
ret i32 %temp1
}

; strcmp(x, y) -> memcmp(x, y, <known length>)
; (This transform is rather difficult to trigger in a useful manner)
define arm_aapcs_vfpcc i32 @test5_vfp(i1 %b) {
; CHECK-LABEL: @test5_vfp(
; CHECK-NEXT: [[STR2:%.*]] = select i1 [[B:%.*]], i8* getelementptr inbounds ([5 x i8], [5 x i8]* @hell, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @bell, i32 0, i32 0)
; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(5) getelementptr inbounds ([6 x i8], [6 x i8]* @hello, i32 0, i32 0), i8* noundef nonnull dereferenceable(5) [[STR2]], i32 5)
; CHECK-NEXT: [[STR2:%.*]] = select i1 [[B:%.*]], ptr @hell, ptr @bell
; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(ptr noundef nonnull dereferenceable(5) @hello, ptr noundef nonnull dereferenceable(5) [[STR2]], i32 5)
; CHECK-NEXT: ret i32 [[MEMCMP]]
;

%str1 = getelementptr inbounds [6 x i8], [6 x i8]* @hello, i32 0, i32 0
%temp1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
%temp2 = getelementptr inbounds [5 x i8], [5 x i8]* @bell, i32 0, i32 0
%str2 = select i1 %b, i8* %temp1, i8* %temp2
%temp3 = call arm_aapcs_vfpcc i32 @strcmp(i8* %str1, i8* %str2)
%str2 = select i1 %b, ptr @hell, ptr @bell
%temp3 = call arm_aapcs_vfpcc i32 @strcmp(ptr @hello, ptr %str2)
ret i32 %temp3
}

; strcmp(x,x) -> 0
define arm_aapcs_vfpcc i32 @test6_vfp(i8* %str) {
define arm_aapcs_vfpcc i32 @test6_vfp(ptr %str) {
; CHECK-LABEL: @test6_vfp(
; CHECK-NEXT: ret i32 0
;

%temp1 = call arm_aapcs_vfpcc i32 @strcmp(i8* %str, i8* %str)
%temp1 = call arm_aapcs_vfpcc i32 @strcmp(ptr %str, ptr %str)
ret i32 %temp1
}
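Taken together, these tests cover the four strcmp folds in the libcall simplifier: comparison against an empty string, two constant strings, identical pointers, and the known-length rewrite to memcmp. A sketch of the first fold under opaque pointers (hypothetical module, not part of the test):

@empty = constant [1 x i8] zeroinitializer

declare i32 @strcmp(ptr, ptr)

define i32 @sketch_before(ptr %s) {
  %r = call i32 @strcmp(ptr @empty, ptr %s)   ; strcmp("", s)
  ret i32 %r
}

define i32 @sketch_after(ptr %s) {
  %c = load i8, ptr %s, align 1               ; first byte of s
  %z = zext i8 %c to i32
  %r = sub nsw i32 0, %z                      ; -(unsigned char)s[0]
  ret i32 %r
}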
52 changes: 21 additions & 31 deletions llvm/test/Transforms/InstCombine/ARM/strcpy.ll
@@ -9,68 +9,58 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
@a = common global [32 x i8] zeroinitializer, align 1
@b = common global [32 x i8] zeroinitializer, align 1

declare i8* @strcpy(i8*, i8*)
declare ptr @strcpy(ptr, ptr)

define arm_aapcscc void @test_simplify1() {
; CHECK-LABEL: @test_simplify1(

%dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
%src = getelementptr [6 x i8], [6 x i8]* @hello, i32 0, i32 0

call arm_aapcscc i8* @strcpy(i8* %dst, i8* %src)
; CHECK: @llvm.memcpy.p0i8.p0i8.i32
call arm_aapcscc ptr @strcpy(ptr @a, ptr @hello)
; CHECK: @llvm.memcpy.p0.p0.i32
ret void
}

define arm_aapcscc i8* @test_simplify2() {
define arm_aapcscc ptr @test_simplify2() {
; CHECK-LABEL: @test_simplify2(

%dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0

%ret = call arm_aapcscc i8* @strcpy(i8* %dst, i8* %dst)
; CHECK: ret i8* getelementptr inbounds ([32 x i8], [32 x i8]* @a, i32 0, i32 0)
ret i8* %ret
%ret = call arm_aapcscc ptr @strcpy(ptr @a, ptr @a)
; CHECK: ret ptr @a
ret ptr %ret
}

define arm_aapcscc i8* @test_no_simplify1() {
define arm_aapcscc ptr @test_no_simplify1() {
; CHECK-LABEL: @test_no_simplify1(

%dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
%src = getelementptr [32 x i8], [32 x i8]* @b, i32 0, i32 0

%ret = call arm_aapcscc i8* @strcpy(i8* %dst, i8* %src)
; CHECK: call arm_aapcscc i8* @strcpy
ret i8* %ret
%ret = call arm_aapcscc ptr @strcpy(ptr @a, ptr @b)
; CHECK: call arm_aapcscc ptr @strcpy
ret ptr %ret
}

define arm_aapcs_vfpcc void @test_simplify1_vfp() {
; CHECK-LABEL: @test_simplify1_vfp(

%dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
%src = getelementptr [6 x i8], [6 x i8]* @hello, i32 0, i32 0

call arm_aapcs_vfpcc i8* @strcpy(i8* %dst, i8* %src)
; CHECK: @llvm.memcpy.p0i8.p0i8.i32
call arm_aapcs_vfpcc ptr @strcpy(ptr @a, ptr @hello)
; CHECK: @llvm.memcpy.p0.p0.i32
ret void
}

define arm_aapcs_vfpcc i8* @test_simplify2_vfp() {
define arm_aapcs_vfpcc ptr @test_simplify2_vfp() {
; CHECK-LABEL: @test_simplify2_vfp(

%dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0

%ret = call arm_aapcs_vfpcc i8* @strcpy(i8* %dst, i8* %dst)
; CHECK: ret i8* getelementptr inbounds ([32 x i8], [32 x i8]* @a, i32 0, i32 0)
ret i8* %ret
%ret = call arm_aapcs_vfpcc ptr @strcpy(ptr @a, ptr @a)
; CHECK: ret ptr @a
ret ptr %ret
}

define arm_aapcs_vfpcc i8* @test_no_simplify1_vfp() {
define arm_aapcs_vfpcc ptr @test_no_simplify1_vfp() {
; CHECK-LABEL: @test_no_simplify1_vfp(

%dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
%src = getelementptr [32 x i8], [32 x i8]* @b, i32 0, i32 0

%ret = call arm_aapcs_vfpcc i8* @strcpy(i8* %dst, i8* %src)
; CHECK: call arm_aapcs_vfpcc i8* @strcpy
ret i8* %ret
%ret = call arm_aapcs_vfpcc ptr @strcpy(ptr @a, ptr @b)
; CHECK: call arm_aapcs_vfpcc ptr @strcpy
ret ptr %ret
}
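The pattern being locked in here: when the source is a string of known length, strcpy becomes a fixed-size memcpy; a self-copy folds to the destination pointer; an unknown-length source is left alone. A minimal sketch of the first case (global and function names assumed):

@dst = global [32 x i8] zeroinitializer
@src = constant [6 x i8] c"hello\00"

declare ptr @strcpy(ptr, ptr)
declare void @llvm.memcpy.p0.p0.i32(ptr, ptr, i32, i1)

define void @sketch_before() {
  call ptr @strcpy(ptr @dst, ptr @src)
  ret void
}

define void @sketch_after() {
  ; 6 = strlen("hello") plus the terminating NUL byte
  call void @llvm.memcpy.p0.p0.i32(ptr @dst, ptr @src, i32 6, i1 false)
  ret void
}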
84 changes: 38 additions & 46 deletions llvm/test/Transforms/InstCombine/ARM/vld1.ll
@@ -9,110 +9,102 @@ target triple = "armv8-arm-none-eabi"
; constant, since we get constant-folding for free.

; Bail the optimization if the alignment is not a constant.
define <2 x i64> @vld1_align(i8* %ptr, i32 %align) {
define <2 x i64> @vld1_align(ptr %ptr, i32 %align) {
; CHECK-LABEL: @vld1_align(
; CHECK-NEXT: [[VLD1:%.*]] = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8* [[PTR:%.*]], i32 [[ALIGN:%.*]])
; CHECK-NEXT: [[VLD1:%.*]] = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0(ptr [[PTR:%.*]], i32 [[ALIGN:%.*]])
; CHECK-NEXT: ret <2 x i64> [[VLD1]]
;
%vld1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8* %ptr, i32 %align)
%vld1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0(ptr %ptr, i32 %align)
ret <2 x i64> %vld1
}

; Bail the optimization if the alignment is not power of 2.
define <2 x i64> @vld1_align_pow2(i8* %ptr) {
define <2 x i64> @vld1_align_pow2(ptr %ptr) {
; CHECK-LABEL: @vld1_align_pow2(
; CHECK-NEXT: [[VLD1:%.*]] = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8* [[PTR:%.*]], i32 3)
; CHECK-NEXT: [[VLD1:%.*]] = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0(ptr [[PTR:%.*]], i32 3)
; CHECK-NEXT: ret <2 x i64> [[VLD1]]
;
%vld1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8* %ptr, i32 3)
%vld1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0(ptr %ptr, i32 3)
ret <2 x i64> %vld1
}

define <8 x i8> @vld1_8x8(i8* %ptr) {
define <8 x i8> @vld1_8x8(ptr %ptr) {
; CHECK-LABEL: @vld1_8x8(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[PTR:%.*]] to <8 x i8>*
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[PTR:%.*]], align 1
; CHECK-NEXT: ret <8 x i8> [[TMP2]]
;
%vld1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %ptr, i32 1)
%vld1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0(ptr %ptr, i32 1)
ret <8 x i8> %vld1
}

define <4 x i16> @vld1_4x16(i8* %ptr) {
define <4 x i16> @vld1_4x16(ptr %ptr) {
; CHECK-LABEL: @vld1_4x16(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[PTR:%.*]] to <4 x i16>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 2
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[PTR:%.*]], align 2
; CHECK-NEXT: ret <4 x i16> [[TMP2]]
;
%vld1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0i8(i8* %ptr, i32 2)
%vld1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0(ptr %ptr, i32 2)
ret <4 x i16> %vld1
}

define <2 x i32> @vld1_2x32(i8* %ptr) {
define <2 x i32> @vld1_2x32(ptr %ptr) {
; CHECK-LABEL: @vld1_2x32(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[PTR:%.*]] to <2 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[PTR:%.*]], align 4
; CHECK-NEXT: ret <2 x i32> [[TMP2]]
;
%vld1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32.p0i8(i8* %ptr, i32 4)
%vld1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32.p0(ptr %ptr, i32 4)
ret <2 x i32> %vld1
}

define <1 x i64> @vld1_1x64(i8* %ptr) {
define <1 x i64> @vld1_1x64(ptr %ptr) {
; CHECK-LABEL: @vld1_1x64(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[PTR:%.*]] to <1 x i64>*
; CHECK-NEXT: [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <1 x i64>, ptr [[PTR:%.*]], align 8
; CHECK-NEXT: ret <1 x i64> [[TMP2]]
;
%vld1 = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* %ptr, i32 8)
%vld1 = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0(ptr %ptr, i32 8)
ret <1 x i64> %vld1
}

define <8 x i16> @vld1_8x16(i8* %ptr) {
define <8 x i16> @vld1_8x16(ptr %ptr) {
; CHECK-LABEL: @vld1_8x16(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[PTR:%.*]] to <8 x i16>*
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
; CHECK-NEXT: ret <8 x i16> [[TMP2]]
;
%vld1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %ptr, i32 2)
%vld1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr %ptr, i32 2)
ret <8 x i16> %vld1
}

define <16 x i8> @vld1_16x8(i8* %ptr) {
define <16 x i8> @vld1_16x8(ptr %ptr) {
; CHECK-LABEL: @vld1_16x8(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[PTR:%.*]] to <16 x i8>*
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[PTR:%.*]], align 1
; CHECK-NEXT: ret <16 x i8> [[TMP2]]
;
%vld1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %ptr, i32 1)
%vld1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0(ptr %ptr, i32 1)
ret <16 x i8> %vld1
}

define <4 x i32> @vld1_4x32(i8* %ptr) {
define <4 x i32> @vld1_4x32(ptr %ptr) {
; CHECK-LABEL: @vld1_4x32(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[PTR:%.*]] to <4 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
; CHECK-NEXT: ret <4 x i32> [[TMP2]]
;
%vld1 = call <4 x i32> @llvm.arm.neon.vld1.v4i32.p0i8(i8* %ptr, i32 4)
%vld1 = call <4 x i32> @llvm.arm.neon.vld1.v4i32.p0(ptr %ptr, i32 4)
ret <4 x i32> %vld1
}

define <2 x i64> @vld1_2x64(i8* %ptr) {
define <2 x i64> @vld1_2x64(ptr %ptr) {
; CHECK-LABEL: @vld1_2x64(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[PTR:%.*]] to <2 x i64>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
; CHECK-NEXT: ret <2 x i64> [[TMP2]]
;
%vld1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8* %ptr, i32 8)
%vld1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0(ptr %ptr, i32 8)
ret <2 x i64> %vld1
}

declare <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8*, i32)
declare <4 x i16> @llvm.arm.neon.vld1.v4i16.p0i8(i8*, i32)
declare <2 x i32> @llvm.arm.neon.vld1.v2i32.p0i8(i8*, i32)
declare <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8*, i32)
declare <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8*, i32)
declare <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8*, i32)
declare <4 x i32> @llvm.arm.neon.vld1.v4i32.p0i8(i8*, i32)
declare <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8*, i32)
declare <8 x i8> @llvm.arm.neon.vld1.v8i8.p0(ptr, i32)
declare <4 x i16> @llvm.arm.neon.vld1.v4i16.p0(ptr, i32)
declare <2 x i32> @llvm.arm.neon.vld1.v2i32.p0(ptr, i32)
declare <1 x i64> @llvm.arm.neon.vld1.v1i64.p0(ptr, i32)
declare <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr, i32)
declare <16 x i8> @llvm.arm.neon.vld1.v16i8.p0(ptr, i32)
declare <4 x i32> @llvm.arm.neon.vld1.v4i32.p0(ptr, i32)
declare <2 x i64> @llvm.arm.neon.vld1.v2i64.p0(ptr, i32)
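In short: a vld1 whose alignment operand is a constant power of two can be replaced by an ordinary load carrying that alignment, while a variable or non-power-of-two alignment blocks the combine (the first two tests). A sketch of the positive case (function names assumed):

declare <4 x i32> @llvm.arm.neon.vld1.v4i32.p0(ptr, i32)

define <4 x i32> @sketch_before(ptr %p) {
  %v = call <4 x i32> @llvm.arm.neon.vld1.v4i32.p0(ptr %p, i32 4)
  ret <4 x i32> %v
}

define <4 x i32> @sketch_after(ptr %p) {
  %v = load <4 x i32>, ptr %p, align 4   ; alignment 4 taken from the intrinsic
  ret <4 x i32> %v
}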
80 changes: 36 additions & 44 deletions llvm/test/Transforms/InstCombine/PowerPC/aligned-altivec.ll
@@ -2,123 +2,115 @@
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"

declare <4 x i32> @llvm.ppc.altivec.lvx(i8*) #1
declare <4 x i32> @llvm.ppc.altivec.lvx(ptr) #1

define <4 x i32> @test1(<4 x i32>* %h) #0 {
define <4 x i32> @test1(ptr %h) #0 {
entry:
%h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1
%hv = bitcast <4 x i32>* %h1 to i8*
%vl = call <4 x i32> @llvm.ppc.altivec.lvx(i8* %hv)
%h1 = getelementptr <4 x i32>, ptr %h, i64 1
%vl = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %h1)

; CHECK-LABEL: @test1
; CHECK: @llvm.ppc.altivec.lvx
; CHECK: ret <4 x i32>

%v0 = load <4 x i32>, <4 x i32>* %h, align 8
%v0 = load <4 x i32>, ptr %h, align 8
%a = add <4 x i32> %v0, %vl
ret <4 x i32> %a
}

define <4 x i32> @test1a(<4 x i32>* align 16 %h) #0 {
define <4 x i32> @test1a(ptr align 16 %h) #0 {
entry:
%h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1
%hv = bitcast <4 x i32>* %h1 to i8*
%vl = call <4 x i32> @llvm.ppc.altivec.lvx(i8* %hv)
%h1 = getelementptr <4 x i32>, ptr %h, i64 1
%vl = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %h1)

; CHECK-LABEL: @test1a
; CHECK-NOT: @llvm.ppc.altivec.lvx
; CHECK: ret <4 x i32>

%v0 = load <4 x i32>, <4 x i32>* %h, align 8
%v0 = load <4 x i32>, ptr %h, align 8
%a = add <4 x i32> %v0, %vl
ret <4 x i32> %a
}

declare void @llvm.ppc.altivec.stvx(<4 x i32>, i8*) #0
declare void @llvm.ppc.altivec.stvx(<4 x i32>, ptr) #0

define <4 x i32> @test2(<4 x i32>* %h, <4 x i32> %d) #0 {
define <4 x i32> @test2(ptr %h, <4 x i32> %d) #0 {
entry:
%h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1
%hv = bitcast <4 x i32>* %h1 to i8*
call void @llvm.ppc.altivec.stvx(<4 x i32> %d, i8* %hv)
%h1 = getelementptr <4 x i32>, ptr %h, i64 1
call void @llvm.ppc.altivec.stvx(<4 x i32> %d, ptr %h1)

%v0 = load <4 x i32>, <4 x i32>* %h, align 8
%v0 = load <4 x i32>, ptr %h, align 8
ret <4 x i32> %v0

; CHECK-LABEL: @test2
; CHECK: @llvm.ppc.altivec.stvx
; CHECK: ret <4 x i32>
}

define <4 x i32> @test2a(<4 x i32>* align 16 %h, <4 x i32> %d) #0 {
define <4 x i32> @test2a(ptr align 16 %h, <4 x i32> %d) #0 {
entry:
%h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1
%hv = bitcast <4 x i32>* %h1 to i8*
call void @llvm.ppc.altivec.stvx(<4 x i32> %d, i8* %hv)
%h1 = getelementptr <4 x i32>, ptr %h, i64 1
call void @llvm.ppc.altivec.stvx(<4 x i32> %d, ptr %h1)

%v0 = load <4 x i32>, <4 x i32>* %h, align 8
%v0 = load <4 x i32>, ptr %h, align 8
ret <4 x i32> %v0

; CHECK-LABEL: @test2
; CHECK-NOT: @llvm.ppc.altivec.stvx
; CHECK: ret <4 x i32>
}

declare <4 x i32> @llvm.ppc.altivec.lvxl(i8*) #1
declare <4 x i32> @llvm.ppc.altivec.lvxl(ptr) #1

define <4 x i32> @test1l(<4 x i32>* %h) #0 {
define <4 x i32> @test1l(ptr %h) #0 {
entry:
%h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1
%hv = bitcast <4 x i32>* %h1 to i8*
%vl = call <4 x i32> @llvm.ppc.altivec.lvxl(i8* %hv)
%h1 = getelementptr <4 x i32>, ptr %h, i64 1
%vl = call <4 x i32> @llvm.ppc.altivec.lvxl(ptr %h1)

; CHECK-LABEL: @test1l
; CHECK: @llvm.ppc.altivec.lvxl
; CHECK: ret <4 x i32>

%v0 = load <4 x i32>, <4 x i32>* %h, align 8
%v0 = load <4 x i32>, ptr %h, align 8
%a = add <4 x i32> %v0, %vl
ret <4 x i32> %a
}

define <4 x i32> @test1la(<4 x i32>* align 16 %h) #0 {
define <4 x i32> @test1la(ptr align 16 %h) #0 {
entry:
%h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1
%hv = bitcast <4 x i32>* %h1 to i8*
%vl = call <4 x i32> @llvm.ppc.altivec.lvxl(i8* %hv)
%h1 = getelementptr <4 x i32>, ptr %h, i64 1
%vl = call <4 x i32> @llvm.ppc.altivec.lvxl(ptr %h1)

; CHECK-LABEL: @test1la
; CHECK-NOT: @llvm.ppc.altivec.lvxl
; CHECK: ret <4 x i32>

%v0 = load <4 x i32>, <4 x i32>* %h, align 8
%v0 = load <4 x i32>, ptr %h, align 8
%a = add <4 x i32> %v0, %vl
ret <4 x i32> %a
}

declare void @llvm.ppc.altivec.stvxl(<4 x i32>, i8*) #0
declare void @llvm.ppc.altivec.stvxl(<4 x i32>, ptr) #0

define <4 x i32> @test2l(<4 x i32>* %h, <4 x i32> %d) #0 {
define <4 x i32> @test2l(ptr %h, <4 x i32> %d) #0 {
entry:
%h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1
%hv = bitcast <4 x i32>* %h1 to i8*
call void @llvm.ppc.altivec.stvxl(<4 x i32> %d, i8* %hv)
%h1 = getelementptr <4 x i32>, ptr %h, i64 1
call void @llvm.ppc.altivec.stvxl(<4 x i32> %d, ptr %h1)

%v0 = load <4 x i32>, <4 x i32>* %h, align 8
%v0 = load <4 x i32>, ptr %h, align 8
ret <4 x i32> %v0

; CHECK-LABEL: @test2l
; CHECK: @llvm.ppc.altivec.stvxl
; CHECK: ret <4 x i32>
}

define <4 x i32> @test2la(<4 x i32>* align 16 %h, <4 x i32> %d) #0 {
define <4 x i32> @test2la(ptr align 16 %h, <4 x i32> %d) #0 {
entry:
%h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1
%hv = bitcast <4 x i32>* %h1 to i8*
call void @llvm.ppc.altivec.stvxl(<4 x i32> %d, i8* %hv)
%h1 = getelementptr <4 x i32>, ptr %h, i64 1
call void @llvm.ppc.altivec.stvxl(<4 x i32> %d, ptr %h1)

%v0 = load <4 x i32>, <4 x i32>* %h, align 8
%v0 = load <4 x i32>, ptr %h, align 8
ret <4 x i32> %v0

; CHECK-LABEL: @test2l
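The distinction these tests draw: lvx/stvx implicitly mask off the low four address bits, so InstCombine may only rewrite them as plain vector ops when it can prove the address is 16-byte aligned, as with the `align 16` parameters in the *a variants. A sketch of the provable case (function name assumed):

define <4 x i32> @sketch_after(ptr align 16 %h) {
  ; the base is 16-byte aligned and the offset is a whole 16-byte element,
  ; so the @llvm.ppc.altivec.lvx call can become a plain aligned load
  %h1 = getelementptr <4 x i32>, ptr %h, i64 1
  %v = load <4 x i32>, ptr %h1, align 16
  ret <4 x i32> %v
}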
48 changes: 22 additions & 26 deletions llvm/test/Transforms/InstCombine/PowerPC/vsx-unaligned.ll
@@ -11,34 +11,30 @@ target triple = "powerpc64-unknown-linux-gnu"

define void @test1() {
entry:
%t1 = alloca <4 x float>*, align 8
%t2 = alloca <2 x double>*, align 8
store <4 x float>* @vf, <4 x float>** %t1, align 8
%0 = load <4 x float>*, <4 x float>** %t1, align 8
%1 = bitcast <4 x float>* %0 to i8*
%2 = call <4 x i32> @llvm.ppc.vsx.lxvw4x(i8* %1)
store <4 x float>* @res_vf, <4 x float>** %t1, align 8
%3 = load <4 x float>*, <4 x float>** %t1, align 8
%4 = bitcast <4 x float>* %3 to i8*
call void @llvm.ppc.vsx.stxvw4x(<4 x i32> %2, i8* %4)
store <2 x double>* @vd, <2 x double>** %t2, align 8
%5 = load <2 x double>*, <2 x double>** %t2, align 8
%6 = bitcast <2 x double>* %5 to i8*
%7 = call <2 x double> @llvm.ppc.vsx.lxvd2x(i8* %6)
store <2 x double>* @res_vd, <2 x double>** %t2, align 8
%8 = load <2 x double>*, <2 x double>** %t2, align 8
%9 = bitcast <2 x double>* %8 to i8*
call void @llvm.ppc.vsx.stxvd2x(<2 x double> %7, i8* %9)
%t1 = alloca ptr, align 8
%t2 = alloca ptr, align 8
store ptr @vf, ptr %t1, align 8
%0 = load ptr, ptr %t1, align 8
%1 = call <4 x i32> @llvm.ppc.vsx.lxvw4x(ptr %0)
store ptr @res_vf, ptr %t1, align 8
%2 = load ptr, ptr %t1, align 8
call void @llvm.ppc.vsx.stxvw4x(<4 x i32> %1, ptr %2)
store ptr @vd, ptr %t2, align 8
%3 = load ptr, ptr %t2, align 8
%4 = call <2 x double> @llvm.ppc.vsx.lxvd2x(ptr %3)
store ptr @res_vd, ptr %t2, align 8
%5 = load ptr, ptr %t2, align 8
call void @llvm.ppc.vsx.stxvd2x(<2 x double> %4, ptr %5)
ret void
}

; CHECK-LABEL: @test1
; CHECK: %0 = load <4 x i32>, <4 x i32>* bitcast (<4 x float>* @vf to <4 x i32>*), align 1
; CHECK: store <4 x i32> %0, <4 x i32>* bitcast (<4 x float>* @res_vf to <4 x i32>*), align 1
; CHECK: %1 = load <2 x double>, <2 x double>* @vd, align 1
; CHECK: store <2 x double> %1, <2 x double>* @res_vd, align 1
; CHECK: %0 = load <4 x i32>, ptr @vf, align 1
; CHECK: store <4 x i32> %0, ptr @res_vf, align 1
; CHECK: %1 = load <2 x double>, ptr @vd, align 1
; CHECK: store <2 x double> %1, ptr @res_vd, align 1

declare <4 x i32> @llvm.ppc.vsx.lxvw4x(i8*)
declare void @llvm.ppc.vsx.stxvw4x(<4 x i32>, i8*)
declare <2 x double> @llvm.ppc.vsx.lxvd2x(i8*)
declare void @llvm.ppc.vsx.stxvd2x(<2 x double>, i8*)
declare <4 x i32> @llvm.ppc.vsx.lxvw4x(ptr)
declare void @llvm.ppc.vsx.stxvw4x(<4 x i32>, ptr)
declare <2 x double> @llvm.ppc.vsx.lxvd2x(ptr)
declare void @llvm.ppc.vsx.stxvd2x(<2 x double>, ptr)
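Unlike the Altivec case above, the VSX lxvw4x/stxvw4x intrinsics tolerate arbitrary alignment, so the combine can always replace them with plain loads and stores marked `align 1`, which is what the CHECK lines expect. Sketch (function name assumed):

define <4 x i32> @sketch_after(ptr %p) {
  %v = load <4 x i32>, ptr %p, align 1   ; stands in for @llvm.ppc.vsx.lxvw4x
  ret <4 x i32> %v
}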
54 changes: 24 additions & 30 deletions llvm/test/Transforms/InstCombine/SystemZ/libcall-arg-exts.ll
@@ -34,65 +34,59 @@ define fp128 @fun3(i8 zeroext %x) {

@a = common global [60 x i8] zeroinitializer, align 1
@b = common global [60 x i8] zeroinitializer, align 1
declare i8* @__memccpy_chk(i8*, i8*, i32, i64, i64)
define i8* @fun4() {
declare ptr @__memccpy_chk(ptr, ptr, i32, i64, i64)
define ptr @fun4() {
; CHECK-LABEL: @fun4
; CHECK: call i8* @memccpy
%dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
%src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
%ret = call i8* @__memccpy_chk(i8* %dst, i8* %src, i32 0, i64 60, i64 -1)
ret i8* %ret
; CHECK: call ptr @memccpy
%ret = call ptr @__memccpy_chk(ptr @a, ptr @b, i32 0, i64 60, i64 -1)
ret ptr %ret
}

%FILE = type { }
@A = constant [2 x i8] c"A\00"
declare i32 @fputs(i8*, %FILE*)
define void @fun5(%FILE* %fp) {
declare i32 @fputs(ptr, ptr)
define void @fun5(ptr %fp) {
; CHECK-LABEL: @fun5
; CHECK: call i32 @fputc
%str = getelementptr [2 x i8], [2 x i8]* @A, i32 0, i32 0
call i32 @fputs(i8* %str, %FILE* %fp)
call i32 @fputs(ptr @A, ptr %fp)
ret void
}

@empty = constant [1 x i8] zeroinitializer
declare i32 @puts(i8*)
declare i32 @puts(ptr)
define void @fun6() {
; CHECK-LABEL: @fun6
; CHECK: call i32 @putchar
%str = getelementptr [1 x i8], [1 x i8]* @empty, i32 0, i32 0
call i32 @puts(i8* %str)
call i32 @puts(ptr @empty)
ret void
}

@.str1 = private constant [2 x i8] c"a\00"
declare i8* @strstr(i8*, i8*)
define i8* @fun7(i8* %str) {
declare ptr @strstr(ptr, ptr)
define ptr @fun7(ptr %str) {
; CHECK-LABEL: @fun7
; CHECK: call i8* @strchr
%pat = getelementptr inbounds [2 x i8], [2 x i8]* @.str1, i32 0, i32 0
%ret = call i8* @strstr(i8* %str, i8* %pat)
ret i8* %ret
; CHECK: call ptr @strchr
%ret = call ptr @strstr(ptr %str, ptr @.str1)
ret ptr %ret
}

; CHECK: declare i8* @strchr(i8*, i32 signext)
; CHECK: declare ptr @strchr(ptr, i32 signext)

@hello = constant [14 x i8] c"hello world\5Cn\00"
@chp = global i8* zeroinitializer
declare i8* @strchr(i8*, i32)
@chp = global ptr zeroinitializer
declare ptr @strchr(ptr, i32)
define void @fun8(i32 %chr) {
; CHECK-LABEL: @fun8
; CHECK: call i8* @memchr
%src = getelementptr [14 x i8], [14 x i8]* @hello, i32 0, i32 0
%dst = call i8* @strchr(i8* %src, i32 %chr)
store i8* %dst, i8** @chp
; CHECK: call ptr @memchr
%dst = call ptr @strchr(ptr @hello, i32 %chr)
store ptr %dst, ptr @chp
ret void
}

; CHECK: declare double @ldexp(double, i32 signext)
; CHECK: declare float @ldexpf(float, i32 signext)
; CHECK: declare fp128 @ldexpl(fp128, i32 signext)
; CHECK: declare i8* @memccpy(i8* noalias writeonly, i8* noalias nocapture readonly, i32 signext, i64)
; CHECK: declare noundef i32 @fputc(i32 noundef signext, %FILE* nocapture noundef)
; CHECK: declare ptr @memccpy(ptr noalias writeonly, ptr noalias nocapture readonly, i32 signext, i64)
; CHECK: declare noundef i32 @fputc(i32 noundef signext, ptr nocapture noundef)
; CHECK: declare noundef i32 @putchar(i32 noundef signext)
; CHECK: declare i8* @memchr(i8*, i32 signext, i64)
; CHECK: declare ptr @memchr(ptr, i32 signext, i64)
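The point of this file is the trailing CHECK block: on SystemZ, i32 arguments to emitted libcalls must carry the `signext` attribute, and each fold (memccpy, fputc, putchar, strchr to memchr, the ldexp family) is expected to declare its replacement accordingly. A sketch of the fputs-to-fputc rewrite with the extension attribute in place (function names assumed):

declare i32 @fputc(i32 signext, ptr)

define void @sketch_after(ptr %fp) {
  ; fputs("A", fp) with a one-character string becomes fputc('A', fp)
  call i32 @fputc(i32 signext 65, ptr %fp)
  ret void
}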
12 changes: 6 additions & 6 deletions llvm/test/Transforms/InstCombine/X86/addcarry.ll
@@ -4,34 +4,34 @@
declare { i8, i32 } @llvm.x86.addcarry.32(i8, i32, i32)
declare { i8, i64 } @llvm.x86.addcarry.64(i8, i64, i64)

define i32 @no_carryin_i32(i32 %x, i32 %y, i8* %p) {
define i32 @no_carryin_i32(i32 %x, i32 %y, ptr %p) {
; CHECK-LABEL: @no_carryin_i32(
; CHECK-NEXT: [[TMP1:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X:%.*]], i32 [[Y:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i1 } [[TMP1]], 0
; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP1]], 1
; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i8
; CHECK-NEXT: store i8 [[TMP4]], i8* [[P:%.*]], align 1
; CHECK-NEXT: store i8 [[TMP4]], ptr [[P:%.*]], align 1
; CHECK-NEXT: ret i32 [[TMP2]]
;
%s = call { i8, i32 } @llvm.x86.addcarry.32(i8 0, i32 %x, i32 %y)
%ov = extractvalue { i8, i32 } %s, 0
store i8 %ov, i8* %p
store i8 %ov, ptr %p
%r = extractvalue { i8, i32 } %s, 1
ret i32 %r
}

define i64 @no_carryin_i64(i64 %x, i64 %y, i8* %p) {
define i64 @no_carryin_i64(i64 %x, i64 %y, ptr %p) {
; CHECK-LABEL: @no_carryin_i64(
; CHECK-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[X:%.*]], i64 [[Y:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i8
; CHECK-NEXT: store i8 [[TMP4]], i8* [[P:%.*]], align 1
; CHECK-NEXT: store i8 [[TMP4]], ptr [[P:%.*]], align 1
; CHECK-NEXT: ret i64 [[TMP2]]
;
%s = call { i8, i64 } @llvm.x86.addcarry.64(i8 0, i64 %x, i64 %y)
%ov = extractvalue { i8, i64 } %s, 0
store i8 %ov, i8* %p
store i8 %ov, ptr %p
%r = extractvalue { i8, i64 } %s, 1
ret i64 %r
}
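What the two tests check: an x86 addcarry intrinsic whose carry-in is a known zero is just an unsigned add, so it lowers to uadd.with.overflow plus a zext of the i1 overflow flag back to the i8 carry-out. The equivalent result computation, as a sketch (function name assumed):

declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)

define i32 @sketch_after(i32 %x, i32 %y, ptr %p) {
  %s = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
  %sum = extractvalue { i32, i1 } %s, 0
  %ov = extractvalue { i32, i1 } %s, 1
  %ov8 = zext i1 %ov to i8        ; i1 overflow flag -> i8 carry-out
  store i8 %ov8, ptr %p, align 1
  ret i32 %sum
}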
28 changes: 12 additions & 16 deletions llvm/test/Transforms/InstCombine/X86/blend_x86.ll
@@ -209,29 +209,29 @@ define <16 x i8> @sel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i1> %cond) {
; expected IR when 1 of the blend operands is a constant 0 vector. Potentially, this could
; be transformed to bitwise logic in IR, but currently that transform is left to the backend.

define <4 x float> @sel_v4f32_sse_reality(<4 x float>* %x, <4 x float> %y, <4 x float> %z) {
define <4 x float> @sel_v4f32_sse_reality(ptr %x, <4 x float> %y, <4 x float> %z) {
; CHECK-LABEL: @sel_v4f32_sse_reality(
; CHECK-NEXT: [[LD:%.*]] = load <4 x float>, <4 x float>* [[X:%.*]], align 16
; CHECK-NEXT: [[LD:%.*]] = load <4 x float>, ptr [[X:%.*]], align 16
; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <4 x float> [[Z:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[CMP]], <4 x float> zeroinitializer, <4 x float> [[LD]]
; CHECK-NEXT: ret <4 x float> [[R]]
;
%ld = load <4 x float>, <4 x float>* %x, align 16
%ld = load <4 x float>, ptr %x, align 16
%cmp = fcmp olt <4 x float> %z, %y
%sext = sext <4 x i1> %cmp to <4 x i32>
%cond = bitcast <4 x i32> %sext to <4 x float>
%r = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %ld, <4 x float> zeroinitializer, <4 x float> %cond)
ret <4 x float> %r
}

define <2 x double> @sel_v2f64_sse_reality(<2 x double>* nocapture readonly %x, <2 x double> %y, <2 x double> %z) {
define <2 x double> @sel_v2f64_sse_reality(ptr nocapture readonly %x, <2 x double> %y, <2 x double> %z) {
; CHECK-LABEL: @sel_v2f64_sse_reality(
; CHECK-NEXT: [[LD:%.*]] = load <2 x double>, <2 x double>* [[X:%.*]], align 16
; CHECK-NEXT: [[LD:%.*]] = load <2 x double>, ptr [[X:%.*]], align 16
; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <2 x double> [[Z:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[CMP]], <2 x double> zeroinitializer, <2 x double> [[LD]]
; CHECK-NEXT: ret <2 x double> [[R]]
;
%ld = load <2 x double>, <2 x double>* %x, align 16
%ld = load <2 x double>, ptr %x, align 16
%cmp = fcmp olt <2 x double> %z, %y
%sext = sext <2 x i1> %cmp to <2 x i64>
%cond = bitcast <2 x i64> %sext to <2 x double>
@@ -241,19 +241,17 @@ define <2 x double> @sel_v2f64_sse_reality(<2 x double>* nocapture readonly %x,

; Bitcast the inputs and the result and remove the intrinsic.

define <2 x i64> @sel_v4i32_sse_reality(<2 x i64>* nocapture readonly %x, <2 x i64> %y, <2 x i64> %z) {
define <2 x i64> @sel_v4i32_sse_reality(ptr nocapture readonly %x, <2 x i64> %y, <2 x i64> %z) {
; CHECK-LABEL: @sel_v4i32_sse_reality(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64>* [[X:%.*]] to <4 x i32>*
; CHECK-NEXT: [[LD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 16
; CHECK-NEXT: [[LD1:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16
; CHECK-NEXT: [[YCAST:%.*]] = bitcast <2 x i64> [[Y:%.*]] to <4 x i32>
; CHECK-NEXT: [[ZCAST:%.*]] = bitcast <2 x i64> [[Z:%.*]] to <4 x i32>
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <4 x i32> [[YCAST]], [[ZCAST]]
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[CMP]], <4 x i32> zeroinitializer, <4 x i32> [[LD1]]
; CHECK-NEXT: [[RCAST:%.*]] = bitcast <4 x i32> [[TMP2]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[RCAST]]
;
%xcast = bitcast <2 x i64>* %x to <16 x i8>*
%ld = load <16 x i8>, <16 x i8>* %xcast, align 16
%ld = load <16 x i8>, ptr %x, align 16
%ycast = bitcast <2 x i64> %y to <4 x i32>
%zcast = bitcast <2 x i64> %z to <4 x i32>
%cmp = icmp sgt <4 x i32> %ycast, %zcast
@@ -264,19 +262,17 @@ define <2 x i64> @sel_v4i32_sse_reality(<2 x i64>* nocapture readonly %x, <2 x i
ret <2 x i64> %rcast
}

define <2 x i64> @sel_v16i8_sse_reality(<2 x i64>* nocapture readonly %x, <2 x i64> %y, <2 x i64> %z) {
define <2 x i64> @sel_v16i8_sse_reality(ptr nocapture readonly %x, <2 x i64> %y, <2 x i64> %z) {
; CHECK-LABEL: @sel_v16i8_sse_reality(
; CHECK-NEXT: [[XCAST:%.*]] = bitcast <2 x i64>* [[X:%.*]] to <16 x i8>*
; CHECK-NEXT: [[LD:%.*]] = load <16 x i8>, <16 x i8>* [[XCAST]], align 16
; CHECK-NEXT: [[LD:%.*]] = load <16 x i8>, ptr [[X:%.*]], align 16
; CHECK-NEXT: [[YCAST:%.*]] = bitcast <2 x i64> [[Y:%.*]] to <16 x i8>
; CHECK-NEXT: [[ZCAST:%.*]] = bitcast <2 x i64> [[Z:%.*]] to <16 x i8>
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <16 x i8> [[YCAST]], [[ZCAST]]
; CHECK-NEXT: [[R:%.*]] = select <16 x i1> [[CMP]], <16 x i8> zeroinitializer, <16 x i8> [[LD]]
; CHECK-NEXT: [[RCAST:%.*]] = bitcast <16 x i8> [[R]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[RCAST]]
;
%xcast = bitcast <2 x i64>* %x to <16 x i8>*
%ld = load <16 x i8>, <16 x i8>* %xcast, align 16
%ld = load <16 x i8>, ptr %x, align 16
%ycast = bitcast <2 x i64> %y to <16 x i8>
%zcast = bitcast <2 x i64> %z to <16 x i8>
%cmp = icmp sgt <16 x i8> %ycast, %zcast
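The sse_reality tests above all reduce to the same shape: when blendv's mask is the sign-extension of a vector compare, only the sign bit of each lane matters, so the intrinsic folds to a generic select on the original i1 compare (with bitcasts around it in the integer cases). A sketch of the core rewrite (function name assumed):

define <4 x float> @sketch_after(<4 x float> %ld, <4 x float> %y, <4 x float> %z) {
  %cmp = fcmp olt <4 x float> %z, %y
  ; lanes where %cmp is true take blendv's second operand (zero here),
  ; matching the intrinsic's sign-bit mask semantics
  %r = select <4 x i1> %cmp, <4 x float> zeroinitializer, <4 x float> %ld
  ret <4 x float> %r
}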
@@ -10,13 +10,13 @@
%struct.ClearColor = type { double, %struct.IColor4, %struct.IColor4, float, i32 }
%struct.ClipPlane = type { i32, [6 x %struct.IColor4] }
%struct.ColorBuffer = type { i16, i8, i8, [8 x i16], [0 x i32] }
%struct.ColorMatrix = type { [16 x float]*, %struct.ImagingColorScale }
%struct.Convolution = type { %struct.IColor4, %struct.ImagingColorScale, i16, i16, [0 x i32], float*, i32, i32 }
%struct.ColorMatrix = type { ptr, %struct.ImagingColorScale }
%struct.Convolution = type { %struct.IColor4, %struct.ImagingColorScale, i16, i16, [0 x i32], ptr, i32, i32 }
%struct.DepthTest = type { i16, i16, i8, i8, i8, i8, double, double }
%struct.FixedFunction = type { %struct.PPStreamToken* }
%struct.FixedFunction = type { ptr }
%struct.FogMode = type { %struct.IColor4, float, float, float, float, float, i16, i16, i16, i8, i8 }
%struct.HintMode = type { i16, i16, i16, i16, i16, i16, i16, i16, i16, i16 }
%struct.Histogram = type { %struct.ProgramLimits*, i32, i16, i8, i8 }
%struct.Histogram = type { ptr, i32, i16, i8, i8 }
%struct.ImagingColorScale = type { %struct.TCoord2, %struct.TCoord2, %struct.TCoord2, %struct.TCoord2 }
%struct.ImagingSubset = type { %struct.Convolution, %struct.Convolution, %struct.Convolution, %struct.ColorMatrix, %struct.Minmax, %struct.Histogram, %struct.ImagingColorScale, %struct.ImagingColorScale, %struct.ImagingColorScale, %struct.ImagingColorScale, i32, [0 x i32] }
%struct.Light = type { %struct.IColor4, %struct.IColor4, %struct.IColor4, %struct.IColor4, %struct.PointLineLimits, float, float, float, float, float, %struct.PointLineLimits, float, %struct.PointLineLimits, float, %struct.PointLineLimits, float, float, float, float, float }
@@ -26,12 +26,12 @@ target triple = "i386-apple-darwin9"
%struct.LogicOp = type { i16, i8, i8 }
%struct.MaskMode = type { i32, [3 x i32], i8, i8, i8, i8, i8, i8, i8, i8 }
%struct.Material = type { %struct.IColor4, %struct.IColor4, %struct.IColor4, %struct.IColor4, float, float, float, float, [8 x %struct.LightProduct], %struct.IColor4, [8 x i32] }
%struct.Minmax = type { %struct.MinmaxTable*, i16, i8, i8, [0 x i32] }
%struct.Minmax = type { ptr, i16, i8, i8, [0 x i32] }
%struct.MinmaxTable = type { %struct.IColor4, %struct.IColor4 }
%struct.Mipmaplevel = type { [4 x i32], [4 x i32], [4 x float], [4 x i32], i32, i32, float*, i8*, i16, i16, i16, i16, [2 x float] }
%struct.Mipmaplevel = type { [4 x i32], [4 x i32], [4 x float], [4 x i32], i32, i32, ptr, ptr, i16, i16, i16, i16, [2 x float] }
%struct.Multisample = type { float, i8, i8, i8, i8, i8, i8, i8, i8 }
%struct.PipelineProgramState = type { i8, i8, i8, i8, [0 x i32], %struct.IColor4* }
%struct.PixelMap = type { i32*, float*, float*, float*, float*, float*, float*, float*, float*, i32*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
%struct.PipelineProgramState = type { i8, i8, i8, i8, [0 x i32], ptr }
%struct.PixelMap = type { ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
%struct.PixelMode = type { float, float, %struct.PixelStore, %struct.PixelTransfer, %struct.PixelMap, %struct.ImagingSubset, i32, i32 }
%struct.PixelPack = type { i32, i32, i32, i32, i32, i32, i32, i32, i8, i8, i8, i8 }
%struct.PixelStore = type { %struct.PixelPack, %struct.PixelPack }
@@ -46,32 +46,32 @@ target triple = "i386-apple-darwin9"
%struct.RegisterCombinersPerPortionState = type { [4 x %struct.RegisterCombinersPerVariableState], i8, i8, i8, i8, i16, i16, i16, i16, i16, i16 }
%struct.RegisterCombinersPerStageState = type { [2 x %struct.RegisterCombinersPerPortionState], [2 x %struct.IColor4] }
%struct.RegisterCombinersPerVariableState = type { i16, i16, i16, i16 }
%struct.SWRSurfaceRec = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i8*, [4 x i8*], i32 }
%struct.SWRSurfaceRec = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, ptr, ptr, ptr, [4 x ptr], i32 }
%struct.ScissorTest = type { %struct.ProgramLimits, i8, i8, i8, i8 }
%struct.State = type <{ i16, i16, i16, i16, i32, i32, [256 x %struct.IColor4], [128 x %struct.IColor4], %struct.Viewport, %struct.Transform, %struct.LightModel, %struct.ActiveTextureTargets, %struct.AlphaTest, %struct.BlendMode, %struct.ClearColor, %struct.ColorBuffer, %struct.DepthTest, %struct.ArrayRange, %struct.FogMode, %struct.HintMode, %struct.LineMode, %struct.LogicOp, %struct.MaskMode, %struct.PixelMode, %struct.PointMode, %struct.PolygonMode, %struct.ScissorTest, i32, %struct.StencilTest, [8 x %struct.TextureMode], [16 x %struct.TextureImageMode], %struct.ArrayRange, [8 x %struct.TextureCoordGen], %struct.ClipPlane, %struct.Multisample, %struct.RegisterCombiners, %struct.ArrayRange, %struct.ArrayRange, [3 x %struct.PipelineProgramState], %struct.ArrayRange, %struct.TransformFeedback, i32*, %struct.FixedFunction, [3 x i32], [3 x i32] }>
%struct.State = type <{ i16, i16, i16, i16, i32, i32, [256 x %struct.IColor4], [128 x %struct.IColor4], %struct.Viewport, %struct.Transform, %struct.LightModel, %struct.ActiveTextureTargets, %struct.AlphaTest, %struct.BlendMode, %struct.ClearColor, %struct.ColorBuffer, %struct.DepthTest, %struct.ArrayRange, %struct.FogMode, %struct.HintMode, %struct.LineMode, %struct.LogicOp, %struct.MaskMode, %struct.PixelMode, %struct.PointMode, %struct.PolygonMode, %struct.ScissorTest, i32, %struct.StencilTest, [8 x %struct.TextureMode], [16 x %struct.TextureImageMode], %struct.ArrayRange, [8 x %struct.TextureCoordGen], %struct.ClipPlane, %struct.Multisample, %struct.RegisterCombiners, %struct.ArrayRange, %struct.ArrayRange, [3 x %struct.PipelineProgramState], %struct.ArrayRange, %struct.TransformFeedback, ptr, %struct.FixedFunction, [3 x i32], [3 x i32] }>
%struct.StencilTest = type { [3 x { i32, i32, i16, i16, i16, i16 }], i32, [4 x i8] }
%struct.TextureCoordGen = type { { i16, i16, %struct.IColor4, %struct.IColor4 }, { i16, i16, %struct.IColor4, %struct.IColor4 }, { i16, i16, %struct.IColor4, %struct.IColor4 }, { i16, i16, %struct.IColor4, %struct.IColor4 }, i8, i8, i8, i8 }
%struct.TextureGeomState = type { i16, i16, i16, i16, i16, i8, i8, i8, i8, i16, i16, i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, [6 x i16], [6 x i16] }
%struct.TextureImageMode = type { float }
%struct.TextureLevel = type { i32, i32, i16, i16, i16, i8, i8, i16, i16, i16, i16, i8* }
%struct.TextureLevel = type { i32, i32, i16, i16, i16, i8, i8, i16, i16, i16, i16, ptr }
%struct.TextureMode = type { %struct.IColor4, i32, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, float, float, i16, i16, i16, i16, i16, i16, [4 x i16], i8, i8, i8, i8, [3 x float], [4 x float], float, float }
%struct.TextureParamState = type { i16, i16, i16, i16, i16, i16, %struct.IColor4, float, float, float, float, i16, i16, i16, i16, float, i16, i8, i8, i32, i8* }
%struct.TextureRec = type { [4 x float], %struct.TextureState*, %struct.Mipmaplevel*, %struct.Mipmaplevel*, float, float, float, float, i8, i8, i8, i8, i16, i16, i16, i16, i32, float, [2 x %struct.PPStreamToken] }
%struct.TextureState = type { i16, i8, i8, i16, i16, float, i32, %struct.SWRSurfaceRec*, %struct.TextureParamState, %struct.TextureGeomState, [0 x i32], i8*, i32, %struct.TextureLevel, [1 x [15 x %struct.TextureLevel]] }
%struct.TextureParamState = type { i16, i16, i16, i16, i16, i16, %struct.IColor4, float, float, float, float, i16, i16, i16, i16, float, i16, i8, i8, i32, ptr }
%struct.TextureRec = type { [4 x float], ptr, ptr, ptr, float, float, float, float, i8, i8, i8, i8, i16, i16, i16, i16, i32, float, [2 x %struct.PPStreamToken] }
%struct.TextureState = type { i16, i8, i8, i16, i16, float, i32, ptr, %struct.TextureParamState, %struct.TextureGeomState, [0 x i32], ptr, i32, %struct.TextureLevel, [1 x [15 x %struct.TextureLevel]] }
%struct.Transform = type <{ [24 x [16 x float]], [24 x [16 x float]], [16 x float], float, float, float, float, float, i8, i8, i8, i8, i32, i32, i32, i16, i16, i8, i8, i8, i8, i32 }>
%struct.TransformFeedback = type { i8, i8, i8, i8, [0 x i32], [16 x i32], [16 x i32] }
%struct.Viewport = type { float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, double, double, i32, i32, i32, i32, float, float, float, float }
%struct.IColor4 = type { float, float, float, float }
%struct.TCoord2 = type { float, float }
%struct.VMGPStack = type { [6 x <4 x float>*], <4 x float>*, i32, i32, <4 x float>*, <4 x float>**, i32, i32, i32, i32, i32, i32 }
%struct.VMTextures = type { [16 x %struct.TextureRec*] }
%struct.VMGPStack = type { [6 x ptr], ptr, i32, i32, ptr, ptr, i32, i32, i32, i32, i32, i32 }
%struct.VMTextures = type { [16 x ptr] }
%struct.PPStreamToken = type { { i16, i16, i32 } }
%struct._VMConstants = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, float, float, float, float, float, float, float, float, float, float, float, float, [256 x float], [528 x i8], { void (i8*, i8*, i32, i8*)*, float (float)*, float (float)*, float (float)*, i32 (float)* } }
%struct._VMConstants = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, float, float, float, float, float, float, float, float, float, float, float, float, [256 x float], [528 x i8], { ptr, ptr, ptr, ptr, ptr } }

define i32 @foo(%struct.State* %dst, <4 x float>* %prgrm, <4 x float>** %buffs, %struct._VMConstants* %cnstn, %struct.PPStreamToken* %pstrm, %struct.PluginBufferData* %gpctx, %struct.VMTextures* %txtrs, %struct.VMGPStack* %gpstk, <4 x float>* %src) nounwind {
define i32 @foo(ptr %dst, ptr %prgrm, ptr %buffs, ptr %cnstn, ptr %pstrm, ptr %gpctx, ptr %txtrs, ptr %gpstk, ptr %src) nounwind {
bb266.i:
getelementptr <4 x float>, <4 x float>* %src, i32 11 ; <<4 x float>*>:0 [#uses=1]
load <4 x float>, <4 x float>* %0, align 16 ; <<4 x float>>:1 [#uses=1]
getelementptr <4 x float>, ptr %src, i32 11 ; <ptr>:0 [#uses=1]
load <4 x float>, ptr %0, align 16 ; <<4 x float>>:1 [#uses=1]
shufflevector <4 x float> %1, <4 x float> poison, <4 x i32> < i32 0, i32 1, i32 1, i32 1 > ; <<4 x float>>:2 [#uses=1]
shufflevector <4 x float> %2, <4 x float> poison, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>>:3 [#uses=1]
shufflevector <4 x float> undef, <4 x float> poison, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>>:4 [#uses=1]
40 changes: 20 additions & 20 deletions llvm/test/Transforms/InstCombine/X86/shufflemask-undef.ll
@@ -10,13 +10,13 @@ target triple = "i386-apple-darwin9"
%struct.ClearColor = type { double, %struct.IColor4, %struct.IColor4, float, i32 }
%struct.ClipPlane = type { i32, [6 x %struct.IColor4] }
%struct.ColorBuffer = type { i16, i8, i8, [8 x i16], [0 x i32] }
%struct.ColorMatrix = type { [16 x float]*, %struct.ImagingColorScale }
%struct.Convolution = type { %struct.IColor4, %struct.ImagingColorScale, i16, i16, [0 x i32], float*, i32, i32 }
%struct.ColorMatrix = type { ptr, %struct.ImagingColorScale }
%struct.Convolution = type { %struct.IColor4, %struct.ImagingColorScale, i16, i16, [0 x i32], ptr, i32, i32 }
%struct.DepthTest = type { i16, i16, i8, i8, i8, i8, double, double }
%struct.FixedFunction = type { %struct.PPStreamToken* }
%struct.FixedFunction = type { ptr }
%struct.FogMode = type { %struct.IColor4, float, float, float, float, float, i16, i16, i16, i8, i8 }
%struct.HintMode = type { i16, i16, i16, i16, i16, i16, i16, i16, i16, i16 }
%struct.Histogram = type { %struct.ProgramLimits*, i32, i16, i8, i8 }
%struct.Histogram = type { ptr, i32, i16, i8, i8 }
%struct.ImagingColorScale = type { %struct.TCoord2, %struct.TCoord2, %struct.TCoord2, %struct.TCoord2 }
%struct.ImagingSubset = type { %struct.Convolution, %struct.Convolution, %struct.Convolution, %struct.ColorMatrix, %struct.Minmax, %struct.Histogram, %struct.ImagingColorScale, %struct.ImagingColorScale, %struct.ImagingColorScale, %struct.ImagingColorScale, i32, [0 x i32] }
%struct.Light = type { %struct.IColor4, %struct.IColor4, %struct.IColor4, %struct.IColor4, %struct.PointLineLimits, float, float, float, float, float, %struct.PointLineLimits, float, %struct.PointLineLimits, float, %struct.PointLineLimits, float, float, float, float, float }
@@ -26,12 +26,12 @@ target triple = "i386-apple-darwin9"
%struct.LogicOp = type { i16, i8, i8 }
%struct.MaskMode = type { i32, [3 x i32], i8, i8, i8, i8, i8, i8, i8, i8 }
%struct.Material = type { %struct.IColor4, %struct.IColor4, %struct.IColor4, %struct.IColor4, float, float, float, float, [8 x %struct.LightProduct], %struct.IColor4, [8 x i32] }
%struct.Minmax = type { %struct.MinmaxTable*, i16, i8, i8, [0 x i32] }
%struct.Minmax = type { ptr, i16, i8, i8, [0 x i32] }
%struct.MinmaxTable = type { %struct.IColor4, %struct.IColor4 }
%struct.Mipmaplevel = type { [4 x i32], [4 x i32], [4 x float], [4 x i32], i32, i32, float*, i8*, i16, i16, i16, i16, [2 x float] }
%struct.Mipmaplevel = type { [4 x i32], [4 x i32], [4 x float], [4 x i32], i32, i32, ptr, ptr, i16, i16, i16, i16, [2 x float] }
%struct.Multisample = type { float, i8, i8, i8, i8, i8, i8, i8, i8 }
%struct.PipelineProgramState = type { i8, i8, i8, i8, [0 x i32], %struct.IColor4* }
%struct.PixelMap = type { i32*, float*, float*, float*, float*, float*, float*, float*, float*, i32*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
%struct.PipelineProgramState = type { i8, i8, i8, i8, [0 x i32], ptr }
%struct.PixelMap = type { ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
%struct.PixelMode = type { float, float, %struct.PixelStore, %struct.PixelTransfer, %struct.PixelMap, %struct.ImagingSubset, i32, i32 }
%struct.PixelPack = type { i32, i32, i32, i32, i32, i32, i32, i32, i8, i8, i8, i8 }
%struct.PixelStore = type { %struct.PixelPack, %struct.PixelPack }
@@ -46,32 +46,32 @@ target triple = "i386-apple-darwin9"
%struct.RegisterCombinersPerPortionState = type { [4 x %struct.RegisterCombinersPerVariableState], i8, i8, i8, i8, i16, i16, i16, i16, i16, i16 }
%struct.RegisterCombinersPerStageState = type { [2 x %struct.RegisterCombinersPerPortionState], [2 x %struct.IColor4] }
%struct.RegisterCombinersPerVariableState = type { i16, i16, i16, i16 }
%struct.SWRSurfaceRec = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i8*, [4 x i8*], i32 }
%struct.SWRSurfaceRec = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, ptr, ptr, ptr, [4 x ptr], i32 }
%struct.ScissorTest = type { %struct.ProgramLimits, i8, i8, i8, i8 }
%struct.State = type <{ i16, i16, i16, i16, i32, i32, [256 x %struct.IColor4], [128 x %struct.IColor4], %struct.Viewport, %struct.Transform, %struct.LightModel, %struct.ActiveTextureTargets, %struct.AlphaTest, %struct.BlendMode, %struct.ClearColor, %struct.ColorBuffer, %struct.DepthTest, %struct.ArrayRange, %struct.FogMode, %struct.HintMode, %struct.LineMode, %struct.LogicOp, %struct.MaskMode, %struct.PixelMode, %struct.PointMode, %struct.PolygonMode, %struct.ScissorTest, i32, %struct.StencilTest, [8 x %struct.TextureMode], [16 x %struct.TextureImageMode], %struct.ArrayRange, [8 x %struct.TextureCoordGen], %struct.ClipPlane, %struct.Multisample, %struct.RegisterCombiners, %struct.ArrayRange, %struct.ArrayRange, [3 x %struct.PipelineProgramState], %struct.ArrayRange, %struct.TransformFeedback, i32*, %struct.FixedFunction, [3 x i32], [3 x i32] }>
%struct.State = type <{ i16, i16, i16, i16, i32, i32, [256 x %struct.IColor4], [128 x %struct.IColor4], %struct.Viewport, %struct.Transform, %struct.LightModel, %struct.ActiveTextureTargets, %struct.AlphaTest, %struct.BlendMode, %struct.ClearColor, %struct.ColorBuffer, %struct.DepthTest, %struct.ArrayRange, %struct.FogMode, %struct.HintMode, %struct.LineMode, %struct.LogicOp, %struct.MaskMode, %struct.PixelMode, %struct.PointMode, %struct.PolygonMode, %struct.ScissorTest, i32, %struct.StencilTest, [8 x %struct.TextureMode], [16 x %struct.TextureImageMode], %struct.ArrayRange, [8 x %struct.TextureCoordGen], %struct.ClipPlane, %struct.Multisample, %struct.RegisterCombiners, %struct.ArrayRange, %struct.ArrayRange, [3 x %struct.PipelineProgramState], %struct.ArrayRange, %struct.TransformFeedback, ptr, %struct.FixedFunction, [3 x i32], [3 x i32] }>
%struct.StencilTest = type { [3 x { i32, i32, i16, i16, i16, i16 }], i32, [4 x i8] }
%struct.TextureCoordGen = type { { i16, i16, %struct.IColor4, %struct.IColor4 }, { i16, i16, %struct.IColor4, %struct.IColor4 }, { i16, i16, %struct.IColor4, %struct.IColor4 }, { i16, i16, %struct.IColor4, %struct.IColor4 }, i8, i8, i8, i8 }
%struct.TextureGeomState = type { i16, i16, i16, i16, i16, i8, i8, i8, i8, i16, i16, i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, [6 x i16], [6 x i16] }
%struct.TextureImageMode = type { float }
%struct.TextureLevel = type { i32, i32, i16, i16, i16, i8, i8, i16, i16, i16, i16, i8* }
%struct.TextureLevel = type { i32, i32, i16, i16, i16, i8, i8, i16, i16, i16, i16, ptr }
%struct.TextureMode = type { %struct.IColor4, i32, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, float, float, i16, i16, i16, i16, i16, i16, [4 x i16], i8, i8, i8, i8, [3 x float], [4 x float], float, float }
%struct.TextureParamState = type { i16, i16, i16, i16, i16, i16, %struct.IColor4, float, float, float, float, i16, i16, i16, i16, float, i16, i8, i8, i32, i8* }
%struct.TextureRec = type { [4 x float], %struct.TextureState*, %struct.Mipmaplevel*, %struct.Mipmaplevel*, float, float, float, float, i8, i8, i8, i8, i16, i16, i16, i16, i32, float, [2 x %struct.PPStreamToken] }
%struct.TextureState = type { i16, i8, i8, i16, i16, float, i32, %struct.SWRSurfaceRec*, %struct.TextureParamState, %struct.TextureGeomState, [0 x i32], i8*, i32, %struct.TextureLevel, [1 x [15 x %struct.TextureLevel]] }
%struct.TextureParamState = type { i16, i16, i16, i16, i16, i16, %struct.IColor4, float, float, float, float, i16, i16, i16, i16, float, i16, i8, i8, i32, ptr }
%struct.TextureRec = type { [4 x float], ptr, ptr, ptr, float, float, float, float, i8, i8, i8, i8, i16, i16, i16, i16, i32, float, [2 x %struct.PPStreamToken] }
%struct.TextureState = type { i16, i8, i8, i16, i16, float, i32, ptr, %struct.TextureParamState, %struct.TextureGeomState, [0 x i32], ptr, i32, %struct.TextureLevel, [1 x [15 x %struct.TextureLevel]] }
%struct.Transform = type <{ [24 x [16 x float]], [24 x [16 x float]], [16 x float], float, float, float, float, float, i8, i8, i8, i8, i32, i32, i32, i16, i16, i8, i8, i8, i8, i32 }>
%struct.TransformFeedback = type { i8, i8, i8, i8, [0 x i32], [16 x i32], [16 x i32] }
%struct.Viewport = type { float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, double, double, i32, i32, i32, i32, float, float, float, float }
%struct.IColor4 = type { float, float, float, float }
%struct.TCoord2 = type { float, float }
%struct.VMGPStack = type { [6 x <4 x float>*], <4 x float>*, i32, i32, <4 x float>*, <4 x float>**, i32, i32, i32, i32, i32, i32 }
%struct.VMTextures = type { [16 x %struct.TextureRec*] }
%struct.VMGPStack = type { [6 x ptr], ptr, i32, i32, ptr, ptr, i32, i32, i32, i32, i32, i32 }
%struct.VMTextures = type { [16 x ptr] }
%struct.PPStreamToken = type { { i16, i16, i32 } }
%struct._VMConstants = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, float, float, float, float, float, float, float, float, float, float, float, float, [256 x float], [528 x i8], { void (i8*, i8*, i32, i8*)*, float (float)*, float (float)*, float (float)*, i32 (float)* } }
%struct._VMConstants = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, float, float, float, float, float, float, float, float, float, float, float, float, [256 x float], [528 x i8], { ptr, ptr, ptr, ptr, ptr } }

define i32 @foo(%struct.State* %dst, <4 x float>* %prgrm, <4 x float>** %buffs, %struct._VMConstants* %cnstn, %struct.PPStreamToken* %pstrm, %struct.PluginBufferData* %gpctx, %struct.VMTextures* %txtrs, %struct.VMGPStack* %gpstk, <4 x float>* %src) nounwind {
define i32 @foo(ptr %dst, ptr %prgrm, ptr %buffs, ptr %cnstn, ptr %pstrm, ptr %gpctx, ptr %txtrs, ptr %gpstk, ptr %src) nounwind {
bb266.i:
getelementptr <4 x float>, <4 x float>* %src, i32 11 ; <<4 x float>*>:0 [#uses=1]
load <4 x float>, <4 x float>* %0, align 16 ; <<4 x float>>:1 [#uses=1]
getelementptr <4 x float>, ptr %src, i32 11 ; <ptr>:0 [#uses=1]
load <4 x float>, ptr %0, align 16 ; <<4 x float>>:1 [#uses=1]
shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> < i32 0, i32 1, i32 1, i32 1 > ; <<4 x float>>:2 [#uses=1]
shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>>:3 [#uses=1]
shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>>:4 [#uses=1]
14 changes: 7 additions & 7 deletions llvm/test/Transforms/InstCombine/X86/x86-addsub-inseltpoison.ll
@@ -140,16 +140,16 @@ define float @elts_addsub_v8f32_sub(<8 x float> %0, <8 x float> %1) {
ret float %8
}

define void @PR46277(float %0, float %1, float %2, float %3, <4 x float> %4, float* %5) {
define void @PR46277(float %0, float %1, float %2, float %3, <4 x float> %4, ptr %5) {
; CHECK-LABEL: @PR46277(
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> poison, float [[TMP0:%.*]], i64 0
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP1:%.*]], i64 1
; CHECK-NEXT: [[TMP9:%.*]] = tail call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> [[TMP8]], <4 x float> [[TMP4:%.*]])
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP9]], i64 0
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, float* [[TMP5:%.*]], i64 1
; CHECK-NEXT: store float [[TMP10]], float* [[TMP5]], align 4
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP5:%.*]], i64 1
; CHECK-NEXT: store float [[TMP10]], ptr [[TMP5]], align 4
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[TMP9]], i64 1
; CHECK-NEXT: store float [[TMP12]], float* [[TMP11]], align 4
; CHECK-NEXT: store float [[TMP12]], ptr [[TMP11]], align 4
; CHECK-NEXT: ret void
;
%7 = insertelement <4 x float> poison, float %0, i32 0
@@ -158,10 +158,10 @@ define void @PR46277(float %0, float %1, float %2, float %3, <4 x float> %4, flo
%10 = insertelement <4 x float> %9, float %3, i32 3
%11 = tail call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %10, <4 x float> %4)
%12 = extractelement <4 x float> %11, i32 0
%13 = getelementptr inbounds float, float* %5, i64 1
store float %12, float* %5, align 4
%13 = getelementptr inbounds float, ptr %5, i64 1
store float %12, ptr %5, align 4
%14 = extractelement <4 x float> %11, i32 1
store float %14, float* %13, align 4
store float %14, ptr %13, align 4
ret void
}

14 changes: 7 additions & 7 deletions llvm/test/Transforms/InstCombine/X86/x86-addsub.ll
@@ -140,16 +140,16 @@ define float @elts_addsub_v8f32_sub(<8 x float> %0, <8 x float> %1) {
ret float %8
}

define void @PR46277(float %0, float %1, float %2, float %3, <4 x float> %4, float* %5) {
define void @PR46277(float %0, float %1, float %2, float %3, <4 x float> %4, ptr %5) {
; CHECK-LABEL: @PR46277(
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> undef, float [[TMP0:%.*]], i64 0
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP1:%.*]], i64 1
; CHECK-NEXT: [[TMP9:%.*]] = tail call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> [[TMP8]], <4 x float> [[TMP4:%.*]])
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP9]], i64 0
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, float* [[TMP5:%.*]], i64 1
; CHECK-NEXT: store float [[TMP10]], float* [[TMP5]], align 4
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP5:%.*]], i64 1
; CHECK-NEXT: store float [[TMP10]], ptr [[TMP5]], align 4
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[TMP9]], i64 1
; CHECK-NEXT: store float [[TMP12]], float* [[TMP11]], align 4
; CHECK-NEXT: store float [[TMP12]], ptr [[TMP11]], align 4
; CHECK-NEXT: ret void
;
%7 = insertelement <4 x float> undef, float %0, i32 0
@@ -158,10 +158,10 @@ define void @PR46277(float %0, float %1, float %2, float %3, <4 x float> %4, flo
%10 = insertelement <4 x float> %9, float %3, i32 3
%11 = tail call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %10, <4 x float> %4)
%12 = extractelement <4 x float> %11, i32 0
%13 = getelementptr inbounds float, float* %5, i64 1
store float %12, float* %5, align 4
%13 = getelementptr inbounds float, ptr %5, i64 1
store float %12, ptr %5, align 4
%14 = extractelement <4 x float> %11, i32 1
store float %14, float* %13, align 4
store float %14, ptr %13, align 4
ret void
}

24 changes: 12 additions & 12 deletions llvm/test/Transforms/InstCombine/X86/x86-amx-load-store.ll
@@ -2,36 +2,36 @@
; RUN: opt -passes=instcombine -S < %s | FileCheck %s

; Prohibit pointer cast for amx.
define dso_local void @test_amx_load_store(<256 x i32>* %src, i8* %dst) {
define dso_local void @test_amx_load_store(ptr %src, ptr %dst) {
; CHECK-LABEL: @test_amx_load_store(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[VEC:%.*]] = load <256 x i32>, <256 x i32>* [[SRC:%.*]], align 64
; CHECK-NEXT: [[VEC:%.*]] = load <256 x i32>, ptr [[SRC:%.*]], align 64
; CHECK-NEXT: [[BC:%.*]] = bitcast <256 x i32> [[VEC]] to x86_amx
; CHECK-NEXT: tail call void @llvm.x86.tilestored64.internal(i16 16, i16 16, i8* [[DST:%.*]], i64 64, x86_amx [[BC]])
; CHECK-NEXT: tail call void @llvm.x86.tilestored64.internal(i16 16, i16 16, ptr [[DST:%.*]], i64 64, x86_amx [[BC]])
; CHECK-NEXT: ret void
;
entry:
%vec = load <256 x i32>, <256 x i32>* %src, align 64
%vec = load <256 x i32>, ptr %src, align 64
%bc = bitcast <256 x i32> %vec to x86_amx
tail call void @llvm.x86.tilestored64.internal(i16 16, i16 16, i8* %dst, i64 64, x86_amx %bc)
tail call void @llvm.x86.tilestored64.internal(i16 16, i16 16, ptr %dst, i64 64, x86_amx %bc)
ret void
}
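
; Illustrative sketch (not from this patch) of the fold the tests above and
; below forbid: rewriting the vector load + bitcast as a direct tile access,
;   %bc = load x86_amx, ptr %src, align 64
; The IR verifier rejects plain load/store of x86_amx, which has no defined
; in-memory layout; tile data may only move through the tileloadd64 and
; tilestored64 intrinsics, exactly what the CHECK lines pin down.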

; Prohibit pointer cast for amx.
define dso_local void @test_amx_load_store2(<256 x i32>* %dst, i8* %src) {
define dso_local void @test_amx_load_store2(ptr %dst, ptr %src) {
; CHECK-LABEL: @test_amx_load_store2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[AMX:%.*]] = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 16, i8* [[SRC:%.*]], i64 64)
; CHECK-NEXT: [[AMX:%.*]] = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 16, ptr [[SRC:%.*]], i64 64)
; CHECK-NEXT: [[BC:%.*]] = bitcast x86_amx [[AMX]] to <256 x i32>
; CHECK-NEXT: store <256 x i32> [[BC]], <256 x i32>* [[DST:%.*]], align 1024
; CHECK-NEXT: store <256 x i32> [[BC]], ptr [[DST:%.*]], align 1024
; CHECK-NEXT: ret void
;
entry:
%amx = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 16, i8* %src, i64 64)
%amx = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 16, ptr %src, i64 64)
%bc = bitcast x86_amx %amx to <256 x i32>
store <256 x i32> %bc, <256 x i32>* %dst
store <256 x i32> %bc, ptr %dst
ret void
}

declare x86_amx @llvm.x86.tileloadd64.internal(i16, i16, i8*, i64)
declare void @llvm.x86.tilestored64.internal(i16, i16, i8*, i64, x86_amx)
declare x86_amx @llvm.x86.tileloadd64.internal(i16, i16, ptr, i64)
declare void @llvm.x86.tilestored64.internal(i16, i16, ptr, i64, x86_amx)
22 changes: 11 additions & 11 deletions llvm/test/Transforms/InstCombine/X86/x86-amx.ll
@@ -1,24 +1,24 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=instcombine -S < %s | FileCheck %s

define void @foo(<256 x i32>* %arrayidx16, <256 x i32>* %arrayidx29, <256 x i32>* %arrayidx35, i1 %c1, i1 %c2) {
define void @foo(ptr %arrayidx16, ptr %arrayidx29, ptr %arrayidx35, i1 %c1, i1 %c2) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_COND9:%.*]]
; CHECK: for.cond9:
; CHECK-NEXT: br i1 [[C1:%.*]], label [[FOR_BODY14:%.*]], label [[EXIT:%.*]]
; CHECK: for.body14:
; CHECK-NEXT: [[T5:%.*]] = load <256 x i32>, <256 x i32>* [[ARRAYIDX16:%.*]], align 64
; CHECK-NEXT: [[T5:%.*]] = load <256 x i32>, ptr [[ARRAYIDX16:%.*]], align 64
; CHECK-NEXT: br label [[FOR_COND18:%.*]]
; CHECK: for.cond18:
; CHECK-NEXT: [[SUB_C_SROA_0_0:%.*]] = phi <256 x i32> [ [[T5]], [[FOR_BODY14]] ], [ [[T12:%.*]], [[FOR_BODY24:%.*]] ]
; CHECK-NEXT: br i1 [[C2:%.*]], label [[FOR_BODY24]], label [[FOR_COND_CLEANUP23:%.*]]
; CHECK: for.cond.cleanup23:
; CHECK-NEXT: store <256 x i32> [[SUB_C_SROA_0_0]], <256 x i32>* [[ARRAYIDX16]], align 64
; CHECK-NEXT: store <256 x i32> [[SUB_C_SROA_0_0]], ptr [[ARRAYIDX16]], align 64
; CHECK-NEXT: br label [[FOR_COND9]]
; CHECK: for.body24:
; CHECK-NEXT: [[T6:%.*]] = load <256 x i32>, <256 x i32>* [[ARRAYIDX29:%.*]], align 64
; CHECK-NEXT: [[T7:%.*]] = load <256 x i32>, <256 x i32>* [[ARRAYIDX35:%.*]], align 64
; CHECK-NEXT: [[T6:%.*]] = load <256 x i32>, ptr [[ARRAYIDX29:%.*]], align 64
; CHECK-NEXT: [[T7:%.*]] = load <256 x i32>, ptr [[ARRAYIDX35:%.*]], align 64
; CHECK-NEXT: [[T8:%.*]] = bitcast <256 x i32> [[SUB_C_SROA_0_0]] to x86_amx
; CHECK-NEXT: [[T9:%.*]] = bitcast <256 x i32> [[T6]] to x86_amx
; CHECK-NEXT: [[T10:%.*]] = bitcast <256 x i32> [[T7]] to x86_amx
@@ -34,20 +34,20 @@ for.cond9: ; preds = %for.cond, %for.cond
br i1 %c1, label %for.body14, label %exit

for.body14:
%t5 = load <256 x i32>, <256 x i32>* %arrayidx16, align 64
%t5 = load <256 x i32>, ptr %arrayidx16, align 64
br label %for.cond18

for.cond18: ; preds = %for.body24, %for.body14
%sub_c.sroa.0.0 = phi <256 x i32> [ %t5, %for.body14 ], [ %t12, %for.body24 ]
br i1 %c2, label %for.body24, label %for.cond.cleanup23

for.cond.cleanup23: ; preds = %for.cond18
store <256 x i32> %sub_c.sroa.0.0, <256 x i32>* %arrayidx16, align 64
store <256 x i32> %sub_c.sroa.0.0, ptr %arrayidx16, align 64
br label %for.cond9

for.body24: ; preds = %for.cond18
%t6 = load <256 x i32>, <256 x i32>* %arrayidx29, align 64
%t7 = load <256 x i32>, <256 x i32>* %arrayidx35, align 64
%t6 = load <256 x i32>, ptr %arrayidx29, align 64
%t7 = load <256 x i32>, ptr %arrayidx35, align 64
%t8 = bitcast <256 x i32> %sub_c.sroa.0.0 to x86_amx
%t9 = bitcast <256 x i32> %t6 to x86_amx
%t10 = bitcast <256 x i32> %t7 to x86_amx
@@ -59,6 +59,6 @@ exit:
ret void
}

declare x86_amx @llvm.x86.tileloadd64.internal(i16, i16, i8*, i64)
declare x86_amx @llvm.x86.tileloadd64.internal(i16, i16, ptr, i64)
declare x86_amx @llvm.x86.tdpbssd.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
declare void @llvm.x86.tilestored64.internal(i16, i16, i8*, i64, x86_amx)
declare void @llvm.x86.tilestored64.internal(i16, i16, ptr, i64, x86_amx)
232 changes: 105 additions & 127 deletions llvm/test/Transforms/InstCombine/X86/x86-masked-memops.ll

Large diffs are not rendered by default.

60 changes: 28 additions & 32 deletions llvm/test/Transforms/InstCombine/assume_inevitable.ll
@@ -4,22 +4,20 @@
; Check that assume is propagated backwards through all
; operations that satisfy `isGuaranteedToTransferExecutionToSuccessor`
; (it should reach the load and mark it as `align 32`).
define i32 @assume_inevitable(i32* %a, i32* %b, i8* %c) {
define i32 @assume_inevitable(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @assume_inevitable(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[M:%.*]] = alloca i64, align 8
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 32
; CHECK-NEXT: [[LOADRES:%.*]] = load i32, i32* [[B:%.*]], align 4
; CHECK-NEXT: [[LOADRES2:%.*]] = call i32 @llvm.annotation.i32(i32 [[LOADRES]], i8* nonnull getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i8* nonnull getelementptr inbounds ([4 x i8], [4 x i8]* @.str1, i64 0, i64 0), i32 2)
; CHECK-NEXT: store i32 [[LOADRES2]], i32* [[A]], align 32
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 32
; CHECK-NEXT: [[LOADRES:%.*]] = load i32, ptr [[B:%.*]], align 4
; CHECK-NEXT: [[LOADRES2:%.*]] = call i32 @llvm.annotation.i32(i32 [[LOADRES]], ptr nonnull @.str, ptr nonnull @.str1, i32 2)
; CHECK-NEXT: store i32 [[LOADRES2]], ptr [[A]], align 32
; CHECK-NEXT: [[DUMMY_EQ:%.*]] = icmp ugt i32 [[LOADRES]], 42
; CHECK-NEXT: tail call void @llvm.assume(i1 [[DUMMY_EQ]])
; CHECK-NEXT: [[M_I8:%.*]] = bitcast i64* [[M]] to i8*
; CHECK-NEXT: [[M_A:%.*]] = call i8* @llvm.ptr.annotation.p0i8(i8* nonnull [[M_I8]], i8* nonnull getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i8* nonnull getelementptr inbounds ([4 x i8], [4 x i8]* @.str1, i64 0, i64 0), i32 2, i8* null)
; CHECK-NEXT: [[M_X:%.*]] = bitcast i8* [[M_A]] to i64*
; CHECK-NEXT: [[OBJSZ:%.*]] = call i64 @llvm.objectsize.i64.p0i8(i8* [[C:%.*]], i1 false, i1 false, i1 false)
; CHECK-NEXT: store i64 [[OBJSZ]], i64* [[M_X]], align 4
; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[A]] to i64
; CHECK-NEXT: [[M_A:%.*]] = call ptr @llvm.ptr.annotation.p0(ptr nonnull [[M]], ptr nonnull @.str, ptr nonnull @.str1, i32 2, ptr null)
; CHECK-NEXT: [[OBJSZ:%.*]] = call i64 @llvm.objectsize.i64.p0(ptr [[C:%.*]], i1 false, i1 false, i1 false)
; CHECK-NEXT: store i64 [[OBJSZ]], ptr [[M_A]], align 4
; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31
; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0
; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]])
@@ -28,30 +26,28 @@ define i32 @assume_inevitable(i32* %a, i32* %b, i8* %c) {
entry:
%dummy = alloca i8, align 4
%m = alloca i64
%0 = load i32, i32* %a, align 4
%0 = load i32, ptr %a, align 4

; START perform a bunch of inevitable operations
%loadres = load i32, i32* %b
%loadres2 = call i32 @llvm.annotation.i32(i32 %loadres, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str1, i32 0, i32 0), i32 2)
store i32 %loadres2, i32* %a
%loadres = load i32, ptr %b
%loadres2 = call i32 @llvm.annotation.i32(i32 %loadres, ptr @.str, ptr @.str1, i32 2)
store i32 %loadres2, ptr %a

%dummy_eq = icmp ugt i32 %loadres, 42
tail call void @llvm.assume(i1 %dummy_eq)

call void @llvm.lifetime.start.p0i8(i64 1, i8* %dummy)
%i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %dummy)
call void @llvm.invariant.end.p0i8({}* %i, i64 1, i8* %dummy)
call void @llvm.lifetime.end.p0i8(i64 1, i8* %dummy)
call void @llvm.lifetime.start.p0(i64 1, ptr %dummy)
%i = call ptr @llvm.invariant.start.p0(i64 1, ptr %dummy)
call void @llvm.invariant.end.p0(ptr %i, i64 1, ptr %dummy)
call void @llvm.lifetime.end.p0(i64 1, ptr %dummy)

%m_i8 = bitcast i64* %m to i8*
%m_a = call i8* @llvm.ptr.annotation.p0i8(i8* %m_i8, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str1, i32 0, i32 0), i32 2, i8* null)
%m_x = bitcast i8* %m_a to i64*
%objsz = call i64 @llvm.objectsize.i64.p0i8(i8* %c, i1 false)
store i64 %objsz, i64* %m_x
%m_a = call ptr @llvm.ptr.annotation.p0(ptr %m, ptr @.str, ptr @.str1, i32 2, ptr null)
%objsz = call i64 @llvm.objectsize.i64.p0(ptr %c, i1 false)
store i64 %objsz, ptr %m_a
; END perform a bunch of inevitable operations

; AND here's the assume:
%ptrint = ptrtoint i32* %a to i64
%ptrint = ptrtoint ptr %a to i64
%maskedptr = and i64 %ptrint, 31
%maskcond = icmp eq i64 %maskedptr, 0
tail call void @llvm.assume(i1 %maskcond)
@@ -62,13 +58,13 @@ entry:
@.str = private unnamed_addr constant [4 x i8] c"sth\00", section "llvm.metadata"
@.str1 = private unnamed_addr constant [4 x i8] c"t.c\00", section "llvm.metadata"

declare i64 @llvm.objectsize.i64.p0i8(i8*, i1)
declare i32 @llvm.annotation.i32(i32, i8*, i8*, i32)
declare i8* @llvm.ptr.annotation.p0i8(i8*, i8*, i8*, i32, i8*)
declare i64 @llvm.objectsize.i64.p0(ptr, i1)
declare i32 @llvm.annotation.i32(i32, ptr, ptr, i32)
declare ptr @llvm.ptr.annotation.p0(ptr, ptr, ptr, i32, ptr)

declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
declare void @llvm.lifetime.end.p0(i64, ptr nocapture)

declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture)
declare void @llvm.invariant.end.p0i8({}*, i64, i8* nocapture)
declare ptr @llvm.invariant.start.p0(i64, ptr nocapture)
declare void @llvm.invariant.end.p0(ptr, i64, ptr nocapture)
declare void @llvm.assume(i1)
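
; Illustrative note (not from this patch): the ptrtoint/and/icmp chain fed
; to @llvm.assume above asserts that %a is 32-byte aligned, and InstCombine
; carries that fact backwards across the intervening operations onto the
; first load. Assuming current IR conventions, the same fact can also be
; stated with an assume operand bundle:
;   call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 32) ]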
37 changes: 18 additions & 19 deletions llvm/test/Transforms/InstCombine/call-returned.ll
@@ -2,8 +2,8 @@
; RUN: opt -S -passes=instcombine < %s | FileCheck %s

declare i32 @passthru_i32(i32 returned)
declare i8* @passthru_p8(i8* returned)
declare i8* @passthru_p8_from_p32(i32* returned)
declare ptr @passthru_p8(ptr returned)
declare ptr @passthru_p8_from_p32(ptr returned)
declare <8 x i8> @passthru_8i8v_from_2i32v(<2 x i32> returned)

define i32 @returned_const_int_arg() {
@@ -15,32 +15,31 @@ define i32 @returned_const_int_arg() {
ret i32 %x
}

define i8* @returned_const_ptr_arg() {
define ptr @returned_const_ptr_arg() {
; CHECK-LABEL: @returned_const_ptr_arg(
; CHECK-NEXT: [[X:%.*]] = call i8* @passthru_p8(i8* null)
; CHECK-NEXT: ret i8* null
; CHECK-NEXT: [[X:%.*]] = call ptr @passthru_p8(ptr null)
; CHECK-NEXT: ret ptr null
;
%x = call i8* @passthru_p8(i8* null)
ret i8* %x
%x = call ptr @passthru_p8(ptr null)
ret ptr %x
}

define i8* @returned_const_ptr_arg_casted() {
define ptr @returned_const_ptr_arg_casted() {
; CHECK-LABEL: @returned_const_ptr_arg_casted(
; CHECK-NEXT: [[X:%.*]] = call i8* @passthru_p8_from_p32(i32* null)
; CHECK-NEXT: ret i8* null
; CHECK-NEXT: [[X:%.*]] = call ptr @passthru_p8_from_p32(ptr null)
; CHECK-NEXT: ret ptr null
;
%x = call i8* @passthru_p8_from_p32(i32* null)
ret i8* %x
%x = call ptr @passthru_p8_from_p32(ptr null)
ret ptr %x
}

define i8* @returned_ptr_arg_casted(i32* %a) {
define ptr @returned_ptr_arg_casted(ptr %a) {
; CHECK-LABEL: @returned_ptr_arg_casted(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[A:%.*]] to i8*
; CHECK-NEXT: [[X:%.*]] = call i8* @passthru_p8_from_p32(i32* [[A]])
; CHECK-NEXT: ret i8* [[TMP1]]
; CHECK-NEXT: [[X:%.*]] = call ptr @passthru_p8_from_p32(ptr [[A:%.*]])
; CHECK-NEXT: ret ptr [[A]]
;
%x = call i8* @passthru_p8_from_p32(i32* %a)
ret i8* %x
%x = call ptr @passthru_p8_from_p32(ptr %a)
ret ptr %x
}

@GV = constant <2 x i32> zeroinitializer
@@ -49,7 +48,7 @@ define <8 x i8> @returned_const_vec_arg_casted() {
; CHECK-NEXT: [[X:%.*]] = call <8 x i8> @passthru_8i8v_from_2i32v(<2 x i32> zeroinitializer)
; CHECK-NEXT: ret <8 x i8> zeroinitializer
;
%v = load <2 x i32>, <2 x i32>* @GV
%v = load <2 x i32>, ptr @GV
%x = call <8 x i8> @passthru_8i8v_from_2i32v(<2 x i32> %v)
ret <8 x i8> %x
}