| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,150 @@ | ||
| ; RUN: opt -mtriple=amdgcn-amd-amdhsa -basicaa -load-store-vectorizer -S -o - %s | FileCheck %s | ||
|
|
||
| target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" | ||
|
|
||
| declare i32 @llvm.amdgcn.workitem.id.x() #1 | ||
|
|
||
| ; Two adjacent float loads from %c and two adjacent zero stores to %a, each | ||
| ; addressed through the sign-extended workitem id. The checks below expect | ||
| ; every pair to be replaced by a single <2 x float> access. | ||
| ; CHECK-LABEL: @basic_merge_sext_index( | ||
| ; CHECK: sext i32 %id.x to i64 | ||
| ; CHECK: load <2 x float> | ||
| ; CHECK: store <2 x float> zeroinitializer | ||
| define void @basic_merge_sext_index(float addrspace(1)* nocapture %a, float addrspace(1)* nocapture %b, float addrspace(1)* nocapture readonly %c) #0 { | ||
| entry: | ||
| %id.x = call i32 @llvm.amdgcn.workitem.id.x() | ||
| %sext.id.x = sext i32 %id.x to i64 | ||
| %a.idx.x = getelementptr inbounds float, float addrspace(1)* %a, i64 %sext.id.x | ||
| %c.idx.x = getelementptr inbounds float, float addrspace(1)* %c, i64 %sext.id.x | ||
| ; The ".1" pointers are one float past the corresponding base pointers. | ||
| %a.idx.x.1 = getelementptr inbounds float, float addrspace(1)* %a.idx.x, i64 1 | ||
| %c.idx.x.1 = getelementptr inbounds float, float addrspace(1)* %c.idx.x, i64 1 | ||
|
|
||
| %ld.c = load float, float addrspace(1)* %c.idx.x, align 4 | ||
| %ld.c.idx.1 = load float, float addrspace(1)* %c.idx.x.1, align 4 | ||
|
|
||
| store float 0.0, float addrspace(1)* %a.idx.x, align 4 | ||
| store float 0.0, float addrspace(1)* %a.idx.x.1, align 4 | ||
|
|
||
| ; Both loaded values are summed and written to %b so the loads have a use. | ||
| %add = fadd float %ld.c, %ld.c.idx.1 | ||
| store float %add, float addrspace(1)* %b, align 4 | ||
| ret void | ||
| } | ||
|
|
||
| ; Same shape as the sign-extended variant, but the workitem id is | ||
| ; zero-extended to i64 before it is used as the element index. The checks | ||
| ; below expect one <2 x float> load and one <2 x float> store. | ||
| ; CHECK-LABEL: @basic_merge_zext_index( | ||
| ; CHECK: zext i32 %id.x to i64 | ||
| ; CHECK: load <2 x float> | ||
| ; CHECK: store <2 x float> | ||
| define void @basic_merge_zext_index(float addrspace(1)* nocapture %a, float addrspace(1)* nocapture %b, float addrspace(1)* nocapture readonly %c) #0 { | ||
| entry: | ||
| %id.x = call i32 @llvm.amdgcn.workitem.id.x() | ||
| %zext.id.x = zext i32 %id.x to i64 | ||
| %a.idx.x = getelementptr inbounds float, float addrspace(1)* %a, i64 %zext.id.x | ||
| %c.idx.x = getelementptr inbounds float, float addrspace(1)* %c, i64 %zext.id.x | ||
| ; The ".1" pointers are one float past the corresponding base pointers. | ||
| %a.idx.x.1 = getelementptr inbounds float, float addrspace(1)* %a.idx.x, i64 1 | ||
| %c.idx.x.1 = getelementptr inbounds float, float addrspace(1)* %c.idx.x, i64 1 | ||
|
|
||
| %ld.c = load float, float addrspace(1)* %c.idx.x, align 4 | ||
| %ld.c.idx.1 = load float, float addrspace(1)* %c.idx.x.1, align 4 | ||
| store float 0.0, float addrspace(1)* %a.idx.x, align 4 | ||
| store float 0.0, float addrspace(1)* %a.idx.x.1, align 4 | ||
|
|
||
| ; Both loaded values are summed and written to %b so the loads have a use. | ||
| %add = fadd float %ld.c, %ld.c.idx.1 | ||
| store float %add, float addrspace(1)* %b, align 4 | ||
| ret void | ||
| } | ||
|
|
||
| ; Element 0 is indexed by zext(id.x << 2) and element 1 by | ||
| ; zext((id.x << 2) | 1). The shift leaves bit 0 clear, so the `or` selects | ||
| ; the next element. Loads and stores are interleaved and every pointer | ||
| ; argument is noalias. The checks below expect one <2 x float> load and one | ||
| ; <2 x float> store. | ||
| ; CHECK-LABEL: @merge_op_zext_index( | ||
| ; CHECK: load <2 x float> | ||
| ; CHECK: store <2 x float> | ||
| define void @merge_op_zext_index(float addrspace(1)* nocapture noalias %a, float addrspace(1)* nocapture noalias %b, float addrspace(1)* nocapture readonly noalias %c) #0 { | ||
| entry: | ||
| %id.x = call i32 @llvm.amdgcn.workitem.id.x() | ||
| %shl = shl i32 %id.x, 2 | ||
| %zext.id.x = zext i32 %shl to i64 | ||
| %a.0 = getelementptr inbounds float, float addrspace(1)* %a, i64 %zext.id.x | ||
| %c.0 = getelementptr inbounds float, float addrspace(1)* %c, i64 %zext.id.x | ||
|
|
||
| ; The second index is formed in i32 before the extension, not by a GEP of 1. | ||
| %id.x.1 = or i32 %shl, 1 | ||
| %id.x.1.ext = zext i32 %id.x.1 to i64 | ||
|
|
||
| %a.1 = getelementptr inbounds float, float addrspace(1)* %a, i64 %id.x.1.ext | ||
| %c.1 = getelementptr inbounds float, float addrspace(1)* %c, i64 %id.x.1.ext | ||
|
|
||
| %ld.c.0 = load float, float addrspace(1)* %c.0, align 4 | ||
| store float 0.0, float addrspace(1)* %a.0, align 4 | ||
| %ld.c.1 = load float, float addrspace(1)* %c.1, align 4 | ||
| store float 0.0, float addrspace(1)* %a.1, align 4 | ||
|
|
||
| %add = fadd float %ld.c.0, %ld.c.1 | ||
| store float %add, float addrspace(1)* %b, align 4 | ||
| ret void | ||
| } | ||
|
|
||
| ; Element 0 is indexed by sext(id.x << 2) and element 1 by | ||
| ; sext((id.x << 2) | 1). The shift leaves bit 0 clear, so the `or` selects | ||
| ; the next element. Loads and stores are interleaved and every pointer | ||
| ; argument is noalias. The checks below expect one <2 x float> load and one | ||
| ; <2 x float> store. | ||
| ; CHECK-LABEL: @merge_op_sext_index( | ||
| ; CHECK: load <2 x float> | ||
| ; CHECK: store <2 x float> | ||
| define void @merge_op_sext_index(float addrspace(1)* nocapture noalias %a, float addrspace(1)* nocapture noalias %b, float addrspace(1)* nocapture readonly noalias %c) #0 { | ||
| entry: | ||
| %id.x = call i32 @llvm.amdgcn.workitem.id.x() | ||
| %shl = shl i32 %id.x, 2 | ||
| %sext.id.x = sext i32 %shl to i64 | ||
| %a.0 = getelementptr inbounds float, float addrspace(1)* %a, i64 %sext.id.x | ||
| %c.0 = getelementptr inbounds float, float addrspace(1)* %c, i64 %sext.id.x | ||
|
|
||
| ; The second index is formed in i32 before the extension, not by a GEP of 1. | ||
| %id.x.1 = or i32 %shl, 1 | ||
| %id.x.1.ext = sext i32 %id.x.1 to i64 | ||
|
|
||
| %a.1 = getelementptr inbounds float, float addrspace(1)* %a, i64 %id.x.1.ext | ||
| %c.1 = getelementptr inbounds float, float addrspace(1)* %c, i64 %id.x.1.ext | ||
|
|
||
| %ld.c.0 = load float, float addrspace(1)* %c.0, align 4 | ||
| store float 0.0, float addrspace(1)* %a.0, align 4 | ||
| %ld.c.1 = load float, float addrspace(1)* %c.1, align 4 | ||
| store float 0.0, float addrspace(1)* %a.1, align 4 | ||
|
|
||
| %add = fadd float %ld.c.0, %ld.c.1 | ||
| store float %add, float addrspace(1)* %b, align 4 | ||
| ret void | ||
| } | ||
|
|
||
| ; This case fails to vectorize if not using the extra extension | ||
| ; handling in isConsecutiveAccess. | ||
|
|
||
| ; Loop that copies two adjacent i32 elements from %c to %a per iteration. | ||
| ; The index is the i64 induction variable truncated to i32, shifted left by | ||
| ; 4 and zero-extended back to i64; the second index sets bit 0 with an `or`. | ||
| ; %b, %arst and %aoeu are not referenced in the body. The checks below | ||
| ; expect a <2 x i32> load and a <2 x i32> store after the loop label. | ||
| ; CHECK-LABEL: @zext_trunc_phi_1( | ||
| ; CHECK: loop: | ||
| ; CHECK: load <2 x i32> | ||
| ; CHECK: store <2 x i32> | ||
| define void @zext_trunc_phi_1(i32 addrspace(1)* nocapture noalias %a, i32 addrspace(1)* nocapture noalias %b, i32 addrspace(1)* nocapture readonly noalias %c, i32 %n, i64 %arst, i64 %aoeu) #0 { | ||
| entry: | ||
| ; Skip the loop entirely when %n is zero. | ||
| %cmp0 = icmp eq i32 %n, 0 | ||
| br i1 %cmp0, label %exit, label %loop | ||
|
|
||
| loop: | ||
| %indvars.iv = phi i64 [ %indvars.iv.next, %loop ], [ 0, %entry ] | ||
| %trunc.iv = trunc i64 %indvars.iv to i32 | ||
| %idx = shl i32 %trunc.iv, 4 | ||
|
|
||
| %idx.ext = zext i32 %idx to i64 | ||
| %c.0 = getelementptr inbounds i32, i32 addrspace(1)* %c, i64 %idx.ext | ||
| %a.0 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %idx.ext | ||
|
|
||
| ; Bit 0 of %idx is clear after the shift, so this is the next element. | ||
| %idx.1 = or i32 %idx, 1 | ||
| %idx.1.ext = zext i32 %idx.1 to i64 | ||
| %c.1 = getelementptr inbounds i32, i32 addrspace(1)* %c, i64 %idx.1.ext | ||
| %a.1 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %idx.1.ext | ||
|
|
||
| %ld.c.0 = load i32, i32 addrspace(1)* %c.0, align 4 | ||
| store i32 %ld.c.0, i32 addrspace(1)* %a.0, align 4 | ||
| %ld.c.1 = load i32, i32 addrspace(1)* %c.1, align 4 | ||
| store i32 %ld.c.1, i32 addrspace(1)* %a.1, align 4 | ||
|
|
||
| %indvars.iv.next = add i64 %indvars.iv, 1 | ||
| %lftr.wideiv = trunc i64 %indvars.iv.next to i32 | ||
|
|
||
| ; Leave the loop once the truncated next induction value equals %n. | ||
| %exitcond = icmp eq i32 %lftr.wideiv, %n | ||
| br i1 %exitcond, label %exit, label %loop | ||
|
|
||
| exit: | ||
| ret void | ||
| } | ||
|
|
||
| attributes #0 = { nounwind } | ||
| attributes #1 = { nounwind readnone } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,62 @@ | ||
| ; RUN: opt -mtriple=amdgcn-amd-amdhsa -basicaa -load-store-vectorizer -S -o - %s | FileCheck %s | ||
|
|
||
| target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" | ||
|
|
||
| ; Check the position of the inserted vector load or store relative to the | ||
| ; existing adds. | ||
|
|
||
| ; Two adjacent float loads from %c are interleaved with the adds %z and %w. | ||
| ; The checks below expect the <2 x float> load to appear after %w and before | ||
| ; %foo in the output. | ||
| ; CHECK-LABEL: @insert_load_point( | ||
| ; CHECK: %z = add i32 %x, 4 | ||
| ; CHECK: %w = add i32 %y, 9 | ||
| ; CHECK: load <2 x float> | ||
| ; CHECK: %foo = add i32 %z, %w | ||
| define void @insert_load_point(float addrspace(1)* nocapture %a, float addrspace(1)* nocapture %b, float addrspace(1)* nocapture readonly %c, i64 %idx, i32 %x, i32 %y) #0 { | ||
| entry: | ||
| %a.idx.x = getelementptr inbounds float, float addrspace(1)* %a, i64 %idx | ||
| %c.idx.x = getelementptr inbounds float, float addrspace(1)* %c, i64 %idx | ||
| %a.idx.x.1 = getelementptr inbounds float, float addrspace(1)* %a.idx.x, i64 1 | ||
| %c.idx.x.1 = getelementptr inbounds float, float addrspace(1)* %c.idx.x, i64 1 | ||
|
|
||
| ; Integer adds placed before, between and after the two scalar loads. | ||
| %z = add i32 %x, 4 | ||
| %ld.c = load float, float addrspace(1)* %c.idx.x, align 4 | ||
| %w = add i32 %y, 9 | ||
| %ld.c.idx.1 = load float, float addrspace(1)* %c.idx.x.1, align 4 | ||
| %foo = add i32 %z, %w | ||
|
|
||
| store float 0.0, float addrspace(1)* %a.idx.x, align 4 | ||
| store float 0.0, float addrspace(1)* %a.idx.x.1, align 4 | ||
|
|
||
| %add = fadd float %ld.c, %ld.c.idx.1 | ||
| store float %add, float addrspace(1)* %b, align 4 | ||
| ; Gives %foo a use. | ||
| store i32 %foo, i32 addrspace(3)* null, align 4 | ||
| ret void | ||
| } | ||
|
|
||
| ; Two adjacent zero stores to %a are interleaved with the adds %z and %w. | ||
| ; The checks below expect the <2 x float> store to appear after %w and | ||
| ; before %foo in the output. | ||
| ; CHECK-LABEL: @insert_store_point( | ||
| ; CHECK: %z = add i32 %x, 4 | ||
| ; CHECK: %w = add i32 %y, 9 | ||
| ; CHECK: store <2 x float> | ||
| ; CHECK: %foo = add i32 %z, %w | ||
| define void @insert_store_point(float addrspace(1)* nocapture %a, float addrspace(1)* nocapture %b, float addrspace(1)* nocapture readonly %c, i64 %idx, i32 %x, i32 %y) #0 { | ||
| entry: | ||
| %a.idx.x = getelementptr inbounds float, float addrspace(1)* %a, i64 %idx | ||
| %c.idx.x = getelementptr inbounds float, float addrspace(1)* %c, i64 %idx | ||
| %a.idx.x.1 = getelementptr inbounds float, float addrspace(1)* %a.idx.x, i64 1 | ||
| %c.idx.x.1 = getelementptr inbounds float, float addrspace(1)* %c.idx.x, i64 1 | ||
|
|
||
| %ld.c = load float, float addrspace(1)* %c.idx.x, align 4 | ||
| %ld.c.idx.1 = load float, float addrspace(1)* %c.idx.x.1, align 4 | ||
|
|
||
| ; Integer adds placed before, between and after the two scalar stores. | ||
| %z = add i32 %x, 4 | ||
| store float 0.0, float addrspace(1)* %a.idx.x, align 4 | ||
| %w = add i32 %y, 9 | ||
| store float 0.0, float addrspace(1)* %a.idx.x.1, align 4 | ||
| %foo = add i32 %z, %w | ||
|
|
||
| %add = fadd float %ld.c, %ld.c.idx.1 | ||
| store float %add, float addrspace(1)* %b, align 4 | ||
| ; Gives %foo a use. | ||
| store i32 %foo, i32 addrspace(3)* null, align 4 | ||
| ret void | ||
| } | ||
|
|
||
| attributes #0 = { nounwind } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,28 @@ | ||
| ; RUN: opt -mtriple=amdgcn-amd-amdhsa -basicaa -load-store-vectorizer -S -o - %s | FileCheck %s | ||
|
|
||
| target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" | ||
|
|
||
| ; It is OK to vectorize the loads as long as the may-alias store | ||
| ; occurs before the vector load. | ||
|
|
||
| ; Loads from %c and %c.idx.1 alternate with stores to %a and %a.idx.1; none | ||
| ; of the pointer arguments is marked noalias. The checks below expect the | ||
| ; first scalar store, then a <2 x double> load, then the second scalar store. | ||
| ; CHECK-LABEL: @interleave( | ||
| ; CHECK: store double 0.000000e+00, double addrspace(1)* %a, | ||
| ; CHECK: load <2 x double> | ||
| ; CHECK: store double 0.000000e+00, double addrspace(1)* %a.idx.1 | ||
| define void @interleave(double addrspace(1)* nocapture %a, double addrspace(1)* nocapture %b, double addrspace(1)* nocapture readonly %c) #0 { | ||
| entry: | ||
| %a.idx.1 = getelementptr inbounds double, double addrspace(1)* %a, i64 1 | ||
| %c.idx.1 = getelementptr inbounds double, double addrspace(1)* %c, i64 1 | ||
|
|
||
| %ld.c = load double, double addrspace(1)* %c, align 8 ; may alias store to %a | ||
| store double 0.0, double addrspace(1)* %a, align 8 | ||
|
|
||
| %ld.c.idx.1 = load double, double addrspace(1)* %c.idx.1, align 8 ; may alias store to %a | ||
| store double 0.0, double addrspace(1)* %a.idx.1, align 8 | ||
|
|
||
| ; Both loaded values are summed and written to %b so the loads have a use. | ||
| %add = fadd double %ld.c, %ld.c.idx.1 | ||
| store double %add, double addrspace(1)* %b | ||
|
|
||
| ret void | ||
| } | ||
|
|
||
| attributes #0 = { nounwind } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| if not 'AMDGPU' in config.root.targets: | ||
| config.unsupported = True | ||
|
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,91 @@ | ||
| ; RUN: opt -mtriple=amdgcn-amd-amdhsa -basicaa -load-store-vectorizer -S -o - %s | FileCheck %s | ||
|
|
||
| target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" | ||
|
|
||
| ; Two adjacent <2 x i32> loads from %b and two adjacent <2 x i32> zero | ||
| ; stores to %a, all with align 4. The checks below expect each pair to be | ||
| ; merged into one <4 x i32> access. | ||
| ; CHECK-LABEL: @merge_v2i32_v2i32( | ||
| ; CHECK: load <4 x i32> | ||
| ; CHECK: store <4 x i32> zeroinitializer | ||
| define void @merge_v2i32_v2i32(<2 x i32> addrspace(1)* nocapture %a, <2 x i32> addrspace(1)* nocapture readonly %b) #0 { | ||
| entry: | ||
| ; Pointers to the vector element that follows %a and %b respectively. | ||
| %a.1 = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(1)* %a, i64 1 | ||
| %b.1 = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(1)* %b, i64 1 | ||
|
|
||
| %ld.c = load <2 x i32>, <2 x i32> addrspace(1)* %b, align 4 | ||
| %ld.c.idx.1 = load <2 x i32>, <2 x i32> addrspace(1)* %b.1, align 4 | ||
|
|
||
| store <2 x i32> zeroinitializer, <2 x i32> addrspace(1)* %a, align 4 | ||
| store <2 x i32> zeroinitializer, <2 x i32> addrspace(1)* %a.1, align 4 | ||
|
|
||
| ret void | ||
| } | ||
|
|
||
| ; Two adjacent <1 x i32> loads from %b and two adjacent <1 x i32> zero | ||
| ; stores to %a, all with align 4. The checks below expect each pair to be | ||
| ; merged into one <2 x i32> access. | ||
| ; CHECK-LABEL: @merge_v1i32_v1i32( | ||
| ; CHECK: load <2 x i32> | ||
| ; CHECK: store <2 x i32> zeroinitializer | ||
| define void @merge_v1i32_v1i32(<1 x i32> addrspace(1)* nocapture %a, <1 x i32> addrspace(1)* nocapture readonly %b) #0 { | ||
| entry: | ||
| ; Pointers to the vector element that follows %a and %b respectively. | ||
| %a.1 = getelementptr inbounds <1 x i32>, <1 x i32> addrspace(1)* %a, i64 1 | ||
| %b.1 = getelementptr inbounds <1 x i32>, <1 x i32> addrspace(1)* %b, i64 1 | ||
|
|
||
| %ld.c = load <1 x i32>, <1 x i32> addrspace(1)* %b, align 4 | ||
| %ld.c.idx.1 = load <1 x i32>, <1 x i32> addrspace(1)* %b.1, align 4 | ||
|
|
||
| store <1 x i32> zeroinitializer, <1 x i32> addrspace(1)* %a, align 4 | ||
| store <1 x i32> zeroinitializer, <1 x i32> addrspace(1)* %a.1, align 4 | ||
|
|
||
| ret void | ||
| } | ||
|
|
||
| ; Two adjacent <3 x i32> loads from %b and two adjacent <3 x i32> zero | ||
| ; stores to %a. The checks below expect both loads and both stores to remain | ||
| ; separate <3 x i32> accesses. | ||
| ; CHECK-LABEL: @no_merge_v3i32_v3i32( | ||
| ; CHECK: load <3 x i32> | ||
| ; CHECK: load <3 x i32> | ||
| ; CHECK: store <3 x i32> zeroinitializer | ||
| ; CHECK: store <3 x i32> zeroinitializer | ||
| define void @no_merge_v3i32_v3i32(<3 x i32> addrspace(1)* nocapture %a, <3 x i32> addrspace(1)* nocapture readonly %b) #0 { | ||
| entry: | ||
| ; Pointers to the vector element that follows %a and %b respectively. | ||
| %a.1 = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %a, i64 1 | ||
| %b.1 = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %b, i64 1 | ||
|
|
||
| %ld.c = load <3 x i32>, <3 x i32> addrspace(1)* %b, align 4 | ||
| %ld.c.idx.1 = load <3 x i32>, <3 x i32> addrspace(1)* %b.1, align 4 | ||
|
|
||
| store <3 x i32> zeroinitializer, <3 x i32> addrspace(1)* %a, align 4 | ||
| store <3 x i32> zeroinitializer, <3 x i32> addrspace(1)* %a.1, align 4 | ||
|
|
||
| ret void | ||
| } | ||
|
|
||
| ; Two adjacent <2 x i16> loads from %b and two adjacent <2 x i16> zero | ||
| ; stores to %a, all with align 4. The checks below expect each pair to be | ||
| ; merged into one <4 x i16> access. | ||
| ; CHECK-LABEL: @merge_v2i16_v2i16( | ||
| ; CHECK: load <4 x i16> | ||
| ; CHECK: store <4 x i16> zeroinitializer | ||
| define void @merge_v2i16_v2i16(<2 x i16> addrspace(1)* nocapture %a, <2 x i16> addrspace(1)* nocapture readonly %b) #0 { | ||
| entry: | ||
| ; Pointers to the vector element that follows %a and %b respectively. | ||
| %a.1 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %a, i64 1 | ||
| %b.1 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %b, i64 1 | ||
|
|
||
| %ld.c = load <2 x i16>, <2 x i16> addrspace(1)* %b, align 4 | ||
| %ld.c.idx.1 = load <2 x i16>, <2 x i16> addrspace(1)* %b.1, align 4 | ||
|
|
||
| store <2 x i16> zeroinitializer, <2 x i16> addrspace(1)* %a, align 4 | ||
| store <2 x i16> zeroinitializer, <2 x i16> addrspace(1)* %a.1, align 4 | ||
|
|
||
| ret void | ||
| } | ||
|
|
||
| ; Ideally this would be merged | ||
| ; An i32 load from %a followed by a <2 x i16> load from the next i32 slot, | ||
| ; reached through a bitcast. The checks below expect the two loads to remain | ||
| ; separate. Neither loaded value is used. | ||
| ; CHECK-LABEL: @merge_load_i32_v2i16( | ||
| ; CHECK: load i32, | ||
| ; CHECK: load <2 x i16> | ||
| define void @merge_load_i32_v2i16(i32 addrspace(1)* nocapture %a) #0 { | ||
| entry: | ||
| ; %a.1 is one i32 past %a; the cast only changes the pointee type. | ||
| %a.1 = getelementptr inbounds i32, i32 addrspace(1)* %a, i32 1 | ||
| %a.1.cast = bitcast i32 addrspace(1)* %a.1 to <2 x i16> addrspace(1)* | ||
|
|
||
| %ld.0 = load i32, i32 addrspace(1)* %a | ||
| %ld.1 = load <2 x i16>, <2 x i16> addrspace(1)* %a.1.cast | ||
|
|
||
| ret void | ||
| } | ||
|
|
||
| attributes #0 = { nounwind } | ||
| attributes #1 = { nounwind readnone } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,20 @@ | ||
| ; RUN: opt -mtriple=amdgcn-amd-amdhsa -load-store-vectorizer -S -o - %s | FileCheck %s | ||
|
|
||
| ; Four i32 stores to elements 1, 2, 3 and then 0 of %out. The function uses | ||
| ; attribute group #0, which in this file includes noimplicitfloat. The | ||
| ; checks below expect four scalar i32 stores in the output. | ||
| ; CHECK-LABEL: @no_implicit_float( | ||
| ; CHECK: store i32 | ||
| ; CHECK: store i32 | ||
| ; CHECK: store i32 | ||
| ; CHECK: store i32 | ||
| define void @no_implicit_float(i32 addrspace(1)* %out) #0 { | ||
| %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 | ||
| %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2 | ||
| %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3 | ||
|
|
||
| ; Element 0 is written last, after elements 1 through 3. | ||
| store i32 123, i32 addrspace(1)* %out.gep.1 | ||
| store i32 456, i32 addrspace(1)* %out.gep.2 | ||
| store i32 333, i32 addrspace(1)* %out.gep.3 | ||
| store i32 1234, i32 addrspace(1)* %out | ||
| ret void | ||
| } | ||
|
|
||
| attributes #0 = { nounwind noimplicitfloat } |