301 changes: 301 additions & 0 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-load-store-pointers.ll


96 changes: 80 additions & 16 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir


51 changes: 27 additions & 24 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
@@ -6604,22 +6604,25 @@ body: |
; CI: liveins: $vgpr0_vgpr1
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), addrspace 4)
; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4)
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
; CI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; VI-LABEL: name: test_load_constant_v2p3_align8
; VI: liveins: $vgpr0_vgpr1
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), addrspace 4)
; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4)
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX9-LABEL: name: test_load_constant_v2p3_align8
; GFX9: liveins: $vgpr0_vgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), addrspace 4)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4)
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
%0:_(p4) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 4)
$vgpr0_vgpr1 = COPY %1
@@ -6635,22 +6638,25 @@
; CI: liveins: $vgpr0_vgpr1
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), align 4, addrspace 4)
; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 4)
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
; CI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; VI-LABEL: name: test_load_constant_v2p3_align4
; VI: liveins: $vgpr0_vgpr1
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), align 4, addrspace 4)
; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 4)
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX9-LABEL: name: test_load_constant_v2p3_align4
; GFX9: liveins: $vgpr0_vgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), align 4, addrspace 4)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 4)
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
%0:_(p4) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 4, addrspace 4)
$vgpr0_vgpr1 = COPY %1
@@ -6683,7 +6689,6 @@
; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
; CI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
@@ -6699,9 +6704,9 @@
; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
; CI-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
; CI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; VI-LABEL: name: test_load_constant_v2p3_align1
; VI: liveins: $vgpr0_vgpr1
@@ -6724,7 +6729,6 @@
; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
@@ -6740,9 +6744,9 @@
; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
; VI-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX9-LABEL: name: test_load_constant_v2p3_align1
; GFX9: liveins: $vgpr0_vgpr1
@@ -6765,7 +6769,6 @@
; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; GFX9-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
; GFX9-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4)
@@ -6781,9 +6784,9 @@
; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
; GFX9-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
%0:_(p4) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 1, addrspace 4)
$vgpr0_vgpr1 = COPY %1
146 changes: 80 additions & 66 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir


98 changes: 54 additions & 44 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
@@ -13448,43 +13448,49 @@
; SI: liveins: $vgpr0_vgpr1
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; CI-HSA-LABEL: name: test_load_global_v2p3_align8
; CI-HSA: liveins: $vgpr0_vgpr1
; CI-HSA-NEXT: {{ $}}
; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; CI-MESA-LABEL: name: test_load_global_v2p3_align8
; CI-MESA: liveins: $vgpr0_vgpr1
; CI-MESA-NEXT: {{ $}}
; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; VI-LABEL: name: test_load_global_v2p3_align8
; VI: liveins: $vgpr0_vgpr1
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX9-HSA-LABEL: name: test_load_global_v2p3_align8
; GFX9-HSA: liveins: $vgpr0_vgpr1
; GFX9-HSA-NEXT: {{ $}}
; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX9-MESA-LABEL: name: test_load_global_v2p3_align8
; GFX9-MESA: liveins: $vgpr0_vgpr1
; GFX9-MESA-NEXT: {{ $}}
; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1)
; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 1)
$vgpr0_vgpr1 = COPY %1
@@ -13500,43 +13506,49 @@
; SI: liveins: $vgpr0_vgpr1
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1)
; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1)
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; CI-HSA-LABEL: name: test_load_global_v2p3_align4
; CI-HSA: liveins: $vgpr0_vgpr1
; CI-HSA-NEXT: {{ $}}
; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1)
; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1)
; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; CI-MESA-LABEL: name: test_load_global_v2p3_align4
; CI-MESA: liveins: $vgpr0_vgpr1
; CI-MESA-NEXT: {{ $}}
; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1)
; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1)
; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; VI-LABEL: name: test_load_global_v2p3_align4
; VI: liveins: $vgpr0_vgpr1
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1)
; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1)
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX9-HSA-LABEL: name: test_load_global_v2p3_align4
; GFX9-HSA: liveins: $vgpr0_vgpr1
; GFX9-HSA-NEXT: {{ $}}
; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1)
; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1)
; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX9-MESA-LABEL: name: test_load_global_v2p3_align4
; GFX9-MESA: liveins: $vgpr0_vgpr1
; GFX9-MESA-NEXT: {{ $}}
; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1)
; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1)
; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 4, addrspace 1)
$vgpr0_vgpr1 = COPY %1
@@ -13569,7 +13581,6 @@
; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
; SI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
@@ -13585,16 +13596,17 @@
; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
; SI-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; CI-HSA-LABEL: name: test_load_global_v2p3_align1
; CI-HSA: liveins: $vgpr0_vgpr1
; CI-HSA-NEXT: {{ $}}
; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 1, addrspace 1)
; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 1, addrspace 1)
; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; CI-MESA-LABEL: name: test_load_global_v2p3_align1
; CI-MESA: liveins: $vgpr0_vgpr1
@@ -13617,7 +13629,6 @@
; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
; CI-MESA-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
; CI-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
@@ -13633,9 +13644,9 @@
; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
; CI-MESA-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; VI-LABEL: name: test_load_global_v2p3_align1
; VI: liveins: $vgpr0_vgpr1
@@ -13658,7 +13669,6 @@
; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
; VI-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
@@ -13674,16 +13684,17 @@
; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
; VI-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX9-HSA-LABEL: name: test_load_global_v2p3_align1
; GFX9-HSA: liveins: $vgpr0_vgpr1
; GFX9-HSA-NEXT: {{ $}}
; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 1, addrspace 1)
; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 1, addrspace 1)
; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX9-MESA-LABEL: name: test_load_global_v2p3_align1
; GFX9-MESA: liveins: $vgpr0_vgpr1
@@ -13706,7 +13717,6 @@
; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
; GFX9-MESA-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32)
; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; GFX9-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
; GFX9-MESA-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1)
@@ -13722,9 +13732,9 @@
; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]]
; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32)
; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
; GFX9-MESA-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32)
; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3)
; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
; GFX9-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 1, addrspace 1)
$vgpr0_vgpr1 = COPY %1
50 changes: 30 additions & 20 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
@@ -14595,71 +14595,81 @@
; SI: liveins: $vgpr0
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; CI-LABEL: name: test_load_local_v2p3_align8
; CI: liveins: $vgpr0
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
; CI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; CI-DS128-LABEL: name: test_load_local_v2p3_align8
; CI-DS128: liveins: $vgpr0
; CI-DS128-NEXT: {{ $}}
; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; VI-LABEL: name: test_load_local_v2p3_align8
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX9-LABEL: name: test_load_local_v2p3_align8
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX9-UNALIGNED-LABEL: name: test_load_local_v2p3_align8
; GFX9-UNALIGNED: liveins: $vgpr0
; GFX9-UNALIGNED-NEXT: {{ $}}
; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX10-LABEL: name: test_load_local_v2p3_align8
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX10-UNALIGNED-LABEL: name: test_load_local_v2p3_align8
; GFX10-UNALIGNED: liveins: $vgpr0
; GFX10-UNALIGNED-NEXT: {{ $}}
; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX11-LABEL: name: test_load_local_v2p3_align8
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX11-UNALIGNED-LABEL: name: test_load_local_v2p3_align8
; GFX11-UNALIGNED: liveins: $vgpr0
; GFX11-UNALIGNED-NEXT: {{ $}}
; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3)
; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
%0:_(p3) = COPY $vgpr0
%1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 3)
$vgpr0_vgpr1 = COPY %1
83 changes: 47 additions & 36 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir
@@ -16860,106 +16860,117 @@
; SI: liveins: $vgpr0
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
; SI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), align 8, addrspace 5)
; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
; SI-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5)
; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
; SI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; CI-LABEL: name: test_load_private_v2p3_align8
; CI: liveins: $vgpr0
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
; CI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), align 8, addrspace 5)
; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
; CI-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5)
; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
; CI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; VI-LABEL: name: test_load_private_v2p3_align8
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
; VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), align 8, addrspace 5)
; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
; VI-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5)
; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
; VI-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX9-LABEL: name: test_load_private_v2p3_align8
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), align 8, addrspace 5)
; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5)
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX10-LABEL: name: test_load_private_v2p3_align8
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), align 8, addrspace 5)
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5)
; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX11-LABEL: name: test_load_private_v2p3_align8
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p5) :: (load (<2 x p3>), addrspace 5)
; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), addrspace 5)
; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; GFX12-LABEL: name: test_load_private_v2p3_align8
; GFX12: liveins: $vgpr0
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p5) :: (load (<2 x p3>), addrspace 5)
; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), addrspace 5)
; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; UNALIGNED_GFX9-LABEL: name: test_load_private_v2p3_align8
; UNALIGNED_GFX9: liveins: $vgpr0
; UNALIGNED_GFX9-NEXT: {{ $}}
; UNALIGNED_GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), align 8, addrspace 5)
; UNALIGNED_GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
; UNALIGNED_GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; UNALIGNED_GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5)
; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
; UNALIGNED_GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; UNALIGNED_GFX10-LABEL: name: test_load_private_v2p3_align8
; UNALIGNED_GFX10: liveins: $vgpr0
; UNALIGNED_GFX10-NEXT: {{ $}}
; UNALIGNED_GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), align 8, addrspace 5)
; UNALIGNED_GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5)
; UNALIGNED_GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; UNALIGNED_GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5)
; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3)
; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
; UNALIGNED_GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5)
; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; UNALIGNED_GFX11-LABEL: name: test_load_private_v2p3_align8
; UNALIGNED_GFX11: liveins: $vgpr0
; UNALIGNED_GFX11-NEXT: {{ $}}
; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p5) :: (load (<2 x p3>), addrspace 5)
; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), addrspace 5)
; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
;
; UNALIGNED_GFX12-LABEL: name: test_load_private_v2p3_align8
; UNALIGNED_GFX12: liveins: $vgpr0
; UNALIGNED_GFX12-NEXT: {{ $}}
; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p5) :: (load (<2 x p3>), addrspace 5)
; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), addrspace 5)
; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p3>) = G_BITCAST [[LOAD]](<2 x s32>)
; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x p3>)
%0:_(p5) = COPY $vgpr0
%1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 5)
$vgpr0_vgpr1 = COPY %1
84 changes: 48 additions & 36 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
@@ -2310,9 +2310,9 @@ body: |
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
; SI-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>)
; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3)
; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32)
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>)
; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
@@ -2332,8 +2332,7 @@ body: |
; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
; SI-NEXT: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3)
; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[PTRTOINT1]](s32)
; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32)
; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
@@ -2353,16 +2352,17 @@ body: |
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
; CI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 1, addrspace 1)
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
; CI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 1, addrspace 1)
;
; VI-LABEL: name: test_store_global_v2p3_align1
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
; VI-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>)
; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3)
; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32)
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>)
; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
@@ -2383,8 +2383,7 @@ body: |
; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
; VI-NEXT: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3)
; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[PTRTOINT1]](s32)
; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32)
; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
@@ -2405,7 +2404,8 @@ body: |
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
; GFX9-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 1, addrspace 1)
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
; GFX9-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 1, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store (<2 x p3>), align 1, addrspace 1)
@@ -2422,9 +2422,9 @@ body: |
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
; SI-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>)
; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3)
; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32)
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>)
; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
@@ -2433,8 +2433,7 @@ body: |
; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
; SI-NEXT: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3)
; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[PTRTOINT1]](s32)
; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32)
; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
@@ -2445,16 +2444,17 @@ body: |
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
; CI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 2, addrspace 1)
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
; CI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 2, addrspace 1)
;
; VI-LABEL: name: test_store_global_v2p3_align2
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
; VI-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>)
; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3)
; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32)
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>)
; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
@@ -2463,8 +2463,7 @@ body: |
; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
; VI-NEXT: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3)
; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[PTRTOINT1]](s32)
; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32)
; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
@@ -2475,7 +2474,8 @@ body: |
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
; GFX9-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 2, addrspace 1)
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
; GFX9-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 2, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store (<2 x p3>), align 2, addrspace 1)
@@ -2492,28 +2492,32 @@ body: |
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
; SI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 4, addrspace 1)
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
; SI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1)
;
; CI-LABEL: name: test_store_global_v2p3_align4
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
; CI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 4, addrspace 1)
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
; CI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1)
;
; VI-LABEL: name: test_store_global_v2p3_align4
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
; VI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 4, addrspace 1)
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
; VI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1)
;
; GFX9-LABEL: name: test_store_global_v2p3_align4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
; GFX9-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 4, addrspace 1)
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
; GFX9-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store (<2 x p3>), align 4, addrspace 1)
@@ -2530,28 +2534,32 @@ body: |
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
; SI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), addrspace 1)
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
; SI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1)
;
; CI-LABEL: name: test_store_global_v2p3_align8
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
; CI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), addrspace 1)
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
; CI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1)
;
; VI-LABEL: name: test_store_global_v2p3_align8
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
; VI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), addrspace 1)
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
; VI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1)
;
; GFX9-LABEL: name: test_store_global_v2p3_align8
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
; GFX9-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), addrspace 1)
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
; GFX9-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store (<2 x p3>), align 8, addrspace 1)
@@ -2568,28 +2576,32 @@ body: |
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
; SI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 16, addrspace 1)
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
; SI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1)
;
; CI-LABEL: name: test_store_global_v2p3_align16
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
; CI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 16, addrspace 1)
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
; CI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1)
;
; VI-LABEL: name: test_store_global_v2p3_align16
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
; VI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 16, addrspace 1)
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
; VI-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1)
;
; GFX9-LABEL: name: test_store_global_v2p3_align16
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
; GFX9-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 16, addrspace 1)
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY1]](<2 x p3>)
; GFX9-NEXT: G_STORE [[BITCAST]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<2 x p3>) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store (<2 x p3>), align 16, addrspace 1)
106 changes: 106 additions & 0 deletions llvm/test/CodeGen/AMDGPU/commute-frame-index-operand.mir
@@ -0,0 +1,106 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=machine-cse -verify-machineinstrs %s -o - | FileCheck --check-prefix=GCN %s

# Check that invalid MIR is not produced with a frame index in a
# commutable operand.

---
name: commute_frame_index__v_add_co_u32_e32__sgpr_fi
tracksRegLiveness: true
stack:
- { id: 0, size: 8, alignment: 4, local-offset: 0 }
body: |
bb.0:
liveins: $sgpr4
; GCN-LABEL: name: commute_frame_index__v_add_co_u32_e32__sgpr_fi
; GCN: liveins: $sgpr4
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 killed [[COPY]], %stack.0, implicit-def dead $vcc, implicit $exec
; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]]
%0:sreg_32 = COPY $sgpr4
%1:vgpr_32 = V_ADD_CO_U32_e32 killed %0, %stack.0, implicit-def dead $vcc, implicit $exec
S_ENDPGM 0, implicit %1
...

---
name: commute_frame_index__v_add_co_u32_e64__fi_sgpr
tracksRegLiveness: true
stack:
- { id: 0, size: 8, alignment: 4, local-offset: 0 }
body: |
bb.0:
liveins: $sgpr4
; GCN-LABEL: name: commute_frame_index__v_add_co_u32_e64__fi_sgpr
; GCN: liveins: $sgpr4
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GCN-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 %stack.0, killed [[COPY]], 0, implicit $exec
; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]]
%0:sreg_32 = COPY $sgpr4
%1:vgpr_32, %2:sreg_64 = V_ADD_CO_U32_e64 %stack.0, killed %0, 0, implicit $exec
S_ENDPGM 0, implicit %1
...

---
name: commute_frame_index__v_add_co_u32__vgpr_fi
tracksRegLiveness: true
stack:
- { id: 0, size: 8, alignment: 4, local-offset: 0 }
body: |
bb.0:
liveins: $vgpr0
; GCN-LABEL: name: commute_frame_index__v_add_co_u32__vgpr_fi
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %stack.0, killed [[COPY]], implicit-def dead $vcc, implicit $exec
; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = V_ADD_CO_U32_e32 killed %0, %stack.0, implicit-def dead $vcc, implicit $exec
S_ENDPGM 0, implicit %1
...

---
name: commute_frame_index__v_add_co_u32__fi_vgpr
tracksRegLiveness: true
stack:
- { id: 0, size: 8, alignment: 4, local-offset: 0 }
body: |
bb.0.entry:
liveins: $vgpr0
; GCN-LABEL: name: commute_frame_index__v_add_co_u32__fi_vgpr
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %stack.0, killed [[COPY]], implicit-def dead $vcc, implicit $exec
; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = V_ADD_CO_U32_e32 %stack.0, killed %0, implicit-def dead $vcc, implicit $exec
S_ENDPGM 0, implicit %1
...

---
name: commute_frame_index__v_add_co_u32_e32__fi_fi
tracksRegLiveness: true
stack:
- { id: 0, size: 8, alignment: 4, local-offset: 0 }
- { id: 1, size: 8, alignment: 4, local-offset: 0 }
body: |
bb.0:
; GCN-LABEL: name: commute_frame_index__v_add_co_u32_e32__fi_fi
; GCN: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %stack.0, %stack.1, implicit-def dead $vcc, implicit $exec
; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]]
%0:vgpr_32 = V_ADD_CO_U32_e32 %stack.0, %stack.1, implicit-def dead $vcc, implicit $exec
S_ENDPGM 0, implicit %0
...
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir
@@ -1170,31 +1170,31 @@ body: |
; MUBUF-NEXT: {{ $}}
; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec
; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 72, killed $vgpr1, implicit $exec
; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr8, killed $vgpr1, implicit $exec
; MUBUF-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8
;
; MUBUFW32-LABEL: name: v_add_u32_e32__kernel_fi_offset72__sgpr_live_after
; MUBUFW32: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3
; MUBUFW32-NEXT: {{ $}}
; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
; MUBUFW32-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec
; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 72, killed $vgpr1, implicit $exec
; MUBUFW32-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr8, killed $vgpr1, implicit $exec
; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8
;
; FLATSCRW64-LABEL: name: v_add_u32_e32__kernel_fi_offset72__sgpr_live_after
; FLATSCRW64: liveins: $sgpr8
; FLATSCRW64-NEXT: {{ $}}
; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec
; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 72, killed $vgpr1, implicit $exec
; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr8, killed $vgpr1, implicit $exec
; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8
;
; FLATSCRW32-LABEL: name: v_add_u32_e32__kernel_fi_offset72__sgpr_live_after
; FLATSCRW32: liveins: $sgpr8
; FLATSCRW32-NEXT: {{ $}}
; FLATSCRW32-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec
; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 72, killed $vgpr1, implicit $exec
; FLATSCRW32-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr8, killed $vgpr1, implicit $exec
; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8
renamable $vgpr0 = V_ADD_U32_e32 renamable $sgpr8, %stack.1, implicit $exec
SI_RETURN implicit $vgpr0, implicit $sgpr8
33 changes: 33 additions & 0 deletions llvm/test/CodeGen/NVPTX/i8x2-instructions.ll
@@ -0,0 +1,33 @@
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 -mattr=+ptx80 -asm-verbose=false \
; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
; RUN: | FileCheck %s
; RUN: %if ptxas %{ \
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 -asm-verbose=false \
; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
; RUN: | %ptxas-verify -arch=sm_90 \
; RUN: %}

target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"

; CHECK-LABEL: test_bitcast_2xi8_i16(
; CHECK: ld.param.u32 %r1, [test_bitcast_2xi8_i16_param_0];
; CHECK: mov.b32 {%rs1, %rs2}, %r1;
; CHECK: shl.b16 %rs3, %rs2, 8;
; CHECK: and.b16 %rs4, %rs1, 255;
; CHECK: or.b16 %rs5, %rs4, %rs3;
; CHECK: cvt.u32.u16 %r2, %rs5;
; CHECK: st.param.b32 [func_retval0], %r2;
define i16 @test_bitcast_2xi8_i16(<2 x i8> %a) {
%res = bitcast <2 x i8> %a to i16
ret i16 %res
}

; CHECK-LABEL: test_bitcast_i16_2xi8(
; CHECK: ld.param.u16 %rs1, [test_bitcast_i16_2xi8_param_0];
; CHECK: shr.u16 %rs2, %rs1, 8;
; CHECK: mov.b32 %r1, {%rs1, %rs2};
; CHECK: st.param.b32 [func_retval0], %r1;
define <2 x i8> @test_bitcast_i16_2xi8(i16 %a) {
%res = bitcast i16 %a to <2 x i8>
ret <2 x i8> %res
}
310 changes: 310 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll
@@ -0,0 +1,310 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck -check-prefixes=CHECK,RV32 %s
; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck -check-prefixes=CHECK,RV64 %s


define <8 x i1> @v8i1_v16i1(<16 x i1>) {
; RV32-LABEL: v8i1_v16i1:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV32-NEXT: vmv.x.s a0, v0
; RV32-NEXT: slli a1, a0, 19
; RV32-NEXT: srli a1, a1, 31
; RV32-NEXT: slli a2, a0, 26
; RV32-NEXT: srli a2, a2, 31
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV32-NEXT: vmv.v.x v8, a2
; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: slli a1, a0, 24
; RV32-NEXT: srli a1, a1, 31
; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: slli a1, a0, 29
; RV32-NEXT: srli a1, a1, 31
; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: slli a1, a0, 18
; RV32-NEXT: srli a1, a1, 31
; RV32-NEXT: slli a2, a0, 16
; RV32-NEXT: srli a2, a2, 31
; RV32-NEXT: vmv.v.x v9, a2
; RV32-NEXT: vslide1down.vx v9, v9, a1
; RV32-NEXT: slli a1, a0, 27
; RV32-NEXT: srli a1, a1, 31
; RV32-NEXT: vslide1down.vx v9, v9, a1
; RV32-NEXT: slli a0, a0, 28
; RV32-NEXT: srli a0, a0, 31
; RV32-NEXT: vmv.v.i v0, 15
; RV32-NEXT: vslide1down.vx v9, v9, a0
; RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
; RV32-NEXT: vand.vi v8, v9, 1
; RV32-NEXT: vmsne.vi v0, v8, 0
; RV32-NEXT: ret
;
; RV64-LABEL: v8i1_v16i1:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64-NEXT: vmv.x.s a0, v0
; RV64-NEXT: slli a1, a0, 51
; RV64-NEXT: srli a1, a1, 63
; RV64-NEXT: slli a2, a0, 58
; RV64-NEXT: srli a2, a2, 63
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV64-NEXT: vmv.v.x v8, a2
; RV64-NEXT: vslide1down.vx v8, v8, a1
; RV64-NEXT: slli a1, a0, 56
; RV64-NEXT: srli a1, a1, 63
; RV64-NEXT: vslide1down.vx v8, v8, a1
; RV64-NEXT: slli a1, a0, 61
; RV64-NEXT: srli a1, a1, 63
; RV64-NEXT: vslide1down.vx v8, v8, a1
; RV64-NEXT: slli a1, a0, 50
; RV64-NEXT: srli a1, a1, 63
; RV64-NEXT: slli a2, a0, 48
; RV64-NEXT: srli a2, a2, 63
; RV64-NEXT: vmv.v.x v9, a2
; RV64-NEXT: vslide1down.vx v9, v9, a1
; RV64-NEXT: slli a1, a0, 59
; RV64-NEXT: srli a1, a1, 63
; RV64-NEXT: vslide1down.vx v9, v9, a1
; RV64-NEXT: slli a0, a0, 60
; RV64-NEXT: srli a0, a0, 63
; RV64-NEXT: vmv.v.i v0, 15
; RV64-NEXT: vslide1down.vx v9, v9, a0
; RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
; RV64-NEXT: vand.vi v8, v9, 1
; RV64-NEXT: vmsne.vi v0, v8, 0
; RV64-NEXT: ret
%2 = shufflevector <16 x i1> %0, <16 x i1> poison, <8 x i32> <i32 5, i32 12, i32 7, i32 2, i32 15, i32 13, i32 4, i32 3>
ret <8 x i1> %2
}

define <4 x i32> @v4i32_v8i32(<8 x i32>) {
; CHECK-LABEL: v4i32_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vid.v v10
; CHECK-NEXT: vsrl.vi v10, v10, 1
; CHECK-NEXT: vrsub.vi v11, v10, 3
; CHECK-NEXT: vrgather.vv v10, v8, v11
; CHECK-NEXT: vmv.v.i v0, 5
; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v8, 4
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT: vslidedown.vi v10, v8, 1, v0.t
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
%2 = shufflevector <8 x i32> %0, <8 x i32> poison, <4 x i32> <i32 5, i32 3, i32 7, i32 2>
ret <4 x i32> %2
}

define <4 x i32> @v4i32_v16i32(<16 x i32>) {
; RV32-LABEL: v4i32_v16i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vmv.v.i v12, 1
; RV32-NEXT: vmv.v.i v14, 6
; RV32-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV32-NEXT: vslideup.vi v14, v12, 1
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vid.v v12
; RV32-NEXT: vadd.vv v12, v12, v12
; RV32-NEXT: vadd.vi v15, v12, 1
; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32-NEXT: vrgatherei16.vv v12, v8, v15
; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; RV32-NEXT: vmv.v.i v0, 10
; RV32-NEXT: vsetivli zero, 8, e32, m4, ta, ma
; RV32-NEXT: vslidedown.vi v8, v8, 8
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV32-NEXT: vrgatherei16.vv v12, v8, v14, v0.t
; RV32-NEXT: vmv1r.v v8, v12
; RV32-NEXT: ret
;
; RV64-LABEL: v4i32_v16i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64-NEXT: vid.v v12
; RV64-NEXT: vadd.vv v12, v12, v12
; RV64-NEXT: vadd.vi v14, v12, 1
; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64-NEXT: vrgatherei16.vv v12, v8, v14
; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; RV64-NEXT: vmv.v.i v0, 10
; RV64-NEXT: vsetivli zero, 8, e32, m4, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 8
; RV64-NEXT: li a0, 3
; RV64-NEXT: slli a0, a0, 33
; RV64-NEXT: addi a0, a0, 1
; RV64-NEXT: slli a0, a0, 16
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vmv.v.x v10, a0
; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV64-NEXT: vrgatherei16.vv v12, v8, v10, v0.t
; RV64-NEXT: vmv1r.v v8, v12
; RV64-NEXT: ret
%2 = shufflevector <16 x i32> %0, <16 x i32> poison, <4 x i32> <i32 1, i32 9, i32 5, i32 14>
ret <4 x i32> %2
}

define <4 x i32> @v4i32_v32i32(<32 x i32>) {
; RV32-LABEL: v4i32_v32i32:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -256
; RV32-NEXT: .cfi_def_cfa_offset 256
; RV32-NEXT: sw ra, 252(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 248(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: .cfi_offset s0, -8
; RV32-NEXT: addi s0, sp, 256
; RV32-NEXT: .cfi_def_cfa s0, 0
; RV32-NEXT: andi sp, sp, -128
; RV32-NEXT: li a0, 32
; RV32-NEXT: mv a1, sp
; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; RV32-NEXT: vse32.v v8, (a1)
; RV32-NEXT: lw a0, 36(sp)
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vslidedown.vi v10, v8, 1
; RV32-NEXT: vmv.x.s a1, v10
; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vslide1down.vx v10, v10, a0
; RV32-NEXT: lw a0, 120(sp)
; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT: vslidedown.vi v8, v8, 4
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vslide1down.vx v8, v10, a1
; RV32-NEXT: vslide1down.vx v8, v8, a0
; RV32-NEXT: addi sp, s0, -256
; RV32-NEXT: lw ra, 252(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 248(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 256
; RV32-NEXT: ret
;
; RV64-LABEL: v4i32_v32i32:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -256
; RV64-NEXT: .cfi_def_cfa_offset 256
; RV64-NEXT: sd ra, 248(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s0, 240(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: .cfi_offset s0, -16
; RV64-NEXT: addi s0, sp, 256
; RV64-NEXT: .cfi_def_cfa s0, 0
; RV64-NEXT: andi sp, sp, -128
; RV64-NEXT: li a0, 32
; RV64-NEXT: mv a1, sp
; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; RV64-NEXT: vse32.v v8, (a1)
; RV64-NEXT: lw a0, 36(sp)
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v10, v8, 1
; RV64-NEXT: vmv.x.s a1, v10
; RV64-NEXT: vmv.v.x v10, a1
; RV64-NEXT: vslide1down.vx v10, v10, a0
; RV64-NEXT: lw a0, 120(sp)
; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 4
; RV64-NEXT: vmv.x.s a1, v8
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vslide1down.vx v8, v10, a1
; RV64-NEXT: vslide1down.vx v8, v8, a0
; RV64-NEXT: addi sp, s0, -256
; RV64-NEXT: ld ra, 248(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 240(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 256
; RV64-NEXT: ret
%2 = shufflevector <32 x i32> %0, <32 x i32> poison, <4 x i32> <i32 1, i32 9, i32 4, i32 30>
ret <4 x i32> %2
}

define <16 x i1> @v16i1_v8i1(<8 x i1>) {
; CHECK-LABEL: v16i1_v8i1:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI4_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI4_0)
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vmv.v.i v9, 0
; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
; CHECK-NEXT: vrgather.vv v10, v9, v8
; CHECK-NEXT: vmsne.vi v0, v10, 0
; CHECK-NEXT: ret
%2 = shufflevector <8 x i1> %0, <8 x i1> poison, <16 x i32> <i32 2, i32 3, i32 0, i32 5, i32 1, i32 2, i32 0, i32 6, i32 2, i32 3, i32 0, i32 7, i32 1, i32 2, i32 0, i32 4>
ret <16 x i1> %2
}

define <8 x i32> @v8i32_v4i32(<4 x i32>) {
; CHECK-LABEL: v8i32_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI5_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI5_0)
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vle16.v v12, (a0)
; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
%2 = shufflevector <4 x i32> %0, <4 x i32> poison, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 1, i32 2, i32 0, i32 3>
ret <8 x i32> %2
}

define <16 x i32> @v16i32_v4i32(<4 x i32>) {
; CHECK-LABEL: v16i32_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 2
; CHECK-NEXT: addi a1, a0, 265
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, 3
; CHECK-NEXT: vmerge.vim v10, v9, 2, v0
; CHECK-NEXT: lui a1, 4
; CHECK-NEXT: addi a1, a1, 548
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: addi a0, a0, -1856
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT: vmerge.vim v10, v10, 0, v0
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vmerge.vim v9, v10, 1, v0
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vsext.vf2 v16, v9
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vrgatherei16.vv v12, v8, v16
; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
%2 = shufflevector <4 x i32> %0, <4 x i32> poison, <16 x i32> <i32 2, i32 3, i32 0, i32 2, i32 3, i32 0, i32 1, i32 1, i32 2, i32 0, i32 3, i32 1, i32 1, i32 2, i32 0, i32 3>
ret <16 x i32> %2
}

define <32 x i32> @v32i32_v4i32(<4 x i32>) {
; CHECK-LABEL: v32i32_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: lui a1, 135432
; CHECK-NEXT: addi a1, a1, 1161
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT: vmv.v.i v10, 3
; CHECK-NEXT: vmerge.vim v10, v10, 2, v0
; CHECK-NEXT: lui a0, 270865
; CHECK-NEXT: addi a0, a0, 548
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: lui a0, 100550
; CHECK-NEXT: addi a0, a0, 64
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; CHECK-NEXT: vmerge.vim v10, v10, 0, v0
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vmerge.vim v10, v10, 1, v0
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT: vsext.vf2 v24, v10
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT: vrgatherei16.vv v16, v8, v24
; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret
%2 = shufflevector <4 x i32> %0, <4 x i32> poison, <32 x i32> <i32 2, i32 3, i32 0, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 2, i32 3, i32 0, i32 1, i32 1, i32 2, i32 0, i32 3, i32 1, i32 1, i32 2, i32 0, i32 3, i32 1, i32 2, i32 0, i32 3, i32 1, i32 1, i32 2, i32 0, i32 3>
ret <32 x i32> %2
}
90 changes: 90 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
@@ -90,6 +90,64 @@ define {<2 x i64>, <2 x i64>} @vector_deinterleave_v2i64_v4i64(<4 x i64> %vec) {
ret {<2 x i64>, <2 x i64>} %retval
}

define {<4 x i64>, <4 x i64>} @vector_deinterleave_v4i64_v8i64(<8 x i64> %vec) {
; CHECK-LABEL: vector_deinterleave_v4i64_v8i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vid.v v12
; CHECK-NEXT: vadd.vv v14, v12, v12
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vrgatherei16.vv v12, v8, v14
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vadd.vi v15, v14, -4
; CHECK-NEXT: vmv.v.i v0, 12
; CHECK-NEXT: vsetivli zero, 4, e64, m4, ta, ma
; CHECK-NEXT: vslidedown.vi v16, v8, 4
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT: vrgatherei16.vv v12, v16, v15, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vadd.vi v15, v14, 1
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vrgatherei16.vv v10, v8, v15
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vadd.vi v8, v14, -3
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vrgatherei16.vv v10, v16, v8, v0.t
; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
%retval = call {<4 x i64>, <4 x i64>} @llvm.vector.deinterleave2.v8i64(<8 x i64> %vec)
ret {<4 x i64>, <4 x i64>} %retval
}

define {<8 x i64>, <8 x i64>} @vector_deinterleave_v8i64_v16i64(<16 x i64> %vec) {
; CHECK-LABEL: vector_deinterleave_v8i64_v16i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv8r.v v16, v8
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vv v7, v8, v8
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: vrgatherei16.vv v8, v16, v7
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v0, -16
; CHECK-NEXT: vadd.vi v12, v7, -8
; CHECK-NEXT: vsetivli zero, 8, e64, m8, ta, ma
; CHECK-NEXT: vslidedown.vi v24, v16, 8
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; CHECK-NEXT: vrgatherei16.vv v8, v24, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vadd.vi v20, v7, 1
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: vrgatherei16.vv v12, v16, v20
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vadd.vi v16, v7, -7
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vrgatherei16.vv v12, v24, v16, v0.t
; CHECK-NEXT: ret
%retval = call {<8 x i64>, <8 x i64>} @llvm.vector.deinterleave2.v16i64(<16 x i64> %vec)
ret {<8 x i64>, <8 x i64>} %retval
}

declare {<16 x i1>, <16 x i1>} @llvm.vector.deinterleave2.v32i1(<32 x i1>)
declare {<16 x i8>, <16 x i8>} @llvm.vector.deinterleave2.v32i8(<32 x i8>)
declare {<8 x i16>, <8 x i16>} @llvm.vector.deinterleave2.v16i16(<16 x i16>)
@@ -176,9 +234,41 @@ define {<2 x double>, <2 x double>} @vector_deinterleave_v2f64_v4f64(<4 x double
ret {<2 x double>, <2 x double>} %retval
}

define {<4 x double>, <4 x double>} @vector_deinterleave_v4f64_v8f64(<8 x double> %vec) {
; CHECK-LABEL: vector_deinterleave_v4f64_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vid.v v12
; CHECK-NEXT: vadd.vv v14, v12, v12
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vrgatherei16.vv v12, v8, v14
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vadd.vi v15, v14, -4
; CHECK-NEXT: vmv.v.i v0, 12
; CHECK-NEXT: vsetivli zero, 4, e64, m4, ta, ma
; CHECK-NEXT: vslidedown.vi v16, v8, 4
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT: vrgatherei16.vv v12, v16, v15, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vadd.vi v15, v14, 1
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vrgatherei16.vv v10, v8, v15
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vadd.vi v8, v14, -3
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vrgatherei16.vv v10, v16, v8, v0.t
; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
%retval = call {<4 x double>, <4 x double>} @llvm.vector.deinterleave2.v8f64(<8 x double> %vec)
ret {<4 x double>, <4 x double>} %retval
}

declare {<2 x half>,<2 x half>} @llvm.vector.deinterleave2.v4f16(<4 x half>)
declare {<4 x half>, <4 x half>} @llvm.vector.deinterleave2.v8f16(<8 x half>)
declare {<2 x float>, <2 x float>} @llvm.vector.deinterleave2.v4f32(<4 x float>)
declare {<8 x half>, <8 x half>} @llvm.vector.deinterleave2.v16f16(<16 x half>)
declare {<4 x float>, <4 x float>} @llvm.vector.deinterleave2.v8f32(<8 x float>)
declare {<2 x double>, <2 x double>} @llvm.vector.deinterleave2.v4f64(<4 x double>)
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV32: {{.*}}
; RV64: {{.*}}
16 changes: 16 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
@@ -84,6 +84,22 @@ define {<vscale x 2 x i64>, <vscale x 2 x i64>} @vector_deinterleave_nxv2i64_nxv
ret {<vscale x 2 x i64>, <vscale x 2 x i64>} %retval
}

define {<vscale x 4 x i64>, <vscale x 4 x i64>} @vector_deinterleave_nxv4i64_nxv8i64(<vscale x 8 x i64> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv4i64_nxv8i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT: vid.v v16
; CHECK-NEXT: vadd.vv v24, v16, v16
; CHECK-NEXT: vrgather.vv v16, v8, v24
; CHECK-NEXT: vadd.vi v24, v24, 1
; CHECK-NEXT: vrgather.vv v0, v8, v24
; CHECK-NEXT: vmv4r.v v8, v16
; CHECK-NEXT: vmv4r.v v12, v0
; CHECK-NEXT: ret
%retval = call {<vscale x 4 x i64>, <vscale x 4 x i64>} @llvm.vector.deinterleave2.nxv8i64(<vscale x 8 x i64> %vec)
ret {<vscale x 4 x i64>, <vscale x 4 x i64>} %retval
}

declare {<vscale x 16 x i1>, <vscale x 16 x i1>} @llvm.vector.deinterleave2.nxv32i1(<vscale x 32 x i1>)
declare {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8>)
declare {<vscale x 8 x i16>, <vscale x 8 x i16>} @llvm.vector.deinterleave2.nxv16i16(<vscale x 16 x i16>)
@@ -7,10 +7,10 @@
# parent block is dead.
#
# CHECK: Link graph
# CHECK-DAG: section parent:
# CHECK-EMPTY:
# CHECK-DAG: section child:
# CHECK-EMPTY:
# CHECK-DAG: section parent:
# CHECK-EMPTY:

--- !COFF
header:
4 changes: 2 additions & 2 deletions llvm/test/ExecutionEngine/JITLink/x86-64/COFF_pdata_strip.s
@@ -8,9 +8,9 @@
#
# CHECK: section .func:
# CHECK-EMPTY:
# CHECK-NEXT: section .xdata:
# CHECK-EMPTY:
# CHECK-NEXT: section .pdata:
# CHECK-EMPTY:
# CHECK: section .xdata:
# CHECK-EMPTY:

.text
2 changes: 1 addition & 1 deletion llvm/test/MC/ARM/mve-fp-registers.s
@@ -1,5 +1,5 @@
// Some simple operations on S, D and Q registers (loads, stores and moves) are
// also avaliable in MVE, even in the integer-only version. Some of these
// also available in MVE, even in the integer-only version. Some of these
// instructions (operating on D or Q registers, or FP16 values) are only
// available for certain targets.

2 changes: 1 addition & 1 deletion llvm/test/Transforms/Attributor/nofpclass-phiselect.ll
@@ -98,7 +98,7 @@ exit:
}

define float @phi_phi(i1 %c, float nofpclass(inf) %base, float nofpclass(inf) %arg) {
; CHECK-LABEL: define float @phi_phi
; CHECK-LABEL: define nofpclass(inf) float @phi_phi
; CHECK-SAME: (i1 [[C:%.*]], float nofpclass(inf) [[BASE:%.*]], float nofpclass(inf) [[ARG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
1 change: 0 additions & 1 deletion llvm/test/Transforms/EarlyCSE/nofpclass-phi-regression.ll
@@ -20,7 +20,6 @@ define void @compute_known_fpclass_phi_assert(i1 %cmp46, i1 %tobool51, ptr %a01)
; CHECK-NEXT: [[TMP1]] = phi double [ 0.000000e+00, [[IF_THEN52]] ], [ [[TMP0]], [[FOR_BODY48]] ]
; CHECK-NEXT: br label [[FOR_COND45]]
; CHECK: for.end82:
; CHECK-NEXT: [[MUL84:%.*]] = fmul double [[TMP0]], 0.000000e+00
; CHECK-NEXT: ret void
;
entry:
31 changes: 31 additions & 0 deletions llvm/test/Transforms/Inline/inline-cost-annotation-pass.ll
@@ -33,3 +33,34 @@ define ptr @main() {
%2 = call ptr @foo()
ret ptr %1
}

; Make sure it also analyzes invoke call sites.

; CHECK: Analyzing call of g... (caller:f)
; CHECK: define i32 @g(i32 %v) {
; CHECK: ; cost before = {{.*}}, cost after = {{.*}}, threshold before = {{.*}}, threshold after = {{.*}}, cost delta = {{.*}}
; CHECK: %p = icmp ugt i32 %v, 35
; CHECK: ; cost before = {{.*}}, cost after = {{.*}}, threshold before = {{.*}}, threshold after = {{.*}}, cost delta = {{.*}}
; CHECK: %r = select i1 %p, i32 %v, i32 7
; CHECK: ; cost before = {{.*}}, cost after = {{.*}}, threshold before = {{.*}}, threshold after = {{.*}}, cost delta = {{.*}}
; CHECK: ret i32 %r
; CHECK: }
define i32 @g(i32 %v) {
%p = icmp ugt i32 %v, 35
%r = select i1 %p, i32 %v, i32 7
ret i32 %r
}

define void @f(i32 %v, ptr %dst) personality ptr @__gxx_personality_v0 {
%v1 = invoke i32 @g(i32 %v)
to label %bb1 unwind label %bb2
bb1:
store i32 %v1, ptr %dst
ret void
bb2:
%lpad.loopexit80 = landingpad { ptr, i32 }
cleanup
ret void
}

declare i32 @__gxx_personality_v0(...)
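
; Note: the file's RUN line sits above this hunk and is not shown. As a
; hedged sketch only -- the pass name below is an assumption, not taken from
; this diff -- the inline-cost annotation printer that emits the
; "Analyzing call of ..." lines is typically driven like this:
; RUN: opt < %s -passes='print<inline-cost>' -disable-output 2>&1 | FileCheck %s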
6 changes: 4 additions & 2 deletions llvm/test/Transforms/InstCombine/div.ll
@@ -1163,7 +1163,8 @@ define <2 x i8> @sdiv_constant_dividend_select_of_constants_divisor_vec(i1 %b) {

define <2 x i8> @sdiv_constant_dividend_select_of_constants_divisor_vec_ub1(i1 %b) {
; CHECK-LABEL: @sdiv_constant_dividend_select_of_constants_divisor_vec_ub1(
; CHECK-NEXT: ret <2 x i8> <i8 -10, i8 -10>
; CHECK-NEXT: [[R:%.*]] = select i1 [[B:%.*]], <2 x i8> <i8 poison, i8 8>, <2 x i8> <i8 -10, i8 -10>
; CHECK-NEXT: ret <2 x i8> [[R]]
;
%s = select i1 %b, <2 x i8> <i8 0, i8 -5>, <2 x i8> <i8 -4, i8 4>
%r = sdiv <2 x i8> <i8 42, i8 -42>, %s
@@ -1269,7 +1270,8 @@ define <2 x i8> @udiv_constant_dividend_select_of_constants_divisor_vec(i1 %b) {

define <2 x i8> @udiv_constant_dividend_select_of_constants_divisor_vec_ub1(i1 %b) {
; CHECK-LABEL: @udiv_constant_dividend_select_of_constants_divisor_vec_ub1(
; CHECK-NEXT: ret <2 x i8> <i8 0, i8 53>
; CHECK-NEXT: [[R:%.*]] = select i1 [[B:%.*]], <2 x i8> <i8 poison, i8 0>, <2 x i8> <i8 0, i8 53>
; CHECK-NEXT: ret <2 x i8> [[R]]
;
%s = select i1 %b, <2 x i8> <i8 0, i8 -5>, <2 x i8> <i8 -4, i8 4>
%r = udiv <2 x i8> <i8 42, i8 -42>, %s
3 changes: 2 additions & 1 deletion llvm/test/Transforms/InstCombine/rem.ll
@@ -997,7 +997,8 @@ define <2 x i8> @urem_constant_dividend_select_of_constants_divisor_vec(i1 %b) {

define <2 x i8> @urem_constant_dividend_select_of_constants_divisor_vec_ub1(i1 %b) {
; CHECK-LABEL: @urem_constant_dividend_select_of_constants_divisor_vec_ub1(
; CHECK-NEXT: ret <2 x i8> <i8 42, i8 2>
; CHECK-NEXT: [[R:%.*]] = select i1 [[B:%.*]], <2 x i8> <i8 poison, i8 -42>, <2 x i8> <i8 42, i8 2>
; CHECK-NEXT: ret <2 x i8> [[R]]
;
%s = select i1 %b, <2 x i8> <i8 0, i8 -5>, <2 x i8> <i8 -4, i8 4>
%r = urem <2 x i8> <i8 42, i8 -42>, %s
15 changes: 15 additions & 0 deletions llvm/test/Transforms/InstCombine/saturating-add-sub.ll
@@ -1890,6 +1890,21 @@ define <2 x i32> @uadd_sat_not_ugt_commute_add(<2 x i32> %x, <2 x i32> %yp) {
ret <2 x i32> %r
}

define <2 x i32> @uadd_sat_not_ugt_commute_add_partial_poison(<2 x i32> %x, <2 x i32> %yp) {
; CHECK-LABEL: @uadd_sat_not_ugt_commute_add_partial_poison(
; CHECK-NEXT: [[NOTX:%.*]] = xor <2 x i32> [[X:%.*]], <i32 -1, i32 poison>
; CHECK-NEXT: [[A:%.*]] = add nuw <2 x i32> [[YP:%.*]], [[NOTX]]
; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i32> [[YP]], [[X]]
; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C]], <2 x i32> <i32 -1, i32 -1>, <2 x i32> [[A]]
; CHECK-NEXT: ret <2 x i32> [[R]]
;
%notx = xor <2 x i32> %x, <i32 -1, i32 poison>
%a = add nuw <2 x i32> %yp, %notx
%c = icmp ugt <2 x i32> %yp, %x
%r = select <2 x i1> %c, <2 x i32> <i32 -1, i32 -1>, <2 x i32> %a
ret <2 x i32> %r
}

define i32 @uadd_sat_not_commute_select(i32 %x, i32 %y) {
; CHECK-LABEL: @uadd_sat_not_commute_select(
; CHECK-NEXT: [[NOTX:%.*]] = xor i32 [[X:%.*]], -1
13 changes: 13 additions & 0 deletions llvm/test/Transforms/InstCombine/vector-udiv.ll
@@ -97,3 +97,16 @@ define <4 x i32> @test_v4i32_zext_shl_const_pow2(<4 x i32> %a0, <4 x i16> %a1) {
%3 = udiv <4 x i32> %a0, %2
ret <4 x i32> %3
}

; Make sure we do not simplify udiv <i32 42, i32 -7>, <i32 0, i32 1> to
; poison when threading udiv over selects

define <2 x i32> @vec_select_udiv_poison(<2 x i1> %x) {
; CHECK-LABEL: @vec_select_udiv_poison(
; CHECK-NEXT: [[DIV:%.*]] = select <2 x i1> [[X:%.*]], <2 x i32> zeroinitializer, <2 x i32> <i32 poison, i32 -7>
; CHECK-NEXT: ret <2 x i32> [[DIV]]
;
%sel = select <2 x i1> %x, <2 x i32> <i32 -1, i32 -1>, <2 x i32> <i32 0, i32 1>
%div = udiv <2 x i32> <i32 42, i32 -7>, %sel
ret <2 x i32> %div
}
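
; Note: a minimal sketch of the lane-wise semantics this test (and the
; InstSimplify changes below) pin down -- illustrative IR only, not part of
; the patch, and the function name is made up. A zero divisor may turn its
; own lane into poison, but must not make the whole result poison:
define <2 x i32> @lanewise_fold_sketch() {
  ; Lane 0 divides by zero, so only that lane may fold to poison;
  ; lane 1 folds to 42 / 1 = 42.
  %d = udiv <2 x i32> <i32 42, i32 42>, <i32 0, i32 1>
  ret <2 x i32> %d ; may fold to <i32 poison, i32 42>, never to plain poison
}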
16 changes: 10 additions & 6 deletions llvm/test/Transforms/InstSimplify/div.ll
@@ -29,47 +29,51 @@ define <2 x i32> @zero_dividend_vector_poison_elt(<2 x i32> %A) {

define <2 x i8> @sdiv_zero_elt_vec_constfold(<2 x i8> %x) {
; CHECK-LABEL: @sdiv_zero_elt_vec_constfold(
; CHECK-NEXT: ret <2 x i8> poison
; CHECK-NEXT: ret <2 x i8> <i8 poison, i8 0>
;
%div = sdiv <2 x i8> <i8 1, i8 2>, <i8 0, i8 -42>
ret <2 x i8> %div
}

define <2 x i8> @udiv_zero_elt_vec_constfold(<2 x i8> %x) {
; CHECK-LABEL: @udiv_zero_elt_vec_constfold(
; CHECK-NEXT: ret <2 x i8> poison
; CHECK-NEXT: ret <2 x i8> <i8 0, i8 poison>
;
%div = udiv <2 x i8> <i8 1, i8 2>, <i8 42, i8 0>
ret <2 x i8> %div
}

define <2 x i8> @sdiv_zero_elt_vec(<2 x i8> %x) {
; CHECK-LABEL: @sdiv_zero_elt_vec(
; CHECK-NEXT: ret <2 x i8> poison
; CHECK-NEXT: [[DIV:%.*]] = sdiv <2 x i8> [[X:%.*]], <i8 -42, i8 0>
; CHECK-NEXT: ret <2 x i8> [[DIV]]
;
%div = sdiv <2 x i8> %x, <i8 -42, i8 0>
ret <2 x i8> %div
}

define <2 x i8> @udiv_zero_elt_vec(<2 x i8> %x) {
; CHECK-LABEL: @udiv_zero_elt_vec(
; CHECK-NEXT: ret <2 x i8> poison
; CHECK-NEXT: [[DIV:%.*]] = udiv <2 x i8> [[X:%.*]], <i8 0, i8 42>
; CHECK-NEXT: ret <2 x i8> [[DIV]]
;
%div = udiv <2 x i8> %x, <i8 0, i8 42>
ret <2 x i8> %div
}

define <2 x i8> @sdiv_poison_elt_vec(<2 x i8> %x) {
; CHECK-LABEL: @sdiv_poison_elt_vec(
; CHECK-NEXT: ret <2 x i8> poison
; CHECK-NEXT: [[DIV:%.*]] = sdiv <2 x i8> [[X:%.*]], <i8 -42, i8 poison>
; CHECK-NEXT: ret <2 x i8> [[DIV]]
;
%div = sdiv <2 x i8> %x, <i8 -42, i8 poison>
ret <2 x i8> %div
}

define <2 x i8> @udiv_poison_elt_vec(<2 x i8> %x) {
; CHECK-LABEL: @udiv_poison_elt_vec(
; CHECK-NEXT: ret <2 x i8> poison
; CHECK-NEXT: [[DIV:%.*]] = udiv <2 x i8> [[X:%.*]], <i8 poison, i8 42>
; CHECK-NEXT: ret <2 x i8> [[DIV]]
;
%div = udiv <2 x i8> %x, <i8 poison, i8 42>
ret <2 x i8> %div
16 changes: 10 additions & 6 deletions llvm/test/Transforms/InstSimplify/rem.ll
@@ -29,47 +29,51 @@ define <2 x i32> @zero_dividend_vector_poison_elt(<2 x i32> %A) {

define <2 x i8> @srem_zero_elt_vec_constfold(<2 x i8> %x) {
; CHECK-LABEL: @srem_zero_elt_vec_constfold(
; CHECK-NEXT: ret <2 x i8> poison
; CHECK-NEXT: ret <2 x i8> <i8 poison, i8 2>
;
%rem = srem <2 x i8> <i8 1, i8 2>, <i8 0, i8 -42>
ret <2 x i8> %rem
}

define <2 x i8> @urem_zero_elt_vec_constfold(<2 x i8> %x) {
; CHECK-LABEL: @urem_zero_elt_vec_constfold(
; CHECK-NEXT: ret <2 x i8> poison
; CHECK-NEXT: ret <2 x i8> <i8 1, i8 poison>
;
%rem = urem <2 x i8> <i8 1, i8 2>, <i8 42, i8 0>
ret <2 x i8> %rem
}

define <2 x i8> @srem_zero_elt_vec(<2 x i8> %x) {
; CHECK-LABEL: @srem_zero_elt_vec(
; CHECK-NEXT: ret <2 x i8> poison
; CHECK-NEXT: [[REM:%.*]] = srem <2 x i8> [[X:%.*]], <i8 -42, i8 0>
; CHECK-NEXT: ret <2 x i8> [[REM]]
;
%rem = srem <2 x i8> %x, <i8 -42, i8 0>
ret <2 x i8> %rem
}

define <2 x i8> @urem_zero_elt_vec(<2 x i8> %x) {
; CHECK-LABEL: @urem_zero_elt_vec(
; CHECK-NEXT: ret <2 x i8> poison
; CHECK-NEXT: [[REM:%.*]] = urem <2 x i8> [[X:%.*]], <i8 0, i8 42>
; CHECK-NEXT: ret <2 x i8> [[REM]]
;
%rem = urem <2 x i8> %x, <i8 0, i8 42>
ret <2 x i8> %rem
}

define <2 x i8> @srem_undef_elt_vec(<2 x i8> %x) {
; CHECK-LABEL: @srem_undef_elt_vec(
; CHECK-NEXT: ret <2 x i8> poison
; CHECK-NEXT: [[REM:%.*]] = srem <2 x i8> [[X:%.*]], <i8 -42, i8 undef>
; CHECK-NEXT: ret <2 x i8> [[REM]]
;
%rem = srem <2 x i8> %x, <i8 -42, i8 undef>
ret <2 x i8> %rem
}

define <2 x i8> @urem_undef_elt_vec(<2 x i8> %x) {
; CHECK-LABEL: @urem_undef_elt_vec(
; CHECK-NEXT: ret <2 x i8> poison
; CHECK-NEXT: [[REM:%.*]] = urem <2 x i8> [[X:%.*]], <i8 undef, i8 42>
; CHECK-NEXT: ret <2 x i8> [[REM]]
;
%rem = urem <2 x i8> %x, <i8 undef, i8 42>
ret <2 x i8> %rem