690 changes: 669 additions & 21 deletions llvm/test/CodeGen/AArch64/GlobalISel/legalize-add.mir

Large diffs are not rendered by default.

414 changes: 207 additions & 207 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll

Large diffs are not rendered by default.

7 changes: 3 additions & 4 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
@@ -147,10 +147,9 @@ body: |
; CHECK: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s96), 64
; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[EXTRACT]], [[EXTRACT2]]
; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[EXTRACT1]], [[EXTRACT3]]
; CHECK: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF
; CHECK: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[AND]](s64), 0
; CHECK: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[AND1]](s32), 64
; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96)
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64)
; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[AND1]](s32)
; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96)
%0:_(s96) = COPY $vgpr0_vgpr1_vgpr2
%1:_(s96) = COPY $vgpr3_vgpr4_vgpr5
%2:_(s96) = G_AND %0, %1
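The pattern the updated CHECK lines encode: rather than inserting the s64 and s32 partial results into a G_IMPLICIT_DEF via G_INSERT, the legalizer now splits the s64 with G_UNMERGE_VALUES and rebuilds the s96 directly with G_MERGE_VALUES. A minimal C++ sketch of that rebuild, assuming LLVM's MachineIRBuilder interface from roughly this era (the helper name rebuildS96 is hypothetical, not from this patch):

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"

using namespace llvm;

// Rebuild an s96 value from an s64 low part and an s32 high part:
//   %uv0:s32, %uv1:s32 = G_UNMERGE_VALUES %lo64:s64
//   %res:s96 = G_MERGE_VALUES %uv0:s32, %uv1:s32, %hi32:s32
Register rebuildS96(MachineIRBuilder &B, Register Lo64, Register Hi32) {
  LLT S32 = LLT::scalar(32);
  LLT S96 = LLT::scalar(96);
  auto Unmerge = B.buildUnmerge(S32, Lo64);            // two s32 defs
  return B.buildMerge(S96, {Unmerge.getReg(0), Unmerge.getReg(1), Hi32})
      .getReg(0);
}

The same rebuild shows up unchanged in the legalize-constant, legalize-or, legalize-select, and legalize-xor hunks below.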
38 changes: 14 additions & 24 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir
@@ -883,38 +883,28 @@ body: |
; GFX9: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32
; GFX9: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[EXTRACT]], [[EXTRACT2]](<2 x s16>)
; GFX9: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[EXTRACT1]], [[EXTRACT3]](s16)
; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV4]](<3 x s16>), 0
; GFX9: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT4]], [[ASHR]](<2 x s16>), 0
; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT5]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
; GFX9: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV8]](<3 x s16>), 0
; GFX9: [[INSERT7:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT6]], [[ASHR1]](s16), 32
; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT7]](<4 x s16>)
; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR]](<2 x s16>)
; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
; GFX9: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF3]](<4 x s16>)
; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
; GFX9: [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV14]](<2 x s16>)
; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>)
; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32)
; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16)
; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32)
; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32)
; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[COPY4]](s32)
; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
%2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0
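The <3 x s16> shift results follow the same theme: instead of round-tripping through <12 x s16> G_CONCAT_VECTORS/G_UNMERGE_VALUES and chained G_INSERTs, the pieces are bitcast to s32, split with a 16-bit logical shift, and repacked with G_BUILD_VECTOR_TRUNC, with G_IMPLICIT_DEF supplying the undefined lanes. A sketch of the final repack, under the assumption of current MachineIRBuilder names (packV6S16 is a hypothetical helper; LLT::fixed_vector is spelled LLT::vector in older trees):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"

using namespace llvm;

// Pack six s32 values, each holding a 16-bit element in its low bits, into a
// <6 x s16> via three G_BUILD_VECTOR_TRUNC pairs and one G_CONCAT_VECTORS.
Register packV6S16(MachineIRBuilder &B, ArrayRef<Register> Elts /* 6 x s32 */) {
  LLT V2S16 = LLT::fixed_vector(2, 16);
  LLT V6S16 = LLT::fixed_vector(6, 16);
  auto P0 = B.buildBuildVectorTrunc(V2S16, {Elts[0], Elts[1]});
  auto P1 = B.buildBuildVectorTrunc(V2S16, {Elts[2], Elts[3]});
  auto P2 = B.buildBuildVectorTrunc(V2S16, {Elts[4], Elts[5]});
  return B.buildConcatVectors(
              V6S16, {P0.getReg(0), P1.getReg(0), P2.getReg(0)})
      .getReg(0);
}

The legalize-lshr and legalize-shl hunks below show the identical restructuring.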
7 changes: 3 additions & 4 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir
@@ -33,10 +33,9 @@ body: |
; CHECK-LABEL: name: test_constant_s96
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -4780896129847249538
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -547834910
; CHECK: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF
; CHECK: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[C]](s64), 0
; CHECK: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[C1]](s32), 64
; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96)
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[C1]](s32)
; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96)
%0:_(s96) = G_CONSTANT i96 -10105770365747857631829412482
$vgpr0_vgpr1_vgpr2 = COPY %0
982 changes: 521 additions & 461 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir

Large diffs are not rendered by default.

443 changes: 201 additions & 242 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir

Large diffs are not rendered by default.

4,042 changes: 2,128 additions & 1,914 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir

Large diffs are not rendered by default.

2,495 changes: 1,269 additions & 1,226 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir

Large diffs are not rendered by default.

839 changes: 527 additions & 312 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir

Large diffs are not rendered by default.

63 changes: 27 additions & 36 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir
@@ -728,19 +728,20 @@ body: |
; GFX9: [[EXTRACT5:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32
; GFX9: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[EXTRACT2]], [[EXTRACT4]](<2 x s16>)
; GFX9: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[EXTRACT3]], [[EXTRACT5]](s16)
; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR]](<2 x s16>)
; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32)
; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16)
; GFX9: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[DEF1]](s32)
; GFX9: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF2]](<2 x s16>)
; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
; GFX9: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT4]], [[LSHR]](<2 x s16>), 0
; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT5]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
; GFX9: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV4]](<3 x s16>), 0
; GFX9: [[INSERT7:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT6]], [[LSHR1]](s16), 32
; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT7]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
; GFX9: [[INSERT8:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV8]](<3 x s16>), 0
; GFX9: $vgpr0_vgpr1 = COPY [[INSERT8]](<4 x s16>)
; GFX9: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(<4 x s16>) = COPY $vgpr2_vgpr3
%2:_(<3 x s16>) = G_EXTRACT %0, 0
@@ -887,38 +888,28 @@ body: |
; GFX9: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32
; GFX9: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[EXTRACT]], [[EXTRACT2]](<2 x s16>)
; GFX9: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[EXTRACT1]], [[EXTRACT3]](s16)
; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV4]](<3 x s16>), 0
; GFX9: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT4]], [[LSHR]](<2 x s16>), 0
; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT5]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
; GFX9: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV8]](<3 x s16>), 0
; GFX9: [[INSERT7:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT6]], [[LSHR1]](s16), 32
; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT7]](<4 x s16>)
; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR]](<2 x s16>)
; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
; GFX9: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF3]](<4 x s16>)
; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
; GFX9: [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV14]](<2 x s16>)
; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>)
; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32)
; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16)
; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32)
; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32)
; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[COPY4]](s32)
; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
%2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0
7 changes: 3 additions & 4 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir
@@ -147,10 +147,9 @@ body: |
; CHECK: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s96), 64
; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[EXTRACT]], [[EXTRACT2]]
; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[EXTRACT1]], [[EXTRACT3]]
; CHECK: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF
; CHECK: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[OR]](s64), 0
; CHECK: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[OR1]](s32), 64
; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96)
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR]](s64)
; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[OR1]](s32)
; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96)
%0:_(s96) = COPY $vgpr0_vgpr1_vgpr2
%1:_(s96) = COPY $vgpr3_vgpr4_vgpr5
%2:_(s96) = G_OR %0, %1
19 changes: 8 additions & 11 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
@@ -172,10 +172,9 @@ body: |
; CHECK: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s96), 64
; CHECK: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[EXTRACT]], [[EXTRACT2]]
; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[EXTRACT1]], [[EXTRACT3]]
; CHECK: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF
; CHECK: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[SELECT]](s64), 0
; CHECK: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[SELECT1]](s32), 64
; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96)
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SELECT]](s64)
; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[SELECT1]](s32)
; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96)
%0:_(s96) = COPY $vgpr0_vgpr1_vgpr2
%1:_(s96) = COPY $vgpr3_vgpr4_vgpr5
%2:_(s32) = COPY $vgpr6
@@ -944,19 +943,17 @@ body: |
; CHECK: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[UV2]](s96), 64
; CHECK: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[EXTRACT]], [[EXTRACT2]]
; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[EXTRACT1]], [[EXTRACT3]]
; CHECK: [[DEF2:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF
; CHECK: [[COPY1:%[0-9]+]]:_(s96) = COPY [[DEF2]](s96)
; CHECK: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[COPY1]], [[SELECT]](s64), 0
; CHECK: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[SELECT1]](s32), 64
; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SELECT]](s64)
; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UV4]](s32), [[UV5]](s32), [[SELECT1]](s32)
; CHECK: [[EXTRACT4:%[0-9]+]]:_(s64) = G_EXTRACT [[UV1]](s96), 0
; CHECK: [[EXTRACT5:%[0-9]+]]:_(s32) = G_EXTRACT [[UV1]](s96), 64
; CHECK: [[EXTRACT6:%[0-9]+]]:_(s64) = G_EXTRACT [[UV3]](s96), 0
; CHECK: [[EXTRACT7:%[0-9]+]]:_(s32) = G_EXTRACT [[UV3]](s96), 64
; CHECK: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[EXTRACT4]], [[EXTRACT6]]
; CHECK: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[EXTRACT5]], [[EXTRACT7]]
; CHECK: [[INSERT2:%[0-9]+]]:_(s96) = G_INSERT [[DEF2]], [[SELECT2]](s64), 0
; CHECK: [[INSERT3:%[0-9]+]]:_(s96) = G_INSERT [[INSERT2]], [[SELECT3]](s32), 64
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s96>) = G_BUILD_VECTOR [[INSERT1]](s96), [[INSERT3]](s96)
; CHECK: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SELECT2]](s64)
; CHECK: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UV6]](s32), [[UV7]](s32), [[SELECT3]](s32)
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s96>) = G_BUILD_VECTOR [[MV]](s96), [[MV1]](s96)
; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s96>)
%0:_(<2 x s96>) = G_IMPLICIT_DEF
%1:_(<2 x s96>) = G_IMPLICIT_DEF
38 changes: 14 additions & 24 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir
@@ -730,38 +730,28 @@ body: |
; GFX9: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32
; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[EXTRACT]], [[EXTRACT2]](<2 x s16>)
; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[EXTRACT1]], [[EXTRACT3]](s16)
; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV4]](<3 x s16>), 0
; GFX9: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT4]], [[SHL]](<2 x s16>), 0
; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT5]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
; GFX9: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV8]](<3 x s16>), 0
; GFX9: [[INSERT7:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT6]], [[SHL1]](s16), 32
; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT7]](<4 x s16>)
; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[SHL]](<2 x s16>)
; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
; GFX9: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF3]](<4 x s16>)
; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
; GFX9: [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV14]](<2 x s16>)
; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>)
; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32)
; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL1]](s16)
; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32)
; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32)
; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[COPY4]](s32)
; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
%2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0
7 changes: 3 additions & 4 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir
@@ -147,10 +147,9 @@ body: |
; CHECK: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s96), 64
; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[EXTRACT]], [[EXTRACT2]]
; CHECK: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[EXTRACT1]], [[EXTRACT3]]
; CHECK: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF
; CHECK: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[XOR]](s64), 0
; CHECK: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[XOR1]](s32), 64
; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96)
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[XOR1]](s32)
; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96)
%0:_(s96) = COPY $vgpr0_vgpr1_vgpr2
%1:_(s96) = COPY $vgpr3_vgpr4_vgpr5
%2:_(s96) = G_XOR %0, %1
162 changes: 133 additions & 29 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir
@@ -672,43 +672,147 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[C1]](s64)
; CHECK: [[DEF:%[0-9]+]]:_(s128) = G_IMPLICIT_DEF
; CHECK: [[COPY1:%[0-9]+]]:_(s128) = COPY [[DEF]](s128)
; CHECK: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY1]], [[C]](s64), 0
; CHECK: [[COPY2:%[0-9]+]]:_(s128) = COPY [[INSERT]](s128)
; CHECK: [[INSERT1:%[0-9]+]]:_(s128) = G_INSERT [[COPY2]], [[TRUNC]](s48), 64
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C1]](s64)
; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32)
; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C2]](s32)
; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL1]]
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[C2]](s32)
; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32)
; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C2]](s32)
; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C2]](s32)
; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL5]]
; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
; CHECK: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
; CHECK: [[COPY5:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
; CHECK: [[COPY11:%[0-9]+]]:_(s64) = COPY [[MV2]](s64)
; CHECK: [[EXTRACT:%[0-9]+]]:_(s48) = G_EXTRACT [[DEF1]](s64), 0
; CHECK: [[COPY6:%[0-9]+]]:_(s128) = COPY [[INSERT1]](s128)
; CHECK: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY6]](s128), 0
; CHECK: [[EXTRACT2:%[0-9]+]]:_(s48) = G_EXTRACT [[COPY6]](s128), 64
; CHECK: [[AND2:%[0-9]+]]:_(s64) = G_AND [[COPY5]], [[EXTRACT1]]
; CHECK: [[COPY12:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
; CHECK: [[EXTRACT1:%[0-9]+]]:_(s48) = G_EXTRACT [[MV1]](s64), 0
; CHECK: [[AND9:%[0-9]+]]:_(s64) = G_AND [[COPY11]], [[COPY12]]
; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT]](s48)
; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT2]](s48)
; CHECK: [[AND3:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[ANYEXT1]]
; CHECK: [[TRUNC1:%[0-9]+]]:_(s48) = G_TRUNC [[AND3]](s64)
; CHECK: [[DEF2:%[0-9]+]]:_(s128) = G_IMPLICIT_DEF
; CHECK: [[COPY7:%[0-9]+]]:_(s128) = COPY [[DEF2]](s128)
; CHECK: [[INSERT2:%[0-9]+]]:_(s128) = G_INSERT [[COPY7]], [[AND2]](s64), 0
; CHECK: [[COPY8:%[0-9]+]]:_(s128) = COPY [[INSERT2]](s128)
; CHECK: [[INSERT3:%[0-9]+]]:_(s128) = G_INSERT [[COPY8]], [[TRUNC1]](s48), 64
; CHECK: [[TRUNC2:%[0-9]+]]:_(s112) = G_TRUNC [[INSERT3]](s128)
; CHECK: S_ENDPGM 0, implicit [[TRUNC2]](s112)
; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT1]](s48)
; CHECK: [[AND10:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[ANYEXT1]]
; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND9]](s64)
; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
; CHECK: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND10]](s64)
; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
; CHECK: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32)
; CHECK: [[DEF2:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
; CHECK: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF2]](s64)
; CHECK: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C2]](s32)
; CHECK: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C2]](s32)
; CHECK: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF2]](s64)
; CHECK: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV10]], [[C2]](s32)
; CHECK: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV11]], [[C2]](s32)
; CHECK: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF2]](s64)
; CHECK: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV12]], [[C2]](s32)
; CHECK: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV13]], [[C2]](s32)
; CHECK: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF2]](s64)
; CHECK: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV14]], [[C2]](s32)
; CHECK: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV15]], [[C2]](s32)
; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV4]](s32)
; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
; CHECK: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]]
; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C2]](s32)
; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND11]], [[SHL6]]
; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[UV5]](s32)
; CHECK: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
; CHECK: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
; CHECK: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C2]](s32)
; CHECK: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND13]], [[SHL7]]
; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32)
; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[UV6]](s32)
; CHECK: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]]
; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32)
; CHECK: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]]
; CHECK: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND16]], [[C2]](s32)
; CHECK: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND15]], [[SHL8]]
; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY [[UV7]](s32)
; CHECK: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]]
; CHECK: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND17]], [[SHL3]]
; CHECK: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR9]](s32)
; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY [[UV8]](s32)
; CHECK: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C3]]
; CHECK: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C2]](s32)
; CHECK: [[OR10:%[0-9]+]]:_(s32) = G_OR [[COPY8]], [[SHL9]]
; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32)
; CHECK: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]]
; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY [[UV9]](s32)
; CHECK: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]]
; CHECK: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND20]], [[C2]](s32)
; CHECK: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND19]], [[SHL10]]
; CHECK: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY [[UV10]](s32)
; CHECK: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C3]]
; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32)
; CHECK: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]]
; CHECK: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C2]](s32)
; CHECK: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND21]], [[SHL11]]
; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY [[UV11]](s32)
; CHECK: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C3]]
; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY [[UV12]](s32)
; CHECK: [[AND24:%[0-9]+]]:_(s32) = G_AND [[COPY26]], [[C3]]
; CHECK: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND24]], [[C2]](s32)
; CHECK: [[OR13:%[0-9]+]]:_(s32) = G_OR [[AND23]], [[SHL12]]
; CHECK: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR12]](s32), [[OR13]](s32)
; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32)
; CHECK: [[AND25:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C3]]
; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY [[UV13]](s32)
; CHECK: [[AND26:%[0-9]+]]:_(s32) = G_AND [[COPY28]], [[C3]]
; CHECK: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C2]](s32)
; CHECK: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND25]], [[SHL13]]
; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY [[UV14]](s32)
; CHECK: [[AND27:%[0-9]+]]:_(s32) = G_AND [[COPY29]], [[C3]]
; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY [[LSHR15]](s32)
; CHECK: [[AND28:%[0-9]+]]:_(s32) = G_AND [[COPY30]], [[C3]]
; CHECK: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND28]], [[C2]](s32)
; CHECK: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND27]], [[SHL14]]
; CHECK: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR14]](s32), [[OR15]](s32)
; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY [[UV15]](s32)
; CHECK: [[AND29:%[0-9]+]]:_(s32) = G_AND [[COPY31]], [[C3]]
; CHECK: [[OR16:%[0-9]+]]:_(s32) = G_OR [[AND29]], [[SHL3]]
; CHECK: [[OR17:%[0-9]+]]:_(s32) = G_OR [[COPY8]], [[SHL3]]
; CHECK: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
; CHECK: [[MV9:%[0-9]+]]:_(s384) = G_MERGE_VALUES [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64)
; CHECK: [[TRUNC:%[0-9]+]]:_(s112) = G_TRUNC [[MV9]](s384)
; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s112)
%0:_(s32) = COPY $vgpr0
%1:_(s2) = G_TRUNC %0
%2:_(s112) = G_ZEXT %1
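The long zext expansion above is mostly 16-bit repacking: each 32-bit word of the widened value is reassembled as (lo & 0xffff) | (hi << 16) through G_AND/G_SHL/G_OR chains. The scalar identity those chains implement, written as plain C++ for clarity:

#include <cstdint>

// One 32-bit word of the repack: mask the low half, shift the high half up.
constexpr uint32_t pack16(uint32_t Lo, uint32_t Hi) {
  return (Lo & 0xffffu) | ((Hi & 0xffffu) << 16);
}
static_assert(pack16(0x1234, 0xabcd) == 0xabcd1234, "halves land in place");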
18 changes: 9 additions & 9 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll
@@ -432,16 +432,16 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align1(<3 x i32> addrspace(4)*
; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v3, 24, v4
; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v4, v5, s0, v6
; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v5, 16, v7
; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v7, v9, v0, v10
; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v6, 24, v8
; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v8, 16, v11
; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v9, 24, v12
; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v0, v1, v2, v3
; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v1, v4, v5, v6
; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v2, v7, v8, v9
; GFX9-NOUNALIGNED-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NOUNALIGNED-NEXT: v_readfirstlane_b32 s1, v1
; GFX9-NOUNALIGNED-NEXT: v_readfirstlane_b32 s2, v2
; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v0, v9, v0, v10
; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v7, 16, v11
; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v8, 24, v12
; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v1, v1, v2, v3
; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v2, v4, v5, v6
; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v0, v0, v7, v8
; GFX9-NOUNALIGNED-NEXT: v_readfirstlane_b32 s0, v1
; GFX9-NOUNALIGNED-NEXT: v_readfirstlane_b32 s1, v2
; GFX9-NOUNALIGNED-NEXT: v_readfirstlane_b32 s2, v0
; GFX9-NOUNALIGNED-NEXT: ; return to shader part epilog
;
; GFX7-UNALIGNED-LABEL: s_load_constant_v3i32_align1:
116 changes: 58 additions & 58 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.96.ll
@@ -37,114 +37,114 @@ define <3 x i32> @load_lds_v3i32_align1(<3 x i32> addrspace(3)* %ptr) {
; GFX9-LABEL: load_lds_v3i32_align1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, v0
; GFX9-NEXT: ds_read_u8 v0, v0
; GFX9-NEXT: ds_read_u8 v1, v2 offset:1
; GFX9-NEXT: ds_read_u8 v4, v2 offset:2
; GFX9-NEXT: ds_read_u8 v5, v2 offset:3
; GFX9-NEXT: ds_read_u8 v6, v2 offset:4
; GFX9-NEXT: ds_read_u8 v7, v2 offset:5
; GFX9-NEXT: ds_read_u8 v8, v2 offset:6
; GFX9-NEXT: ds_read_u8 v9, v2 offset:7
; GFX9-NEXT: ds_read_u8 v1, v0
; GFX9-NEXT: ds_read_u8 v3, v0 offset:1
; GFX9-NEXT: ds_read_u8 v4, v0 offset:2
; GFX9-NEXT: ds_read_u8 v5, v0 offset:3
; GFX9-NEXT: ds_read_u8 v6, v0 offset:4
; GFX9-NEXT: ds_read_u8 v7, v0 offset:5
; GFX9-NEXT: ds_read_u8 v8, v0 offset:6
; GFX9-NEXT: ds_read_u8 v9, v0 offset:7
; GFX9-NEXT: s_mov_b32 s5, 8
; GFX9-NEXT: s_movk_i32 s4, 0xff
; GFX9-NEXT: s_waitcnt lgkmcnt(6)
; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT: v_and_or_b32 v0, v0, s4, v1
; GFX9-NEXT: v_lshlrev_b32_sdwa v3, s5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT: v_and_or_b32 v1, v1, s4, v3
; GFX9-NEXT: s_waitcnt lgkmcnt(5)
; GFX9-NEXT: v_and_b32_e32 v1, s4, v4
; GFX9-NEXT: v_and_b32_e32 v3, s4, v4
; GFX9-NEXT: s_waitcnt lgkmcnt(4)
; GFX9-NEXT: v_and_b32_e32 v4, s4, v5
; GFX9-NEXT: v_mov_b32_e32 v3, 0xff
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX9-NEXT: v_mov_b32_e32 v2, 0xff
; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX9-NEXT: v_lshlrev_b32_e32 v4, 24, v4
; GFX9-NEXT: v_or3_b32 v0, v0, v1, v4
; GFX9-NEXT: v_or3_b32 v3, v1, v3, v4
; GFX9-NEXT: s_waitcnt lgkmcnt(2)
; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s5, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT: s_waitcnt lgkmcnt(1)
; GFX9-NEXT: v_and_b32_e32 v4, v8, v3
; GFX9-NEXT: v_and_b32_e32 v4, v8, v2
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v5, v9, v3
; GFX9-NEXT: v_and_b32_e32 v5, v9, v2
; GFX9-NEXT: v_and_or_b32 v1, v6, s4, v1
; GFX9-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; GFX9-NEXT: v_lshlrev_b32_e32 v5, 24, v5
; GFX9-NEXT: v_or3_b32 v1, v1, v4, v5
; GFX9-NEXT: ds_read_u8 v4, v2 offset:8
; GFX9-NEXT: ds_read_u8 v5, v2 offset:9
; GFX9-NEXT: ds_read_u8 v6, v2 offset:10
; GFX9-NEXT: ds_read_u8 v2, v2 offset:11
; GFX9-NEXT: ds_read_u8 v4, v0 offset:8
; GFX9-NEXT: ds_read_u8 v5, v0 offset:9
; GFX9-NEXT: ds_read_u8 v6, v0 offset:10
; GFX9-NEXT: ds_read_u8 v0, v0 offset:11
; GFX9-NEXT: v_mov_b32_e32 v7, 8
; GFX9-NEXT: s_waitcnt lgkmcnt(2)
; GFX9-NEXT: v_lshlrev_b32_sdwa v5, v7, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT: v_and_or_b32 v4, v4, v3, v5
; GFX9-NEXT: v_and_or_b32 v4, v4, v2, v5
; GFX9-NEXT: s_waitcnt lgkmcnt(1)
; GFX9-NEXT: v_and_b32_e32 v5, v6, v3
; GFX9-NEXT: v_and_b32_e32 v5, v6, v2
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v2, v2, v3
; GFX9-NEXT: v_and_b32_e32 v0, v0, v2
; GFX9-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 24, v2
; GFX9-NEXT: v_or3_b32 v2, v4, v5, v2
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 24, v0
; GFX9-NEXT: v_or3_b32 v2, v4, v5, v0
; GFX9-NEXT: v_mov_b32_e32 v0, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: load_lds_v3i32_align1:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: v_mov_b32_e32 v2, v0
; GFX7-NEXT: s_movk_i32 s4, 0xff
; GFX7-NEXT: ds_read_u8 v0, v0
; GFX7-NEXT: ds_read_u8 v1, v2 offset:1
; GFX7-NEXT: ds_read_u8 v4, v2 offset:2
; GFX7-NEXT: ds_read_u8 v5, v2 offset:3
; GFX7-NEXT: ds_read_u8 v6, v2 offset:4
; GFX7-NEXT: ds_read_u8 v7, v2 offset:5
; GFX7-NEXT: ds_read_u8 v8, v2 offset:6
; GFX7-NEXT: ds_read_u8 v9, v2 offset:7
; GFX7-NEXT: ds_read_u8 v1, v0
; GFX7-NEXT: ds_read_u8 v3, v0 offset:1
; GFX7-NEXT: ds_read_u8 v4, v0 offset:2
; GFX7-NEXT: ds_read_u8 v5, v0 offset:3
; GFX7-NEXT: ds_read_u8 v6, v0 offset:4
; GFX7-NEXT: ds_read_u8 v7, v0 offset:5
; GFX7-NEXT: ds_read_u8 v8, v0 offset:6
; GFX7-NEXT: ds_read_u8 v9, v0 offset:7
; GFX7-NEXT: s_waitcnt lgkmcnt(6)
; GFX7-NEXT: v_and_b32_e32 v3, s4, v3
; GFX7-NEXT: v_and_b32_e32 v1, s4, v1
; GFX7-NEXT: v_and_b32_e32 v0, s4, v0
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1
; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3
; GFX7-NEXT: v_or_b32_e32 v1, v1, v3
; GFX7-NEXT: s_waitcnt lgkmcnt(5)
; GFX7-NEXT: v_and_b32_e32 v1, s4, v4
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT: v_mov_b32_e32 v3, 0xff
; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7-NEXT: v_and_b32_e32 v3, s4, v4
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX7-NEXT: v_mov_b32_e32 v2, 0xff
; GFX7-NEXT: v_or_b32_e32 v1, v1, v3
; GFX7-NEXT: s_waitcnt lgkmcnt(4)
; GFX7-NEXT: v_and_b32_e32 v1, s4, v5
; GFX7-NEXT: v_and_b32_e32 v3, s4, v5
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 24, v3
; GFX7-NEXT: s_waitcnt lgkmcnt(2)
; GFX7-NEXT: v_and_b32_e32 v4, v7, v3
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1
; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7-NEXT: v_and_b32_e32 v4, v7, v2
; GFX7-NEXT: v_or_b32_e32 v3, v1, v3
; GFX7-NEXT: v_and_b32_e32 v1, s4, v6
; GFX7-NEXT: v_lshlrev_b32_e32 v4, 8, v4
; GFX7-NEXT: v_or_b32_e32 v1, v1, v4
; GFX7-NEXT: s_waitcnt lgkmcnt(1)
; GFX7-NEXT: v_and_b32_e32 v4, v8, v3
; GFX7-NEXT: v_and_b32_e32 v4, v8, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; GFX7-NEXT: v_or_b32_e32 v1, v1, v4
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_and_b32_e32 v4, v9, v3
; GFX7-NEXT: v_and_b32_e32 v4, v9, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v4
; GFX7-NEXT: v_or_b32_e32 v1, v1, v4
; GFX7-NEXT: ds_read_u8 v4, v2 offset:8
; GFX7-NEXT: ds_read_u8 v5, v2 offset:9
; GFX7-NEXT: ds_read_u8 v6, v2 offset:10
; GFX7-NEXT: ds_read_u8 v2, v2 offset:11
; GFX7-NEXT: ds_read_u8 v4, v0 offset:8
; GFX7-NEXT: ds_read_u8 v5, v0 offset:9
; GFX7-NEXT: ds_read_u8 v6, v0 offset:10
; GFX7-NEXT: ds_read_u8 v0, v0 offset:11
; GFX7-NEXT: s_waitcnt lgkmcnt(3)
; GFX7-NEXT: v_and_b32_e32 v4, v4, v3
; GFX7-NEXT: v_and_b32_e32 v4, v4, v2
; GFX7-NEXT: s_waitcnt lgkmcnt(2)
; GFX7-NEXT: v_and_b32_e32 v5, v5, v3
; GFX7-NEXT: v_and_b32_e32 v5, v5, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v5, 8, v5
; GFX7-NEXT: v_or_b32_e32 v4, v4, v5
; GFX7-NEXT: s_waitcnt lgkmcnt(1)
; GFX7-NEXT: v_and_b32_e32 v5, v6, v3
; GFX7-NEXT: v_and_b32_e32 v5, v6, v2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_and_b32_e32 v2, v2, v3
; GFX7-NEXT: v_and_b32_e32 v0, v0, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; GFX7-NEXT: v_or_b32_e32 v4, v4, v5
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 24, v2
; GFX7-NEXT: v_or_b32_e32 v2, v4, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0
; GFX7-NEXT: v_or_b32_e32 v2, v4, v0
; GFX7-NEXT: v_mov_b32_e32 v0, v3
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: load_lds_v3i32_align1:
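In the GFX7/GFX9 sequences above, the align-1 <3 x i32> load is done byte by byte; per 32-bit lane, the v_and_b32/v_lshlrev_b32/v_or_b32 (and v_and_or_b32/v_or3_b32) chains perform the little-endian byte assembly below, shown as self-contained C++ for reference:

#include <cstdint>

// Assemble four loaded bytes into one little-endian 32-bit word.
constexpr uint32_t packBytes(uint32_t B0, uint32_t B1, uint32_t B2,
                             uint32_t B3) {
  return (B0 & 0xffu) | ((B1 & 0xffu) << 8) | ((B2 & 0xffu) << 16) |
         ((B3 & 0xffu) << 24);
}
static_assert(packBytes(0x11, 0x22, 0x33, 0x44) == 0x44332211,
              "byte 0 is least significant");

The diff changes only the register assignment of that assembly, not the arithmetic; load-unaligned.ll below regenerates in the same way.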
64 changes: 32 additions & 32 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll
@@ -174,61 +174,61 @@ define <3 x i32> @load_lds_v3i32_align1(<3 x i32> addrspace(3)* %ptr) {
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: v_mov_b32_e32 v2, v0
; GFX7-NEXT: s_movk_i32 s4, 0xff
; GFX7-NEXT: ds_read_u8 v0, v0
; GFX7-NEXT: ds_read_u8 v1, v2 offset:1
; GFX7-NEXT: ds_read_u8 v4, v2 offset:2
; GFX7-NEXT: ds_read_u8 v5, v2 offset:3
; GFX7-NEXT: ds_read_u8 v6, v2 offset:4
; GFX7-NEXT: ds_read_u8 v7, v2 offset:5
; GFX7-NEXT: ds_read_u8 v8, v2 offset:6
; GFX7-NEXT: ds_read_u8 v9, v2 offset:7
; GFX7-NEXT: ds_read_u8 v1, v0
; GFX7-NEXT: ds_read_u8 v3, v0 offset:1
; GFX7-NEXT: ds_read_u8 v4, v0 offset:2
; GFX7-NEXT: ds_read_u8 v5, v0 offset:3
; GFX7-NEXT: ds_read_u8 v6, v0 offset:4
; GFX7-NEXT: ds_read_u8 v7, v0 offset:5
; GFX7-NEXT: ds_read_u8 v8, v0 offset:6
; GFX7-NEXT: ds_read_u8 v9, v0 offset:7
; GFX7-NEXT: s_waitcnt lgkmcnt(6)
; GFX7-NEXT: v_and_b32_e32 v3, s4, v3
; GFX7-NEXT: v_and_b32_e32 v1, s4, v1
; GFX7-NEXT: v_and_b32_e32 v0, s4, v0
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1
; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3
; GFX7-NEXT: v_or_b32_e32 v1, v1, v3
; GFX7-NEXT: s_waitcnt lgkmcnt(5)
; GFX7-NEXT: v_and_b32_e32 v1, s4, v4
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT: v_mov_b32_e32 v3, 0xff
; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7-NEXT: v_and_b32_e32 v3, s4, v4
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX7-NEXT: v_mov_b32_e32 v2, 0xff
; GFX7-NEXT: v_or_b32_e32 v1, v1, v3
; GFX7-NEXT: s_waitcnt lgkmcnt(4)
; GFX7-NEXT: v_and_b32_e32 v1, s4, v5
; GFX7-NEXT: v_and_b32_e32 v3, s4, v5
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 24, v3
; GFX7-NEXT: s_waitcnt lgkmcnt(2)
; GFX7-NEXT: v_and_b32_e32 v4, v7, v3
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1
; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7-NEXT: v_and_b32_e32 v4, v7, v2
; GFX7-NEXT: v_or_b32_e32 v3, v1, v3
; GFX7-NEXT: v_and_b32_e32 v1, s4, v6
; GFX7-NEXT: v_lshlrev_b32_e32 v4, 8, v4
; GFX7-NEXT: v_or_b32_e32 v1, v1, v4
; GFX7-NEXT: s_waitcnt lgkmcnt(1)
; GFX7-NEXT: v_and_b32_e32 v4, v8, v3
; GFX7-NEXT: v_and_b32_e32 v4, v8, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; GFX7-NEXT: v_or_b32_e32 v1, v1, v4
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_and_b32_e32 v4, v9, v3
; GFX7-NEXT: v_and_b32_e32 v4, v9, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v4
; GFX7-NEXT: v_or_b32_e32 v1, v1, v4
; GFX7-NEXT: ds_read_u8 v4, v2 offset:8
; GFX7-NEXT: ds_read_u8 v5, v2 offset:9
; GFX7-NEXT: ds_read_u8 v6, v2 offset:10
; GFX7-NEXT: ds_read_u8 v2, v2 offset:11
; GFX7-NEXT: ds_read_u8 v4, v0 offset:8
; GFX7-NEXT: ds_read_u8 v5, v0 offset:9
; GFX7-NEXT: ds_read_u8 v6, v0 offset:10
; GFX7-NEXT: ds_read_u8 v0, v0 offset:11
; GFX7-NEXT: s_waitcnt lgkmcnt(3)
; GFX7-NEXT: v_and_b32_e32 v4, v4, v3
; GFX7-NEXT: v_and_b32_e32 v4, v4, v2
; GFX7-NEXT: s_waitcnt lgkmcnt(2)
; GFX7-NEXT: v_and_b32_e32 v5, v5, v3
; GFX7-NEXT: v_and_b32_e32 v5, v5, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v5, 8, v5
; GFX7-NEXT: v_or_b32_e32 v4, v4, v5
; GFX7-NEXT: s_waitcnt lgkmcnt(1)
; GFX7-NEXT: v_and_b32_e32 v5, v6, v3
; GFX7-NEXT: v_and_b32_e32 v5, v6, v2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_and_b32_e32 v2, v2, v3
; GFX7-NEXT: v_and_b32_e32 v0, v0, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; GFX7-NEXT: v_or_b32_e32 v4, v4, v5
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 24, v2
; GFX7-NEXT: v_or_b32_e32 v2, v4, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0
; GFX7-NEXT: v_or_b32_e32 v2, v4, v0
; GFX7-NEXT: v_mov_b32_e32 v0, v3
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: load_lds_v3i32_align1:
128 changes: 57 additions & 71 deletions llvm/test/CodeGen/AMDGPU/ds-alignment.ll
@@ -352,74 +352,63 @@ define amdgpu_kernel void @ds12align1(<3 x i32> addrspace(3)* %in, <3 x i32> add
; ALIGNED-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; ALIGNED-GISEL-NEXT: s_mov_b32 s3, 8
; ALIGNED-GISEL-NEXT: s_movk_i32 s2, 0xff
; ALIGNED-GISEL-NEXT: v_mov_b32_e32 v2, 0xff
; ALIGNED-GISEL-NEXT: v_mov_b32_e32 v1, 0xff
; ALIGNED-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; ALIGNED-GISEL-NEXT: v_mov_b32_e32 v3, s0
; ALIGNED-GISEL-NEXT: ds_read_u8 v0, v3
; ALIGNED-GISEL-NEXT: ds_read_u8 v1, v3 offset:1
; ALIGNED-GISEL-NEXT: ds_read_u8 v4, v3 offset:2
; ALIGNED-GISEL-NEXT: ds_read_u8 v5, v3 offset:3
; ALIGNED-GISEL-NEXT: ds_read_u8 v6, v3 offset:4
; ALIGNED-GISEL-NEXT: ds_read_u8 v7, v3 offset:5
; ALIGNED-GISEL-NEXT: ds_read_u8 v8, v3 offset:6
; ALIGNED-GISEL-NEXT: ds_read_u8 v9, v3 offset:7
; ALIGNED-GISEL-NEXT: v_mov_b32_e32 v2, s0
; ALIGNED-GISEL-NEXT: ds_read_u8 v0, v2
; ALIGNED-GISEL-NEXT: ds_read_u8 v3, v2 offset:1
; ALIGNED-GISEL-NEXT: ds_read_u8 v4, v2 offset:2
; ALIGNED-GISEL-NEXT: ds_read_u8 v5, v2 offset:3
; ALIGNED-GISEL-NEXT: ds_read_u8 v6, v2 offset:4
; ALIGNED-GISEL-NEXT: ds_read_u8 v7, v2 offset:5
; ALIGNED-GISEL-NEXT: ds_read_u8 v8, v2 offset:6
; ALIGNED-GISEL-NEXT: ds_read_u8 v9, v2 offset:7
; ALIGNED-GISEL-NEXT: s_waitcnt lgkmcnt(6)
; ALIGNED-GISEL-NEXT: v_lshlrev_b32_sdwa v1, s3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; ALIGNED-GISEL-NEXT: v_and_or_b32 v0, v0, s2, v1
; ALIGNED-GISEL-NEXT: v_lshlrev_b32_sdwa v3, s3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; ALIGNED-GISEL-NEXT: v_and_or_b32 v0, v0, s2, v3
; ALIGNED-GISEL-NEXT: s_waitcnt lgkmcnt(5)
; ALIGNED-GISEL-NEXT: v_and_b32_e32 v1, s2, v4
; ALIGNED-GISEL-NEXT: v_and_b32_e32 v3, s2, v4
; ALIGNED-GISEL-NEXT: s_waitcnt lgkmcnt(4)
; ALIGNED-GISEL-NEXT: v_and_b32_e32 v4, s2, v5
; ALIGNED-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; ALIGNED-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; ALIGNED-GISEL-NEXT: v_lshlrev_b32_e32 v4, 24, v4
; ALIGNED-GISEL-NEXT: v_or3_b32 v0, v0, v1, v4
; ALIGNED-GISEL-NEXT: s_waitcnt lgkmcnt(2)
; ALIGNED-GISEL-NEXT: v_lshlrev_b32_sdwa v1, s3, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; ALIGNED-GISEL-NEXT: v_or3_b32 v0, v0, v3, v4
; ALIGNED-GISEL-NEXT: s_waitcnt lgkmcnt(1)
; ALIGNED-GISEL-NEXT: v_and_b32_e32 v4, v8, v2
; ALIGNED-GISEL-NEXT: v_and_b32_e32 v4, v8, v1
; ALIGNED-GISEL-NEXT: v_lshlrev_b32_sdwa v3, s3, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; ALIGNED-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; ALIGNED-GISEL-NEXT: v_and_b32_e32 v5, v9, v2
; ALIGNED-GISEL-NEXT: v_and_or_b32 v1, v6, s2, v1
; ALIGNED-GISEL-NEXT: v_and_b32_e32 v1, v9, v1
; ALIGNED-GISEL-NEXT: v_and_or_b32 v3, v6, s2, v3
; ALIGNED-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; ALIGNED-GISEL-NEXT: v_lshlrev_b32_e32 v5, 24, v5
; ALIGNED-GISEL-NEXT: v_or3_b32 v1, v1, v4, v5
; ALIGNED-GISEL-NEXT: ds_read_u8 v4, v3 offset:8
; ALIGNED-GISEL-NEXT: ds_read_u8 v5, v3 offset:9
; ALIGNED-GISEL-NEXT: ds_read_u8 v6, v3 offset:10
; ALIGNED-GISEL-NEXT: ds_read_u8 v3, v3 offset:11
; ALIGNED-GISEL-NEXT: v_mov_b32_e32 v7, 8
; ALIGNED-GISEL-NEXT: s_waitcnt lgkmcnt(2)
; ALIGNED-GISEL-NEXT: v_lshlrev_b32_sdwa v5, v7, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; ALIGNED-GISEL-NEXT: v_and_or_b32 v4, v4, v2, v5
; ALIGNED-GISEL-NEXT: s_waitcnt lgkmcnt(1)
; ALIGNED-GISEL-NEXT: v_and_b32_e32 v5, v6, v2
; ALIGNED-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; ALIGNED-GISEL-NEXT: v_and_b32_e32 v2, v3, v2
; ALIGNED-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; ALIGNED-GISEL-NEXT: v_lshlrev_b32_e32 v2, 24, v2
; ALIGNED-GISEL-NEXT: v_lshrrev_b32_e32 v3, 8, v0
; ALIGNED-GISEL-NEXT: v_mov_b32_e32 v6, s1
; ALIGNED-GISEL-NEXT: v_or3_b32 v2, v4, v5, v2
; ALIGNED-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; ALIGNED-GISEL-NEXT: v_lshrrev_b32_e32 v5, 24, v0
; ALIGNED-GISEL-NEXT: ds_write_b8 v6, v0
; ALIGNED-GISEL-NEXT: ds_write_b8 v6, v3 offset:1
; ALIGNED-GISEL-NEXT: ds_write_b8 v6, v4 offset:2
; ALIGNED-GISEL-NEXT: ds_write_b8 v6, v5 offset:3
; ALIGNED-GISEL-NEXT: v_lshlrev_b32_e32 v1, 24, v1
; ALIGNED-GISEL-NEXT: v_or3_b32 v1, v3, v4, v1
; ALIGNED-GISEL-NEXT: ds_read_u8 v3, v2 offset:8
; ALIGNED-GISEL-NEXT: ds_read_u8 v4, v2 offset:9
; ALIGNED-GISEL-NEXT: ds_read_u8 v5, v2 offset:10
; ALIGNED-GISEL-NEXT: ds_read_u8 v2, v2 offset:11
; ALIGNED-GISEL-NEXT: v_lshrrev_b32_e32 v6, 8, v0
; ALIGNED-GISEL-NEXT: v_lshrrev_b32_e32 v7, 16, v0
; ALIGNED-GISEL-NEXT: v_mov_b32_e32 v9, s1
; ALIGNED-GISEL-NEXT: v_lshrrev_b32_e32 v8, 24, v0
; ALIGNED-GISEL-NEXT: ds_write_b8 v9, v0
; ALIGNED-GISEL-NEXT: ds_write_b8 v9, v6 offset:1
; ALIGNED-GISEL-NEXT: ds_write_b8 v9, v7 offset:2
; ALIGNED-GISEL-NEXT: ds_write_b8 v9, v8 offset:3
; ALIGNED-GISEL-NEXT: v_lshrrev_b32_e32 v0, 8, v1
; ALIGNED-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; ALIGNED-GISEL-NEXT: v_lshrrev_b32_e32 v4, 24, v1
; ALIGNED-GISEL-NEXT: ds_write_b8 v6, v1 offset:4
; ALIGNED-GISEL-NEXT: ds_write_b8 v6, v0 offset:5
; ALIGNED-GISEL-NEXT: ds_write_b8 v6, v3 offset:6
; ALIGNED-GISEL-NEXT: ds_write_b8 v6, v4 offset:7
; ALIGNED-GISEL-NEXT: v_lshrrev_b32_e32 v0, 8, v2
; ALIGNED-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v2
; ALIGNED-GISEL-NEXT: v_lshrrev_b32_e32 v3, 24, v2
; ALIGNED-GISEL-NEXT: ds_write_b8 v6, v2 offset:8
; ALIGNED-GISEL-NEXT: ds_write_b8 v6, v0 offset:9
; ALIGNED-GISEL-NEXT: ds_write_b8 v6, v1 offset:10
; ALIGNED-GISEL-NEXT: ds_write_b8 v6, v3 offset:11
; ALIGNED-GISEL-NEXT: v_lshrrev_b32_e32 v6, 16, v1
; ALIGNED-GISEL-NEXT: v_lshrrev_b32_e32 v7, 24, v1
; ALIGNED-GISEL-NEXT: ds_write_b8 v9, v1 offset:4
; ALIGNED-GISEL-NEXT: ds_write_b8 v9, v0 offset:5
; ALIGNED-GISEL-NEXT: ds_write_b8 v9, v6 offset:6
; ALIGNED-GISEL-NEXT: ds_write_b8 v9, v7 offset:7
; ALIGNED-GISEL-NEXT: s_waitcnt lgkmcnt(11)
; ALIGNED-GISEL-NEXT: ds_write_b8 v9, v3 offset:8
; ALIGNED-GISEL-NEXT: s_waitcnt lgkmcnt(11)
; ALIGNED-GISEL-NEXT: ds_write_b8 v9, v4 offset:9
; ALIGNED-GISEL-NEXT: s_waitcnt lgkmcnt(11)
; ALIGNED-GISEL-NEXT: ds_write_b8 v9, v5 offset:10
; ALIGNED-GISEL-NEXT: s_waitcnt lgkmcnt(11)
; ALIGNED-GISEL-NEXT: ds_write_b8 v9, v2 offset:11
; ALIGNED-GISEL-NEXT: s_endpgm
;
; UNALIGNED-LABEL: ds12align1:
@@ -480,20 +469,17 @@ define amdgpu_kernel void @ds12align2(<3 x i32> addrspace(3)* %in, <3 x i32> add
; ALIGNED-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; ALIGNED-GISEL-NEXT: v_and_or_b32 v0, v1, s2, v0
; ALIGNED-GISEL-NEXT: v_and_or_b32 v1, v3, s2, v2
; ALIGNED-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; ALIGNED-GISEL-NEXT: v_and_b32_e32 v2, s2, v6
; ALIGNED-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; ALIGNED-GISEL-NEXT: v_mov_b32_e32 v4, s1
; ALIGNED-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; ALIGNED-GISEL-NEXT: ds_write_b16 v4, v0
; ALIGNED-GISEL-NEXT: ds_write_b16 v4, v3 offset:2
; ALIGNED-GISEL-NEXT: v_mov_b32_e32 v3, s1
; ALIGNED-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; ALIGNED-GISEL-NEXT: ds_write_b16 v3, v0
; ALIGNED-GISEL-NEXT: ds_write_b16 v3, v2 offset:2
; ALIGNED-GISEL-NEXT: v_lshrrev_b32_e32 v0, 16, v1
; ALIGNED-GISEL-NEXT: v_and_or_b32 v2, v5, s2, v2
; ALIGNED-GISEL-NEXT: ds_write_b16 v4, v1 offset:4
; ALIGNED-GISEL-NEXT: ds_write_b16 v4, v0 offset:6
; ALIGNED-GISEL-NEXT: v_lshrrev_b32_e32 v0, 16, v2
; ALIGNED-GISEL-NEXT: ds_write_b16 v4, v2 offset:8
; ALIGNED-GISEL-NEXT: ds_write_b16 v4, v0 offset:10
; ALIGNED-GISEL-NEXT: ds_write_b16 v3, v1 offset:4
; ALIGNED-GISEL-NEXT: ds_write_b16 v3, v0 offset:6
; ALIGNED-GISEL-NEXT: s_waitcnt lgkmcnt(5)
; ALIGNED-GISEL-NEXT: ds_write_b16 v3, v5 offset:8
; ALIGNED-GISEL-NEXT: s_waitcnt lgkmcnt(5)
; ALIGNED-GISEL-NEXT: ds_write_b16 v3, v6 offset:10
; ALIGNED-GISEL-NEXT: s_endpgm
;
; UNALIGNED-LABEL: ds12align2:
10 changes: 5 additions & 5 deletions llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
@@ -1525,11 +1525,11 @@ TEST_F(AArch64GISelMITest, FewerElementsPhi) {
CHECK: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[EXTRACT2]]:_(s32), %bb.0, [[EXTRACT5]]:_(s32), %bb.1
CHECK: [[OTHER_PHI:%[0-9]+]]:_(s64) = G_PHI
CHECK: [[REBUILD_VAL_IMPDEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
CHECK: [[INSERT0:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[REBUILD_VAL_IMPDEF]]:_, [[PHI0]]:_(<2 x s32>), 0
CHECK: [[INSERT1:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[INSERT0]]:_, [[PHI1]]:_(<2 x s32>), 64
CHECK: [[INSERT2:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[INSERT1]]:_, [[PHI2]]:_(s32), 128
CHECK: [[USE_OP:%[0-9]+]]:_(<5 x s32>) = G_AND [[INSERT2]]:_, [[INSERT2]]:_
CHECK: [[UNMERGE0:%[0-9]+]]:_(s32), [[UNMERGE1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PHI0]]:_(<2 x s32>)
CHECK: [[UNMERGE2:%[0-9]+]]:_(s32), [[UNMERGE3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PHI1]]:_(<2 x s32>)
CHECK: [[BV:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UNMERGE0]]:_(s32), [[UNMERGE1]]:_(s32), [[UNMERGE2]]:_(s32), [[UNMERGE3]]:_(s32), [[PHI2]]:_(s32)
CHECK: [[USE_OP:%[0-9]+]]:_(<5 x s32>) = G_AND [[BV]]:_, [[BV]]:_
)";

EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF;
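The updated unit-test expectation mirrors the MIR test changes: the <5 x s32> result is rebuilt from the two <2 x s32> phi pieces plus the leftover s32 with G_UNMERGE_VALUES and a single G_BUILD_VECTOR, instead of chained G_INSERTs into an implicit def. A sketch of that rebuild under the same MachineIRBuilder assumptions as above (rebuildV5S32 is a hypothetical name):

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"

using namespace llvm;

// Recombine split phi results: unmerge the vector pieces to scalars, then
// build the <5 x s32> in one G_BUILD_VECTOR, as the CHECK lines expect.
Register rebuildV5S32(MachineIRBuilder &B, Register Piece0, Register Piece1,
                      Register Leftover) {
  LLT S32 = LLT::scalar(32);
  LLT V5S32 = LLT::fixed_vector(5, 32);
  auto U0 = B.buildUnmerge(S32, Piece0);
  auto U1 = B.buildUnmerge(S32, Piece1);
  return B.buildBuildVector(V5S32, {U0.getReg(0), U0.getReg(1), U1.getReg(0),
                                    U1.getReg(1), Leftover})
      .getReg(0);
}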