diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir index 2a270a41d5ec51..d41e8f8a986de3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX6 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s --- name: test_mul_s32 @@ -25,6 +25,11 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]] ; GFX9-NEXT: $vgpr0 = COPY [[MUL]](s32) + ; GFX10-LABEL: name: test_mul_s32 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]] + ; GFX10-NEXT: $vgpr0 = COPY [[MUL]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_MUL %0, %1 @@ -64,6 +69,15 @@ body: | ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MUL]](s32), [[MUL1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX10-LABEL: name: test_mul_v2s32 + ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MUL]](s32), [[MUL1]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_MUL %0, %1 @@ -115,6 +129,19 @@ body: | ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX10-LABEL: name: test_mul_s64 + ; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_MUL %0, %1 @@ -202,6 +229,31 @@ body: | ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD3]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX10-LABEL: name: test_mul_v2s64 + ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV6]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32) + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV10]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[MUL4]], [[MUL5]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ADD2]], [[UMULH1]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD3]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>) = G_MUL %0, %1 @@ -237,6 +289,14 @@ body: | ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[TRUNC]], [[TRUNC1]] ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[MUL]](s16) ; GFX9-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; GFX10-LABEL: name: test_mul_s16 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[TRUNC]], [[TRUNC1]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[MUL]](s16) + ; GFX10-NEXT: $vgpr0 = COPY [[ZEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -294,6 +354,11 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(<2 x s16>) = G_MUL [[COPY]], [[COPY1]] ; GFX9-NEXT: $vgpr0 = COPY [[MUL]](<2 x s16>) + ; GFX10-LABEL: name: test_mul_v2s16 + ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(<2 x s16>) = G_MUL [[COPY]], [[COPY1]] + ; GFX10-NEXT: $vgpr0 = COPY [[MUL]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_MUL %0, %1 @@ -359,6 +424,29 @@ body: | ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[MUL1]](<2 x s16>) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16) + ; GFX10-LABEL: name: test_mul_v3s16 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY6]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[DEF]](s32) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(<2 x s16>) = G_MUL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(<2 x s16>) = G_MUL [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[MUL]](<2 x s16>) + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[MUL1]](<2 x s16>) + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX10-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -462,6 +550,15 @@ body: | ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(<2 x s16>) = G_MUL [[UV1]], [[UV3]] ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[MUL]](<2 x s16>), [[MUL1]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX10-LABEL: name: test_mul_v4s16 + ; GFX10: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(<2 x s16>) = G_MUL [[UV]], [[UV2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(<2 x s16>) = G_MUL [[UV1]], [[UV3]] + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[MUL]](<2 x s16>), [[MUL1]](<2 x s16>) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = G_MUL %0, %1 @@ -489,6 +586,11 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]] ; GFX9-NEXT: $vgpr0 = COPY [[MUL]](s32) + ; GFX10-LABEL: name: test_mul_s24 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]] + ; GFX10-NEXT: $vgpr0 = COPY [[MUL]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s24) = G_TRUNC %0 @@ -542,6 +644,19 @@ body: | ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX10-LABEL: name: test_mul_s33 + ; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s33) = G_TRUNC %0 @@ -635,6 +750,32 @@ body: | ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[ADD]] ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MUL]](s32), [[UADDO2]](s32), [[ADD5]](s32) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) + ; GFX10-LABEL: name: test_mul_s96 + ; GFX10: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) + ; GFX10-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV4]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL1]], [[MUL2]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV2]], [[UV3]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV4]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV5]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV4]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[MUL3]], [[MUL4]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[MUL5]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ADD2]], [[UMULH1]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH2]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[ADD]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MUL]](s32), [[UADDO2]](s32), [[ADD5]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5 %2:_(s96) = G_MUL %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir index 69f258368739f6..6405bf32051e80 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX6 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s --- name: test_sdiv_s32 @@ -118,6 +118,42 @@ body: | ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] ; GFX9-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; GFX10-LABEL: name: test_sdiv_s32 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[C]](s32) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[ASHR1]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX10-NEXT: $vgpr0 = COPY [[SUB3]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_SDIV %0, %1 @@ -331,6 +367,73 @@ body: | ; GFX9-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB3]](s32), [[SUB7]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX10-LABEL: name: test_sdiv_v2s32 + ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV2]], [[C]](s32) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[ASHR]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[ASHR1]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX10-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) + ; GFX10-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV3]], [[C]](s32) + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]] + ; GFX10-NEXT: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR2]] + ; GFX10-NEXT: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD6]], [[ASHR3]] + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR5]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]] + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR5]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR4]], [[ADD7]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR5]] + ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[XOR5]] + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C3]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD8]], [[UMULH3]] + ; GFX10-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[XOR5]] + ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[XOR5]] + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C3]] + ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD9]], [[SELECT3]] + ; GFX10-NEXT: [[XOR6:%[0-9]+]]:_(s32) = G_XOR [[ASHR2]], [[ASHR3]] + ; GFX10-NEXT: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT5]], [[XOR6]] + ; GFX10-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB3]](s32), [[SUB7]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_SDIV %0, %1 @@ -853,6 +956,176 @@ body: | ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO7]] ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; GFX10-LABEL: name: test_sdiv_s64 + ; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[COPY1]], [[C]](s32) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] + ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] + ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] + ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] + ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32) + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] + ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] + ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] + ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]] + ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] + ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV22]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV23]], [[UADDO39]] + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] + ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] + ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV24]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV25]], [[UADDO41]] + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] + ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO7]] + ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_SDIV %0, %1 @@ -1858,6 +2131,337 @@ body: | ; GFX9-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE18]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX10-LABEL: name: test_sdiv_v2s64 + ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[C]](s32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[C]](s32) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV12]](s32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV13]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV16]] + ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] + ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]] + ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE6]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] + ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32) + ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[UADDO36]] + ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]] + ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV22]], [[UADDO36]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[MUL15]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]] + ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]] + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]] + ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] + ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV26]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV27]], [[UADDO39]] + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] + ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] + ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV28]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV29]], [[UADDO41]] + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] + ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX10-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV30]], [[UV32]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV31]], [[UV33]], [[USUBO7]] + ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) + ; GFX10-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) + ; GFX10-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32) + ; GFX10-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX10-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) + ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UV34]], [[UV36]] + ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UV35]], [[UV37]], [[UADDO43]] + ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO42]](s32), [[UADDE12]](s32) + ; GFX10-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX10-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) + ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UV38]], [[UV40]] + ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UV39]], [[UV41]], [[UADDO45]] + ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE14]](s32) + ; GFX10-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] + ; GFX10-NEXT: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] + ; GFX10-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX10-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV42]](s32) + ; GFX10-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV43]](s32) + ; GFX10-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C1]] + ; GFX10-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) + ; GFX10-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] + ; GFX10-NEXT: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C3]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]] + ; GFX10-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C4]] + ; GFX10-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] + ; GFX10-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) + ; GFX10-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) + ; GFX10-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX10-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV44]], [[UV46]] + ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[UV47]], [[USUBO9]] + ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI2]] + ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE10]], [[FPTOUI2]] + ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]] + ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[FPTOUI2]] + ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] + ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[UMULH15]] + ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] + ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] + ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX10-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) + ; GFX10-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH16]] + ; GFX10-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) + ; GFX10-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] + ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] + ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX10-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) + ; GFX10-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH18]] + ; GFX10-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) + ; GFX10-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX10-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD20]] + ; GFX10-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) + ; GFX10-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] + ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] + ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]] + ; GFX10-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO54]] + ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO57]] + ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDO56]] + ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE10]], [[UADDO56]] + ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE16]] + ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[UADDO56]] + ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] + ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]] + ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[MUL24]] + ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO56]], [[ADD25]] + ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[MUL24]] + ; GFX10-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX10-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) + ; GFX10-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH21]] + ; GFX10-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) + ; GFX10-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD25]] + ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[MUL24]] + ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[ADD25]] + ; GFX10-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX10-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) + ; GFX10-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH23]] + ; GFX10-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) + ; GFX10-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX10-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD26]] + ; GFX10-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) + ; GFX10-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] + ; GFX10-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD25]] + ; GFX10-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]] + ; GFX10-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[UADDO66]] + ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD29]], [[UADDO69]] + ; GFX10-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX10-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX10-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDO68]] + ; GFX10-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV50]], [[UADDE18]] + ; GFX10-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDO68]] + ; GFX10-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX10-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) + ; GFX10-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH25]] + ; GFX10-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) + ; GFX10-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX10-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDE18]] + ; GFX10-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDO68]] + ; GFX10-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDE18]] + ; GFX10-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX10-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) + ; GFX10-NEXT: [[UADDO76:%[0-9]+]]:_(s32), [[UADDO77:%[0-9]+]]:_(s1) = G_UADDO [[UADDO74]], [[UMULH27]] + ; GFX10-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO77]](s1) + ; GFX10-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX10-NEXT: [[UADDO78:%[0-9]+]]:_(s32), [[UADDO79:%[0-9]+]]:_(s1) = G_UADDO [[UADDO76]], [[ADD30]] + ; GFX10-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO79]](s1) + ; GFX10-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] + ; GFX10-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDE18]] + ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]] + ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO78]](s32), [[ADD33]](s32) + ; GFX10-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX10-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV52]], [[UADDO78]] + ; GFX10-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV53]], [[UADDO78]] + ; GFX10-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV52]], [[ADD33]] + ; GFX10-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV52]], [[UADDO78]] + ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] + ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]] + ; GFX10-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV48]], [[MUL33]] + ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV49]], [[ADD35]], [[USUBO11]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV49]], [[ADD35]] + ; GFX10-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE12]](s32), [[UV55]] + ; GFX10-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) + ; GFX10-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV54]] + ; GFX10-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) + ; GFX10-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE12]](s32), [[UV55]] + ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] + ; GFX10-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[USUBO10]], [[UV54]] + ; GFX10-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV55]], [[USUBO11]] + ; GFX10-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[USUBE14]], [[C6]], [[USUBO13]] + ; GFX10-NEXT: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX10-NEXT: [[UADDO80:%[0-9]+]]:_(s32), [[UADDO81:%[0-9]+]]:_(s1) = G_UADDO [[UADDO78]], [[UV56]] + ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[ADD33]], [[UV57]], [[UADDO81]] + ; GFX10-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO80]](s32), [[UADDE20]](s32) + ; GFX10-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV55]] + ; GFX10-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) + ; GFX10-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV54]] + ; GFX10-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) + ; GFX10-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV55]] + ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] + ; GFX10-NEXT: [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX10-NEXT: [[UADDO82:%[0-9]+]]:_(s32), [[UADDO83:%[0-9]+]]:_(s1) = G_UADDO [[UADDO80]], [[UV58]] + ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[UV59]], [[UADDO83]] + ; GFX10-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO82]](s32), [[UADDE22]](s32) + ; GFX10-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]] + ; GFX10-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV8]] + ; GFX10-NEXT: [[XOR6:%[0-9]+]]:_(s64) = G_XOR [[ASHR2]], [[ASHR3]] + ; GFX10-NEXT: [[XOR7:%[0-9]+]]:_(s64) = G_XOR [[SELECT7]], [[XOR6]] + ; GFX10-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR7]](s64) + ; GFX10-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR6]](s64) + ; GFX10-NEXT: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[UV60]], [[UV62]] + ; GFX10-NEXT: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[UV61]], [[UV63]], [[USUBO15]] + ; GFX10-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE18]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>) = G_SDIV %0, %1 @@ -1984,6 +2588,44 @@ body: | ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] ; GFX9-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; GFX10-LABEL: name: test_sdiv_s16 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 + ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX10-NEXT: $vgpr0 = COPY [[SUB3]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -2231,6 +2873,80 @@ body: | ; GFX9-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SUB3]](s32), [[SUB7]](s32) ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX10-LABEL: name: test_sdiv_v2s16 + ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 + ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C1]](s32) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C4]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C4]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX10-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 + ; GFX10-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 + ; GFX10-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32) + ; GFX10-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C1]](s32) + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]] + ; GFX10-NEXT: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR2]] + ; GFX10-NEXT: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD6]], [[ASHR3]] + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR5]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR5]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR4]], [[ADD7]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR5]] + ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[XOR5]] + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C4]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD8]], [[UMULH3]] + ; GFX10-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[XOR5]] + ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[XOR5]] + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C4]] + ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD9]], [[SELECT3]] + ; GFX10-NEXT: [[XOR6:%[0-9]+]]:_(s32) = G_XOR [[ASHR2]], [[ASHR3]] + ; GFX10-NEXT: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT5]], [[XOR6]] + ; GFX10-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SUB3]](s32), [[SUB7]](s32) + ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SDIV %0, %1 @@ -2357,6 +3073,44 @@ body: | ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] ; GFX9-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; GFX10-LABEL: name: test_sdiv_s7 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7 + ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 7 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX10-NEXT: $vgpr0 = COPY [[SUB3]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 @@ -2486,6 +3240,44 @@ body: | ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] ; GFX9-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; GFX10-LABEL: name: test_sdiv_s17 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 + ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX10-NEXT: $vgpr0 = COPY [[SUB3]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s17) = G_TRUNC %0 @@ -3017,6 +3809,178 @@ body: | ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO7]] ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; GFX10-LABEL: name: test_sdiv_s33 + ; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 33 + ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY1]], 33 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[C]](s32) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG1]](s64) + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] + ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] + ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] + ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] + ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32) + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] + ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] + ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] + ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]] + ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] + ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV22]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV23]], [[UADDO39]] + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] + ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] + ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV24]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV25]], [[UADDO41]] + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] + ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO7]] + ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s33) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir index d8bfabd6cc5fc3..ba71db5b40e0cc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX6 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s --- name: test_srem_s32 @@ -106,6 +106,38 @@ body: | ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX9-NEXT: $vgpr0 = COPY [[SUB4]](s32) + ; GFX10-LABEL: name: test_srem_s32 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[C]](s32) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[ASHR1]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] + ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX10-NEXT: $vgpr0 = COPY [[SUB4]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_SREM %0, %1 @@ -298,6 +330,66 @@ body: | ; GFX9-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX10-LABEL: name: test_srem_v2s32 + ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV2]], [[C]](s32) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[ASHR]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[ASHR1]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] + ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX10-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) + ; GFX10-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV3]], [[C]](s32) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]] + ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD3]], [[ASHR2]] + ; GFX10-NEXT: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR3]] + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR4]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]] + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR4]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB5]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR3]], [[ADD5]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR4]] + ; GFX10-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[XOR4]] + ; GFX10-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[XOR4]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB7]], [[SUB6]] + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[XOR4]] + ; GFX10-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[XOR4]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB8]], [[SELECT2]] + ; GFX10-NEXT: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR2]] + ; GFX10-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_SREM %0, %1 @@ -805,6 +897,171 @@ body: | ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]] ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; GFX10-LABEL: name: test_srem_s64 + ; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[COPY1]], [[C]](s32) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] + ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] + ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] + ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] + ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] + ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] + ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]] + ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] + ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] + ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] + ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV20]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]] + ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) + ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] + ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV24]] + ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]] + ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_SREM %0, %1 @@ -1783,6 +2040,328 @@ body: | ; GFX9-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO18]](s32), [[USUBE26]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX10-LABEL: name: test_srem_v2s64 + ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[C]](s32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[C]](s32) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV12]](s32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV13]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV16]] + ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] + ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]] + ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE6]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] + ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[UADDO36]] + ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]] + ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV22]], [[UADDO36]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[MUL15]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]] + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]] + ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] + ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] + ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] + ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV24]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV25]], [[USUBO5]] + ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) + ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] + ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]] + ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO9]] + ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) + ; GFX10-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) + ; GFX10-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32) + ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX10-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) + ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UV30]], [[UV32]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UV31]], [[UV33]], [[UADDO39]] + ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX10-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX10-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) + ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UV34]], [[UV36]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UV35]], [[UV37]], [[UADDO41]] + ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] + ; GFX10-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] + ; GFX10-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX10-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV38]](s32) + ; GFX10-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV39]](s32) + ; GFX10-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C1]] + ; GFX10-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) + ; GFX10-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] + ; GFX10-NEXT: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C3]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]] + ; GFX10-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C4]] + ; GFX10-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] + ; GFX10-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) + ; GFX10-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) + ; GFX10-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX10-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX10-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV40]], [[UV42]] + ; GFX10-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV41]], [[UV43]], [[USUBO11]] + ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI2]] + ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[FPTOUI2]] + ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI3]] + ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[FPTOUI2]] + ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] + ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[UMULH15]] + ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] + ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] + ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX10-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) + ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH16]] + ; GFX10-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) + ; GFX10-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] + ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] + ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX10-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) + ; GFX10-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH18]] + ; GFX10-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) + ; GFX10-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX10-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[ADD20]] + ; GFX10-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) + ; GFX10-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] + ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] + ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]] + ; GFX10-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO50]] + ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO53]] + ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDO52]] + ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[UADDO52]] + ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDE12]] + ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[UADDO52]] + ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] + ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]] + ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[MUL24]] + ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO52]], [[ADD25]] + ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[MUL24]] + ; GFX10-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX10-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) + ; GFX10-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH21]] + ; GFX10-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) + ; GFX10-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD25]] + ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[MUL24]] + ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[ADD25]] + ; GFX10-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX10-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) + ; GFX10-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH23]] + ; GFX10-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) + ; GFX10-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX10-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[UADDO60]], [[ADD26]] + ; GFX10-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) + ; GFX10-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] + ; GFX10-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD25]] + ; GFX10-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]] + ; GFX10-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[UADDO62]] + ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD29]], [[UADDO65]] + ; GFX10-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX10-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX10-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDO64]] + ; GFX10-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV46]], [[UADDE14]] + ; GFX10-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDO64]] + ; GFX10-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX10-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) + ; GFX10-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH25]] + ; GFX10-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) + ; GFX10-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX10-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDE14]] + ; GFX10-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDO64]] + ; GFX10-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDE14]] + ; GFX10-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX10-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) + ; GFX10-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH27]] + ; GFX10-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) + ; GFX10-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX10-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[ADD30]] + ; GFX10-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) + ; GFX10-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] + ; GFX10-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDE14]] + ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]] + ; GFX10-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX10-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV48]], [[UADDO74]] + ; GFX10-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV49]], [[UADDO74]] + ; GFX10-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV48]], [[ADD33]] + ; GFX10-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV48]], [[UADDO74]] + ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] + ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]] + ; GFX10-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[UV44]], [[MUL33]] + ; GFX10-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[ADD35]], [[USUBO13]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV45]], [[ADD35]] + ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO12]](s32), [[USUBE16]](s32) + ; GFX10-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV51]] + ; GFX10-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) + ; GFX10-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV50]] + ; GFX10-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) + ; GFX10-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV51]] + ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] + ; GFX10-NEXT: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV50]] + ; GFX10-NEXT: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV51]], [[USUBO13]] + ; GFX10-NEXT: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[C6]], [[USUBO15]] + ; GFX10-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE20]](s32) + ; GFX10-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE20]](s32), [[UV51]] + ; GFX10-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) + ; GFX10-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO14]](s32), [[UV50]] + ; GFX10-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) + ; GFX10-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE20]](s32), [[UV51]] + ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] + ; GFX10-NEXT: [[USUBO16:%[0-9]+]]:_(s32), [[USUBO17:%[0-9]+]]:_(s1) = G_USUBO [[USUBO14]], [[UV50]] + ; GFX10-NEXT: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[UV51]], [[USUBO15]] + ; GFX10-NEXT: [[USUBE24:%[0-9]+]]:_(s32), [[USUBE25:%[0-9]+]]:_(s1) = G_USUBE [[USUBE22]], [[C6]], [[USUBO17]] + ; GFX10-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO16]](s32), [[USUBE24]](s32) + ; GFX10-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]] + ; GFX10-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV8]] + ; GFX10-NEXT: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[SELECT7]], [[ASHR2]] + ; GFX10-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX10-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) + ; GFX10-NEXT: [[USUBO18:%[0-9]+]]:_(s32), [[USUBO19:%[0-9]+]]:_(s1) = G_USUBO [[UV52]], [[UV54]] + ; GFX10-NEXT: [[USUBE26:%[0-9]+]]:_(s32), [[USUBE27:%[0-9]+]]:_(s1) = G_USUBE [[UV53]], [[UV55]], [[USUBO19]] + ; GFX10-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO18]](s32), [[USUBE26]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>) = G_SREM %0, %1 @@ -1903,6 +2482,42 @@ body: | ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C3]] ; GFX9-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX10-LABEL: name: test_srem_s16 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 + ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] + ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C3]] + ; GFX10-NEXT: $vgpr0 = COPY [[AND]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -2129,6 +2744,73 @@ body: | ; GFX9-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]] ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SUB4]](s32), [[SUB9]](s32) ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX10-LABEL: name: test_srem_v2s16 + ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 + ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C1]](s32) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] + ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX10-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 + ; GFX10-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 + ; GFX10-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32) + ; GFX10-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C1]](s32) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]] + ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD3]], [[ASHR2]] + ; GFX10-NEXT: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR3]] + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR4]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR4]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB5]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR3]], [[ADD5]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR4]] + ; GFX10-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[XOR4]] + ; GFX10-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[XOR4]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB7]], [[SUB6]] + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[XOR4]] + ; GFX10-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[XOR4]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB8]], [[SELECT2]] + ; GFX10-NEXT: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR2]] + ; GFX10-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]] + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SUB4]](s32), [[SUB9]](s32) + ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SREM %0, %1 @@ -2243,6 +2925,40 @@ body: | ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX9-NEXT: $vgpr0 = COPY [[SUB4]](s32) + ; GFX10-LABEL: name: test_srem_s7 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7 + ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 7 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] + ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX10-NEXT: $vgpr0 = COPY [[SUB4]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 @@ -2360,6 +3076,40 @@ body: | ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX9-NEXT: $vgpr0 = COPY [[SUB4]](s32) + ; GFX10-LABEL: name: test_srem_s17 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 + ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] + ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX10-NEXT: $vgpr0 = COPY [[SUB4]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s17) = G_TRUNC %0 @@ -2876,6 +3626,173 @@ body: | ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]] ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; GFX10-LABEL: name: test_srem_s33 + ; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 33 + ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY1]], 33 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[C]](s32) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG1]](s64) + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] + ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] + ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] + ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] + ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] + ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] + ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]] + ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] + ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] + ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] + ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV20]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]] + ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) + ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] + ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV24]] + ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]] + ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s33) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir index 46ea14df831ae1..d17c2bf04c5957 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX6 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s --- name: test_udiv_s32 @@ -88,6 +88,32 @@ body: | ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C2]] ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] ; GFX9-NEXT: $vgpr0 = COPY [[SELECT2]](s32) + ; GFX10-LABEL: name: test_udiv_s32 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[ADD]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY1]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C2]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[COPY1]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C2]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] + ; GFX10-NEXT: $vgpr0 = COPY [[SELECT2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_UDIV %0, %1 @@ -244,6 +270,54 @@ body: | ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT2]](s32), [[SELECT5]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX10-LABEL: name: test_udiv_v2s32 + ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV2]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV2]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[ADD]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[UV2]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[UV2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C2]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[UV2]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[UV2]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C2]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV3]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C]] + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV3]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB3]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[ADD3]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[UV3]] + ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[UV3]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C2]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD4]], [[UMULH3]] + ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[UV3]] + ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB5]], [[SUB4]] + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[UV3]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C2]] + ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT2]](s32), [[SELECT5]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_UDIV %0, %1 @@ -700,6 +774,154 @@ body: | ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; GFX10-LABEL: name: test_udiv_s64 + ; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] + ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] + ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] + ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] + ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32) + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] + ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] + ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]] + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]] + ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] + ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV14]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV15]], [[UADDO35]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] + ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]] + ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV16]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV17]], [[UADDO37]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) + ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] + ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_UDIV %0, %1 @@ -1576,6 +1798,294 @@ body: | ; GFX9-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX10-LABEL: name: test_udiv_v2s64 + ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV4]](s32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV5]](s32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]] + ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] + ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]] + ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] + ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32) + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[UADDO32]] + ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]] + ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV14]], [[UADDO32]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[MUL15]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]] + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV17]] + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV16]] + ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV17]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV16]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV17]], [[USUBO3]] + ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV18]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV19]], [[UADDO35]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV17]] + ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV16]] + ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV17]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV20]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV21]], [[UADDO37]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) + ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] + ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] + ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX10-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV22]](s32) + ; GFX10-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV23]](s32) + ; GFX10-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C]] + ; GFX10-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) + ; GFX10-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]] + ; GFX10-NEXT: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C2]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]] + ; GFX10-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C3]] + ; GFX10-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] + ; GFX10-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) + ; GFX10-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) + ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) + ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV24]], [[UV26]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV25]], [[UV27]], [[USUBO7]] + ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[FPTOUI2]] + ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE8]], [[FPTOUI2]] + ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[FPTOUI3]] + ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO6]], [[FPTOUI2]] + ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] + ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[UMULH15]] + ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] + ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] + ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX10-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) + ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH16]] + ; GFX10-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1) + ; GFX10-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] + ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] + ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX10-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) + ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH18]] + ; GFX10-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) + ; GFX10-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[ADD20]] + ; GFX10-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) + ; GFX10-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] + ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] + ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]] + ; GFX10-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO46]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO49]] + ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[UADDO48]] + ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE8]], [[UADDO48]] + ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[UADDE8]] + ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO6]], [[UADDO48]] + ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] + ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]] + ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[MUL24]] + ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO48]], [[ADD25]] + ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[MUL24]] + ; GFX10-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX10-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) + ; GFX10-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH21]] + ; GFX10-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) + ; GFX10-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[ADD25]] + ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[MUL24]] + ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[ADD25]] + ; GFX10-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX10-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) + ; GFX10-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH23]] + ; GFX10-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) + ; GFX10-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX10-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[ADD26]] + ; GFX10-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) + ; GFX10-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] + ; GFX10-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[ADD25]] + ; GFX10-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]] + ; GFX10-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[UADDO58]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[ADD29]], [[UADDO61]] + ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX10-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV31]], [[UADDO60]] + ; GFX10-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV30]], [[UADDE10]] + ; GFX10-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV30]], [[UADDO60]] + ; GFX10-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX10-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) + ; GFX10-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH25]] + ; GFX10-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) + ; GFX10-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX10-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV31]], [[UADDE10]] + ; GFX10-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV31]], [[UADDO60]] + ; GFX10-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV30]], [[UADDE10]] + ; GFX10-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX10-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) + ; GFX10-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH27]] + ; GFX10-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) + ; GFX10-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX10-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[UADDO68]], [[ADD30]] + ; GFX10-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) + ; GFX10-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] + ; GFX10-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV31]], [[UADDE10]] + ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]] + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO70]](s32), [[ADD33]](s32) + ; GFX10-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX10-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV32]], [[UADDO70]] + ; GFX10-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV33]], [[UADDO70]] + ; GFX10-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV32]], [[ADD33]] + ; GFX10-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV32]], [[UADDO70]] + ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] + ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]] + ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV28]], [[MUL33]] + ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV29]], [[ADD35]], [[USUBO9]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV29]], [[ADD35]] + ; GFX10-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE10]](s32), [[UV35]] + ; GFX10-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) + ; GFX10-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO8]](s32), [[UV34]] + ; GFX10-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) + ; GFX10-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE10]](s32), [[UV35]] + ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] + ; GFX10-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[USUBO8]], [[UV34]] + ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV35]], [[USUBO9]] + ; GFX10-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[USUBE12]], [[C5]], [[USUBO11]] + ; GFX10-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX10-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UV36]] + ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[ADD33]], [[UV37]], [[UADDO73]] + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO72]](s32), [[UADDE12]](s32) + ; GFX10-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV35]] + ; GFX10-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) + ; GFX10-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV34]] + ; GFX10-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) + ; GFX10-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV35]] + ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] + ; GFX10-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX10-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[UV38]] + ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[UV39]], [[UADDO75]] + ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO74]](s32), [[UADDE14]](s32) + ; GFX10-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]] + ; GFX10-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]] + ; GFX10-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C5]] + ; GFX10-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>) = G_UDIV %0, %1 @@ -1678,6 +2188,36 @@ body: | ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT2]], [[C]] ; GFX9-NEXT: $vgpr0 = COPY [[AND2]](s32) + ; GFX10-LABEL: name: test_udiv_s16 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] + ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT2]], [[C]] + ; GFX10-NEXT: $vgpr0 = COPY [[AND2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -1869,6 +2409,62 @@ body: | ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]] ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SELECT2]](s32), [[SELECT5]](s32) ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX10-LABEL: name: test_udiv_v2s16 + ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C4]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C4]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] + ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND3]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND3]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB3]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[AND2]], [[ADD3]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[AND3]] + ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[AND2]], [[MUL3]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[AND3]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C4]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD4]], [[UMULH3]] + ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[AND3]] + ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB5]], [[SUB4]] + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[AND3]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C4]] + ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]] + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SELECT2]](s32), [[SELECT5]](s32) + ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_UDIV %0, %1 @@ -1968,6 +2564,35 @@ body: | ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] ; GFX9-NEXT: $vgpr0 = COPY [[SELECT2]](s32) + ; GFX10-LABEL: name: test_udiv_s7 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] + ; GFX10-NEXT: $vgpr0 = COPY [[SELECT2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 @@ -2070,6 +2695,35 @@ body: | ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] ; GFX9-NEXT: $vgpr0 = COPY [[SELECT2]](s32) + ; GFX10-LABEL: name: test_udiv_s17 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] + ; GFX10-NEXT: $vgpr0 = COPY [[SELECT2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s17) = G_TRUNC %0 @@ -2538,6 +3192,157 @@ body: | ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; GFX10-LABEL: name: test_udiv_s33 + ; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] + ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] + ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] + ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] + ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32) + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] + ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] + ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]] + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]] + ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] + ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV14]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV15]], [[UADDO35]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] + ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]] + ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV16]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV17]], [[UADDO37]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) + ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] + ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s33) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir index 47077c7d5dc3db..20bfb0b211800f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir @@ -10,24 +10,24 @@ body: | ; GFX8-LABEL: name: test_umulo_s32 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[COPY1]] - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; GFX8: $vgpr0 = COPY [[MUL]](s32) - ; GFX8: $vgpr1 = COPY [[ZEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[COPY1]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; GFX8-NEXT: $vgpr0 = COPY [[MUL]](s32) + ; GFX8-NEXT: $vgpr1 = COPY [[ZEXT]](s32) ; GFX9-LABEL: name: test_umulo_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[COPY1]] - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]] - ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; GFX9: $vgpr0 = COPY [[MUL]](s32) - ; GFX9: $vgpr1 = COPY [[ZEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[COPY1]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; GFX9-NEXT: $vgpr0 = COPY [[MUL]](s32) + ; GFX9-NEXT: $vgpr1 = COPY [[ZEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32), %3:_(s1) = G_UMULO %0, %1 @@ -44,46 +44,46 @@ body: | ; GFX8-LABEL: name: test_umulo_v2s32 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]] - ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH1]](s32), [[C]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MUL]](s32), [[MUL1]](s32) - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; GFX8: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) - ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; GFX8: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH1]](s32), [[C]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MUL]](s32), [[MUL1]](s32) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] + ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) ; GFX9-LABEL: name: test_umulo_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH1]](s32), [[C]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MUL]](s32), [[MUL1]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; GFX9: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH1]](s32), [[C]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MUL]](s32), [[MUL1]](s32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>), %3:_(<2 x s1>) = G_UMULO %0, %1 @@ -100,86 +100,86 @@ body: | ; GFX8-LABEL: name: test_umulo_s64 ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] - ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] - ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV2]] - ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] - ; GFX8: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX8: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] - ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV6]] - ; GFX8: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] - ; GFX8: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] - ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] - ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[MUL4]], [[MUL5]] - ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[UMULH4]] - ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD5]](s32) - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]] - ; GFX8: [[ZEXT5:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP]](s1) - ; GFX8: $vgpr0_vgpr1 = COPY [[MV1]](s64) - ; GFX8: $vgpr2_vgpr3 = COPY [[ZEXT5]](s64) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV2]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] + ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV6]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[MUL4]], [[MUL5]] + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[UMULH4]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD5]](s32) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]] + ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP]](s1) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV1]](s64) + ; GFX8-NEXT: $vgpr2_vgpr3 = COPY [[ZEXT5]](s64) ; GFX9-LABEL: name: test_umulo_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] - ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV2]] - ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] - ; GFX9: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] - ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV6]] - ; GFX9: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] - ; GFX9: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] - ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] - ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[MUL4]], [[MUL5]] - ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[UMULH4]] - ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD5]](s32) - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]] - ; GFX9: [[ZEXT5:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP]](s1) - ; GFX9: $vgpr0_vgpr1 = COPY [[MV1]](s64) - ; GFX9: $vgpr2_vgpr3 = COPY [[ZEXT5]](s64) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV2]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] + ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV6]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[MUL4]], [[MUL5]] + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[UMULH4]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD5]](s32) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]] + ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP]](s1) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV1]](s64) + ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[ZEXT5]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64), %3:_(s1) = G_UMULO %0, %1 @@ -196,170 +196,170 @@ body: | ; GFX8-LABEL: name: test_umulo_v2s64 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX8: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] - ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV7]] - ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV6]] - ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV7]] - ; GFX8: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX8: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV7]] - ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX8: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV10]] - ; GFX8: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]] - ; GFX8: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]] - ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]] - ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[MUL4]], [[MUL5]] - ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[UMULH4]] - ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD5]](s32) - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]] - ; GFX8: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UV14]] - ; GFX8: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UV15]] - ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UV14]] - ; GFX8: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX8: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX8: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UMULH5]] - ; GFX8: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UV15]] - ; GFX8: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UV14]] - ; GFX8: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UV15]] - ; GFX8: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH6]] - ; GFX8: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX8: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH7]] - ; GFX8: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD6]] - ; GFX8: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[ZEXT9]] - ; GFX8: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UV15]] - ; GFX8: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UMULH8]], [[ADD8]] - ; GFX8: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD9]](s32) - ; GFX8: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV18]] - ; GFX8: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UV18]] - ; GFX8: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV19]] - ; GFX8: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UV18]] - ; GFX8: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[MUL10]], [[MUL11]] - ; GFX8: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[UMULH9]] - ; GFX8: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL9]](s32), [[ADD11]](s32) - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV2]](s64), [[C]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV1]](s64), [[MV3]](s64) - ; GFX8: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1) - ; GFX8: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C1]] - ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1) - ; GFX8: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT1]], [[C1]] - ; GFX8: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[AND]](s64), [[AND1]](s64) - ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - ; GFX8: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR1]](<2 x s64>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV7]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV6]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV7]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] + ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV7]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV10]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]] + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[MUL4]], [[MUL5]] + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[UMULH4]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD5]](s32) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]] + ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UV14]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UV15]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UV14]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UMULH5]] + ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UV15]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UV14]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UV15]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH6]] + ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH7]] + ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD6]] + ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[ZEXT9]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UV15]] + ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UMULH8]], [[ADD8]] + ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD9]](s32) + ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV18]] + ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UV18]] + ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV19]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UV18]] + ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[MUL10]], [[MUL11]] + ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[UMULH9]] + ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL9]](s32), [[ADD11]](s32) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV2]](s64), [[C]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV1]](s64), [[MV3]](s64) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1) + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C1]] + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT1]], [[C1]] + ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[AND]](s64), [[AND1]](s64) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX8-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR1]](<2 x s64>) ; GFX9-LABEL: name: test_umulo_v2s64 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] - ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV7]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV6]] - ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV7]] - ; GFX9: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV7]] - ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX9: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV10]] - ; GFX9: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]] - ; GFX9: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]] - ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]] - ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[MUL4]], [[MUL5]] - ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[UMULH4]] - ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD5]](s32) - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]] - ; GFX9: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UV14]] - ; GFX9: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UV15]] - ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UV14]] - ; GFX9: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX9: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UMULH5]] - ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UV15]] - ; GFX9: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UV14]] - ; GFX9: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UV15]] - ; GFX9: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH6]] - ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX9: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH7]] - ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD6]] - ; GFX9: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[ZEXT9]] - ; GFX9: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UV15]] - ; GFX9: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UMULH8]], [[ADD8]] - ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD9]](s32) - ; GFX9: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV18]] - ; GFX9: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UV18]] - ; GFX9: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV19]] - ; GFX9: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UV18]] - ; GFX9: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[MUL10]], [[MUL11]] - ; GFX9: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[UMULH9]] - ; GFX9: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL9]](s32), [[ADD11]](s32) - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV2]](s64), [[C]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV1]](s64), [[MV3]](s64) - ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1) - ; GFX9: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C1]] - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1) - ; GFX9: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT1]], [[C1]] - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[AND]](s64), [[AND1]](s64) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - ; GFX9: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR1]](<2 x s64>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV7]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV6]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV7]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] + ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV7]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV10]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]] + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[MUL4]], [[MUL5]] + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[UMULH4]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD5]](s32) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]] + ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UV14]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UV15]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UV14]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UMULH5]] + ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UV15]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UV14]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UV15]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH6]] + ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH7]] + ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD6]] + ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[ZEXT9]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UV15]] + ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UMULH8]], [[ADD8]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD9]](s32) + ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV18]] + ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UV18]] + ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV19]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UV18]] + ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[MUL10]], [[MUL11]] + ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[UMULH9]] + ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL9]](s32), [[ADD11]](s32) + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV2]](s64), [[C]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV1]](s64), [[MV3]](s64) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C1]] + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT1]], [[C1]] + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[AND]](s64), [[AND1]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR1]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>), %3:_(<2 x s1>) = G_UMULO %0, %1 @@ -376,38 +376,38 @@ body: | ; GFX8-LABEL: name: test_umulo_s24 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[AND1]] - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C1]] - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] - ; GFX8: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP]], [[ICMP1]] - ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s1) - ; GFX8: $vgpr0 = COPY [[AND3]](s32) - ; GFX8: $vgpr1 = COPY [[ZEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[AND1]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C1]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP]], [[ICMP1]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s1) + ; GFX8-NEXT: $vgpr0 = COPY [[AND3]](s32) + ; GFX8-NEXT: $vgpr1 = COPY [[ZEXT]](s32) ; GFX9-LABEL: name: test_umulo_s24 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[AND1]] - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C1]] - ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] - ; GFX9: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP]], [[ICMP1]] - ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s1) - ; GFX9: $vgpr0 = COPY [[AND3]](s32) - ; GFX9: $vgpr1 = COPY [[ZEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[AND1]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C1]] + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP]], [[ICMP1]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s1) + ; GFX9-NEXT: $vgpr0 = COPY [[AND3]](s32) + ; GFX9-NEXT: $vgpr1 = COPY [[ZEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s24) = G_TRUNC %0 @@ -428,30 +428,30 @@ body: | ; GFX8-LABEL: name: test_umulo_s16 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] - ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; GFX8: $vgpr0 = COPY [[AND3]](s32) - ; GFX8: $vgpr1 = COPY [[ZEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; GFX8-NEXT: $vgpr0 = COPY [[AND3]](s32) + ; GFX8-NEXT: $vgpr1 = COPY [[ZEXT]](s32) ; GFX9-LABEL: name: test_umulo_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] - ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; GFX9: $vgpr0 = COPY [[AND3]](s32) - ; GFX9: $vgpr1 = COPY [[ZEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; GFX9-NEXT: $vgpr0 = COPY [[AND3]](s32) + ; GFX9-NEXT: $vgpr1 = COPY [[ZEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -471,30 +471,30 @@ body: | ; GFX8-LABEL: name: test_umulo_s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] - ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; GFX8: $vgpr0 = COPY [[AND3]](s32) - ; GFX8: $vgpr1 = COPY [[ZEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; GFX8-NEXT: $vgpr0 = COPY [[AND3]](s32) + ; GFX8-NEXT: $vgpr1 = COPY [[ZEXT]](s32) ; GFX9-LABEL: name: test_umulo_s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] - ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; GFX9: $vgpr0 = COPY [[AND3]](s32) - ; GFX9: $vgpr1 = COPY [[ZEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; GFX9-NEXT: $vgpr0 = COPY [[AND3]](s32) + ; GFX9-NEXT: $vgpr1 = COPY [[ZEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s8) = G_TRUNC %0 @@ -513,70 +513,70 @@ body: | liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-LABEL: name: test_umulo_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] - ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; GFX8: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]] - ; GFX8: [[AND5:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[AND5]] - ; GFX8: [[AND6:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX8: [[AND7:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]] - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL]] - ; GFX8: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; GFX8: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND8]](s32), [[AND9]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>) - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX8: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]] - ; GFX8: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C]] - ; GFX8: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND10]](s32), [[AND11]](s32) - ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s32>) - ; GFX8: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]] + ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[AND5]] + ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] + ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL]] + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX8-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] + ; GFX8-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND8]](s32), [[AND9]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; GFX8-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]] + ; GFX8-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C]] + ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND10]](s32), [[AND11]](s32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s32>) + ; GFX8-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_umulo_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] - ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]] - ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[AND5]] - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[MUL]](s32), [[MUL1]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND6]](s32), [[AND7]](s32) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C]] - ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C]] - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND8]](s32), [[AND9]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s32>) - ; GFX9: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]] + ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[AND5]] + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[MUL]](s32), [[MUL1]](s32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) + ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND6]](s32), [[AND7]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) + ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C]] + ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C]] + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND8]](s32), [[AND9]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s32>) + ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s16>) = G_TRUNC %0 @@ -596,64 +596,64 @@ body: | liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX8-LABEL: name: test_umulo_v2s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] - ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX8: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]] - ; GFX8: [[AND5:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[AND5]] - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[MUL]](s32) - ; GFX8: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[MUL1]](s32) - ; GFX8: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) - ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX8: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX8: $vgpr1 = COPY [[ANYEXT1]](s32) - ; GFX8: $vgpr2 = COPY [[ANYEXT2]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]] + ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[AND5]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[MUL]](s32) + ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[MUL1]](s32) + ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) + ; GFX8-NEXT: $vgpr2 = COPY [[ANYEXT2]](s32) ; GFX9-LABEL: name: test_umulo_v2s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] - ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]] - ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[AND5]] - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[MUL]](s32) - ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[MUL1]](s32) - ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) - ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9: $vgpr1 = COPY [[ANYEXT1]](s32) - ; GFX9: $vgpr2 = COPY [[ANYEXT2]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]] + ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[AND5]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[MUL]](s32) + ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[MUL1]](s32) + ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) + ; GFX9-NEXT: $vgpr2 = COPY [[ANYEXT2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -684,84 +684,84 @@ body: | liveins: $vgpr0, $vgpr1 ; GFX8-LABEL: name: test_umulo_v4s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) - ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX8: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) - ; GFX8: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX8: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32) - ; GFX8: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) - ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]] - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C3]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] - ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; GFX8: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]] - ; GFX8: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]] - ; GFX8: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]] - ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[AND5]], [[AND6]] - ; GFX8: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]] - ; GFX8: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]] - ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[AND7]], [[AND8]] - ; GFX8: [[AND9:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C3]] - ; GFX8: [[AND10:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C3]] - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL]] - ; GFX8: [[AND11:%[0-9]+]]:_(s32) = G_AND [[MUL2]], [[C3]] - ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C1]](s32) - ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX8: [[AND12:%[0-9]+]]:_(s32) = G_AND [[MUL3]], [[C3]] - ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C2]](s32) - ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX8: $vgpr0 = COPY [[OR2]](s32) - ; GFX8: $vgpr1 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX8-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32) + ; GFX8-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C3]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] + ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]] + ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]] + ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[AND5]], [[AND6]] + ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]] + ; GFX8-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[AND7]], [[AND8]] + ; GFX8-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C3]] + ; GFX8-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C3]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL]] + ; GFX8-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[MUL2]], [[C3]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C1]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; GFX8-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[MUL3]], [[C3]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C2]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; GFX8-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX8-NEXT: $vgpr1 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_umulo_v4s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) - ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32) - ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) - ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]] - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C3]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] - ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]] - ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]] - ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]] - ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[AND5]], [[AND6]] - ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]] - ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]] - ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[AND7]], [[AND8]] - ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C3]] - ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C3]] - ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C]](s32) - ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL]] - ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[MUL2]], [[C3]] - ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C1]](s32) - ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[MUL3]], [[C3]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C2]](s32) - ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX9: $vgpr0 = COPY [[OR2]](s32) - ; GFX9: $vgpr1 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32) + ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C3]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] + ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]] + ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]] + ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[AND5]], [[AND6]] + ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]] + ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]] + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[AND7]], [[AND8]] + ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C3]] + ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C3]] + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C]](s32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL]] + ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[MUL2]], [[C3]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C1]](s32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; GFX9-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[MUL3]], [[C3]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C2]](s32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX9-NEXT: $vgpr1 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir index 934f5b59f564c8..cf63e5398c77bc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX6 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s --- name: test_urem_s32 @@ -79,6 +79,29 @@ body: | ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY1]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX9-NEXT: $vgpr0 = COPY [[SELECT1]](s32) + ; GFX10-LABEL: name: test_urem_s32 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[ADD]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]] + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY1]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY1]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX10-NEXT: $vgpr0 = COPY [[SELECT1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_UREM %0, %1 @@ -220,6 +243,49 @@ body: | ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX10-LABEL: name: test_urem_v2s32 + ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV2]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV2]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[ADD]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[UV2]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]] + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[UV2]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[UV2]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[UV2]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[UV2]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV3]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C]] + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV3]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[ADD1]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[UV3]] + ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[UV3]] + ; GFX10-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[UV3]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[UV3]] + ; GFX10-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[UV3]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_UREM %0, %1 @@ -664,6 +730,150 @@ body: | ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; GFX10-LABEL: name: test_urem_s64 + ; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] + ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] + ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] + ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] + ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] + ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] + ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]] + ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] + ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] + ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]] + ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV12]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]] + ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) + ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] + ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_UREM %0, %1 @@ -1519,6 +1729,287 @@ body: | ; GFX9-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX10-LABEL: name: test_urem_v2s64 + ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV4]](s32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV5]](s32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]] + ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] + ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]] + ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] + ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[UADDO32]] + ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]] + ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV14]], [[UADDO32]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[MUL15]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV17]] + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV16]] + ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV17]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV16]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV17]], [[USUBO3]] + ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV17]] + ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV16]] + ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV17]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV16]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV17]], [[USUBO5]] + ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) + ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] + ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX10-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV18]](s32) + ; GFX10-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV19]](s32) + ; GFX10-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C]] + ; GFX10-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) + ; GFX10-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]] + ; GFX10-NEXT: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C2]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]] + ; GFX10-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C3]] + ; GFX10-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] + ; GFX10-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) + ; GFX10-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) + ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) + ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV20]], [[UV22]] + ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV21]], [[UV23]], [[USUBO9]] + ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI2]] + ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[FPTOUI2]] + ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]] + ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[FPTOUI2]] + ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] + ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[UMULH15]] + ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] + ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX10-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UMULH16]] + ; GFX10-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) + ; GFX10-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] + ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] + ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX10-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) + ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH18]] + ; GFX10-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1) + ; GFX10-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UADDO40]], [[ADD20]] + ; GFX10-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) + ; GFX10-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] + ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] + ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]] + ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO42]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO45]] + ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDO44]] + ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[UADDO44]] + ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE4]] + ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[UADDO44]] + ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] + ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]] + ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL24]] + ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO44]], [[ADD25]] + ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[MUL24]] + ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX10-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) + ; GFX10-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH21]] + ; GFX10-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) + ; GFX10-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD25]] + ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL24]] + ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[ADD25]] + ; GFX10-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX10-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) + ; GFX10-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH23]] + ; GFX10-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) + ; GFX10-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX10-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD26]] + ; GFX10-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) + ; GFX10-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] + ; GFX10-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD25]] + ; GFX10-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]] + ; GFX10-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UADDO54]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD29]], [[UADDO57]] + ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX10-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDO56]] + ; GFX10-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV26]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV26]], [[UADDO56]] + ; GFX10-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX10-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) + ; GFX10-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH25]] + ; GFX10-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) + ; GFX10-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX10-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV27]], [[UADDO56]] + ; GFX10-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV26]], [[UADDE6]] + ; GFX10-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX10-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) + ; GFX10-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH27]] + ; GFX10-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) + ; GFX10-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX10-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD30]] + ; GFX10-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) + ; GFX10-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] + ; GFX10-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV27]], [[UADDE6]] + ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]] + ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX10-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDO66]] + ; GFX10-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO66]] + ; GFX10-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[ADD33]] + ; GFX10-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO66]] + ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] + ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]] + ; GFX10-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV24]], [[MUL33]] + ; GFX10-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV25]], [[ADD35]], [[USUBO11]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV25]], [[ADD35]] + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO10]](s32), [[USUBE14]](s32) + ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV31]] + ; GFX10-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) + ; GFX10-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV30]] + ; GFX10-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) + ; GFX10-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV31]] + ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] + ; GFX10-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[USUBO10]], [[UV30]] + ; GFX10-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV31]], [[USUBO11]] + ; GFX10-NEXT: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[C5]], [[USUBO13]] + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO12]](s32), [[USUBE18]](s32) + ; GFX10-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE18]](s32), [[UV31]] + ; GFX10-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) + ; GFX10-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV30]] + ; GFX10-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) + ; GFX10-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE18]](s32), [[UV31]] + ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] + ; GFX10-NEXT: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV30]] + ; GFX10-NEXT: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[UV31]], [[USUBO13]] + ; GFX10-NEXT: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE20]], [[C5]], [[USUBO15]] + ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE22]](s32) + ; GFX10-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]] + ; GFX10-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]] + ; GFX10-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C5]] + ; GFX10-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>) = G_UREM %0, %1 @@ -1612,6 +2103,33 @@ body: | ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C]] ; GFX9-NEXT: $vgpr0 = COPY [[AND2]](s32) + ; GFX10-LABEL: name: test_urem_s16 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C]] + ; GFX10-NEXT: $vgpr0 = COPY [[AND2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -1788,6 +2306,57 @@ body: | ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]] ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SELECT1]](s32), [[SELECT3]](s32) ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX10-LABEL: name: test_urem_v2s16 + ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND3]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND3]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[AND2]], [[ADD1]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[AND3]] + ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[AND2]], [[MUL3]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[AND3]] + ; GFX10-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[AND3]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[AND3]] + ; GFX10-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[AND3]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]] + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SELECT1]](s32), [[SELECT3]](s32) + ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_UREM %0, %1 @@ -1878,6 +2447,32 @@ body: | ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX9-NEXT: $vgpr0 = COPY [[SELECT1]](s32) + ; GFX10-LABEL: name: test_urem_s7 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX10-NEXT: $vgpr0 = COPY [[SELECT1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 @@ -1971,6 +2566,32 @@ body: | ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX9-NEXT: $vgpr0 = COPY [[SELECT1]](s32) + ; GFX10-LABEL: name: test_urem_s17 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX10-NEXT: $vgpr0 = COPY [[SELECT1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s17) = G_TRUNC %0 @@ -2427,6 +3048,153 @@ body: | ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; GFX10-LABEL: name: test_urem_s33 + ; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] + ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] + ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] + ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] + ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] + ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] + ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]] + ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] + ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] + ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]] + ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV12]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]] + ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) + ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] + ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s33) = G_TRUNC %0