Skip to content

Conversation

@folkertdev
Copy link
Contributor

This LLVM IR

https://godbolt.org/z/5bM1vrMY1

define <4 x i32> @masked(<2 x double> %a, <4 x i32> %src, i8 noundef zeroext %mask) unnamed_addr #0 {
  %r = tail call <4 x i32> @llvm.x86.avx10.mask.vcvttpd2udqs.128(<2 x double> %a, <4 x i32> %src, i8 noundef %mask)
  ret <4 x i32> %r
}

define <4 x i32> @unmasked(<2 x double> %a) unnamed_addr #0 {
  %r = tail call <4 x i32> @llvm.x86.avx10.mask.vcvttpd2udqs.128(<2 x double> %a, <4 x i32> zeroinitializer, i8 noundef -1)
  ret <4 x i32> %r
}

declare <4 x i32> @llvm.x86.avx10.mask.vcvttpd2udqs.128(<2 x double>, <4 x i32>, i8) unnamed_addr

attributes #0 = { mustprogress nofree norecurse nosync nounwind nonlazybind willreturn memory(none) uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" "target-features"="+avx10.2-512" }

produces

masked:                                 # @masked
        kmovd   k1, edi
        vcvttpd2dqs     xmm1 {k1}, xmm0
        vmovaps xmm0, xmm1
        ret
unmasked:                               # @unmasked
        vcvttpd2udqs    xmm0, xmm0
        ret

So, when a mask is used, somehow the signed version of this instruction is selected. I suspect this is a typo.

@llvmbot
Copy link
Member

llvmbot commented Dec 8, 2025

@llvm/pr-subscribers-backend-x86

Author: Folkert de Vries (folkertdev)

Changes

This LLVM IR

https://godbolt.org/z/5bM1vrMY1

define &lt;4 x i32&gt; @<!-- -->masked(&lt;2 x double&gt; %a, &lt;4 x i32&gt; %src, i8 noundef zeroext %mask) unnamed_addr #<!-- -->0 {
  %r = tail call &lt;4 x i32&gt; @<!-- -->llvm.x86.avx10.mask.vcvttpd2udqs.128(&lt;2 x double&gt; %a, &lt;4 x i32&gt; %src, i8 noundef %mask)
  ret &lt;4 x i32&gt; %r
}

define &lt;4 x i32&gt; @<!-- -->unmasked(&lt;2 x double&gt; %a) unnamed_addr #<!-- -->0 {
  %r = tail call &lt;4 x i32&gt; @<!-- -->llvm.x86.avx10.mask.vcvttpd2udqs.128(&lt;2 x double&gt; %a, &lt;4 x i32&gt; zeroinitializer, i8 noundef -1)
  ret &lt;4 x i32&gt; %r
}

declare &lt;4 x i32&gt; @<!-- -->llvm.x86.avx10.mask.vcvttpd2udqs.128(&lt;2 x double&gt;, &lt;4 x i32&gt;, i8) unnamed_addr

attributes #<!-- -->0 = { mustprogress nofree norecurse nosync nounwind nonlazybind willreturn memory(none) uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" "target-features"="+avx10.2-512" }

produces

masked:                                 # @masked
        kmovd   k1, edi
        vcvttpd2dqs     xmm1 {k1}, xmm0
        vmovaps xmm0, xmm1
        ret
unmasked:                               # @unmasked
        vcvttpd2udqs    xmm0, xmm0
        ret

So, when a mask is used, somehow the signed version of this instruction is selected. I suspect this is a typo.


Full diff: https://github.com/llvm/llvm-project/pull/171229.diff

2 Files Affected:

  • (modified) llvm/lib/Target/X86/X86IntrinsicsInfo.h (+1-1)
  • (modified) llvm/test/CodeGen/X86/avx10_2satcvtds-intrinsics.ll (+6-6)
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 0f725a8eb338b..112fbd960cf86 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -534,7 +534,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
     X86_INTRINSIC_DATA(avx10_mask_vcvttpd2qqs_round_512, INTR_TYPE_1OP_MASK,
                        X86ISD::CVTTP2SIS, X86ISD::CVTTP2SIS_SAE),
     X86_INTRINSIC_DATA(avx10_mask_vcvttpd2udqs_128, CVTPD2DQ_MASK,
-                       X86ISD::CVTTP2UIS, X86ISD::MCVTTP2SIS),
+                       X86ISD::CVTTP2UIS, X86ISD::MCVTTP2UIS),
     X86_INTRINSIC_DATA(avx10_mask_vcvttpd2udqs_256, INTR_TYPE_1OP_MASK,
                        X86ISD::CVTTP2UIS, 0),
     X86_INTRINSIC_DATA(avx10_mask_vcvttpd2udqs_round_512, INTR_TYPE_1OP_MASK,
diff --git a/llvm/test/CodeGen/X86/avx10_2satcvtds-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2satcvtds-intrinsics.ll
index 38d54cff6dc23..00db1fb07c78d 100644
--- a/llvm/test/CodeGen/X86/avx10_2satcvtds-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx10_2satcvtds-intrinsics.ll
@@ -652,14 +652,14 @@ define <4 x i32> @test_int_x86_mask_vcvtt_pd2udqs_128(<2 x double> %x0, <4 x i32
 ; X64-LABEL: test_int_x86_mask_vcvtt_pd2udqs_128:
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
-; X64-NEXT:    vcvttpd2dqs %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0xfc,0x09,0x6d,0xc8]
+; X64-NEXT:    vcvttpd2udqs %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0xfc,0x09,0x6c,0xc8]
 ; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
 ; X64-NEXT:    retq # encoding: [0xc3]
 ;
 ; X86-LABEL: test_int_x86_mask_vcvtt_pd2udqs_128:
 ; X86:       # %bb.0:
 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT:    vcvttpd2dqs %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0xfc,0x09,0x6d,0xc8]
+; X86-NEXT:    vcvttpd2udqs %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0xfc,0x09,0x6c,0xc8]
 ; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
 ; X86-NEXT:    retl # encoding: [0xc3]
   %res = call <4 x i32> @llvm.x86.avx10.mask.vcvttpd2udqs.128( <2 x double> %x0, <4 x i32> %src, i8 %mask)
@@ -670,13 +670,13 @@ define <4 x i32> @test_int_x86_maskz_vcvtt_pd2udqs_128_z(<2 x double> %x0, i8 %m
 ; X64-LABEL: test_int_x86_maskz_vcvtt_pd2udqs_128_z:
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
-; X64-NEXT:    vcvttpd2dqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfc,0x89,0x6d,0xc0]
+; X64-NEXT:    vcvttpd2udqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfc,0x89,0x6c,0xc0]
 ; X64-NEXT:    retq # encoding: [0xc3]
 ;
 ; X86-LABEL: test_int_x86_maskz_vcvtt_pd2udqs_128_z:
 ; X86:       # %bb.0:
 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT:    vcvttpd2dqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfc,0x89,0x6d,0xc0]
+; X86-NEXT:    vcvttpd2udqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfc,0x89,0x6c,0xc0]
 ; X86-NEXT:    retl # encoding: [0xc3]
   %res = call <4 x i32> @llvm.x86.avx10.mask.vcvttpd2udqs.128( <2 x double> %x0, <4 x i32> zeroinitializer, i8 %mask)
   ret <4 x i32> %res
@@ -686,13 +686,13 @@ define <4 x i32> @test_int_x86_mask_vcvtt_pd2udqs_128_undef(<2 x double> %x0, i8
 ; X64-LABEL: test_int_x86_mask_vcvtt_pd2udqs_128_undef:
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
-; X64-NEXT:    vcvttpd2dqs %xmm0, %xmm0 {%k1} # encoding: [0x62,0xf5,0xfc,0x09,0x6d,0xc0]
+; X64-NEXT:    vcvttpd2udqs %xmm0, %xmm0 {%k1} # encoding: [0x62,0xf5,0xfc,0x09,0x6c,0xc0]
 ; X64-NEXT:    retq # encoding: [0xc3]
 ;
 ; X86-LABEL: test_int_x86_mask_vcvtt_pd2udqs_128_undef:
 ; X86:       # %bb.0:
 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT:    vcvttpd2dqs %xmm0, %xmm0 {%k1} # encoding: [0x62,0xf5,0xfc,0x09,0x6d,0xc0]
+; X86-NEXT:    vcvttpd2udqs %xmm0, %xmm0 {%k1} # encoding: [0x62,0xf5,0xfc,0x09,0x6c,0xc0]
 ; X86-NEXT:    retl # encoding: [0xc3]
   %res = call <4 x i32> @llvm.x86.avx10.mask.vcvttpd2udqs.128( <2 x double> %x0, <4 x i32> undef, i8 %mask)
   ret <4 x i32> %res

Copy link
Contributor

@phoebewang phoebewang left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM, thanks for the fix!

@RKSimon RKSimon enabled auto-merge (squash) December 9, 2025 18:06
@RKSimon RKSimon merged commit 03160c1 into llvm:main Dec 9, 2025
9 of 10 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

5 participants