Skip to content

Conversation

@RKSimon
Copy link
Collaborator

@RKSimon RKSimon commented Dec 8, 2025

No description provided.

@RKSimon RKSimon enabled auto-merge (squash) December 8, 2025 10:08
@llvmbot
Copy link
Member

llvmbot commented Dec 8, 2025

@llvm/pr-subscribers-backend-x86

Author: Simon Pilgrim (RKSimon)

Changes

Patch is 94.53 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/171104.diff

1 File Affected:

  • (modified) llvm/test/CodeGen/X86/bitcnt-big-integer.ll (+1563-413)
diff --git a/llvm/test/CodeGen/X86/bitcnt-big-integer.ll b/llvm/test/CodeGen/X86/bitcnt-big-integer.ll
index 5b3b27a3d61de..0fd555991ae29 100644
--- a/llvm/test/CodeGen/X86/bitcnt-big-integer.ll
+++ b/llvm/test/CodeGen/X86/bitcnt-big-integer.ll
@@ -1,8 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 -mattr=+avx512vpopcntdq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512POPCNT
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefixes=AVX512,AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX512,AVX512VL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 -mattr=+avx512vpopcntdq | FileCheck %s --check-prefixes=AVX512,AVX512POPCNT
 
 ;
 ; CTPOP
@@ -16,6 +17,14 @@ define i32 @test_ctpop_i128(i128 %a0) nounwind {
 ; CHECK-NEXT:    addl %ecx, %eax
 ; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
 ; CHECK-NEXT:    retq
+;
+; AVX512-LABEL: test_ctpop_i128:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    popcntq %rsi, %rcx
+; AVX512-NEXT:    popcntq %rdi, %rax
+; AVX512-NEXT:    addl %ecx, %eax
+; AVX512-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX512-NEXT:    retq
   %cnt = call i128 @llvm.ctpop.i128(i128 %a0)
   %res = trunc i128 %cnt to i32
   ret i32 %res
@@ -29,6 +38,14 @@ define i32 @load_ctpop_i128(ptr %p0) nounwind {
 ; CHECK-NEXT:    addl %ecx, %eax
 ; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
 ; CHECK-NEXT:    retq
+;
+; AVX512-LABEL: load_ctpop_i128:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    popcntq 8(%rdi), %rcx
+; AVX512-NEXT:    popcntq (%rdi), %rax
+; AVX512-NEXT:    addl %ecx, %eax
+; AVX512-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX512-NEXT:    retq
   %a0 = load i128, ptr %p0
   %cnt = call i128 @llvm.ctpop.i128(i128 %a0)
   %res = trunc i128 %cnt to i32
@@ -50,6 +67,48 @@ define i32 @test_ctpop_i256(i256 %a0) nounwind {
 ; CHECK-NEXT:    addl %ecx, %eax
 ; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
 ; CHECK-NEXT:    retq
+;
+; AVX512F-LABEL: test_ctpop_i256:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    popcntq %rcx, %rax
+; AVX512F-NEXT:    popcntq %rdx, %rcx
+; AVX512F-NEXT:    addl %eax, %ecx
+; AVX512F-NEXT:    popcntq %rsi, %rdx
+; AVX512F-NEXT:    popcntq %rdi, %rax
+; AVX512F-NEXT:    addl %edx, %eax
+; AVX512F-NEXT:    addl %ecx, %eax
+; AVX512F-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX512F-NEXT:    retq
+;
+; AVX512VL-LABEL: test_ctpop_i256:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    popcntq %rcx, %rax
+; AVX512VL-NEXT:    xorl %ecx, %ecx
+; AVX512VL-NEXT:    popcntq %rdx, %rcx
+; AVX512VL-NEXT:    addl %eax, %ecx
+; AVX512VL-NEXT:    xorl %edx, %edx
+; AVX512VL-NEXT:    popcntq %rsi, %rdx
+; AVX512VL-NEXT:    xorl %eax, %eax
+; AVX512VL-NEXT:    popcntq %rdi, %rax
+; AVX512VL-NEXT:    addl %edx, %eax
+; AVX512VL-NEXT:    addl %ecx, %eax
+; AVX512VL-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX512VL-NEXT:    retq
+;
+; AVX512POPCNT-LABEL: test_ctpop_i256:
+; AVX512POPCNT:       # %bb.0:
+; AVX512POPCNT-NEXT:    popcntq %rcx, %rax
+; AVX512POPCNT-NEXT:    xorl %ecx, %ecx
+; AVX512POPCNT-NEXT:    popcntq %rdx, %rcx
+; AVX512POPCNT-NEXT:    addl %eax, %ecx
+; AVX512POPCNT-NEXT:    xorl %edx, %edx
+; AVX512POPCNT-NEXT:    popcntq %rsi, %rdx
+; AVX512POPCNT-NEXT:    xorl %eax, %eax
+; AVX512POPCNT-NEXT:    popcntq %rdi, %rax
+; AVX512POPCNT-NEXT:    addl %edx, %eax
+; AVX512POPCNT-NEXT:    addl %ecx, %eax
+; AVX512POPCNT-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX512POPCNT-NEXT:    retq
   %cnt = call i256 @llvm.ctpop.i256(i256 %a0)
   %res = trunc i256 %cnt to i32
   ret i32 %res
@@ -81,18 +140,43 @@ define i32 @load_ctpop_i256(ptr %p0) nounwind {
 ; AVX2-NEXT:    # kill: def $eax killed $eax killed $rax
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: load_ctpop_i256:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    popcntq 24(%rdi), %rax
-; AVX512-NEXT:    popcntq 16(%rdi), %rcx
-; AVX512-NEXT:    addl %eax, %ecx
-; AVX512-NEXT:    popcntq 8(%rdi), %rdx
-; AVX512-NEXT:    xorl %eax, %eax
-; AVX512-NEXT:    popcntq (%rdi), %rax
-; AVX512-NEXT:    addl %edx, %eax
-; AVX512-NEXT:    addl %ecx, %eax
-; AVX512-NEXT:    # kill: def $eax killed $eax killed $rax
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: load_ctpop_i256:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    popcntq 24(%rdi), %rax
+; AVX512F-NEXT:    popcntq 16(%rdi), %rcx
+; AVX512F-NEXT:    addl %eax, %ecx
+; AVX512F-NEXT:    popcntq 8(%rdi), %rdx
+; AVX512F-NEXT:    popcntq (%rdi), %rax
+; AVX512F-NEXT:    addl %edx, %eax
+; AVX512F-NEXT:    addl %ecx, %eax
+; AVX512F-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX512F-NEXT:    retq
+;
+; AVX512VL-LABEL: load_ctpop_i256:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    popcntq 24(%rdi), %rax
+; AVX512VL-NEXT:    popcntq 16(%rdi), %rcx
+; AVX512VL-NEXT:    addl %eax, %ecx
+; AVX512VL-NEXT:    popcntq 8(%rdi), %rdx
+; AVX512VL-NEXT:    xorl %eax, %eax
+; AVX512VL-NEXT:    popcntq (%rdi), %rax
+; AVX512VL-NEXT:    addl %edx, %eax
+; AVX512VL-NEXT:    addl %ecx, %eax
+; AVX512VL-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX512VL-NEXT:    retq
+;
+; AVX512POPCNT-LABEL: load_ctpop_i256:
+; AVX512POPCNT:       # %bb.0:
+; AVX512POPCNT-NEXT:    popcntq 24(%rdi), %rax
+; AVX512POPCNT-NEXT:    popcntq 16(%rdi), %rcx
+; AVX512POPCNT-NEXT:    addl %eax, %ecx
+; AVX512POPCNT-NEXT:    popcntq 8(%rdi), %rdx
+; AVX512POPCNT-NEXT:    xorl %eax, %eax
+; AVX512POPCNT-NEXT:    popcntq (%rdi), %rax
+; AVX512POPCNT-NEXT:    addl %edx, %eax
+; AVX512POPCNT-NEXT:    addl %ecx, %eax
+; AVX512POPCNT-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX512POPCNT-NEXT:    retq
   %a0 = load i256, ptr %p0
   %cnt = call i256 @llvm.ctpop.i256(i256 %a0)
   %res = trunc i256 %cnt to i32
@@ -124,6 +208,76 @@ define i32 @test_ctpop_i512(i512 %a0) nounwind {
 ; CHECK-NEXT:    addl %r8d, %eax
 ; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
 ; CHECK-NEXT:    retq
+;
+; AVX512F-LABEL: test_ctpop_i512:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    popcntq {{[0-9]+}}(%rsp), %rax
+; AVX512F-NEXT:    popcntq {{[0-9]+}}(%rsp), %r10
+; AVX512F-NEXT:    addl %eax, %r10d
+; AVX512F-NEXT:    popcntq %r9, %rax
+; AVX512F-NEXT:    popcntq %r8, %r8
+; AVX512F-NEXT:    addl %eax, %r8d
+; AVX512F-NEXT:    addl %r10d, %r8d
+; AVX512F-NEXT:    popcntq %rcx, %rax
+; AVX512F-NEXT:    popcntq %rdx, %rcx
+; AVX512F-NEXT:    addl %eax, %ecx
+; AVX512F-NEXT:    popcntq %rsi, %rdx
+; AVX512F-NEXT:    popcntq %rdi, %rax
+; AVX512F-NEXT:    addl %edx, %eax
+; AVX512F-NEXT:    addl %ecx, %eax
+; AVX512F-NEXT:    addl %r8d, %eax
+; AVX512F-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX512F-NEXT:    retq
+;
+; AVX512VL-LABEL: test_ctpop_i512:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    popcntq {{[0-9]+}}(%rsp), %rax
+; AVX512VL-NEXT:    popcntq {{[0-9]+}}(%rsp), %r10
+; AVX512VL-NEXT:    addl %eax, %r10d
+; AVX512VL-NEXT:    xorl %eax, %eax
+; AVX512VL-NEXT:    popcntq %r9, %rax
+; AVX512VL-NEXT:    popcntq %r8, %r8
+; AVX512VL-NEXT:    addl %eax, %r8d
+; AVX512VL-NEXT:    addl %r10d, %r8d
+; AVX512VL-NEXT:    xorl %eax, %eax
+; AVX512VL-NEXT:    popcntq %rcx, %rax
+; AVX512VL-NEXT:    xorl %ecx, %ecx
+; AVX512VL-NEXT:    popcntq %rdx, %rcx
+; AVX512VL-NEXT:    addl %eax, %ecx
+; AVX512VL-NEXT:    xorl %edx, %edx
+; AVX512VL-NEXT:    popcntq %rsi, %rdx
+; AVX512VL-NEXT:    xorl %eax, %eax
+; AVX512VL-NEXT:    popcntq %rdi, %rax
+; AVX512VL-NEXT:    addl %edx, %eax
+; AVX512VL-NEXT:    addl %ecx, %eax
+; AVX512VL-NEXT:    addl %r8d, %eax
+; AVX512VL-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX512VL-NEXT:    retq
+;
+; AVX512POPCNT-LABEL: test_ctpop_i512:
+; AVX512POPCNT:       # %bb.0:
+; AVX512POPCNT-NEXT:    popcntq {{[0-9]+}}(%rsp), %rax
+; AVX512POPCNT-NEXT:    popcntq {{[0-9]+}}(%rsp), %r10
+; AVX512POPCNT-NEXT:    addl %eax, %r10d
+; AVX512POPCNT-NEXT:    xorl %eax, %eax
+; AVX512POPCNT-NEXT:    popcntq %r9, %rax
+; AVX512POPCNT-NEXT:    popcntq %r8, %r8
+; AVX512POPCNT-NEXT:    addl %eax, %r8d
+; AVX512POPCNT-NEXT:    addl %r10d, %r8d
+; AVX512POPCNT-NEXT:    xorl %eax, %eax
+; AVX512POPCNT-NEXT:    popcntq %rcx, %rax
+; AVX512POPCNT-NEXT:    xorl %ecx, %ecx
+; AVX512POPCNT-NEXT:    popcntq %rdx, %rcx
+; AVX512POPCNT-NEXT:    addl %eax, %ecx
+; AVX512POPCNT-NEXT:    xorl %edx, %edx
+; AVX512POPCNT-NEXT:    popcntq %rsi, %rdx
+; AVX512POPCNT-NEXT:    xorl %eax, %eax
+; AVX512POPCNT-NEXT:    popcntq %rdi, %rax
+; AVX512POPCNT-NEXT:    addl %edx, %eax
+; AVX512POPCNT-NEXT:    addl %ecx, %eax
+; AVX512POPCNT-NEXT:    addl %r8d, %eax
+; AVX512POPCNT-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX512POPCNT-NEXT:    retq
   %cnt = call i512 @llvm.ctpop.i512(i512 %a0)
   %res = trunc i512 %cnt to i32
   ret i32 %res
@@ -177,29 +331,73 @@ define i32 @load_ctpop_i512(ptr %p0) nounwind {
 ; AVX2-NEXT:    # kill: def $eax killed $eax killed $rax
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: load_ctpop_i512:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    popcntq 56(%rdi), %rax
-; AVX512-NEXT:    popcntq 48(%rdi), %rcx
-; AVX512-NEXT:    addl %eax, %ecx
-; AVX512-NEXT:    xorl %eax, %eax
-; AVX512-NEXT:    popcntq 40(%rdi), %rax
-; AVX512-NEXT:    popcntq 32(%rdi), %rdx
-; AVX512-NEXT:    addl %eax, %edx
-; AVX512-NEXT:    addl %ecx, %edx
-; AVX512-NEXT:    xorl %eax, %eax
-; AVX512-NEXT:    popcntq 24(%rdi), %rax
-; AVX512-NEXT:    xorl %ecx, %ecx
-; AVX512-NEXT:    popcntq 16(%rdi), %rcx
-; AVX512-NEXT:    popcntq 8(%rdi), %rsi
-; AVX512-NEXT:    addl %eax, %ecx
-; AVX512-NEXT:    xorl %eax, %eax
-; AVX512-NEXT:    popcntq (%rdi), %rax
-; AVX512-NEXT:    addl %esi, %eax
-; AVX512-NEXT:    addl %ecx, %eax
-; AVX512-NEXT:    addl %edx, %eax
-; AVX512-NEXT:    # kill: def $eax killed $eax killed $rax
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: load_ctpop_i512:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    popcntq 56(%rdi), %rax
+; AVX512F-NEXT:    popcntq 48(%rdi), %rcx
+; AVX512F-NEXT:    addl %eax, %ecx
+; AVX512F-NEXT:    popcntq 40(%rdi), %rax
+; AVX512F-NEXT:    popcntq 32(%rdi), %rdx
+; AVX512F-NEXT:    addl %eax, %edx
+; AVX512F-NEXT:    addl %ecx, %edx
+; AVX512F-NEXT:    popcntq 24(%rdi), %rcx
+; AVX512F-NEXT:    popcntq 16(%rdi), %rsi
+; AVX512F-NEXT:    popcntq 8(%rdi), %r8
+; AVX512F-NEXT:    popcntq (%rdi), %rax
+; AVX512F-NEXT:    addl %ecx, %esi
+; AVX512F-NEXT:    addl %r8d, %eax
+; AVX512F-NEXT:    addl %esi, %eax
+; AVX512F-NEXT:    addl %edx, %eax
+; AVX512F-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX512F-NEXT:    retq
+;
+; AVX512VL-LABEL: load_ctpop_i512:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    popcntq 56(%rdi), %rax
+; AVX512VL-NEXT:    popcntq 48(%rdi), %rcx
+; AVX512VL-NEXT:    addl %eax, %ecx
+; AVX512VL-NEXT:    xorl %eax, %eax
+; AVX512VL-NEXT:    popcntq 40(%rdi), %rax
+; AVX512VL-NEXT:    popcntq 32(%rdi), %rdx
+; AVX512VL-NEXT:    addl %eax, %edx
+; AVX512VL-NEXT:    addl %ecx, %edx
+; AVX512VL-NEXT:    xorl %eax, %eax
+; AVX512VL-NEXT:    popcntq 24(%rdi), %rax
+; AVX512VL-NEXT:    xorl %ecx, %ecx
+; AVX512VL-NEXT:    popcntq 16(%rdi), %rcx
+; AVX512VL-NEXT:    popcntq 8(%rdi), %rsi
+; AVX512VL-NEXT:    addl %eax, %ecx
+; AVX512VL-NEXT:    xorl %eax, %eax
+; AVX512VL-NEXT:    popcntq (%rdi), %rax
+; AVX512VL-NEXT:    addl %esi, %eax
+; AVX512VL-NEXT:    addl %ecx, %eax
+; AVX512VL-NEXT:    addl %edx, %eax
+; AVX512VL-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX512VL-NEXT:    retq
+;
+; AVX512POPCNT-LABEL: load_ctpop_i512:
+; AVX512POPCNT:       # %bb.0:
+; AVX512POPCNT-NEXT:    popcntq 56(%rdi), %rax
+; AVX512POPCNT-NEXT:    popcntq 48(%rdi), %rcx
+; AVX512POPCNT-NEXT:    addl %eax, %ecx
+; AVX512POPCNT-NEXT:    xorl %eax, %eax
+; AVX512POPCNT-NEXT:    popcntq 40(%rdi), %rax
+; AVX512POPCNT-NEXT:    popcntq 32(%rdi), %rdx
+; AVX512POPCNT-NEXT:    addl %eax, %edx
+; AVX512POPCNT-NEXT:    addl %ecx, %edx
+; AVX512POPCNT-NEXT:    xorl %eax, %eax
+; AVX512POPCNT-NEXT:    popcntq 24(%rdi), %rax
+; AVX512POPCNT-NEXT:    xorl %ecx, %ecx
+; AVX512POPCNT-NEXT:    popcntq 16(%rdi), %rcx
+; AVX512POPCNT-NEXT:    popcntq 8(%rdi), %rsi
+; AVX512POPCNT-NEXT:    addl %eax, %ecx
+; AVX512POPCNT-NEXT:    xorl %eax, %eax
+; AVX512POPCNT-NEXT:    popcntq (%rdi), %rax
+; AVX512POPCNT-NEXT:    addl %esi, %eax
+; AVX512POPCNT-NEXT:    addl %ecx, %eax
+; AVX512POPCNT-NEXT:    addl %edx, %eax
+; AVX512POPCNT-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX512POPCNT-NEXT:    retq
   %a0 = load i512, ptr %p0
   %cnt = call i512 @llvm.ctpop.i512(i512 %a0)
   %res = trunc i512 %cnt to i32
@@ -309,57 +507,149 @@ define i32 @test_ctpop_i1024(i1024 %a0) nounwind {
 ; AVX2-NEXT:    popq %r14
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: test_ctpop_i1024:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    pushq %r14
-; AVX512-NEXT:    pushq %rbx
-; AVX512-NEXT:    popcntq {{[0-9]+}}(%rsp), %rax
-; AVX512-NEXT:    popcntq {{[0-9]+}}(%rsp), %r10
-; AVX512-NEXT:    addl %eax, %r10d
-; AVX512-NEXT:    xorl %eax, %eax
-; AVX512-NEXT:    popcntq {{[0-9]+}}(%rsp), %rax
-; AVX512-NEXT:    popcntq {{[0-9]+}}(%rsp), %r11
-; AVX512-NEXT:    addl %eax, %r11d
-; AVX512-NEXT:    addl %r10d, %r11d
-; AVX512-NEXT:    xorl %eax, %eax
-; AVX512-NEXT:    popcntq {{[0-9]+}}(%rsp), %rax
-; AVX512-NEXT:    xorl %ebx, %ebx
-; AVX512-NEXT:    popcntq {{[0-9]+}}(%rsp), %rbx
-; AVX512-NEXT:    xorl %r14d, %r14d
-; AVX512-NEXT:    popcntq {{[0-9]+}}(%rsp), %r14
-; AVX512-NEXT:    addl %eax, %ebx
-; AVX512-NEXT:    xorl %r10d, %r10d
-; AVX512-NEXT:    popcntq {{[0-9]+}}(%rsp), %r10
-; AVX512-NEXT:    addl %r14d, %r10d
-; AVX512-NEXT:    addl %ebx, %r10d
-; AVX512-NEXT:    addl %r11d, %r10d
-; AVX512-NEXT:    xorl %eax, %eax
-; AVX512-NEXT:    popcntq {{[0-9]+}}(%rsp), %rax
-; AVX512-NEXT:    xorl %r11d, %r11d
-; AVX512-NEXT:    popcntq {{[0-9]+}}(%rsp), %r11
-; AVX512-NEXT:    addl %eax, %r11d
-; AVX512-NEXT:    xorl %eax, %eax
-; AVX512-NEXT:    popcntq %r9, %rax
-; AVX512-NEXT:    popcntq %r8, %r8
-; AVX512-NEXT:    addl %eax, %r8d
-; AVX512-NEXT:    addl %r11d, %r8d
-; AVX512-NEXT:    xorl %eax, %eax
-; AVX512-NEXT:    popcntq %rcx, %rax
-; AVX512-NEXT:    xorl %ecx, %ecx
-; AVX512-NEXT:    popcntq %rdx, %rcx
-; AVX512-NEXT:    addl %eax, %ecx
-; AVX512-NEXT:    xorl %edx, %edx
-; AVX512-NEXT:    popcntq %rsi, %rdx
-; AVX512-NEXT:    xorl %eax, %eax
-; AVX512-NEXT:    popcntq %rdi, %rax
-; AVX512-NEXT:    addl %edx, %eax
-; AVX512-NEXT:    addl %ecx, %eax
-; AVX512-NEXT:    addl %r8d, %eax
-; AVX512-NEXT:    addl %r10d, %eax
-; AVX512-NEXT:    # kill: def $eax killed $eax killed $rax
-; AVX512-NEXT:    popq %rbx
-; AVX512-NEXT:    popq %r14
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: test_ctpop_i1024:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    pushq %r14
+; AVX512F-NEXT:    pushq %rbx
+; AVX512F-NEXT:    popcntq {{[0-9]+}}(%rsp), %rax
+; AVX512F-NEXT:    popcntq {{[0-9]+}}(%rsp), %r10
+; AVX512F-NEXT:    addl %eax, %r10d
+; AVX512F-NEXT:    popcntq {{[0-9]+}}(%rsp), %rax
+; AVX512F-NEXT:    popcntq {{[0-9]+}}(%rsp), %r11
+; AVX512F-NEXT:    addl %eax, %r11d
+; AVX512F-NEXT:    addl %r10d, %r11d
+; AVX512F-NEXT:    popcntq {{[0-9]+}}(%rsp), %rax
+; AVX512F-NEXT:    popcntq {{[0-9]+}}(%rsp), %rbx
+; AVX512F-NEXT:    popcntq {{[0-9]+}}(%rsp), %r14
+; AVX512F-NEXT:    addl %eax, %ebx
+; AVX512F-NEXT:    popcntq {{[0-9]+}}(%rsp), %r10
+; AVX512F-NEXT:    addl %r14d, %r10d
+; AVX512F-NEXT:    addl %ebx, %r10d
+; AVX512F-NEXT:    addl %r11d, %r10d
+; AVX512F-NEXT:    popcntq {{[0-9]+}}(%rsp), %rax
+; AVX512F-NEXT:    popcntq {{[0-9]+}}(%rsp), %r11
+; AVX512F-NEXT:    addl %eax, %r11d
+; AVX512F-NEXT:    popcntq %r9, %rax
+; AVX512F-NEXT:    popcntq %r8, %r8
+; AVX512F-NEXT:    addl %eax, %r8d
+; AVX512F-NEXT:    addl %r11d, %r8d
+; AVX512F-NEXT:    popcntq %rcx, %rax
+; AVX512F-NEXT:    popcntq %rdx, %rcx
+; AVX512F-NEXT:    addl %eax, %ecx
+; AVX512F-NEXT:    popcntq %rsi, %rdx
+; AVX512F-NEXT:    popcntq %rdi, %rax
+; AVX512F-NEXT:    addl %edx, %eax
+; AVX512F-NEXT:    addl %ecx, %eax
+; AVX512F-NEXT:    addl %r8d, %eax
+; AVX512F-NEXT:    addl %r10d, %eax
+; AVX512F-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX512F-NEXT:    popq %rbx
+; AVX512F-NEXT:    popq %r14
+; AVX512F-NEXT:    retq
+;
+; AVX512VL-LABEL: test_ctpop_i1024:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    pushq %r14
+; AVX512VL-NEXT:    pushq %rbx
+; AVX512VL-NEXT:    popcntq {{[0-9]+}}(%rsp), %rax
+; AVX512VL-NEXT:    popcntq {{[0-9]+}}(%rsp), %r10
+; AVX512VL-NEXT:    addl %eax, %r10d
+; AVX512VL-NEXT:    xorl %eax, %eax
+; AVX512VL-NEXT:    popcntq {{[0-9]+}}(%rsp), %rax
+; AVX512VL-NEXT:    popcntq {{[0-9]+}}(%rsp), %r11
+; AVX512VL-NEXT:    addl %eax, %r11d
+; AVX512VL-NEXT:    addl %r10d, %r11d
+; AVX512VL-NEXT:    xorl %eax, %eax
+; AVX512VL-NEXT:    popcntq {{[0-9]+}}(%rsp), %rax
+; AVX512VL-NEXT:    xorl %ebx, %ebx
+; AVX512VL-NEXT:    popcntq {{[0-9]+}}(%rsp), %rbx
+; AVX512VL-NEXT:    xorl %r14d, %r14d
+; AVX512VL-NEXT:    popcntq {{[0-9]+}}(%rsp), %r14
+; AVX512VL-NEXT:    addl %eax, %ebx
+; AVX512VL-NEXT:    xorl %r10d, %r10d
+; AVX512VL-NEXT:    popcntq {{[0-9]+}}(%rsp), %r10
+; AVX512VL-NEXT:    addl %r14d, %r10d
+; AVX512VL-NEXT:    addl %ebx, %r10d
+; AVX512VL-NEXT:    addl %r11d, %r10d
+; AVX512VL-NEXT:    xorl %eax, %eax
+; AVX512VL-NEXT:    popcntq {{[0-9]+}}(%rsp), %rax
+; AVX512VL-NEXT:    xorl %r11d, %r11d
+; AVX512VL-NEXT:    popcntq {{[0-9]+}}(%rsp), %r11
+; AVX512VL-NEXT:    addl %eax, %r11d
+; AVX512VL-NEXT:    xorl %eax, %eax
+; AVX512VL-NEXT:    popcntq %r9, %rax
+; AVX512VL-NEXT:    popcntq %r8, %r8
+; AVX512VL-NEXT:    addl %eax, %r8d
+; AVX512VL-NEXT:    addl %r11d, %r8d
+; AVX512VL-NEXT:    xorl %eax, %eax
+; AVX512VL-NEXT:    popcntq %rcx, %rax
+; AVX512VL-NEXT:    xorl %ecx, %ecx
+; AVX512VL-NEXT:    popcntq %rdx, %rcx
+; AVX512VL-NEXT:    addl %eax, %ecx
+; AVX512VL-NEXT:    xorl %edx, %edx
+; AVX512VL-NEXT:    popcntq %rsi, %rdx
+; AVX512VL-NEXT:    xorl %eax, %eax
+; AVX512VL-NEXT:    popcntq %rdi, %rax
+; AVX512VL-NEXT:    addl %edx, %eax
+; AVX512VL-NEXT:    addl %ecx, %eax
+; AVX512VL-NEXT:    addl %r8d, %eax
+; AVX512VL-NEXT:    addl %r10d, %eax
+; AVX512VL-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX512VL-NEXT:    popq %rbx
+; AVX512VL-NEXT:    popq %r14
+; AVX512VL-NEXT:    retq
+;
+; AVX512POPCNT-LABEL: test_ctpop_i1024:
+; AVX512POPCNT:       # %bb.0:
+; AVX512POPCNT-NEXT:    pushq %r14
+; AVX512POPCNT-NEXT:    pushq %rbx
+; AVX512POPCNT-NEXT:    popcntq {{[0-9]+}}(%rsp), %rax
+; AVX512POPCNT-NEXT:    popcntq {{[0-9]+}}(%rsp), %r10
+; AVX512POPCNT-NEXT:    addl %eax, %r10d
+; AVX512POPCNT-NEXT:    xorl %eax, %eax
+; AVX512POPCNT-NEXT:    popcntq {{[0-9]+}}(%rsp), %rax
+; AVX512POPCNT-NEXT:    popcntq {{[0-9]+}}(%rsp), %r11
+; AVX512POPCNT-NEXT:    addl %eax, %r11d
+; AVX512POPCNT-NEXT:    addl %r10d, %r11d
+; AVX512POPCNT-NEXT:    xorl %eax, %eax
+; AVX512POPCNT-NEXT:    popcntq {{[0-9]+}}(%rsp), %rax
+; AVX512POPCNT-NEXT:    xorl %ebx, %ebx
+; AVX512POPCNT-NEXT:    popcntq {{[0-9]+}}(%rsp), %rbx
+; AVX512POPCNT-NEXT:    xorl %r14d, %r14d
+; AVX512POPCNT-NEXT:    popcntq {{[0-9]+}}(%rsp), %r14
+; AVX512POPCNT-NEXT:    addl %eax, %ebx
+; AVX512POPCNT-NEXT:    xorl %r10d, %r10d
+; AVX512POPCNT-NEXT:    popcntq {{[0-9]+}}(%rsp), %r10
+; AVX512POPCNT-NEXT:    addl %r14d, %r10d
+; AVX512POPCNT-NEXT:    addl %ebx, %r10d
+; AVX512POPCNT-NEXT:    addl %r11d, %r10d
+; AVX512POPCNT-NEXT:    xorl %eax, %eax
+; AVX512POPCNT-NEXT:    popcntq {{[0-9]+}}(%rsp), %rax
+; AVX512POPCNT-NEXT:    xorl %r11d, %r11d
+; AVX512POPCNT-NEXT:    popcntq {{[0-9]+}}(%rsp), %r11
+; AVX512POPCNT-NEXT:    addl %eax, %r11d
+; AVX512POPCNT-NEXT:    xorl %eax, %eax
+; AVX512POPCNT-NEXT:    popcntq %r9, %rax
+; AVX512POPCNT-NEXT:    popcntq %r8, %r8
+; AVX512POPCNT-NEXT:    addl %eax, %r8d
+; AVX512POPCNT-NEXT:    addl %r11d, %r8d
+; AVX512POPCNT-NEXT:    xorl %eax, %eax
+; AVX512POPCNT-NEXT:    popcntq %rcx, %rax
+; AVX512POPCNT-NEXT:    xorl %ecx, %ecx
+; AVX512POPCNT-NEXT:    popcntq %rdx, %rcx
+; AVX512POPCNT-NEXT:...
[truncated]

@RKSimon RKSimon merged commit bb926c1 into llvm:main Dec 8, 2025
11 of 12 checks passed
@RKSimon RKSimon deleted the x86-bitcnt-noavx512vl branch December 8, 2025 10:34
honeygoyal pushed a commit to honeygoyal/llvm-project that referenced this pull request Dec 9, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants