Skip to content

Conversation

@RKSimon
Copy link
Collaborator

@RKSimon RKSimon commented Nov 13, 2025

No description provided.

@RKSimon RKSimon enabled auto-merge (squash) November 13, 2025 13:00
@llvmbot
Copy link
Member

llvmbot commented Nov 13, 2025

@llvm/pr-subscribers-backend-x86

Author: Simon Pilgrim (RKSimon)

Changes

Patch is 38.62 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/167877.diff

1 File Affected:

  • (modified) llvm/test/CodeGen/X86/bfloat.ll (+467-276)
diff --git a/llvm/test/CodeGen/X86/bfloat.ll b/llvm/test/CodeGen/X86/bfloat.ll
index 684e2921b789e..7bccd6ba088ac 100644
--- a/llvm/test/CodeGen/X86/bfloat.ll
+++ b/llvm/test/CodeGen/X86/bfloat.ll
@@ -1,9 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=i686-linux-gnu -mattr=avx512bf16,avx512fp16,avx512vl | FileCheck %s --check-prefixes=X86
-; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=CHECK,SSE2
-; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=avx512bf16,avx512vl | FileCheck %s --check-prefixes=CHECK,AVX,F16,BF16
-; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=avx512bf16,avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK,AVX,F16,FP16
-; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=avxneconvert,f16c | FileCheck %s --check-prefixes=CHECK,AVX,BF16,AVXNC
+; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64,SSE2
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=avx512bf16,avx512vl | FileCheck %s --check-prefixes=X64,AVX,AVX512,AVX512BF16
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=avx512bf16,avx512fp16,avx512vl | FileCheck %s --check-prefixes=X64,AVX,AVX512,AVX512FP16
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=avxneconvert,f16c | FileCheck %s --check-prefixes=X64,AVX,AVXNC
 
 define void @add(ptr %pa, ptr %pb, ptr %pc) nounwind {
 ; X86-LABEL: add:
@@ -39,18 +39,18 @@ define void @add(ptr %pa, ptr %pb, ptr %pc) nounwind {
 ; SSE2-NEXT:    popq %rbx
 ; SSE2-NEXT:    retq
 ;
-; F16-LABEL: add:
-; F16:       # %bb.0:
-; F16-NEXT:    movzwl (%rsi), %eax
-; F16-NEXT:    shll $16, %eax
-; F16-NEXT:    vmovd %eax, %xmm0
-; F16-NEXT:    movzwl (%rdi), %eax
-; F16-NEXT:    shll $16, %eax
-; F16-NEXT:    vmovd %eax, %xmm1
-; F16-NEXT:    vaddss %xmm0, %xmm1, %xmm0
-; F16-NEXT:    vcvtneps2bf16 %xmm0, %xmm0
-; F16-NEXT:    vpextrw $0, %xmm0, (%rdx)
-; F16-NEXT:    retq
+; AVX512-LABEL: add:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    movzwl (%rsi), %eax
+; AVX512-NEXT:    shll $16, %eax
+; AVX512-NEXT:    vmovd %eax, %xmm0
+; AVX512-NEXT:    movzwl (%rdi), %eax
+; AVX512-NEXT:    shll $16, %eax
+; AVX512-NEXT:    vmovd %eax, %xmm1
+; AVX512-NEXT:    vaddss %xmm0, %xmm1, %xmm0
+; AVX512-NEXT:    vcvtneps2bf16 %xmm0, %xmm0
+; AVX512-NEXT:    vpextrw $0, %xmm0, (%rdx)
+; AVX512-NEXT:    retq
 ;
 ; AVXNC-LABEL: add:
 ; AVXNC:       # %bb.0:
@@ -98,17 +98,29 @@ define bfloat @add2(bfloat %a, bfloat %b) nounwind {
 ; SSE2-NEXT:    popq %rax
 ; SSE2-NEXT:    retq
 ;
-; FP16-LABEL: add2:
-; FP16:       # %bb.0:
-; FP16-NEXT:    vmovw %xmm0, %eax
-; FP16-NEXT:    vmovw %xmm1, %ecx
-; FP16-NEXT:    shll $16, %ecx
-; FP16-NEXT:    vmovd %ecx, %xmm0
-; FP16-NEXT:    shll $16, %eax
-; FP16-NEXT:    vmovd %eax, %xmm1
-; FP16-NEXT:    vaddss %xmm0, %xmm1, %xmm0
-; FP16-NEXT:    vcvtneps2bf16 %xmm0, %xmm0
-; FP16-NEXT:    retq
+; AVX512BF16-LABEL: add2:
+; AVX512BF16:       # %bb.0:
+; AVX512BF16-NEXT:    vpextrw $0, %xmm0, %eax
+; AVX512BF16-NEXT:    vpextrw $0, %xmm1, %ecx
+; AVX512BF16-NEXT:    shll $16, %ecx
+; AVX512BF16-NEXT:    vmovd %ecx, %xmm0
+; AVX512BF16-NEXT:    shll $16, %eax
+; AVX512BF16-NEXT:    vmovd %eax, %xmm1
+; AVX512BF16-NEXT:    vaddss %xmm0, %xmm1, %xmm0
+; AVX512BF16-NEXT:    vcvtneps2bf16 %xmm0, %xmm0
+; AVX512BF16-NEXT:    retq
+;
+; AVX512FP16-LABEL: add2:
+; AVX512FP16:       # %bb.0:
+; AVX512FP16-NEXT:    vmovw %xmm0, %eax
+; AVX512FP16-NEXT:    vmovw %xmm1, %ecx
+; AVX512FP16-NEXT:    shll $16, %ecx
+; AVX512FP16-NEXT:    vmovd %ecx, %xmm0
+; AVX512FP16-NEXT:    shll $16, %eax
+; AVX512FP16-NEXT:    vmovd %eax, %xmm1
+; AVX512FP16-NEXT:    vaddss %xmm0, %xmm1, %xmm0
+; AVX512FP16-NEXT:    vcvtneps2bf16 %xmm0, %xmm0
+; AVX512FP16-NEXT:    retq
 ;
 ; AVXNC-LABEL: add2:
 ; AVXNC:       # %bb.0:
@@ -189,34 +201,63 @@ define void @add_double(ptr %pa, ptr %pb, ptr %pc) nounwind {
 ; SSE2-NEXT:    popq %rbp
 ; SSE2-NEXT:    retq
 ;
-; FP16-LABEL: add_double:
-; FP16:       # %bb.0:
-; FP16-NEXT:    pushq %rbp
-; FP16-NEXT:    pushq %r14
-; FP16-NEXT:    pushq %rbx
-; FP16-NEXT:    movq %rdx, %rbx
-; FP16-NEXT:    movq %rsi, %r14
-; FP16-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; FP16-NEXT:    callq __truncdfbf2@PLT
-; FP16-NEXT:    vmovw %xmm0, %ebp
-; FP16-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; FP16-NEXT:    callq __truncdfbf2@PLT
-; FP16-NEXT:    vmovw %xmm0, %eax
-; FP16-NEXT:    shll $16, %eax
-; FP16-NEXT:    vmovd %eax, %xmm0
-; FP16-NEXT:    shll $16, %ebp
-; FP16-NEXT:    vmovd %ebp, %xmm1
-; FP16-NEXT:    vaddss %xmm0, %xmm1, %xmm0
-; FP16-NEXT:    vcvtneps2bf16 %xmm0, %xmm0
-; FP16-NEXT:    vmovw %xmm0, %eax
-; FP16-NEXT:    shll $16, %eax
-; FP16-NEXT:    vmovd %eax, %xmm0
-; FP16-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; FP16-NEXT:    vmovsd %xmm0, (%rbx)
-; FP16-NEXT:    popq %rbx
-; FP16-NEXT:    popq %r14
-; FP16-NEXT:    popq %rbp
-; FP16-NEXT:    retq
+; AVX512BF16-LABEL: add_double:
+; AVX512BF16:       # %bb.0:
+; AVX512BF16-NEXT:    pushq %rbp
+; AVX512BF16-NEXT:    pushq %r14
+; AVX512BF16-NEXT:    pushq %rbx
+; AVX512BF16-NEXT:    movq %rdx, %rbx
+; AVX512BF16-NEXT:    movq %rsi, %r14
+; AVX512BF16-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
+; AVX512BF16-NEXT:    callq __truncdfbf2@PLT
+; AVX512BF16-NEXT:    vpextrw $0, %xmm0, %ebp
+; AVX512BF16-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
+; AVX512BF16-NEXT:    callq __truncdfbf2@PLT
+; AVX512BF16-NEXT:    vpextrw $0, %xmm0, %eax
+; AVX512BF16-NEXT:    shll $16, %eax
+; AVX512BF16-NEXT:    vmovd %eax, %xmm0
+; AVX512BF16-NEXT:    shll $16, %ebp
+; AVX512BF16-NEXT:    vmovd %ebp, %xmm1
+; AVX512BF16-NEXT:    vaddss %xmm0, %xmm1, %xmm0
+; AVX512BF16-NEXT:    vcvtneps2bf16 %xmm0, %xmm0
+; AVX512BF16-NEXT:    vmovd %xmm0, %eax
+; AVX512BF16-NEXT:    shll $16, %eax
+; AVX512BF16-NEXT:    vmovd %eax, %xmm0
+; AVX512BF16-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
+; AVX512BF16-NEXT:    vmovsd %xmm0, (%rbx)
+; AVX512BF16-NEXT:    popq %rbx
+; AVX512BF16-NEXT:    popq %r14
+; AVX512BF16-NEXT:    popq %rbp
+; AVX512BF16-NEXT:    retq
+;
+; AVX512FP16-LABEL: add_double:
+; AVX512FP16:       # %bb.0:
+; AVX512FP16-NEXT:    pushq %rbp
+; AVX512FP16-NEXT:    pushq %r14
+; AVX512FP16-NEXT:    pushq %rbx
+; AVX512FP16-NEXT:    movq %rdx, %rbx
+; AVX512FP16-NEXT:    movq %rsi, %r14
+; AVX512FP16-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX512FP16-NEXT:    callq __truncdfbf2@PLT
+; AVX512FP16-NEXT:    vmovw %xmm0, %ebp
+; AVX512FP16-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX512FP16-NEXT:    callq __truncdfbf2@PLT
+; AVX512FP16-NEXT:    vmovw %xmm0, %eax
+; AVX512FP16-NEXT:    shll $16, %eax
+; AVX512FP16-NEXT:    vmovd %eax, %xmm0
+; AVX512FP16-NEXT:    shll $16, %ebp
+; AVX512FP16-NEXT:    vmovd %ebp, %xmm1
+; AVX512FP16-NEXT:    vaddss %xmm0, %xmm1, %xmm0
+; AVX512FP16-NEXT:    vcvtneps2bf16 %xmm0, %xmm0
+; AVX512FP16-NEXT:    vmovw %xmm0, %eax
+; AVX512FP16-NEXT:    shll $16, %eax
+; AVX512FP16-NEXT:    vmovd %eax, %xmm0
+; AVX512FP16-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
+; AVX512FP16-NEXT:    vmovsd %xmm0, (%rbx)
+; AVX512FP16-NEXT:    popq %rbx
+; AVX512FP16-NEXT:    popq %r14
+; AVX512FP16-NEXT:    popq %rbp
+; AVX512FP16-NEXT:    retq
 ;
 ; AVXNC-LABEL: add_double:
 ; AVXNC:       # %bb.0:
@@ -310,30 +351,55 @@ define double @add_double2(double %da, double %db) nounwind {
 ; SSE2-NEXT:    popq %rbx
 ; SSE2-NEXT:    retq
 ;
-; FP16-LABEL: add_double2:
-; FP16:       # %bb.0:
-; FP16-NEXT:    pushq %rbx
-; FP16-NEXT:    subq $16, %rsp
-; FP16-NEXT:    vmovsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; FP16-NEXT:    callq __truncdfbf2@PLT
-; FP16-NEXT:    vmovw %xmm0, %ebx
-; FP16-NEXT:    vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
-; FP16-NEXT:    # xmm0 = mem[0],zero
-; FP16-NEXT:    callq __truncdfbf2@PLT
-; FP16-NEXT:    vmovw %xmm0, %eax
-; FP16-NEXT:    shll $16, %eax
-; FP16-NEXT:    vmovd %eax, %xmm0
-; FP16-NEXT:    shll $16, %ebx
-; FP16-NEXT:    vmovd %ebx, %xmm1
-; FP16-NEXT:    vaddss %xmm0, %xmm1, %xmm0
-; FP16-NEXT:    vcvtneps2bf16 %xmm0, %xmm0
-; FP16-NEXT:    vmovw %xmm0, %eax
-; FP16-NEXT:    shll $16, %eax
-; FP16-NEXT:    vmovd %eax, %xmm0
-; FP16-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
-; FP16-NEXT:    addq $16, %rsp
-; FP16-NEXT:    popq %rbx
-; FP16-NEXT:    retq
+; AVX512BF16-LABEL: add_double2:
+; AVX512BF16:       # %bb.0:
+; AVX512BF16-NEXT:    pushq %rbx
+; AVX512BF16-NEXT:    subq $16, %rsp
+; AVX512BF16-NEXT:    vmovsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512BF16-NEXT:    callq __truncdfbf2@PLT
+; AVX512BF16-NEXT:    vpextrw $0, %xmm0, %ebx
+; AVX512BF16-NEXT:    vmovq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
+; AVX512BF16-NEXT:    # xmm0 = mem[0],zero
+; AVX512BF16-NEXT:    callq __truncdfbf2@PLT
+; AVX512BF16-NEXT:    vpextrw $0, %xmm0, %eax
+; AVX512BF16-NEXT:    shll $16, %eax
+; AVX512BF16-NEXT:    vmovd %eax, %xmm0
+; AVX512BF16-NEXT:    shll $16, %ebx
+; AVX512BF16-NEXT:    vmovd %ebx, %xmm1
+; AVX512BF16-NEXT:    vaddss %xmm0, %xmm1, %xmm0
+; AVX512BF16-NEXT:    vcvtneps2bf16 %xmm0, %xmm0
+; AVX512BF16-NEXT:    vmovd %xmm0, %eax
+; AVX512BF16-NEXT:    shll $16, %eax
+; AVX512BF16-NEXT:    vmovd %eax, %xmm0
+; AVX512BF16-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
+; AVX512BF16-NEXT:    addq $16, %rsp
+; AVX512BF16-NEXT:    popq %rbx
+; AVX512BF16-NEXT:    retq
+;
+; AVX512FP16-LABEL: add_double2:
+; AVX512FP16:       # %bb.0:
+; AVX512FP16-NEXT:    pushq %rbx
+; AVX512FP16-NEXT:    subq $16, %rsp
+; AVX512FP16-NEXT:    vmovsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512FP16-NEXT:    callq __truncdfbf2@PLT
+; AVX512FP16-NEXT:    vmovw %xmm0, %ebx
+; AVX512FP16-NEXT:    vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; AVX512FP16-NEXT:    # xmm0 = mem[0],zero
+; AVX512FP16-NEXT:    callq __truncdfbf2@PLT
+; AVX512FP16-NEXT:    vmovw %xmm0, %eax
+; AVX512FP16-NEXT:    shll $16, %eax
+; AVX512FP16-NEXT:    vmovd %eax, %xmm0
+; AVX512FP16-NEXT:    shll $16, %ebx
+; AVX512FP16-NEXT:    vmovd %ebx, %xmm1
+; AVX512FP16-NEXT:    vaddss %xmm0, %xmm1, %xmm0
+; AVX512FP16-NEXT:    vcvtneps2bf16 %xmm0, %xmm0
+; AVX512FP16-NEXT:    vmovw %xmm0, %eax
+; AVX512FP16-NEXT:    shll $16, %eax
+; AVX512FP16-NEXT:    vmovd %eax, %xmm0
+; AVX512FP16-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
+; AVX512FP16-NEXT:    addq $16, %rsp
+; AVX512FP16-NEXT:    popq %rbx
+; AVX512FP16-NEXT:    retq
 ;
 ; AVXNC-LABEL: add_double2:
 ; AVXNC:       # %bb.0:
@@ -393,15 +459,15 @@ define void @add_constant(ptr %pa, ptr %pc) nounwind {
 ; SSE2-NEXT:    popq %rbx
 ; SSE2-NEXT:    retq
 ;
-; F16-LABEL: add_constant:
-; F16:       # %bb.0:
-; F16-NEXT:    movzwl (%rdi), %eax
-; F16-NEXT:    shll $16, %eax
-; F16-NEXT:    vmovd %eax, %xmm0
-; F16-NEXT:    vaddss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; F16-NEXT:    vcvtneps2bf16 %xmm0, %xmm0
-; F16-NEXT:    vpextrw $0, %xmm0, (%rsi)
-; F16-NEXT:    retq
+; AVX512-LABEL: add_constant:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    movzwl (%rdi), %eax
+; AVX512-NEXT:    shll $16, %eax
+; AVX512-NEXT:    vmovd %eax, %xmm0
+; AVX512-NEXT:    vaddss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT:    vcvtneps2bf16 %xmm0, %xmm0
+; AVX512-NEXT:    vpextrw $0, %xmm0, (%rsi)
+; AVX512-NEXT:    retq
 ;
 ; AVXNC-LABEL: add_constant:
 ; AVXNC:       # %bb.0:
@@ -439,14 +505,23 @@ define bfloat @add_constant2(bfloat %a) nounwind {
 ; SSE2-NEXT:    popq %rax
 ; SSE2-NEXT:    retq
 ;
-; FP16-LABEL: add_constant2:
-; FP16:       # %bb.0:
-; FP16-NEXT:    vmovw %xmm0, %eax
-; FP16-NEXT:    shll $16, %eax
-; FP16-NEXT:    vmovd %eax, %xmm0
-; FP16-NEXT:    vaddss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; FP16-NEXT:    vcvtneps2bf16 %xmm0, %xmm0
-; FP16-NEXT:    retq
+; AVX512BF16-LABEL: add_constant2:
+; AVX512BF16:       # %bb.0:
+; AVX512BF16-NEXT:    vpextrw $0, %xmm0, %eax
+; AVX512BF16-NEXT:    shll $16, %eax
+; AVX512BF16-NEXT:    vmovd %eax, %xmm0
+; AVX512BF16-NEXT:    vaddss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512BF16-NEXT:    vcvtneps2bf16 %xmm0, %xmm0
+; AVX512BF16-NEXT:    retq
+;
+; AVX512FP16-LABEL: add_constant2:
+; AVX512FP16:       # %bb.0:
+; AVX512FP16-NEXT:    vmovw %xmm0, %eax
+; AVX512FP16-NEXT:    shll $16, %eax
+; AVX512FP16-NEXT:    vmovd %eax, %xmm0
+; AVX512FP16-NEXT:    vaddss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512FP16-NEXT:    vcvtneps2bf16 %xmm0, %xmm0
+; AVX512FP16-NEXT:    retq
 ;
 ; AVXNC-LABEL: add_constant2:
 ; AVXNC:       # %bb.0:
@@ -467,10 +542,10 @@ define void @store_constant(ptr %pc) nounwind {
 ; X86-NEXT:    movw $16256, (%eax) # imm = 0x3F80
 ; X86-NEXT:    retl
 ;
-; CHECK-LABEL: store_constant:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movw $16256, (%rdi) # imm = 0x3F80
-; CHECK-NEXT:    retq
+; X64-LABEL: store_constant:
+; X64:       # %bb.0:
+; X64-NEXT:    movw $16256, (%rdi) # imm = 0x3F80
+; X64-NEXT:    retq
   store bfloat 1.0, ptr %pc
   ret void
 }
@@ -484,11 +559,11 @@ define void @fold_ext_trunc(ptr %pa, ptr %pc) nounwind {
 ; X86-NEXT:    movw %cx, (%eax)
 ; X86-NEXT:    retl
 ;
-; CHECK-LABEL: fold_ext_trunc:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movzwl (%rdi), %eax
-; CHECK-NEXT:    movw %ax, (%rsi)
-; CHECK-NEXT:    retq
+; X64-LABEL: fold_ext_trunc:
+; X64:       # %bb.0:
+; X64-NEXT:    movzwl (%rdi), %eax
+; X64-NEXT:    movw %ax, (%rsi)
+; X64-NEXT:    retq
   %a = load bfloat, ptr %pa
   %ext = fpext bfloat %a to float
   %trunc = fptrunc float %ext to bfloat
@@ -502,9 +577,9 @@ define bfloat @fold_ext_trunc2(bfloat %a) nounwind {
 ; X86-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
 ; X86-NEXT:    retl
 ;
-; CHECK-LABEL: fold_ext_trunc2:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    retq
+; X64-LABEL: fold_ext_trunc2:
+; X64:       # %bb.0:
+; X64-NEXT:    retq
   %ext = fpext bfloat %a to float
   %trunc = fptrunc float %ext to bfloat
   ret bfloat %trunc
@@ -526,11 +601,17 @@ define bfloat @fold_from_half(half %a) nounwind {
 ; SSE2-NEXT:    popq %rax
 ; SSE2-NEXT:    retq
 ;
-; FP16-LABEL: fold_from_half:
-; FP16:       # %bb.0:
-; FP16-NEXT:    vcvtsh2ss %xmm0, %xmm0, %xmm0
-; FP16-NEXT:    vcvtneps2bf16 %xmm0, %xmm0
-; FP16-NEXT:    retq
+; AVX512BF16-LABEL: fold_from_half:
+; AVX512BF16:       # %bb.0:
+; AVX512BF16-NEXT:    vcvtph2ps %xmm0, %xmm0
+; AVX512BF16-NEXT:    vcvtneps2bf16 %xmm0, %xmm0
+; AVX512BF16-NEXT:    retq
+;
+; AVX512FP16-LABEL: fold_from_half:
+; AVX512FP16:       # %bb.0:
+; AVX512FP16-NEXT:    vcvtsh2ss %xmm0, %xmm0, %xmm0
+; AVX512FP16-NEXT:    vcvtneps2bf16 %xmm0, %xmm0
+; AVX512FP16-NEXT:    retq
 ;
 ; AVXNC-LABEL: fold_from_half:
 ; AVXNC:       # %bb.0:
@@ -561,21 +642,29 @@ define half @fold_to_half(bfloat %a) nounwind {
 ; SSE2-NEXT:    popq %rax
 ; SSE2-NEXT:    retq
 ;
-; BF16-LABEL: fold_to_half:
-; BF16:       # %bb.0:
-; BF16-NEXT:    vpextrw $0, %xmm0, %eax
-; BF16-NEXT:    shll $16, %eax
-; BF16-NEXT:    vmovd %eax, %xmm0
-; BF16-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
-; BF16-NEXT:    retq
-;
-; FP16-LABEL: fold_to_half:
-; FP16:       # %bb.0:
-; FP16-NEXT:    vmovw %xmm0, %eax
-; FP16-NEXT:    shll $16, %eax
-; FP16-NEXT:    vmovd %eax, %xmm0
-; FP16-NEXT:    vcvtss2sh %xmm0, %xmm0, %xmm0
-; FP16-NEXT:    retq
+; AVX512BF16-LABEL: fold_to_half:
+; AVX512BF16:       # %bb.0:
+; AVX512BF16-NEXT:    vpextrw $0, %xmm0, %eax
+; AVX512BF16-NEXT:    shll $16, %eax
+; AVX512BF16-NEXT:    vmovd %eax, %xmm0
+; AVX512BF16-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
+; AVX512BF16-NEXT:    retq
+;
+; AVX512FP16-LABEL: fold_to_half:
+; AVX512FP16:       # %bb.0:
+; AVX512FP16-NEXT:    vmovw %xmm0, %eax
+; AVX512FP16-NEXT:    shll $16, %eax
+; AVX512FP16-NEXT:    vmovd %eax, %xmm0
+; AVX512FP16-NEXT:    vcvtss2sh %xmm0, %xmm0, %xmm0
+; AVX512FP16-NEXT:    retq
+;
+; AVXNC-LABEL: fold_to_half:
+; AVXNC:       # %bb.0:
+; AVXNC-NEXT:    vpextrw $0, %xmm0, %eax
+; AVXNC-NEXT:    shll $16, %eax
+; AVXNC-NEXT:    vmovd %eax, %xmm0
+; AVXNC-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
+; AVXNC-NEXT:    retq
   %ext = fpext bfloat %a to float
   %trunc = fptrunc float %ext to half
   ret half %trunc
@@ -587,9 +676,9 @@ define bfloat @bitcast_from_half(half %a) nounwind {
 ; X86-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
 ; X86-NEXT:    retl
 ;
-; CHECK-LABEL: bitcast_from_half:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    retq
+; X64-LABEL: bitcast_from_half:
+; X64:       # %bb.0:
+; X64-NEXT:    retq
   %bc = bitcast half %a to bfloat
   ret bfloat %bc
 }
@@ -600,9 +689,9 @@ define half @bitcast_to_half(bfloat %a) nounwind {
 ; X86-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
 ; X86-NEXT:    retl
 ;
-; CHECK-LABEL: bitcast_to_half:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    retq
+; X64-LABEL: bitcast_to_half:
+; X64:       # %bb.0:
+; X64-NEXT:    retq
   %bc = bitcast bfloat %a to half
   ret half %bc
 }
@@ -753,16 +842,16 @@ define <8 x bfloat> @addv(<8 x bfloat> %a, <8 x bfloat> %b) nounwind {
 ; SSE2-NEXT:    popq %rbp
 ; SSE2-NEXT:    retq
 ;
-; F16-LABEL: addv:
-; F16:       # %bb.0:
-; F16-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
-; F16-NEXT:    vpslld $16, %ymm1, %ymm1
-; F16-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; F16-NEXT:    vpslld $16, %ymm0, %ymm0
-; F16-NEXT:    vaddps %ymm1, %ymm0, %ymm0
-; F16-NEXT:    vcvtneps2bf16 %ymm0, %xmm0
-; F16-NEXT:    vzeroupper
-; F16-NEXT:    retq
+; AVX512-LABEL: addv:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
+; AVX512-NEXT:    vpslld $16, %ymm1, %ymm1
+; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX512-NEXT:    vpslld $16, %ymm0, %ymm0
+; AVX512-NEXT:    vaddps %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vcvtneps2bf16 %ymm0, %xmm0
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
 ;
 ; AVXNC-LABEL: addv:
 ; AVXNC:       # %bb.0:
@@ -791,16 +880,22 @@ define <2 x bfloat> @pr62997(bfloat %a, bfloat %b) {
 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
 ; SSE2-NEXT:    retq
 ;
-; BF16-LABEL: pr62997:
-; BF16:       # %bb.0:
-; BF16-NEXT:    vpextrw $0, %xmm1, %eax
-; BF16-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0
-; BF16-NEXT:    retq
+; AVX512BF16-LABEL: pr62997:
+; AVX512BF16:       # %bb.0:
+; AVX512BF16-NEXT:    vpextrw $0, %xmm1, %eax
+; AVX512BF16-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0
+; AVX512BF16-NEXT:    retq
 ;
-; FP16-LABEL: pr62997:
-; FP16:       # %bb.0:
-; FP16-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; FP16-NEXT:    retq
+; AVX512FP16-LABEL: pr62997:
+; AVX512FP16:       # %bb.0:
+; AVX512FP16-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX512FP16-NEXT:    retq
+;
+; AVXNC-LABEL: pr62997:
+; AVXNC:       # %bb.0:
+; AVXNC-NEXT:    vpextrw $0, %xmm1, %eax
+; AVXNC-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0
+; AVXNC-NEXT:    retq
   %1 = insertelement <2 x bfloat> undef, bfloat %a, i64 0
   %2 = insertelement <2 x bfloat> %1, bfloat %b, i64 1
   ret <2 x bfloat> %2
@@ -820,10 +915,10 @@ define <32 x bfloat> @pr63017() {
 ; SSE2-NEXT:    xorps %xmm3, %xmm3
 ; SSE2-NEXT:    retq
 ;
-; F16-LABEL: pr63017:
-; F16:       # %bb.0:
-; F16-NEXT:    vxorps %xmm0, %xmm0, %xmm0
-; F16-NEXT:    retq
+; AVX512-LABEL: pr63017:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; AVX512-NEXT:    retq
 ;
 ; AVXNC-LABEL: pr63017:
 ; AVXNC:       # %bb.0:
@@ -1077,11 +1172,17 @@ define <32 x bfloat> @pr63017_2() nounwind {
 ; SSE2-NEXT:    popq %r14
 ; SSE2-NEXT:    retq
 ;
-; FP16-LABEL: pr63017_2:
-; FP16:       # %bb.0:
-; FP16-NEXT:    vpbroadcastw {{.*#+}} zmm0 = [-1.875E+0,-1.875E+0,-1.875E+0,-1.875E+0,-1.875E+0,-1.875E+0,-1.875E+0,-1.875E+0,-1.875E+0,-1.875E+0,-1.875E+0,-1.875E+0,-1.875E+0,-1.875E+0,-1.875E+0,-1.875E+0,-1.875E+0,-1.875E+0,-1.875E+0,-1.875E+0,-1.875E+0,-1.875E+0,-1.875E+0,-1.875E+0,-1.875E+0,-1.875E+0,-1.875E+0,-1.875E+0,-1.875E+0,-1.875E+0,-1.875E+0,-1.875E+0]
-; FP16-NEXT:    vmovdqu16 (%rax), %zmm0 {%k1}
-; FP16-NEXT:    retq
+; AVX512BF16-LABEL: pr63017_2:
+; AVX512BF16:       #...
[truncated]

@RKSimon RKSimon merged commit f969c86 into llvm:main Nov 13, 2025
11 of 12 checks passed
@RKSimon RKSimon deleted the x86-bfloat-check-prefixes branch November 13, 2025 13:36
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants