
Conversation

RKSimon (Collaborator) commented Dec 1, 2025

These were missed in #170160

RKSimon enabled auto-merge (squash) December 1, 2025 16:45
llvmbot (Member) commented Dec 1, 2025

@llvm/pr-subscribers-backend-x86

Author: Simon Pilgrim (RKSimon)

Changes

These were missed in #170160


Full diff: https://github.com/llvm/llvm-project/pull/170168.diff

1 File Affected:

  • (added) llvm/test/CodeGen/X86/combine-ffloor.ll (+178)
diff --git a/llvm/test/CodeGen/X86/combine-ffloor.ll b/llvm/test/CodeGen/X86/combine-ffloor.ll
new file mode 100644
index 0000000000000..35c28d2c85399
--- /dev/null
+++ b/llvm/test/CodeGen/X86/combine-ffloor.ll
@@ -0,0 +1,178 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX,AVX512
+
+define <4 x double> @concat_floor_v4f64_v2f64(<2 x double> %a0, <2 x double> %a1) {
+; SSE-LABEL: concat_floor_v4f64_v2f64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    roundpd $9, %xmm0, %xmm0
+; SSE-NEXT:    roundpd $9, %xmm1, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: concat_floor_v4f64_v2f64:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vroundpd $9, %xmm0, %xmm0
+; AVX-NEXT:    vroundpd $9, %xmm1, %xmm1
+; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT:    retq
+  %v0 = call <2 x double> @llvm.floor.v2f64(<2 x double> %a0)
+  %v1 = call <2 x double> @llvm.floor.v2f64(<2 x double> %a1)
+  %res  = shufflevector <2 x double> %v0, <2 x double> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x double> %res
+}
+
+define <8 x float> @concat_floor_v8f32_v4f32(<4 x float> %a0, <4 x float> %a1) {
+; SSE-LABEL: concat_floor_v8f32_v4f32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    roundps $9, %xmm0, %xmm0
+; SSE-NEXT:    roundps $9, %xmm1, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: concat_floor_v8f32_v4f32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vroundps $9, %xmm0, %xmm0
+; AVX-NEXT:    vroundps $9, %xmm1, %xmm1
+; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT:    retq
+  %v0 = call <4 x float> @llvm.floor.v4f32(<4 x float> %a0)
+  %v1 = call <4 x float> @llvm.floor.v4f32(<4 x float> %a1)
+  %res  = shufflevector <4 x float> %v0, <4 x float> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x float> %res
+}
+
+define <8 x double> @concat_floor_v8f64_v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> %a3) {
+; SSE-LABEL: concat_floor_v8f64_v2f64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    roundpd $9, %xmm0, %xmm0
+; SSE-NEXT:    roundpd $9, %xmm1, %xmm1
+; SSE-NEXT:    roundpd $9, %xmm2, %xmm2
+; SSE-NEXT:    roundpd $9, %xmm3, %xmm3
+; SSE-NEXT:    retq
+;
+; AVX1OR2-LABEL: concat_floor_v8f64_v2f64:
+; AVX1OR2:       # %bb.0:
+; AVX1OR2-NEXT:    vroundpd $9, %xmm0, %xmm0
+; AVX1OR2-NEXT:    vroundpd $9, %xmm1, %xmm1
+; AVX1OR2-NEXT:    vroundpd $9, %xmm2, %xmm2
+; AVX1OR2-NEXT:    vroundpd $9, %xmm3, %xmm3
+; AVX1OR2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1OR2-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm1
+; AVX1OR2-NEXT:    retq
+;
+; AVX512-LABEL: concat_floor_v8f64_v2f64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vroundpd $9, %xmm0, %xmm0
+; AVX512-NEXT:    vroundpd $9, %xmm1, %xmm1
+; AVX512-NEXT:    vroundpd $9, %xmm2, %xmm2
+; AVX512-NEXT:    vroundpd $9, %xmm3, %xmm3
+; AVX512-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; AVX512-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX512-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512-NEXT:    retq
+  %v0 = call <2 x double> @llvm.floor.v2f64(<2 x double> %a0)
+  %v1 = call <2 x double> @llvm.floor.v2f64(<2 x double> %a1)
+  %v2 = call <2 x double> @llvm.floor.v2f64(<2 x double> %a2)
+  %v3 = call <2 x double> @llvm.floor.v2f64(<2 x double> %a3)
+  %r01 = shufflevector <2 x double> %v0, <2 x double> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %r23 = shufflevector <2 x double> %v2, <2 x double> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %res  = shufflevector <4 x double> %r01, <4 x double> %r23, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x double> %res
+}
+
+define <16 x float> @concat_floor_v16f32_v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> %a3) {
+; SSE-LABEL: concat_floor_v16f32_v4f32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    roundps $9, %xmm0, %xmm0
+; SSE-NEXT:    roundps $9, %xmm1, %xmm1
+; SSE-NEXT:    roundps $9, %xmm2, %xmm2
+; SSE-NEXT:    roundps $9, %xmm3, %xmm3
+; SSE-NEXT:    retq
+;
+; AVX1OR2-LABEL: concat_floor_v16f32_v4f32:
+; AVX1OR2:       # %bb.0:
+; AVX1OR2-NEXT:    vroundps $9, %xmm0, %xmm0
+; AVX1OR2-NEXT:    vroundps $9, %xmm1, %xmm1
+; AVX1OR2-NEXT:    vroundps $9, %xmm2, %xmm2
+; AVX1OR2-NEXT:    vroundps $9, %xmm3, %xmm3
+; AVX1OR2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1OR2-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm1
+; AVX1OR2-NEXT:    retq
+;
+; AVX512-LABEL: concat_floor_v16f32_v4f32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vroundps $9, %xmm0, %xmm0
+; AVX512-NEXT:    vroundps $9, %xmm1, %xmm1
+; AVX512-NEXT:    vroundps $9, %xmm2, %xmm2
+; AVX512-NEXT:    vroundps $9, %xmm3, %xmm3
+; AVX512-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; AVX512-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX512-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512-NEXT:    retq
+  %v0 = call <4 x float> @llvm.floor.v4f32(<4 x float> %a0)
+  %v1 = call <4 x float> @llvm.floor.v4f32(<4 x float> %a1)
+  %v2 = call <4 x float> @llvm.floor.v4f32(<4 x float> %a2)
+  %v3 = call <4 x float> @llvm.floor.v4f32(<4 x float> %a3)
+  %r01 = shufflevector <4 x float> %v0, <4 x float> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %r23 = shufflevector <4 x float> %v2, <4 x float> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %res  = shufflevector <8 x float> %r01, <8 x float> %r23, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x float> %res
+}
+
+define <8 x double> @concat_floor_v8f64_v4f64(<4 x double> %a0, <4 x double> %a1) {
+; SSE-LABEL: concat_floor_v8f64_v4f64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    roundpd $9, %xmm0, %xmm0
+; SSE-NEXT:    roundpd $9, %xmm1, %xmm1
+; SSE-NEXT:    roundpd $9, %xmm2, %xmm2
+; SSE-NEXT:    roundpd $9, %xmm3, %xmm3
+; SSE-NEXT:    retq
+;
+; AVX1OR2-LABEL: concat_floor_v8f64_v4f64:
+; AVX1OR2:       # %bb.0:
+; AVX1OR2-NEXT:    vroundpd $9, %ymm0, %ymm0
+; AVX1OR2-NEXT:    vroundpd $9, %ymm1, %ymm1
+; AVX1OR2-NEXT:    retq
+;
+; AVX512-LABEL: concat_floor_v8f64_v4f64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vroundpd $9, %ymm0, %ymm0
+; AVX512-NEXT:    vroundpd $9, %ymm1, %ymm1
+; AVX512-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512-NEXT:    retq
+  %v0 = call <4 x double> @llvm.floor.v4f64(<4 x double> %a0)
+  %v1 = call <4 x double> @llvm.floor.v4f64(<4 x double> %a1)
+  %res  = shufflevector <4 x double> %v0, <4 x double> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x double> %res
+}
+
+define <16 x float> @concat_floor_v16f32_v8f32(<8 x float> %a0, <8 x float> %a1) {
+; SSE-LABEL: concat_floor_v16f32_v8f32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    roundps $9, %xmm0, %xmm0
+; SSE-NEXT:    roundps $9, %xmm1, %xmm1
+; SSE-NEXT:    roundps $9, %xmm2, %xmm2
+; SSE-NEXT:    roundps $9, %xmm3, %xmm3
+; SSE-NEXT:    retq
+;
+; AVX1OR2-LABEL: concat_floor_v16f32_v8f32:
+; AVX1OR2:       # %bb.0:
+; AVX1OR2-NEXT:    vroundps $9, %ymm0, %ymm0
+; AVX1OR2-NEXT:    vroundps $9, %ymm1, %ymm1
+; AVX1OR2-NEXT:    retq
+;
+; AVX512-LABEL: concat_floor_v16f32_v8f32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vroundps $9, %ymm0, %ymm0
+; AVX512-NEXT:    vroundps $9, %ymm1, %ymm1
+; AVX512-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512-NEXT:    retq
+  %v0 = call <8 x float> @llvm.floor.v8f32(<8 x float> %a0)
+  %v1 = call <8 x float> @llvm.floor.v8f32(<8 x float> %a1)
+  %res  = shufflevector <8 x float> %v0, <8 x float> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x float> %res
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; AVX1: {{.*}}
+; AVX2: {{.*}}
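
The CHECK lines in this test are autogenerated (see the NOTE at the top of the file), not written by hand. As a rough sketch, assuming a built llc is on PATH and the path is given relative to an llvm-project checkout, the assertions could be regenerated with something like:

  python3 llvm/utils/update_llc_test_checks.py llvm/test/CodeGen/X86/combine-ffloor.ll  # assumes llc is on PATH

The script re-runs each RUN line and rewrites the prefixed CHECK blocks in place, which is also why the unused AVX1 and AVX2 prefixes are flagged in the autogenerated note at the bottom of the file rather than removed manually.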

RKSimon merged commit 8ccdb35 into llvm:main Dec 1, 2025
11 of 12 checks passed
RKSimon deleted the x86-concat-ffloor-test branch December 1, 2025 17:22
augusto2112 pushed a commit to augusto2112/llvm-project that referenced this pull request Dec 3, 2025
kcloudy0717 pushed a commit to kcloudy0717/llvm-project that referenced this pull request Dec 4, 2025