-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[PhaseOrdering][X86] Ensure middleend has equivalent addsub pattern test coverage to backend #164163
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
…est coverage to backend

Small step towards llvm#144489
|
@llvm/pr-subscribers-llvm-transforms

Author: Simon Pilgrim (RKSimon)

Changes: Small step towards #144489

Patch is 45.22 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/164163.diff

2 Files Affected:
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/addsub-inseltpoison.ll b/llvm/test/Transforms/PhaseOrdering/X86/addsub-inseltpoison.ll
index a3af048c4e442..2c1d73eaafc5e 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/addsub-inseltpoison.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/addsub-inseltpoison.ll
@@ -1,10 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,SSE
-; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
+; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE4
; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX
; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX
-; RUN: opt < %s -passes="default<O3>" -S -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,SSE
-; RUN: opt < %s -passes="default<O3>" -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -passes="default<O3>" -S -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
+; RUN: opt < %s -passes="default<O3>" -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE4
; RUN: opt < %s -passes="default<O3>" -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX
; RUN: opt < %s -passes="default<O3>" -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX
@@ -12,6 +12,400 @@
; That may require some coordination between VectorCombine, SLP, and other passes.
; The end goal is to get a single "vaddsubps" instruction for x86 with AVX.
+define <2 x double> @test_addsub_v2f64(<2 x double> %A, <2 x double> %B) {
+; CHECK-LABEL: @test_addsub_v2f64(
+; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x double> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[A]], [[B]]
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP2]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: ret <2 x double> [[TMP3]]
+;
+ %1 = extractelement <2 x double> %A, i32 0
+ %2 = extractelement <2 x double> %B, i32 0
+ %sub = fsub double %1, %2
+ %3 = extractelement <2 x double> %A, i32 1
+ %4 = extractelement <2 x double> %B, i32 1
+ %add = fadd double %3, %4
+ %vecinsert1 = insertelement <2 x double> poison, double %sub, i32 0
+ %vecinsert2 = insertelement <2 x double> %vecinsert1, double %add, i32 1
+ ret <2 x double> %vecinsert2
+}
+
+define <4 x double> @test_addsub_v4f64(<4 x double> %A, <4 x double> %B) {
+; CHECK-LABEL: @test_addsub_v4f64(
+; CHECK-NEXT: [[TMP1:%.*]] = fsub <4 x double> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x double> [[A]], [[B]]
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT: ret <4 x double> [[TMP3]]
+;
+ %1 = extractelement <4 x double> %A, i32 0
+ %2 = extractelement <4 x double> %B, i32 0
+ %sub = fsub double %1, %2
+ %3 = extractelement <4 x double> %A, i32 2
+ %4 = extractelement <4 x double> %B, i32 2
+ %sub2 = fsub double %3, %4
+ %5 = extractelement <4 x double> %A, i32 1
+ %6 = extractelement <4 x double> %B, i32 1
+ %add = fadd double %5, %6
+ %7 = extractelement <4 x double> %A, i32 3
+ %8 = extractelement <4 x double> %B, i32 3
+ %add2 = fadd double %7, %8
+ %vecinsert1 = insertelement <4 x double> poison, double %add, i32 1
+ %vecinsert2 = insertelement <4 x double> %vecinsert1, double %add2, i32 3
+ %vecinsert3 = insertelement <4 x double> %vecinsert2, double %sub, i32 0
+ %vecinsert4 = insertelement <4 x double> %vecinsert3, double %sub2, i32 2
+ ret <4 x double> %vecinsert4
+}
+
+define <8 x double> @test_addsub_v8f64(<8 x double> %A, <8 x double> %B) {
+; SSE2-LABEL: @test_addsub_v8f64(
+; SSE2-NEXT: [[TMP1:%.*]] = fsub <8 x double> [[A:%.*]], [[B:%.*]]
+; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; SSE2-NEXT: [[TMP3:%.*]] = fadd <8 x double> [[A]], [[B]]
+; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <8 x double> [[TMP3]], <8 x double> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; SSE2-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP4]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; SSE2-NEXT: ret <8 x double> [[TMP5]]
+;
+; SSE4-LABEL: @test_addsub_v8f64(
+; SSE4-NEXT: [[TMP1:%.*]] = fsub <8 x double> [[A:%.*]], [[B:%.*]]
+; SSE4-NEXT: [[TMP2:%.*]] = fadd <8 x double> [[A]], [[B]]
+; SSE4-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+; SSE4-NEXT: ret <8 x double> [[TMP3]]
+;
+; AVX-LABEL: @test_addsub_v8f64(
+; AVX-NEXT: [[TMP1:%.*]] = fsub <8 x double> [[A:%.*]], [[B:%.*]]
+; AVX-NEXT: [[TMP2:%.*]] = fadd <8 x double> [[A]], [[B]]
+; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+; AVX-NEXT: ret <8 x double> [[TMP3]]
+;
+ %1 = extractelement <8 x double> %A, i32 0
+ %2 = extractelement <8 x double> %B, i32 0
+ %sub = fsub double %1, %2
+ %3 = extractelement <8 x double> %A, i32 2
+ %4 = extractelement <8 x double> %B, i32 2
+ %sub2 = fsub double %3, %4
+ %5 = extractelement <8 x double> %A, i32 1
+ %6 = extractelement <8 x double> %B, i32 1
+ %add = fadd double %5, %6
+ %7 = extractelement <8 x double> %A, i32 3
+ %8 = extractelement <8 x double> %B, i32 3
+ %add2 = fadd double %7, %8
+ %9 = extractelement <8 x double> %A, i32 4
+ %10 = extractelement <8 x double> %B, i32 4
+ %sub3 = fsub double %9, %10
+ %11 = extractelement <8 x double> %A, i32 6
+ %12 = extractelement <8 x double> %B, i32 6
+ %sub4 = fsub double %11, %12
+ %13 = extractelement <8 x double> %A, i32 5
+ %14 = extractelement <8 x double> %B, i32 5
+ %add3 = fadd double %13, %14
+ %15 = extractelement <8 x double> %A, i32 7
+ %16 = extractelement <8 x double> %B, i32 7
+ %add4 = fadd double %15, %16
+ %vecinsert1 = insertelement <8 x double> poison, double %add, i32 1
+ %vecinsert2 = insertelement <8 x double> %vecinsert1, double %add2, i32 3
+ %vecinsert3 = insertelement <8 x double> %vecinsert2, double %sub, i32 0
+ %vecinsert4 = insertelement <8 x double> %vecinsert3, double %sub2, i32 2
+ %vecinsert5 = insertelement <8 x double> %vecinsert4, double %add3, i32 5
+ %vecinsert6 = insertelement <8 x double> %vecinsert5, double %add4, i32 7
+ %vecinsert7 = insertelement <8 x double> %vecinsert6, double %sub3, i32 4
+ %vecinsert8 = insertelement <8 x double> %vecinsert7, double %sub4, i32 6
+ ret <8 x double> %vecinsert8
+}
+
+define <2 x float> @test_addsub_v2f32(<2 x float> %v0, <2 x float> %v1) {
+; CHECK-LABEL: @test_addsub_v2f32(
+; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x float> [[V0:%.*]], [[V1:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x float> [[V0]], [[V1]]
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP2]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: ret <2 x float> [[TMP3]]
+;
+ %v2 = extractelement <2 x float> %v0, i32 0
+ %v3 = extractelement <2 x float> %v1, i32 0
+ %v4 = extractelement <2 x float> %v0, i32 1
+ %v5 = extractelement <2 x float> %v1, i32 1
+ %sub = fsub float %v2, %v3
+ %add = fadd float %v5, %v4
+ %res0 = insertelement <2 x float> poison, float %sub, i32 0
+ %res1 = insertelement <2 x float> %res0, float %add, i32 1
+ ret <2 x float> %res1
+}
+
+define <4 x float> @test_addsub_v4f32(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: @test_addsub_v4f32(
+; CHECK-NEXT: [[TMP1:%.*]] = fsub <4 x float> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[A]], [[B]]
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT: ret <4 x float> [[TMP3]]
+;
+ %1 = extractelement <4 x float> %A, i32 0
+ %2 = extractelement <4 x float> %B, i32 0
+ %sub = fsub float %1, %2
+ %3 = extractelement <4 x float> %A, i32 2
+ %4 = extractelement <4 x float> %B, i32 2
+ %sub2 = fsub float %3, %4
+ %5 = extractelement <4 x float> %A, i32 1
+ %6 = extractelement <4 x float> %B, i32 1
+ %add = fadd float %5, %6
+ %7 = extractelement <4 x float> %A, i32 3
+ %8 = extractelement <4 x float> %B, i32 3
+ %add2 = fadd float %7, %8
+ %vecinsert1 = insertelement <4 x float> poison, float %add, i32 1
+ %vecinsert2 = insertelement <4 x float> %vecinsert1, float %add2, i32 3
+ %vecinsert3 = insertelement <4 x float> %vecinsert2, float %sub, i32 0
+ %vecinsert4 = insertelement <4 x float> %vecinsert3, float %sub2, i32 2
+ ret <4 x float> %vecinsert4
+}
+
+define <8 x float> @test_v8f32(<8 x float> %A, <8 x float> %B) {
+; SSE2-LABEL: @test_v8f32(
+; SSE2-NEXT: [[TMP1:%.*]] = fsub <8 x float> [[A:%.*]], [[B:%.*]]
+; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; SSE2-NEXT: [[TMP3:%.*]] = fadd <8 x float> [[A]], [[B]]
+; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; SSE2-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP4]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; SSE2-NEXT: ret <8 x float> [[TMP5]]
+;
+; SSE4-LABEL: @test_v8f32(
+; SSE4-NEXT: [[TMP1:%.*]] = fsub <8 x float> [[A:%.*]], [[B:%.*]]
+; SSE4-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B]]
+; SSE4-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+; SSE4-NEXT: ret <8 x float> [[TMP3]]
+;
+; AVX-LABEL: @test_v8f32(
+; AVX-NEXT: [[TMP1:%.*]] = fsub <8 x float> [[A:%.*]], [[B:%.*]]
+; AVX-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B]]
+; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+; AVX-NEXT: ret <8 x float> [[TMP3]]
+;
+ %1 = extractelement <8 x float> %A, i32 0
+ %2 = extractelement <8 x float> %B, i32 0
+ %sub = fsub float %1, %2
+ %3 = extractelement <8 x float> %A, i32 2
+ %4 = extractelement <8 x float> %B, i32 2
+ %sub2 = fsub float %3, %4
+ %5 = extractelement <8 x float> %A, i32 1
+ %6 = extractelement <8 x float> %B, i32 1
+ %add = fadd float %5, %6
+ %7 = extractelement <8 x float> %A, i32 3
+ %8 = extractelement <8 x float> %B, i32 3
+ %add2 = fadd float %7, %8
+ %9 = extractelement <8 x float> %A, i32 4
+ %10 = extractelement <8 x float> %B, i32 4
+ %sub3 = fsub float %9, %10
+ %11 = extractelement <8 x float> %A, i32 6
+ %12 = extractelement <8 x float> %B, i32 6
+ %sub4 = fsub float %11, %12
+ %13 = extractelement <8 x float> %A, i32 5
+ %14 = extractelement <8 x float> %B, i32 5
+ %add3 = fadd float %13, %14
+ %15 = extractelement <8 x float> %A, i32 7
+ %16 = extractelement <8 x float> %B, i32 7
+ %add4 = fadd float %15, %16
+ %vecinsert1 = insertelement <8 x float> poison, float %add, i32 1
+ %vecinsert2 = insertelement <8 x float> %vecinsert1, float %add2, i32 3
+ %vecinsert3 = insertelement <8 x float> %vecinsert2, float %sub, i32 0
+ %vecinsert4 = insertelement <8 x float> %vecinsert3, float %sub2, i32 2
+ %vecinsert5 = insertelement <8 x float> %vecinsert4, float %add3, i32 5
+ %vecinsert6 = insertelement <8 x float> %vecinsert5, float %add4, i32 7
+ %vecinsert7 = insertelement <8 x float> %vecinsert6, float %sub3, i32 4
+ %vecinsert8 = insertelement <8 x float> %vecinsert7, float %sub4, i32 6
+ ret <8 x float> %vecinsert8
+}
+
+define <16 x float> @test_addsub_v16f32(<16 x float> %A, <16 x float> %B) {
+; SSE2-LABEL: @test_addsub_v16f32(
+; SSE2-NEXT: [[TMP1:%.*]] = fsub <16 x float> [[A:%.*]], [[B:%.*]]
+; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <16 x float> [[TMP1]], <16 x float> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+; SSE2-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[A]], [[B]]
+; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <16 x float> [[TMP3]], <16 x float> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; SSE2-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> [[TMP4]], <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+; SSE2-NEXT: ret <16 x float> [[TMP5]]
+;
+; SSE4-LABEL: @test_addsub_v16f32(
+; SSE4-NEXT: [[TMP1:%.*]] = fsub <16 x float> [[A:%.*]], [[B:%.*]]
+; SSE4-NEXT: [[TMP2:%.*]] = fadd <16 x float> [[A]], [[B]]
+; SSE4-NEXT: [[TMP3:%.*]] = shufflevector <16 x float> [[TMP1]], <16 x float> [[TMP2]], <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
+; SSE4-NEXT: ret <16 x float> [[TMP3]]
+;
+; AVX-LABEL: @test_addsub_v16f32(
+; AVX-NEXT: [[TMP1:%.*]] = fsub <16 x float> [[A:%.*]], [[B:%.*]]
+; AVX-NEXT: [[TMP2:%.*]] = fadd <16 x float> [[A]], [[B]]
+; AVX-NEXT: [[TMP3:%.*]] = shufflevector <16 x float> [[TMP1]], <16 x float> [[TMP2]], <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
+; AVX-NEXT: ret <16 x float> [[TMP3]]
+;
+ %1 = extractelement <16 x float> %A, i32 0
+ %2 = extractelement <16 x float> %B, i32 0
+ %sub = fsub float %1, %2
+ %3 = extractelement <16 x float> %A, i32 2
+ %4 = extractelement <16 x float> %B, i32 2
+ %sub2 = fsub float %3, %4
+ %5 = extractelement <16 x float> %A, i32 1
+ %6 = extractelement <16 x float> %B, i32 1
+ %add = fadd float %5, %6
+ %7 = extractelement <16 x float> %A, i32 3
+ %8 = extractelement <16 x float> %B, i32 3
+ %add2 = fadd float %7, %8
+ %9 = extractelement <16 x float> %A, i32 4
+ %10 = extractelement <16 x float> %B, i32 4
+ %sub3 = fsub float %9, %10
+ %11 = extractelement <16 x float> %A, i32 6
+ %12 = extractelement <16 x float> %B, i32 6
+ %sub4 = fsub float %11, %12
+ %13 = extractelement <16 x float> %A, i32 5
+ %14 = extractelement <16 x float> %B, i32 5
+ %add3 = fadd float %13, %14
+ %15 = extractelement <16 x float> %A, i32 7
+ %16 = extractelement <16 x float> %B, i32 7
+ %add4 = fadd float %15, %16
+ %17 = extractelement <16 x float> %A, i32 8
+ %18 = extractelement <16 x float> %B, i32 8
+ %sub5 = fsub float %17, %18
+ %19 = extractelement <16 x float> %A, i32 10
+ %20 = extractelement <16 x float> %B, i32 10
+ %sub6 = fsub float %19, %20
+ %21 = extractelement <16 x float> %A, i32 9
+ %22 = extractelement <16 x float> %B, i32 9
+ %add5 = fadd float %21, %22
+ %23 = extractelement <16 x float> %A, i32 11
+ %24 = extractelement <16 x float> %B, i32 11
+ %add6 = fadd float %23, %24
+ %25 = extractelement <16 x float> %A, i32 12
+ %26 = extractelement <16 x float> %B, i32 12
+ %sub7 = fsub float %25, %26
+ %27 = extractelement <16 x float> %A, i32 14
+ %28 = extractelement <16 x float> %B, i32 14
+ %sub8 = fsub float %27, %28
+ %29 = extractelement <16 x float> %A, i32 13
+ %30 = extractelement <16 x float> %B, i32 13
+ %add7 = fadd float %29, %30
+ %31 = extractelement <16 x float> %A, i32 15
+ %32 = extractelement <16 x float> %B, i32 15
+ %add8 = fadd float %31, %32
+ %vecinsert1 = insertelement <16 x float> poison, float %add, i32 1
+ %vecinsert2 = insertelement <16 x float> %vecinsert1, float %add2, i32 3
+ %vecinsert3 = insertelement <16 x float> %vecinsert2, float %sub, i32 0
+ %vecinsert4 = insertelement <16 x float> %vecinsert3, float %sub2, i32 2
+ %vecinsert5 = insertelement <16 x float> %vecinsert4, float %add3, i32 5
+ %vecinsert6 = insertelement <16 x float> %vecinsert5, float %add4, i32 7
+ %vecinsert7 = insertelement <16 x float> %vecinsert6, float %sub3, i32 4
+ %vecinsert8 = insertelement <16 x float> %vecinsert7, float %sub4, i32 6
+ %vecinsert9 = insertelement <16 x float> %vecinsert8, float %add5, i32 9
+ %vecinsert10 = insertelement <16 x float> %vecinsert9, float %add6, i32 11
+ %vecinsert11 = insertelement <16 x float> %vecinsert10, float %sub5, i32 8
+ %vecinsert12 = insertelement <16 x float> %vecinsert11, float %sub6, i32 10
+ %vecinsert13 = insertelement <16 x float> %vecinsert12, float %add7, i32 13
+ %vecinsert14 = insertelement <16 x float> %vecinsert13, float %add8, i32 15
+ %vecinsert15 = insertelement <16 x float> %vecinsert14, float %sub7, i32 12
+ %vecinsert16 = insertelement <16 x float> %vecinsert15, float %sub8, i32 14
+ ret <16 x float> %vecinsert16
+}
+
+; Test that non-sequential / partial add-sub patterns are still folded.
+
+define <4 x float> @test_addsub_v4f32_shuffle_1302(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: @test_addsub_v4f32_shuffle_1302(
+; CHECK-NEXT: [[TMP1:%.*]] = fsub <4 x float> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[A]], [[B]]
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT: ret <4 x float> [[TMP3]]
+;
+ %1 = extractelement <4 x float> %A, i32 0
+ %2 = extractelement <4 x float> %B, i32 0
+ %sub = fsub float %1, %2
+ %3 = extractelement <4 x float> %A, i32 2
+ %4 = extractelement <4 x float> %B, i32 2
+ %sub2 = fsub float %3, %4
+ %5 = extractelement <4 x float> %A, i32 1
+ %6 = extractelement <4 x float> %B, i32 1
+ %add = fadd float %5, %6
+ %7 = extractelement <4 x float> %A, i32 3
+ %8 = extractelement <4 x float> %B, i32 3
+ %add2 = fadd float %7, %8
+ %vecinsert1 = insertelement <4 x float> poison, float %add, i32 1
+ %vecinsert2 = insertelement <4 x float> %vecinsert1, float %add2, i32 3
+ %vecinsert3 = insertelement <4 x float> %vecinsert2, float %sub, i32 0
+ %vecinsert4 = insertelement <4 x float> %vecinsert3, float %sub2, i32 2
+ ret <4 x float> %vecinsert4
+}
+
+define <4 x float> @test_addsub_v4f32_partial_23(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: @test_addsub_v4f32_partial_23(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[VECINSERT21:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP4]], <4 x i32> <i32 poison, i32 poison, i32 0, i32 3>
+; CHECK-NEXT: ret <4 x float> [[VECINSERT21]]
+;
+ %1 = extractelement <4 x float> %A, i32 2
+ %2 = extractelement <4 x float> %B, i32 2
+ %sub2 = fsub float %1, %2
+ %3 = extractelement <4 x float> %A, i32 3
+ %4 = extractelement <4 x float> %B, i32 3
+ %add2 = fadd float %3, %4
+ %vecinsert1 = insertelement <4 x float> poison, float %sub2, i32 2
+ %vecinsert2 = insertelement <4 x float> %vecinsert1, float %add2, i32 3
+ ret <4 x float> %vecinsert2
+}
+
+define <4 x float> @test_addsub_v4f32_partial_03(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: @test_addsub_v4f32_partial_03(
+; CHECK-NEXT: [[FOLDEXTEXTBINOP:%.*]] = fsub <4 x float> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[FOLDEXTEXTBINOP2:%.*]] = fadd <4 x float> [[A]], [[B]]
+; CHECK-NEXT: [[VECINSERT2:%.*]] = shufflevector <4 x float> [[FOLDEXTEXTBINOP]], <4 x float> [[FOLDEXTEXTBINOP2]], <4 x i32> <i32 0, i32 poison, i32 poison, i32 7>
+; CHECK-NEXT: ret <4 x float> [[VECINSERT2]]
+;
+ %1 = extractelement <4 x float> %A, i32 0
+ %2 = extractelement <4 x float> %B, i32 0
+ %sub = fsub float %1, %2
+ %3 = extractelement <4 x float> %A, i32 3
+ %4 = extractelement <4 x float> %B, i32 3
+ %add = fadd float %4, %3
+ %vecinsert1 = insertelement <4 x float> poison, float %sub, i32 0
+ %vecinsert2 = insertelement <4 x float> %vecinsert1, float %add, i32 3
+ ret <4 x float> %vecinsert2
+}
+
+de...
[truncated]
|
You can test this locally with the following command:

git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef([^a-zA-Z0-9_-]|$)|UndefValue::get)' 'HEAD~1' HEAD llvm/test/Transforms/PhaseOrdering/X86/addsub-inseltpoison.ll llvm/test/Transforms/PhaseOrdering/X86/addsub.ll

The following files introduce new uses of undef:
Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields `undef`; you should use `poison` values for placeholders instead. In tests, avoid using `undef` and having tests depend on its value.

For example, this is considered a bad practice:

define void @fn() {
...
br i1 undef, ...
}

Please use the following instead:

define void @fn(i1 %cond) {
...
br i1 %cond, ...
}

Please refer to the Undefined Behavior Manual for more information.
…est coverage to backend (llvm#164163) Small step towards llvm#144489
…est coverage to backend (llvm#164163) Small step towards llvm#144489
Small step towards #144489