# Patch summary (free-form preamble; everything before the first "diff --git"
# header is not part of any hunk).
#
# Purpose: stop piping `opt -passes=loop-vectorize` output through `llc` in the
# LoopVectorize/X86 tests touched here, and check the vectorizer's IR instead.
#
#  * llvm/test/CodeGen/X86/vectorization-remarks-loopid-dbg.ll (new file):
#    holds the `llc` RUN line plus the DEBUG-OUTPUT-NOT checks (`.loc`,
#    `.debug_info`) that this patch removes from the two LoopVectorize
#    vectorization-remarks tests. Its compile unit (!24) is declared with
#    `emissionKind: NoDebug`.
#    NOTE(review): the 66 added lines reference attribute groups #0, #1 and #2
#    but contain no `attributes #N = {...}` definitions - confirm this is
#    intended / accepted by the IR parser.
#
#  * LoopVectorize/X86/avx512.ll: both RUN lines drop the `| llc -mattr=+avx512f`
#    stage and switch to `--check-prefixes`; the zmm/ymm assembly checks are
#    replaced by checks on the `vector.body:` / `vec.epilog.vector.body:` labels
#    and on the `store <N x i32>` width that follows each of them.
#
#  * LoopVectorize/X86/fp32_to_uint32-cost-model.ll and
#    LoopVectorize/X86/fp64_to_uint32-cost-model.ll: RUN lines drop `llc`; the
#    CHECK-NOT patterns change from instruction mnemonics (`vinserti128`,
#    `vpinsrd`) to IR (`fptoui <4 x float>`, `fptoui <2 x double>`).
#    NOTE(review): the unchanged comment above those checks talks about not
#    going "256-bit wide", while <4 x float> and <2 x double> are 128-bit
#    types - confirm these are the widths the test means to forbid.
#
#  * LoopVectorize/X86/vectorization-remarks-loopid-dbg.ll and
#    LoopVectorize/X86/vectorization-remarks.ll: the `llc` RUN line and its
#    DEBUG-OUTPUT-NOT checks are deleted.
#
diff --git a/llvm/test/CodeGen/X86/vectorization-remarks-loopid-dbg.ll b/llvm/test/CodeGen/X86/vectorization-remarks-loopid-dbg.ll
new file mode 100644
index 0000000000000..31949403b4465
--- /dev/null
+++ b/llvm/test/CodeGen/X86/vectorization-remarks-loopid-dbg.ll
@@ -0,0 +1,66 @@
+; RUN: llc < %s -mtriple x86_64-pc-linux-gnu -o - | FileCheck -check-prefix=DEBUG-OUTPUT %s
+; DEBUG-OUTPUT-NOT: .loc
+; DEBUG-OUTPUT-NOT: {{.*}}.debug_info
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define i32 @foo(i32 %n) #0 !dbg !4 {
+entry:
+  %diff = alloca i32, align 4
+  %cb = alloca [16 x i8], align 16
+  %cc = alloca [16 x i8], align 16
+  store i32 0, ptr %diff, align 4, !tbaa !11
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %add8 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds [16 x i8], ptr %cb, i64 0, i64 %indvars.iv
+  %0 = load i8, ptr %arrayidx, align 1, !tbaa !21
+  %conv = sext i8 %0 to i32
+  %arrayidx2 = getelementptr inbounds [16 x i8], ptr %cc, i64 0, i64 %indvars.iv
+  %1 = load i8, ptr %arrayidx2, align 1, !tbaa !21
+  %conv3 = sext i8 %1 to i32
+  %sub = sub i32 %conv, %conv3
+  %add = add nsw i32 %sub, %add8
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 16
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !25
+
+for.end:                                          ; preds = %for.body
+  store i32 %add, ptr %diff, align 4, !tbaa !11
+  call void @ibar(ptr %diff) #2
+  ret i32 0
+}
+
+declare void @ibar(ptr) #1
+
+!llvm.module.flags = !{!7, !8}
+!llvm.ident = !{!9}
+!llvm.dbg.cu = !{!24}
+
+!1 = !DIFile(filename: "vectorization-remarks.c", directory: ".")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "foo", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !24, scopeLine: 6, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!5 = !DIFile(filename: "vectorization-remarks.c", directory: ".")
+!6 = !DISubroutineType(types: !2)
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 1, !"Debug Info Version", i32 3}
+!9 = !{!"clang version 3.5.0 "}
+!10 = !DILocation(line: 8, column: 3, scope: !4)
+!11 = !{!12, !12, i64 0}
+!12 = !{!"int", !13, i64 0}
+!13 = !{!"omnipotent char", !14, i64 0}
+!14 = !{!"Simple C/C++ TBAA"}
+!15 = !DILocation(line: 17, column: 8, scope: !16)
+!16 = distinct !DILexicalBlock(line: 17, column: 8, file: !1, scope: !17)
+!17 = distinct !DILexicalBlock(line: 17, column: 8, file: !1, scope: !18)
+!18 = distinct !DILexicalBlock(line: 17, column: 3, file: !1, scope: !4)
+!19 = !DILocation(line: 18, column: 5, scope: !20)
+!20 = distinct !DILexicalBlock(line: 17, column: 27, file: !1, scope: !18)
+!21 = !{!13, !13, i64 0}
+!22 = !DILocation(line: 20, column: 3, scope: !4)
+!23 = !DILocation(line: 21, column: 3, scope: !4)
+!24 = distinct !DICompileUnit(language: DW_LANG_C89, file: !1, emissionKind: NoDebug)
+!25 = !{!25, !15}
diff --git a/llvm/test/Transforms/LoopVectorize/X86/avx512.ll b/llvm/test/Transforms/LoopVectorize/X86/avx512.ll
index 33d1d3f0d2219..b8e0697c8ac6d 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/avx512.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/avx512.ll
@@ -1,5 +1,5 @@
-; RUN: opt -mattr=+avx512f -passes=loop-vectorize -S < %s | llc -mattr=+avx512f | FileCheck %s
-; RUN: opt -mattr=+avx512vl,+prefer-256-bit -passes=loop-vectorize -S < %s | llc -mattr=+avx512f | FileCheck %s --check-prefix=CHECK-PREFER-AVX256
+; RUN: opt -mattr=+avx512f -passes=loop-vectorize -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NO-PREFER
+; RUN: opt -mattr=+avx512vl,+prefer-256-bit -passes=loop-vectorize -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PREFER-AVX256
 
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.9.0"
@@ -7,18 +7,19 @@ target triple = "x86_64-apple-macosx10.9.0"
 ; Verify that we generate 512-bit wide vectors for a basic integer memset
 ; loop.
 
-; CHECK-LABEL: _f:
-; CHECK: %vec.epilog.vector.body
-; CHECK: %ymm
-; CHECK: %vector.body
-; CHECK-NOT: %ymm
-; CHECK: vmovdqu64 %zmm{{.}},
+; CHECK-NO-PREFER-LABEL: @f(
+; CHECK-NO-PREFER: vector.body:
+; CHECK-NO-PREFER: store <16 x i32>
+; CHECK-NO-PREFER: vec.epilog.vector.body:
+; CHECK-NO-PREFER: store <8 x i32>
 
 ; Verify that we don't generate 512-bit wide vectors when subtarget feature says not to
 
-; CHECK-PREFER-AVX256-LABEL: f:
-; CHECK-PREFER-AVX256: vmovdqu %ymm{{.}},
-; CHECK-PREFER-AVX256-NOT: %zmm
+; CHECK-PREFER-AVX256-LABEL: @f(
+; CHECK-PREFER-AVX256: vector.body:
+; CHECK-PREFER-AVX256: store <8 x i32>
+; CHECK-PREFER-AVX256: vec.epilog.vector.body:
+; CHECK-PREFER-AVX256: store <4 x i32>
 
 define void @f(ptr %a, i32 %n) {
 entry:
@@ -47,13 +48,11 @@ for.end: ; preds = %for.end.loopexit, %
 ; Verify that the "prefer-vector-width=256" attribute prevents the use of 512-bit
 ; vectors
 
-; CHECK-LABEL: _g:
-; CHECK: vmovdqu %ymm{{.}},
-; CHECK-NOT: %zmm
-
-; CHECK-PREFER-AVX256-LABEL: g:
-; CHECK-PREFER-AVX256: vmovdqu %ymm{{.}},
-; CHECK-PREFER-AVX256-NOT: %zmm
+; CHECK-LABEL: @g(
+; CHECK: vector.body:
+; CHECK: store <8 x i32>
+; CHECK: vec.epilog.vector.body:
+; CHECK: store <4 x i32>
 
 define void @g(ptr %a, i32 %n) "prefer-vector-width"="256" {
 entry:
@@ -82,19 +81,11 @@ for.end: ; preds = %for.end.loopexit, %
 ; Verify that the "prefer-vector-width=512" attribute override the subtarget
 ; vectors
 
-; CHECK-LABEL: _h:
-; CHECK: %vec.epilog.vector.body
-; CHECK: %ymm
-; CHECK: %vector.body
-; CHECK: vmovdqu64 %zmm{{.}},
-; CHECK-NOT: %ymm
-
-; CHECK-PREFER-AVX256-LABEL: h:
-; CHECK-PREFER-AVX256: %vec.epilog.vector.body
-; CHECK-PREFER-AVX256: %ymm
-; CHECK-PREFER-AVX256: %vector.body
-; CHECK-PREFER-AVX256: vmovdqu64 %zmm{{.}},
-; CHECK-PREFER-AVX256-NOT: %ymm
+; CHECK-LABEL: @h(
+; CHECK: vector.body:
+; CHECK: store <16 x i32>
+; CHECK: vec.epilog.vector.body:
+; CHECK: store <8 x i32>
 
 define void @h(ptr %a, i32 %n) "prefer-vector-width"="512" {
 entry:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/fp32_to_uint32-cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/fp32_to_uint32-cost-model.ll
index 4d92c1a3cf424..15533b2e25fab 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/fp32_to_uint32-cost-model.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/fp32_to_uint32-cost-model.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -mcpu=core-avx2 -passes=loop-vectorize -S | llc -mcpu=core-avx2 | FileCheck %s
+; RUN: opt < %s -mcpu=core-avx2 -passes=loop-vectorize -S | FileCheck %s
 
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx"
@@ -8,7 +8,7 @@ target triple = "x86_64-apple-macosx"
 
 ; If we need to scalarize the fptoui and then use inserts to build up the
 ; vector again, then there is certainly no value in going 256-bit wide.
-; CHECK-NOT: vinserti128
+; CHECK-NOT: fptoui <4 x float>
 
 define void @convert(i32 %N) {
 entry:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll
index 03783d3a6c9fb..0eb87d0d793ed 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -mcpu=core-avx2 -passes=loop-vectorize -S | llc -mcpu=core-avx2 | FileCheck %s
+; RUN: opt < %s -mcpu=core-avx2 -passes=loop-vectorize -S | FileCheck %s
 
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx"
@@ -9,7 +9,7 @@ target triple = "x86_64-apple-macosx"
 
 ; If we need to scalarize the fptoui and then use inserts to build up the
 ; vector again, then there is certainly no value in going 256-bit wide.
-; CHECK-NOT: vpinsrd
+; CHECK-NOT: fptoui <2 x double>
 
 define void @convert() {
 entry:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-loopid-dbg.ll b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-loopid-dbg.ll
index d774f778b7fdc..e1ecb70feb436 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-loopid-dbg.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-loopid-dbg.ll
@@ -2,10 +2,6 @@
 ; RUN: opt < %s -passes=loop-vectorize -force-vector-width=1 -force-vector-interleave=4 -mtriple=x86_64-unknown-linux -S -pass-remarks='loop-vectorize' 2>&1 | FileCheck -check-prefix=UNROLLED %s
 ; RUN: opt < %s -passes=loop-vectorize -force-vector-width=1 -force-vector-interleave=1 -mtriple=x86_64-unknown-linux -S -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck -check-prefix=NONE %s
 
-; RUN: llc < %s -mtriple x86_64-pc-linux-gnu -o - | FileCheck -check-prefix=DEBUG-OUTPUT %s
-; DEBUG-OUTPUT-NOT: .loc
-; DEBUG-OUTPUT-NOT: {{.*}}.debug_info
-
 ; VECTORIZED: remark: vectorization-remarks.c:17:8: vectorized loop (vectorization width: 4, interleaved count: 2)
 ; UNROLLED: remark: vectorization-remarks.c:17:8: interleaved loop (interleaved count: 4)
 ; NONE: remark: vectorization-remarks.c:17:8: loop not vectorized: vectorization and interleaving are explicitly disabled, or the loop has already been vectorized
diff --git a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
index f0b960c640562..8ba28042fcf2d 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
@@ -2,10 +2,6 @@
 ; RUN: opt < %s -passes=loop-vectorize -force-vector-width=1 -force-vector-interleave=4 -mtriple=x86_64-unknown-linux -S -pass-remarks='loop-vectorize' 2>&1 | FileCheck -check-prefix=UNROLLED %s
 ; RUN: opt < %s -passes=loop-vectorize -force-vector-width=1 -force-vector-interleave=1 -mtriple=x86_64-unknown-linux -S -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck -check-prefix=NONE %s
 
-; RUN: llc < %s -mtriple x86_64-pc-linux-gnu -o - | FileCheck -check-prefix=DEBUG-OUTPUT %s
-; DEBUG-OUTPUT-NOT: .loc
-; DEBUG-OUTPUT-NOT: {{.*}}.debug_info
-
 ; VECTORIZED: remark: vectorization-remarks.c:17:8: vectorized loop (vectorization width: 4, interleaved count: 2)
 ; UNROLLED: remark: vectorization-remarks.c:17:8: interleaved loop (interleaved count: 4)
 ; NONE: remark: vectorization-remarks.c:17:8: loop not vectorized: vectorization and interleaving are explicitly disabled, or the loop has already been vectorized