diff --git a/llvm/test/CodeGen/X86/vectorization-remarks-loopid-dbg.ll b/llvm/test/CodeGen/X86/vectorization-remarks-loopid-dbg.ll
new file mode 100644
index 0000000000000..31949403b4465
--- /dev/null
+++ b/llvm/test/CodeGen/X86/vectorization-remarks-loopid-dbg.ll
@@ -0,0 +1,66 @@
+; RUN: llc < %s -mtriple x86_64-pc-linux-gnu -o - | FileCheck -check-prefix=DEBUG-OUTPUT %s
+; DEBUG-OUTPUT-NOT: .loc
+; DEBUG-OUTPUT-NOT: {{.*}}.debug_info
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define i32 @foo(i32 %n) #0 !dbg !4 {
+entry:
+  %diff = alloca i32, align 4
+  %cb = alloca [16 x i8], align 16
+  %cc = alloca [16 x i8], align 16
+  store i32 0, ptr %diff, align 4, !tbaa !11
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %add8 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds [16 x i8], ptr %cb, i64 0, i64 %indvars.iv
+  %0 = load i8, ptr %arrayidx, align 1, !tbaa !21
+  %conv = sext i8 %0 to i32
+  %arrayidx2 = getelementptr inbounds [16 x i8], ptr %cc, i64 0, i64 %indvars.iv
+  %1 = load i8, ptr %arrayidx2, align 1, !tbaa !21
+  %conv3 = sext i8 %1 to i32
+  %sub = sub i32 %conv, %conv3
+  %add = add nsw i32 %sub, %add8
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 16
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !25
+
+for.end:                                          ; preds = %for.body
+  store i32 %add, ptr %diff, align 4, !tbaa !11
+  call void @ibar(ptr %diff) #2
+  ret i32 0
+}
+
+declare void @ibar(ptr) #1
+
+!llvm.module.flags = !{!7, !8}
+!llvm.ident = !{!9}
+!llvm.dbg.cu = !{!24}
+
+!1 = !DIFile(filename: "vectorization-remarks.c", directory: ".")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "foo", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !24, scopeLine: 6, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!5 = !DIFile(filename: "vectorization-remarks.c", directory: ".")
+!6 = !DISubroutineType(types: !2)
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 1, !"Debug Info Version", i32 3}
+!9 = !{!"clang version 3.5.0 "}
+!10 = !DILocation(line: 8, column: 3, scope: !4)
+!11 = !{!12, !12, i64 0}
+!12 = !{!"int", !13, i64 0}
+!13 = !{!"omnipotent char", !14, i64 0}
+!14 = !{!"Simple C/C++ TBAA"}
+!15 = !DILocation(line: 17, column: 8, scope: !16)
+!16 = distinct !DILexicalBlock(line: 17, column: 8, file: !1, scope: !17)
+!17 = distinct !DILexicalBlock(line: 17, column: 8, file: !1, scope: !18)
+!18 = distinct !DILexicalBlock(line: 17, column: 3, file: !1, scope: !4)
+!19 = !DILocation(line: 18, column: 5, scope: !20)
+!20 = distinct !DILexicalBlock(line: 17, column: 27, file: !1, scope: !18)
+!21 = !{!13, !13, i64 0}
+!22 = !DILocation(line: 20, column: 3, scope: !4)
+!23 = !DILocation(line: 21, column: 3, scope: !4)
+!24 = distinct !DICompileUnit(language: DW_LANG_C89, file: !1, emissionKind: NoDebug)
+!25 = !{!25, !15}
diff --git a/llvm/test/Transforms/LoopVectorize/X86/avx512.ll b/llvm/test/Transforms/LoopVectorize/X86/avx512.ll
index 33d1d3f0d2219..b8e0697c8ac6d 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/avx512.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/avx512.ll
@@ -1,5 +1,5 @@
-; RUN: opt -mattr=+avx512f -passes=loop-vectorize -S < %s | llc -mattr=+avx512f | FileCheck %s
-; RUN: opt -mattr=+avx512vl,+prefer-256-bit -passes=loop-vectorize -S < %s | llc -mattr=+avx512f | FileCheck %s --check-prefix=CHECK-PREFER-AVX256
+; RUN: opt -mattr=+avx512f -passes=loop-vectorize -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NO-PREFER
+; RUN: opt -mattr=+avx512vl,+prefer-256-bit -passes=loop-vectorize -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PREFER-AVX256
 
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.9.0"
@@ -7,18 +7,19 @@ target triple = "x86_64-apple-macosx10.9.0"
 ; Verify that we generate 512-bit wide vectors for a basic integer memset
 ; loop.
 
-; CHECK-LABEL: _f:
-; CHECK: %vec.epilog.vector.body
-; CHECK: %ymm
-; CHECK: %vector.body
-; CHECK-NOT: %ymm
-; CHECK: vmovdqu64 %zmm{{.}},
+; CHECK-NO-PREFER-LABEL: @f(
+; CHECK-NO-PREFER: vector.body:
+; CHECK-NO-PREFER: store <16 x i32>
+; CHECK-NO-PREFER: vec.epilog.vector.body:
+; CHECK-NO-PREFER: store <8 x i32>
 
 ; Verify that we don't generate 512-bit wide vectors when subtarget feature says not to
 
-; CHECK-PREFER-AVX256-LABEL: f:
-; CHECK-PREFER-AVX256: vmovdqu %ymm{{.}},
-; CHECK-PREFER-AVX256-NOT: %zmm
+; CHECK-PREFER-AVX256-LABEL: @f(
+; CHECK-PREFER-AVX256: vector.body:
+; CHECK-PREFER-AVX256: store <8 x i32>
+; CHECK-PREFER-AVX256: vec.epilog.vector.body:
+; CHECK-PREFER-AVX256: store <4 x i32>
 
 define void @f(ptr %a, i32 %n) {
 entry:
@@ -47,13 +48,11 @@ for.end:                                          ; preds = %for.end.loopexit, %
 ; Verify that the "prefer-vector-width=256" attribute prevents the use of 512-bit
 ; vectors
 
-; CHECK-LABEL: _g:
-; CHECK: vmovdqu %ymm{{.}},
-; CHECK-NOT: %zmm
-
-; CHECK-PREFER-AVX256-LABEL: g:
-; CHECK-PREFER-AVX256: vmovdqu %ymm{{.}},
-; CHECK-PREFER-AVX256-NOT: %zmm
+; CHECK-LABEL: @g(
+; CHECK: vector.body:
+; CHECK: store <8 x i32>
+; CHECK: vec.epilog.vector.body:
+; CHECK: store <4 x i32>
 
 define void @g(ptr %a, i32 %n) "prefer-vector-width"="256" {
 entry:
@@ -82,19 +81,11 @@ for.end:                                          ; preds = %for.end.loopexit, %
 ; Verify that the "prefer-vector-width=512" attribute override the subtarget
 ; vectors
 
-; CHECK-LABEL: _h:
-; CHECK: %vec.epilog.vector.body
-; CHECK: %ymm
-; CHECK: %vector.body
-; CHECK: vmovdqu64 %zmm{{.}},
-; CHECK-NOT: %ymm
-
-; CHECK-PREFER-AVX256-LABEL: h:
-; CHECK-PREFER-AVX256: %vec.epilog.vector.body
-; CHECK-PREFER-AVX256: %ymm
-; CHECK-PREFER-AVX256: %vector.body
-; CHECK-PREFER-AVX256: vmovdqu64 %zmm{{.}},
-; CHECK-PREFER-AVX256-NOT: %ymm
+; CHECK-LABEL: @h(
+; CHECK: vector.body:
+; CHECK: store <16 x i32>
+; CHECK: vec.epilog.vector.body:
+; CHECK: store <8 x i32>
 
 define void @h(ptr %a, i32 %n) "prefer-vector-width"="512" {
 entry:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/fp32_to_uint32-cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/fp32_to_uint32-cost-model.ll
index 4d92c1a3cf424..15533b2e25fab 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/fp32_to_uint32-cost-model.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/fp32_to_uint32-cost-model.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -mcpu=core-avx2 -passes=loop-vectorize -S | llc -mcpu=core-avx2 | FileCheck %s
+; RUN: opt < %s -mcpu=core-avx2 -passes=loop-vectorize -S | FileCheck %s
 
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx"
@@ -8,7 +8,7 @@ target triple = "x86_64-apple-macosx"
 
 ; If we need to scalarize the fptoui and then use inserts to build up the
 ; vector again, then there is certainly no value in going 256-bit wide.
-; CHECK-NOT: vinserti128
+; CHECK-NOT: fptoui <4 x float>
 
 define void @convert(i32 %N) {
 entry:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll
index 03783d3a6c9fb..0eb87d0d793ed 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -mcpu=core-avx2 -passes=loop-vectorize -S | llc -mcpu=core-avx2 | FileCheck %s
+; RUN: opt < %s -mcpu=core-avx2 -passes=loop-vectorize -S | FileCheck %s
 
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx"
@@ -9,7 +9,7 @@ target triple = "x86_64-apple-macosx"
 
 ; If we need to scalarize the fptoui and then use inserts to build up the
 ; vector again, then there is certainly no value in going 256-bit wide.
-; CHECK-NOT: vpinsrd
+; CHECK-NOT: fptoui <2 x double>
 
 define void @convert() {
 entry:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-loopid-dbg.ll b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-loopid-dbg.ll
index d774f778b7fdc..e1ecb70feb436 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-loopid-dbg.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-loopid-dbg.ll
@@ -2,10 +2,6 @@
 ; RUN: opt < %s -passes=loop-vectorize -force-vector-width=1 -force-vector-interleave=4 -mtriple=x86_64-unknown-linux -S -pass-remarks='loop-vectorize' 2>&1 | FileCheck -check-prefix=UNROLLED %s
 ; RUN: opt < %s -passes=loop-vectorize -force-vector-width=1 -force-vector-interleave=1 -mtriple=x86_64-unknown-linux -S -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck -check-prefix=NONE %s
 
-; RUN: llc < %s -mtriple x86_64-pc-linux-gnu -o - | FileCheck -check-prefix=DEBUG-OUTPUT %s
-; DEBUG-OUTPUT-NOT: .loc
-; DEBUG-OUTPUT-NOT: {{.*}}.debug_info
-
 ; VECTORIZED: remark: vectorization-remarks.c:17:8: vectorized loop (vectorization width: 4, interleaved count: 2)
 ; UNROLLED: remark: vectorization-remarks.c:17:8: interleaved loop (interleaved count: 4)
 ; NONE: remark: vectorization-remarks.c:17:8: loop not vectorized: vectorization and interleaving are explicitly disabled, or the loop has already been vectorized
diff --git a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
index f0b960c640562..8ba28042fcf2d 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
@@ -2,10 +2,6 @@
 ; RUN: opt < %s -passes=loop-vectorize -force-vector-width=1 -force-vector-interleave=4 -mtriple=x86_64-unknown-linux -S -pass-remarks='loop-vectorize' 2>&1 | FileCheck -check-prefix=UNROLLED %s
 ; RUN: opt < %s -passes=loop-vectorize -force-vector-width=1 -force-vector-interleave=1 -mtriple=x86_64-unknown-linux -S -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck -check-prefix=NONE %s
 
-; RUN: llc < %s -mtriple x86_64-pc-linux-gnu -o - | FileCheck -check-prefix=DEBUG-OUTPUT %s
-; DEBUG-OUTPUT-NOT: .loc
-; DEBUG-OUTPUT-NOT: {{.*}}.debug_info
-
 ; VECTORIZED: remark: vectorization-remarks.c:17:8: vectorized loop (vectorization width: 4, interleaved count: 2)
 ; UNROLLED: remark: vectorization-remarks.c:17:8: interleaved loop (interleaved count: 4)
 ; NONE: remark: vectorization-remarks.c:17:8: loop not vectorized: vectorization and interleaving are explicitly disabled, or the loop has already been vectorized