diff --git a/llvm/test/CodeGen/ARM/ParallelDSP/pr42729.ll b/llvm/test/CodeGen/ARM/ParallelDSP/pr42729.ll
index 9f032cd24857c9..11124a504cef0d 100644
--- a/llvm/test/CodeGen/ARM/ParallelDSP/pr42729.ll
+++ b/llvm/test/CodeGen/ARM/ParallelDSP/pr42729.ll
@@ -1,16 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -mtriple=thumbv7-unknown-linux-android -arm-parallel-dsp -S %s -o - | FileCheck %s
 
-; CHECK-LABEL: undef_no_return
-; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %incdec.ptr21 to i32*
-; CHECK: [[LOAD_A:%[^ ]+]] = load i32, i32* [[CAST_A]], align 2
-; CHECK: %uglygep15 = getelementptr i8, i8* undef, i32 undef
-; CHECK: [[GEP8:%[^ ]+]] = getelementptr i8, i8* undef, i32 undef
-; CHECK: [[CAST_GEP8:%[^ ]+]] = bitcast i8* [[GEP8]] to i16*
-; CHECK: [[GEP16:%[^ ]+]] = getelementptr i16, i16* [[CAST_GEP8]], i32 6
-; CHECK: [[CAST_GEP16:%[^ ]+]] = bitcast i16* [[GEP16]] to i32*
-; CHECK: [[LOAD_UNDEF:%[^ ]+]] = load i32, i32* [[CAST_GEP16]], align 2
-; CHECK: call i32 @llvm.arm.smladx(i32 [[LOAD_UNDEF]], i32 [[LOAD_A]], i32 undef)
 define void @undef_no_return(i16* %a) {
+; CHECK-LABEL: @undef_no_return(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[INCDEC_PTR21:%.*]] = getelementptr inbounds i16, i16* [[A:%.*]], i32 3
+; CHECK-NEXT:    [[INCDEC_PTR29:%.*]] = getelementptr inbounds i16, i16* [[A]], i32 4
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i16, i16* [[INCDEC_PTR21]], align 2
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[INCDEC_PTR21]] to i32*
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 2
+; CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
+; CHECK-NEXT:    [[TMP4:%.*]] = sext i16 [[TMP3]] to i32
+; CHECK-NEXT:    [[TMP5:%.*]] = lshr i32 [[TMP2]], 16
+; CHECK-NEXT:    [[TMP6:%.*]] = trunc i32 [[TMP5]] to i16
+; CHECK-NEXT:    [[TMP7:%.*]] = sext i16 [[TMP6]] to i32
+; CHECK-NEXT:    [[CONV25:%.*]] = sext i16 [[TMP0]] to i32
+; CHECK-NEXT:    [[UGLYGEP15:%.*]] = getelementptr i8, i8* undef, i32 undef
+; CHECK-NEXT:    [[UGLYGEP1516:%.*]] = bitcast i8* [[UGLYGEP15]] to i16*
+; CHECK-NEXT:    [[SCEVGEP17:%.*]] = getelementptr i16, i16* [[UGLYGEP1516]], i32 7
+; CHECK-NEXT:    [[TMP8:%.*]] = load i16, i16* [[SCEVGEP17]], align 2
+; CHECK-NEXT:    [[UGLYGEP12:%.*]] = getelementptr i8, i8* undef, i32 undef
+; CHECK-NEXT:    [[UGLYGEP1213:%.*]] = bitcast i8* [[UGLYGEP12]] to i16*
+; CHECK-NEXT:    [[SCEVGEP14:%.*]] = getelementptr i16, i16* [[UGLYGEP1213]], i32 6
+; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i16* [[SCEVGEP14]] to i32*
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 2
+; CHECK-NEXT:    [[TMP11:%.*]] = trunc i32 [[TMP10]] to i16
+; CHECK-NEXT:    [[TMP12:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP10]], i32 [[TMP2]], i32 undef)
+; CHECK-NEXT:    [[TMP13:%.*]] = sext i16 [[TMP11]] to i32
+; CHECK-NEXT:    [[TMP14:%.*]] = lshr i32 [[TMP10]], 16
+; CHECK-NEXT:    [[TMP15:%.*]] = trunc i32 [[TMP14]] to i16
+; CHECK-NEXT:    [[TMP16:%.*]] = sext i16 [[TMP15]] to i32
+; CHECK-NEXT:    [[CONV31:%.*]] = sext i16 [[TMP8]] to i32
+; CHECK-NEXT:    [[TMP17:%.*]] = load i16, i16* [[INCDEC_PTR29]], align 2
+; CHECK-NEXT:    [[CONV33:%.*]] = sext i16 [[TMP17]] to i32
+; CHECK-NEXT:    [[TMP18:%.*]] = load i16, i16* [[SCEVGEP14]], align 2
+; CHECK-NEXT:    [[CONV39:%.*]] = sext i16 [[TMP18]] to i32
+; CHECK-NEXT:    [[MUL_I287_NEG_NEG:%.*]] = mul nsw i32 [[TMP16]], [[TMP4]]
+; CHECK-NEXT:    [[MUL_I283_NEG_NEG:%.*]] = mul nsw i32 [[TMP13]], [[TMP7]]
+; CHECK-NEXT:    [[REASS_ADD408:%.*]] = add i32 undef, [[MUL_I287_NEG_NEG]]
+; CHECK-NEXT:    [[REASS_ADD409:%.*]] = add i32 [[REASS_ADD408]], [[MUL_I283_NEG_NEG]]
+; CHECK-NEXT:    br label [[FOR_BODY]]
+;
 entry:
   %incdec.ptr21 = getelementptr inbounds i16, i16* %a, i32 3
   %incdec.ptr29 = getelementptr inbounds i16, i16* %a, i32 4
@@ -38,18 +70,53 @@ for.body:
   br label %for.body
 }
 
-; CHECK-LABEL: return
-; CHECK: phi i32 [ %N, %entry ]
-; CHECK: [[ACC:%[^ ]+]] = phi i32 [ 0, %entry ], [ [[ACC_NEXT:%[^ ]+]], %for.body ]
-; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %incdec.ptr21 to i32*
-; CHECK: [[LOAD_A:%[^ ]+]] = load i32, i32* [[CAST_A]], align 2
-; CHECK: [[GEP8:%[^ ]+]] = getelementptr i8, i8* %b, i32 0
-; CHECK: [[CAST_GEP8:%[^ ]+]] = bitcast i8* [[GEP8]] to i16*
-; CHECK: [[GEP16:%[^ ]+]] = getelementptr i16, i16* [[CAST_GEP8]], i32 %iv
-; CHECK: [[CAST_GEP16:%[^ ]+]] = bitcast i16* [[GEP16]] to i32*
-; CHECK: [[LOAD_B:%[^ ]+]] = load i32, i32* [[CAST_GEP16]], align 2
-; CHECK: [[ACC_NEXT]] = call i32 @llvm.arm.smladx(i32 [[LOAD_B]], i32 [[LOAD_A]], i32 [[ACC]])
 define i32 @return(i16* %a, i8* %b, i32 %N) {
+; CHECK-LABEL: @return(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[INCDEC_PTR21:%.*]] = getelementptr inbounds i16, i16* [[A:%.*]], i32 3
+; CHECK-NEXT:    [[INCDEC_PTR29:%.*]] = getelementptr inbounds i16, i16* [[A]], i32 4
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[N:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ACC:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP12:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i16, i16* [[INCDEC_PTR21]], align 2
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[INCDEC_PTR21]] to i32*
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 2
+; CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
+; CHECK-NEXT:    [[TMP4:%.*]] = sext i16 [[TMP3]] to i32
+; CHECK-NEXT:    [[TMP5:%.*]] = lshr i32 [[TMP2]], 16
+; CHECK-NEXT:    [[TMP6:%.*]] = trunc i32 [[TMP5]] to i16
+; CHECK-NEXT:    [[TMP7:%.*]] = sext i16 [[TMP6]] to i32
+; CHECK-NEXT:    [[CONV25:%.*]] = sext i16 [[TMP0]] to i32
+; CHECK-NEXT:    [[UGLYGEP15:%.*]] = getelementptr i8, i8* [[B:%.*]], i32 0
+; CHECK-NEXT:    [[UGLYGEP1516:%.*]] = bitcast i8* [[UGLYGEP15]] to i16*
+; CHECK-NEXT:    [[B_IDX:%.*]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT:    [[SCEVGEP17:%.*]] = getelementptr i16, i16* [[UGLYGEP1516]], i32 [[B_IDX]]
+; CHECK-NEXT:    [[SCEVGEP14:%.*]] = getelementptr i16, i16* [[UGLYGEP1516]], i32 [[IV]]
+; CHECK-NEXT:    [[TMP8:%.*]] = load i16, i16* [[SCEVGEP17]], align 2
+; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i16* [[SCEVGEP14]] to i32*
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 2
+; CHECK-NEXT:    [[TMP11:%.*]] = trunc i32 [[TMP10]] to i16
+; CHECK-NEXT:    [[TMP12]] = call i32 @llvm.arm.smladx(i32 [[TMP10]], i32 [[TMP2]], i32 [[ACC]])
+; CHECK-NEXT:    [[TMP13:%.*]] = sext i16 [[TMP11]] to i32
+; CHECK-NEXT:    [[TMP14:%.*]] = lshr i32 [[TMP10]], 16
+; CHECK-NEXT:    [[TMP15:%.*]] = trunc i32 [[TMP14]] to i16
+; CHECK-NEXT:    [[TMP16:%.*]] = sext i16 [[TMP15]] to i32
+; CHECK-NEXT:    [[CONV31:%.*]] = sext i16 [[TMP8]] to i32
+; CHECK-NEXT:    [[TMP17:%.*]] = load i16, i16* [[INCDEC_PTR29]], align 2
+; CHECK-NEXT:    [[CONV33:%.*]] = sext i16 [[TMP17]] to i32
+; CHECK-NEXT:    [[TMP18:%.*]] = load i16, i16* [[SCEVGEP14]], align 2
+; CHECK-NEXT:    [[CONV39:%.*]] = sext i16 [[TMP18]] to i32
+; CHECK-NEXT:    [[MUL_I287_NEG_NEG:%.*]] = mul nsw i32 [[TMP16]], [[TMP4]]
+; CHECK-NEXT:    [[MUL_I283_NEG_NEG:%.*]] = mul nsw i32 [[TMP13]], [[TMP7]]
+; CHECK-NEXT:    [[REASS_ADD408:%.*]] = add i32 [[ACC]], [[MUL_I287_NEG_NEG]]
+; CHECK-NEXT:    [[REASS_ADD409:%.*]] = add i32 [[REASS_ADD408]], [[MUL_I283_NEG_NEG]]
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], -1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[IV_NEXT]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 [[TMP12]]
+;
 entry:
   %incdec.ptr21 = getelementptr inbounds i16, i16* %a, i32 3
   %incdec.ptr29 = getelementptr inbounds i16, i16* %a, i32 4