diff --git a/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll b/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll index 7e54d33633e2a..3a4fe398adf65 100644 --- a/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll +++ b/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll @@ -18,12 +18,12 @@ ; return res + count; ; } -define i64 @test_no_prep(i8* %0, i32 signext %1) { +define i64 @test_no_prep(i8* %arg, i32 signext %arg1) { ; CHECK-LABEL: test_no_prep: -; CHECK: # %bb.0: +; CHECK: # %bb.0: # %bb ; CHECK-NEXT: cmplwi r4, 0 ; CHECK-NEXT: beq cr0, .LBB0_4 -; CHECK-NEXT: # %bb.1: # %.preheader +; CHECK-NEXT: # %bb.1: # %bb3.preheader ; CHECK-NEXT: cmpldi r4, 1 ; CHECK-NEXT: li r5, 1 ; CHECK-NEXT: addi r3, r3, 4004 @@ -34,7 +34,8 @@ define i64 @test_no_prep(i8* %0, i32 signext %1) { ; CHECK-NEXT: mtctr r5 ; CHECK-NEXT: li r5, 0 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: .LBB0_2: # %bb3 +; CHECK-NEXT: # ; CHECK-NEXT: ldx r9, r3, r6 ; CHECK-NEXT: ldx r10, r3, r7 ; CHECK-NEXT: ldx r11, r3, r8 @@ -44,44 +45,45 @@ define i64 @test_no_prep(i8* %0, i32 signext %1) { ; CHECK-NEXT: mulld r9, r9, r11 ; CHECK-NEXT: maddld r5, r9, r12, r5 ; CHECK-NEXT: bdnz .LBB0_2 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: # %bb.3: # %bb25 ; CHECK-NEXT: add r3, r5, r4 ; CHECK-NEXT: blr ; CHECK-NEXT: .LBB0_4: ; CHECK-NEXT: addi r3, r4, 0 ; CHECK-NEXT: blr - %3 = sext i32 %1 to i64 - %4 = icmp eq i32 %1, 0 - br i1 %4, label %27, label %5 - -5: ; preds = %2, %5 - %6 = phi i64 [ %25, %5 ], [ 0, %2 ] - %7 = phi i64 [ %24, %5 ], [ 0, %2 ] - %8 = getelementptr inbounds i8, i8* %0, i64 %6 - %9 = getelementptr inbounds i8, i8* %8, i64 4001 - %10 = bitcast i8* %9 to i64* - %11 = load i64, i64* %10, align 8 - %12 = getelementptr inbounds i8, i8* %8, i64 4002 - %13 = bitcast i8* %12 to i64* - %14 = load i64, i64* %13, align 8 - %15 = getelementptr inbounds i8, i8* %8, i64 4003 - %16 = bitcast i8* %15 to i64* - %17 = load i64, i64* %16, align 8 - %18 = getelementptr inbounds i8, i8* %8, i64 4004 - %19 = bitcast i8* %18 to i64* - %20 = load i64, i64* %19, align 8 - %21 = mul i64 %14, %11 - %22 = mul i64 %21, %17 - %23 = mul i64 %22, %20 - %24 = add i64 %23, %7 - %25 = add nuw i64 %6, 1 - %26 = icmp ult i64 %25, %3 - br i1 %26, label %5, label %27 - -27: ; preds = %5, %2 - %28 = phi i64 [ 0, %2 ], [ %24, %5 ] - %29 = add i64 %28, %3 - ret i64 %29 +bb: + %i = sext i32 %arg1 to i64 + %i2 = icmp eq i32 %arg1, 0 + br i1 %i2, label %bb25, label %bb3 + +bb3: ; preds = %bb3, %bb + %i4 = phi i64 [ %i23, %bb3 ], [ 0, %bb ] + %i5 = phi i64 [ %i22, %bb3 ], [ 0, %bb ] + %i6 = getelementptr inbounds i8, i8* %arg, i64 %i4 + %i7 = getelementptr inbounds i8, i8* %i6, i64 4001 + %i8 = bitcast i8* %i7 to i64* + %i9 = load i64, i64* %i8, align 8 + %i10 = getelementptr inbounds i8, i8* %i6, i64 4002 + %i11 = bitcast i8* %i10 to i64* + %i12 = load i64, i64* %i11, align 8 + %i13 = getelementptr inbounds i8, i8* %i6, i64 4003 + %i14 = bitcast i8* %i13 to i64* + %i15 = load i64, i64* %i14, align 8 + %i16 = getelementptr inbounds i8, i8* %i6, i64 4004 + %i17 = bitcast i8* %i16 to i64* + %i18 = load i64, i64* %i17, align 8 + %i19 = mul i64 %i12, %i9 + %i20 = mul i64 %i19, %i15 + %i21 = mul i64 %i20, %i18 + %i22 = add i64 %i21, %i5 + %i23 = add nuw i64 %i4, 1 + %i24 = icmp ult i64 %i23, %i + br i1 %i24, label %bb3, label %bb25 + +bb25: ; preds = %bb3, %bb + %i26 = phi i64 [ 0, %bb ], [ %i22, %bb3 ] + %i27 = add i64 %i26, %i + ret i64 %i27 } ; test_ds_prep: @@ -101,12 +103,12 @@ define i64 @test_no_prep(i8* %0, i32 signext %1) { ; return 
res + count; ; } -define i64 @test_ds_prep(i8* %0, i32 signext %1) { +define i64 @test_ds_prep(i8* %arg, i32 signext %arg1) { ; CHECK-LABEL: test_ds_prep: -; CHECK: # %bb.0: +; CHECK: # %bb.0: # %bb ; CHECK-NEXT: cmplwi r4, 0 ; CHECK-NEXT: beq cr0, .LBB1_4 -; CHECK-NEXT: # %bb.1: # %.preheader +; CHECK-NEXT: # %bb.1: # %bb3.preheader ; CHECK-NEXT: cmpldi r4, 1 ; CHECK-NEXT: li r5, 1 ; CHECK-NEXT: addi r6, r3, 4002 @@ -115,7 +117,8 @@ define i64 @test_ds_prep(i8* %0, i32 signext %1) { ; CHECK-NEXT: mtctr r3 ; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB1_2: +; CHECK-NEXT: .LBB1_2: # %bb3 +; CHECK-NEXT: # ; CHECK-NEXT: ldx r8, r6, r7 ; CHECK-NEXT: ld r9, 0(r6) ; CHECK-NEXT: ldx r10, r6, r5 @@ -125,44 +128,45 @@ define i64 @test_ds_prep(i8* %0, i32 signext %1) { ; CHECK-NEXT: mulld r8, r8, r10 ; CHECK-NEXT: maddld r3, r8, r11, r3 ; CHECK-NEXT: bdnz .LBB1_2 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: # %bb.3: # %bb25 ; CHECK-NEXT: add r3, r3, r4 ; CHECK-NEXT: blr ; CHECK-NEXT: .LBB1_4: ; CHECK-NEXT: addi r3, r4, 0 ; CHECK-NEXT: blr - %3 = sext i32 %1 to i64 - %4 = icmp eq i32 %1, 0 - br i1 %4, label %27, label %5 - -5: ; preds = %2, %5 - %6 = phi i64 [ %25, %5 ], [ 0, %2 ] - %7 = phi i64 [ %24, %5 ], [ 0, %2 ] - %8 = getelementptr inbounds i8, i8* %0, i64 %6 - %9 = getelementptr inbounds i8, i8* %8, i64 4001 - %10 = bitcast i8* %9 to i64* - %11 = load i64, i64* %10, align 8 - %12 = getelementptr inbounds i8, i8* %8, i64 4002 - %13 = bitcast i8* %12 to i64* - %14 = load i64, i64* %13, align 8 - %15 = getelementptr inbounds i8, i8* %8, i64 4003 - %16 = bitcast i8* %15 to i64* - %17 = load i64, i64* %16, align 8 - %18 = getelementptr inbounds i8, i8* %8, i64 4006 - %19 = bitcast i8* %18 to i64* - %20 = load i64, i64* %19, align 8 - %21 = mul i64 %14, %11 - %22 = mul i64 %21, %17 - %23 = mul i64 %22, %20 - %24 = add i64 %23, %7 - %25 = add nuw i64 %6, 1 - %26 = icmp ult i64 %25, %3 - br i1 %26, label %5, label %27 - -27: ; preds = %5, %2 - %28 = phi i64 [ 0, %2 ], [ %24, %5 ] - %29 = add i64 %28, %3 - ret i64 %29 +bb: + %i = sext i32 %arg1 to i64 + %i2 = icmp eq i32 %arg1, 0 + br i1 %i2, label %bb25, label %bb3 + +bb3: ; preds = %bb3, %bb + %i4 = phi i64 [ %i23, %bb3 ], [ 0, %bb ] + %i5 = phi i64 [ %i22, %bb3 ], [ 0, %bb ] + %i6 = getelementptr inbounds i8, i8* %arg, i64 %i4 + %i7 = getelementptr inbounds i8, i8* %i6, i64 4001 + %i8 = bitcast i8* %i7 to i64* + %i9 = load i64, i64* %i8, align 8 + %i10 = getelementptr inbounds i8, i8* %i6, i64 4002 + %i11 = bitcast i8* %i10 to i64* + %i12 = load i64, i64* %i11, align 8 + %i13 = getelementptr inbounds i8, i8* %i6, i64 4003 + %i14 = bitcast i8* %i13 to i64* + %i15 = load i64, i64* %i14, align 8 + %i16 = getelementptr inbounds i8, i8* %i6, i64 4006 + %i17 = bitcast i8* %i16 to i64* + %i18 = load i64, i64* %i17, align 8 + %i19 = mul i64 %i12, %i9 + %i20 = mul i64 %i19, %i15 + %i21 = mul i64 %i20, %i18 + %i22 = add i64 %i21, %i5 + %i23 = add nuw i64 %i4, 1 + %i24 = icmp ult i64 %i23, %i + br i1 %i24, label %bb3, label %bb25 + +bb25: ; preds = %bb3, %bb + %i26 = phi i64 [ 0, %bb ], [ %i22, %bb3 ] + %i27 = add i64 %i26, %i + ret i64 %i27 } ; test_max_number_reminder: @@ -192,9 +196,9 @@ define i64 @test_ds_prep(i8* %0, i32 signext %1) { ; return res + count; ;} -define i64 @test_max_number_reminder(i8* %0, i32 signext %1) { +define i64 @test_max_number_reminder(i8* %arg, i32 signext %arg1) { ; CHECK-LABEL: test_max_number_reminder: -; CHECK: # %bb.0: +; CHECK: # %bb.0: # %bb ; CHECK-NEXT: cmplwi r4, 0 ; CHECK-NEXT: std r25, -56(r1) # 8-byte 
Folded Spill ; CHECK-NEXT: std r26, -48(r1) # 8-byte Folded Spill @@ -203,7 +207,7 @@ define i64 @test_max_number_reminder(i8* %0, i32 signext %1) { ; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: beq cr0, .LBB2_3 -; CHECK-NEXT: # %bb.1: # %.preheader +; CHECK-NEXT: # %bb.1: # %bb3.preheader ; CHECK-NEXT: cmpldi r4, 1 ; CHECK-NEXT: li r5, 1 ; CHECK-NEXT: addi r9, r3, 4002 @@ -215,7 +219,8 @@ define i64 @test_max_number_reminder(i8* %0, i32 signext %1) { ; CHECK-NEXT: mtctr r3 ; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB2_2: +; CHECK-NEXT: .LBB2_2: # %bb3 +; CHECK-NEXT: # ; CHECK-NEXT: ldx r11, r9, r6 ; CHECK-NEXT: ld r12, 0(r9) ; CHECK-NEXT: ldx r0, r9, r5 @@ -238,7 +243,7 @@ define i64 @test_max_number_reminder(i8* %0, i32 signext %1) { ; CHECK-NEXT: b .LBB2_4 ; CHECK-NEXT: .LBB2_3: ; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: .LBB2_4: +; CHECK-NEXT: .LBB2_4: # %bb45 ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload @@ -247,58 +252,59 @@ define i64 @test_max_number_reminder(i8* %0, i32 signext %1) { ; CHECK-NEXT: ld r26, -48(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r25, -56(r1) # 8-byte Folded Reload ; CHECK-NEXT: blr - %3 = sext i32 %1 to i64 - %4 = icmp eq i32 %1, 0 - br i1 %4, label %47, label %5 - -5: ; preds = %2, %5 - %6 = phi i64 [ %45, %5 ], [ 0, %2 ] - %7 = phi i64 [ %44, %5 ], [ 0, %2 ] - %8 = getelementptr inbounds i8, i8* %0, i64 %6 - %9 = getelementptr inbounds i8, i8* %8, i64 4001 - %10 = bitcast i8* %9 to i64* - %11 = load i64, i64* %10, align 8 - %12 = getelementptr inbounds i8, i8* %8, i64 4002 - %13 = bitcast i8* %12 to i64* - %14 = load i64, i64* %13, align 8 - %15 = getelementptr inbounds i8, i8* %8, i64 4003 - %16 = bitcast i8* %15 to i64* - %17 = load i64, i64* %16, align 8 - %18 = getelementptr inbounds i8, i8* %8, i64 4005 - %19 = bitcast i8* %18 to i64* - %20 = load i64, i64* %19, align 8 - %21 = getelementptr inbounds i8, i8* %8, i64 4006 - %22 = bitcast i8* %21 to i64* - %23 = load i64, i64* %22, align 8 - %24 = getelementptr inbounds i8, i8* %8, i64 4007 - %25 = bitcast i8* %24 to i64* - %26 = load i64, i64* %25, align 8 - %27 = getelementptr inbounds i8, i8* %8, i64 4014 - %28 = bitcast i8* %27 to i64* - %29 = load i64, i64* %28, align 8 - %30 = getelementptr inbounds i8, i8* %8, i64 4010 - %31 = bitcast i8* %30 to i64* - %32 = load i64, i64* %31, align 8 - %33 = getelementptr inbounds i8, i8* %8, i64 4011 - %34 = bitcast i8* %33 to i64* - %35 = load i64, i64* %34, align 8 - %36 = mul i64 %14, %11 - %37 = mul i64 %36, %17 - %38 = mul i64 %37, %20 - %39 = mul i64 %38, %23 - %40 = mul i64 %39, %26 - %41 = mul i64 %40, %29 - %42 = mul i64 %41, %32 - %43 = mul i64 %42, %35 - %44 = add i64 %43, %7 - %45 = add nuw i64 %6, 1 - %46 = icmp ult i64 %45, %3 - br i1 %46, label %5, label %47 - -47: ; preds = %5, %2 - %48 = phi i64 [ 0, %2 ], [ %44, %5 ] - %49 = add i64 %48, %3 - ret i64 %49 +bb: + %i = sext i32 %arg1 to i64 + %i2 = icmp eq i32 %arg1, 0 + br i1 %i2, label %bb45, label %bb3 + +bb3: ; preds = %bb3, %bb + %i4 = phi i64 [ %i43, %bb3 ], [ 0, %bb ] + %i5 = phi i64 [ %i42, %bb3 ], [ 0, %bb ] + %i6 = getelementptr inbounds i8, i8* %arg, i64 %i4 + %i7 = getelementptr inbounds i8, i8* %i6, i64 4001 + %i8 = bitcast i8* %i7 to i64* + %i9 = load i64, i64* %i8, align 8 + %i10 = getelementptr inbounds i8, i8* %i6, i64 4002 + %i11 = bitcast i8* %i10 to i64* + %i12 = load i64, 
i64* %i11, align 8 + %i13 = getelementptr inbounds i8, i8* %i6, i64 4003 + %i14 = bitcast i8* %i13 to i64* + %i15 = load i64, i64* %i14, align 8 + %i16 = getelementptr inbounds i8, i8* %i6, i64 4005 + %i17 = bitcast i8* %i16 to i64* + %i18 = load i64, i64* %i17, align 8 + %i19 = getelementptr inbounds i8, i8* %i6, i64 4006 + %i20 = bitcast i8* %i19 to i64* + %i21 = load i64, i64* %i20, align 8 + %i22 = getelementptr inbounds i8, i8* %i6, i64 4007 + %i23 = bitcast i8* %i22 to i64* + %i24 = load i64, i64* %i23, align 8 + %i25 = getelementptr inbounds i8, i8* %i6, i64 4014 + %i26 = bitcast i8* %i25 to i64* + %i27 = load i64, i64* %i26, align 8 + %i28 = getelementptr inbounds i8, i8* %i6, i64 4010 + %i29 = bitcast i8* %i28 to i64* + %i30 = load i64, i64* %i29, align 8 + %i31 = getelementptr inbounds i8, i8* %i6, i64 4011 + %i32 = bitcast i8* %i31 to i64* + %i33 = load i64, i64* %i32, align 8 + %i34 = mul i64 %i12, %i9 + %i35 = mul i64 %i34, %i15 + %i36 = mul i64 %i35, %i18 + %i37 = mul i64 %i36, %i21 + %i38 = mul i64 %i37, %i24 + %i39 = mul i64 %i38, %i27 + %i40 = mul i64 %i39, %i30 + %i41 = mul i64 %i40, %i33 + %i42 = add i64 %i41, %i5 + %i43 = add nuw i64 %i4, 1 + %i44 = icmp ult i64 %i43, %i + br i1 %i44, label %bb3, label %bb45 + +bb45: ; preds = %bb3, %bb + %i46 = phi i64 [ 0, %bb ], [ %i42, %bb3 ] + %i47 = add i64 %i46, %i + ret i64 %i47 } ; test_update_ds_prep_interact: @@ -318,12 +324,12 @@ define i64 @test_max_number_reminder(i8* %0, i32 signext %1) { ; return res + count; ; } -define dso_local i64 @test_update_ds_prep_interact(i8* %0, i32 signext %1) { +define dso_local i64 @test_update_ds_prep_interact(i8* %arg, i32 signext %arg1) { ; CHECK-LABEL: test_update_ds_prep_interact: -; CHECK: # %bb.0: +; CHECK: # %bb.0: # %bb ; CHECK-NEXT: cmplwi r4, 0 ; CHECK-NEXT: beq cr0, .LBB3_4 -; CHECK-NEXT: # %bb.1: # %.preheader +; CHECK-NEXT: # %bb.1: # %bb3.preheader ; CHECK-NEXT: cmpldi r4, 1 ; CHECK-NEXT: li r6, 1 ; CHECK-NEXT: addi r3, r3, 3998 @@ -332,7 +338,8 @@ define dso_local i64 @test_update_ds_prep_interact(i8* %0, i32 signext %1) { ; CHECK-NEXT: mtctr r5 ; CHECK-NEXT: li r5, 0 ; CHECK-NEXT: .p2align 5 -; CHECK-NEXT: .LBB3_2: +; CHECK-NEXT: .LBB3_2: # %bb3 +; CHECK-NEXT: # ; CHECK-NEXT: ldu r8, 4(r3) ; CHECK-NEXT: ldx r9, r3, r7 ; CHECK-NEXT: ldx r10, r3, r6 @@ -341,45 +348,46 @@ define dso_local i64 @test_update_ds_prep_interact(i8* %0, i32 signext %1) { ; CHECK-NEXT: mulld r8, r8, r10 ; CHECK-NEXT: maddld r5, r8, r11, r5 ; CHECK-NEXT: bdnz .LBB3_2 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: # %bb.3: # %bb26 ; CHECK-NEXT: add r3, r5, r4 ; CHECK-NEXT: blr ; CHECK-NEXT: .LBB3_4: ; CHECK-NEXT: addi r3, r4, 0 ; CHECK-NEXT: blr - %3 = sext i32 %1 to i64 - %4 = icmp eq i32 %1, 0 - br i1 %4, label %28, label %5 - -5: ; preds = %2, %5 - %6 = phi i64 [ %26, %5 ], [ 0, %2 ] - %7 = phi i64 [ %25, %5 ], [ 0, %2 ] - %8 = shl i64 %6, 2 - %9 = getelementptr inbounds i8, i8* %0, i64 %8 - %10 = getelementptr inbounds i8, i8* %9, i64 4001 - %11 = bitcast i8* %10 to i64* - %12 = load i64, i64* %11, align 8 - %13 = getelementptr inbounds i8, i8* %9, i64 4002 - %14 = bitcast i8* %13 to i64* - %15 = load i64, i64* %14, align 8 - %16 = getelementptr inbounds i8, i8* %9, i64 4003 - %17 = bitcast i8* %16 to i64* - %18 = load i64, i64* %17, align 8 - %19 = getelementptr inbounds i8, i8* %9, i64 4006 - %20 = bitcast i8* %19 to i64* - %21 = load i64, i64* %20, align 8 - %22 = mul i64 %15, %12 - %23 = mul i64 %22, %18 - %24 = mul i64 %23, %21 - %25 = add i64 %24, %7 - %26 = add nuw i64 %6, 1 - %27 = icmp ult i64 %26, 
%3 - br i1 %27, label %5, label %28 - -28: ; preds = %5, %2 - %29 = phi i64 [ 0, %2 ], [ %25, %5 ] - %30 = add i64 %29, %3 - ret i64 %30 +bb: + %i = sext i32 %arg1 to i64 + %i2 = icmp eq i32 %arg1, 0 + br i1 %i2, label %bb26, label %bb3 + +bb3: ; preds = %bb3, %bb + %i4 = phi i64 [ %i24, %bb3 ], [ 0, %bb ] + %i5 = phi i64 [ %i23, %bb3 ], [ 0, %bb ] + %i6 = shl i64 %i4, 2 + %i7 = getelementptr inbounds i8, i8* %arg, i64 %i6 + %i8 = getelementptr inbounds i8, i8* %i7, i64 4001 + %i9 = bitcast i8* %i8 to i64* + %i10 = load i64, i64* %i9, align 8 + %i11 = getelementptr inbounds i8, i8* %i7, i64 4002 + %i12 = bitcast i8* %i11 to i64* + %i13 = load i64, i64* %i12, align 8 + %i14 = getelementptr inbounds i8, i8* %i7, i64 4003 + %i15 = bitcast i8* %i14 to i64* + %i16 = load i64, i64* %i15, align 8 + %i17 = getelementptr inbounds i8, i8* %i7, i64 4006 + %i18 = bitcast i8* %i17 to i64* + %i19 = load i64, i64* %i18, align 8 + %i20 = mul i64 %i13, %i10 + %i21 = mul i64 %i20, %i16 + %i22 = mul i64 %i21, %i19 + %i23 = add i64 %i22, %i5 + %i24 = add nuw i64 %i4, 1 + %i25 = icmp ult i64 %i24, %i + br i1 %i25, label %bb3, label %bb26 + +bb26: ; preds = %bb3, %bb + %i27 = phi i64 [ 0, %bb ], [ %i23, %bb3 ] + %i28 = add i64 %i27, %i + ret i64 %i28 } ; test_update_ds_prep_nointeract: @@ -399,12 +407,12 @@ define dso_local i64 @test_update_ds_prep_interact(i8* %0, i32 signext %1) { ; return res + count; ; } -define i64 @test_update_ds_prep_nointeract(i8* %0, i32 signext %1) { +define i64 @test_update_ds_prep_nointeract(i8* %arg, i32 signext %arg1) { ; CHECK-LABEL: test_update_ds_prep_nointeract: -; CHECK: # %bb.0: +; CHECK: # %bb.0: # %bb ; CHECK-NEXT: cmplwi r4, 0 ; CHECK-NEXT: beq cr0, .LBB4_4 -; CHECK-NEXT: # %bb.1: # %.preheader +; CHECK-NEXT: # %bb.1: # %bb3.preheader ; CHECK-NEXT: cmpldi r4, 1 ; CHECK-NEXT: li r6, 1 ; CHECK-NEXT: addi r5, r3, 4000 @@ -414,7 +422,8 @@ define i64 @test_update_ds_prep_nointeract(i8* %0, i32 signext %1) { ; CHECK-NEXT: mtctr r6 ; CHECK-NEXT: li r6, 0 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB4_2: +; CHECK-NEXT: .LBB4_2: # %bb3 +; CHECK-NEXT: # ; CHECK-NEXT: lbzu r8, 1(r5) ; CHECK-NEXT: ldx r9, r3, r7 ; CHECK-NEXT: ld r10, 0(r3) @@ -424,44 +433,45 @@ define i64 @test_update_ds_prep_nointeract(i8* %0, i32 signext %1) { ; CHECK-NEXT: mulld r8, r8, r10 ; CHECK-NEXT: maddld r6, r8, r11, r6 ; CHECK-NEXT: bdnz .LBB4_2 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: # %bb.3: # %bb25 ; CHECK-NEXT: add r3, r6, r4 ; CHECK-NEXT: blr ; CHECK-NEXT: .LBB4_4: ; CHECK-NEXT: addi r3, r4, 0 ; CHECK-NEXT: blr - %3 = sext i32 %1 to i64 - %4 = icmp eq i32 %1, 0 - br i1 %4, label %27, label %5 - -5: ; preds = %2, %5 - %6 = phi i64 [ %25, %5 ], [ 0, %2 ] - %7 = phi i64 [ %24, %5 ], [ 0, %2 ] - %8 = getelementptr inbounds i8, i8* %0, i64 %6 - %9 = getelementptr inbounds i8, i8* %8, i64 4001 - %10 = load i8, i8* %9, align 1 - %11 = getelementptr inbounds i8, i8* %8, i64 4002 - %12 = bitcast i8* %11 to i64* - %13 = load i64, i64* %12, align 8 - %14 = getelementptr inbounds i8, i8* %8, i64 4003 - %15 = bitcast i8* %14 to i64* - %16 = load i64, i64* %15, align 8 - %17 = getelementptr inbounds i8, i8* %8, i64 4007 - %18 = bitcast i8* %17 to i64* - %19 = load i64, i64* %18, align 8 - %20 = zext i8 %10 to i64 - %21 = mul i64 %13, %20 - %22 = mul i64 %21, %16 - %23 = mul i64 %22, %19 - %24 = add i64 %23, %7 - %25 = add nuw i64 %6, 1 - %26 = icmp ult i64 %25, %3 - br i1 %26, label %5, label %27 - -27: ; preds = %5, %2 - %28 = phi i64 [ 0, %2 ], [ %24, %5 ] - %29 = add i64 %28, %3 - ret i64 %29 +bb: + %i = sext i32 
%arg1 to i64 + %i2 = icmp eq i32 %arg1, 0 + br i1 %i2, label %bb25, label %bb3 + +bb3: ; preds = %bb3, %bb + %i4 = phi i64 [ %i23, %bb3 ], [ 0, %bb ] + %i5 = phi i64 [ %i22, %bb3 ], [ 0, %bb ] + %i6 = getelementptr inbounds i8, i8* %arg, i64 %i4 + %i7 = getelementptr inbounds i8, i8* %i6, i64 4001 + %i8 = load i8, i8* %i7, align 1 + %i9 = getelementptr inbounds i8, i8* %i6, i64 4002 + %i10 = bitcast i8* %i9 to i64* + %i11 = load i64, i64* %i10, align 8 + %i12 = getelementptr inbounds i8, i8* %i6, i64 4003 + %i13 = bitcast i8* %i12 to i64* + %i14 = load i64, i64* %i13, align 8 + %i15 = getelementptr inbounds i8, i8* %i6, i64 4007 + %i16 = bitcast i8* %i15 to i64* + %i17 = load i64, i64* %i16, align 8 + %i18 = zext i8 %i8 to i64 + %i19 = mul i64 %i11, %i18 + %i20 = mul i64 %i19, %i14 + %i21 = mul i64 %i20, %i17 + %i22 = add i64 %i21, %i5 + %i23 = add nuw i64 %i4, 1 + %i24 = icmp ult i64 %i23, %i + br i1 %i24, label %bb3, label %bb25 + +bb25: ; preds = %bb3, %bb + %i26 = phi i64 [ 0, %bb ], [ %i22, %bb3 ] + %i27 = add i64 %i26, %i + ret i64 %i27 } ; test_ds_multiple_chains: @@ -485,13 +495,13 @@ define i64 @test_update_ds_prep_nointeract(i8* %0, i32 signext %1) { ; return res + count; ; } -define dso_local i64 @test_ds_multiple_chains(i8* %0, i8* %1, i32 signext %2) { +define dso_local i64 @test_ds_multiple_chains(i8* %arg, i8* %arg1, i32 signext %arg2) { ; CHECK-LABEL: test_ds_multiple_chains: -; CHECK: # %bb.0: +; CHECK: # %bb.0: # %bb ; CHECK-NEXT: cmplwi r5, 0 ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: beq cr0, .LBB5_3 -; CHECK-NEXT: # %bb.1: # %.preheader +; CHECK-NEXT: # %bb.1: # %bb4.preheader ; CHECK-NEXT: cmpldi r5, 1 ; CHECK-NEXT: li r6, 1 ; CHECK-NEXT: addi r3, r3, 4001 @@ -501,7 +511,8 @@ define dso_local i64 @test_ds_multiple_chains(i8* %0, i8* %1, i32 signext %2) { ; CHECK-NEXT: mtctr r6 ; CHECK-NEXT: li r6, 0 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB5_2: +; CHECK-NEXT: .LBB5_2: # %bb4 +; CHECK-NEXT: # ; CHECK-NEXT: ld r8, 0(r3) ; CHECK-NEXT: ldx r9, r3, r7 ; CHECK-NEXT: ld r10, 4(r3) @@ -523,59 +534,60 @@ define dso_local i64 @test_ds_multiple_chains(i8* %0, i8* %1, i32 signext %2) { ; CHECK-NEXT: b .LBB5_4 ; CHECK-NEXT: .LBB5_3: ; CHECK-NEXT: li r6, 0 -; CHECK-NEXT: .LBB5_4: +; CHECK-NEXT: .LBB5_4: # %bb43 ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-NEXT: add r3, r6, r5 ; CHECK-NEXT: blr - %4 = sext i32 %2 to i64 - %5 = icmp eq i32 %2, 0 - br i1 %5, label %45, label %6 - -6: ; preds = %3, %6 - %7 = phi i64 [ %43, %6 ], [ 0, %3 ] - %8 = phi i64 [ %42, %6 ], [ 0, %3 ] - %9 = getelementptr inbounds i8, i8* %0, i64 %7 - %10 = getelementptr inbounds i8, i8* %9, i64 4001 - %11 = bitcast i8* %10 to i64* - %12 = load i64, i64* %11, align 8 - %13 = getelementptr inbounds i8, i8* %9, i64 4010 - %14 = bitcast i8* %13 to i64* - %15 = load i64, i64* %14, align 8 - %16 = getelementptr inbounds i8, i8* %9, i64 4005 - %17 = bitcast i8* %16 to i64* - %18 = load i64, i64* %17, align 8 - %19 = getelementptr inbounds i8, i8* %9, i64 4009 - %20 = bitcast i8* %19 to i64* - %21 = load i64, i64* %20, align 8 - %22 = getelementptr inbounds i8, i8* %1, i64 %7 - %23 = getelementptr inbounds i8, i8* %22, i64 4001 - %24 = bitcast i8* %23 to i64* - %25 = load i64, i64* %24, align 8 - %26 = getelementptr inbounds i8, i8* %22, i64 4010 - %27 = bitcast i8* %26 to i64* - %28 = load i64, i64* %27, align 8 - %29 = getelementptr inbounds i8, i8* %22, i64 4005 - %30 = bitcast i8* %29 to i64* - %31 = load i64, i64* %30, align 8 - %32 = getelementptr inbounds i8, i8* %22, 
i64 4009 - %33 = bitcast i8* %32 to i64* - %34 = load i64, i64* %33, align 8 - %35 = mul i64 %15, %12 - %36 = mul i64 %35, %18 - %37 = mul i64 %36, %21 - %38 = mul i64 %37, %25 - %39 = mul i64 %38, %28 - %40 = mul i64 %39, %31 - %41 = mul i64 %40, %34 - %42 = add i64 %41, %8 - %43 = add nuw i64 %7, 1 - %44 = icmp ult i64 %43, %4 - br i1 %44, label %6, label %45 - -45: ; preds = %6, %3 - %46 = phi i64 [ 0, %3 ], [ %42, %6 ] - %47 = add i64 %46, %4 - ret i64 %47 +bb: + %i = sext i32 %arg2 to i64 + %i3 = icmp eq i32 %arg2, 0 + br i1 %i3, label %bb43, label %bb4 + +bb4: ; preds = %bb4, %bb + %i5 = phi i64 [ %i41, %bb4 ], [ 0, %bb ] + %i6 = phi i64 [ %i40, %bb4 ], [ 0, %bb ] + %i7 = getelementptr inbounds i8, i8* %arg, i64 %i5 + %i8 = getelementptr inbounds i8, i8* %i7, i64 4001 + %i9 = bitcast i8* %i8 to i64* + %i10 = load i64, i64* %i9, align 8 + %i11 = getelementptr inbounds i8, i8* %i7, i64 4010 + %i12 = bitcast i8* %i11 to i64* + %i13 = load i64, i64* %i12, align 8 + %i14 = getelementptr inbounds i8, i8* %i7, i64 4005 + %i15 = bitcast i8* %i14 to i64* + %i16 = load i64, i64* %i15, align 8 + %i17 = getelementptr inbounds i8, i8* %i7, i64 4009 + %i18 = bitcast i8* %i17 to i64* + %i19 = load i64, i64* %i18, align 8 + %i20 = getelementptr inbounds i8, i8* %arg1, i64 %i5 + %i21 = getelementptr inbounds i8, i8* %i20, i64 4001 + %i22 = bitcast i8* %i21 to i64* + %i23 = load i64, i64* %i22, align 8 + %i24 = getelementptr inbounds i8, i8* %i20, i64 4010 + %i25 = bitcast i8* %i24 to i64* + %i26 = load i64, i64* %i25, align 8 + %i27 = getelementptr inbounds i8, i8* %i20, i64 4005 + %i28 = bitcast i8* %i27 to i64* + %i29 = load i64, i64* %i28, align 8 + %i30 = getelementptr inbounds i8, i8* %i20, i64 4009 + %i31 = bitcast i8* %i30 to i64* + %i32 = load i64, i64* %i31, align 8 + %i33 = mul i64 %i13, %i10 + %i34 = mul i64 %i33, %i16 + %i35 = mul i64 %i34, %i19 + %i36 = mul i64 %i35, %i23 + %i37 = mul i64 %i36, %i26 + %i38 = mul i64 %i37, %i29 + %i39 = mul i64 %i38, %i32 + %i40 = add i64 %i39, %i6 + %i41 = add nuw i64 %i5, 1 + %i42 = icmp ult i64 %i41, %i + br i1 %i42, label %bb4, label %bb43 + +bb43: ; preds = %bb4, %bb + %i44 = phi i64 [ 0, %bb ], [ %i40, %bb4 ] + %i45 = add i64 %i44, %i + ret i64 %i45 } ; test_ds_cross_basic_blocks: @@ -611,16 +623,16 @@ define dso_local i64 @test_ds_multiple_chains(i8* %0, i8* %1, i32 signext %2) { @arr = external local_unnamed_addr global i8*, align 8 -define i64 @test_ds_cross_basic_blocks(i8* %0, i32 signext %1) { +define i64 @test_ds_cross_basic_blocks(i8* %arg, i32 signext %arg1) { ; CHECK-LABEL: test_ds_cross_basic_blocks: -; CHECK: # %bb.0: +; CHECK: # %bb.0: # %bb ; CHECK-NEXT: cmplwi r4, 0 ; CHECK-NEXT: std r27, -40(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: beq cr0, .LBB6_8 -; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: # %bb.1: # %bb3 ; CHECK-NEXT: addis r5, r2, .LC0@toc@ha ; CHECK-NEXT: cmpldi r4, 1 ; CHECK-NEXT: li r7, 1 @@ -641,12 +653,14 @@ define i64 @test_ds_cross_basic_blocks(i8* %0, i32 signext %1) { ; CHECK-NEXT: addi r5, r5, -1 ; CHECK-NEXT: b .LBB6_4 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB6_2: +; CHECK-NEXT: .LBB6_2: # %bb18 +; CHECK-NEXT: # ; CHECK-NEXT: ldx r0, r6, r4 ; CHECK-NEXT: add r29, r0, r29 ; CHECK-NEXT: ld r0, -8(r6) ; CHECK-NEXT: add r30, r0, r30 -; CHECK-NEXT: .LBB6_3: +; CHECK-NEXT: .LBB6_3: # %bb49 +; CHECK-NEXT: # ; CHECK-NEXT: mulld r0, r30, r29 ; CHECK-NEXT: addi r6, r6, 1 ; 
CHECK-NEXT: mulld r0, r0, r12 @@ -654,7 +668,8 @@ define i64 @test_ds_cross_basic_blocks(i8* %0, i32 signext %1) { ; CHECK-NEXT: mulld r0, r0, r10 ; CHECK-NEXT: maddld r3, r0, r7, r3 ; CHECK-NEXT: bdz .LBB6_9 -; CHECK-NEXT: .LBB6_4: +; CHECK-NEXT: .LBB6_4: # %bb5 +; CHECK-NEXT: # ; CHECK-NEXT: lbzu r0, 1(r5) ; CHECK-NEXT: mulli r28, r0, 171 ; CHECK-NEXT: rlwinm r27, r28, 24, 8, 30 @@ -664,17 +679,20 @@ define i64 @test_ds_cross_basic_blocks(i8* %0, i32 signext %1) { ; CHECK-NEXT: clrlwi r0, r0, 24 ; CHECK-NEXT: cmplwi r0, 1 ; CHECK-NEXT: beq cr0, .LBB6_2 -; CHECK-NEXT: # %bb.5: +; CHECK-NEXT: # %bb.5: # %bb28 +; CHECK-NEXT: # ; CHECK-NEXT: cmplwi r0, 2 ; CHECK-NEXT: bne cr0, .LBB6_7 -; CHECK-NEXT: # %bb.6: +; CHECK-NEXT: # %bb.6: # %bb31 +; CHECK-NEXT: # ; CHECK-NEXT: ldx r0, r6, r8 ; CHECK-NEXT: add r12, r0, r12 ; CHECK-NEXT: ld r0, -4(r6) ; CHECK-NEXT: add r11, r0, r11 ; CHECK-NEXT: b .LBB6_3 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB6_7: +; CHECK-NEXT: .LBB6_7: # %bb40 +; CHECK-NEXT: # ; CHECK-NEXT: ldx r0, r6, r9 ; CHECK-NEXT: add r10, r0, r10 ; CHECK-NEXT: ld r0, 0(r6) @@ -682,94 +700,95 @@ define i64 @test_ds_cross_basic_blocks(i8* %0, i32 signext %1) { ; CHECK-NEXT: b .LBB6_3 ; CHECK-NEXT: .LBB6_8: ; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: .LBB6_9: +; CHECK-NEXT: .LBB6_9: # %bb64 ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r27, -40(r1) # 8-byte Folded Reload ; CHECK-NEXT: blr - %3 = sext i32 %1 to i64 - %4 = icmp eq i32 %1, 0 - br i1 %4, label %66, label %5 - -5: ; preds = %2 - %6 = load i8*, i8** @arr, align 8 - br label %7 - -7: ; preds = %5, %51 - %8 = phi i64 [ 1, %5 ], [ %57, %51 ] - %9 = phi i64 [ 1, %5 ], [ %56, %51 ] - %10 = phi i64 [ 1, %5 ], [ %55, %51 ] - %11 = phi i64 [ 1, %5 ], [ %54, %51 ] - %12 = phi i64 [ 1, %5 ], [ %53, %51 ] - %13 = phi i64 [ 1, %5 ], [ %52, %51 ] - %14 = phi i64 [ 0, %5 ], [ %64, %51 ] - %15 = phi i64 [ 0, %5 ], [ %63, %51 ] - %16 = getelementptr inbounds i8, i8* %6, i64 %14 - %17 = load i8, i8* %16, align 1 - %18 = urem i8 %17, 3 - %19 = icmp eq i8 %18, 1 - br i1 %19, label %20, label %30 - -20: ; preds = %7 - %21 = getelementptr inbounds i8, i8* %0, i64 %14 - %22 = getelementptr inbounds i8, i8* %21, i64 4000 - %23 = bitcast i8* %22 to i64* - %24 = load i64, i64* %23, align 8 - %25 = add i64 %24, %13 - %26 = getelementptr inbounds i8, i8* %21, i64 4001 - %27 = bitcast i8* %26 to i64* - %28 = load i64, i64* %27, align 8 - %29 = add i64 %28, %12 - br label %51 - -30: ; preds = %7 - %31 = icmp eq i8 %18, 2 - %32 = getelementptr inbounds i8, i8* %0, i64 %14 - br i1 %31, label %33, label %42 - -33: ; preds = %30 - %34 = getelementptr inbounds i8, i8* %32, i64 4002 - %35 = bitcast i8* %34 to i64* - %36 = load i64, i64* %35, align 8 - %37 = add i64 %36, %11 - %38 = getelementptr inbounds i8, i8* %32, i64 4005 - %39 = bitcast i8* %38 to i64* - %40 = load i64, i64* %39, align 8 - %41 = add i64 %40, %10 - br label %51 - -42: ; preds = %30 - %43 = getelementptr inbounds i8, i8* %32, i64 4003 - %44 = bitcast i8* %43 to i64* - %45 = load i64, i64* %44, align 8 - %46 = add i64 %45, %9 - %47 = getelementptr inbounds i8, i8* %32, i64 4009 - %48 = bitcast i8* %47 to i64* - %49 = load i64, i64* %48, align 8 - %50 = add i64 %49, %8 - br label %51 - -51: ; preds = %33, %42, %20 - %52 = phi i64 [ %25, %20 ], [ %13, %33 ], [ %13, %42 ] - %53 = phi i64 [ %29, %20 ], [ %12, %33 ], [ %12, %42 ] - %54 = phi i64 [ %11, %20 ], [ %37, %33 ], [ 
%11, %42 ] - %55 = phi i64 [ %10, %20 ], [ %41, %33 ], [ %10, %42 ] - %56 = phi i64 [ %9, %20 ], [ %9, %33 ], [ %46, %42 ] - %57 = phi i64 [ %8, %20 ], [ %8, %33 ], [ %50, %42 ] - %58 = mul i64 %53, %52 - %59 = mul i64 %58, %54 - %60 = mul i64 %59, %55 - %61 = mul i64 %60, %56 - %62 = mul i64 %61, %57 - %63 = add i64 %62, %15 - %64 = add nuw i64 %14, 1 - %65 = icmp ult i64 %64, %3 - br i1 %65, label %7, label %66 - -66: ; preds = %51, %2 - %67 = phi i64 [ 0, %2 ], [ %63, %51 ] - ret i64 %67 +bb: + %i = sext i32 %arg1 to i64 + %i2 = icmp eq i32 %arg1, 0 + br i1 %i2, label %bb64, label %bb3 + +bb3: ; preds = %bb + %i4 = load i8*, i8** @arr, align 8 + br label %bb5 + +bb5: ; preds = %bb49, %bb3 + %i6 = phi i64 [ 1, %bb3 ], [ %i55, %bb49 ] + %i7 = phi i64 [ 1, %bb3 ], [ %i54, %bb49 ] + %i8 = phi i64 [ 1, %bb3 ], [ %i53, %bb49 ] + %i9 = phi i64 [ 1, %bb3 ], [ %i52, %bb49 ] + %i10 = phi i64 [ 1, %bb3 ], [ %i51, %bb49 ] + %i11 = phi i64 [ 1, %bb3 ], [ %i50, %bb49 ] + %i12 = phi i64 [ 0, %bb3 ], [ %i62, %bb49 ] + %i13 = phi i64 [ 0, %bb3 ], [ %i61, %bb49 ] + %i14 = getelementptr inbounds i8, i8* %i4, i64 %i12 + %i15 = load i8, i8* %i14, align 1 + %i16 = urem i8 %i15, 3 + %i17 = icmp eq i8 %i16, 1 + br i1 %i17, label %bb18, label %bb28 + +bb18: ; preds = %bb5 + %i19 = getelementptr inbounds i8, i8* %arg, i64 %i12 + %i20 = getelementptr inbounds i8, i8* %i19, i64 4000 + %i21 = bitcast i8* %i20 to i64* + %i22 = load i64, i64* %i21, align 8 + %i23 = add i64 %i22, %i11 + %i24 = getelementptr inbounds i8, i8* %i19, i64 4001 + %i25 = bitcast i8* %i24 to i64* + %i26 = load i64, i64* %i25, align 8 + %i27 = add i64 %i26, %i10 + br label %bb49 + +bb28: ; preds = %bb5 + %i29 = icmp eq i8 %i16, 2 + %i30 = getelementptr inbounds i8, i8* %arg, i64 %i12 + br i1 %i29, label %bb31, label %bb40 + +bb31: ; preds = %bb28 + %i32 = getelementptr inbounds i8, i8* %i30, i64 4002 + %i33 = bitcast i8* %i32 to i64* + %i34 = load i64, i64* %i33, align 8 + %i35 = add i64 %i34, %i9 + %i36 = getelementptr inbounds i8, i8* %i30, i64 4005 + %i37 = bitcast i8* %i36 to i64* + %i38 = load i64, i64* %i37, align 8 + %i39 = add i64 %i38, %i8 + br label %bb49 + +bb40: ; preds = %bb28 + %i41 = getelementptr inbounds i8, i8* %i30, i64 4003 + %i42 = bitcast i8* %i41 to i64* + %i43 = load i64, i64* %i42, align 8 + %i44 = add i64 %i43, %i7 + %i45 = getelementptr inbounds i8, i8* %i30, i64 4009 + %i46 = bitcast i8* %i45 to i64* + %i47 = load i64, i64* %i46, align 8 + %i48 = add i64 %i47, %i6 + br label %bb49 + +bb49: ; preds = %bb40, %bb31, %bb18 + %i50 = phi i64 [ %i23, %bb18 ], [ %i11, %bb31 ], [ %i11, %bb40 ] + %i51 = phi i64 [ %i27, %bb18 ], [ %i10, %bb31 ], [ %i10, %bb40 ] + %i52 = phi i64 [ %i9, %bb18 ], [ %i35, %bb31 ], [ %i9, %bb40 ] + %i53 = phi i64 [ %i8, %bb18 ], [ %i39, %bb31 ], [ %i8, %bb40 ] + %i54 = phi i64 [ %i7, %bb18 ], [ %i7, %bb31 ], [ %i44, %bb40 ] + %i55 = phi i64 [ %i6, %bb18 ], [ %i6, %bb31 ], [ %i48, %bb40 ] + %i56 = mul i64 %i51, %i50 + %i57 = mul i64 %i56, %i52 + %i58 = mul i64 %i57, %i53 + %i59 = mul i64 %i58, %i54 + %i60 = mul i64 %i59, %i55 + %i61 = add i64 %i60, %i13 + %i62 = add nuw i64 %i12, 1 + %i63 = icmp ult i64 %i62, %i + br i1 %i63, label %bb5, label %bb64 + +bb64: ; preds = %bb49, %bb + %i65 = phi i64 [ 0, %bb ], [ %i61, %bb49 ] + ret i64 %i65 } ; test_ds_float: @@ -790,19 +809,20 @@ define i64 @test_ds_cross_basic_blocks(i8* %0, i32 signext %1) { ; return res; ;} -define float @test_ds_float(i8* %0, i32 signext %1) { +define float @test_ds_float(i8* %arg, i32 signext %arg1) { ; CHECK-LABEL: 
test_ds_float: -; CHECK: # %bb.0: +; CHECK: # %bb.0: # %bb ; CHECK-NEXT: cmpwi r4, 0 ; CHECK-NEXT: ble cr0, .LBB7_4 -; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: # %bb.1: # %bb2 ; CHECK-NEXT: clrldi r4, r4, 32 ; CHECK-NEXT: addi r3, r3, 4002 ; CHECK-NEXT: xxlxor f1, f1, f1 ; CHECK-NEXT: mtctr r4 ; CHECK-NEXT: li r4, -1 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB7_2: +; CHECK-NEXT: .LBB7_2: # %bb4 +; CHECK-NEXT: # ; CHECK-NEXT: lfsx f0, r3, r4 ; CHECK-NEXT: lfs f2, 0(r3) ; CHECK-NEXT: xsmulsp f0, f0, f2 @@ -813,45 +833,46 @@ define float @test_ds_float(i8* %0, i32 signext %1) { ; CHECK-NEXT: xsmulsp f0, f0, f4 ; CHECK-NEXT: xsaddsp f1, f1, f0 ; CHECK-NEXT: bdnz .LBB7_2 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: # %bb.3: # %bb26 ; CHECK-NEXT: blr ; CHECK-NEXT: .LBB7_4: ; CHECK-NEXT: xxlxor f1, f1, f1 ; CHECK-NEXT: blr - %3 = icmp sgt i32 %1, 0 - br i1 %3, label %4, label %28 - -4: ; preds = %2 - %5 = zext i32 %1 to i64 - br label %6 - -6: ; preds = %6, %4 - %7 = phi i64 [ 0, %4 ], [ %26, %6 ] - %8 = phi float [ 0.000000e+00, %4 ], [ %25, %6 ] - %9 = getelementptr inbounds i8, i8* %0, i64 %7 - %10 = getelementptr inbounds i8, i8* %9, i64 4001 - %11 = bitcast i8* %10 to float* - %12 = load float, float* %11, align 4 - %13 = getelementptr inbounds i8, i8* %9, i64 4002 - %14 = bitcast i8* %13 to float* - %15 = load float, float* %14, align 4 - %16 = getelementptr inbounds i8, i8* %9, i64 4022 - %17 = bitcast i8* %16 to float* - %18 = load float, float* %17, align 4 - %19 = getelementptr inbounds i8, i8* %9, i64 4062 - %20 = bitcast i8* %19 to float* - %21 = load float, float* %20, align 4 - %22 = fmul float %12, %15 - %23 = fmul float %22, %18 - %24 = fmul float %23, %21 - %25 = fadd float %8, %24 - %26 = add nuw nsw i64 %7, 1 - %27 = icmp eq i64 %26, %5 - br i1 %27, label %28, label %6 - -28: ; preds = %6, %2 - %29 = phi float [ 0.000000e+00, %2 ], [ %25, %6 ] - ret float %29 +bb: + %i = icmp sgt i32 %arg1, 0 + br i1 %i, label %bb2, label %bb26 + +bb2: ; preds = %bb + %i3 = zext i32 %arg1 to i64 + br label %bb4 + +bb4: ; preds = %bb4, %bb2 + %i5 = phi i64 [ 0, %bb2 ], [ %i24, %bb4 ] + %i6 = phi float [ 0.000000e+00, %bb2 ], [ %i23, %bb4 ] + %i7 = getelementptr inbounds i8, i8* %arg, i64 %i5 + %i8 = getelementptr inbounds i8, i8* %i7, i64 4001 + %i9 = bitcast i8* %i8 to float* + %i10 = load float, float* %i9, align 4 + %i11 = getelementptr inbounds i8, i8* %i7, i64 4002 + %i12 = bitcast i8* %i11 to float* + %i13 = load float, float* %i12, align 4 + %i14 = getelementptr inbounds i8, i8* %i7, i64 4022 + %i15 = bitcast i8* %i14 to float* + %i16 = load float, float* %i15, align 4 + %i17 = getelementptr inbounds i8, i8* %i7, i64 4062 + %i18 = bitcast i8* %i17 to float* + %i19 = load float, float* %i18, align 4 + %i20 = fmul float %i10, %i13 + %i21 = fmul float %i20, %i16 + %i22 = fmul float %i21, %i19 + %i23 = fadd float %i6, %i22 + %i24 = add nuw nsw i64 %i5, 1 + %i25 = icmp eq i64 %i24, %i3 + br i1 %i25, label %bb26, label %bb4 + +bb26: ; preds = %bb4, %bb + %i27 = phi float [ 0.000000e+00, %bb ], [ %i23, %bb4 ] + ret float %i27 } ; test_ds_combine_float_int: @@ -872,19 +893,20 @@ define float @test_ds_float(i8* %0, i32 signext %1) { ; return res; ;} -define float @test_ds_combine_float_int(i8* %0, i32 signext %1) { +define float @test_ds_combine_float_int(i8* %arg, i32 signext %arg1) { ; CHECK-LABEL: test_ds_combine_float_int: -; CHECK: # %bb.0: +; CHECK: # %bb.0: # %bb ; CHECK-NEXT: cmpwi r4, 0 ; CHECK-NEXT: ble cr0, .LBB8_4 -; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: # %bb.1: # %bb2 ; CHECK-NEXT: clrldi r4, 
r4, 32 ; CHECK-NEXT: addi r3, r3, 4002 ; CHECK-NEXT: xxlxor f1, f1, f1 ; CHECK-NEXT: mtctr r4 ; CHECK-NEXT: li r4, -1 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB8_2: +; CHECK-NEXT: .LBB8_2: # %bb4 +; CHECK-NEXT: # ; CHECK-NEXT: lfd f4, 0(r3) ; CHECK-NEXT: lfsx f0, r3, r4 ; CHECK-NEXT: xscvuxdsp f4, f4 @@ -896,46 +918,47 @@ define float @test_ds_combine_float_int(i8* %0, i32 signext %1) { ; CHECK-NEXT: xsmulsp f0, f3, f0 ; CHECK-NEXT: xsaddsp f1, f1, f0 ; CHECK-NEXT: bdnz .LBB8_2 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: # %bb.3: # %bb27 ; CHECK-NEXT: blr ; CHECK-NEXT: .LBB8_4: ; CHECK-NEXT: xxlxor f1, f1, f1 ; CHECK-NEXT: blr - %3 = icmp sgt i32 %1, 0 - br i1 %3, label %4, label %29 - -4: ; preds = %2 - %5 = zext i32 %1 to i64 - br label %6 - -6: ; preds = %6, %4 - %7 = phi i64 [ 0, %4 ], [ %27, %6 ] - %8 = phi float [ 0.000000e+00, %4 ], [ %26, %6 ] - %9 = getelementptr inbounds i8, i8* %0, i64 %7 - %10 = getelementptr inbounds i8, i8* %9, i64 4001 - %11 = bitcast i8* %10 to float* - %12 = load float, float* %11, align 4 - %13 = getelementptr inbounds i8, i8* %9, i64 4002 - %14 = bitcast i8* %13 to i64* - %15 = load i64, i64* %14, align 8 - %16 = getelementptr inbounds i8, i8* %9, i64 4022 - %17 = bitcast i8* %16 to float* - %18 = load float, float* %17, align 4 - %19 = getelementptr inbounds i8, i8* %9, i64 4062 - %20 = bitcast i8* %19 to float* - %21 = load float, float* %20, align 4 - %22 = uitofp i64 %15 to float - %23 = fmul float %12, %22 - %24 = fmul float %18, %23 - %25 = fmul float %21, %24 - %26 = fadd float %8, %25 - %27 = add nuw nsw i64 %7, 1 - %28 = icmp eq i64 %27, %5 - br i1 %28, label %29, label %6 - -29: ; preds = %6, %2 - %30 = phi float [ 0.000000e+00, %2 ], [ %26, %6 ] - ret float %30 +bb: + %i = icmp sgt i32 %arg1, 0 + br i1 %i, label %bb2, label %bb27 + +bb2: ; preds = %bb + %i3 = zext i32 %arg1 to i64 + br label %bb4 + +bb4: ; preds = %bb4, %bb2 + %i5 = phi i64 [ 0, %bb2 ], [ %i25, %bb4 ] + %i6 = phi float [ 0.000000e+00, %bb2 ], [ %i24, %bb4 ] + %i7 = getelementptr inbounds i8, i8* %arg, i64 %i5 + %i8 = getelementptr inbounds i8, i8* %i7, i64 4001 + %i9 = bitcast i8* %i8 to float* + %i10 = load float, float* %i9, align 4 + %i11 = getelementptr inbounds i8, i8* %i7, i64 4002 + %i12 = bitcast i8* %i11 to i64* + %i13 = load i64, i64* %i12, align 8 + %i14 = getelementptr inbounds i8, i8* %i7, i64 4022 + %i15 = bitcast i8* %i14 to float* + %i16 = load float, float* %i15, align 4 + %i17 = getelementptr inbounds i8, i8* %i7, i64 4062 + %i18 = bitcast i8* %i17 to float* + %i19 = load float, float* %i18, align 4 + %i20 = uitofp i64 %i13 to float + %i21 = fmul float %i10, %i20 + %i22 = fmul float %i16, %i21 + %i23 = fmul float %i19, %i22 + %i24 = fadd float %i6, %i23 + %i25 = add nuw nsw i64 %i5, 1 + %i26 = icmp eq i64 %i25, %i3 + br i1 %i26, label %bb27, label %bb4 + +bb27: ; preds = %bb4, %bb + %i28 = phi float [ 0.000000e+00, %bb ], [ %i24, %bb4 ] + ret float %i28 } ; test_ds_lwa_prep: @@ -955,18 +978,19 @@ define float @test_ds_combine_float_int(i8* %0, i32 signext %1) { ; return res + count; ; } -define i64 @test_ds_lwa_prep(i8* %0, i32 signext %1) { +define i64 @test_ds_lwa_prep(i8* %arg, i32 signext %arg1) { ; CHECK-LABEL: test_ds_lwa_prep: -; CHECK: # %bb.0: +; CHECK: # %bb.0: # %bb ; CHECK-NEXT: cmpwi r4, 0 ; CHECK-NEXT: ble cr0, .LBB9_4 -; CHECK-NEXT: # %bb.1: # %.preheader +; CHECK-NEXT: # %bb.1: # %bb3.preheader ; CHECK-NEXT: mtctr r4 ; CHECK-NEXT: addi r5, r3, 2 ; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: li r6, -1 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB9_2: +; 
CHECK-NEXT: .LBB9_2: # %bb3 +; CHECK-NEXT: # ; CHECK-NEXT: lwax r7, r5, r6 ; CHECK-NEXT: lwa r8, 0(r5) ; CHECK-NEXT: lwa r9, 4(r5) @@ -976,48 +1000,48 @@ define i64 @test_ds_lwa_prep(i8* %0, i32 signext %1) { ; CHECK-NEXT: mulld r7, r7, r9 ; CHECK-NEXT: maddld r3, r7, r10, r3 ; CHECK-NEXT: bdnz .LBB9_2 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: # %bb.3: # %bb29 ; CHECK-NEXT: add r3, r3, r4 ; CHECK-NEXT: blr ; CHECK-NEXT: .LBB9_4: ; CHECK-NEXT: addi r3, r4, 0 ; CHECK-NEXT: blr +bb: + %i = sext i32 %arg1 to i64 + %i2 = icmp sgt i32 %arg1, 0 + br i1 %i2, label %bb3, label %bb29 + +bb3: ; preds = %bb3, %bb + %i4 = phi i64 [ %i27, %bb3 ], [ 0, %bb ] + %i5 = phi i64 [ %i26, %bb3 ], [ 0, %bb ] + %i6 = getelementptr inbounds i8, i8* %arg, i64 %i4 + %i7 = getelementptr inbounds i8, i8* %i6, i64 1 + %i8 = bitcast i8* %i7 to i32* + %i9 = load i32, i32* %i8, align 4 + %i10 = sext i32 %i9 to i64 + %i11 = getelementptr inbounds i8, i8* %i6, i64 2 + %i12 = bitcast i8* %i11 to i32* + %i13 = load i32, i32* %i12, align 4 + %i14 = sext i32 %i13 to i64 + %i15 = getelementptr inbounds i8, i8* %i6, i64 6 + %i16 = bitcast i8* %i15 to i32* + %i17 = load i32, i32* %i16, align 4 + %i18 = sext i32 %i17 to i64 + %i19 = getelementptr inbounds i8, i8* %i6, i64 10 + %i20 = bitcast i8* %i19 to i32* + %i21 = load i32, i32* %i20, align 4 + %i22 = sext i32 %i21 to i64 + %i23 = mul nsw i64 %i14, %i10 + %i24 = mul nsw i64 %i23, %i18 + %i25 = mul nsw i64 %i24, %i22 + %i26 = add nsw i64 %i25, %i5 + %i27 = add nuw nsw i64 %i4, 1 + %i28 = icmp eq i64 %i27, %i + br i1 %i28, label %bb29, label %bb3 - %3 = sext i32 %1 to i64 - %4 = icmp sgt i32 %1, 0 - br i1 %4, label %5, label %31 - -5: ; preds = %2, %5 - %6 = phi i64 [ %29, %5 ], [ 0, %2 ] - %7 = phi i64 [ %28, %5 ], [ 0, %2 ] - %8 = getelementptr inbounds i8, i8* %0, i64 %6 - %9 = getelementptr inbounds i8, i8* %8, i64 1 - %10 = bitcast i8* %9 to i32* - %11 = load i32, i32* %10, align 4 - %12 = sext i32 %11 to i64 - %13 = getelementptr inbounds i8, i8* %8, i64 2 - %14 = bitcast i8* %13 to i32* - %15 = load i32, i32* %14, align 4 - %16 = sext i32 %15 to i64 - %17 = getelementptr inbounds i8, i8* %8, i64 6 - %18 = bitcast i8* %17 to i32* - %19 = load i32, i32* %18, align 4 - %20 = sext i32 %19 to i64 - %21 = getelementptr inbounds i8, i8* %8, i64 10 - %22 = bitcast i8* %21 to i32* - %23 = load i32, i32* %22, align 4 - %24 = sext i32 %23 to i64 - %25 = mul nsw i64 %16, %12 - %26 = mul nsw i64 %25, %20 - %27 = mul nsw i64 %26, %24 - %28 = add nsw i64 %27, %7 - %29 = add nuw nsw i64 %6, 1 - %30 = icmp eq i64 %29, %3 - br i1 %30, label %31, label %5 - -31: ; preds = %5, %2 - %32 = phi i64 [ 0, %2 ], [ %28, %5 ] - %33 = add nsw i64 %32, %3 - ret i64 %33 +bb29: ; preds = %bb3, %bb + %i30 = phi i64 [ 0, %bb ], [ %i26, %bb3 ] + %i31 = add nsw i64 %i30, %i + ret i64 %i31 } diff --git a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll index 665addaad6ab4..7c2464a1ec60d 100644 --- a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll +++ b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll @@ -398,243 +398,243 @@ _loop_1_do_.preheader: ; preds = %_loop_1_do_.lr.ph %.vy2a.promoted = load <2 x double>, <2 x double>* %.vy2a, align 16 %.vy2b.promoted = load <2 x double>, <2 x double>* %.vy2b, align 16 %.vy2c.promoted = load <2 x double>, <2 x double>* %.vy2c, align 16 - %0 = zext i32 %_val_m_ to i64 - %1 = zext i32 %_val_n_ to i64 + %i = zext i32 %_val_m_ to i64 + %i1 = zext i32 %_val_n_ to i64 br label %_loop_2_do_.lr.ph _loop_2_do_.lr.ph: ; preds = 
%_loop_2_endl_, %_loop_1_do_.preheader %indvars.iv212 = phi i64 [ %indvars.iv.next213, %_loop_2_endl_ ], [ 1, %_loop_1_do_.preheader ] - %2 = phi <2 x double> [ %142, %_loop_2_endl_ ], [ %.vy2c.promoted, %_loop_1_do_.preheader ] - %3 = phi <2 x double> [ %140, %_loop_2_endl_ ], [ %.vy2b.promoted, %_loop_1_do_.preheader ] - %4 = phi <2 x double> [ %138, %_loop_2_endl_ ], [ %.vy2a.promoted, %_loop_1_do_.preheader ] - %5 = phi <2 x double> [ %136, %_loop_2_endl_ ], [ %.vy29.promoted, %_loop_1_do_.preheader ] - %6 = phi <2 x double> [ %134, %_loop_2_endl_ ], [ %.vy28.promoted, %_loop_1_do_.preheader ] - %7 = phi <2 x double> [ %132, %_loop_2_endl_ ], [ %.vy27.promoted, %_loop_1_do_.preheader ] - %8 = phi <2 x double> [ %129, %_loop_2_endl_ ], [ %.vy26.promoted, %_loop_1_do_.preheader ] - %9 = phi <2 x double> [ %127, %_loop_2_endl_ ], [ %.vy25.promoted, %_loop_1_do_.preheader ] - %10 = phi <2 x double> [ %125, %_loop_2_endl_ ], [ %.vy24.promoted, %_loop_1_do_.preheader ] - %11 = phi <2 x double> [ %123, %_loop_2_endl_ ], [ %.vy23.promoted, %_loop_1_do_.preheader ] - %12 = phi <2 x double> [ %121, %_loop_2_endl_ ], [ %.vy22.promoted, %_loop_1_do_.preheader ] - %13 = phi <2 x double> [ %119, %_loop_2_endl_ ], [ %.vy21.promoted, %_loop_1_do_.preheader ] - %14 = phi <2 x double> [ %116, %_loop_2_endl_ ], [ %.vy0c.promoted, %_loop_1_do_.preheader ] - %15 = phi <2 x double> [ %114, %_loop_2_endl_ ], [ %.vy0b.promoted, %_loop_1_do_.preheader ] - %16 = phi <2 x double> [ %112, %_loop_2_endl_ ], [ %.vy0a.promoted, %_loop_1_do_.preheader ] - %17 = phi <2 x double> [ %110, %_loop_2_endl_ ], [ %.vy09.promoted, %_loop_1_do_.preheader ] - %18 = phi <2 x double> [ %108, %_loop_2_endl_ ], [ %.vy08.promoted, %_loop_1_do_.preheader ] - %19 = phi <2 x double> [ %106, %_loop_2_endl_ ], [ %.vy07.promoted, %_loop_1_do_.preheader ] - %20 = phi <2 x double> [ %81, %_loop_2_endl_ ], [ %.vy06.promoted, %_loop_1_do_.preheader ] - %21 = phi <2 x double> [ %79, %_loop_2_endl_ ], [ %.vy05.promoted, %_loop_1_do_.preheader ] - %22 = phi <2 x double> [ %77, %_loop_2_endl_ ], [ %.vy04.promoted, %_loop_1_do_.preheader ] - %23 = phi <2 x double> [ %75, %_loop_2_endl_ ], [ %.vy03.promoted, %_loop_1_do_.preheader ] - %24 = phi <2 x double> [ %73, %_loop_2_endl_ ], [ %.vy02.promoted, %_loop_1_do_.preheader ] - %25 = phi <2 x double> [ %71, %_loop_2_endl_ ], [ %.vy01.promoted, %_loop_1_do_.preheader ] + %i2 = phi <2 x double> [ %i142, %_loop_2_endl_ ], [ %.vy2c.promoted, %_loop_1_do_.preheader ] + %i3 = phi <2 x double> [ %i140, %_loop_2_endl_ ], [ %.vy2b.promoted, %_loop_1_do_.preheader ] + %i4 = phi <2 x double> [ %i138, %_loop_2_endl_ ], [ %.vy2a.promoted, %_loop_1_do_.preheader ] + %i5 = phi <2 x double> [ %i136, %_loop_2_endl_ ], [ %.vy29.promoted, %_loop_1_do_.preheader ] + %i6 = phi <2 x double> [ %i134, %_loop_2_endl_ ], [ %.vy28.promoted, %_loop_1_do_.preheader ] + %i7 = phi <2 x double> [ %i132, %_loop_2_endl_ ], [ %.vy27.promoted, %_loop_1_do_.preheader ] + %i8 = phi <2 x double> [ %i129, %_loop_2_endl_ ], [ %.vy26.promoted, %_loop_1_do_.preheader ] + %i9 = phi <2 x double> [ %i127, %_loop_2_endl_ ], [ %.vy25.promoted, %_loop_1_do_.preheader ] + %i10 = phi <2 x double> [ %i125, %_loop_2_endl_ ], [ %.vy24.promoted, %_loop_1_do_.preheader ] + %i11 = phi <2 x double> [ %i123, %_loop_2_endl_ ], [ %.vy23.promoted, %_loop_1_do_.preheader ] + %i12 = phi <2 x double> [ %i121, %_loop_2_endl_ ], [ %.vy22.promoted, %_loop_1_do_.preheader ] + %i13 = phi <2 x double> [ %i119, %_loop_2_endl_ ], [ %.vy21.promoted, %_loop_1_do_.preheader 
] + %i14 = phi <2 x double> [ %i116, %_loop_2_endl_ ], [ %.vy0c.promoted, %_loop_1_do_.preheader ] + %i15 = phi <2 x double> [ %i114, %_loop_2_endl_ ], [ %.vy0b.promoted, %_loop_1_do_.preheader ] + %i16 = phi <2 x double> [ %i112, %_loop_2_endl_ ], [ %.vy0a.promoted, %_loop_1_do_.preheader ] + %i17 = phi <2 x double> [ %i110, %_loop_2_endl_ ], [ %.vy09.promoted, %_loop_1_do_.preheader ] + %i18 = phi <2 x double> [ %i108, %_loop_2_endl_ ], [ %.vy08.promoted, %_loop_1_do_.preheader ] + %i19 = phi <2 x double> [ %i106, %_loop_2_endl_ ], [ %.vy07.promoted, %_loop_1_do_.preheader ] + %i20 = phi <2 x double> [ %i81, %_loop_2_endl_ ], [ %.vy06.promoted, %_loop_1_do_.preheader ] + %i21 = phi <2 x double> [ %i79, %_loop_2_endl_ ], [ %.vy05.promoted, %_loop_1_do_.preheader ] + %i22 = phi <2 x double> [ %i77, %_loop_2_endl_ ], [ %.vy04.promoted, %_loop_1_do_.preheader ] + %i23 = phi <2 x double> [ %i75, %_loop_2_endl_ ], [ %.vy03.promoted, %_loop_1_do_.preheader ] + %i24 = phi <2 x double> [ %i73, %_loop_2_endl_ ], [ %.vy02.promoted, %_loop_1_do_.preheader ] + %i25 = phi <2 x double> [ %i71, %_loop_2_endl_ ], [ %.vy01.promoted, %_loop_1_do_.preheader ] %_ix_x_len10 = mul i64 %_mult_tmp, %indvars.iv212 %a_ix_dim_0_ = getelementptr inbounds i8, i8* %a_rvo_based_addr_, i64 %_ix_x_len10 - %26 = add nuw nsw i64 %indvars.iv212, 1 - %_ix_x_len24 = mul i64 %_mult_tmp, %26 + %i26 = add nuw nsw i64 %indvars.iv212, 1 + %_ix_x_len24 = mul i64 %_mult_tmp, %i26 %a_ix_dim_0_25 = getelementptr inbounds i8, i8* %a_rvo_based_addr_, i64 %_ix_x_len24 - %27 = add nuw nsw i64 %indvars.iv212, 2 - %_ix_x_len40 = mul i64 %_mult_tmp, %27 + %i27 = add nuw nsw i64 %indvars.iv212, 2 + %_ix_x_len40 = mul i64 %_mult_tmp, %i27 %a_ix_dim_0_41 = getelementptr inbounds i8, i8* %a_rvo_based_addr_, i64 %_ix_x_len40 - %28 = add nuw nsw i64 %indvars.iv212, 3 - %_ix_x_len56 = mul i64 %_mult_tmp, %28 + %i28 = add nuw nsw i64 %indvars.iv212, 3 + %_ix_x_len56 = mul i64 %_mult_tmp, %i28 %a_ix_dim_0_57 = getelementptr inbounds i8, i8* %a_rvo_based_addr_, i64 %_ix_x_len56 - %29 = add nuw nsw i64 %indvars.iv212, 4 - %_ix_x_len72 = mul i64 %_mult_tmp, %29 + %i29 = add nuw nsw i64 %indvars.iv212, 4 + %_ix_x_len72 = mul i64 %_mult_tmp, %i29 %a_ix_dim_0_73 = getelementptr inbounds i8, i8* %a_rvo_based_addr_, i64 %_ix_x_len72 - %30 = add nuw nsw i64 %indvars.iv212, 5 - %_ix_x_len88 = mul i64 %_mult_tmp, %30 + %i30 = add nuw nsw i64 %indvars.iv212, 5 + %_ix_x_len88 = mul i64 %_mult_tmp, %i30 %a_ix_dim_0_89 = getelementptr inbounds i8, i8* %a_rvo_based_addr_, i64 %_ix_x_len88 br label %_loop_2_do_ -_loop_2_do_: ; preds = %_loop_2_do_.lr.ph, %_loop_2_do_ +_loop_2_do_: ; preds = %_loop_2_do_, %_loop_2_do_.lr.ph %indvars.iv = phi i64 [ 1, %_loop_2_do_.lr.ph ], [ %indvars.iv.next, %_loop_2_do_ ] - %31 = phi <2 x double> [ %2, %_loop_2_do_.lr.ph ], [ %142, %_loop_2_do_ ] - %32 = phi <2 x double> [ %3, %_loop_2_do_.lr.ph ], [ %140, %_loop_2_do_ ] - %33 = phi <2 x double> [ %4, %_loop_2_do_.lr.ph ], [ %138, %_loop_2_do_ ] - %34 = phi <2 x double> [ %5, %_loop_2_do_.lr.ph ], [ %136, %_loop_2_do_ ] - %35 = phi <2 x double> [ %6, %_loop_2_do_.lr.ph ], [ %134, %_loop_2_do_ ] - %36 = phi <2 x double> [ %7, %_loop_2_do_.lr.ph ], [ %132, %_loop_2_do_ ] - %37 = phi <2 x double> [ %8, %_loop_2_do_.lr.ph ], [ %129, %_loop_2_do_ ] - %38 = phi <2 x double> [ %9, %_loop_2_do_.lr.ph ], [ %127, %_loop_2_do_ ] - %39 = phi <2 x double> [ %10, %_loop_2_do_.lr.ph ], [ %125, %_loop_2_do_ ] - %40 = phi <2 x double> [ %11, %_loop_2_do_.lr.ph ], [ %123, %_loop_2_do_ ] - %41 = phi <2 
x double> [ %12, %_loop_2_do_.lr.ph ], [ %121, %_loop_2_do_ ] - %42 = phi <2 x double> [ %13, %_loop_2_do_.lr.ph ], [ %119, %_loop_2_do_ ] - %43 = phi <2 x double> [ %14, %_loop_2_do_.lr.ph ], [ %116, %_loop_2_do_ ] - %44 = phi <2 x double> [ %15, %_loop_2_do_.lr.ph ], [ %114, %_loop_2_do_ ] - %45 = phi <2 x double> [ %16, %_loop_2_do_.lr.ph ], [ %112, %_loop_2_do_ ] - %46 = phi <2 x double> [ %17, %_loop_2_do_.lr.ph ], [ %110, %_loop_2_do_ ] - %47 = phi <2 x double> [ %18, %_loop_2_do_.lr.ph ], [ %108, %_loop_2_do_ ] - %48 = phi <2 x double> [ %19, %_loop_2_do_.lr.ph ], [ %106, %_loop_2_do_ ] - %49 = phi <2 x double> [ %20, %_loop_2_do_.lr.ph ], [ %81, %_loop_2_do_ ] - %50 = phi <2 x double> [ %21, %_loop_2_do_.lr.ph ], [ %79, %_loop_2_do_ ] - %51 = phi <2 x double> [ %22, %_loop_2_do_.lr.ph ], [ %77, %_loop_2_do_ ] - %52 = phi <2 x double> [ %23, %_loop_2_do_.lr.ph ], [ %75, %_loop_2_do_ ] - %53 = phi <2 x double> [ %24, %_loop_2_do_.lr.ph ], [ %73, %_loop_2_do_ ] - %54 = phi <2 x double> [ %25, %_loop_2_do_.lr.ph ], [ %71, %_loop_2_do_ ] + %i31 = phi <2 x double> [ %i2, %_loop_2_do_.lr.ph ], [ %i142, %_loop_2_do_ ] + %i32 = phi <2 x double> [ %i3, %_loop_2_do_.lr.ph ], [ %i140, %_loop_2_do_ ] + %i33 = phi <2 x double> [ %i4, %_loop_2_do_.lr.ph ], [ %i138, %_loop_2_do_ ] + %i34 = phi <2 x double> [ %i5, %_loop_2_do_.lr.ph ], [ %i136, %_loop_2_do_ ] + %i35 = phi <2 x double> [ %i6, %_loop_2_do_.lr.ph ], [ %i134, %_loop_2_do_ ] + %i36 = phi <2 x double> [ %i7, %_loop_2_do_.lr.ph ], [ %i132, %_loop_2_do_ ] + %i37 = phi <2 x double> [ %i8, %_loop_2_do_.lr.ph ], [ %i129, %_loop_2_do_ ] + %i38 = phi <2 x double> [ %i9, %_loop_2_do_.lr.ph ], [ %i127, %_loop_2_do_ ] + %i39 = phi <2 x double> [ %i10, %_loop_2_do_.lr.ph ], [ %i125, %_loop_2_do_ ] + %i40 = phi <2 x double> [ %i11, %_loop_2_do_.lr.ph ], [ %i123, %_loop_2_do_ ] + %i41 = phi <2 x double> [ %i12, %_loop_2_do_.lr.ph ], [ %i121, %_loop_2_do_ ] + %i42 = phi <2 x double> [ %i13, %_loop_2_do_.lr.ph ], [ %i119, %_loop_2_do_ ] + %i43 = phi <2 x double> [ %i14, %_loop_2_do_.lr.ph ], [ %i116, %_loop_2_do_ ] + %i44 = phi <2 x double> [ %i15, %_loop_2_do_.lr.ph ], [ %i114, %_loop_2_do_ ] + %i45 = phi <2 x double> [ %i16, %_loop_2_do_.lr.ph ], [ %i112, %_loop_2_do_ ] + %i46 = phi <2 x double> [ %i17, %_loop_2_do_.lr.ph ], [ %i110, %_loop_2_do_ ] + %i47 = phi <2 x double> [ %i18, %_loop_2_do_.lr.ph ], [ %i108, %_loop_2_do_ ] + %i48 = phi <2 x double> [ %i19, %_loop_2_do_.lr.ph ], [ %i106, %_loop_2_do_ ] + %i49 = phi <2 x double> [ %i20, %_loop_2_do_.lr.ph ], [ %i81, %_loop_2_do_ ] + %i50 = phi <2 x double> [ %i21, %_loop_2_do_.lr.ph ], [ %i79, %_loop_2_do_ ] + %i51 = phi <2 x double> [ %i22, %_loop_2_do_.lr.ph ], [ %i77, %_loop_2_do_ ] + %i52 = phi <2 x double> [ %i23, %_loop_2_do_.lr.ph ], [ %i75, %_loop_2_do_ ] + %i53 = phi <2 x double> [ %i24, %_loop_2_do_.lr.ph ], [ %i73, %_loop_2_do_ ] + %i54 = phi <2 x double> [ %i25, %_loop_2_do_.lr.ph ], [ %i71, %_loop_2_do_ ] %_ix_x_len = shl nuw nsw i64 %indvars.iv, 3 %x_ix_dim_0_113 = getelementptr inbounds %_elem_type_of_x, %_elem_type_of_x* %x_rvo_based_addr_112, i64 %indvars.iv %x_ix_dim_0_ = bitcast %_elem_type_of_x* %x_ix_dim_0_113 to i8* - %55 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull %x_ix_dim_0_) + %i55 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull %x_ix_dim_0_) %a_ix_dim_1_ = getelementptr inbounds i8, i8* %a_ix_dim_0_, i64 %_ix_x_len - %56 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull %a_ix_dim_1_) + %i56 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull 
%a_ix_dim_1_) %a_ix_dim_1_29 = getelementptr inbounds i8, i8* %a_ix_dim_0_25, i64 %_ix_x_len - %57 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull %a_ix_dim_1_29) + %i57 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull %a_ix_dim_1_29) %a_ix_dim_1_45 = getelementptr inbounds i8, i8* %a_ix_dim_0_41, i64 %_ix_x_len - %58 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull %a_ix_dim_1_45) + %i58 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull %a_ix_dim_1_45) %a_ix_dim_1_61 = getelementptr inbounds i8, i8* %a_ix_dim_0_57, i64 %_ix_x_len - %59 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull %a_ix_dim_1_61) + %i59 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull %a_ix_dim_1_61) %a_ix_dim_1_77 = getelementptr inbounds i8, i8* %a_ix_dim_0_73, i64 %_ix_x_len - %60 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull %a_ix_dim_1_77) + %i60 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull %a_ix_dim_1_77) %a_ix_dim_1_93 = getelementptr inbounds i8, i8* %a_ix_dim_0_89, i64 %_ix_x_len - %61 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull %a_ix_dim_1_93) - %62 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %55) - %.fca.0.extract35 = extractvalue { <16 x i8>, <16 x i8> } %62, 0 - %.fca.1.extract36 = extractvalue { <16 x i8>, <16 x i8> } %62, 1 - %63 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %56) - %.fca.0.extract29 = extractvalue { <16 x i8>, <16 x i8> } %63, 0 - %.fca.1.extract30 = extractvalue { <16 x i8>, <16 x i8> } %63, 1 - %64 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %57) - %.fca.0.extract23 = extractvalue { <16 x i8>, <16 x i8> } %64, 0 - %.fca.1.extract24 = extractvalue { <16 x i8>, <16 x i8> } %64, 1 - %65 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %58) - %.fca.0.extract17 = extractvalue { <16 x i8>, <16 x i8> } %65, 0 - %.fca.1.extract18 = extractvalue { <16 x i8>, <16 x i8> } %65, 1 - %66 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %59) - %.fca.0.extract11 = extractvalue { <16 x i8>, <16 x i8> } %66, 0 - %.fca.1.extract12 = extractvalue { <16 x i8>, <16 x i8> } %66, 1 - %67 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %60) - %.fca.0.extract5 = extractvalue { <16 x i8>, <16 x i8> } %67, 0 - %.fca.1.extract6 = extractvalue { <16 x i8>, <16 x i8> } %67, 1 - %68 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %61) - %.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %68, 0 - %.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %68, 1 - %69 = bitcast <16 x i8> %.fca.0.extract29 to <2 x double> - %70 = bitcast <16 x i8> %.fca.0.extract35 to <2 x double> - %71 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %69, <2 x double> %70, <2 x double> %54) - %72 = bitcast <16 x i8> %.fca.0.extract23 to <2 x double> - %73 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %72, <2 x double> %70, <2 x double> %53) - %74 = bitcast <16 x i8> %.fca.0.extract17 to <2 x double> - %75 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %74, <2 x double> %70, <2 x double> %52) - %76 = bitcast <16 x i8> %.fca.0.extract11 to <2 x double> - %77 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %76, <2 x double> %70, <2 x double> %51) - %78 = bitcast <16 x i8> %.fca.0.extract5 to <2 x double> - %79 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %78, <2 x double> %70, <2 x 
double> %50)
- %80 = bitcast <16 x i8> %.fca.0.extract to <2 x double>
- %81 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %80, <2 x double> %70, <2 x double> %49)
- %82 = getelementptr %_elem_type_of_x, %_elem_type_of_x* %x_ix_dim_0_113, i64 4
- %83 = bitcast %_elem_type_of_x* %82 to i8*
- %84 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %83)
- %85 = getelementptr i8, i8* %a_ix_dim_1_, i64 32
- %86 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %85)
- %87 = getelementptr i8, i8* %a_ix_dim_1_29, i64 32
- %88 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %87)
- %89 = getelementptr i8, i8* %a_ix_dim_1_45, i64 32
- %90 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %89)
- %91 = getelementptr i8, i8* %a_ix_dim_1_61, i64 32
- %92 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %91)
- %93 = getelementptr i8, i8* %a_ix_dim_1_77, i64 32
- %94 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %93)
- %95 = getelementptr i8, i8* %a_ix_dim_1_93, i64 32
- %96 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %95)
- %97 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %84)
- %.fca.0.extract37 = extractvalue { <16 x i8>, <16 x i8> } %97, 0
- %.fca.1.extract39 = extractvalue { <16 x i8>, <16 x i8> } %97, 1
- %98 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %86)
- %.fca.0.extract31 = extractvalue { <16 x i8>, <16 x i8> } %98, 0
- %.fca.1.extract33 = extractvalue { <16 x i8>, <16 x i8> } %98, 1
- %99 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %88)
- %.fca.0.extract25 = extractvalue { <16 x i8>, <16 x i8> } %99, 0
- %.fca.1.extract27 = extractvalue { <16 x i8>, <16 x i8> } %99, 1
- %100 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %90)
- %.fca.0.extract19 = extractvalue { <16 x i8>, <16 x i8> } %100, 0
- %.fca.1.extract21 = extractvalue { <16 x i8>, <16 x i8> } %100, 1
- %101 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %92)
- %.fca.0.extract13 = extractvalue { <16 x i8>, <16 x i8> } %101, 0
- %.fca.1.extract15 = extractvalue { <16 x i8>, <16 x i8> } %101, 1
- %102 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %94)
- %.fca.0.extract7 = extractvalue { <16 x i8>, <16 x i8> } %102, 0
- %.fca.1.extract9 = extractvalue { <16 x i8>, <16 x i8> } %102, 1
- %103 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %96)
- %.fca.0.extract1 = extractvalue { <16 x i8>, <16 x i8> } %103, 0
- %.fca.1.extract3 = extractvalue { <16 x i8>, <16 x i8> } %103, 1
- %104 = bitcast <16 x i8> %.fca.1.extract30 to <2 x double>
- %105 = bitcast <16 x i8> %.fca.1.extract36 to <2 x double>
- %106 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %104, <2 x double> %105, <2 x double> %48)
- %107 = bitcast <16 x i8> %.fca.1.extract24 to <2 x double>
- %108 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %107, <2 x double> %105, <2 x double> %47)
- %109 = bitcast <16 x i8> %.fca.1.extract18 to <2 x double>
- %110 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %109, <2 x double> %105, <2 x double> %46)
- %111 = bitcast <16 x i8> %.fca.1.extract12 to <2 x double>
- %112 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %111, <2 x double> %105, <2 x double> %45)
- %113 = bitcast <16 x i8> %.fca.1.extract6 to <2 x double>
- %114 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %113, <2 x double> %105, <2 x double> %44)
- %115 = bitcast <16 x i8> %.fca.1.extract to <2 x double>
- %116 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %115, <2 x double> %105, <2 x double> %43)
- %117 = bitcast <16 x i8> %.fca.0.extract31 to <2 x double>
- %118 = bitcast <16 x i8> %.fca.0.extract37 to <2 x double>
- %119 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %117, <2 x double> %118, <2 x double> %42)
- %120 = bitcast <16 x i8> %.fca.0.extract25 to <2 x double>
- %121 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %120, <2 x double> %118, <2 x double> %41)
- %122 = bitcast <16 x i8> %.fca.0.extract19 to <2 x double>
- %123 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %122, <2 x double> %118, <2 x double> %40)
- %124 = bitcast <16 x i8> %.fca.0.extract13 to <2 x double>
- %125 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %124, <2 x double> %118, <2 x double> %39)
- %126 = bitcast <16 x i8> %.fca.0.extract7 to <2 x double>
- %127 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %126, <2 x double> %118, <2 x double> %38)
- %128 = bitcast <16 x i8> %.fca.0.extract1 to <2 x double>
- %129 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %128, <2 x double> %118, <2 x double> %37)
- %130 = bitcast <16 x i8> %.fca.1.extract33 to <2 x double>
- %131 = bitcast <16 x i8> %.fca.1.extract39 to <2 x double>
- %132 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %130, <2 x double> %131, <2 x double> %36)
- %133 = bitcast <16 x i8> %.fca.1.extract27 to <2 x double>
- %134 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %133, <2 x double> %131, <2 x double> %35)
- %135 = bitcast <16 x i8> %.fca.1.extract21 to <2 x double>
- %136 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %135, <2 x double> %131, <2 x double> %34)
- %137 = bitcast <16 x i8> %.fca.1.extract15 to <2 x double>
- %138 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %137, <2 x double> %131, <2 x double> %33)
- %139 = bitcast <16 x i8> %.fca.1.extract9 to <2 x double>
- %140 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %139, <2 x double> %131, <2 x double> %32)
- %141 = bitcast <16 x i8> %.fca.1.extract3 to <2 x double>
- %142 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %141, <2 x double> %131, <2 x double> %31)
+ %i61 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull %a_ix_dim_1_93)
+ %i62 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %i55)
+ %.fca.0.extract35 = extractvalue { <16 x i8>, <16 x i8> } %i62, 0
+ %.fca.1.extract36 = extractvalue { <16 x i8>, <16 x i8> } %i62, 1
+ %i63 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %i56)
+ %.fca.0.extract29 = extractvalue { <16 x i8>, <16 x i8> } %i63, 0
+ %.fca.1.extract30 = extractvalue { <16 x i8>, <16 x i8> } %i63, 1
+ %i64 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %i57)
+ %.fca.0.extract23 = extractvalue { <16 x i8>, <16 x i8> } %i64, 0
+ %.fca.1.extract24 = extractvalue { <16 x i8>, <16 x i8> } %i64, 1
+ %i65 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %i58)
+ %.fca.0.extract17 = extractvalue { <16 x i8>, <16 x i8> } %i65, 0
+ %.fca.1.extract18 = extractvalue { <16 x i8>, <16 x i8> } %i65, 1
+ %i66 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %i59)
+ %.fca.0.extract11 = extractvalue { <16 x i8>, <16 x i8> } %i66, 0
+ %.fca.1.extract12 = extractvalue { <16 x i8>, <16 x i8> } %i66, 1
+ %i67 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %i60)
+ %.fca.0.extract5 = extractvalue { <16 x i8>, <16 x i8> } %i67, 0
+ %.fca.1.extract6 = extractvalue { <16 x i8>, <16 x i8> } %i67, 1
+ %i68 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %i61)
+ %.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %i68, 0
+ %.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %i68, 1
+ %i69 = bitcast <16 x i8> %.fca.0.extract29 to <2 x double>
+ %i70 = bitcast <16 x i8> %.fca.0.extract35 to <2 x double>
+ %i71 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i69, <2 x double> %i70, <2 x double> %i54)
+ %i72 = bitcast <16 x i8> %.fca.0.extract23 to <2 x double>
+ %i73 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i72, <2 x double> %i70, <2 x double> %i53)
+ %i74 = bitcast <16 x i8> %.fca.0.extract17 to <2 x double>
+ %i75 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i74, <2 x double> %i70, <2 x double> %i52)
+ %i76 = bitcast <16 x i8> %.fca.0.extract11 to <2 x double>
+ %i77 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i76, <2 x double> %i70, <2 x double> %i51)
+ %i78 = bitcast <16 x i8> %.fca.0.extract5 to <2 x double>
+ %i79 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i78, <2 x double> %i70, <2 x double> %i50)
+ %i80 = bitcast <16 x i8> %.fca.0.extract to <2 x double>
+ %i81 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i80, <2 x double> %i70, <2 x double> %i49)
+ %i82 = getelementptr %_elem_type_of_x, %_elem_type_of_x* %x_ix_dim_0_113, i64 4
+ %i83 = bitcast %_elem_type_of_x* %i82 to i8*
+ %i84 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %i83)
+ %i85 = getelementptr i8, i8* %a_ix_dim_1_, i64 32
+ %i86 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %i85)
+ %i87 = getelementptr i8, i8* %a_ix_dim_1_29, i64 32
+ %i88 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %i87)
+ %i89 = getelementptr i8, i8* %a_ix_dim_1_45, i64 32
+ %i90 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %i89)
+ %i91 = getelementptr i8, i8* %a_ix_dim_1_61, i64 32
+ %i92 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %i91)
+ %i93 = getelementptr i8, i8* %a_ix_dim_1_77, i64 32
+ %i94 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %i93)
+ %i95 = getelementptr i8, i8* %a_ix_dim_1_93, i64 32
+ %i96 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %i95)
+ %i97 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %i84)
+ %.fca.0.extract37 = extractvalue { <16 x i8>, <16 x i8> } %i97, 0
+ %.fca.1.extract39 = extractvalue { <16 x i8>, <16 x i8> } %i97, 1
+ %i98 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %i86)
+ %.fca.0.extract31 = extractvalue { <16 x i8>, <16 x i8> } %i98, 0
+ %.fca.1.extract33 = extractvalue { <16 x i8>, <16 x i8> } %i98, 1
+ %i99 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %i88)
+ %.fca.0.extract25 = extractvalue { <16 x i8>, <16 x i8> } %i99, 0
+ %.fca.1.extract27 = extractvalue { <16 x i8>, <16 x i8> } %i99, 1
+ %i100 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %i90)
+ %.fca.0.extract19 = extractvalue { <16 x i8>, <16 x i8> } %i100, 0
+ %.fca.1.extract21 = extractvalue { <16 x i8>, <16 x i8> } %i100, 1
+ %i101 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %i92)
+ %.fca.0.extract13 = extractvalue { <16 x i8>, <16 x i8> } %i101, 0
+ %.fca.1.extract15 = extractvalue { <16 x i8>, <16 x i8> } %i101, 1
+ %i102 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %i94)
+ %.fca.0.extract7 = extractvalue { <16 x i8>, <16 x i8> } %i102, 0
+ %.fca.1.extract9 = extractvalue { <16 x i8>, <16 x i8> } %i102, 1
+ %i103 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %i96)
+ %.fca.0.extract1 = extractvalue { <16 x i8>, <16 x i8> } %i103, 0
+ %.fca.1.extract3 = extractvalue { <16 x i8>, <16 x i8> } %i103, 1
+ %i104 = bitcast <16 x i8> %.fca.1.extract30 to <2 x double>
+ %i105 = bitcast <16 x i8> %.fca.1.extract36 to <2 x double>
+ %i106 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i104, <2 x double> %i105, <2 x double> %i48)
+ %i107 = bitcast <16 x i8> %.fca.1.extract24 to <2 x double>
+ %i108 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i107, <2 x double> %i105, <2 x double> %i47)
+ %i109 = bitcast <16 x i8> %.fca.1.extract18 to <2 x double>
+ %i110 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i109, <2 x double> %i105, <2 x double> %i46)
+ %i111 = bitcast <16 x i8> %.fca.1.extract12 to <2 x double>
+ %i112 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i111, <2 x double> %i105, <2 x double> %i45)
+ %i113 = bitcast <16 x i8> %.fca.1.extract6 to <2 x double>
+ %i114 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i113, <2 x double> %i105, <2 x double> %i44)
+ %i115 = bitcast <16 x i8> %.fca.1.extract to <2 x double>
+ %i116 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i115, <2 x double> %i105, <2 x double> %i43)
+ %i117 = bitcast <16 x i8> %.fca.0.extract31 to <2 x double>
+ %i118 = bitcast <16 x i8> %.fca.0.extract37 to <2 x double>
+ %i119 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i117, <2 x double> %i118, <2 x double> %i42)
+ %i120 = bitcast <16 x i8> %.fca.0.extract25 to <2 x double>
+ %i121 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i120, <2 x double> %i118, <2 x double> %i41)
+ %i122 = bitcast <16 x i8> %.fca.0.extract19 to <2 x double>
+ %i123 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i122, <2 x double> %i118, <2 x double> %i40)
+ %i124 = bitcast <16 x i8> %.fca.0.extract13 to <2 x double>
+ %i125 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i124, <2 x double> %i118, <2 x double> %i39)
+ %i126 = bitcast <16 x i8> %.fca.0.extract7 to <2 x double>
+ %i127 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i126, <2 x double> %i118, <2 x double> %i38)
+ %i128 = bitcast <16 x i8> %.fca.0.extract1 to <2 x double>
+ %i129 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i128, <2 x double> %i118, <2 x double> %i37)
+ %i130 = bitcast <16 x i8> %.fca.1.extract33 to <2 x double>
+ %i131 = bitcast <16 x i8> %.fca.1.extract39 to <2 x double>
+ %i132 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i130, <2 x double> %i131, <2 x double> %i36)
+ %i133 = bitcast <16 x i8> %.fca.1.extract27 to <2 x double>
+ %i134 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i133, <2 x double> %i131, <2 x double> %i35)
+ %i135 = bitcast <16 x i8> %.fca.1.extract21 to <2 x double>
+ %i136 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i135, <2 x double> %i131, <2 x double> %i34)
+ %i137 = bitcast <16 x i8> %.fca.1.extract15 to <2 x double>
+ %i138 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i137, <2 x double> %i131, <2 x double> %i33)
+ %i139 = bitcast <16 x i8> %.fca.1.extract9 to <2 x double>
+ %i140 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i139, <2 x double> %i131, <2 x double> %i32)
+ %i141 = bitcast <16 x i8> %.fca.1.extract3 to <2 x double>
+ %i142 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i141, <2 x double> %i131, <2 x double> %i31)
 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 8
- %_leq_tmp6.not = icmp ugt i64 %indvars.iv.next, %0
+ %_leq_tmp6.not = icmp ugt i64 %indvars.iv.next, %i
 br i1 %_leq_tmp6.not, label %_loop_2_endl_, label %_loop_2_do_

_loop_2_endl_: ; preds = %_loop_2_do_
 %indvars.iv.next213 = add nuw nsw i64 %indvars.iv212, 6
- %_leq_tmp.not = icmp ugt i64 %indvars.iv.next213, %1
+ %_leq_tmp.not = icmp ugt i64 %indvars.iv.next213, %i1
 br i1 %_leq_tmp.not, label %_loop_1_loopHeader_._return_bb_crit_edge.loopexit, label %_loop_2_do_.lr.ph

_loop_1_loopHeader_._return_bb_crit_edge.loopexit: ; preds = %_loop_2_endl_
- store <2 x double> %71, <2 x double>* %.vy01, align 16
- store <2 x double> %73, <2 x double>* %.vy02, align 16
- store <2 x double> %75, <2 x double>* %.vy03, align 16
- store <2 x double> %77, <2 x double>* %.vy04, align 16
- store <2 x double> %79, <2 x double>* %.vy05, align 16
- store <2 x double> %81, <2 x double>* %.vy06, align 16
- store <2 x double> %106, <2 x double>* %.vy07, align 16
- store <2 x double> %108, <2 x double>* %.vy08, align 16
- store <2 x double> %110, <2 x double>* %.vy09, align 16
- store <2 x double> %112, <2 x double>* %.vy0a, align 16
- store <2 x double> %114, <2 x double>* %.vy0b, align 16
- store <2 x double> %116, <2 x double>* %.vy0c, align 16
- store <2 x double> %119, <2 x double>* %.vy21, align 16
- store <2 x double> %121, <2 x double>* %.vy22, align 16
- store <2 x double> %123, <2 x double>* %.vy23, align 16
- store <2 x double> %125, <2 x double>* %.vy24, align 16
- store <2 x double> %127, <2 x double>* %.vy25, align 16
- store <2 x double> %129, <2 x double>* %.vy26, align 16
- store <2 x double> %132, <2 x double>* %.vy27, align 16
- store <2 x double> %134, <2 x double>* %.vy28, align 16
- store <2 x double> %136, <2 x double>* %.vy29, align 16
- store <2 x double> %138, <2 x double>* %.vy2a, align 16
- store <2 x double> %140, <2 x double>* %.vy2b, align 16
- store <2 x double> %142, <2 x double>* %.vy2c, align 16
+ store <2 x double> %i71, <2 x double>* %.vy01, align 16
+ store <2 x double> %i73, <2 x double>* %.vy02, align 16
+ store <2 x double> %i75, <2 x double>* %.vy03, align 16
+ store <2 x double> %i77, <2 x double>* %.vy04, align 16
+ store <2 x double> %i79, <2 x double>* %.vy05, align 16
+ store <2 x double> %i81, <2 x double>* %.vy06, align 16
+ store <2 x double> %i106, <2 x double>* %.vy07, align 16
+ store <2 x double> %i108, <2 x double>* %.vy08, align 16
+ store <2 x double> %i110, <2 x double>* %.vy09, align 16
+ store <2 x double> %i112, <2 x double>* %.vy0a, align 16
+ store <2 x double> %i114, <2 x double>* %.vy0b, align 16
+ store <2 x double> %i116, <2 x double>* %.vy0c, align 16
+ store <2 x double> %i119, <2 x double>* %.vy21, align 16
+ store <2 x double> %i121, <2 x double>* %.vy22, align 16
+ store <2 x double> %i123, <2 x double>* %.vy23, align 16
+ store <2 x double> %i125, <2 x double>* %.vy24, align 16
+ store <2 x double> %i127, <2 x double>* %.vy25, align 16
+ store <2 x double> %i129, <2 x double>* %.vy26, align 16
+ store <2 x double> %i132, <2 x double>* %.vy27, align 16
+ store <2 x double> %i134, <2 x double>* %.vy28, align 16
+ store <2 x double> %i136, <2 x double>* %.vy29, align 16
+ store <2 x double> %i138, <2 x double>* %.vy2a, align 16
+ store <2 x double> %i140, <2 x double>* %.vy2b, align 16
+ store <2 x double> %i142, <2 x double>* %.vy2c, align 16
 br label %_return_bb

-_return_bb: ; preds = %_loop_1_do_.lr.ph, %_loop_1_loopHeader_._return_bb_crit_edge.loopexit, %entry
+_return_bb: ; preds = %_loop_1_loopHeader_._return_bb_crit_edge.loopexit, %_loop_1_do_.lr.ph, %entry
 ret void
}