diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/x264-satd-8x4.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/x264-satd-8x4.ll new file mode 100644 index 0000000000000..c1042f1842107 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/x264-satd-8x4.ll @@ -0,0 +1,526 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -mtriple=riscv64 -mattr=+m,+v,+unaligned-vector-mem \ +; RUN: -passes=slp-vectorizer -S < %s | FileCheck %s +; Function Attrs: nounwind uwtable vscale_range(8,1024) +define i32 @x264_pixel_satd_8x4(ptr %pix1, i32 %i_pix1, ptr %pix2, i32 %i_pix2) { +; CHECK-LABEL: define i32 @x264_pixel_satd_8x4( +; CHECK-SAME: ptr [[PIX1:%.*]], i32 [[I_PIX1:%.*]], ptr [[PIX2:%.*]], i32 [[I_PIX2:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[I_PIX1]] to i64 +; CHECK-NEXT: [[IDX_EXT63:%.*]] = sext i32 [[I_PIX2]] to i64 +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw i8, ptr [[PIX1]], i64 4 +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw i8, ptr [[PIX2]], i64 4 +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[PIX1]], i64 [[IDX_EXT]] +; CHECK-NEXT: [[ADD_PTR64:%.*]] = getelementptr inbounds i8, ptr [[PIX2]], i64 [[IDX_EXT63]] +; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds nuw i8, ptr [[ADD_PTR]], i64 4 +; CHECK-NEXT: [[ARRAYIDX5_1:%.*]] = getelementptr inbounds nuw i8, ptr [[ADD_PTR64]], i64 4 +; CHECK-NEXT: [[ADD_PTR_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR]], i64 [[IDX_EXT]] +; CHECK-NEXT: [[ADD_PTR64_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64]], i64 [[IDX_EXT63]] +; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds nuw i8, ptr [[ADD_PTR_1]], i64 4 +; CHECK-NEXT: [[ARRAYIDX5_2:%.*]] = getelementptr inbounds nuw i8, ptr [[ADD_PTR64_1]], i64 4 +; CHECK-NEXT: [[ADD_PTR_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 [[IDX_EXT]] +; CHECK-NEXT: [[ADD_PTR64_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_1]], i64 [[IDX_EXT63]] +; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds nuw i8, ptr [[ADD_PTR_2]], i64 4 +; CHECK-NEXT: [[ARRAYIDX5_3:%.*]] = getelementptr inbounds nuw i8, ptr [[ADD_PTR64_2]], i64 4 +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[PIX1]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[PIX2]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[ADD_PTR]], align 1 +; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i8>, ptr [[ADD_PTR64]], align 1 +; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_1]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_1]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i8>, ptr [[ADD_PTR_1]], align 1 +; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_1]], align 1 +; CHECK-NEXT: [[TMP10:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_2]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_2]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = load <4 x i8>, ptr [[ADD_PTR_2]], align 1 +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> [[TMP4]], <16 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i8> [[TMP8]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <16 x i8> [[TMP15]], <16 x i8> [[TMP16]], <16 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <4 x i8> [[TMP12]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <16 x i8> [[TMP17]], <16 x i8> [[TMP18]], <16 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = zext <16 x i8> [[TMP19]] to <16 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_2]], align 1 +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <4 x i8> [[TMP5]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> [[TMP5]], <16 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <16 x i8> [[TMP24]], <16 x i8> [[TMP25]], <16 x i32> +; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <4 x i8> [[TMP21]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <16 x i8> [[TMP26]], <16 x i8> [[TMP27]], <16 x i32> +; CHECK-NEXT: [[TMP29:%.*]] = zext <16 x i8> [[TMP28]] to <16 x i32> +; CHECK-NEXT: [[TMP30:%.*]] = sub nsw <16 x i32> [[TMP20]], [[TMP29]] +; CHECK-NEXT: [[TMP31:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_3]], align 1 +; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP33:%.*]] = shufflevector <4 x i8> [[TMP6]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> [[TMP6]], <16 x i32> +; CHECK-NEXT: [[TMP35:%.*]] = shufflevector <4 x i8> [[TMP10]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP36:%.*]] = shufflevector <16 x i8> [[TMP34]], <16 x i8> [[TMP35]], <16 x i32> +; CHECK-NEXT: [[TMP37:%.*]] = shufflevector <4 x i8> [[TMP31]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <16 x i8> [[TMP36]], <16 x i8> [[TMP37]], <16 x i32> +; CHECK-NEXT: [[TMP39:%.*]] = zext <16 x i8> [[TMP38]] to <16 x i32> +; CHECK-NEXT: [[TMP40:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_3]], align 1 +; CHECK-NEXT: [[TMP41:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP42:%.*]] = shufflevector <4 x i8> [[TMP7]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP43:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> [[TMP7]], <16 x i32> +; CHECK-NEXT: [[TMP44:%.*]] = shufflevector <4 x i8> [[TMP11]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP45:%.*]] = shufflevector <16 x i8> [[TMP43]], <16 x i8> [[TMP44]], <16 x i32> +; CHECK-NEXT: [[TMP46:%.*]] = shufflevector <4 x i8> [[TMP40]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <16 x i8> [[TMP45]], <16 x i8> [[TMP46]], <16 x i32> +; CHECK-NEXT: [[TMP48:%.*]] = zext <16 x i8> [[TMP47]] to <16 x i32> +; CHECK-NEXT: [[TMP49:%.*]] = sub nsw <16 x i32> [[TMP39]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = shl nsw <16 x i32> [[TMP49]], splat (i32 16) +; CHECK-NEXT: [[TMP51:%.*]] = add nsw <16 x i32> [[TMP50]], [[TMP30]] +; CHECK-NEXT: [[TMP52:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP53:%.*]] = add nsw <16 x i32> [[TMP52]], [[TMP51]] +; CHECK-NEXT: [[TMP54:%.*]] = sub nsw <16 x i32> [[TMP52]], [[TMP51]] +; CHECK-NEXT: [[TMP55:%.*]] = shufflevector <16 x i32> [[TMP53]], <16 x i32> [[TMP54]], <16 x i32> +; CHECK-NEXT: [[TMP56:%.*]] = shufflevector <16 x i32> [[TMP55]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP57:%.*]] = add nsw <16 x i32> [[TMP55]], [[TMP56]] +; CHECK-NEXT: [[TMP58:%.*]] = sub nsw <16 x i32> [[TMP55]], [[TMP56]] +; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <16 x i32> [[TMP57]], <16 x i32> [[TMP58]], <16 x i32> +; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP61:%.*]] = sub nsw <16 x i32> [[TMP59]], [[TMP60]] +; CHECK-NEXT: [[TMP62:%.*]] = add nsw <16 x i32> [[TMP59]], [[TMP60]] +; CHECK-NEXT: [[TMP63:%.*]] = shufflevector <16 x i32> [[TMP61]], <16 x i32> [[TMP62]], <16 x i32> +; CHECK-NEXT: [[TMP64:%.*]] = shufflevector <16 x i32> [[TMP63]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP65:%.*]] = add nsw <16 x i32> [[TMP63]], [[TMP64]] +; CHECK-NEXT: [[TMP66:%.*]] = sub nsw <16 x i32> [[TMP63]], [[TMP64]] +; CHECK-NEXT: [[TMP67:%.*]] = shufflevector <16 x i32> [[TMP65]], <16 x i32> [[TMP66]], <16 x i32> +; CHECK-NEXT: [[TMP68:%.*]] = lshr <16 x i32> [[TMP67]], splat (i32 15) +; CHECK-NEXT: [[TMP69:%.*]] = and <16 x i32> [[TMP68]], splat (i32 65537) +; CHECK-NEXT: [[TMP70:%.*]] = mul nuw <16 x i32> [[TMP69]], splat (i32 65535) +; CHECK-NEXT: [[TMP71:%.*]] = add <16 x i32> [[TMP70]], [[TMP67]] +; CHECK-NEXT: [[TMP72:%.*]] = xor <16 x i32> [[TMP71]], [[TMP70]] +; CHECK-NEXT: [[TMP73:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP72]]) +; CHECK-NEXT: [[CONV118:%.*]] = and i32 [[TMP73]], 65535 +; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[TMP73]], 16 +; CHECK-NEXT: [[ADD119:%.*]] = add nuw nsw i32 [[CONV118]], [[SHR]] +; CHECK-NEXT: [[SHR120:%.*]] = lshr i32 [[ADD119]], 1 +; CHECK-NEXT: ret i32 [[SHR120]] +; +entry: + %idx.ext = sext i32 %i_pix1 to i64 + %idx.ext63 = sext i32 %i_pix2 to i64 + %0 = load i8, ptr %pix1, align 1 + %conv = zext i8 %0 to i32 + %1 = load i8, ptr %pix2, align 1 + %conv2 = zext i8 %1 to i32 + %sub = sub nsw i32 %conv, %conv2 + %arrayidx3 = getelementptr inbounds nuw i8, ptr %pix1, i64 4 + %2 = load i8, ptr %arrayidx3, align 1 + %conv4 = zext i8 %2 to i32 + %arrayidx5 = getelementptr inbounds nuw i8, ptr %pix2, i64 4 + %3 = load i8, ptr %arrayidx5, align 1 + %conv6 = zext i8 %3 to i32 + %sub7 = sub nsw i32 %conv4, %conv6 + %shl = shl nsw i32 %sub7, 16 + %add = add nsw i32 %shl, %sub + %arrayidx8 = getelementptr inbounds nuw i8, ptr %pix1, i64 1 + %4 = load i8, ptr %arrayidx8, align 1 + %conv9 = zext i8 %4 to i32 + %arrayidx10 = getelementptr inbounds nuw i8, ptr %pix2, i64 1 + %5 = load i8, ptr %arrayidx10, align 1 + %conv11 = zext i8 %5 to i32 + %sub12 = sub nsw i32 %conv9, %conv11 + %arrayidx13 = getelementptr inbounds nuw i8, ptr %pix1, i64 5 + %6 = load i8, ptr %arrayidx13, align 1 + %conv14 = zext i8 %6 to i32 + %arrayidx15 = getelementptr inbounds nuw i8, ptr %pix2, i64 5 + %7 = load i8, ptr %arrayidx15, align 1 + %conv16 = zext i8 %7 to i32 + %sub17 = sub nsw i32 %conv14, %conv16 + %shl18 = shl nsw i32 %sub17, 16 + %add19 = add nsw i32 %shl18, %sub12 + %arrayidx20 = getelementptr inbounds nuw i8, ptr %pix1, i64 2 + %8 = load i8, ptr %arrayidx20, align 1 + %conv21 = zext i8 %8 to i32 + %arrayidx22 = getelementptr inbounds nuw i8, ptr %pix2, i64 2 + %9 = load i8, ptr %arrayidx22, align 1 + %conv23 = zext i8 %9 to i32 + %sub24 = sub nsw i32 %conv21, %conv23 + %arrayidx25 = getelementptr inbounds nuw i8, ptr %pix1, i64 6 + %10 = load i8, ptr %arrayidx25, align 1 + %conv26 = zext i8 %10 to i32 + %arrayidx27 = getelementptr inbounds nuw i8, ptr %pix2, i64 6 + %11 = load i8, ptr %arrayidx27, align 1 + %conv28 = zext i8 %11 to i32 + %sub29 = sub nsw i32 %conv26, %conv28 + %shl30 = shl nsw i32 %sub29, 16 + %add31 = add nsw i32 %shl30, %sub24 + %arrayidx32 = getelementptr inbounds nuw i8, ptr %pix1, i64 3 + %12 = load i8, ptr %arrayidx32, align 1 + %conv33 = zext i8 %12 to i32 + %arrayidx34 = getelementptr inbounds nuw i8, ptr %pix2, i64 3 + %13 = load i8, ptr %arrayidx34, align 1 + %conv35 = zext i8 %13 to i32 + %sub36 = sub nsw i32 %conv33, %conv35 + %arrayidx37 = getelementptr inbounds nuw i8, ptr %pix1, i64 7 + %14 = load i8, ptr %arrayidx37, align 1 + %conv38 = zext i8 %14 to i32 + %arrayidx39 = getelementptr inbounds nuw i8, ptr %pix2, i64 7 + %15 = load i8, ptr %arrayidx39, align 1 + %conv40 = zext i8 %15 to i32 + %sub41 = sub nsw i32 %conv38, %conv40 + %shl42 = shl nsw i32 %sub41, 16 + %add43 = add nsw i32 %shl42, %sub36 + %add44 = add nsw i32 %add19, %add + %sub45 = sub nsw i32 %add, %add19 + %add46 = add nsw i32 %add43, %add31 + %sub47 = sub nsw i32 %add31, %add43 + %add48 = add nsw i32 %add46, %add44 + %sub51 = sub nsw i32 %add44, %add46 + %add55 = add nsw i32 %sub47, %sub45 + %sub59 = sub nsw i32 %sub45, %sub47 + %add.ptr = getelementptr inbounds i8, ptr %pix1, i64 %idx.ext + %add.ptr64 = getelementptr inbounds i8, ptr %pix2, i64 %idx.ext63 + %16 = load i8, ptr %add.ptr, align 1 + %conv.1 = zext i8 %16 to i32 + %17 = load i8, ptr %add.ptr64, align 1 + %conv2.1 = zext i8 %17 to i32 + %sub.1 = sub nsw i32 %conv.1, %conv2.1 + %arrayidx3.1 = getelementptr inbounds nuw i8, ptr %add.ptr, i64 4 + %18 = load i8, ptr %arrayidx3.1, align 1 + %conv4.1 = zext i8 %18 to i32 + %arrayidx5.1 = getelementptr inbounds nuw i8, ptr %add.ptr64, i64 4 + %19 = load i8, ptr %arrayidx5.1, align 1 + %conv6.1 = zext i8 %19 to i32 + %sub7.1 = sub nsw i32 %conv4.1, %conv6.1 + %shl.1 = shl nsw i32 %sub7.1, 16 + %add.1 = add nsw i32 %shl.1, %sub.1 + %arrayidx8.1 = getelementptr inbounds nuw i8, ptr %add.ptr, i64 1 + %20 = load i8, ptr %arrayidx8.1, align 1 + %conv9.1 = zext i8 %20 to i32 + %arrayidx10.1 = getelementptr inbounds nuw i8, ptr %add.ptr64, i64 1 + %21 = load i8, ptr %arrayidx10.1, align 1 + %conv11.1 = zext i8 %21 to i32 + %sub12.1 = sub nsw i32 %conv9.1, %conv11.1 + %arrayidx13.1 = getelementptr inbounds nuw i8, ptr %add.ptr, i64 5 + %22 = load i8, ptr %arrayidx13.1, align 1 + %conv14.1 = zext i8 %22 to i32 + %arrayidx15.1 = getelementptr inbounds nuw i8, ptr %add.ptr64, i64 5 + %23 = load i8, ptr %arrayidx15.1, align 1 + %conv16.1 = zext i8 %23 to i32 + %sub17.1 = sub nsw i32 %conv14.1, %conv16.1 + %shl18.1 = shl nsw i32 %sub17.1, 16 + %add19.1 = add nsw i32 %shl18.1, %sub12.1 + %arrayidx20.1 = getelementptr inbounds nuw i8, ptr %add.ptr, i64 2 + %24 = load i8, ptr %arrayidx20.1, align 1 + %conv21.1 = zext i8 %24 to i32 + %arrayidx22.1 = getelementptr inbounds nuw i8, ptr %add.ptr64, i64 2 + %25 = load i8, ptr %arrayidx22.1, align 1 + %conv23.1 = zext i8 %25 to i32 + %sub24.1 = sub nsw i32 %conv21.1, %conv23.1 + %arrayidx25.1 = getelementptr inbounds nuw i8, ptr %add.ptr, i64 6 + %26 = load i8, ptr %arrayidx25.1, align 1 + %conv26.1 = zext i8 %26 to i32 + %arrayidx27.1 = getelementptr inbounds nuw i8, ptr %add.ptr64, i64 6 + %27 = load i8, ptr %arrayidx27.1, align 1 + %conv28.1 = zext i8 %27 to i32 + %sub29.1 = sub nsw i32 %conv26.1, %conv28.1 + %shl30.1 = shl nsw i32 %sub29.1, 16 + %add31.1 = add nsw i32 %shl30.1, %sub24.1 + %arrayidx32.1 = getelementptr inbounds nuw i8, ptr %add.ptr, i64 3 + %28 = load i8, ptr %arrayidx32.1, align 1 + %conv33.1 = zext i8 %28 to i32 + %arrayidx34.1 = getelementptr inbounds nuw i8, ptr %add.ptr64, i64 3 + %29 = load i8, ptr %arrayidx34.1, align 1 + %conv35.1 = zext i8 %29 to i32 + %sub36.1 = sub nsw i32 %conv33.1, %conv35.1 + %arrayidx37.1 = getelementptr inbounds nuw i8, ptr %add.ptr, i64 7 + %30 = load i8, ptr %arrayidx37.1, align 1 + %conv38.1 = zext i8 %30 to i32 + %arrayidx39.1 = getelementptr inbounds nuw i8, ptr %add.ptr64, i64 7 + %31 = load i8, ptr %arrayidx39.1, align 1 + %conv40.1 = zext i8 %31 to i32 + %sub41.1 = sub nsw i32 %conv38.1, %conv40.1 + %shl42.1 = shl nsw i32 %sub41.1, 16 + %add43.1 = add nsw i32 %shl42.1, %sub36.1 + %add44.1 = add nsw i32 %add19.1, %add.1 + %sub45.1 = sub nsw i32 %add.1, %add19.1 + %add46.1 = add nsw i32 %add43.1, %add31.1 + %sub47.1 = sub nsw i32 %add31.1, %add43.1 + %add48.1 = add nsw i32 %add46.1, %add44.1 + %sub51.1 = sub nsw i32 %add44.1, %add46.1 + %add55.1 = add nsw i32 %sub47.1, %sub45.1 + %sub59.1 = sub nsw i32 %sub45.1, %sub47.1 + %add.ptr.1 = getelementptr inbounds i8, ptr %add.ptr, i64 %idx.ext + %add.ptr64.1 = getelementptr inbounds i8, ptr %add.ptr64, i64 %idx.ext63 + %32 = load i8, ptr %add.ptr.1, align 1 + %conv.2 = zext i8 %32 to i32 + %33 = load i8, ptr %add.ptr64.1, align 1 + %conv2.2 = zext i8 %33 to i32 + %sub.2 = sub nsw i32 %conv.2, %conv2.2 + %arrayidx3.2 = getelementptr inbounds nuw i8, ptr %add.ptr.1, i64 4 + %34 = load i8, ptr %arrayidx3.2, align 1 + %conv4.2 = zext i8 %34 to i32 + %arrayidx5.2 = getelementptr inbounds nuw i8, ptr %add.ptr64.1, i64 4 + %35 = load i8, ptr %arrayidx5.2, align 1 + %conv6.2 = zext i8 %35 to i32 + %sub7.2 = sub nsw i32 %conv4.2, %conv6.2 + %shl.2 = shl nsw i32 %sub7.2, 16 + %add.2 = add nsw i32 %shl.2, %sub.2 + %arrayidx8.2 = getelementptr inbounds nuw i8, ptr %add.ptr.1, i64 1 + %36 = load i8, ptr %arrayidx8.2, align 1 + %conv9.2 = zext i8 %36 to i32 + %arrayidx10.2 = getelementptr inbounds nuw i8, ptr %add.ptr64.1, i64 1 + %37 = load i8, ptr %arrayidx10.2, align 1 + %conv11.2 = zext i8 %37 to i32 + %sub12.2 = sub nsw i32 %conv9.2, %conv11.2 + %arrayidx13.2 = getelementptr inbounds nuw i8, ptr %add.ptr.1, i64 5 + %38 = load i8, ptr %arrayidx13.2, align 1 + %conv14.2 = zext i8 %38 to i32 + %arrayidx15.2 = getelementptr inbounds nuw i8, ptr %add.ptr64.1, i64 5 + %39 = load i8, ptr %arrayidx15.2, align 1 + %conv16.2 = zext i8 %39 to i32 + %sub17.2 = sub nsw i32 %conv14.2, %conv16.2 + %shl18.2 = shl nsw i32 %sub17.2, 16 + %add19.2 = add nsw i32 %shl18.2, %sub12.2 + %arrayidx20.2 = getelementptr inbounds nuw i8, ptr %add.ptr.1, i64 2 + %40 = load i8, ptr %arrayidx20.2, align 1 + %conv21.2 = zext i8 %40 to i32 + %arrayidx22.2 = getelementptr inbounds nuw i8, ptr %add.ptr64.1, i64 2 + %41 = load i8, ptr %arrayidx22.2, align 1 + %conv23.2 = zext i8 %41 to i32 + %sub24.2 = sub nsw i32 %conv21.2, %conv23.2 + %arrayidx25.2 = getelementptr inbounds nuw i8, ptr %add.ptr.1, i64 6 + %42 = load i8, ptr %arrayidx25.2, align 1 + %conv26.2 = zext i8 %42 to i32 + %arrayidx27.2 = getelementptr inbounds nuw i8, ptr %add.ptr64.1, i64 6 + %43 = load i8, ptr %arrayidx27.2, align 1 + %conv28.2 = zext i8 %43 to i32 + %sub29.2 = sub nsw i32 %conv26.2, %conv28.2 + %shl30.2 = shl nsw i32 %sub29.2, 16 + %add31.2 = add nsw i32 %shl30.2, %sub24.2 + %arrayidx32.2 = getelementptr inbounds nuw i8, ptr %add.ptr.1, i64 3 + %44 = load i8, ptr %arrayidx32.2, align 1 + %conv33.2 = zext i8 %44 to i32 + %arrayidx34.2 = getelementptr inbounds nuw i8, ptr %add.ptr64.1, i64 3 + %45 = load i8, ptr %arrayidx34.2, align 1 + %conv35.2 = zext i8 %45 to i32 + %sub36.2 = sub nsw i32 %conv33.2, %conv35.2 + %arrayidx37.2 = getelementptr inbounds nuw i8, ptr %add.ptr.1, i64 7 + %46 = load i8, ptr %arrayidx37.2, align 1 + %conv38.2 = zext i8 %46 to i32 + %arrayidx39.2 = getelementptr inbounds nuw i8, ptr %add.ptr64.1, i64 7 + %47 = load i8, ptr %arrayidx39.2, align 1 + %conv40.2 = zext i8 %47 to i32 + %sub41.2 = sub nsw i32 %conv38.2, %conv40.2 + %shl42.2 = shl nsw i32 %sub41.2, 16 + %add43.2 = add nsw i32 %shl42.2, %sub36.2 + %add44.2 = add nsw i32 %add19.2, %add.2 + %sub45.2 = sub nsw i32 %add.2, %add19.2 + %add46.2 = add nsw i32 %add43.2, %add31.2 + %sub47.2 = sub nsw i32 %add31.2, %add43.2 + %add48.2 = add nsw i32 %add46.2, %add44.2 + %sub51.2 = sub nsw i32 %add44.2, %add46.2 + %add55.2 = add nsw i32 %sub47.2, %sub45.2 + %sub59.2 = sub nsw i32 %sub45.2, %sub47.2 + %add.ptr.2 = getelementptr inbounds i8, ptr %add.ptr.1, i64 %idx.ext + %add.ptr64.2 = getelementptr inbounds i8, ptr %add.ptr64.1, i64 %idx.ext63 + %48 = load i8, ptr %add.ptr.2, align 1 + %conv.3 = zext i8 %48 to i32 + %49 = load i8, ptr %add.ptr64.2, align 1 + %conv2.3 = zext i8 %49 to i32 + %sub.3 = sub nsw i32 %conv.3, %conv2.3 + %arrayidx3.3 = getelementptr inbounds nuw i8, ptr %add.ptr.2, i64 4 + %50 = load i8, ptr %arrayidx3.3, align 1 + %conv4.3 = zext i8 %50 to i32 + %arrayidx5.3 = getelementptr inbounds nuw i8, ptr %add.ptr64.2, i64 4 + %51 = load i8, ptr %arrayidx5.3, align 1 + %conv6.3 = zext i8 %51 to i32 + %sub7.3 = sub nsw i32 %conv4.3, %conv6.3 + %shl.3 = shl nsw i32 %sub7.3, 16 + %add.3 = add nsw i32 %shl.3, %sub.3 + %arrayidx8.3 = getelementptr inbounds nuw i8, ptr %add.ptr.2, i64 1 + %52 = load i8, ptr %arrayidx8.3, align 1 + %conv9.3 = zext i8 %52 to i32 + %arrayidx10.3 = getelementptr inbounds nuw i8, ptr %add.ptr64.2, i64 1 + %53 = load i8, ptr %arrayidx10.3, align 1 + %conv11.3 = zext i8 %53 to i32 + %sub12.3 = sub nsw i32 %conv9.3, %conv11.3 + %arrayidx13.3 = getelementptr inbounds nuw i8, ptr %add.ptr.2, i64 5 + %54 = load i8, ptr %arrayidx13.3, align 1 + %conv14.3 = zext i8 %54 to i32 + %arrayidx15.3 = getelementptr inbounds nuw i8, ptr %add.ptr64.2, i64 5 + %55 = load i8, ptr %arrayidx15.3, align 1 + %conv16.3 = zext i8 %55 to i32 + %sub17.3 = sub nsw i32 %conv14.3, %conv16.3 + %shl18.3 = shl nsw i32 %sub17.3, 16 + %add19.3 = add nsw i32 %shl18.3, %sub12.3 + %arrayidx20.3 = getelementptr inbounds nuw i8, ptr %add.ptr.2, i64 2 + %56 = load i8, ptr %arrayidx20.3, align 1 + %conv21.3 = zext i8 %56 to i32 + %arrayidx22.3 = getelementptr inbounds nuw i8, ptr %add.ptr64.2, i64 2 + %57 = load i8, ptr %arrayidx22.3, align 1 + %conv23.3 = zext i8 %57 to i32 + %sub24.3 = sub nsw i32 %conv21.3, %conv23.3 + %arrayidx25.3 = getelementptr inbounds nuw i8, ptr %add.ptr.2, i64 6 + %58 = load i8, ptr %arrayidx25.3, align 1 + %conv26.3 = zext i8 %58 to i32 + %arrayidx27.3 = getelementptr inbounds nuw i8, ptr %add.ptr64.2, i64 6 + %59 = load i8, ptr %arrayidx27.3, align 1 + %conv28.3 = zext i8 %59 to i32 + %sub29.3 = sub nsw i32 %conv26.3, %conv28.3 + %shl30.3 = shl nsw i32 %sub29.3, 16 + %add31.3 = add nsw i32 %shl30.3, %sub24.3 + %arrayidx32.3 = getelementptr inbounds nuw i8, ptr %add.ptr.2, i64 3 + %60 = load i8, ptr %arrayidx32.3, align 1 + %conv33.3 = zext i8 %60 to i32 + %arrayidx34.3 = getelementptr inbounds nuw i8, ptr %add.ptr64.2, i64 3 + %61 = load i8, ptr %arrayidx34.3, align 1 + %conv35.3 = zext i8 %61 to i32 + %sub36.3 = sub nsw i32 %conv33.3, %conv35.3 + %arrayidx37.3 = getelementptr inbounds nuw i8, ptr %add.ptr.2, i64 7 + %62 = load i8, ptr %arrayidx37.3, align 1 + %conv38.3 = zext i8 %62 to i32 + %arrayidx39.3 = getelementptr inbounds nuw i8, ptr %add.ptr64.2, i64 7 + %63 = load i8, ptr %arrayidx39.3, align 1 + %conv40.3 = zext i8 %63 to i32 + %sub41.3 = sub nsw i32 %conv38.3, %conv40.3 + %shl42.3 = shl nsw i32 %sub41.3, 16 + %add43.3 = add nsw i32 %shl42.3, %sub36.3 + %add44.3 = add nsw i32 %add19.3, %add.3 + %sub45.3 = sub nsw i32 %add.3, %add19.3 + %add46.3 = add nsw i32 %add43.3, %add31.3 + %sub47.3 = sub nsw i32 %add31.3, %add43.3 + %add48.3 = add nsw i32 %add46.3, %add44.3 + %sub51.3 = sub nsw i32 %add44.3, %add46.3 + %add55.3 = add nsw i32 %sub47.3, %sub45.3 + %sub59.3 = sub nsw i32 %sub45.3, %sub47.3 + %add78 = add nsw i32 %add48.1, %add48 + %sub86 = sub nsw i32 %add48, %add48.1 + %add94 = add nsw i32 %add48.3, %add48.2 + %sub102 = sub nsw i32 %add48.2, %add48.3 + %add103 = add nsw i32 %add94, %add78 + %sub104 = sub nsw i32 %add78, %add94 + %add105 = add nsw i32 %sub102, %sub86 + %sub106 = sub nsw i32 %sub86, %sub102 + %shr.i = lshr i32 %add103, 15 + %and.i = and i32 %shr.i, 65537 + %mul.i = mul nuw i32 %and.i, 65535 + %add.i = add i32 %mul.i, %add103 + %xor.i = xor i32 %add.i, %mul.i + %shr.i169 = lshr i32 %add105, 15 + %and.i170 = and i32 %shr.i169, 65537 + %mul.i171 = mul nuw i32 %and.i170, 65535 + %add.i172 = add i32 %mul.i171, %add105 + %xor.i173 = xor i32 %add.i172, %mul.i171 + %shr.i174 = lshr i32 %sub104, 15 + %and.i175 = and i32 %shr.i174, 65537 + %mul.i176 = mul nuw i32 %and.i175, 65535 + %add.i177 = add i32 %mul.i176, %sub104 + %xor.i178 = xor i32 %add.i177, %mul.i176 + %shr.i179 = lshr i32 %sub106, 15 + %and.i180 = and i32 %shr.i179, 65537 + %mul.i181 = mul nuw i32 %and.i180, 65535 + %add.i182 = add i32 %mul.i181, %sub106 + %xor.i183 = xor i32 %add.i182, %mul.i181 + %add110 = add i32 %xor.i173, %xor.i + %add112 = add i32 %add110, %xor.i178 + %add113 = add i32 %add112, %xor.i183 + %add78.1 = add nsw i32 %add55.1, %add55 + %sub86.1 = sub nsw i32 %add55, %add55.1 + %add94.1 = add nsw i32 %add55.3, %add55.2 + %sub102.1 = sub nsw i32 %add55.2, %add55.3 + %add103.1 = add nsw i32 %add94.1, %add78.1 + %sub104.1 = sub nsw i32 %add78.1, %add94.1 + %add105.1 = add nsw i32 %sub102.1, %sub86.1 + %sub106.1 = sub nsw i32 %sub86.1, %sub102.1 + %shr.i.1 = lshr i32 %add103.1, 15 + %and.i.1 = and i32 %shr.i.1, 65537 + %mul.i.1 = mul nuw i32 %and.i.1, 65535 + %add.i.1 = add i32 %mul.i.1, %add103.1 + %xor.i.1 = xor i32 %add.i.1, %mul.i.1 + %shr.i169.1 = lshr i32 %add105.1, 15 + %and.i170.1 = and i32 %shr.i169.1, 65537 + %mul.i171.1 = mul nuw i32 %and.i170.1, 65535 + %add.i172.1 = add i32 %mul.i171.1, %add105.1 + %xor.i173.1 = xor i32 %add.i172.1, %mul.i171.1 + %shr.i174.1 = lshr i32 %sub104.1, 15 + %and.i175.1 = and i32 %shr.i174.1, 65537 + %mul.i176.1 = mul nuw i32 %and.i175.1, 65535 + %add.i177.1 = add i32 %mul.i176.1, %sub104.1 + %xor.i178.1 = xor i32 %add.i177.1, %mul.i176.1 + %shr.i179.1 = lshr i32 %sub106.1, 15 + %and.i180.1 = and i32 %shr.i179.1, 65537 + %mul.i181.1 = mul nuw i32 %and.i180.1, 65535 + %add.i182.1 = add i32 %mul.i181.1, %sub106.1 + %xor.i183.1 = xor i32 %add.i182.1, %mul.i181.1 + %add108.1 = add i32 %xor.i173.1, %add113 + %add110.1 = add i32 %add108.1, %xor.i.1 + %add112.1 = add i32 %add110.1, %xor.i178.1 + %add113.1 = add i32 %add112.1, %xor.i183.1 + %add78.2 = add nsw i32 %sub51.1, %sub51 + %sub86.2 = sub nsw i32 %sub51, %sub51.1 + %add94.2 = add nsw i32 %sub51.3, %sub51.2 + %sub102.2 = sub nsw i32 %sub51.2, %sub51.3 + %add103.2 = add nsw i32 %add94.2, %add78.2 + %sub104.2 = sub nsw i32 %add78.2, %add94.2 + %add105.2 = add nsw i32 %sub102.2, %sub86.2 + %sub106.2 = sub nsw i32 %sub86.2, %sub102.2 + %shr.i.2 = lshr i32 %add103.2, 15 + %and.i.2 = and i32 %shr.i.2, 65537 + %mul.i.2 = mul nuw i32 %and.i.2, 65535 + %add.i.2 = add i32 %mul.i.2, %add103.2 + %xor.i.2 = xor i32 %add.i.2, %mul.i.2 + %shr.i169.2 = lshr i32 %add105.2, 15 + %and.i170.2 = and i32 %shr.i169.2, 65537 + %mul.i171.2 = mul nuw i32 %and.i170.2, 65535 + %add.i172.2 = add i32 %mul.i171.2, %add105.2 + %xor.i173.2 = xor i32 %add.i172.2, %mul.i171.2 + %shr.i174.2 = lshr i32 %sub104.2, 15 + %and.i175.2 = and i32 %shr.i174.2, 65537 + %mul.i176.2 = mul nuw i32 %and.i175.2, 65535 + %add.i177.2 = add i32 %mul.i176.2, %sub104.2 + %xor.i178.2 = xor i32 %add.i177.2, %mul.i176.2 + %shr.i179.2 = lshr i32 %sub106.2, 15 + %and.i180.2 = and i32 %shr.i179.2, 65537 + %mul.i181.2 = mul nuw i32 %and.i180.2, 65535 + %add.i182.2 = add i32 %mul.i181.2, %sub106.2 + %xor.i183.2 = xor i32 %add.i182.2, %mul.i181.2 + %add108.2 = add i32 %xor.i173.2, %add113.1 + %add110.2 = add i32 %add108.2, %xor.i.2 + %add112.2 = add i32 %add110.2, %xor.i178.2 + %add113.2 = add i32 %add112.2, %xor.i183.2 + %add78.3 = add nsw i32 %sub59.1, %sub59 + %sub86.3 = sub nsw i32 %sub59, %sub59.1 + %add94.3 = add nsw i32 %sub59.3, %sub59.2 + %sub102.3 = sub nsw i32 %sub59.2, %sub59.3 + %add103.3 = add nsw i32 %add94.3, %add78.3 + %sub104.3 = sub nsw i32 %add78.3, %add94.3 + %add105.3 = add nsw i32 %sub102.3, %sub86.3 + %sub106.3 = sub nsw i32 %sub86.3, %sub102.3 + %shr.i.3 = lshr i32 %add103.3, 15 + %and.i.3 = and i32 %shr.i.3, 65537 + %mul.i.3 = mul nuw i32 %and.i.3, 65535 + %add.i.3 = add i32 %mul.i.3, %add103.3 + %xor.i.3 = xor i32 %add.i.3, %mul.i.3 + %shr.i169.3 = lshr i32 %add105.3, 15 + %and.i170.3 = and i32 %shr.i169.3, 65537 + %mul.i171.3 = mul nuw i32 %and.i170.3, 65535 + %add.i172.3 = add i32 %mul.i171.3, %add105.3 + %xor.i173.3 = xor i32 %add.i172.3, %mul.i171.3 + %shr.i174.3 = lshr i32 %sub104.3, 15 + %and.i175.3 = and i32 %shr.i174.3, 65537 + %mul.i176.3 = mul nuw i32 %and.i175.3, 65535 + %add.i177.3 = add i32 %mul.i176.3, %sub104.3 + %xor.i178.3 = xor i32 %add.i177.3, %mul.i176.3 + %shr.i179.3 = lshr i32 %sub106.3, 15 + %and.i180.3 = and i32 %shr.i179.3, 65537 + %mul.i181.3 = mul nuw i32 %and.i180.3, 65535 + %add.i182.3 = add i32 %mul.i181.3, %sub106.3 + %xor.i183.3 = xor i32 %add.i182.3, %mul.i181.3 + %add108.3 = add i32 %xor.i173.3, %add113.2 + %add110.3 = add i32 %add108.3, %xor.i.3 + %add112.3 = add i32 %add110.3, %xor.i178.3 + %add113.3 = add i32 %add112.3, %xor.i183.3 + %conv118 = and i32 %add113.3, 65535 + %shr = lshr i32 %add113.3, 16 + %add119 = add nuw nsw i32 %conv118, %shr + %shr120 = lshr i32 %add119, 1 + ret i32 %shr120 +}