Illegal instructions "vslideup.vi v8, v8, 5" and "vslideup.vi v8, v8, 2" are generated for the following IR. These encodings are reserved because vslideup does not allow the destination vector register group to overlap the source vector register group, yet the backend assigns v8 to both operands. (A reduced sketch of the offending slide pattern follows the full module.)
; ModuleID = './foobar.c'
source_filename = "./foobar.c"
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
target triple = "riscv64-unknown-linux-gnu"
%struct.png_row_info_struct = type { i32, i64, i8, i8, i8, i8 }
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(read)
declare <vscale x 8 x i8> @llvm.riscv.vle.nxv8i8.i64(<vscale x 8 x i8>, ptr nocapture, i64) #2
; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8>, i64 immarg) #3
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v16i8(<vscale x 8 x i8>, <16 x i8>, i64 immarg) #3
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
declare <vscale x 8 x i8> @llvm.riscv.vslidedown.nxv8i8.i64(<vscale x 8 x i8>, <vscale x 8 x i8>, i64, i64, i64 immarg) #5
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
declare <vscale x 8 x i8> @llvm.riscv.vslideup.nxv8i8.i64(<vscale x 8 x i8>, <vscale x 8 x i8>, i64, i64, i64 immarg) #5
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(read)
declare <vscale x 2 x i1> @llvm.riscv.vlm.nxv2i1.i64(ptr nocapture, i64) #2
; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v4i32(<vscale x 2 x i32>, <4 x i32>, i64 immarg) #3
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(write)
declare void @llvm.riscv.vse.mask.nxv2i32.i64(<vscale x 2 x i32>, ptr nocapture, <vscale x 2 x i1>, i64) #4
; Function Attrs: nofree noinline nosync nounwind uwtable vscale_range(2,2)
define dso_local void @foo(ptr nocapture noundef readonly %0, ptr noundef %1, ptr nocapture noundef readonly %2) local_unnamed_addr #0 {
%4 = alloca i64, align 8
%5 = alloca i64, align 8
%6 = alloca i64, align 8
%7 = alloca i64, align 8
%8 = getelementptr inbounds %struct.png_row_info_struct, ptr %0, i64 0, i32 1
%9 = load i64, ptr %8, align 8, !tbaa !5
%10 = getelementptr inbounds i8, ptr %1, i64 %9
%11 = icmp sgt i64 %9, 0
br i1 %11, label %12, label %240
12: ; preds = %3
%13 = tail call <vscale x 8 x i8> @llvm.riscv.vle.nxv8i8.i64(<vscale x 8 x i8> poison, ptr %1, i64 16)
%14 = tail call <vscale x 8 x i8> @llvm.riscv.vmv.v.x.nxv8i8.i64(<vscale x 8 x i8> poison, i8 0, i64 8)
%15 = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %14, i64 0)
%16 = bitcast <16 x i8> %15 to <2 x i64>
%17 = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v16i8(<vscale x 8 x i8> undef, <16 x i8> undef, i64 0)
%18 = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v16i8(<vscale x 8 x i8> undef, <16 x i8> poison, i64 0)
%19 = tail call <vscale x 8 x i8> @llvm.riscv.vslidedown.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> %17, i64 1, i64 8, i64 3)
%20 = tail call <vscale x 8 x i8> @llvm.riscv.vslideup.nxv8i8.i64(<vscale x 8 x i8> %19, <vscale x 8 x i8> %17, i64 7, i64 8, i64 3)
%21 = tail call <vscale x 8 x i8> @llvm.riscv.vslidedown.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> %18, i64 1, i64 8, i64 3)
%22 = tail call <vscale x 8 x i8> @llvm.riscv.vslideup.nxv8i8.i64(<vscale x 8 x i8> %21, <vscale x 8 x i8> %18, i64 7, i64 8, i64 3)
%23 = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %22, i64 0)
%24 = bitcast <16 x i8> %23 to <2 x i64>
%25 = extractelement <2 x i64> %24, i64 0
%26 = insertvalue [2 x i64] poison, i64 %25, 0
%27 = extractelement <2 x i64> %24, i64 1
%28 = insertvalue [2 x i64] %26, i64 %27, 1
%29 = tail call <vscale x 8 x i8> @llvm.riscv.vle.nxv8i8.i64(<vscale x 8 x i8> poison, ptr %2, i64 16)
%30 = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %29, i64 0)
%31 = bitcast <16 x i8> %30 to <2 x i64>
%32 = extractelement <2 x i64> %16, i64 0
%33 = insertvalue [2 x i64] poison, i64 %32, 0
%34 = extractelement <2 x i64> %16, i64 1
%35 = insertvalue [2 x i64] %33, i64 %34, 1
%36 = extractelement <2 x i64> %31, i64 0
%37 = insertvalue [2 x i64] poison, i64 %36, 0
%38 = extractelement <2 x i64> %31, i64 1
%39 = insertvalue [2 x i64] %37, i64 %38, 1
%40 = tail call fastcc [2 x i64] @bar([2 x i64] %35, [2 x i64] %39, [2 x i64] %35)
%41 = extractvalue [2 x i64] %40, 0
%42 = insertelement <2 x i64> undef, i64 %41, i64 0
%43 = extractvalue [2 x i64] %40, 1
%44 = insertelement <2 x i64> %42, i64 %43, i64 1
%45 = bitcast <2 x i64> %44 to <16 x i8>
%46 = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v16i8(<vscale x 8 x i8> undef, <16 x i8> %45, i64 0)
%47 = tail call <vscale x 8 x i8> @llvm.riscv.vadd.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> %46, <vscale x 8 x i8> %13, i64 8)
%48 = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %47, i64 0)
%49 = bitcast <16 x i8> %48 to <2 x i64>
%50 = extractelement <2 x i64> %49, i64 0
%51 = insertvalue [2 x i64] poison, i64 %50, 0
%52 = extractelement <2 x i64> %49, i64 1
%53 = insertvalue [2 x i64] %51, i64 %52, 1
%54 = tail call <vscale x 8 x i8> @llvm.riscv.vslidedown.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> %13, i64 3, i64 8, i64 3)
%55 = tail call <vscale x 8 x i8> @llvm.riscv.vslideup.nxv8i8.i64(<vscale x 8 x i8> %54, <vscale x 8 x i8> %17, i64 5, i64 8, i64 3)
%56 = tail call <vscale x 8 x i8> @llvm.riscv.vslidedown.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> %29, i64 3, i64 8, i64 3)
%57 = tail call <vscale x 8 x i8> @llvm.riscv.vslideup.nxv8i8.i64(<vscale x 8 x i8> %56, <vscale x 8 x i8> %18, i64 5, i64 8, i64 3)
%58 = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %57, i64 0)
%59 = bitcast <16 x i8> %58 to <2 x i64>
%60 = extractelement <2 x i64> %59, i64 0
%61 = insertvalue [2 x i64] poison, i64 %60, 0
%62 = extractelement <2 x i64> %59, i64 1
%63 = insertvalue [2 x i64] %61, i64 %62, 1
%64 = tail call fastcc [2 x i64] @bar([2 x i64] %53, [2 x i64] %63, [2 x i64] %39)
%65 = extractvalue [2 x i64] %64, 0
%66 = insertelement <2 x i64> undef, i64 %65, i64 0
%67 = extractvalue [2 x i64] %64, 1
%68 = insertelement <2 x i64> %66, i64 %67, i64 1
%69 = bitcast <2 x i64> %68 to <16 x i8>
%70 = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v16i8(<vscale x 8 x i8> undef, <16 x i8> %69, i64 0)
%71 = tail call <vscale x 8 x i8> @llvm.riscv.vadd.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> %70, <vscale x 8 x i8> %55, i64 8)
%72 = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %71, i64 0)
%73 = bitcast <16 x i8> %72 to <2 x i64>
%74 = extractelement <2 x i64> %73, i64 0
%75 = insertvalue [2 x i64] poison, i64 %74, 0
%76 = extractelement <2 x i64> %73, i64 1
%77 = insertvalue [2 x i64] %75, i64 %76, 1
%78 = tail call <vscale x 8 x i8> @llvm.riscv.vslidedown.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> %13, i64 6, i64 8, i64 3)
%79 = tail call <vscale x 8 x i8> @llvm.riscv.vslideup.nxv8i8.i64(<vscale x 8 x i8> %78, <vscale x 8 x i8> %17, i64 2, i64 8, i64 3)
%80 = tail call <vscale x 8 x i8> @llvm.riscv.vslidedown.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> %29, i64 6, i64 8, i64 3)
%81 = tail call <vscale x 8 x i8> @llvm.riscv.vslideup.nxv8i8.i64(<vscale x 8 x i8> %80, <vscale x 8 x i8> %18, i64 2, i64 8, i64 3)
%82 = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %81, i64 0)
%83 = bitcast <16 x i8> %82 to <2 x i64>
%84 = extractelement <2 x i64> %83, i64 0
%85 = insertvalue [2 x i64] poison, i64 %84, 0
%86 = extractelement <2 x i64> %83, i64 1
%87 = insertvalue [2 x i64] %85, i64 %86, 1
%88 = tail call fastcc [2 x i64] @bar([2 x i64] %77, [2 x i64] %87, [2 x i64] %63)
%89 = extractvalue [2 x i64] %88, 0
%90 = insertelement <2 x i64> undef, i64 %89, i64 0
%91 = extractvalue [2 x i64] %88, 1
%92 = insertelement <2 x i64> %90, i64 %91, i64 1
%93 = bitcast <2 x i64> %92 to <16 x i8>
%94 = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v16i8(<vscale x 8 x i8> undef, <16 x i8> %93, i64 0)
%95 = tail call <vscale x 8 x i8> @llvm.riscv.vadd.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> %94, <vscale x 8 x i8> %79, i64 8)
%96 = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %95, i64 0)
%97 = bitcast <16 x i8> %96 to <2 x i64>
%98 = extractelement <2 x i64> %97, i64 0
%99 = insertvalue [2 x i64] poison, i64 %98, 0
%100 = extractelement <2 x i64> %97, i64 1
%101 = insertvalue [2 x i64] %99, i64 %100, 1
%102 = getelementptr inbounds i8, ptr %1, i64 12
%103 = tail call <vscale x 8 x i8> @llvm.riscv.vle.nxv8i8.i64(<vscale x 8 x i8> poison, ptr nonnull %102, i64 16)
%104 = tail call fastcc [2 x i64] @bar([2 x i64] %101, [2 x i64] %28, [2 x i64] %87)
%105 = extractvalue [2 x i64] %104, 0
%106 = insertelement <2 x i64> undef, i64 %105, i64 0
%107 = extractvalue [2 x i64] %104, 1
%108 = insertelement <2 x i64> %106, i64 %107, i64 1
%109 = bitcast <2 x i64> %108 to <16 x i8>
%110 = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v16i8(<vscale x 8 x i8> undef, <16 x i8> %109, i64 0)
%111 = tail call <vscale x 8 x i8> @llvm.riscv.vadd.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> %110, <vscale x 8 x i8> %20, i64 8)
%112 = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %111, i64 0)
%113 = bitcast <16 x i8> %48 to <4 x i32>
call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %4) #8
store i64 1, ptr %4, align 8, !tbaa !13
%114 = call <vscale x 2 x i1> @llvm.riscv.vlm.nxv2i1.i64(ptr nonnull %4, i64 2)
%115 = tail call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v4i32(<vscale x 2 x i32> undef, <4 x i32> %113, i64 0)
tail call void @llvm.riscv.vse.mask.nxv2i32.i64(<vscale x 2 x i32> %115, ptr %1, <vscale x 2 x i1> %114, i64 2)
call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %4) #8
%116 = getelementptr inbounds i8, ptr %1, i64 3
%117 = bitcast <16 x i8> %72 to <4 x i32>
call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %5) #8
store i64 1, ptr %5, align 8, !tbaa !13
%118 = call <vscale x 2 x i1> @llvm.riscv.vlm.nxv2i1.i64(ptr nonnull %5, i64 2)
%119 = tail call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v4i32(<vscale x 2 x i32> undef, <4 x i32> %117, i64 0)
tail call void @llvm.riscv.vse.mask.nxv2i32.i64(<vscale x 2 x i32> %119, ptr nonnull %116, <vscale x 2 x i1> %118, i64 2)
call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %5) #8
%120 = getelementptr inbounds i8, ptr %1, i64 6
%121 = bitcast <16 x i8> %96 to <4 x i32>
call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %6) #8
store i64 1, ptr %6, align 8, !tbaa !13
%122 = call <vscale x 2 x i1> @llvm.riscv.vlm.nxv2i1.i64(ptr nonnull %6, i64 2)
%123 = tail call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v4i32(<vscale x 2 x i32> undef, <4 x i32> %121, i64 0)
tail call void @llvm.riscv.vse.mask.nxv2i32.i64(<vscale x 2 x i32> %123, ptr nonnull %120, <vscale x 2 x i1> %122, i64 2)
call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %6) #8
%124 = getelementptr inbounds i8, ptr %1, i64 9
%125 = bitcast <16 x i8> %112 to <4 x i32>
call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %7) #8
store i64 1, ptr %7, align 8, !tbaa !13
%126 = call <vscale x 2 x i1> @llvm.riscv.vlm.nxv2i1.i64(ptr nonnull %7, i64 2)
%127 = tail call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v4i32(<vscale x 2 x i32> undef, <4 x i32> %125, i64 0)
tail call void @llvm.riscv.vse.mask.nxv2i32.i64(<vscale x 2 x i32> %127, ptr nonnull %124, <vscale x 2 x i1> %126, i64 2)
call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %7) #8
%128 = icmp ugt i64 %9, 12
br i1 %128, label %129, label %240
129: ; preds = %12
%130 = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %103, i64 0)
br label %131
131: ; preds = %129, %131
%132 = phi ptr [ %102, %129 ], [ %212, %131 ]
%133 = phi ptr [ %2, %129 ], [ %137, %131 ]
%134 = phi <16 x i8> [ %130, %129 ], [ %214, %131 ]
%135 = phi <16 x i8> [ %112, %129 ], [ %223, %131 ]
%136 = bitcast <16 x i8> %135 to <2 x i64>
%137 = getelementptr inbounds i8, ptr %133, i64 12
%138 = tail call <vscale x 8 x i8> @llvm.riscv.vle.nxv8i8.i64(<vscale x 8 x i8> poison, ptr nonnull %137, i64 16)
%139 = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %138, i64 0)
%140 = bitcast <16 x i8> %139 to <2 x i64>
%141 = extractelement <2 x i64> %136, i64 0
%142 = insertvalue [2 x i64] poison, i64 %141, 0
%143 = extractelement <2 x i64> %136, i64 1
%144 = insertvalue [2 x i64] %142, i64 %143, 1
%145 = extractelement <2 x i64> %140, i64 0
%146 = insertvalue [2 x i64] poison, i64 %145, 0
%147 = extractelement <2 x i64> %140, i64 1
%148 = insertvalue [2 x i64] %146, i64 %147, 1
%149 = tail call fastcc [2 x i64] @bar([2 x i64] %144, [2 x i64] %148, [2 x i64] %28)
%150 = extractvalue [2 x i64] %149, 0
%151 = insertelement <2 x i64> undef, i64 %150, i64 0
%152 = extractvalue [2 x i64] %149, 1
%153 = insertelement <2 x i64> %151, i64 %152, i64 1
%154 = bitcast <2 x i64> %153 to <16 x i8>
%155 = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v16i8(<vscale x 8 x i8> undef, <16 x i8> %154, i64 0)
%156 = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v16i8(<vscale x 8 x i8> undef, <16 x i8> %134, i64 0)
%157 = tail call <vscale x 8 x i8> @llvm.riscv.vadd.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> %155, <vscale x 8 x i8> %156, i64 8)
%158 = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %157, i64 0)
%159 = bitcast <16 x i8> %158 to <2 x i64>
%160 = extractelement <2 x i64> %159, i64 0
%161 = insertvalue [2 x i64] poison, i64 %160, 0
%162 = extractelement <2 x i64> %159, i64 1
%163 = insertvalue [2 x i64] %161, i64 %162, 1
%164 = tail call <vscale x 8 x i8> @llvm.riscv.vslidedown.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> %156, i64 3, i64 8, i64 3)
%165 = tail call <vscale x 8 x i8> @llvm.riscv.vslideup.nxv8i8.i64(<vscale x 8 x i8> %164, <vscale x 8 x i8> %17, i64 5, i64 8, i64 3)
%166 = tail call <vscale x 8 x i8> @llvm.riscv.vslidedown.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> %138, i64 3, i64 8, i64 3)
%167 = tail call <vscale x 8 x i8> @llvm.riscv.vslideup.nxv8i8.i64(<vscale x 8 x i8> %166, <vscale x 8 x i8> %18, i64 5, i64 8, i64 3)
%168 = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %167, i64 0)
%169 = bitcast <16 x i8> %168 to <2 x i64>
%170 = extractelement <2 x i64> %169, i64 0
%171 = insertvalue [2 x i64] poison, i64 %170, 0
%172 = extractelement <2 x i64> %169, i64 1
%173 = insertvalue [2 x i64] %171, i64 %172, 1
%174 = tail call fastcc [2 x i64] @bar([2 x i64] %163, [2 x i64] %173, [2 x i64] %148)
%175 = extractvalue [2 x i64] %174, 0
%176 = insertelement <2 x i64> undef, i64 %175, i64 0
%177 = extractvalue [2 x i64] %174, 1
%178 = insertelement <2 x i64> %176, i64 %177, i64 1
%179 = bitcast <2 x i64> %178 to <16 x i8>
%180 = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v16i8(<vscale x 8 x i8> undef, <16 x i8> %179, i64 0)
%181 = tail call <vscale x 8 x i8> @llvm.riscv.vadd.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> %180, <vscale x 8 x i8> %165, i64 8)
%182 = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %181, i64 0)
%183 = bitcast <16 x i8> %182 to <2 x i64>
%184 = extractelement <2 x i64> %183, i64 0
%185 = insertvalue [2 x i64] poison, i64 %184, 0
%186 = extractelement <2 x i64> %183, i64 1
%187 = insertvalue [2 x i64] %185, i64 %186, 1
%188 = tail call <vscale x 8 x i8> @llvm.riscv.vslidedown.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> %156, i64 6, i64 8, i64 3)
%189 = tail call <vscale x 8 x i8> @llvm.riscv.vslideup.nxv8i8.i64(<vscale x 8 x i8> %188, <vscale x 8 x i8> %17, i64 2, i64 8, i64 3)
%190 = tail call <vscale x 8 x i8> @llvm.riscv.vslidedown.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> %138, i64 6, i64 8, i64 3)
%191 = tail call <vscale x 8 x i8> @llvm.riscv.vslideup.nxv8i8.i64(<vscale x 8 x i8> %190, <vscale x 8 x i8> %18, i64 2, i64 8, i64 3)
%192 = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %191, i64 0)
%193 = bitcast <16 x i8> %192 to <2 x i64>
%194 = extractelement <2 x i64> %193, i64 0
%195 = insertvalue [2 x i64] poison, i64 %194, 0
%196 = extractelement <2 x i64> %193, i64 1
%197 = insertvalue [2 x i64] %195, i64 %196, 1
%198 = tail call fastcc [2 x i64] @bar([2 x i64] %187, [2 x i64] %197, [2 x i64] %173)
%199 = extractvalue [2 x i64] %198, 0
%200 = insertelement <2 x i64> undef, i64 %199, i64 0
%201 = extractvalue [2 x i64] %198, 1
%202 = insertelement <2 x i64> %200, i64 %201, i64 1
%203 = bitcast <2 x i64> %202 to <16 x i8>
%204 = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v16i8(<vscale x 8 x i8> undef, <16 x i8> %203, i64 0)
%205 = tail call <vscale x 8 x i8> @llvm.riscv.vadd.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> %204, <vscale x 8 x i8> %189, i64 8)
%206 = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %205, i64 0)
%207 = bitcast <16 x i8> %206 to <2 x i64>
%208 = extractelement <2 x i64> %207, i64 0
%209 = insertvalue [2 x i64] poison, i64 %208, 0
%210 = extractelement <2 x i64> %207, i64 1
%211 = insertvalue [2 x i64] %209, i64 %210, 1
%212 = getelementptr inbounds i8, ptr %132, i64 12
%213 = tail call <vscale x 8 x i8> @llvm.riscv.vle.nxv8i8.i64(<vscale x 8 x i8> poison, ptr nonnull %212, i64 16)
%214 = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %213, i64 0)
%215 = tail call fastcc [2 x i64] @bar([2 x i64] %211, [2 x i64] %28, [2 x i64] %197)
%216 = extractvalue [2 x i64] %215, 0
%217 = insertelement <2 x i64> undef, i64 %216, i64 0
%218 = extractvalue [2 x i64] %215, 1
%219 = insertelement <2 x i64> %217, i64 %218, i64 1
%220 = bitcast <2 x i64> %219 to <16 x i8>
%221 = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v16i8(<vscale x 8 x i8> undef, <16 x i8> %220, i64 0)
%222 = tail call <vscale x 8 x i8> @llvm.riscv.vadd.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> %221, <vscale x 8 x i8> %20, i64 8)
%223 = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %222, i64 0)
%224 = bitcast <16 x i8> %158 to <4 x i32>
call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %4) #8
store i64 1, ptr %4, align 8, !tbaa !13
%225 = call <vscale x 2 x i1> @llvm.riscv.vlm.nxv2i1.i64(ptr nonnull %4, i64 2)
%226 = tail call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v4i32(<vscale x 2 x i32> undef, <4 x i32> %224, i64 0)
tail call void @llvm.riscv.vse.mask.nxv2i32.i64(<vscale x 2 x i32> %226, ptr nonnull %132, <vscale x 2 x i1> %225, i64 2)
call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %4) #8
%227 = getelementptr inbounds i8, ptr %132, i64 3
%228 = bitcast <16 x i8> %182 to <4 x i32>
call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %5) #8
store i64 1, ptr %5, align 8, !tbaa !13
%229 = call <vscale x 2 x i1> @llvm.riscv.vlm.nxv2i1.i64(ptr nonnull %5, i64 2)
%230 = tail call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v4i32(<vscale x 2 x i32> undef, <4 x i32> %228, i64 0)
tail call void @llvm.riscv.vse.mask.nxv2i32.i64(<vscale x 2 x i32> %230, ptr nonnull %227, <vscale x 2 x i1> %229, i64 2)
call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %5) #8
%231 = getelementptr inbounds i8, ptr %132, i64 6
%232 = bitcast <16 x i8> %206 to <4 x i32>
call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %6) #8
store i64 1, ptr %6, align 8, !tbaa !13
%233 = call <vscale x 2 x i1> @llvm.riscv.vlm.nxv2i1.i64(ptr nonnull %6, i64 2)
%234 = tail call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v4i32(<vscale x 2 x i32> undef, <4 x i32> %232, i64 0)
tail call void @llvm.riscv.vse.mask.nxv2i32.i64(<vscale x 2 x i32> %234, ptr nonnull %231, <vscale x 2 x i1> %233, i64 2)
call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %6) #8
%235 = getelementptr inbounds i8, ptr %132, i64 9
%236 = bitcast <16 x i8> %223 to <4 x i32>
call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %7) #8
store i64 1, ptr %7, align 8, !tbaa !13
%237 = call <vscale x 2 x i1> @llvm.riscv.vlm.nxv2i1.i64(ptr nonnull %7, i64 2)
%238 = tail call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v4i32(<vscale x 2 x i32> undef, <4 x i32> %236, i64 0)
tail call void @llvm.riscv.vse.mask.nxv2i32.i64(<vscale x 2 x i32> %238, ptr nonnull %235, <vscale x 2 x i1> %237, i64 2)
call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %7) #8
%239 = icmp ult ptr %212, %10
br i1 %239, label %131, label %240, !llvm.loop !18
240: ; preds = %131, %12, %3
ret void
}
; Function Attrs: mustprogress nofree noinline nosync nounwind willreturn memory(none) uwtable vscale_range(2,2)
declare fastcc [2 x i64] @bar([2 x i64] %0, [2 x i64] %1, [2 x i64] %2) #6
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
declare <vscale x 8 x i8> @llvm.riscv.vadd.nxv8i8.nxv8i8.i64(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, i64) #5
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
declare <vscale x 8 x i8> @llvm.riscv.vmv.v.x.nxv8i8.i64(<vscale x 8 x i8>, i8, i64) #5
attributes #0 = { nofree noinline nosync nounwind uwtable vscale_range(2,2) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic-rv64" "target-features"="+64bit,+a,+d,+f,+m,+relax,+v,+zicsr,+zifencei,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl32b,+zvl64b,-c,-e,-h,-save-restore,-svinval,-svnapot,-svpbmt,-unaligned-scalar-mem,-unaligned-vector-mem,-xcvalu,-xcvbi,-xcvbitmanip,-xcvmac,-xcvsimd,-xsfcie,-xsfvcp,-xventanacondops,-zawrs,-zba,-zbb,-zbc,-zbkb,-zbkc,-zbkx,-zbs,-zca,-zcb,-zcd,-zce,-zcf,-zcmp,-zcmt,-zdinx,-zfh,-zfhmin,-zfinx,-zhinx,-zhinxmin,-zicbom,-zicbop,-zicboz,-zicntr,-zihintntl,-zihintpause,-zihpm,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-zkt,-zmmul,-zvfh,-zvl1024b,-zvl16384b,-zvl2048b,-zvl256b,-zvl32768b,-zvl4096b,-zvl512b,-zvl65536b,-zvl8192b" }
attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn memory(read) }
attributes #3 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) }
attributes #4 = { mustprogress nocallback nofree nosync nounwind willreturn memory(write) }
attributes #5 = { mustprogress nocallback nofree nosync nounwind willreturn memory(none) }
attributes #6 = { mustprogress nofree noinline nosync nounwind willreturn memory(none) uwtable vscale_range(2,2) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic-rv64" "target-features"="+64bit,+a,+d,+f,+m,+relax,+v,+zicsr,+zifencei,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl32b,+zvl64b,-c,-e,-h,-save-restore,-svinval,-svnapot,-svpbmt,-unaligned-scalar-mem,-unaligned-vector-mem,-xcvalu,-xcvbi,-xcvbitmanip,-xcvmac,-xcvsimd,-xsfcie,-xsfvcp,-xventanacondops,-zawrs,-zba,-zbb,-zbc,-zbkb,-zbkc,-zbkx,-zbs,-zca,-zcb,-zcd,-zce,-zcf,-zcmp,-zcmt,-zdinx,-zfh,-zfhmin,-zfinx,-zhinx,-zhinxmin,-zicbom,-zicbop,-zicboz,-zicntr,-zihintntl,-zihintpause,-zihpm,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-zkt,-zmmul,-zvfh,-zvl1024b,-zvl16384b,-zvl2048b,-zvl256b,-zvl32768b,-zvl4096b,-zvl512b,-zvl65536b,-zvl8192b" }
attributes #7 = { nofree nosync nounwind memory(none) }
attributes #8 = { nounwind }
!llvm.module.flags = !{!0, !1, !2, !3}
!llvm.ident = !{!4}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 1, !"target-abi", !"lp64d"}
!2 = !{i32 7, !"uwtable", i32 2}
!3 = !{i32 8, !"SmallDataLimit", i32 8}
!4 = !{!"clang version 18.0.0"}
!5 = !{!6, !10, i64 8}
!6 = !{!"png_row_info_struct", !7, i64 0, !10, i64 8, !8, i64 16, !8, i64 17, !8, i64 18, !8, i64 19}
!7 = !{!"int", !8, i64 0}
!8 = !{!"omnipotent char", !9, i64 0}
!9 = !{!"Simple C/C++ TBAA"}
!10 = !{!"long", !8, i64 0}
!11 = distinct !{!11, !12}
!12 = !{!"llvm.loop.mustprogress"}
!13 = !{!10, !10, i64 0}
!14 = distinct !{!14, !12}
!15 = distinct !{!15, !12}
!16 = distinct !{!16, !12}
!17 = distinct !{!17, !12}
!18 = distinct !{!18, !12, !19}
!19 = !{!"llvm.loop.peeled.count", i32 1}
!20 = distinct !{!20, !12}
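
For context, the reported immediates 5 and 2 match the vslidedown/vslideup pairs above (e.g. %54/%55 and %78/%79), where the vslideup's tied destination (the vslidedown result) and its vslideup source apparently end up in the same register group. Below is a minimal standalone sketch of that pair, reusing the element type, VL, and policy operands from the module above; this is an illustration only, the function name @slide_pair is made up, and the reduction has not been verified to reproduce the illegal encoding by itself:

; Minimal sketch (assumption): a reduced vslidedown/vslideup pair mirroring
; %54/%55 in the module above. The vslideup's merge operand is tied to the
; vslidedown result, and the register allocator presumably coalesces it with
; the vslideup source, producing the overlapping "vslideup.vi v8, v8, 5".
declare <vscale x 8 x i8> @llvm.riscv.vslidedown.nxv8i8.i64(<vscale x 8 x i8>, <vscale x 8 x i8>, i64, i64, i64 immarg)
declare <vscale x 8 x i8> @llvm.riscv.vslideup.nxv8i8.i64(<vscale x 8 x i8>, <vscale x 8 x i8>, i64, i64, i64 immarg)

define <vscale x 8 x i8> @slide_pair(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
  ; Slide %a down by 3 elements (VL = 8, policy = 3 as in the module above).
  %down = tail call <vscale x 8 x i8> @llvm.riscv.vslidedown.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> %a, i64 3, i64 8, i64 3)
  ; Slide %b up by 5 elements into the vslidedown result (the tied destination).
  %up = tail call <vscale x 8 x i8> @llvm.riscv.vslideup.nxv8i8.i64(<vscale x 8 x i8> %down, <vscale x 8 x i8> %b, i64 5, i64 8, i64 3)
  ret <vscale x 8 x i8> %up
}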