
[RISC-V] Same register allocated for operands of vslideup #65704

@garthlei

Description


The illegal instructions "vslideup.vi v8, v8, 5" and "vslideup.vi v8, v8, 2" are generated for the following IR. In both cases the register allocator has assigned the same register to the destination and the vs2 source operand, even though the vector specification reserves the vslideup encoding when the destination register group overlaps vs2. A minimal illustration of the constraint follows the IR.

; ModuleID = './foobar.c'
source_filename = "./foobar.c"
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
target triple = "riscv64-unknown-linux-gnu"

%struct.png_row_info_struct = type { i32, i64, i8, i8, i8, i8 }

; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1

; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(read)
declare <vscale x 8 x i8> @llvm.riscv.vle.nxv8i8.i64(<vscale x 8 x i8>, ptr nocapture, i64) #2

; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8>, i64 immarg) #3

; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1

; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v16i8(<vscale x 8 x i8>, <16 x i8>, i64 immarg) #3

; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
declare <vscale x 8 x i8> @llvm.riscv.vslidedown.nxv8i8.i64(<vscale x 8 x i8>, <vscale x 8 x i8>, i64, i64, i64 immarg) #5

; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
declare <vscale x 8 x i8> @llvm.riscv.vslideup.nxv8i8.i64(<vscale x 8 x i8>, <vscale x 8 x i8>, i64, i64, i64 immarg) #5

; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(read)
declare <vscale x 2 x i1> @llvm.riscv.vlm.nxv2i1.i64(ptr nocapture, i64) #2

; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v4i32(<vscale x 2 x i32>, <4 x i32>, i64 immarg) #3

; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(write)
declare void @llvm.riscv.vse.mask.nxv2i32.i64(<vscale x 2 x i32>, ptr nocapture, <vscale x 2 x i1>, i64) #4

; Function Attrs: nofree noinline nosync nounwind uwtable vscale_range(2,2)
define dso_local void @foo(ptr nocapture noundef readonly %0, ptr noundef %1, ptr nocapture noundef readonly %2) local_unnamed_addr #0 {
  %4 = alloca i64, align 8
  %5 = alloca i64, align 8
  %6 = alloca i64, align 8
  %7 = alloca i64, align 8
  %8 = getelementptr inbounds %struct.png_row_info_struct, ptr %0, i64 0, i32 1
  %9 = load i64, ptr %8, align 8, !tbaa !5
  %10 = getelementptr inbounds i8, ptr %1, i64 %9
  %11 = icmp sgt i64 %9, 0
  br i1 %11, label %12, label %240

12:                                               ; preds = %3
  %13 = tail call <vscale x 8 x i8> @llvm.riscv.vle.nxv8i8.i64(<vscale x 8 x i8> poison, ptr %1, i64 16)
  %14 = tail call <vscale x 8 x i8> @llvm.riscv.vmv.v.x.nxv8i8.i64(<vscale x 8 x i8> poison, i8 0, i64 8)
  %15 = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %14, i64 0)
  %16 = bitcast <16 x i8> %15 to <2 x i64>
  %17 = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v16i8(<vscale x 8 x i8> undef, <16 x i8> undef, i64 0)
  %18 = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v16i8(<vscale x 8 x i8> undef, <16 x i8> poison, i64 0)
  %19 = tail call <vscale x 8 x i8> @llvm.riscv.vslidedown.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> %17, i64 1, i64 8, i64 3)
  %20 = tail call <vscale x 8 x i8> @llvm.riscv.vslideup.nxv8i8.i64(<vscale x 8 x i8> %19, <vscale x 8 x i8> %17, i64 7, i64 8, i64 3)
  %21 = tail call <vscale x 8 x i8> @llvm.riscv.vslidedown.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> %18, i64 1, i64 8, i64 3)
  %22 = tail call <vscale x 8 x i8> @llvm.riscv.vslideup.nxv8i8.i64(<vscale x 8 x i8> %21, <vscale x 8 x i8> %18, i64 7, i64 8, i64 3)
  %23 = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %22, i64 0)
  %24 = bitcast <16 x i8> %23 to <2 x i64>
  %25 = extractelement <2 x i64> %24, i64 0
  %26 = insertvalue [2 x i64] poison, i64 %25, 0
  %27 = extractelement <2 x i64> %24, i64 1
  %28 = insertvalue [2 x i64] %26, i64 %27, 1
  %29 = tail call <vscale x 8 x i8> @llvm.riscv.vle.nxv8i8.i64(<vscale x 8 x i8> poison, ptr %2, i64 16)
  %30 = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %29, i64 0)
  %31 = bitcast <16 x i8> %30 to <2 x i64>
  %32 = extractelement <2 x i64> %16, i64 0
  %33 = insertvalue [2 x i64] poison, i64 %32, 0
  %34 = extractelement <2 x i64> %16, i64 1
  %35 = insertvalue [2 x i64] %33, i64 %34, 1
  %36 = extractelement <2 x i64> %31, i64 0
  %37 = insertvalue [2 x i64] poison, i64 %36, 0
  %38 = extractelement <2 x i64> %31, i64 1
  %39 = insertvalue [2 x i64] %37, i64 %38, 1
  %40 = tail call fastcc [2 x i64] @bar([2 x i64] %35, [2 x i64] %39, [2 x i64] %35)
  %41 = extractvalue [2 x i64] %40, 0
  %42 = insertelement <2 x i64> undef, i64 %41, i64 0
  %43 = extractvalue [2 x i64] %40, 1
  %44 = insertelement <2 x i64> %42, i64 %43, i64 1
  %45 = bitcast <2 x i64> %44 to <16 x i8>
  %46 = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v16i8(<vscale x 8 x i8> undef, <16 x i8> %45, i64 0)
  %47 = tail call <vscale x 8 x i8> @llvm.riscv.vadd.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> %46, <vscale x 8 x i8> %13, i64 8)
  %48 = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %47, i64 0)
  %49 = bitcast <16 x i8> %48 to <2 x i64>
  %50 = extractelement <2 x i64> %49, i64 0
  %51 = insertvalue [2 x i64] poison, i64 %50, 0
  %52 = extractelement <2 x i64> %49, i64 1
  %53 = insertvalue [2 x i64] %51, i64 %52, 1
  %54 = tail call <vscale x 8 x i8> @llvm.riscv.vslidedown.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> %13, i64 3, i64 8, i64 3)
  %55 = tail call <vscale x 8 x i8> @llvm.riscv.vslideup.nxv8i8.i64(<vscale x 8 x i8> %54, <vscale x 8 x i8> %17, i64 5, i64 8, i64 3)
  %56 = tail call <vscale x 8 x i8> @llvm.riscv.vslidedown.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> %29, i64 3, i64 8, i64 3)
  %57 = tail call <vscale x 8 x i8> @llvm.riscv.vslideup.nxv8i8.i64(<vscale x 8 x i8> %56, <vscale x 8 x i8> %18, i64 5, i64 8, i64 3)
  %58 = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %57, i64 0)
  %59 = bitcast <16 x i8> %58 to <2 x i64>
  %60 = extractelement <2 x i64> %59, i64 0
  %61 = insertvalue [2 x i64] poison, i64 %60, 0
  %62 = extractelement <2 x i64> %59, i64 1
  %63 = insertvalue [2 x i64] %61, i64 %62, 1
  %64 = tail call fastcc [2 x i64] @bar([2 x i64] %53, [2 x i64] %63, [2 x i64] %39)
  %65 = extractvalue [2 x i64] %64, 0
  %66 = insertelement <2 x i64> undef, i64 %65, i64 0
  %67 = extractvalue [2 x i64] %64, 1
  %68 = insertelement <2 x i64> %66, i64 %67, i64 1
  %69 = bitcast <2 x i64> %68 to <16 x i8>
  %70 = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v16i8(<vscale x 8 x i8> undef, <16 x i8> %69, i64 0)
  %71 = tail call <vscale x 8 x i8> @llvm.riscv.vadd.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> %70, <vscale x 8 x i8> %55, i64 8)
  %72 = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %71, i64 0)
  %73 = bitcast <16 x i8> %72 to <2 x i64>
  %74 = extractelement <2 x i64> %73, i64 0
  %75 = insertvalue [2 x i64] poison, i64 %74, 0
  %76 = extractelement <2 x i64> %73, i64 1
  %77 = insertvalue [2 x i64] %75, i64 %76, 1
  %78 = tail call <vscale x 8 x i8> @llvm.riscv.vslidedown.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> %13, i64 6, i64 8, i64 3)
  %79 = tail call <vscale x 8 x i8> @llvm.riscv.vslideup.nxv8i8.i64(<vscale x 8 x i8> %78, <vscale x 8 x i8> %17, i64 2, i64 8, i64 3)
  %80 = tail call <vscale x 8 x i8> @llvm.riscv.vslidedown.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> %29, i64 6, i64 8, i64 3)
  %81 = tail call <vscale x 8 x i8> @llvm.riscv.vslideup.nxv8i8.i64(<vscale x 8 x i8> %80, <vscale x 8 x i8> %18, i64 2, i64 8, i64 3)
  %82 = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %81, i64 0)
  %83 = bitcast <16 x i8> %82 to <2 x i64>
  %84 = extractelement <2 x i64> %83, i64 0
  %85 = insertvalue [2 x i64] poison, i64 %84, 0
  %86 = extractelement <2 x i64> %83, i64 1
  %87 = insertvalue [2 x i64] %85, i64 %86, 1
  %88 = tail call fastcc [2 x i64] @bar([2 x i64] %77, [2 x i64] %87, [2 x i64] %63)
  %89 = extractvalue [2 x i64] %88, 0
  %90 = insertelement <2 x i64> undef, i64 %89, i64 0
  %91 = extractvalue [2 x i64] %88, 1
  %92 = insertelement <2 x i64> %90, i64 %91, i64 1
  %93 = bitcast <2 x i64> %92 to <16 x i8>
  %94 = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v16i8(<vscale x 8 x i8> undef, <16 x i8> %93, i64 0)
  %95 = tail call <vscale x 8 x i8> @llvm.riscv.vadd.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> %94, <vscale x 8 x i8> %79, i64 8)
  %96 = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %95, i64 0)
  %97 = bitcast <16 x i8> %96 to <2 x i64>
  %98 = extractelement <2 x i64> %97, i64 0
  %99 = insertvalue [2 x i64] poison, i64 %98, 0
  %100 = extractelement <2 x i64> %97, i64 1
  %101 = insertvalue [2 x i64] %99, i64 %100, 1
  %102 = getelementptr inbounds i8, ptr %1, i64 12
  %103 = tail call <vscale x 8 x i8> @llvm.riscv.vle.nxv8i8.i64(<vscale x 8 x i8> poison, ptr nonnull %102, i64 16)
  %104 = tail call fastcc [2 x i64] @bar([2 x i64] %101, [2 x i64] %28, [2 x i64] %87)
  %105 = extractvalue [2 x i64] %104, 0
  %106 = insertelement <2 x i64> undef, i64 %105, i64 0
  %107 = extractvalue [2 x i64] %104, 1
  %108 = insertelement <2 x i64> %106, i64 %107, i64 1
  %109 = bitcast <2 x i64> %108 to <16 x i8>
  %110 = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v16i8(<vscale x 8 x i8> undef, <16 x i8> %109, i64 0)
  %111 = tail call <vscale x 8 x i8> @llvm.riscv.vadd.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> %110, <vscale x 8 x i8> %20, i64 8)
  %112 = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %111, i64 0)
  %113 = bitcast <16 x i8> %48 to <4 x i32>
  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %4) #8
  store i64 1, ptr %4, align 8, !tbaa !13
  %114 = call <vscale x 2 x i1> @llvm.riscv.vlm.nxv2i1.i64(ptr nonnull %4, i64 2)
  %115 = tail call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v4i32(<vscale x 2 x i32> undef, <4 x i32> %113, i64 0)
  tail call void @llvm.riscv.vse.mask.nxv2i32.i64(<vscale x 2 x i32> %115, ptr %1, <vscale x 2 x i1> %114, i64 2)
  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %4) #8
  %116 = getelementptr inbounds i8, ptr %1, i64 3
  %117 = bitcast <16 x i8> %72 to <4 x i32>
  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %5) #8
  store i64 1, ptr %5, align 8, !tbaa !13
  %118 = call <vscale x 2 x i1> @llvm.riscv.vlm.nxv2i1.i64(ptr nonnull %5, i64 2)
  %119 = tail call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v4i32(<vscale x 2 x i32> undef, <4 x i32> %117, i64 0)
  tail call void @llvm.riscv.vse.mask.nxv2i32.i64(<vscale x 2 x i32> %119, ptr nonnull %116, <vscale x 2 x i1> %118, i64 2)
  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %5) #8
  %120 = getelementptr inbounds i8, ptr %1, i64 6
  %121 = bitcast <16 x i8> %96 to <4 x i32>
  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %6) #8
  store i64 1, ptr %6, align 8, !tbaa !13
  %122 = call <vscale x 2 x i1> @llvm.riscv.vlm.nxv2i1.i64(ptr nonnull %6, i64 2)
  %123 = tail call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v4i32(<vscale x 2 x i32> undef, <4 x i32> %121, i64 0)
  tail call void @llvm.riscv.vse.mask.nxv2i32.i64(<vscale x 2 x i32> %123, ptr nonnull %120, <vscale x 2 x i1> %122, i64 2)
  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %6) #8
  %124 = getelementptr inbounds i8, ptr %1, i64 9
  %125 = bitcast <16 x i8> %112 to <4 x i32>
  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %7) #8
  store i64 1, ptr %7, align 8, !tbaa !13
  %126 = call <vscale x 2 x i1> @llvm.riscv.vlm.nxv2i1.i64(ptr nonnull %7, i64 2)
  %127 = tail call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v4i32(<vscale x 2 x i32> undef, <4 x i32> %125, i64 0)
  tail call void @llvm.riscv.vse.mask.nxv2i32.i64(<vscale x 2 x i32> %127, ptr nonnull %124, <vscale x 2 x i1> %126, i64 2)
  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %7) #8
  %128 = icmp ugt i64 %9, 12
  br i1 %128, label %129, label %240

129:                                              ; preds = %12
  %130 = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %103, i64 0)
  br label %131

131:                                              ; preds = %129, %131
  %132 = phi ptr [ %102, %129 ], [ %212, %131 ]
  %133 = phi ptr [ %2, %129 ], [ %137, %131 ]
  %134 = phi <16 x i8> [ %130, %129 ], [ %214, %131 ]
  %135 = phi <16 x i8> [ %112, %129 ], [ %223, %131 ]
  %136 = bitcast <16 x i8> %135 to <2 x i64>
  %137 = getelementptr inbounds i8, ptr %133, i64 12
  %138 = tail call <vscale x 8 x i8> @llvm.riscv.vle.nxv8i8.i64(<vscale x 8 x i8> poison, ptr nonnull %137, i64 16)
  %139 = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %138, i64 0)
  %140 = bitcast <16 x i8> %139 to <2 x i64>
  %141 = extractelement <2 x i64> %136, i64 0
  %142 = insertvalue [2 x i64] poison, i64 %141, 0
  %143 = extractelement <2 x i64> %136, i64 1
  %144 = insertvalue [2 x i64] %142, i64 %143, 1
  %145 = extractelement <2 x i64> %140, i64 0
  %146 = insertvalue [2 x i64] poison, i64 %145, 0
  %147 = extractelement <2 x i64> %140, i64 1
  %148 = insertvalue [2 x i64] %146, i64 %147, 1
  %149 = tail call fastcc [2 x i64] @bar([2 x i64] %144, [2 x i64] %148, [2 x i64] %28)
  %150 = extractvalue [2 x i64] %149, 0
  %151 = insertelement <2 x i64> undef, i64 %150, i64 0
  %152 = extractvalue [2 x i64] %149, 1
  %153 = insertelement <2 x i64> %151, i64 %152, i64 1
  %154 = bitcast <2 x i64> %153 to <16 x i8>
  %155 = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v16i8(<vscale x 8 x i8> undef, <16 x i8> %154, i64 0)
  %156 = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v16i8(<vscale x 8 x i8> undef, <16 x i8> %134, i64 0)
  %157 = tail call <vscale x 8 x i8> @llvm.riscv.vadd.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> %155, <vscale x 8 x i8> %156, i64 8)
  %158 = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %157, i64 0)
  %159 = bitcast <16 x i8> %158 to <2 x i64>
  %160 = extractelement <2 x i64> %159, i64 0
  %161 = insertvalue [2 x i64] poison, i64 %160, 0
  %162 = extractelement <2 x i64> %159, i64 1
  %163 = insertvalue [2 x i64] %161, i64 %162, 1
  %164 = tail call <vscale x 8 x i8> @llvm.riscv.vslidedown.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> %156, i64 3, i64 8, i64 3)
  %165 = tail call <vscale x 8 x i8> @llvm.riscv.vslideup.nxv8i8.i64(<vscale x 8 x i8> %164, <vscale x 8 x i8> %17, i64 5, i64 8, i64 3)
  %166 = tail call <vscale x 8 x i8> @llvm.riscv.vslidedown.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> %138, i64 3, i64 8, i64 3)
  %167 = tail call <vscale x 8 x i8> @llvm.riscv.vslideup.nxv8i8.i64(<vscale x 8 x i8> %166, <vscale x 8 x i8> %18, i64 5, i64 8, i64 3)
  %168 = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %167, i64 0)
  %169 = bitcast <16 x i8> %168 to <2 x i64>
  %170 = extractelement <2 x i64> %169, i64 0
  %171 = insertvalue [2 x i64] poison, i64 %170, 0
  %172 = extractelement <2 x i64> %169, i64 1
  %173 = insertvalue [2 x i64] %171, i64 %172, 1
  %174 = tail call fastcc [2 x i64] @bar([2 x i64] %163, [2 x i64] %173, [2 x i64] %148)
  %175 = extractvalue [2 x i64] %174, 0
  %176 = insertelement <2 x i64> undef, i64 %175, i64 0
  %177 = extractvalue [2 x i64] %174, 1
  %178 = insertelement <2 x i64> %176, i64 %177, i64 1
  %179 = bitcast <2 x i64> %178 to <16 x i8>
  %180 = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v16i8(<vscale x 8 x i8> undef, <16 x i8> %179, i64 0)
  %181 = tail call <vscale x 8 x i8> @llvm.riscv.vadd.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> %180, <vscale x 8 x i8> %165, i64 8)
  %182 = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %181, i64 0)
  %183 = bitcast <16 x i8> %182 to <2 x i64>
  %184 = extractelement <2 x i64> %183, i64 0
  %185 = insertvalue [2 x i64] poison, i64 %184, 0
  %186 = extractelement <2 x i64> %183, i64 1
  %187 = insertvalue [2 x i64] %185, i64 %186, 1
  %188 = tail call <vscale x 8 x i8> @llvm.riscv.vslidedown.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> %156, i64 6, i64 8, i64 3)
  %189 = tail call <vscale x 8 x i8> @llvm.riscv.vslideup.nxv8i8.i64(<vscale x 8 x i8> %188, <vscale x 8 x i8> %17, i64 2, i64 8, i64 3)
  %190 = tail call <vscale x 8 x i8> @llvm.riscv.vslidedown.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> %138, i64 6, i64 8, i64 3)
  %191 = tail call <vscale x 8 x i8> @llvm.riscv.vslideup.nxv8i8.i64(<vscale x 8 x i8> %190, <vscale x 8 x i8> %18, i64 2, i64 8, i64 3)
  %192 = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %191, i64 0)
  %193 = bitcast <16 x i8> %192 to <2 x i64>
  %194 = extractelement <2 x i64> %193, i64 0
  %195 = insertvalue [2 x i64] poison, i64 %194, 0
  %196 = extractelement <2 x i64> %193, i64 1
  %197 = insertvalue [2 x i64] %195, i64 %196, 1
  %198 = tail call fastcc [2 x i64] @bar([2 x i64] %187, [2 x i64] %197, [2 x i64] %173)
  %199 = extractvalue [2 x i64] %198, 0
  %200 = insertelement <2 x i64> undef, i64 %199, i64 0
  %201 = extractvalue [2 x i64] %198, 1
  %202 = insertelement <2 x i64> %200, i64 %201, i64 1
  %203 = bitcast <2 x i64> %202 to <16 x i8>
  %204 = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v16i8(<vscale x 8 x i8> undef, <16 x i8> %203, i64 0)
  %205 = tail call <vscale x 8 x i8> @llvm.riscv.vadd.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> %204, <vscale x 8 x i8> %189, i64 8)
  %206 = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %205, i64 0)
  %207 = bitcast <16 x i8> %206 to <2 x i64>
  %208 = extractelement <2 x i64> %207, i64 0
  %209 = insertvalue [2 x i64] poison, i64 %208, 0
  %210 = extractelement <2 x i64> %207, i64 1
  %211 = insertvalue [2 x i64] %209, i64 %210, 1
  %212 = getelementptr inbounds i8, ptr %132, i64 12
  %213 = tail call <vscale x 8 x i8> @llvm.riscv.vle.nxv8i8.i64(<vscale x 8 x i8> poison, ptr nonnull %212, i64 16)
  %214 = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %213, i64 0)
  %215 = tail call fastcc [2 x i64] @bar([2 x i64] %211, [2 x i64] %28, [2 x i64] %197)
  %216 = extractvalue [2 x i64] %215, 0
  %217 = insertelement <2 x i64> undef, i64 %216, i64 0
  %218 = extractvalue [2 x i64] %215, 1
  %219 = insertelement <2 x i64> %217, i64 %218, i64 1
  %220 = bitcast <2 x i64> %219 to <16 x i8>
  %221 = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v16i8(<vscale x 8 x i8> undef, <16 x i8> %220, i64 0)
  %222 = tail call <vscale x 8 x i8> @llvm.riscv.vadd.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> poison, <vscale x 8 x i8> %221, <vscale x 8 x i8> %20, i64 8)
  %223 = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %222, i64 0)
  %224 = bitcast <16 x i8> %158 to <4 x i32>
  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %4) #8
  store i64 1, ptr %4, align 8, !tbaa !13
  %225 = call <vscale x 2 x i1> @llvm.riscv.vlm.nxv2i1.i64(ptr nonnull %4, i64 2)
  %226 = tail call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v4i32(<vscale x 2 x i32> undef, <4 x i32> %224, i64 0)
  tail call void @llvm.riscv.vse.mask.nxv2i32.i64(<vscale x 2 x i32> %226, ptr nonnull %132, <vscale x 2 x i1> %225, i64 2)
  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %4) #8
  %227 = getelementptr inbounds i8, ptr %132, i64 3
  %228 = bitcast <16 x i8> %182 to <4 x i32>
  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %5) #8
  store i64 1, ptr %5, align 8, !tbaa !13
  %229 = call <vscale x 2 x i1> @llvm.riscv.vlm.nxv2i1.i64(ptr nonnull %5, i64 2)
  %230 = tail call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v4i32(<vscale x 2 x i32> undef, <4 x i32> %228, i64 0)
  tail call void @llvm.riscv.vse.mask.nxv2i32.i64(<vscale x 2 x i32> %230, ptr nonnull %227, <vscale x 2 x i1> %229, i64 2)
  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %5) #8
  %231 = getelementptr inbounds i8, ptr %132, i64 6
  %232 = bitcast <16 x i8> %206 to <4 x i32>
  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %6) #8
  store i64 1, ptr %6, align 8, !tbaa !13
  %233 = call <vscale x 2 x i1> @llvm.riscv.vlm.nxv2i1.i64(ptr nonnull %6, i64 2)
  %234 = tail call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v4i32(<vscale x 2 x i32> undef, <4 x i32> %232, i64 0)
  tail call void @llvm.riscv.vse.mask.nxv2i32.i64(<vscale x 2 x i32> %234, ptr nonnull %231, <vscale x 2 x i1> %233, i64 2)
  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %6) #8
  %235 = getelementptr inbounds i8, ptr %132, i64 9
  %236 = bitcast <16 x i8> %223 to <4 x i32>
  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %7) #8
  store i64 1, ptr %7, align 8, !tbaa !13
  %237 = call <vscale x 2 x i1> @llvm.riscv.vlm.nxv2i1.i64(ptr nonnull %7, i64 2)
  %238 = tail call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v4i32(<vscale x 2 x i32> undef, <4 x i32> %236, i64 0)
  tail call void @llvm.riscv.vse.mask.nxv2i32.i64(<vscale x 2 x i32> %238, ptr nonnull %235, <vscale x 2 x i1> %237, i64 2)
  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %7) #8
  %239 = icmp ult ptr %212, %10
  br i1 %239, label %131, label %240, !llvm.loop !18

240:                                              ; preds = %131, %12, %3
  ret void
}

; Function Attrs: mustprogress nofree noinline nosync nounwind willreturn memory(none) uwtable vscale_range(2,2)
declare fastcc [2 x i64] @bar([2 x i64] %0, [2 x i64] %1, [2 x i64] %2) #6

; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
declare <vscale x 8 x i8> @llvm.riscv.vadd.nxv8i8.nxv8i8.i64(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, i64) #5

; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
declare <vscale x 8 x i8> @llvm.riscv.vmv.v.x.nxv8i8.i64(<vscale x 8 x i8>, i8, i64) #5

attributes #0 = { nofree noinline nosync nounwind uwtable vscale_range(2,2) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic-rv64" "target-features"="+64bit,+a,+d,+f,+m,+relax,+v,+zicsr,+zifencei,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl32b,+zvl64b,-c,-e,-h,-save-restore,-svinval,-svnapot,-svpbmt,-unaligned-scalar-mem,-unaligned-vector-mem,-xcvalu,-xcvbi,-xcvbitmanip,-xcvmac,-xcvsimd,-xsfcie,-xsfvcp,-xventanacondops,-zawrs,-zba,-zbb,-zbc,-zbkb,-zbkc,-zbkx,-zbs,-zca,-zcb,-zcd,-zce,-zcf,-zcmp,-zcmt,-zdinx,-zfh,-zfhmin,-zfinx,-zhinx,-zhinxmin,-zicbom,-zicbop,-zicboz,-zicntr,-zihintntl,-zihintpause,-zihpm,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-zkt,-zmmul,-zvfh,-zvl1024b,-zvl16384b,-zvl2048b,-zvl256b,-zvl32768b,-zvl4096b,-zvl512b,-zvl65536b,-zvl8192b" }
attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn memory(read) }
attributes #3 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) }
attributes #4 = { mustprogress nocallback nofree nosync nounwind willreturn memory(write) }
attributes #5 = { mustprogress nocallback nofree nosync nounwind willreturn memory(none) }
attributes #6 = { mustprogress nofree noinline nosync nounwind willreturn memory(none) uwtable vscale_range(2,2) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic-rv64" "target-features"="+64bit,+a,+d,+f,+m,+relax,+v,+zicsr,+zifencei,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl32b,+zvl64b,-c,-e,-h,-save-restore,-svinval,-svnapot,-svpbmt,-unaligned-scalar-mem,-unaligned-vector-mem,-xcvalu,-xcvbi,-xcvbitmanip,-xcvmac,-xcvsimd,-xsfcie,-xsfvcp,-xventanacondops,-zawrs,-zba,-zbb,-zbc,-zbkb,-zbkc,-zbkx,-zbs,-zca,-zcb,-zcd,-zce,-zcf,-zcmp,-zcmt,-zdinx,-zfh,-zfhmin,-zfinx,-zhinx,-zhinxmin,-zicbom,-zicbop,-zicboz,-zicntr,-zihintntl,-zihintpause,-zihpm,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-zkt,-zmmul,-zvfh,-zvl1024b,-zvl16384b,-zvl2048b,-zvl256b,-zvl32768b,-zvl4096b,-zvl512b,-zvl65536b,-zvl8192b" }
attributes #7 = { nofree nosync nounwind memory(none) }
attributes #8 = { nounwind }

!llvm.module.flags = !{!0, !1, !2, !3}
!llvm.ident = !{!4}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 1, !"target-abi", !"lp64d"}
!2 = !{i32 7, !"uwtable", i32 2}
!3 = !{i32 8, !"SmallDataLimit", i32 8}
!4 = !{!"clang version 18.0.0"}
!5 = !{!6, !10, i64 8}
!6 = !{!"png_row_info_struct", !7, i64 0, !10, i64 8, !8, i64 16, !8, i64 17, !8, i64 18, !8, i64 19}
!7 = !{!"int", !8, i64 0}
!8 = !{!"omnipotent char", !9, i64 0}
!9 = !{!"Simple C/C++ TBAA"}
!10 = !{!"long", !8, i64 0}
!11 = distinct !{!11, !12}
!12 = !{!"llvm.loop.mustprogress"}
!13 = !{!10, !10, i64 0}
!14 = distinct !{!14, !12}
!15 = distinct !{!15, !12}
!16 = distinct !{!16, !12}
!17 = distinct !{!17, !12}
!18 = distinct !{!18, !12, !19}
!19 = !{!"llvm.loop.peeled.count", i32 1}
!20 = distinct !{!20, !12}
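
For reference, a minimal sketch of the constraint the generated code violates. The register numbers, vsetivli configuration, and slide offset below are illustrative only and are not taken from the actual codegen output:

vsetivli zero, 8, e8, m1, ta, ma
vslideup.vi v8, v8, 5    # reserved encoding: vd overlaps the vs2 source
vslideup.vi v9, v8, 5    # legal: destination is a distinct register group

The specification reserves the overlapping form because an in-place slide-up could overwrite source elements before they are read, so the backend must either allocate distinct registers for the two operands or materialize a copy of the source first.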
