diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index cd26a3df78602..d05cf1c6e1814 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -1301,27 +1301,25 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty,
   // If coercing a fixed vector to a scalable vector for ABI compatibility, and
   // the types match, use the llvm.vector.insert intrinsic to perform the
   // conversion.
-  if (auto *ScalableDst = dyn_cast<llvm::ScalableVectorType>(Ty)) {
-    if (auto *FixedSrc = dyn_cast<llvm::FixedVectorType>(SrcTy)) {
-      // If we are casting a fixed i8 vector to a scalable 16 x i1 predicate
+  if (auto *ScalableDstTy = dyn_cast<llvm::ScalableVectorType>(Ty)) {
+    if (auto *FixedSrcTy = dyn_cast<llvm::FixedVectorType>(SrcTy)) {
+      // If we are casting a fixed i8 vector to a scalable i1 predicate
       // vector, use a vector insert and bitcast the result.
-      bool NeedsBitcast = false;
-      auto PredType =
-          llvm::ScalableVectorType::get(CGF.Builder.getInt1Ty(), 16);
-      llvm::Type *OrigType = Ty;
-      if (ScalableDst == PredType &&
-          FixedSrc->getElementType() == CGF.Builder.getInt8Ty()) {
-        ScalableDst = llvm::ScalableVectorType::get(CGF.Builder.getInt8Ty(), 2);
-        NeedsBitcast = true;
+      if (ScalableDstTy->getElementType()->isIntegerTy(1) &&
+          ScalableDstTy->getElementCount().isKnownMultipleOf(8) &&
+          FixedSrcTy->getElementType()->isIntegerTy(8)) {
+        ScalableDstTy = llvm::ScalableVectorType::get(
+            FixedSrcTy->getElementType(),
+            ScalableDstTy->getElementCount().getKnownMinValue() / 8);
       }
-      if (ScalableDst->getElementType() == FixedSrc->getElementType()) {
+      if (ScalableDstTy->getElementType() == FixedSrcTy->getElementType()) {
         auto *Load = CGF.Builder.CreateLoad(Src);
-        auto *UndefVec = llvm::UndefValue::get(ScalableDst);
+        auto *UndefVec = llvm::UndefValue::get(ScalableDstTy);
         auto *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty);
         llvm::Value *Result = CGF.Builder.CreateInsertVector(
-            ScalableDst, UndefVec, Load, Zero, "cast.scalable");
-        if (NeedsBitcast)
-          Result = CGF.Builder.CreateBitCast(Result, OrigType);
+            ScalableDstTy, UndefVec, Load, Zero, "cast.scalable");
+        if (ScalableDstTy != Ty)
+          Result = CGF.Builder.CreateBitCast(Result, Ty);
         return Result;
       }
     }
@@ -3199,13 +3197,14 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
         llvm::Value *Coerced = Fn->getArg(FirstIRArg);
         if (auto *VecTyFrom =
                 dyn_cast<llvm::ScalableVectorType>(Coerced->getType())) {
-          // If we are casting a scalable 16 x i1 predicate vector to a fixed i8
+          // If we are casting a scalable i1 predicate vector to a fixed i8
           // vector, bitcast the source and use a vector extract.
-          auto PredType =
-              llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
-          if (VecTyFrom == PredType &&
+          if (VecTyFrom->getElementType()->isIntegerTy(1) &&
+              VecTyFrom->getElementCount().isKnownMultipleOf(8) &&
              VecTyTo->getElementType() == Builder.getInt8Ty()) {
-            VecTyFrom = llvm::ScalableVectorType::get(Builder.getInt8Ty(), 2);
+            VecTyFrom = llvm::ScalableVectorType::get(
+                VecTyTo->getElementType(),
+                VecTyFrom->getElementCount().getKnownMinValue() / 8);
             Coerced = Builder.CreateBitCast(Coerced, VecTyFrom);
           }
           if (VecTyFrom->getElementType() == VecTyTo->getElementType()) {
@@ -5877,12 +5876,13 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
       // If coercing a fixed vector from a scalable vector for ABI
       // compatibility, and the types match, use the llvm.vector.extract
      // intrinsic to perform the conversion.
-      if (auto *FixedDst = dyn_cast<llvm::FixedVectorType>(RetIRTy)) {
+      if (auto *FixedDstTy = dyn_cast<llvm::FixedVectorType>(RetIRTy)) {
         llvm::Value *V = CI;
-        if (auto *ScalableSrc = dyn_cast<llvm::ScalableVectorType>(V->getType())) {
-          if (FixedDst->getElementType() == ScalableSrc->getElementType()) {
+        if (auto *ScalableSrcTy =
+                dyn_cast<llvm::ScalableVectorType>(V->getType())) {
+          if (FixedDstTy->getElementType() == ScalableSrcTy->getElementType()) {
            llvm::Value *Zero = llvm::Constant::getNullValue(CGM.Int64Ty);
-            V = Builder.CreateExtractVector(FixedDst, V, Zero, "cast.fixed");
+            V = Builder.CreateExtractVector(FixedDstTy, V, Zero, "cast.fixed");
             return RValue::get(V);
           }
         }
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index fa03163bbde57..aa805f291d175 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2137,26 +2137,24 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
     // If Src is a fixed vector and Dst is a scalable vector, and both have the
     // same element type, use the llvm.vector.insert intrinsic to perform the
     // bitcast.
-    if (const auto *FixedSrc = dyn_cast<llvm::FixedVectorType>(SrcTy)) {
-      if (const auto *ScalableDst = dyn_cast<llvm::ScalableVectorType>(DstTy)) {
-        // If we are casting a fixed i8 vector to a scalable 16 x i1 predicate
+    if (auto *FixedSrcTy = dyn_cast<llvm::FixedVectorType>(SrcTy)) {
+      if (auto *ScalableDstTy = dyn_cast<llvm::ScalableVectorType>(DstTy)) {
+        // If we are casting a fixed i8 vector to a scalable i1 predicate
         // vector, use a vector insert and bitcast the result.
-        bool NeedsBitCast = false;
-        auto PredType = llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
-        llvm::Type *OrigType = DstTy;
-        if (ScalableDst == PredType &&
-            FixedSrc->getElementType() == Builder.getInt8Ty()) {
-          DstTy = llvm::ScalableVectorType::get(Builder.getInt8Ty(), 2);
-          ScalableDst = cast<llvm::ScalableVectorType>(DstTy);
-          NeedsBitCast = true;
+        if (ScalableDstTy->getElementType()->isIntegerTy(1) &&
+            ScalableDstTy->getElementCount().isKnownMultipleOf(8) &&
+            FixedSrcTy->getElementType()->isIntegerTy(8)) {
+          ScalableDstTy = llvm::ScalableVectorType::get(
+              FixedSrcTy->getElementType(),
+              ScalableDstTy->getElementCount().getKnownMinValue() / 8);
         }
-        if (FixedSrc->getElementType() == ScalableDst->getElementType()) {
-          llvm::Value *UndefVec = llvm::UndefValue::get(DstTy);
+        if (FixedSrcTy->getElementType() == ScalableDstTy->getElementType()) {
+          llvm::Value *UndefVec = llvm::UndefValue::get(ScalableDstTy);
           llvm::Value *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty);
           llvm::Value *Result = Builder.CreateInsertVector(
-              DstTy, UndefVec, Src, Zero, "cast.scalable");
-          if (NeedsBitCast)
-            Result = Builder.CreateBitCast(Result, OrigType);
+              ScalableDstTy, UndefVec, Src, Zero, "cast.scalable");
+          if (Result->getType() != DstTy)
+            Result = Builder.CreateBitCast(Result, DstTy);
           return Result;
         }
       }
@@ -2165,18 +2163,19 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
     // If Src is a scalable vector and Dst is a fixed vector, and both have the
     // same element type, use the llvm.vector.extract intrinsic to perform the
     // bitcast.
-    if (const auto *ScalableSrc = dyn_cast<llvm::ScalableVectorType>(SrcTy)) {
-      if (const auto *FixedDst = dyn_cast<llvm::FixedVectorType>(DstTy)) {
-        // If we are casting a scalable 16 x i1 predicate vector to a fixed i8
+    if (auto *ScalableSrcTy = dyn_cast<llvm::ScalableVectorType>(SrcTy)) {
+      if (auto *FixedDstTy = dyn_cast<llvm::FixedVectorType>(DstTy)) {
+        // If we are casting a scalable i1 predicate vector to a fixed i8
         // vector, bitcast the source and use a vector extract.
-        auto PredType = llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
-        if (ScalableSrc == PredType &&
-            FixedDst->getElementType() == Builder.getInt8Ty()) {
-          SrcTy = llvm::ScalableVectorType::get(Builder.getInt8Ty(), 2);
-          ScalableSrc = cast<llvm::ScalableVectorType>(SrcTy);
-          Src = Builder.CreateBitCast(Src, SrcTy);
+        if (ScalableSrcTy->getElementType()->isIntegerTy(1) &&
+            ScalableSrcTy->getElementCount().isKnownMultipleOf(8) &&
+            FixedDstTy->getElementType()->isIntegerTy(8)) {
+          ScalableSrcTy = llvm::ScalableVectorType::get(
+              FixedDstTy->getElementType(),
+              ScalableSrcTy->getElementCount().getKnownMinValue() / 8);
+          Src = Builder.CreateBitCast(Src, ScalableSrcTy);
         }
-        if (ScalableSrc->getElementType() == FixedDst->getElementType()) {
+        if (ScalableSrcTy->getElementType() == FixedDstTy->getElementType()) {
           llvm::Value *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty);
           return Builder.CreateExtractVector(DstTy, Src, Zero, "cast.fixed");
         }
diff --git a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-bitcast.c b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-bitcast.c
index a7b3123e61cd5..20fb4a04564c7 100644
--- a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-bitcast.c
+++ b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-bitcast.c
@@ -177,29 +177,26 @@ void write_float64m1(struct struct_float64m1 *s, vfloat64m1_t x) {
 
 // CHECK-64-LABEL: @read_bool1(
 // CHECK-64-NEXT:  entry:
-// CHECK-64-NEXT:    [[SAVED_VALUE:%.*]] = alloca <8 x i8>, align 8
 // CHECK-64-NEXT:    [[Y:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 8
 // CHECK-64-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[Y]], align 8, !tbaa [[TBAA4]]
-// CHECK-64-NEXT:    store <8 x i8> [[TMP0]], ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA4]]
-// CHECK-64-NEXT:    [[TMP1:%.*]] = load <vscale x 64 x i1>, ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA4]]
+// CHECK-64-NEXT:    [[CAST_SCALABLE:%.*]] = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v8i8(<vscale x 8 x i8> undef, <8 x i8> [[TMP0]], i64 0)
+// CHECK-64-NEXT:    [[TMP1:%.*]] = bitcast <vscale x 8 x i8> [[CAST_SCALABLE]] to <vscale x 64 x i1>
 // CHECK-64-NEXT:    ret <vscale x 64 x i1> [[TMP1]]
 //
 // CHECK-128-LABEL: @read_bool1(
 // CHECK-128-NEXT:  entry:
-// CHECK-128-NEXT:    [[SAVED_VALUE:%.*]] = alloca <16 x i8>, align 16
 // CHECK-128-NEXT:    [[Y:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 16
 // CHECK-128-NEXT:    [[TMP0:%.*]] = load <16 x i8>, ptr [[Y]], align 8, !tbaa [[TBAA4]]
-// CHECK-128-NEXT:    store <16 x i8> [[TMP0]], ptr [[SAVED_VALUE]], align 16, !tbaa [[TBAA4]]
-// CHECK-128-NEXT:    [[TMP1:%.*]] = load <vscale x 64 x i1>, ptr [[SAVED_VALUE]], align 16, !tbaa [[TBAA4]]
+// CHECK-128-NEXT:    [[CAST_SCALABLE:%.*]] = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v16i8(<vscale x 8 x i8> undef, <16 x i8> [[TMP0]], i64 0)
+// CHECK-128-NEXT:    [[TMP1:%.*]] = bitcast <vscale x 8 x i8> [[CAST_SCALABLE]] to <vscale x 64 x i1>
 // CHECK-128-NEXT:    ret <vscale x 64 x i1> [[TMP1]]
 //
 // CHECK-256-LABEL: @read_bool1(
 // CHECK-256-NEXT:  entry:
-// CHECK-256-NEXT:    [[SAVED_VALUE:%.*]] = alloca <32 x i8>, align 32
 // CHECK-256-NEXT:    [[Y:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 32
 // CHECK-256-NEXT:    [[TMP0:%.*]] = load <32 x i8>, ptr [[Y]], align 8, !tbaa [[TBAA4]]
-// CHECK-256-NEXT:    store <32 x i8> [[TMP0]], ptr [[SAVED_VALUE]], align 32, !tbaa [[TBAA4]]
-// CHECK-256-NEXT:    [[TMP1:%.*]] = load <vscale x 64 x i1>, ptr [[SAVED_VALUE]], align 32, !tbaa [[TBAA4]]
+// CHECK-256-NEXT:    [[CAST_SCALABLE:%.*]] = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[TMP0]], i64 0)
+// CHECK-256-NEXT:    [[TMP1:%.*]] = bitcast <vscale x 8 x i8> [[CAST_SCALABLE]] to <vscale x 64 x i1>
 // CHECK-256-NEXT:    ret <vscale x 64 x i1> [[TMP1]]
 //
 vbool1_t read_bool1(struct struct_bool1 *s) {
@@ -208,29 +205,26 @@ vbool1_t read_bool1(struct struct_bool1 *s) {
 
 // CHECK-64-LABEL: @write_bool1(
 // CHECK-64-NEXT:  entry:
-// CHECK-64-NEXT:    [[SAVED_VALUE:%.*]] = alloca <vscale x 64 x i1>, align 8
-// CHECK-64-NEXT:    store <vscale x 64 x i1> [[X:%.*]], ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA7:![0-9]+]]
-// CHECK-64-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA4]]
+// CHECK-64-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 64 x i1> [[X:%.*]] to <vscale x 8 x i8>
+// CHECK-64-NEXT:    [[CAST_FIXED:%.*]] = tail call <8 x i8> @llvm.vector.extract.v8i8.nxv8i8(<vscale x 8 x i8> [[TMP0]], i64 0)
 // CHECK-64-NEXT:    [[Y:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 8
-// CHECK-64-NEXT:    store <8 x i8> [[TMP0]], ptr [[Y]], align 8, !tbaa [[TBAA4]]
+// CHECK-64-NEXT:    store <8 x i8> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[TBAA4]]
 // CHECK-64-NEXT:    ret void
 //
 // CHECK-128-LABEL: @write_bool1(
 // CHECK-128-NEXT:  entry:
-// CHECK-128-NEXT:    [[SAVED_VALUE:%.*]] = alloca <vscale x 64 x i1>, align 16
-// CHECK-128-NEXT:    store <vscale x 64 x i1> [[X:%.*]], ptr [[SAVED_VALUE]], align 16, !tbaa [[TBAA7:![0-9]+]]
-// CHECK-128-NEXT:    [[TMP0:%.*]] = load <16 x i8>, ptr [[SAVED_VALUE]], align 16, !tbaa [[TBAA4]]
+// CHECK-128-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 64 x i1> [[X:%.*]] to <vscale x 8 x i8>
+// CHECK-128-NEXT:    [[CAST_FIXED:%.*]] = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> [[TMP0]], i64 0)
 // CHECK-128-NEXT:    [[Y:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 16
-// CHECK-128-NEXT:    store <16 x i8> [[TMP0]], ptr [[Y]], align 8, !tbaa [[TBAA4]]
+// CHECK-128-NEXT:    store <16 x i8> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[TBAA4]]
 // CHECK-128-NEXT:    ret void
 //
 // CHECK-256-LABEL: @write_bool1(
 // CHECK-256-NEXT:  entry:
-// CHECK-256-NEXT:    [[SAVED_VALUE:%.*]] = alloca <vscale x 64 x i1>, align 8
-// CHECK-256-NEXT:    store <vscale x 64 x i1> [[X:%.*]], ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA7:![0-9]+]]
-// CHECK-256-NEXT:    [[TMP0:%.*]] = load <32 x i8>, ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA4]]
+// CHECK-256-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 64 x i1> [[X:%.*]] to <vscale x 8 x i8>
+// CHECK-256-NEXT:    [[CAST_FIXED:%.*]] = tail call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[TMP0]], i64 0)
 // CHECK-256-NEXT:    [[Y:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 32
-// CHECK-256-NEXT:    store <32 x i8> [[TMP0]], ptr [[Y]], align 8, !tbaa [[TBAA4]]
+// CHECK-256-NEXT:    store <32 x i8> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[TBAA4]]
 // CHECK-256-NEXT:    ret void
 //
 void write_bool1(struct struct_bool1 *s, vbool1_t x) {
diff --git a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-call.c b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-call.c
index 888abe1a7bc3f..1824d97d04dda 100644
--- a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-call.c
+++ b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-call.c
@@ -70,13 +70,7 @@ fixed_float64m1_t call_float64_ff(fixed_float64m1_t op1, fixed_float64m1_t op2)
 
 // CHECK-LABEL: @call_bool1_ff(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SAVED_VALUE4:%.*]] = alloca <vscale x 64 x i1>, align 8
-// CHECK-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 64 x i1>, align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 64 x i1> @llvm.riscv.vmand.nxv64i1.i64(<vscale x 64 x i1> [[OP1_COERCE:%.*]], <vscale x 64 x i1> [[OP2_COERCE:%.*]], i64 256)
-// CHECK-NEXT:    store <vscale x 64 x i1> [[TMP0]], ptr [[SAVED_VALUE4]], align 8, !tbaa [[TBAA4:![0-9]+]]
-// CHECK-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr [[SAVED_VALUE4]], align 8, !tbaa [[TBAA8:![0-9]+]]
-// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 8
-// CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 64 x i1>, ptr [[RETVAL_COERCE]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 64 x i1> @llvm.riscv.vmand.nxv64i1.i64(<vscale x 64 x i1> [[TMP0:%.*]], <vscale x 64 x i1> [[TMP1:%.*]], i64 256)
 // CHECK-NEXT:    ret <vscale x 64 x i1> [[TMP2]]
 //
 fixed_bool1_t call_bool1_ff(fixed_bool1_t op1, fixed_bool1_t op2) {
@@ -116,14 +110,8 @@ fixed_float64m1_t call_float64_fs(fixed_float64m1_t op1, vfloat64m1_t op2) {
 
 // CHECK-LABEL: @call_bool1_fs(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SAVED_VALUE2:%.*]] = alloca <vscale x 64 x i1>, align 8
-// CHECK-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 64 x i1>, align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 64 x i1> @llvm.riscv.vmand.nxv64i1.i64(<vscale x 64 x i1> [[OP1_COERCE:%.*]], <vscale x 64 x i1> [[OP2:%.*]], i64 256)
-// CHECK-NEXT:    store <vscale x 64 x i1> [[TMP0]], ptr [[SAVED_VALUE2]], align 8, !tbaa [[TBAA4]]
-// CHECK-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr [[SAVED_VALUE2]], align 8, !tbaa [[TBAA8]]
-// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 8
-// CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 64 x i1>, ptr [[RETVAL_COERCE]], align 8
-// CHECK-NEXT:    ret <vscale x 64 x i1> [[TMP2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 64 x i1> @llvm.riscv.vmand.nxv64i1.i64(<vscale x 64 x i1> [[TMP0:%.*]], <vscale x 64 x i1> [[OP2:%.*]], i64 256)
+// CHECK-NEXT:    ret <vscale x 64 x i1> [[TMP1]]
 //
 fixed_bool1_t call_bool1_fs(fixed_bool1_t op1, vbool1_t op2) {
   return __riscv_vmand(op1, op2, __riscv_v_fixed_vlen);
@@ -162,14 +150,8 @@ fixed_float64m1_t call_float64_ss(vfloat64m1_t op1, vfloat64m1_t op2) {
 
 // CHECK-LABEL: @call_bool1_ss(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SAVED_VALUE:%.*]] = alloca <vscale x 64 x i1>, align 8
-// CHECK-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 64 x i1>, align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 64 x i1> @llvm.riscv.vmand.nxv64i1.i64(<vscale x 64 x i1> [[OP1:%.*]], <vscale x 64 x i1> [[OP2:%.*]], i64 256)
-// CHECK-NEXT:    store <vscale x 64 x i1> [[TMP0]], ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA4]]
-// CHECK-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA8]]
-// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 8
-// CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 64 x i1>, ptr [[RETVAL_COERCE]], align 8
-// CHECK-NEXT:    ret <vscale x 64 x i1> [[TMP2]]
+// CHECK-NEXT:    ret <vscale x 64 x i1> [[TMP0]]
 //
 fixed_bool1_t call_bool1_ss(vbool1_t op1, vbool1_t op2) {
   return __riscv_vmand(op1, op2, __riscv_v_fixed_vlen);
diff --git a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-cast.c b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-cast.c
index fe278174bf681..3806c3e1b30bb 100644
--- a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-cast.c
+++ b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-cast.c
@@ -65,13 +65,7 @@ fixed_float64m1_t from_vfloat64m1_t(vfloat64m1_t type) {
 
 // CHECK-LABEL: @from_vbool1_t(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SAVED_VALUE:%.*]] = alloca <vscale x 64 x i1>, align 8
-// CHECK-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 64 x i1>, align 8
-// CHECK-NEXT:    store <vscale x 64 x i1> [[TYPE:%.*]], ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA4:![0-9]+]]
-// CHECK-NEXT:    [[TMP0:%.*]] = load <32 x i8>, ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA8:![0-9]+]]
-// CHECK-NEXT:    store <32 x i8> [[TMP0]], ptr [[RETVAL_COERCE]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = load <vscale x 64 x i1>, ptr [[RETVAL_COERCE]], align 8
-// CHECK-NEXT:    ret <vscale x 64 x i1> [[TMP1]]
+// CHECK-NEXT:    ret <vscale x 64 x i1> [[TYPE:%.*]]
 //
 fixed_bool1_t from_vbool1_t(vbool1_t type) {
   return type;
@@ -79,7 +73,7 @@ fixed_bool1_t from_vbool1_t(vbool1_t type) {
 
 // CHECK-LABEL: @to_vbool1_t(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    ret <vscale x 64 x i1> [[TYPE_COERCE:%.*]]
+// CHECK-NEXT:    ret <vscale x 64 x i1> [[TMP0:%.*]]
 //
 vbool1_t to_vbool1_t(fixed_bool1_t type) {
   return type;
@@ -105,8 +99,8 @@ vbool4_t to_vbool4_t(fixed_bool4_t type) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[SAVED_VALUE:%.*]] = alloca <vscale x 2 x i1>, align 1
 // CHECK-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x i1>, align 1
-// CHECK-NEXT:    store <vscale x 2 x i1> [[TYPE:%.*]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA9:![0-9]+]]
-// CHECK-NEXT:    [[TMP0:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA8]]
+// CHECK-NEXT:    store <vscale x 2 x i1> [[TYPE:%.*]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA4:![0-9]+]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA8:![0-9]+]]
 // CHECK-NEXT:    store <1 x i8> [[TMP0]], ptr [[RETVAL_COERCE]], align 1
 // CHECK-NEXT:    [[TMP1:%.*]] = load <vscale x 2 x i1>, ptr [[RETVAL_COERCE]], align 1
 // CHECK-NEXT:    ret <vscale x 2 x i1> [[TMP1]]
diff --git a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-codegen.c b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-codegen.c
index ac22bdce0da3e..eb769fadda9a8 100644
--- a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-codegen.c
+++ b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-codegen.c
@@ -53,25 +53,24 @@ fixed_bool32_t global_bool32;
 // CHECK-NEXT:    [[M_ADDR:%.*]] = alloca <vscale x 64 x i1>, align 1
 // CHECK-NEXT:    [[VEC_ADDR:%.*]] = alloca <vscale x 64 x i8>, align 1
 // CHECK-NEXT:    [[MASK:%.*]] = alloca <vscale x 64 x i1>, align 1
-// CHECK-NEXT:    [[SAVED_VALUE:%.*]] = alloca <32 x i8>, align 32
 // CHECK-NEXT:    store <vscale x 64 x i1> [[M:%.*]], ptr [[M_ADDR]], align 1
 // CHECK-NEXT:    store <vscale x 64 x i8> [[VEC:%.*]], ptr [[VEC_ADDR]], align 1
 // CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 64 x i1>, ptr [[M_ADDR]], align 1
 // CHECK-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @global_bool1, align 8
-// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[SAVED_VALUE]], align 32
-// CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 64 x i1>, ptr [[SAVED_VALUE]], align 32
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <vscale x 8 x i8> [[CAST_SCALABLE]] to <vscale x 64 x i1>
 // CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 64 x i1> @llvm.riscv.vmand.nxv64i1.i64(<vscale x 64 x i1> [[TMP0]], <vscale x 64 x i1> [[TMP2]], i64 256)
 // CHECK-NEXT:    store <vscale x 64 x i1> [[TMP3]], ptr [[MASK]], align 1
 // CHECK-NEXT:    [[TMP4:%.*]] = load <vscale x 64 x i1>, ptr [[MASK]], align 1
 // CHECK-NEXT:    [[TMP5:%.*]] = load <vscale x 64 x i8>, ptr [[VEC_ADDR]], align 1
 // CHECK-NEXT:    [[TMP6:%.*]] = load <256 x i8>, ptr @global_vec_int8m8, align 8
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.v256i8(<vscale x 64 x i8> undef, <256 x i8> [[TMP6]], i64 0)
-// CHECK-NEXT:    [[TMP7:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vadd.mask.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> poison, <vscale x 64 x i8> [[TMP5]], <vscale x 64 x i8> [[CAST_SCALABLE]], <vscale x 64 x i1> [[TMP4]], i64 256, i64 3)
+// CHECK-NEXT:    [[CAST_SCALABLE1:%.*]] = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.v256i8(<vscale x 64 x i8> undef, <256 x i8> [[TMP6]], i64 0)
+// CHECK-NEXT:    [[TMP7:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vadd.mask.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> poison, <vscale x 64 x i8> [[TMP5]], <vscale x 64 x i8> [[CAST_SCALABLE1]], <vscale x 64 x i1> [[TMP4]], i64 256, i64 3)
 // CHECK-NEXT:    [[CAST_FIXED:%.*]] = call <256 x i8> @llvm.vector.extract.v256i8.nxv64i8(<vscale x 64 x i8> [[TMP7]], i64 0)
 // CHECK-NEXT:    store <256 x i8> [[CAST_FIXED]], ptr [[RETVAL]], align 8
 // CHECK-NEXT:    [[TMP8:%.*]] = load <256 x i8>, ptr [[RETVAL]], align 8
-// CHECK-NEXT:    [[CAST_SCALABLE1:%.*]] = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.v256i8(<vscale x 64 x i8> undef, <256 x i8> [[TMP8]], i64 0)
-// CHECK-NEXT:    ret <vscale x 64 x i8> [[CAST_SCALABLE1]]
+// CHECK-NEXT:    [[CAST_SCALABLE2:%.*]] = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.v256i8(<vscale x 64 x i8> undef, <256 x i8> [[TMP8]], i64 0)
+// CHECK-NEXT:    ret <vscale x 64 x i8> [[CAST_SCALABLE2]]
 //
 fixed_int8m8_t test_bool1(vbool1_t m, vint8m8_t vec) {
   vbool1_t mask = __riscv_vmand(m, global_bool1, __riscv_v_fixed_vlen);
@@ -181,15 +180,15 @@ fixed_int32m1_t array_arg(fixed_int32m1_t arr[]) {
 // CHECK-NEXT:    [[RETVAL:%.*]] = alloca <32 x i8>, align 8
 // CHECK-NEXT:    [[ARR:%.*]] = alloca [3 x <32 x i8>], align 8
 // CHECK-NEXT:    [[PARR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 64 x i1>, align 8
 // CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <32 x i8>], ptr [[ARR]], i64 0, i64 0
 // CHECK-NEXT:    store ptr [[ARRAYIDX]], ptr [[PARR]], align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[PARR]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 8
 // CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[RETVAL]], align 8
-// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL_COERCE]], ptr align 8 [[RETVAL]], i64 32, i1 false)
-// CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 64 x i1>, ptr [[RETVAL_COERCE]], align 8
-// CHECK-NEXT:    ret <vscale x 64 x i1> [[TMP2]]
+// CHECK-NEXT:    [[TMP2:%.*]] = load <32 x i8>, ptr [[RETVAL]], align 8
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <vscale x 8 x i8> [[CAST_SCALABLE]] to <vscale x 64 x i1>
+// CHECK-NEXT:    ret <vscale x 64 x i1> [[TMP3]]
 //
 fixed_bool1_t address_of_array_idx_bool1() {
   fixed_bool1_t arr[3];
diff --git a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-globals.c b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-globals.c
index d7df1a24bbfb0..31a245dcb2240 100644
--- a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-globals.c
+++ b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-globals.c
@@ -56,18 +56,16 @@ void write_global_i64(vint64m1_t v) { global_i64 = v; }
 
 // CHECK-64-LABEL: @write_global_bool1(
 // CHECK-64-NEXT:  entry:
-// CHECK-64-NEXT:    [[SAVED_VALUE:%.*]] = alloca <vscale x 64 x i1>, align 8
-// CHECK-64-NEXT:    store <vscale x 64 x i1> [[V:%.*]], ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA7:![0-9]+]]
-// CHECK-64-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA4]]
-// CHECK-64-NEXT:    store <8 x i8> [[TMP0]], ptr @global_bool1, align 8, !tbaa [[TBAA4]]
+// CHECK-64-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 64 x i1> [[V:%.*]] to <vscale x 8 x i8>
+// CHECK-64-NEXT:    [[CAST_FIXED:%.*]] = tail call <8 x i8> @llvm.vector.extract.v8i8.nxv8i8(<vscale x 8 x i8> [[TMP0]], i64 0)
+// CHECK-64-NEXT:    store <8 x i8> [[CAST_FIXED]], ptr @global_bool1, align 8, !tbaa [[TBAA4]]
 // CHECK-64-NEXT:    ret void
 //
 // CHECK-256-LABEL: @write_global_bool1(
 // CHECK-256-NEXT:  entry:
-// CHECK-256-NEXT:    [[SAVED_VALUE:%.*]] = alloca <vscale x 64 x i1>, align 8
-// CHECK-256-NEXT:    store <vscale x 64 x i1> [[V:%.*]], ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA7:![0-9]+]]
-// CHECK-256-NEXT:    [[TMP0:%.*]] = load <32 x i8>, ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA4]]
-// CHECK-256-NEXT:    store <32 x i8> [[TMP0]], ptr @global_bool1, align 8, !tbaa [[TBAA4]]
+// CHECK-256-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 64 x i1> [[V:%.*]] to <vscale x 8 x i8>
+// CHECK-256-NEXT:    [[CAST_FIXED:%.*]] = tail call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[TMP0]], i64 0)
+// CHECK-256-NEXT:    store <32 x i8> [[CAST_FIXED]], ptr @global_bool1, align 8, !tbaa [[TBAA4]]
 // CHECK-256-NEXT:    ret void
 //
 void write_global_bool1(vbool1_t v) { global_bool1 = v; }
@@ -92,7 +90,7 @@ void write_global_bool4(vbool4_t v) { global_bool4 = v; }
 // CHECK-256-LABEL: @write_global_bool32(
 // CHECK-256-NEXT:  entry:
 // CHECK-256-NEXT:    [[SAVED_VALUE:%.*]] = alloca <vscale x 2 x i1>, align 1
-// CHECK-256-NEXT:    store <vscale x 2 x i1> [[V:%.*]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA9:![0-9]+]]
+// CHECK-256-NEXT:    store <vscale x 2 x i1> [[V:%.*]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA7:![0-9]+]]
 // CHECK-256-NEXT:    [[TMP0:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA4]]
 // CHECK-256-NEXT:    store <1 x i8> [[TMP0]], ptr @global_bool32, align 1, !tbaa [[TBAA4]]
 // CHECK-256-NEXT:    ret void
@@ -120,18 +118,16 @@ vint64m1_t read_global_i64() { return global_i64; }
 
 // CHECK-64-LABEL: @read_global_bool1(
 // CHECK-64-NEXT:  entry:
-// CHECK-64-NEXT:    [[SAVED_VALUE:%.*]] = alloca <8 x i8>, align 8
 // CHECK-64-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr @global_bool1, align 8, !tbaa [[TBAA4]]
-// CHECK-64-NEXT:    store <8 x i8> [[TMP0]], ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA4]]
-// CHECK-64-NEXT:    [[TMP1:%.*]] = load <vscale x 64 x i1>, ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA4]]
+// CHECK-64-NEXT:    [[CAST_SCALABLE:%.*]] = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v8i8(<vscale x 8 x i8> undef, <8 x i8> [[TMP0]], i64 0)
+// CHECK-64-NEXT:    [[TMP1:%.*]] = bitcast <vscale x 8 x i8> [[CAST_SCALABLE]] to <vscale x 64 x i1>
 // CHECK-64-NEXT:    ret <vscale x 64 x i1> [[TMP1]]
 //
 // CHECK-256-LABEL: @read_global_bool1(
 // CHECK-256-NEXT:  entry:
-// CHECK-256-NEXT:    [[SAVED_VALUE:%.*]] = alloca <32 x i8>, align 32
 // CHECK-256-NEXT:    [[TMP0:%.*]] = load <32 x i8>, ptr @global_bool1, align 8, !tbaa [[TBAA4]]
-// CHECK-256-NEXT:    store <32 x i8> [[TMP0]], ptr [[SAVED_VALUE]], align 32, !tbaa [[TBAA4]]
-// CHECK-256-NEXT:    [[TMP1:%.*]] = load <vscale x 64 x i1>, ptr [[SAVED_VALUE]], align 32, !tbaa [[TBAA4]]
+// CHECK-256-NEXT:    [[CAST_SCALABLE:%.*]] = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[TMP0]], i64 0)
+// CHECK-256-NEXT:    [[TMP1:%.*]] = bitcast <vscale x 8 x i8> [[CAST_SCALABLE]] to <vscale x 64 x i1>
 // CHECK-256-NEXT:    ret <vscale x 64 x i1> [[TMP1]]
 //
 vbool1_t read_global_bool1() { return global_bool1; }
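
Note (not part of the patch): the three rewritten code paths share one sizing rule. A scalable i1 predicate type whose minimum element count is a known multiple of 8 is viewed as an i8 container with one eighth as many elements, so llvm.vector.insert/llvm.vector.extract plus a bitcast can replace the old round-trip through a stack slot; predicate types whose minimum element count is not a multiple of 8 (for example vbool32_t, i.e. <vscale x 2 x i1>) keep the memory path, which matches the CHECK lines left unchanged above. The standalone C++ sketch below only illustrates that divide-by-8 rule; the helper name is hypothetical and is not clang API.

// Illustrative sketch of the divide-by-8 rule used by the patch; not clang code.
#include <cstdio>

// Hypothetical helper: returns true and sets MinI8Elts when an i1 predicate
// vector with MinI1Elts minimum elements can be reinterpreted as an i8 vector.
static bool getI8ContainerMinElts(unsigned MinI1Elts, unsigned &MinI8Elts) {
  if (MinI1Elts % 8 != 0)
    return false; // e.g. vbool32_t = <vscale x 2 x i1>: still coerced through memory
  MinI8Elts = MinI1Elts / 8;
  return true;
}

int main() {
  // vbool1_t = <vscale x 64 x i1>, vbool4_t = <vscale x 16 x i1>,
  // vbool8_t = <vscale x 8 x i1>, vbool32_t = <vscale x 2 x i1>.
  for (unsigned MinElts : {64u, 16u, 8u, 2u}) {
    unsigned I8Elts;
    if (getI8ContainerMinElts(MinElts, I8Elts))
      std::printf("<vscale x %u x i1> -> <vscale x %u x i8>\n", MinElts, I8Elts);
    else
      std::printf("<vscale x %u x i1> -> no i8 container (not a multiple of 8)\n",
                  MinElts);
  }
  return 0;
}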