[IRGen][AArch64][RISCV] Generalize bitcast between i1 predicate vector and i8 fixed vector. (#76548)

Instead of only handling vscale x 16 x i1 predicate vectors, handle any
scalable i1 vector where the known minimum is divisible by 8.

This is used on RISC-V where we have multiple sizes of predicate
types.
topperc committed Feb 13, 2024
1 parent a7cebad commit 9be7b0a
Showing 7 changed files with 94 additions and 130 deletions.
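
For context, a quick sketch (not part of the commit) of which RVV mask types gain the fast path. The typedef form mirrors the attr-riscv-rvv-vector-bits-*.c tests changed below; it assumes a build with -mrvv-vector-bits=256, so __riscv_v_fixed_vlen is 256.

// Hedged sketch only: fixed-length aliases of RVV mask types.
#include <riscv_vector.h>

// vbool1_t is <vscale x 64 x i1>; 64 is a multiple of 8, so it can now be
// bitcast through <vscale x 8 x i8> instead of taking a stack round-trip.
typedef vbool1_t fixed_bool1_t
    __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen)));

// vbool4_t is <vscale x 16 x i1>, the single width the old code handled
// (the same shape as SVE's <vscale x 16 x i1> svbool_t).
typedef vbool4_t fixed_bool4_t
    __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 4)));

// vbool32_t is <vscale x 2 x i1>; 2 is not a multiple of 8, so it keeps
// the memory round-trip (see the final test hunk below).
typedef vbool32_t fixed_bool32_t
    __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 32)));
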
50 changes: 25 additions & 25 deletions clang/lib/CodeGen/CGCall.cpp
@@ -1301,27 +1301,25 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty,
   // If coercing a fixed vector to a scalable vector for ABI compatibility, and
   // the types match, use the llvm.vector.insert intrinsic to perform the
   // conversion.
-  if (auto *ScalableDst = dyn_cast<llvm::ScalableVectorType>(Ty)) {
-    if (auto *FixedSrc = dyn_cast<llvm::FixedVectorType>(SrcTy)) {
-      // If we are casting a fixed i8 vector to a scalable 16 x i1 predicate
+  if (auto *ScalableDstTy = dyn_cast<llvm::ScalableVectorType>(Ty)) {
+    if (auto *FixedSrcTy = dyn_cast<llvm::FixedVectorType>(SrcTy)) {
+      // If we are casting a fixed i8 vector to a scalable i1 predicate
       // vector, use a vector insert and bitcast the result.
-      bool NeedsBitcast = false;
-      auto PredType =
-          llvm::ScalableVectorType::get(CGF.Builder.getInt1Ty(), 16);
-      llvm::Type *OrigType = Ty;
-      if (ScalableDst == PredType &&
-          FixedSrc->getElementType() == CGF.Builder.getInt8Ty()) {
-        ScalableDst = llvm::ScalableVectorType::get(CGF.Builder.getInt8Ty(), 2);
-        NeedsBitcast = true;
+      if (ScalableDstTy->getElementType()->isIntegerTy(1) &&
+          ScalableDstTy->getElementCount().isKnownMultipleOf(8) &&
+          FixedSrcTy->getElementType()->isIntegerTy(8)) {
+        ScalableDstTy = llvm::ScalableVectorType::get(
+            FixedSrcTy->getElementType(),
+            ScalableDstTy->getElementCount().getKnownMinValue() / 8);
       }
-      if (ScalableDst->getElementType() == FixedSrc->getElementType()) {
+      if (ScalableDstTy->getElementType() == FixedSrcTy->getElementType()) {
         auto *Load = CGF.Builder.CreateLoad(Src);
-        auto *UndefVec = llvm::UndefValue::get(ScalableDst);
+        auto *UndefVec = llvm::UndefValue::get(ScalableDstTy);
         auto *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty);
         llvm::Value *Result = CGF.Builder.CreateInsertVector(
-            ScalableDst, UndefVec, Load, Zero, "cast.scalable");
-        if (NeedsBitcast)
-          Result = CGF.Builder.CreateBitCast(Result, OrigType);
+            ScalableDstTy, UndefVec, Load, Zero, "cast.scalable");
+        if (ScalableDstTy != Ty)
+          Result = CGF.Builder.CreateBitCast(Result, Ty);
         return Result;
       }
     }
@@ -3199,13 +3197,14 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
         llvm::Value *Coerced = Fn->getArg(FirstIRArg);
         if (auto *VecTyFrom =
                 dyn_cast<llvm::ScalableVectorType>(Coerced->getType())) {
-          // If we are casting a scalable 16 x i1 predicate vector to a fixed i8
+          // If we are casting a scalable i1 predicate vector to a fixed i8
           // vector, bitcast the source and use a vector extract.
-          auto PredType =
-              llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
-          if (VecTyFrom == PredType &&
+          if (VecTyFrom->getElementType()->isIntegerTy(1) &&
+              VecTyFrom->getElementCount().isKnownMultipleOf(8) &&
               VecTyTo->getElementType() == Builder.getInt8Ty()) {
-            VecTyFrom = llvm::ScalableVectorType::get(Builder.getInt8Ty(), 2);
+            VecTyFrom = llvm::ScalableVectorType::get(
+                VecTyTo->getElementType(),
+                VecTyFrom->getElementCount().getKnownMinValue() / 8);
             Coerced = Builder.CreateBitCast(Coerced, VecTyFrom);
           }
           if (VecTyFrom->getElementType() == VecTyTo->getElementType()) {
@@ -5877,12 +5876,13 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
       // If coercing a fixed vector from a scalable vector for ABI
       // compatibility, and the types match, use the llvm.vector.extract
       // intrinsic to perform the conversion.
-      if (auto *FixedDst = dyn_cast<llvm::FixedVectorType>(RetIRTy)) {
+      if (auto *FixedDstTy = dyn_cast<llvm::FixedVectorType>(RetIRTy)) {
         llvm::Value *V = CI;
-        if (auto *ScalableSrc = dyn_cast<llvm::ScalableVectorType>(V->getType())) {
-          if (FixedDst->getElementType() == ScalableSrc->getElementType()) {
+        if (auto *ScalableSrcTy =
+                dyn_cast<llvm::ScalableVectorType>(V->getType())) {
+          if (FixedDstTy->getElementType() == ScalableSrcTy->getElementType()) {
             llvm::Value *Zero = llvm::Constant::getNullValue(CGM.Int64Ty);
-            V = Builder.CreateExtractVector(FixedDst, V, Zero, "cast.fixed");
+            V = Builder.CreateExtractVector(FixedDstTy, V, Zero, "cast.fixed");
             return RValue::get(V);
           }
         }
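
Taken together, the CGCall.cpp hunks cover both sides of a function boundary: coerced loads, incoming arguments in the prolog, and coerced return values at call sites. A minimal sketch of a wrapper that benefits, modeled on the call_bool1_* functions in the call test below (and_bool1 is an illustrative name):

// Hedged sketch: both the coerced arguments and the coerced return value
// of this wrapper are now handled with llvm.vector.insert/extract plus a
// bitcast through <vscale x 8 x i8>, rather than spills to allocas.
#include <riscv_vector.h>

typedef vbool1_t fixed_bool1_t
    __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen)));

fixed_bool1_t and_bool1(vbool1_t op1, vbool1_t op2) {
  return __riscv_vmand(op1, op2, __riscv_v_fixed_vlen);
}
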
51 changes: 25 additions & 26 deletions clang/lib/CodeGen/CGExprScalar.cpp
@@ -2137,26 +2137,24 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
     // If Src is a fixed vector and Dst is a scalable vector, and both have the
     // same element type, use the llvm.vector.insert intrinsic to perform the
     // bitcast.
-    if (const auto *FixedSrc = dyn_cast<llvm::FixedVectorType>(SrcTy)) {
-      if (const auto *ScalableDst = dyn_cast<llvm::ScalableVectorType>(DstTy)) {
-        // If we are casting a fixed i8 vector to a scalable 16 x i1 predicate
+    if (auto *FixedSrcTy = dyn_cast<llvm::FixedVectorType>(SrcTy)) {
+      if (auto *ScalableDstTy = dyn_cast<llvm::ScalableVectorType>(DstTy)) {
+        // If we are casting a fixed i8 vector to a scalable i1 predicate
         // vector, use a vector insert and bitcast the result.
-        bool NeedsBitCast = false;
-        auto PredType = llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
-        llvm::Type *OrigType = DstTy;
-        if (ScalableDst == PredType &&
-            FixedSrc->getElementType() == Builder.getInt8Ty()) {
-          DstTy = llvm::ScalableVectorType::get(Builder.getInt8Ty(), 2);
-          ScalableDst = cast<llvm::ScalableVectorType>(DstTy);
-          NeedsBitCast = true;
+        if (ScalableDstTy->getElementType()->isIntegerTy(1) &&
+            ScalableDstTy->getElementCount().isKnownMultipleOf(8) &&
+            FixedSrcTy->getElementType()->isIntegerTy(8)) {
+          ScalableDstTy = llvm::ScalableVectorType::get(
+              FixedSrcTy->getElementType(),
+              ScalableDstTy->getElementCount().getKnownMinValue() / 8);
        }
-        if (FixedSrc->getElementType() == ScalableDst->getElementType()) {
-          llvm::Value *UndefVec = llvm::UndefValue::get(DstTy);
+        if (FixedSrcTy->getElementType() == ScalableDstTy->getElementType()) {
+          llvm::Value *UndefVec = llvm::UndefValue::get(ScalableDstTy);
           llvm::Value *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty);
           llvm::Value *Result = Builder.CreateInsertVector(
-              DstTy, UndefVec, Src, Zero, "cast.scalable");
-          if (NeedsBitCast)
-            Result = Builder.CreateBitCast(Result, OrigType);
+              ScalableDstTy, UndefVec, Src, Zero, "cast.scalable");
+          if (Result->getType() != DstTy)
+            Result = Builder.CreateBitCast(Result, DstTy);
           return Result;
         }
       }
@@ -2165,18 +2163,19 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
     // If Src is a scalable vector and Dst is a fixed vector, and both have the
     // same element type, use the llvm.vector.extract intrinsic to perform the
     // bitcast.
-    if (const auto *ScalableSrc = dyn_cast<llvm::ScalableVectorType>(SrcTy)) {
-      if (const auto *FixedDst = dyn_cast<llvm::FixedVectorType>(DstTy)) {
-        // If we are casting a scalable 16 x i1 predicate vector to a fixed i8
+    if (auto *ScalableSrcTy = dyn_cast<llvm::ScalableVectorType>(SrcTy)) {
+      if (auto *FixedDstTy = dyn_cast<llvm::FixedVectorType>(DstTy)) {
+        // If we are casting a scalable i1 predicate vector to a fixed i8
         // vector, bitcast the source and use a vector extract.
-        auto PredType = llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
-        if (ScalableSrc == PredType &&
-            FixedDst->getElementType() == Builder.getInt8Ty()) {
-          SrcTy = llvm::ScalableVectorType::get(Builder.getInt8Ty(), 2);
-          ScalableSrc = cast<llvm::ScalableVectorType>(SrcTy);
-          Src = Builder.CreateBitCast(Src, SrcTy);
+        if (ScalableSrcTy->getElementType()->isIntegerTy(1) &&
+            ScalableSrcTy->getElementCount().isKnownMultipleOf(8) &&
+            FixedDstTy->getElementType()->isIntegerTy(8)) {
+          ScalableSrcTy = llvm::ScalableVectorType::get(
+              FixedDstTy->getElementType(),
+              ScalableSrcTy->getElementCount().getKnownMinValue() / 8);
+          Src = Builder.CreateBitCast(Src, ScalableSrcTy);
         }
-        if (ScalableSrc->getElementType() == FixedDst->getElementType()) {
+        if (ScalableSrcTy->getElementType() == FixedDstTy->getElementType()) {
           llvm::Value *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty);
           return Builder.CreateExtractVector(DstTy, Src, Zero, "cast.fixed");
         }
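
The two CGExprScalar.cpp hunks apply the same generalization when the conversion appears as a scalar expression cast rather than at a call boundary. A hedged sketch of both directions (function names are illustrative):

// Sketch, assuming the fixed_bool1_t typedef from earlier. The first hunk
// lowers fixed -> scalable as llvm.vector.insert into <vscale x 8 x i8>
// followed by a bitcast to <vscale x 64 x i1>; the second lowers
// scalable -> fixed as a bitcast to <vscale x 8 x i8> followed by
// llvm.vector.extract.
#include <riscv_vector.h>

typedef vbool1_t fixed_bool1_t
    __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen)));

vbool1_t fixed_to_scalable(fixed_bool1_t m) { return m; }

fixed_bool1_t scalable_to_fixed(vbool1_t m) { return m; }
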
36 changes: 15 additions & 21 deletions clang/test/CodeGen/attr-riscv-rvv-vector-bits-bitcast.c
@@ -177,29 +177,26 @@ void write_float64m1(struct struct_float64m1 *s, vfloat64m1_t x) {

 // CHECK-64-LABEL: @read_bool1(
 // CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[SAVED_VALUE:%.*]] = alloca <8 x i8>, align 8
 // CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 8
 // CHECK-64-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[Y]], align 8, !tbaa [[TBAA4]]
-// CHECK-64-NEXT: store <8 x i8> [[TMP0]], ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA4]]
-// CHECK-64-NEXT: [[TMP1:%.*]] = load <vscale x 64 x i1>, ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA4]]
+// CHECK-64-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v8i8(<vscale x 8 x i8> undef, <8 x i8> [[TMP0]], i64 0)
+// CHECK-64-NEXT: [[TMP1:%.*]] = bitcast <vscale x 8 x i8> [[CAST_SCALABLE]] to <vscale x 64 x i1>
 // CHECK-64-NEXT: ret <vscale x 64 x i1> [[TMP1]]
 //
 // CHECK-128-LABEL: @read_bool1(
 // CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <16 x i8>, align 16
 // CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 16
 // CHECK-128-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Y]], align 8, !tbaa [[TBAA4]]
-// CHECK-128-NEXT: store <16 x i8> [[TMP0]], ptr [[SAVED_VALUE]], align 16, !tbaa [[TBAA4]]
-// CHECK-128-NEXT: [[TMP1:%.*]] = load <vscale x 64 x i1>, ptr [[SAVED_VALUE]], align 16, !tbaa [[TBAA4]]
+// CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v16i8(<vscale x 8 x i8> undef, <16 x i8> [[TMP0]], i64 0)
+// CHECK-128-NEXT: [[TMP1:%.*]] = bitcast <vscale x 8 x i8> [[CAST_SCALABLE]] to <vscale x 64 x i1>
 // CHECK-128-NEXT: ret <vscale x 64 x i1> [[TMP1]]
 //
 // CHECK-256-LABEL: @read_bool1(
 // CHECK-256-NEXT: entry:
-// CHECK-256-NEXT: [[SAVED_VALUE:%.*]] = alloca <32 x i8>, align 32
 // CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 32
 // CHECK-256-NEXT: [[TMP0:%.*]] = load <32 x i8>, ptr [[Y]], align 8, !tbaa [[TBAA4]]
-// CHECK-256-NEXT: store <32 x i8> [[TMP0]], ptr [[SAVED_VALUE]], align 32, !tbaa [[TBAA4]]
-// CHECK-256-NEXT: [[TMP1:%.*]] = load <vscale x 64 x i1>, ptr [[SAVED_VALUE]], align 32, !tbaa [[TBAA4]]
+// CHECK-256-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[TMP0]], i64 0)
+// CHECK-256-NEXT: [[TMP1:%.*]] = bitcast <vscale x 8 x i8> [[CAST_SCALABLE]] to <vscale x 64 x i1>
 // CHECK-256-NEXT: ret <vscale x 64 x i1> [[TMP1]]
 //
 vbool1_t read_bool1(struct struct_bool1 *s) {
@@ -208,29 +205,26 @@ vbool1_t read_bool1(struct struct_bool1 *s) {

 // CHECK-64-LABEL: @write_bool1(
 // CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[SAVED_VALUE:%.*]] = alloca <vscale x 64 x i1>, align 8
-// CHECK-64-NEXT: store <vscale x 64 x i1> [[X:%.*]], ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA7:![0-9]+]]
-// CHECK-64-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA4]]
+// CHECK-64-NEXT: [[TMP0:%.*]] = bitcast <vscale x 64 x i1> [[X:%.*]] to <vscale x 8 x i8>
+// CHECK-64-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x i8> @llvm.vector.extract.v8i8.nxv8i8(<vscale x 8 x i8> [[TMP0]], i64 0)
 // CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 8
-// CHECK-64-NEXT: store <8 x i8> [[TMP0]], ptr [[Y]], align 8, !tbaa [[TBAA4]]
+// CHECK-64-NEXT: store <8 x i8> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[TBAA4]]
 // CHECK-64-NEXT: ret void
 //
 // CHECK-128-LABEL: @write_bool1(
 // CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <vscale x 64 x i1>, align 16
-// CHECK-128-NEXT: store <vscale x 64 x i1> [[X:%.*]], ptr [[SAVED_VALUE]], align 16, !tbaa [[TBAA7:![0-9]+]]
-// CHECK-128-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[SAVED_VALUE]], align 16, !tbaa [[TBAA4]]
+// CHECK-128-NEXT: [[TMP0:%.*]] = bitcast <vscale x 64 x i1> [[X:%.*]] to <vscale x 8 x i8>
+// CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> [[TMP0]], i64 0)
 // CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 16
-// CHECK-128-NEXT: store <16 x i8> [[TMP0]], ptr [[Y]], align 8, !tbaa [[TBAA4]]
+// CHECK-128-NEXT: store <16 x i8> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[TBAA4]]
 // CHECK-128-NEXT: ret void
 //
 // CHECK-256-LABEL: @write_bool1(
 // CHECK-256-NEXT: entry:
-// CHECK-256-NEXT: [[SAVED_VALUE:%.*]] = alloca <vscale x 64 x i1>, align 8
-// CHECK-256-NEXT: store <vscale x 64 x i1> [[X:%.*]], ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA7:![0-9]+]]
-// CHECK-256-NEXT: [[TMP0:%.*]] = load <32 x i8>, ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA4]]
+// CHECK-256-NEXT: [[TMP0:%.*]] = bitcast <vscale x 64 x i1> [[X:%.*]] to <vscale x 8 x i8>
+// CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[TMP0]], i64 0)
 // CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 32
-// CHECK-256-NEXT: store <32 x i8> [[TMP0]], ptr [[Y]], align 8, !tbaa [[TBAA4]]
+// CHECK-256-NEXT: store <32 x i8> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[TBAA4]]
 // CHECK-256-NEXT: ret void
 //
 void write_bool1(struct struct_bool1 *s, vbool1_t x) {
26 changes: 4 additions & 22 deletions clang/test/CodeGen/attr-riscv-rvv-vector-bits-call.c
@@ -70,13 +70,7 @@ fixed_float64m1_t call_float64_ff(fixed_float64m1_t op1, fixed_float64m1_t op2)

 // CHECK-LABEL: @call_bool1_ff(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[SAVED_VALUE4:%.*]] = alloca <vscale x 64 x i1>, align 8
-// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 64 x i1>, align 8
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 64 x i1> @llvm.riscv.vmand.nxv64i1.i64(<vscale x 64 x i1> [[OP1_COERCE:%.*]], <vscale x 64 x i1> [[OP2_COERCE:%.*]], i64 256)
-// CHECK-NEXT: store <vscale x 64 x i1> [[TMP0]], ptr [[SAVED_VALUE4]], align 8, !tbaa [[TBAA4:![0-9]+]]
-// CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr [[SAVED_VALUE4]], align 8, !tbaa [[TBAA8:![0-9]+]]
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 64 x i1>, ptr [[RETVAL_COERCE]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 64 x i1> @llvm.riscv.vmand.nxv64i1.i64(<vscale x 64 x i1> [[TMP0:%.*]], <vscale x 64 x i1> [[TMP1:%.*]], i64 256)
 // CHECK-NEXT: ret <vscale x 64 x i1> [[TMP2]]
 //
 fixed_bool1_t call_bool1_ff(fixed_bool1_t op1, fixed_bool1_t op2) {
@@ -116,14 +110,8 @@ fixed_float64m1_t call_float64_fs(fixed_float64m1_t op1, vfloat64m1_t op2) {

 // CHECK-LABEL: @call_bool1_fs(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[SAVED_VALUE2:%.*]] = alloca <vscale x 64 x i1>, align 8
-// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 64 x i1>, align 8
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 64 x i1> @llvm.riscv.vmand.nxv64i1.i64(<vscale x 64 x i1> [[OP1_COERCE:%.*]], <vscale x 64 x i1> [[OP2:%.*]], i64 256)
-// CHECK-NEXT: store <vscale x 64 x i1> [[TMP0]], ptr [[SAVED_VALUE2]], align 8, !tbaa [[TBAA4]]
-// CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr [[SAVED_VALUE2]], align 8, !tbaa [[TBAA8]]
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 64 x i1>, ptr [[RETVAL_COERCE]], align 8
-// CHECK-NEXT: ret <vscale x 64 x i1> [[TMP2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 64 x i1> @llvm.riscv.vmand.nxv64i1.i64(<vscale x 64 x i1> [[TMP0:%.*]], <vscale x 64 x i1> [[OP2:%.*]], i64 256)
+// CHECK-NEXT: ret <vscale x 64 x i1> [[TMP1]]
 //
 fixed_bool1_t call_bool1_fs(fixed_bool1_t op1, vbool1_t op2) {
   return __riscv_vmand(op1, op2, __riscv_v_fixed_vlen);
@@ -162,14 +150,8 @@ fixed_float64m1_t call_float64_ss(vfloat64m1_t op1, vfloat64m1_t op2) {

 // CHECK-LABEL: @call_bool1_ss(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[SAVED_VALUE:%.*]] = alloca <vscale x 64 x i1>, align 8
-// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 64 x i1>, align 8
 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 64 x i1> @llvm.riscv.vmand.nxv64i1.i64(<vscale x 64 x i1> [[OP1:%.*]], <vscale x 64 x i1> [[OP2:%.*]], i64 256)
-// CHECK-NEXT: store <vscale x 64 x i1> [[TMP0]], ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA4]]
-// CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA8]]
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 64 x i1>, ptr [[RETVAL_COERCE]], align 8
-// CHECK-NEXT: ret <vscale x 64 x i1> [[TMP2]]
+// CHECK-NEXT: ret <vscale x 64 x i1> [[TMP0]]
 //
 fixed_bool1_t call_bool1_ss(vbool1_t op1, vbool1_t op2) {
   return __riscv_vmand(op1, op2, __riscv_v_fixed_vlen);
14 changes: 4 additions & 10 deletions clang/test/CodeGen/attr-riscv-rvv-vector-bits-cast.c
@@ -65,21 +65,15 @@ fixed_float64m1_t from_vfloat64m1_t(vfloat64m1_t type) {

 // CHECK-LABEL: @from_vbool1_t(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[SAVED_VALUE:%.*]] = alloca <vscale x 64 x i1>, align 8
-// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 64 x i1>, align 8
-// CHECK-NEXT: store <vscale x 64 x i1> [[TYPE:%.*]], ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA4:![0-9]+]]
-// CHECK-NEXT: [[TMP0:%.*]] = load <32 x i8>, ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA8:![0-9]+]]
-// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[RETVAL_COERCE]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load <vscale x 64 x i1>, ptr [[RETVAL_COERCE]], align 8
-// CHECK-NEXT: ret <vscale x 64 x i1> [[TMP1]]
+// CHECK-NEXT: ret <vscale x 64 x i1> [[TYPE:%.*]]
 //
 fixed_bool1_t from_vbool1_t(vbool1_t type) {
   return type;
 }

 // CHECK-LABEL: @to_vbool1_t(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 64 x i1> [[TYPE_COERCE:%.*]]
+// CHECK-NEXT: ret <vscale x 64 x i1> [[TMP0:%.*]]
 //
 vbool1_t to_vbool1_t(fixed_bool1_t type) {
   return type;
@@ -105,8 +99,8 @@ vbool4_t to_vbool4_t(fixed_bool4_t type) {
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[SAVED_VALUE:%.*]] = alloca <vscale x 2 x i1>, align 1
 // CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x i1>, align 1
-// CHECK-NEXT: store <vscale x 2 x i1> [[TYPE:%.*]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA9:![0-9]+]]
-// CHECK-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA8]]
+// CHECK-NEXT: store <vscale x 2 x i1> [[TYPE:%.*]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA4:![0-9]+]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA8:![0-9]+]]
 // CHECK-NEXT: store <1 x i8> [[TMP0]], ptr [[RETVAL_COERCE]], align 1
 // CHECK-NEXT: [[TMP1:%.*]] = load <vscale x 2 x i1>, ptr [[RETVAL_COERCE]], align 1
 // CHECK-NEXT: ret <vscale x 2 x i1> [[TMP1]]
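
This final hunk only renumbers TBAA metadata: vbool32_t is <vscale x 2 x i1>, and a known minimum element count of 2 is not a multiple of 8, so there is no whole-byte <vscale x N x i8> container to bitcast through. A sketch of the case that intentionally keeps the slow path (the typedef form is assumed, as before):

// Hedged sketch: this conversion still goes through the SAVED_VALUE and
// RETVAL_COERCE stack slots, as the CHECK lines above show. Only the TBAA
// numbering shifted, because the earlier functions in the file no longer
// create allocas (and thus emit fewer metadata nodes before this point).
#include <riscv_vector.h>

typedef vbool32_t fixed_bool32_t
    __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 32)));

fixed_bool32_t from_vbool32_t(vbool32_t type) { return type; }
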
