diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index d07fb1ed3f344..68d77f76666f6 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -5279,30 +5279,50 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
           dyn_cast<llvm::StructType>(ArgInfo.getCoerceToType());
       if (STy && ArgInfo.isDirect() && ArgInfo.getCanBeFlattened()) {
         llvm::Type *SrcTy = Src.getElementType();
-        uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(SrcTy);
-        uint64_t DstSize = CGM.getDataLayout().getTypeAllocSize(STy);
-
-        // If the source type is smaller than the destination type of the
-        // coerce-to logic, copy the source value into a temp alloca the size
-        // of the destination type to allow loading all of it. The bits past
-        // the source value are left undef.
-        if (SrcSize < DstSize) {
-          Address TempAlloca
-            = CreateTempAlloca(STy, Src.getAlignment(),
-                               Src.getName() + ".coerce");
-          Builder.CreateMemCpy(TempAlloca, Src, SrcSize);
-          Src = TempAlloca;
+        llvm::TypeSize SrcTypeSize =
+            CGM.getDataLayout().getTypeAllocSize(SrcTy);
+        llvm::TypeSize DstTypeSize = CGM.getDataLayout().getTypeAllocSize(STy);
+        if (SrcTypeSize.isScalable()) {
+          assert(STy->containsHomogeneousScalableVectorTypes() &&
+                 "ABI only supports structure with homogeneous scalable vector "
+                 "type");
+          assert(SrcTypeSize == DstTypeSize &&
+                 "Only allow non-fractional movement of structure with "
+                 "homogeneous scalable vector type");
+          assert(NumIRArgs == STy->getNumElements());
+
+          llvm::Value *StoredStructValue =
+              Builder.CreateLoad(Src, Src.getName() + ".tuple");
+          for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+            llvm::Value *Extract = Builder.CreateExtractValue(
+                StoredStructValue, i, Src.getName() + ".extract" + Twine(i));
+            IRCallArgs[FirstIRArg + i] = Extract;
+          }
         } else {
-          Src = Src.withElementType(STy);
-        }
+          uint64_t SrcSize = SrcTypeSize.getFixedValue();
+          uint64_t DstSize = DstTypeSize.getFixedValue();
+
+          // If the source type is smaller than the destination type of the
+          // coerce-to logic, copy the source value into a temp alloca the size
+          // of the destination type to allow loading all of it. The bits past
+          // the source value are left undef.
+          if (SrcSize < DstSize) {
+            Address TempAlloca = CreateTempAlloca(STy, Src.getAlignment(),
+                                                  Src.getName() + ".coerce");
+            Builder.CreateMemCpy(TempAlloca, Src, SrcSize);
+            Src = TempAlloca;
+          } else {
+            Src = Src.withElementType(STy);
+          }
 
-        assert(NumIRArgs == STy->getNumElements());
-        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
-          Address EltPtr = Builder.CreateStructGEP(Src, i);
-          llvm::Value *LI = Builder.CreateLoad(EltPtr);
-          if (ArgHasMaybeUndefAttr)
-            LI = Builder.CreateFreeze(LI);
-          IRCallArgs[FirstIRArg + i] = LI;
+          assert(NumIRArgs == STy->getNumElements());
+          for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+            Address EltPtr = Builder.CreateStructGEP(Src, i);
+            llvm::Value *LI = Builder.CreateLoad(EltPtr);
+            if (ArgHasMaybeUndefAttr)
+              LI = Builder.CreateFreeze(LI);
+            IRCallArgs[FirstIRArg + i] = LI;
+          }
         }
       } else {
         // In the simple case, just pass the coerced loaded value.
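
The functional change in the hunk above: getTypeAllocSize() returns an llvm::TypeSize, and the old code forced it straight into uint64_t, which is only meaningful for fixed-size types. For a struct of homogeneous scalable vectors (an RVV tuple), the coercion now loads the aggregate once and splits it with extractvalue instead of GEPing to each field. A minimal C reproducer would look like the sketch below; the function names and driver flags are illustrative, not taken from the patch:

    // repro.c -- sketch; assumes a RISC-V target with the V extension,
    // e.g.: clang -target riscv64 -march=rv64gcv -O0 -emit-llvm -S repro.c
    __rvv_int32m1x2_t take_tuple(__rvv_int32m1x2_t v_tuple);

    __rvv_int32m1x2_t pass_tuple(__rvv_int32m1x2_t v_tuple) {
      // Passing the tuple by value hits the isDirect() + getCanBeFlattened()
      // path patched above: the { <vscale x 2 x i32>, <vscale x 2 x i32> }
      // aggregate is loaded whole and split into two IR arguments.
      return take_tuple(v_tuple);
    }
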
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-tuple-type.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-tuple-type.c
index f4235795a8622..f8d755992eeac 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-tuple-type.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-tuple-type.c
@@ -90,3 +90,36 @@ void baz(__rvv_int32m1x2_t v_tuple) {
 __rvv_int32m1x2_t qux(__rvv_int32m1x2_t v_tuple) {
   return v_tuple;
 }
+
+// O0-LABEL: define dso_local { <vscale x 2 x i32>, <vscale x 2 x i32> } @quux
+// O0-SAME: (<vscale x 2 x i32> [[V_TUPLE_COERCE0:%.*]], <vscale x 2 x i32> [[V_TUPLE_COERCE1:%.*]]) #[[ATTR0]] {
+// O0-NEXT:  entry:
+// O0-NEXT:    [[V_TUPLE:%.*]] = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
+// O0-NEXT:    [[V_TUPLE_ADDR:%.*]] = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
+// O0-NEXT:    [[COERCE:%.*]] = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
+// O0-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[V_TUPLE_COERCE0]], 0
+// O0-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP0]], <vscale x 2 x i32> [[V_TUPLE_COERCE1]], 1
+// O0-NEXT:    store { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP1]], ptr [[V_TUPLE]], align 4
+// O0-NEXT:    [[V_TUPLE1:%.*]] = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr [[V_TUPLE]], align 4
+// O0-NEXT:    store { <vscale x 2 x i32>, <vscale x 2 x i32> } [[V_TUPLE1]], ptr [[V_TUPLE_ADDR]], align 4
+// O0-NEXT:    [[TMP2:%.*]] = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr [[V_TUPLE_ADDR]], align 4
+// O0-NEXT:    store { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP2]], ptr [[COERCE]], align 4
+// O0-NEXT:    [[COERCE_TUPLE:%.*]] = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr [[COERCE]], align 4
+// O0-NEXT:    [[COERCE_EXTRACT0:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[COERCE_TUPLE]], 0
+// O0-NEXT:    [[COERCE_EXTRACT1:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[COERCE_TUPLE]], 1
+// O0-NEXT:    [[CALL:%.*]] = call { <vscale x 2 x i32>, <vscale x 2 x i32> } @qux(<vscale x 2 x i32> [[COERCE_EXTRACT0]], <vscale x 2 x i32> [[COERCE_EXTRACT1]])
+// O0-NEXT:    ret { <vscale x 2 x i32>, <vscale x 2 x i32> } [[CALL]]
+//
+// AFTER_MEM2REG-LABEL: define dso_local { <vscale x 2 x i32>, <vscale x 2 x i32> } @quux
+// AFTER_MEM2REG-SAME: (<vscale x 2 x i32> [[V_TUPLE_COERCE0:%.*]], <vscale x 2 x i32> [[V_TUPLE_COERCE1:%.*]]) #[[ATTR0]] {
+// AFTER_MEM2REG-NEXT:  entry:
+// AFTER_MEM2REG-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[V_TUPLE_COERCE0]], 0
+// AFTER_MEM2REG-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP0]], <vscale x 2 x i32> [[V_TUPLE_COERCE1]], 1
+// AFTER_MEM2REG-NEXT:    [[COERCE_EXTRACT0:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP1]], 0
+// AFTER_MEM2REG-NEXT:    [[COERCE_EXTRACT1:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP1]], 1
+// AFTER_MEM2REG-NEXT:    [[CALL:%.*]] = call { <vscale x 2 x i32>, <vscale x 2 x i32> } @qux(<vscale x 2 x i32> [[COERCE_EXTRACT0]], <vscale x 2 x i32> [[COERCE_EXTRACT1]])
+// AFTER_MEM2REG-NEXT:    ret { <vscale x 2 x i32>, <vscale x 2 x i32> } [[CALL]]
+//
+__rvv_int32m1x2_t quux(__rvv_int32m1x2_t v_tuple) {
+  return qux(v_tuple);
+}
diff --git a/llvm/test/Transforms/SROA/scalable-vector-struct.ll b/llvm/test/Transforms/SROA/scalable-vector-struct.ll
index 92cd44d2b5ac3..1af4fbbd9254b 100644
--- a/llvm/test/Transforms/SROA/scalable-vector-struct.ll
+++ b/llvm/test/Transforms/SROA/scalable-vector-struct.ll
@@ -20,3 +20,34 @@ define %struct.test @alloca(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y) {
   %val = load %struct.test, %struct.test* %addr, align 4
   ret %struct.test %val
 }
+
+
+define { <vscale x 2 x i32>, <vscale x 2 x i32> } @return_tuple(<vscale x 2 x i32> %v_tuple.coerce0, <vscale x 2 x i32> %v_tuple.coerce1) {
+; CHECK-LABEL: @return_tuple(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[V_TUPLE_COERCE0:%.*]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP0]], <vscale x 2 x i32> [[V_TUPLE_COERCE1:%.*]], 1
+; CHECK-NEXT:    [[COERCE_EXTRACT0:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP1]], 0
+; CHECK-NEXT:    [[COERCE_EXTRACT1:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP1]], 1
+; CHECK-NEXT:    [[CALL:%.*]] = call { <vscale x 2 x i32>, <vscale x 2 x i32> } @foo(<vscale x 2 x i32> [[COERCE_EXTRACT0]], <vscale x 2 x i32> [[COERCE_EXTRACT1]])
+; CHECK-NEXT:    ret { <vscale x 2 x i32>, <vscale x 2 x i32> } [[CALL]]
+;
+entry:
+  %v_tuple = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
+  %v_tuple.addr = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
+  %coerce = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
+  %0 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> %v_tuple.coerce0, 0
+  %1 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %0, <vscale x 2 x i32> %v_tuple.coerce1, 1
+  store { <vscale x 2 x i32>, <vscale x 2 x i32> } %1, ptr %v_tuple, align 4
+  %v_tuple1 = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr %v_tuple, align 4
+  store { <vscale x 2 x i32>, <vscale x 2 x i32> } %v_tuple1, ptr %v_tuple.addr, align 4
+  %2 = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr %v_tuple.addr, align 4
+  store { <vscale x 2 x i32>, <vscale x 2 x i32> } %2, ptr %coerce, align 4
+  %coerce.tuple = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr %coerce, align 4
+  %coerce.extract0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %coerce.tuple, 0
+  %coerce.extract1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %coerce.tuple, 1
+  %call = call { <vscale x 2 x i32>, <vscale x 2 x i32> } @foo(<vscale x 2 x i32> %coerce.extract0, <vscale x 2 x i32> %coerce.extract1)
+  ret { <vscale x 2 x i32>, <vscale x 2 x i32> } %call
+}
+
+declare { <vscale x 2 x i32>, <vscale x 2 x i32> } @foo(<vscale x 2 x i32>, <vscale x 2 x i32>)
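
The SROA test is the IR-level counterpart of the O0 check block in rvv-tuple-type.c: the three scalable-vector allocas in @return_tuple are a round trip through memory that mem2reg-style promotion inside SROA folds away, leaving only the insertvalue/extractvalue chain and the call, as the CHECK lines show. The file's RUN lines sit outside this hunk; presumably it is driven through opt with the sroa pass plus FileCheck, as is usual for tests under llvm/test/Transforms/SROA.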