-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[clang][WebAssembly] Return aggregate values indirectly in swiftcc by default #168652
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
… default The Swift calling convention on Wasm has historically returned aggregate values directly at the LLVM IR level due to the use of the generic `SwiftABIInfo` implementation. The direct return at LLVM IR level will cause unnecessary stack allocation and memory copies for each aggregate return value at ISel/MI time, which misses lots of optimization opportunities. This change provides a SwiftABIInfo hook implementation for Wasm targets to limit the number of scalar return values up to 1 by default, which will use sret for aggregate return values. Also adds basic support for the experimental multivalue ABI.
🐧 Linux x64 Test Results
|
|
@llvm/pr-subscribers-backend-webassembly
Author: Yuta Saito (kateinoigakukun)
Changes
The Swift calling convention on Wasm has historically returned aggregate values directly at the LLVM IR level due to the use of the generic `SwiftABIInfo` implementation. The direct return at LLVM IR level will cause unnecessary stack allocation and memory copies for each aggregate return value at ISel/MI time, which misses lots of optimization opportunities.
This change provides a SwiftABIInfo hook implementation for Wasm targets to limit the number of scalar return values up to 1 by default, which will use sret for aggregate return values. Also adds basic support for the experimental multivalue ABI.
Here is an example of such inefficient codegen:
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-i128:128-n32:64-S128-ni:1:10:20"
target triple = "wasm32-unknown-wasip1"
define hidden swiftcc { i32, i32, i32 } @thunk() #0 {
entry:
%call = tail call swiftcc { i32, i32, i32 } @f(ptr noundef nonnull @thunk) #2
ret { i32, i32, i32 } %call
}
declare swiftcc { i32, i32, i32 } @f(ptr noundef)
(func $thunk (type 0) (param i32 i32 i32)
(local i32 i32 i32)
global.get 0
i32.const 16
i32.sub
local.tee 3
global.set 0
local.get 3
i32.const 1
local.get 3
local.get 3
call 0
local.get 3
i32.load offset=4
local.set 4
local.get 3
i32.load offset=8
local.set 5
local.get 0
local.get 3
i32.load
i32.store
local.get 0
local.get 5
i32.store offset=8
local.get 0
local.get 4
i32.store offset=4
local.get 3
i32.const 16
i32.add
global.set 0)
(elem (;0;) (i32.const 1) func $thunk)
This explicit indirection at LLVM IR generation stage is important for Swift because we have a lot of call-forwarding thunk functions (like protocol witness method and merged function thunks).
After this patch, the IR and the lowered code look like below:
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-i128:128-n32:64-S128-ni:1:10:20"
target triple = "wasm32-unknown-wasip1"
%struct.foo = type { i32, i32, i32 }
define hidden swiftcc void @thunk(ptr dead_on_unwind noalias writable sret(%struct.foo) align 4 %agg.result) #0 {
entry:
call swiftcc void @f(ptr dead_on_unwind writable sret(%struct.foo) align 4 %agg.result, ptr noundef @thunk)
ret void
}
declare swiftcc void @f(ptr dead_on_unwind writable sret(%struct.foo) align 4, ptr noundef) #1
(func $thunk (type 0) (param i32 i32 i32)
local.get 0
i32.const 1
local.get 0
local.get 0
call 0)
(elem (;0;) (i32.const 1) func $thunk)
Full diff: https://github.com/llvm/llvm-project/pull/168652.diff
4 Files Affected:
diff --git a/clang/lib/CodeGen/ABIInfo.cpp b/clang/lib/CodeGen/ABIInfo.cpp
index acd678193b5a8..974b9267c002b 100644
--- a/clang/lib/CodeGen/ABIInfo.cpp
+++ b/clang/lib/CodeGen/ABIInfo.cpp
@@ -254,6 +254,22 @@ void ABIInfo::createCoercedStore(llvm::Value *Val, Address DstAddr,
// Pin the vtable to this file.
SwiftABIInfo::~SwiftABIInfo() = default;
+void SwiftABIInfo::countOccupiedRegisters(ArrayRef<llvm::Type *> scalarTypes,
+ unsigned &intCount, unsigned &fpCount,
+ unsigned maxIntRegisterBitWidth) {
+ for (llvm::Type *type : scalarTypes) {
+ if (type->isPointerTy()) {
+ intCount++;
+ } else if (auto *intTy = dyn_cast<llvm::IntegerType>(type)) {
+ intCount += (intTy->getBitWidth() + maxIntRegisterBitWidth - 1) /
+ maxIntRegisterBitWidth;
+ } else {
+ assert(type->isVectorTy() || type->isFloatingPointTy());
+ fpCount++;
+ }
+ }
+}
+
/// Does the given lowering require more than the given number of
/// registers when expanded?
///
@@ -269,18 +285,10 @@ SwiftABIInfo::~SwiftABIInfo() = default;
/// return registers.
bool SwiftABIInfo::occupiesMoreThan(ArrayRef<llvm::Type *> scalarTypes,
unsigned maxAllRegisters) const {
+ // Use the pointer width as the maximum integer register bit width by default.
+ unsigned ptrWidth = CGT.getTarget().getPointerWidth(LangAS::Default);
unsigned intCount = 0, fpCount = 0;
- for (llvm::Type *type : scalarTypes) {
- if (type->isPointerTy()) {
- intCount++;
- } else if (auto intTy = dyn_cast<llvm::IntegerType>(type)) {
- auto ptrWidth = CGT.getTarget().getPointerWidth(LangAS::Default);
- intCount += (intTy->getBitWidth() + ptrWidth - 1) / ptrWidth;
- } else {
- assert(type->isVectorTy() || type->isFloatingPointTy());
- fpCount++;
- }
- }
+ countOccupiedRegisters(scalarTypes, intCount, fpCount, ptrWidth);
return (intCount + fpCount > maxAllRegisters);
}
diff --git a/clang/lib/CodeGen/ABIInfo.h b/clang/lib/CodeGen/ABIInfo.h
index 130fcd375e90a..7d4f4ddc963e8 100644
--- a/clang/lib/CodeGen/ABIInfo.h
+++ b/clang/lib/CodeGen/ABIInfo.h
@@ -147,6 +147,9 @@ class SwiftABIInfo {
CodeGenTypes &CGT;
bool SwiftErrorInRegister;
+ static void countOccupiedRegisters(ArrayRef<llvm::Type *> scalarTypes,
+ unsigned &intCount, unsigned &fpCount,
+ unsigned maxIntRegisterBitWidth);
bool occupiesMoreThan(ArrayRef<llvm::Type *> scalarTypes,
unsigned maxAllRegisters) const;
diff --git a/clang/lib/CodeGen/Targets/WebAssembly.cpp b/clang/lib/CodeGen/Targets/WebAssembly.cpp
index ebe996a4edd8d..756ba4fdfc28e 100644
--- a/clang/lib/CodeGen/Targets/WebAssembly.cpp
+++ b/clang/lib/CodeGen/Targets/WebAssembly.cpp
@@ -45,13 +45,44 @@ class WebAssemblyABIInfo final : public ABIInfo {
AggValueSlot Slot) const override;
};
+class WebAssemblySwiftABIInfo final : public SwiftABIInfo {
+ WebAssemblyABIKind Kind;
+
+public:
+ explicit WebAssemblySwiftABIInfo(CodeGen::CodeGenTypes &CGT,
+ WebAssemblyABIKind K)
+ : SwiftABIInfo(CGT, /*SwiftErrorInRegister=*/false), Kind(K) {}
+
+ bool shouldPassIndirectly(ArrayRef<llvm::Type *> ComponentTys,
+ bool AsReturnValue) const override {
+ unsigned maxIntRegisterBitWidth = 64;
+ unsigned intCount = 0, fpCount = 0;
+ countOccupiedRegisters(ComponentTys, intCount, fpCount,
+ maxIntRegisterBitWidth);
+
+ if (AsReturnValue) {
+ if (Kind == WebAssemblyABIKind::ExperimentalMV) {
+ // If the experimental multivalue ABI is enabled, try to return up to 2
+ // values for each of int and fp, which is a very conservative value
+ // based on the number of available physical gp return registers used in
+ // the major engines to minimize stack spills at JIT time.
+ return intCount > 2 || fpCount > 2;
+ }
+ // By default, limit to 1 total register.
+ return (intCount + fpCount > 1);
+ }
+ // For an argument, limit to 4 total registers, which is the default limit
+ // used by the default SwiftABIInfo implementation.
+ return (intCount + fpCount > 4);
+ }
+};
+
class WebAssemblyTargetCodeGenInfo final : public TargetCodeGenInfo {
public:
explicit WebAssemblyTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT,
WebAssemblyABIKind K)
: TargetCodeGenInfo(std::make_unique<WebAssemblyABIInfo>(CGT, K)) {
- SwiftInfo =
- std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/false);
+ SwiftInfo = std::make_unique<WebAssemblySwiftABIInfo>(CGT, K);
}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
diff --git a/clang/test/CodeGen/WebAssembly/wasm-return-swiftcc.c b/clang/test/CodeGen/WebAssembly/wasm-return-swiftcc.c
new file mode 100644
index 0000000000000..d1fb9add8e7bf
--- /dev/null
+++ b/clang/test/CodeGen/WebAssembly/wasm-return-swiftcc.c
@@ -0,0 +1,82 @@
+// RUN: %clang_cc1 -triple wasm32-unknown-unknown %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -triple wasm64-unknown-unknown %s -emit-llvm -target-abi experimental-mv -o - | FileCheck %s -check-prefix=EXPERIMENTAL-MV
+
+typedef struct {
+ int aa;
+ int bb;
+} s1;
+
+// Multiple-element structs should be returned through sret.
+// CHECK: define swiftcc void @return_s1(ptr dead_on_unwind noalias writable sret(%struct.s1) align 4 %agg.result)
+// EXPERIMENTAL-MV: define swiftcc i64 @return_s1()
+__attribute__((swiftcall))
+s1 return_s1(void) {
+ s1 foo;
+ return foo;
+}
+
+typedef struct {
+ int cc;
+} s2;
+
+// Single-element structs should be returned directly.
+// CHECK: define swiftcc i32 @return_s2()
+// EXPERIMENTAL-MV: define swiftcc i32 @return_s2()
+__attribute__((swiftcall))
+s2 return_s2(void) {
+ s2 foo;
+ return foo;
+}
+
+typedef struct {
+ char c1[4];
+} s3;
+
+// CHECK: define swiftcc i32 @return_s3()
+// EXPERIMENTAL-MV: define swiftcc i32 @return_s3()
+__attribute__((swiftcall))
+s3 return_s3(void) {
+ s3 foo;
+ return foo;
+}
+
+typedef struct {
+ int bf1 : 4;
+ int bf2 : 3;
+ int bf3 : 8;
+} s4;
+
+// CHECK: define swiftcc i16 @return_s4()
+// EXPERIMENTAL-MV: define swiftcc i16 @return_s4()
+__attribute__((swiftcall))
+s4 return_s4(void) {
+ s4 foo;
+ return foo;
+}
+
+// Single-element structs fitting in a i64 should be returned directly.
+typedef struct {
+ long long v;
+} s5;
+
+// CHECK: define swiftcc i64 @return_s5()
+// EXPERIMENTAL-MV: define swiftcc i64 @return_s5()
+__attribute__((swiftcall))
+s5 return_s5(void) {
+ s5 foo;
+ return foo;
+}
+
+// Multiple-element structs not fitting in a i64
+typedef struct {
+ long long v1;
+ long long v2;
+} s6;
+
+// CHECK: define swiftcc void @return_s6(ptr dead_on_unwind noalias writable sret(%struct.s6) align 8 %agg.result)
+// EXPERIMENTAL-MV: define swiftcc { i64, i64 } @return_s6()
+__attribute__((swiftcall))
+s6 return_s6(void) {
+ s6 foo;
+ return foo;
+}
|
|
@llvm/pr-subscribers-clang
Author: Yuta Saito (kateinoigakukun)
Changes
The Swift calling convention on Wasm has historically returned aggregate values directly at the LLVM IR level due to the use of the generic `SwiftABIInfo` implementation. The direct return at LLVM IR level will cause unnecessary stack allocation and memory copies for each aggregate return value at ISel/MI time, which misses lots of optimization opportunities.
This change provides a SwiftABIInfo hook implementation for Wasm targets to limit the number of scalar return values up to 1 by default, which will use sret for aggregate return values. Also adds basic support for the experimental multivalue ABI.
Here is an example of such inefficient codegen:
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-i128:128-n32:64-S128-ni:1:10:20"
target triple = "wasm32-unknown-wasip1"
define hidden swiftcc { i32, i32, i32 } @thunk() #0 {
entry:
%call = tail call swiftcc { i32, i32, i32 } @f(ptr noundef nonnull @thunk) #2
ret { i32, i32, i32 } %call
}
declare swiftcc { i32, i32, i32 } @f(ptr noundef)
(func $thunk (type 0) (param i32 i32 i32)
(local i32 i32 i32)
global.get 0
i32.const 16
i32.sub
local.tee 3
global.set 0
local.get 3
i32.const 1
local.get 3
local.get 3
call 0
local.get 3
i32.load offset=4
local.set 4
local.get 3
i32.load offset=8
local.set 5
local.get 0
local.get 3
i32.load
i32.store
local.get 0
local.get 5
i32.store offset=8
local.get 0
local.get 4
i32.store offset=4
local.get 3
i32.const 16
i32.add
global.set 0)
(elem (;0;) (i32.const 1) func $thunk)
This explicit indirection at LLVM IR generation stage is important for Swift because we have a lot of call-forwarding thunk functions (like protocol witness method and merged function thunks).
After this patch, the IR and the lowered code look like below:
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-i128:128-n32:64-S128-ni:1:10:20"
target triple = "wasm32-unknown-wasip1"
%struct.foo = type { i32, i32, i32 }
define hidden swiftcc void @thunk(ptr dead_on_unwind noalias writable sret(%struct.foo) align 4 %agg.result) #0 {
entry:
call swiftcc void @f(ptr dead_on_unwind writable sret(%struct.foo) align 4 %agg.result, ptr noundef @thunk)
ret void
}
declare swiftcc void @f(ptr dead_on_unwind writable sret(%struct.foo) align 4, ptr noundef) #1
(func $thunk (type 0) (param i32 i32 i32)
local.get 0
i32.const 1
local.get 0
local.get 0
call 0)
(elem (;0;) (i32.const 1) func $thunk)
Full diff: https://github.com/llvm/llvm-project/pull/168652.diff
4 Files Affected:
diff --git a/clang/lib/CodeGen/ABIInfo.cpp b/clang/lib/CodeGen/ABIInfo.cpp
index acd678193b5a8..974b9267c002b 100644
--- a/clang/lib/CodeGen/ABIInfo.cpp
+++ b/clang/lib/CodeGen/ABIInfo.cpp
@@ -254,6 +254,22 @@ void ABIInfo::createCoercedStore(llvm::Value *Val, Address DstAddr,
// Pin the vtable to this file.
SwiftABIInfo::~SwiftABIInfo() = default;
+void SwiftABIInfo::countOccupiedRegisters(ArrayRef<llvm::Type *> scalarTypes,
+ unsigned &intCount, unsigned &fpCount,
+ unsigned maxIntRegisterBitWidth) {
+ for (llvm::Type *type : scalarTypes) {
+ if (type->isPointerTy()) {
+ intCount++;
+ } else if (auto *intTy = dyn_cast<llvm::IntegerType>(type)) {
+ intCount += (intTy->getBitWidth() + maxIntRegisterBitWidth - 1) /
+ maxIntRegisterBitWidth;
+ } else {
+ assert(type->isVectorTy() || type->isFloatingPointTy());
+ fpCount++;
+ }
+ }
+}
+
/// Does the given lowering require more than the given number of
/// registers when expanded?
///
@@ -269,18 +285,10 @@ SwiftABIInfo::~SwiftABIInfo() = default;
/// return registers.
bool SwiftABIInfo::occupiesMoreThan(ArrayRef<llvm::Type *> scalarTypes,
unsigned maxAllRegisters) const {
+ // Use the pointer width as the maximum integer register bit width by default.
+ unsigned ptrWidth = CGT.getTarget().getPointerWidth(LangAS::Default);
unsigned intCount = 0, fpCount = 0;
- for (llvm::Type *type : scalarTypes) {
- if (type->isPointerTy()) {
- intCount++;
- } else if (auto intTy = dyn_cast<llvm::IntegerType>(type)) {
- auto ptrWidth = CGT.getTarget().getPointerWidth(LangAS::Default);
- intCount += (intTy->getBitWidth() + ptrWidth - 1) / ptrWidth;
- } else {
- assert(type->isVectorTy() || type->isFloatingPointTy());
- fpCount++;
- }
- }
+ countOccupiedRegisters(scalarTypes, intCount, fpCount, ptrWidth);
return (intCount + fpCount > maxAllRegisters);
}
diff --git a/clang/lib/CodeGen/ABIInfo.h b/clang/lib/CodeGen/ABIInfo.h
index 130fcd375e90a..7d4f4ddc963e8 100644
--- a/clang/lib/CodeGen/ABIInfo.h
+++ b/clang/lib/CodeGen/ABIInfo.h
@@ -147,6 +147,9 @@ class SwiftABIInfo {
CodeGenTypes &CGT;
bool SwiftErrorInRegister;
+ static void countOccupiedRegisters(ArrayRef<llvm::Type *> scalarTypes,
+ unsigned &intCount, unsigned &fpCount,
+ unsigned maxIntRegisterBitWidth);
bool occupiesMoreThan(ArrayRef<llvm::Type *> scalarTypes,
unsigned maxAllRegisters) const;
diff --git a/clang/lib/CodeGen/Targets/WebAssembly.cpp b/clang/lib/CodeGen/Targets/WebAssembly.cpp
index ebe996a4edd8d..756ba4fdfc28e 100644
--- a/clang/lib/CodeGen/Targets/WebAssembly.cpp
+++ b/clang/lib/CodeGen/Targets/WebAssembly.cpp
@@ -45,13 +45,44 @@ class WebAssemblyABIInfo final : public ABIInfo {
AggValueSlot Slot) const override;
};
+class WebAssemblySwiftABIInfo final : public SwiftABIInfo {
+ WebAssemblyABIKind Kind;
+
+public:
+ explicit WebAssemblySwiftABIInfo(CodeGen::CodeGenTypes &CGT,
+ WebAssemblyABIKind K)
+ : SwiftABIInfo(CGT, /*SwiftErrorInRegister=*/false), Kind(K) {}
+
+ bool shouldPassIndirectly(ArrayRef<llvm::Type *> ComponentTys,
+ bool AsReturnValue) const override {
+ unsigned maxIntRegisterBitWidth = 64;
+ unsigned intCount = 0, fpCount = 0;
+ countOccupiedRegisters(ComponentTys, intCount, fpCount,
+ maxIntRegisterBitWidth);
+
+ if (AsReturnValue) {
+ if (Kind == WebAssemblyABIKind::ExperimentalMV) {
+ // If the experimental multivalue ABI is enabled, try to return up to 2
+ // values for each of int and fp, which is a very conservative value
+ // based on the number of available physical gp return registers used in
+ // the major engines to minimize stack spills at JIT time.
+ return intCount > 2 || fpCount > 2;
+ }
+ // By default, limit to 1 total register.
+ return (intCount + fpCount > 1);
+ }
+ // For an argument, limit to 4 total registers, which is the default limit
+ // used by the default SwiftABIInfo implementation.
+ return (intCount + fpCount > 4);
+ }
+};
+
class WebAssemblyTargetCodeGenInfo final : public TargetCodeGenInfo {
public:
explicit WebAssemblyTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT,
WebAssemblyABIKind K)
: TargetCodeGenInfo(std::make_unique<WebAssemblyABIInfo>(CGT, K)) {
- SwiftInfo =
- std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/false);
+ SwiftInfo = std::make_unique<WebAssemblySwiftABIInfo>(CGT, K);
}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
diff --git a/clang/test/CodeGen/WebAssembly/wasm-return-swiftcc.c b/clang/test/CodeGen/WebAssembly/wasm-return-swiftcc.c
new file mode 100644
index 0000000000000..d1fb9add8e7bf
--- /dev/null
+++ b/clang/test/CodeGen/WebAssembly/wasm-return-swiftcc.c
@@ -0,0 +1,82 @@
+// RUN: %clang_cc1 -triple wasm32-unknown-unknown %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -triple wasm64-unknown-unknown %s -emit-llvm -target-abi experimental-mv -o - | FileCheck %s -check-prefix=EXPERIMENTAL-MV
+
+typedef struct {
+ int aa;
+ int bb;
+} s1;
+
+// Multiple-element structs should be returned through sret.
+// CHECK: define swiftcc void @return_s1(ptr dead_on_unwind noalias writable sret(%struct.s1) align 4 %agg.result)
+// EXPERIMENTAL-MV: define swiftcc i64 @return_s1()
+__attribute__((swiftcall))
+s1 return_s1(void) {
+ s1 foo;
+ return foo;
+}
+
+typedef struct {
+ int cc;
+} s2;
+
+// Single-element structs should be returned directly.
+// CHECK: define swiftcc i32 @return_s2()
+// EXPERIMENTAL-MV: define swiftcc i32 @return_s2()
+__attribute__((swiftcall))
+s2 return_s2(void) {
+ s2 foo;
+ return foo;
+}
+
+typedef struct {
+ char c1[4];
+} s3;
+
+// CHECK: define swiftcc i32 @return_s3()
+// EXPERIMENTAL-MV: define swiftcc i32 @return_s3()
+__attribute__((swiftcall))
+s3 return_s3(void) {
+ s3 foo;
+ return foo;
+}
+
+typedef struct {
+ int bf1 : 4;
+ int bf2 : 3;
+ int bf3 : 8;
+} s4;
+
+// CHECK: define swiftcc i16 @return_s4()
+// EXPERIMENTAL-MV: define swiftcc i16 @return_s4()
+__attribute__((swiftcall))
+s4 return_s4(void) {
+ s4 foo;
+ return foo;
+}
+
+// Single-element structs fitting in a i64 should be returned directly.
+typedef struct {
+ long long v;
+} s5;
+
+// CHECK: define swiftcc i64 @return_s5()
+// EXPERIMENTAL-MV: define swiftcc i64 @return_s5()
+__attribute__((swiftcall))
+s5 return_s5(void) {
+ s5 foo;
+ return foo;
+}
+
+// Multiple-element structs not fitting in a i64
+typedef struct {
+ long long v1;
+ long long v2;
+} s6;
+
+// CHECK: define swiftcc void @return_s6(ptr dead_on_unwind noalias writable sret(%struct.s6) align 8 %agg.result)
+// EXPERIMENTAL-MV: define swiftcc { i64, i64 } @return_s6()
+__attribute__((swiftcall))
+s6 return_s6(void) {
+ s6 foo;
+ return foo;
+}
|
|
@llvm/pr-subscribers-clang-codegen
Author: Yuta Saito (kateinoigakukun)
Changes
The Swift calling convention on Wasm has historically returned aggregate values directly at the LLVM IR level due to the use of the generic `SwiftABIInfo` implementation. The direct return at LLVM IR level will cause unnecessary stack allocation and memory copies for each aggregate return value at ISel/MI time, which misses lots of optimization opportunities.
This change provides a SwiftABIInfo hook implementation for Wasm targets to limit the number of scalar return values up to 1 by default, which will use sret for aggregate return values. Also adds basic support for the experimental multivalue ABI.
Here is an example of such inefficient codegen:
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-i128:128-n32:64-S128-ni:1:10:20"
target triple = "wasm32-unknown-wasip1"
define hidden swiftcc { i32, i32, i32 } @thunk() #0 {
entry:
%call = tail call swiftcc { i32, i32, i32 } @f(ptr noundef nonnull @thunk) #2
ret { i32, i32, i32 } %call
}
declare swiftcc { i32, i32, i32 } @f(ptr noundef)
(func $thunk (type 0) (param i32 i32 i32)
(local i32 i32 i32)
global.get 0
i32.const 16
i32.sub
local.tee 3
global.set 0
local.get 3
i32.const 1
local.get 3
local.get 3
call 0
local.get 3
i32.load offset=4
local.set 4
local.get 3
i32.load offset=8
local.set 5
local.get 0
local.get 3
i32.load
i32.store
local.get 0
local.get 5
i32.store offset=8
local.get 0
local.get 4
i32.store offset=4
local.get 3
i32.const 16
i32.add
global.set 0)
(elem (;0;) (i32.const 1) func $thunk)
This explicit indirection at LLVM IR generation stage is important for Swift because we have a lot of call-forwarding thunk functions (like protocol witness method and merged function thunks).
After this patch, the IR and the lowered code look like below:
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-i128:128-n32:64-S128-ni:1:10:20"
target triple = "wasm32-unknown-wasip1"
%struct.foo = type { i32, i32, i32 }
define hidden swiftcc void @thunk(ptr dead_on_unwind noalias writable sret(%struct.foo) align 4 %agg.result) #0 {
entry:
call swiftcc void @f(ptr dead_on_unwind writable sret(%struct.foo) align 4 %agg.result, ptr noundef @thunk)
ret void
}
declare swiftcc void @f(ptr dead_on_unwind writable sret(%struct.foo) align 4, ptr noundef) #1
(func $thunk (type 0) (param i32 i32 i32)
local.get 0
i32.const 1
local.get 0
local.get 0
call 0)
(elem (;0;) (i32.const 1) func $thunk)
Full diff: https://github.com/llvm/llvm-project/pull/168652.diff
4 Files Affected:
diff --git a/clang/lib/CodeGen/ABIInfo.cpp b/clang/lib/CodeGen/ABIInfo.cpp
index acd678193b5a8..974b9267c002b 100644
--- a/clang/lib/CodeGen/ABIInfo.cpp
+++ b/clang/lib/CodeGen/ABIInfo.cpp
@@ -254,6 +254,22 @@ void ABIInfo::createCoercedStore(llvm::Value *Val, Address DstAddr,
// Pin the vtable to this file.
SwiftABIInfo::~SwiftABIInfo() = default;
+void SwiftABIInfo::countOccupiedRegisters(ArrayRef<llvm::Type *> scalarTypes,
+ unsigned &intCount, unsigned &fpCount,
+ unsigned maxIntRegisterBitWidth) {
+ for (llvm::Type *type : scalarTypes) {
+ if (type->isPointerTy()) {
+ intCount++;
+ } else if (auto *intTy = dyn_cast<llvm::IntegerType>(type)) {
+ intCount += (intTy->getBitWidth() + maxIntRegisterBitWidth - 1) /
+ maxIntRegisterBitWidth;
+ } else {
+ assert(type->isVectorTy() || type->isFloatingPointTy());
+ fpCount++;
+ }
+ }
+}
+
/// Does the given lowering require more than the given number of
/// registers when expanded?
///
@@ -269,18 +285,10 @@ SwiftABIInfo::~SwiftABIInfo() = default;
/// return registers.
bool SwiftABIInfo::occupiesMoreThan(ArrayRef<llvm::Type *> scalarTypes,
unsigned maxAllRegisters) const {
+ // Use the pointer width as the maximum integer register bit width by default.
+ unsigned ptrWidth = CGT.getTarget().getPointerWidth(LangAS::Default);
unsigned intCount = 0, fpCount = 0;
- for (llvm::Type *type : scalarTypes) {
- if (type->isPointerTy()) {
- intCount++;
- } else if (auto intTy = dyn_cast<llvm::IntegerType>(type)) {
- auto ptrWidth = CGT.getTarget().getPointerWidth(LangAS::Default);
- intCount += (intTy->getBitWidth() + ptrWidth - 1) / ptrWidth;
- } else {
- assert(type->isVectorTy() || type->isFloatingPointTy());
- fpCount++;
- }
- }
+ countOccupiedRegisters(scalarTypes, intCount, fpCount, ptrWidth);
return (intCount + fpCount > maxAllRegisters);
}
diff --git a/clang/lib/CodeGen/ABIInfo.h b/clang/lib/CodeGen/ABIInfo.h
index 130fcd375e90a..7d4f4ddc963e8 100644
--- a/clang/lib/CodeGen/ABIInfo.h
+++ b/clang/lib/CodeGen/ABIInfo.h
@@ -147,6 +147,9 @@ class SwiftABIInfo {
CodeGenTypes &CGT;
bool SwiftErrorInRegister;
+ static void countOccupiedRegisters(ArrayRef<llvm::Type *> scalarTypes,
+ unsigned &intCount, unsigned &fpCount,
+ unsigned maxIntRegisterBitWidth);
bool occupiesMoreThan(ArrayRef<llvm::Type *> scalarTypes,
unsigned maxAllRegisters) const;
diff --git a/clang/lib/CodeGen/Targets/WebAssembly.cpp b/clang/lib/CodeGen/Targets/WebAssembly.cpp
index ebe996a4edd8d..756ba4fdfc28e 100644
--- a/clang/lib/CodeGen/Targets/WebAssembly.cpp
+++ b/clang/lib/CodeGen/Targets/WebAssembly.cpp
@@ -45,13 +45,44 @@ class WebAssemblyABIInfo final : public ABIInfo {
AggValueSlot Slot) const override;
};
+class WebAssemblySwiftABIInfo final : public SwiftABIInfo {
+ WebAssemblyABIKind Kind;
+
+public:
+ explicit WebAssemblySwiftABIInfo(CodeGen::CodeGenTypes &CGT,
+ WebAssemblyABIKind K)
+ : SwiftABIInfo(CGT, /*SwiftErrorInRegister=*/false), Kind(K) {}
+
+ bool shouldPassIndirectly(ArrayRef<llvm::Type *> ComponentTys,
+ bool AsReturnValue) const override {
+ unsigned maxIntRegisterBitWidth = 64;
+ unsigned intCount = 0, fpCount = 0;
+ countOccupiedRegisters(ComponentTys, intCount, fpCount,
+ maxIntRegisterBitWidth);
+
+ if (AsReturnValue) {
+ if (Kind == WebAssemblyABIKind::ExperimentalMV) {
+ // If the experimental multivalue ABI is enabled, try to return up to 2
+ // values for each of int and fp, which is a very conservative value
+ // based on the number of available physical gp return registers used in
+ // the major engines to minimize stack spills at JIT time.
+ return intCount > 2 || fpCount > 2;
+ }
+ // By default, limit to 1 total register.
+ return (intCount + fpCount > 1);
+ }
+ // For an argument, limit to 4 total registers, which is the default limit
+ // used by the default SwiftABIInfo implementation.
+ return (intCount + fpCount > 4);
+ }
+};
+
class WebAssemblyTargetCodeGenInfo final : public TargetCodeGenInfo {
public:
explicit WebAssemblyTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT,
WebAssemblyABIKind K)
: TargetCodeGenInfo(std::make_unique<WebAssemblyABIInfo>(CGT, K)) {
- SwiftInfo =
- std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/false);
+ SwiftInfo = std::make_unique<WebAssemblySwiftABIInfo>(CGT, K);
}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
diff --git a/clang/test/CodeGen/WebAssembly/wasm-return-swiftcc.c b/clang/test/CodeGen/WebAssembly/wasm-return-swiftcc.c
new file mode 100644
index 0000000000000..d1fb9add8e7bf
--- /dev/null
+++ b/clang/test/CodeGen/WebAssembly/wasm-return-swiftcc.c
@@ -0,0 +1,82 @@
+// RUN: %clang_cc1 -triple wasm32-unknown-unknown %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -triple wasm64-unknown-unknown %s -emit-llvm -target-abi experimental-mv -o - | FileCheck %s -check-prefix=EXPERIMENTAL-MV
+
+typedef struct {
+ int aa;
+ int bb;
+} s1;
+
+// Multiple-element structs should be returned through sret.
+// CHECK: define swiftcc void @return_s1(ptr dead_on_unwind noalias writable sret(%struct.s1) align 4 %agg.result)
+// EXPERIMENTAL-MV: define swiftcc i64 @return_s1()
+__attribute__((swiftcall))
+s1 return_s1(void) {
+ s1 foo;
+ return foo;
+}
+
+typedef struct {
+ int cc;
+} s2;
+
+// Single-element structs should be returned directly.
+// CHECK: define swiftcc i32 @return_s2()
+// EXPERIMENTAL-MV: define swiftcc i32 @return_s2()
+__attribute__((swiftcall))
+s2 return_s2(void) {
+ s2 foo;
+ return foo;
+}
+
+typedef struct {
+ char c1[4];
+} s3;
+
+// CHECK: define swiftcc i32 @return_s3()
+// EXPERIMENTAL-MV: define swiftcc i32 @return_s3()
+__attribute__((swiftcall))
+s3 return_s3(void) {
+ s3 foo;
+ return foo;
+}
+
+typedef struct {
+ int bf1 : 4;
+ int bf2 : 3;
+ int bf3 : 8;
+} s4;
+
+// CHECK: define swiftcc i16 @return_s4()
+// EXPERIMENTAL-MV: define swiftcc i16 @return_s4()
+__attribute__((swiftcall))
+s4 return_s4(void) {
+ s4 foo;
+ return foo;
+}
+
+// Single-element structs fitting in a i64 should be returned directly.
+typedef struct {
+ long long v;
+} s5;
+
+// CHECK: define swiftcc i64 @return_s5()
+// EXPERIMENTAL-MV: define swiftcc i64 @return_s5()
+__attribute__((swiftcall))
+s5 return_s5(void) {
+ s5 foo;
+ return foo;
+}
+
+// Multiple-element structs not fitting in a i64
+typedef struct {
+ long long v1;
+ long long v2;
+} s6;
+
+// CHECK: define swiftcc void @return_s6(ptr dead_on_unwind noalias writable sret(%struct.s6) align 8 %agg.result)
+// EXPERIMENTAL-MV: define swiftcc { i64, i64 } @return_s6()
+__attribute__((swiftcall))
+s6 return_s6(void) {
+ s6 foo;
+ return foo;
+}
|
|
This seems reasonable. One other thing to think about: have you looked at clang's multi-value ABI ( |
The Swift calling convention on Wasm has historically returned aggregate values directly at the LLVM IR level due to the use of the generic `SwiftABIInfo` implementation. The direct return at LLVM IR level will cause unnecessary stack allocation and memory copies for each aggregate return value at ISel/MI time, which misses lots of optimization opportunities.
This change provides a SwiftABIInfo hook implementation for Wasm targets to limit the number of scalar return values up to 1 by default, which will use sret for aggregate return values. Also adds basic support for the experimental multivalue ABI.
Here is an example of such inefficient codegen:
This explicit indirection at LLVM IR generation stage is important for Swift because we have a lot of call-forwarding thunk functions (like protocol witness method and merged function thunks).
After this patch, the IR and the lowered code look like below: