Skip to content

Commit 5c7b43a

Browse files
[clang][AArch32] Correctly align HA arguments when passed on the stack
Analogously to https://reviews.llvm.org/D98794, this patch uses the `alignstack` attribute to fix incorrect passing of homogeneous aggregate (HA) arguments on AArch32.

The EABI/AAPCS was recently updated to clarify how VFP co-processor candidates are aligned: ARM-software/abi-aa@4488e34

Differential Revision: https://reviews.llvm.org/D100853
1 parent 822be4b commit 5c7b43a

File tree

5 files changed

+665
-5
lines changed

5 files changed

+665
-5
lines changed

clang/lib/CodeGen/TargetInfo.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6440,7 +6440,16 @@ ABIArgInfo ARMABIInfo::classifyHomogeneousAggregate(QualType Ty,
64406440
return ABIArgInfo::getDirect(Ty, 0, nullptr, false);
64416441
}
64426442
}
6443-
return ABIArgInfo::getDirect(nullptr, 0, nullptr, false);
6443+
unsigned Align = 0;
6444+
if (getABIKind() == ARMABIInfo::AAPCS ||
6445+
getABIKind() == ARMABIInfo::AAPCS_VFP) {
6446+
// For alignment adjusted HFAs, cap the argument alignment to 8, leave it
6447+
// default otherwise.
6448+
Align = getContext().getTypeUnadjustedAlignInChars(Ty).getQuantity();
6449+
unsigned BaseAlign = getContext().getTypeAlignInChars(Base).getQuantity();
6450+
Align = (Align > BaseAlign && Align >= 8) ? 8 : 0;
6451+
}
6452+
return ABIArgInfo::getDirect(nullptr, 0, nullptr, false, Align);
64446453
}
64456454

64466455
ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic,
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
// RUN: %clang_cc1 -triple armv7-eabi -emit-llvm %s -o - | \
2+
// RUN: FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
3+
// RUN: %clang_cc1 -triple armv7-eabi -target-abi aapcs -mfloat-abi hard -emit-llvm %s -o - | \
4+
// RUN: FileCheck %s --check-prefixes=CHECK,CHECK-HARD
5+
// REQUIRES: arm-registered-target
6+
7+
// CHECK: %struct.S0 = type { [4 x float] }
8+
// CHECK: %struct.S1 = type { [2 x float] }
9+
// CHECK: %struct.S2 = type { [4 x float] }
10+
// CHECK: %struct.D0 = type { [2 x double] }
11+
// CHECK: %struct.D1 = type { [2 x double] }
12+
// CHECK: %struct.D2 = type { [4 x double] }
13+
14+
typedef struct {
15+
float v[4];
16+
} S0;
17+
18+
float f0(S0 s) {
19+
// CHECK-SOFT: define{{.*}} float @f0([4 x i32] %s.coerce)
20+
// CHECK-HARD: define{{.*}} arm_aapcs_vfpcc float @f0(%struct.S0 %s.coerce)
21+
return s.v[0];
22+
}
23+
24+
float f0call() {
25+
S0 s = {0.0f, };
26+
return f0(s);
27+
// CHECK-SOFT: call float @f0([4 x i32]
28+
// CHECK-HARD: call arm_aapcs_vfpcc float @f0(%struct.S0
29+
}
30+
31+
typedef struct {
32+
__attribute__((aligned(8))) float v[2];
33+
} S1;
34+
35+
float f1(S1 s) {
36+
// CHECK-SOFT: define{{.*}} float @f1([1 x i64]
37+
// CHECK-HARD: define{{.*}} arm_aapcs_vfpcc float @f1(%struct.S1 alignstack(8)
38+
return s.v[0];
39+
}
40+
41+
float f1call() {
42+
S1 s = {0.0f, };
43+
return f1(s);
44+
// CHECK-SOFT: call float @f1([1 x i64
45+
// CHECK-HARD: call arm_aapcs_vfpcc float @f1(%struct.S1 alignstack(8)
46+
}
47+
48+
typedef struct {
49+
__attribute__((aligned(16))) float v[4];
50+
} S2;
51+
52+
float f2(S2 s) {
53+
// CHECK-SOFT: define{{.*}} float @f2([2 x i64]
54+
// CHECK-HARD: define{{.*}} arm_aapcs_vfpcc float @f2(%struct.S2 alignstack(8)
55+
return s.v[0];
56+
}
57+
58+
float f2call() {
59+
S2 s = {0.0f, };
60+
return f2(s);
61+
// CHECK-SOFT: call float @f2([2 x i64]
62+
// CHECK-HARD: call arm_aapcs_vfpcc float @f2(%struct.S2 alignstack(8)
63+
}
64+
65+
typedef struct {
66+
double v[2];
67+
} D0;
68+
69+
double g0(D0 d) {
70+
// CHECK-SOFT: define{{.*}} double @g0([2 x i64]
71+
// CHECK-HARD: define{{.*}} arm_aapcs_vfpcc double @g0(%struct.D0 %d.coerce
72+
return d.v[0];
73+
}
74+
75+
double g0call() {
76+
D0 d = {0.0, };
77+
return g0(d);
78+
// CHECK-SOFT: call double @g0([2 x i64]
79+
// CHECK-HARD: call arm_aapcs_vfpcc double @g0(%struct.D0 %1
80+
}
81+
82+
typedef struct {
83+
__attribute__((aligned(16))) double v[2];
84+
} D1;
85+
86+
double g1(D1 d) {
87+
// CHECK-SOFT: define{{.*}} double @g1([2 x i64]
88+
// CHECK-HARD: define{{.*}} arm_aapcs_vfpcc double @g1(%struct.D1 alignstack(8)
89+
return d.v[0];
90+
}
91+
92+
double g1call() {
93+
D1 d = {0.0, };
94+
return g1(d);
95+
// CHECK-SOFT: call double @g1([2 x i64]
96+
// CHECK-HARD: call arm_aapcs_vfpcc double @g1(%struct.D1 alignstack(8)
97+
}
98+
99+
typedef struct {
100+
__attribute__((aligned(32))) double v[4];
101+
} D2;
102+
103+
double g2(D2 d) {
104+
// CHECK-SOFT: define{{.*}} double @g2([4 x i64]
105+
// CHECK-HARD: define{{.*}} arm_aapcs_vfpcc double @g2(%struct.D2 alignstack(8)
106+
return d.v[0];
107+
}
108+
109+
double g2call() {
110+
D2 d = {0.0, };
111+
return g2(d);
112+
// CHECK-SOFT: call double @g2([4 x i64]
113+
// CHECK-HARD: call arm_aapcs_vfpcc double @g2(%struct.D2 alignstack(8)
114+
}

llvm/lib/Target/ARM/ARMCallingConv.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -256,22 +256,26 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned ValNo, MVT ValVT,
256256
}
257257
PendingMembers.clear();
258258
return true;
259-
} else if (LocVT != MVT::i32)
259+
}
260+
261+
if (LocVT != MVT::i32)
260262
RegList = SRegList;
261263

262264
// Mark all regs as unavailable (AAPCS rule C.2.vfp for VFP, C.6 for core)
263265
for (auto Reg : RegList)
264266
State.AllocateReg(Reg);
265267

268+
// Clamp the alignment between 4 and 8.
269+
if (State.getMachineFunction().getSubtarget<ARMSubtarget>().isTargetAEABI())
270+
Alignment = ArgFlags.getNonZeroMemAlign() <= 4 ? Align(4) : Align(8);
271+
266272
// After the first item has been allocated, the rest are packed as tightly as
267273
// possible. (E.g. an incoming i64 would have starting Align of 8, but we'll
268274
// be allocating a bunch of i32 slots).
269-
const Align RestAlign = std::min(Alignment, Align(Size));
270-
271275
for (auto &It : PendingMembers) {
272276
It.convertToMem(State.AllocateStack(Size, Alignment));
273277
State.addLoc(It);
274-
Alignment = RestAlign;
278+
Alignment = Align(1);
275279
}
276280

277281
// All pending members have now been allocated

0 commit comments

Comments
 (0)