[AArch64][SVE] Handle consecutive Predicates in CC_AArch64_Custom_Block #90122

Merged
merged 5 commits on May 24, 2024
29 changes: 23 additions & 6 deletions llvm/lib/Target/AArch64/AArch64CallingConvention.cpp
@@ -38,6 +38,8 @@ static const MCPhysReg QRegList[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2,
static const MCPhysReg ZRegList[] = {AArch64::Z0, AArch64::Z1, AArch64::Z2,
AArch64::Z3, AArch64::Z4, AArch64::Z5,
AArch64::Z6, AArch64::Z7};
static const MCPhysReg PRegList[] = {AArch64::P0, AArch64::P1, AArch64::P2,
AArch64::P3};

static bool finishStackBlock(SmallVectorImpl<CCValAssign> &PendingMembers,
MVT LocVT, ISD::ArgFlagsTy &ArgFlags,
@@ -59,11 +61,17 @@ static bool finishStackBlock(SmallVectorImpl<CCValAssign> &PendingMembers,
// CCAssignFn again we want it to behave as if all remaining registers are
// allocated. This will force the code to pass the tuple indirectly in
// accordance with the PCS.
bool RegsAllocated[8];
bool ZRegsAllocated[8];
for (int I = 0; I < 8; I++) {
RegsAllocated[I] = State.isAllocated(ZRegList[I]);
ZRegsAllocated[I] = State.isAllocated(ZRegList[I]);
State.AllocateReg(ZRegList[I]);
}
// The same applies to P registers.
bool PRegsAllocated[4];
for (int I = 0; I < 4; I++) {
PRegsAllocated[I] = State.isAllocated(PRegList[I]);
State.AllocateReg(PRegList[I]);
}

auto &It = PendingMembers[0];
CCAssignFn *AssignFn =
@@ -79,8 +87,11 @@ static bool finishStackBlock(SmallVectorImpl<CCValAssign> &PendingMembers,
// Return the register state back to how it was before, leaving any
// unallocated registers available for other smaller types.
for (int I = 0; I < 8; I++)
if (!RegsAllocated[I])
if (!ZRegsAllocated[I])
State.DeallocateReg(ZRegList[I]);
for (int I = 0; I < 4; I++)
if (!PRegsAllocated[I])
State.DeallocateReg(PRegList[I]);

// All pending members have now been allocated
PendingMembers.clear();
@@ -140,9 +151,15 @@ static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
RegList = DRegList;
else if (LocVT.SimpleTy == MVT::f128 || LocVT.is128BitVector())
RegList = QRegList;
else if (LocVT.isScalableVector())
RegList = ZRegList;
else {
else if (LocVT.isScalableVector()) {
// Scalable masks should be passed in predicate registers.
if (LocVT == MVT::nxv1i1 || LocVT == MVT::nxv2i1 || LocVT == MVT::nxv4i1 ||
LocVT == MVT::nxv8i1 || LocVT == MVT::nxv16i1 ||
LocVT == MVT::aarch64svcount)
RegList = PRegList;
else
RegList = ZRegList;
} else {
// Not an array we want to split up after all.
return false;
}
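
As a hedged aside (not part of the patch), the new branch in CC_AArch64_Custom_Block can be read as a single predicate on the value type. The sketch below is illustrative only, uses just the MVT enumerators visible in the diff, and assumes it would live in the same translation unit as the code above (so no extra includes are shown); the helper name is made up for the example.

static bool isSVEPredicateOrCountMVT(MVT VT) {
  // Mirrors the condition above: scalable i1 masks and aarch64svcount are
  // the types that should be assigned from PRegList (P0-P3).
  switch (VT.SimpleTy) {
  case MVT::nxv1i1:
  case MVT::nxv2i1:
  case MVT::nxv4i1:
  case MVT::nxv8i1:
  case MVT::nxv16i1:
  case MVT::aarch64svcount:
    return true;
  default:
    return false;
  }
}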
2 changes: 0 additions & 2 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -7178,7 +7178,6 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
uint64_t PartSize = VA.getValVT().getStoreSize().getKnownMinValue();
unsigned NumParts = 1;
if (Ins[i].Flags.isInConsecutiveRegs()) {
assert(!Ins[i].Flags.isInConsecutiveRegsLast());
while (!Ins[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
++NumParts;
}
@@ -8175,7 +8174,6 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
uint64_t PartSize = StoreSize;
unsigned NumParts = 1;
if (Outs[i].Flags.isInConsecutiveRegs()) {
assert(!Outs[i].Flags.isInConsecutiveRegsLast());
while (!Outs[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
++NumParts;
StoreSize *= NumParts;
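
A hedged note on the two assert removals above: with predicates now handled by CC_AArch64_Custom_Block, an indirectly passed block can consist of a single member (for example the [1 x <vscale x 16 x i1>] argument in the tests below), so the first part of a consecutive-registers block may itself carry the "last" flag, which the old asserts rejected; the NumParts walk already copes with that. A minimal standalone model of that walk, assuming nothing beyond the two flag names visible in the diff:

#include <cassert>
#include <vector>

struct ArgFlags {
  bool InConsecutiveRegs = false;
  bool InConsecutiveRegsLast = false;
};

// Counts the parts of a consecutive-registers block starting at index i,
// in the same way as the loops in LowerFormalArguments/LowerCall above.
static unsigned countParts(const std::vector<ArgFlags> &Ins, unsigned i) {
  unsigned NumParts = 1;
  if (Ins[i].InConsecutiveRegs) {
    // A one-element block is both first and last; the removed asserts
    // would have fired here, but the loop simply terminates immediately.
    while (!Ins[i + NumParts - 1].InConsecutiveRegsLast)
      ++NumParts;
  }
  return NumParts;
}

int main() {
  std::vector<ArgFlags> SingleElementBlock = {{true, true}};
  assert(countParts(SingleElementBlock, 0) == 1);
  return 0;
}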
197 changes: 197 additions & 0 deletions llvm/test/CodeGen/AArch64/sve-calling-convention-byref.ll
@@ -72,6 +72,203 @@ define aarch64_sve_vector_pcs <vscale x 16 x i1> @caller_with_many_svepred_arg(<
ret <vscale x 16 x i1> %ret
}

; Test that arg2 is passed indirectly through x0, i.e., x0 = &%arg2, and the return value is loaded from x0:
; P0 = ldr [x0]
define aarch64_sve_vector_pcs <vscale x 16 x i1> @callee_with_svepred_arg_4xv16i1_1xv16i1([4 x <vscale x 16 x i1>] %arg1, [1 x <vscale x 16 x i1>] %arg2) {
; CHECK: name: callee_with_svepred_arg_4xv16i1_1xv16i1
; CHECK: [[BASE:%[0-9]+]]:gpr64common = COPY $x0
; CHECK: [[PRED0:%[0-9]+]]:ppr = LDR_PXI [[BASE]], 0 :: (load (<vscale x 1 x s16>))
; CHECK: $p0 = COPY [[PRED0]]
; CHECK: RET_ReallyLR implicit $p0
%res = extractvalue [1 x <vscale x 16 x i1>] %arg2, 0
ret <vscale x 16 x i1> %res
}

; Test that arg1 is stored to the stack from p0, and the stack location is passed through x0 to set up the call:
; str P0, [stack_loc_for_args]
; x0 = stack_loc_for_args
define aarch64_sve_vector_pcs <vscale x 16 x i1> @caller_with_svepred_arg_1xv16i1_4xv16i1([1 x <vscale x 16 x i1>] %arg1, [4 x <vscale x 16 x i1>] %arg2) {
; CHECK: name: caller_with_svepred_arg_1xv16i1_4xv16i1
; CHECK: stack:
; CHECK: - { id: 0, name: '', type: default, offset: 0, size: 2, alignment: 2,
; CHECK-NEXT: stack-id: scalable-vector,
; CHECK: [[PRED0:%[0-9]+]]:ppr = COPY $p0
; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
; CHECK: STR_PXI [[PRED0]], %stack.0, 0 :: (store (<vscale x 1 x s16>) into %stack.0)
; CHECK: [[STACK:%[0-9]+]]:gpr64sp = ADDXri %stack.0, 0, 0
; CHECK: $x0 = COPY [[STACK]]
; CHECK: BL @callee_with_svepred_arg_4xv16i1_1xv16i1, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit $p0, implicit $p1, implicit $p2, implicit $p3, implicit $x0, implicit-def $sp, implicit-def $p0
; CHECK: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
%res = call <vscale x 16 x i1> @callee_with_svepred_arg_4xv16i1_1xv16i1([4 x <vscale x 16 x i1>] %arg2, [1 x <vscale x 16 x i1>] %arg1)
ret <vscale x 16 x i1> %res
}

; Test that arg2 is passed through x0, i.e., x0 = &%arg2; and return values are loaded from x0:
; P0 = ldr [x0]
; P1 = ldr [x0 + sizeof(Px)]
; P2 = ldr [x0 + 2*sizeof(Px)]
; P3 = ldr [x0 + 3*sizeof(Px)]
define aarch64_sve_vector_pcs [4 x <vscale x 16 x i1>] @callee_with_svepred_arg_4xv16i1_4xv16i1([4 x <vscale x 16 x i1>] %arg1, [4 x <vscale x 16 x i1>] %arg2) {
; CHECK: name: callee_with_svepred_arg_4xv16i1_4xv16i1
; CHECK: [[BASE:%[0-9]+]]:gpr64common = COPY $x0
; CHECK: [[OFFSET1:%[0-9]+]]:gpr64 = CNTD_XPiI 31, 1, implicit $vg
; CHECK: [[ADDR1:%[0-9]+]]:gpr64common = nuw ADDXrr [[BASE]], killed [[OFFSET1]]
; CHECK: [[PRED1:%[0-9]+]]:ppr = LDR_PXI killed [[ADDR1]], 0 :: (load (<vscale x 1 x s16>))
; CHECK: [[OFFSET2:%[0-9]+]]:gpr64 = CNTW_XPiI 31, 1, implicit $vg
; CHECK: [[ADDR2:%[0-9]+]]:gpr64common = ADDXrr [[BASE]], killed [[OFFSET2]]
; CHECK: [[PRED2:%[0-9]+]]:ppr = LDR_PXI killed [[ADDR2]], 0 :: (load (<vscale x 1 x s16>))
; CHECK: [[OFFSET3:%[0-9]+]]:gpr64 = CNTD_XPiI 31, 3, implicit $vg
; CHECK: [[ADDR3:%[0-9]+]]:gpr64common = ADDXrr [[BASE]], killed [[OFFSET3]]
; CHECK: [[PRED3:%[0-9]+]]:ppr = LDR_PXI killed [[ADDR3]], 0 :: (load (<vscale x 1 x s16>))
; CHECK: [[PRED0:%[0-9]+]]:ppr = LDR_PXI [[BASE]], 0 :: (load (<vscale x 1 x s16>))
; CHECK: $p0 = COPY [[PRED0]]
; CHECK: $p1 = COPY [[PRED1]]
; CHECK: $p2 = COPY [[PRED2]]
; CHECK: $p3 = COPY [[PRED3]]
; CHECK: RET_ReallyLR implicit $p0, implicit $p1, implicit $p2, implicit $p3
ret [4 x <vscale x 16 x i1>] %arg2
}
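
; A hedged arithmetic aside on the CNTD/CNTW offsets checked above (not taken from the patch): an SVE predicate register holds VL/8 bits, i.e. VL/64 bytes, so sizeof(Px) in these comments equals CNTD (the number of 64-bit doublewords in a vector), 2*sizeof(Px) equals CNTW, and 3*sizeof(Px) is CNTD with a multiplier of 3. A tiny sketch of the resulting byte offsets for a few example vector lengths:

#include <cstdio>

int main() {
  // Byte offsets of P0..P3 within an indirectly passed predicate block,
  // for some example SVE vector lengths (in bits).
  for (unsigned VLBits : {128u, 256u, 512u}) {
    unsigned PredBytes = VLBits / 64; // sizeof(Px): a predicate is VL/8 bits
    std::printf("VL=%4u bits: sizeof(Px)=%2u, offsets = 0, %u, %u, %u\n",
                VLBits, PredBytes, PredBytes, 2 * PredBytes, 3 * PredBytes);
  }
  return 0;
}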

; Test that arg1 is stored to the stack from p0~p3, and the stack location is passed through x0 to set up the call:
; str P0, [stack_loc_for_args]
; str P1, [stack_loc_for_args + sizeof(Px)]
; str P2, [stack_loc_for_args + 2*sizeof(Px)]
; str P3, [stack_loc_for_args + 3*sizeof(Px)]
; x0 = stack_loc_for_args
define [4 x <vscale x 16 x i1>] @caller_with_svepred_arg_4xv16i1_4xv16i1([4 x <vscale x 16 x i1>] %arg1, [4 x <vscale x 16 x i1>] %arg2) {
; CHECK: name: caller_with_svepred_arg_4xv16i1_4xv16i1
; CHECK: stack:
; CHECK: - { id: 0, name: '', type: default, offset: 0, size: 8, alignment: 2,
; CHECK-NEXT: stack-id: scalable-vector,
; CHECK: [[PRED3:%[0-9]+]]:ppr = COPY $p3
; CHECK: [[PRED2:%[0-9]+]]:ppr = COPY $p2
; CHECK: [[PRED1:%[0-9]+]]:ppr = COPY $p1
; CHECK: [[PRED0:%[0-9]+]]:ppr = COPY $p0
; CHECK: [[OFFSET1:%[0-9]+]]:gpr64 = CNTD_XPiI 31, 1, implicit $vg
; CHECK: [[OFFSET2:%[0-9]+]]:gpr64 = CNTW_XPiI 31, 1, implicit $vg
; CHECK: [[OFFSET3:%[0-9]+]]:gpr64 = CNTD_XPiI 31, 3, implicit $vg
; CHECK: [[STACK:%[0-9]+]]:gpr64common = ADDXri %stack.0, 0, 0
; CHECK: [[ADDR3:%[0-9]+]]:gpr64common = ADDXrr [[STACK]], [[OFFSET3]]
; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
; CHECK: STR_PXI [[PRED3]], killed [[ADDR3]], 0 :: (store (<vscale x 1 x s16>))
; CHECK: [[ADDR2:%[0-9]+]]:gpr64common = ADDXrr [[STACK]], [[OFFSET2]]
; CHECK: STR_PXI [[PRED2]], killed [[ADDR2]], 0 :: (store (<vscale x 1 x s16>))
; CHECK: [[ADDR1:%[0-9]+]]:gpr64common = nuw ADDXrr [[STACK]], [[OFFSET1]]
; CHECK: STR_PXI [[PRED1]], killed [[ADDR1]], 0 :: (store (<vscale x 1 x s16>))
; CHECK: STR_PXI [[PRED0]], %stack.0, 0 :: (store (<vscale x 1 x s16>) into %stack.0)
; CHECK: $x0 = COPY [[STACK]]
; CHECK: BL @callee_with_svepred_arg_4xv16i1_4xv16i1, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit $p0, implicit $p1, implicit $p2, implicit $p3, implicit $x0, implicit-def $sp, implicit-def $p0, implicit-def $p1, implicit-def $p2, implicit-def $p3
; CHECK: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
%res = call [4 x <vscale x 16 x i1>] @callee_with_svepred_arg_4xv16i1_4xv16i1([4 x <vscale x 16 x i1>] %arg2, [4 x <vscale x 16 x i1>] %arg1)
ret [4 x <vscale x 16 x i1>] %res
}

; Test that arg2 is passed through x0, i.e., x0 = &%arg2; and return values are loaded from x0:
; P0 = ldr [x0]
; P1 = ldr [x0 + sizeof(Px)]
; P2 = ldr [x0 + 2*sizeof(Px)]
; P3 = ldr [x0 + 3*sizeof(Px)]
define aarch64_sve_vector_pcs [2 x <vscale x 32 x i1>] @callee_with_svepred_arg_1xv16i1_2xv32i1([1 x <vscale x 16 x i1>] %arg1, [2 x <vscale x 32 x i1>] %arg2) {
; CHECK: name: callee_with_svepred_arg_1xv16i1_2xv32i1
; CHECK: [[BASE:%[0-9]+]]:gpr64common = COPY $x0
; CHECK: [[OFFSET1:%[0-9]+]]:gpr64 = CNTD_XPiI 31, 1, implicit $vg
; CHECK: [[ADDR1:%[0-9]+]]:gpr64common = nuw ADDXrr [[BASE]], killed [[OFFSET1]]
; CHECK: [[PRED1:%[0-9]+]]:ppr = LDR_PXI killed [[ADDR1]], 0 :: (load (<vscale x 1 x s16>))
; CHECK: [[OFFSET2:%[0-9]+]]:gpr64 = CNTW_XPiI 31, 1, implicit $vg
; CHECK: [[ADDR2:%[0-9]+]]:gpr64common = ADDXrr [[BASE]], killed [[OFFSET2]]
; CHECK: [[PRED2:%[0-9]+]]:ppr = LDR_PXI killed [[ADDR2]], 0 :: (load (<vscale x 1 x s16>))
; CHECK: [[OFFSET3:%[0-9]+]]:gpr64 = CNTD_XPiI 31, 3, implicit $vg
; CHECK: [[ADDR3:%[0-9]+]]:gpr64common = ADDXrr [[BASE]], killed [[OFFSET3]]
; CHECK: [[PRED3:%[0-9]+]]:ppr = LDR_PXI killed [[ADDR3]], 0 :: (load (<vscale x 1 x s16>))
; CHECK: [[PRED0:%[0-9]+]]:ppr = LDR_PXI [[BASE]], 0 :: (load (<vscale x 1 x s16>))
; CHECK: $p0 = COPY [[PRED0]]
; CHECK: $p1 = COPY [[PRED1]]
; CHECK: $p2 = COPY [[PRED2]]
; CHECK: $p3 = COPY [[PRED3]]
; CHECK: RET_ReallyLR implicit $p0, implicit $p1, implicit $p2, implicit $p3
ret [2 x <vscale x 32 x i1>] %arg2
}

; Test that arg1 is stored to the stack from p0~p3, and the stack location is passed through x0 to set up the call:
; str P0, [stack_loc_for_args]
; str P1, [stack_loc_for_args + sizeof(Px)]
; str P2, [stack_loc_for_args + 2*sizeof(Px)]
; str P3, [stack_loc_for_args + 3*sizeof(Px)]
; x0 = stack_loc_for_args
define [2 x <vscale x 32 x i1>] @caller_with_svepred_arg_2xv32i1_1xv16i1([2 x <vscale x 32 x i1>] %arg1, [1 x <vscale x 16 x i1>] %arg2) {
; CHECK: name: caller_with_svepred_arg_2xv32i1_1xv16i1
; CHECK: stack:
; CHECK: - { id: 0, name: '', type: default, offset: 0, size: 8, alignment: 2,
; CHECK-NEXT: stack-id: scalable-vector,
; CHECK: [[PRED3:%[0-9]+]]:ppr = COPY $p3
; CHECK: [[PRED2:%[0-9]+]]:ppr = COPY $p2
; CHECK: [[PRED1:%[0-9]+]]:ppr = COPY $p1
; CHECK: [[PRED0:%[0-9]+]]:ppr = COPY $p0
; CHECK: [[OFFSET3:%[0-9]+]]:gpr64 = CNTD_XPiI 31, 3, implicit $vg
; CHECK: [[STACK:%[0-9]+]]:gpr64common = ADDXri %stack.0, 0, 0
; CHECK: [[ADDR3:%[0-9]+]]:gpr64common = ADDXrr [[STACK]], killed [[OFFSET3]]
; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
; CHECK: STR_PXI [[PRED3]], killed [[ADDR3]], 0 :: (store (<vscale x 1 x s16>))
; CHECK: [[OFFSET2:%[0-9]+]]:gpr64 = CNTW_XPiI 31, 1, implicit $vg
; CHECK: [[ADDR2:%[0-9]+]]:gpr64common = ADDXrr [[STACK]], killed [[OFFSET2]]
; CHECK: STR_PXI [[PRED2]], killed [[ADDR2]], 0 :: (store (<vscale x 1 x s16>))
; CHECK: [[OFFSET1:%[0-9]+]]:gpr64 = CNTD_XPiI 31, 1, implicit $vg
; CHECK: [[ADDR1:%[0-9]+]]:gpr64common = nuw ADDXrr [[STACK]], killed [[OFFSET1]]
; CHECK: STR_PXI [[PRED1]], killed [[ADDR1]], 0 :: (store (<vscale x 1 x s16>))
; CHECK: STR_PXI [[PRED0]], %stack.0, 0 :: (store (<vscale x 1 x s16>) into %stack.0)
; CHECK: $x0 = COPY [[STACK]]
; CHECK: BL @callee_with_svepred_arg_1xv16i1_2xv32i1, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit $p0, implicit $x0, implicit-def $sp, implicit-def $p0, implicit-def $p1, implicit-def $p2, implicit-def $p3
; CHECK: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
%res = call [2 x <vscale x 32 x i1>] @callee_with_svepred_arg_1xv16i1_2xv32i1([1 x <vscale x 16 x i1>] %arg2, [2 x <vscale x 32 x i1>] %arg1)
ret [2 x <vscale x 32 x i1>] %res
}

; Test that arg1 and arg3 are passed via P0~P3, and arg2 is passed indirectly through an address on the stack in x0
define aarch64_sve_vector_pcs [4 x <vscale x 16 x i1>] @callee_with_svepred_arg_2xv16i1_4xv16i1_2xv16i1([2 x <vscale x 16 x i1>] %arg1, [4 x <vscale x 16 x i1>] %arg2, [2 x <vscale x 16 x i1>] %arg3) nounwind {
; CHECK: name: callee_with_svepred_arg_2xv16i1_4xv16i1_2xv16i1
; CHECK: [[P3:%[0-9]+]]:ppr = COPY $p3
; CHECK: [[P2:%[0-9]+]]:ppr = COPY $p2
; CHECK: [[X0:%[0-9]+]]:gpr64common = COPY $x0
; CHECK: [[P1:%[0-9]+]]:ppr = COPY $p1
; CHECK: [[P0:%[0-9]+]]:ppr = COPY $p0
; CHECK: [[OFFSET3:%[0-9]+]]:gpr64 = CNTD_XPiI 31, 3, implicit $vg
; CHECK: [[ADDR3:%[0-9]+]]:gpr64common = ADDXrr [[X0]], killed [[OFFSET3]]
; CHECK: [[P7:%[0-9]+]]:ppr = LDR_PXI killed [[ADDR3]], 0 :: (load (<vscale x 1 x s16>))
; CHECK: [[OFFSET2:%[0-9]+]]:gpr64 = CNTW_XPiI 31, 1, implicit $vg
; CHECK: [[ADDR2:%[0-9]+]]:gpr64common = ADDXrr [[X0]], killed [[OFFSET2]]
; CHECK: [[P6:%[0-9]+]]:ppr = LDR_PXI killed [[ADDR2]], 0 :: (load (<vscale x 1 x s16>))
; CHECK: [[OFFSET1:%[0-9]+]]:gpr64 = CNTD_XPiI 31, 1, implicit $vg
; CHECK: [[ADDR1:%[0-9]+]]:gpr64common = nuw ADDXrr [[X0]], killed [[OFFSET1]]
; CHECK: [[P5:%[0-9]+]]:ppr = LDR_PXI killed [[ADDR1]], 0 :: (load (<vscale x 1 x s16>))
; CHECK: [[P4:%[0-9]+]]:ppr = LDR_PXI [[X0]], 0 :: (load (<vscale x 1 x s16>))
; CHECK: [[RES0:%[0-9]+]]:ppr = AND_PPzPP [[P0]], [[P0]], killed [[P4]]
; CHECK: [[RES1:%[0-9]+]]:ppr = AND_PPzPP [[P1]], [[P1]], killed [[P5]]
; CHECK: [[RES2:%[0-9]+]]:ppr = AND_PPzPP [[P2]], [[P2]], killed [[P6]]
; CHECK: [[RES3:%[0-9]+]]:ppr = AND_PPzPP [[P3]], [[P3]], killed [[P7]]
; CHECK: $p0 = COPY [[RES0]]
; CHECK: $p1 = COPY [[RES1]]
; CHECK: $p2 = COPY [[RES2]]
; CHECK: $p3 = COPY [[RES3]]
; CHECK: RET_ReallyLR implicit $p0, implicit $p1, implicit $p2, implicit $p3
%p0 = extractvalue [2 x <vscale x 16 x i1>] %arg1, 0
%p1 = extractvalue [2 x <vscale x 16 x i1>] %arg1, 1
%p2 = extractvalue [2 x <vscale x 16 x i1>] %arg3, 0
%p3 = extractvalue [2 x <vscale x 16 x i1>] %arg3, 1
%p4 = extractvalue [4 x <vscale x 16 x i1>] %arg2, 0
%p5 = extractvalue [4 x <vscale x 16 x i1>] %arg2, 1
%p6 = extractvalue [4 x <vscale x 16 x i1>] %arg2, 2
%p7 = extractvalue [4 x <vscale x 16 x i1>] %arg2, 3
%r0 = and <vscale x 16 x i1> %p0, %p4
%r1 = and <vscale x 16 x i1> %p1, %p5
%r2 = and <vscale x 16 x i1> %p2, %p6
%r3 = and <vscale x 16 x i1> %p3, %p7
%1 = insertvalue [4 x <vscale x 16 x i1>] undef, <vscale x 16 x i1> %r0, 0
%2 = insertvalue [4 x <vscale x 16 x i1>] %1, <vscale x 16 x i1> %r1, 1
%3 = insertvalue [4 x <vscale x 16 x i1>] %2, <vscale x 16 x i1> %r2, 2
%4 = insertvalue [4 x <vscale x 16 x i1>] %3, <vscale x 16 x i1> %r3, 3
ret [4 x <vscale x 16 x i1>] %4
}

; Test that z8 and z9, passed by reference, are loaded from a location that is passed on the stack.
; i.e. x0 = %x0
; :