Skip to content

Commit

Permalink
[AIX] Add support for non var_arg extended vector ABI calling convent…
Browse files Browse the repository at this point in the history
…ion on AIX

This patch enables passing non-variadic vector-type parameters on both the caller and callee sides, as well as returning vectors, on AIX. Only vector arguments that are passed in vector registers are supported.

So far, support is enabled only for the AIX extended Altivec ABI calling convention.

Reviewed By: sfertile, DiggerLin

Differential Revision: https://reviews.llvm.org/D86476
  • Loading branch information
ZarkoT committed Nov 26, 2020
1 parent 3f6c856 commit 6d648e6
Show file tree
Hide file tree
Showing 8 changed files with 218 additions and 46 deletions.
57 changes: 39 additions & 18 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Expand Up @@ -6998,31 +6998,24 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
.Options.EnableAIXExtendedAltivecABI)
report_fatal_error("the default Altivec AIX ABI is not yet supported");

if (ValVT.isVector() && State.getMachineFunction()
.getTarget()
.Options.EnableAIXExtendedAltivecABI)
report_fatal_error("the extended Altivec AIX ABI is not yet supported");

assert((!ValVT.isInteger() ||
(ValVT.getFixedSizeInBits() <= RegVT.getFixedSizeInBits())) &&
"Integer argument exceeds register size: should have been legalized");

if (ValVT == MVT::f128)
report_fatal_error("f128 is unimplemented on AIX.");

if (ArgFlags.isNest())
report_fatal_error("Nest arguments are unimplemented.");

if (ValVT.isVector() || LocVT.isVector())
report_fatal_error("Vector arguments are unimplemented on AIX.");

static const MCPhysReg GPR_32[] = {// 32-bit registers.
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10};
static const MCPhysReg GPR_64[] = {// 64-bit registers.
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10};

static const MCPhysReg VR[] = {// Vector registers.
PPC::V2, PPC::V3, PPC::V4, PPC::V5,
PPC::V6, PPC::V7, PPC::V8, PPC::V9,
PPC::V10, PPC::V11, PPC::V12, PPC::V13};

if (ArgFlags.isByVal()) {
if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
report_fatal_error("Pass-by-value arguments with alignment greater than "
Expand Down Expand Up @@ -7118,6 +7111,25 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,

return false;
}
case MVT::v4f32:
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8:
case MVT::v2i64:
case MVT::v2f64:
case MVT::v1i128: {
if (State.isVarArg())
report_fatal_error(
"variadic arguments for vector types are unimplemented for AIX");

if (unsigned VReg = State.AllocateReg(VR))
State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
else {
report_fatal_error(
"passing vector parameters to the stack is unimplemented for AIX");
}
return false;
}
}
return true;
}
Expand All @@ -7138,6 +7150,14 @@ static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
return &PPC::F4RCRegClass;
case MVT::f64:
return &PPC::F8RCRegClass;
case MVT::v4f32:
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8:
case MVT::v2i64:
case MVT::v2f64:
case MVT::v1i128:
return &PPC::VRRCRegClass;
}
}

Expand Down Expand Up @@ -7254,6 +7274,9 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
CCValAssign &VA = ArgLocs[I++];
MVT LocVT = VA.getLocVT();
ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
if (VA.isMemLoc() && VA.getValVT().isVector())
report_fatal_error(
"passing vector parameters to the stack is unimplemented for AIX");

// For compatibility with the AIX XL compiler, the float args in the
// parameter save area are initialized even if the argument is available
Expand Down Expand Up @@ -7451,8 +7474,6 @@ SDValue PPCTargetLowering::LowerCall_AIX(

const PPCSubtarget& Subtarget =
static_cast<const PPCSubtarget&>(DAG.getSubtarget());
if (Subtarget.hasAltivec())
report_fatal_error("Altivec support is unimplemented on AIX.");

MachineFunction &MF = DAG.getMachineFunction();
SmallVector<CCValAssign, 16> ArgLocs;
Expand Down Expand Up @@ -7598,6 +7619,10 @@ SDValue PPCTargetLowering::LowerCall_AIX(
const MVT LocVT = VA.getLocVT();
const MVT ValVT = VA.getValVT();

if (VA.isMemLoc() && VA.getValVT().isVector())
report_fatal_error(
"passing vector parameters to the stack is unimplemented for AIX");

switch (VA.getLocInfo()) {
default:
report_fatal_error("Unexpected argument extension type.");
Expand Down Expand Up @@ -7742,10 +7767,6 @@ PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,

SDValue Arg = OutVals[RealResIdx];

if (Subtarget.isAIXABI() &&
(VA.getLocVT().isVector() || VA.getValVT().isVector()))
report_fatal_error("Returning vector types not yet supported on AIX.");

switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
Expand Up @@ -231,8 +231,10 @@ PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
}

if (Subtarget.isAIXABI()) {
assert(!Subtarget.hasAltivec() && "Altivec is not implemented on AIX yet.");
return TM.isPPC64() ? CSR_PPC64_RegMask : CSR_AIX32_RegMask;
return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_PPC64_Altivec_RegMask
: CSR_PPC64_RegMask)
: (Subtarget.hasAltivec() ? CSR_AIX32_Altivec_RegMask
: CSR_AIX32_RegMask);
}

if (CC == CallingConv::Cold) {
Expand Down
26 changes: 16 additions & 10 deletions llvm/test/CodeGen/PowerPC/aix-cc-altivec.ll
@@ -1,12 +1,7 @@
; RUN: not --crash llc < %s -mtriple powerpc64-ibm-aix-xcoff -mcpu=pwr8 2>&1 | FileCheck %s
; RUN: not --crash llc < %s -mtriple powerpc-ibm-aix-xcoff -mcpu=pwr8 2>&1 | FileCheck %s

; This test expects a compiler diagnostic for an AIX limitation on Altivec
; support. When the Altivec limitation diagnostic is removed, this test
; should compile clean and fail in order to alert the author to validate the
; instructions emitted to initialize the GPR for the double vararg.
; The mfvsrwz and mfvsrd instructions should be used to initialize the GPR for
; the double vararg without going through memory.
; RUN: llc < %s -mtriple powerpc64-ibm-aix-xcoff -vec-extabi -mcpu=pwr8 2>&1 | FileCheck %s --check-prefix=ASM64
; RUN: llc < %s -mtriple powerpc-ibm-aix-xcoff -vec-extabi -mcpu=pwr8 2>&1 | FileCheck %s --check-prefix=ASM32

@f1 = global float 0.000000e+00, align 4

Expand All @@ -20,4 +15,15 @@ entry:

declare void @test_vararg(i32, ...)

; CHECK: LLVM ERROR: Altivec support is unimplemented on AIX.

; ASM64: xscvdpspn
; ASM64: mffprd
; ASM64: xxsldwi
; ASM64: mffprwz


; ASM32: lfsx
; ASM32: fmr
; ASM32: stfs
; ASM32: lwz
; ASM32: stfd
115 changes: 115 additions & 0 deletions llvm/test/CodeGen/PowerPC/aix-cc-ext-vec-abi.ll
@@ -0,0 +1,115 @@
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec \
; RUN: -vec-extabi -mtriple powerpc-ibm-aix-xcoff < %s | \
; RUN: FileCheck --check-prefixes=ASM32,ASM %s

; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec \
; RUN: -vec-extabi -mtriple powerpc64-ibm-aix-xcoff < %s | \
; RUN: FileCheck --check-prefixes=ASM64,ASM %s

; Callee taking twelve <4 x i32> parameters and returning their element-wise
; sum. The adds are chained left to right (%vec1 + %vec2, then + %vec3, ...),
; so every parameter is used. The ASM checks that follow this function expect
; the operands in vector registers 2 through 13, with the running sum kept in
; register 2.
define dso_local <4 x i32> @vec_callee(<4 x i32> %vec1, <4 x i32> %vec2, <4 x i32> %vec3, <4 x i32> %vec4, <4 x i32> %vec5, <4 x i32> %vec6, <4 x i32> %vec7, <4 x i32> %vec8, <4 x i32> %vec9, <4 x i32> %vec10, <4 x i32> %vec11, <4 x i32> %vec12) {
entry:
%add = add <4 x i32> %vec1, %vec2
%add1 = add <4 x i32> %add, %vec3
%add2 = add <4 x i32> %add1, %vec4
%add3 = add <4 x i32> %add2, %vec5
%add4 = add <4 x i32> %add3, %vec6
%add5 = add <4 x i32> %add4, %vec7
%add6 = add <4 x i32> %add5, %vec8
%add7 = add <4 x i32> %add6, %vec9
%add8 = add <4 x i32> %add7, %vec10
%add9 = add <4 x i32> %add8, %vec11
%add10 = add <4 x i32> %add9, %vec12
; The accumulated sum of all twelve parameters is the vector return value.
ret <4 x i32> %add10
}

; ASM-LABEL: .vec_callee:

; ASM: %entry
; ASM-DAG: vadduwm 2, 2, 3
; ASM-DAG: vadduwm 2, 2, 4
; ASM-DAG: vadduwm 2, 2, 5
; ASM-DAG: vadduwm 2, 2, 6
; ASM-DAG: vadduwm 2, 2, 7
; ASM-DAG: vadduwm 2, 2, 8
; ASM-DAG: vadduwm 2, 2, 9
; ASM-DAG: vadduwm 2, 2, 10
; ASM-DAG: vadduwm 2, 2, 11
; ASM-DAG: vadduwm 2, 2, 12
; ASM-DAG: vadduwm 2, 2, 13
; ASM: blr

; Caller that passes twelve constant <4 x i32> vectors (holding the values
; 1 through 48) to @vec_callee. The returned vector is not used; the function
; itself returns the scalar 0. The checks that follow expect each constant to
; be loaded into one of vector-scalar registers 34 through 45 before the call.
define dso_local i32 @vec_caller() {
entry:
%call = call <4 x i32> @vec_callee(<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> <i32 9, i32 10, i32 11, i32 12>, <4 x i32> <i32 13, i32 14, i32 15, i32 16>, <4 x i32> <i32 17, i32 18, i32 19, i32 20>, <4 x i32> <i32 21, i32 22, i32 23, i32 24>, <4 x i32> <i32 25, i32 26, i32 27, i32 28>, <4 x i32> <i32 29, i32 30, i32 31, i32 32>, <4 x i32> <i32 33, i32 34, i32 35, i32 36>, <4 x i32> <i32 37, i32 38, i32 39, i32 40>, <4 x i32> <i32 41, i32 42, i32 43, i32 44>, <4 x i32> <i32 45, i32 46, i32 47, i32 48>)
ret i32 0
}

; ASM-LABEL: .vec_caller:
; ASM32: # %bb.0: # %entry
; ASM32-DAG: mflr 0
; ASM32-DAG: stw 0, 8(1)
; ASM32-DAG: stwu 1, -64(1)
; ASM32-DAG: lwz [[REG1:[0-9]+]], L..C0(2)
; ASM32-DAG: lxvw4x 34, 0, [[REG1]]
; ASM32-DAG: lwz [[REG2:[0-9]+]], L..C1(2)
; ASM32-DAG: lxvw4x 35, 0, [[REG2]]
; ASM32-DAG: lwz [[REG3:[0-9]+]], L..C2(2)
; ASM32-DAG: lxvw4x 36, 0, [[REG3]]
; ASM32-DAG: lwz [[REG4:[0-9]+]], L..C3(2)
; ASM32-DAG: lxvw4x 37, 0, [[REG4]]
; ASM32-DAG: lwz [[REG5:[0-9]+]], L..C4(2)
; ASM32-DAG: lxvw4x 38, 0, [[REG5]]
; ASM32-DAG: lwz [[REG6:[0-9]+]], L..C5(2)
; ASM32-DAG: lxvw4x 39, 0, [[REG6]]
; ASM32-DAG: lwz [[REG7:[0-9]+]], L..C6(2)
; ASM32-DAG: lxvw4x 40, 0, [[REG7]]
; ASM32-DAG: lwz [[REG8:[0-9]+]], L..C7(2)
; ASM32-DAG: lxvw4x 41, 0, [[REG8]]
; ASM32-DAG: lwz [[REG9:[0-9]+]], L..C8(2)
; ASM32-DAG: lxvw4x 42, 0, [[REG9]]
; ASM32-DAG: lwz [[REG10:[0-9]+]], L..C9(2)
; ASM32-DAG: lxvw4x 43, 0, [[REG10]]
; ASM32-DAG: lwz [[REG11:[0-9]+]], L..C10(2)
; ASM32-DAG: lxvw4x 44, 0, [[REG11]]
; ASM32-DAG: lwz [[REG12:[0-9]+]], L..C11(2)
; ASM32-DAG: lxvw4x 45, 0, [[REG12]]
; ASM32-DAG: bl .vec_callee
; ASM32-DAG: li 3, 0
; ASM32-DAG: addi 1, 1, 64
; ASM32-DAG: lwz 0, 8(1)
; ASM32-DAG: mtlr 0
; ASM32: blr

; ASM64: # %entry
; ASM64-DAG: std 0, 16(1)
; ASM64-DAG: stdu 1, -112(1)
; ASM64-DAG: ld [[REG1:[0-9]+]], L..C0(2)
; ASM64-DAG: lxvw4x 34, 0, [[REG1]]
; ASM64-DAG: ld [[REG2:[0-9]+]], L..C1(2)
; ASM64-DAG: lxvw4x 35, 0, [[REG2]]
; ASM64-DAG: ld [[REG3:[0-9]+]], L..C2(2)
; ASM64-DAG: lxvw4x 36, 0, [[REG3]]
; ASM64-DAG: ld [[REG4:[0-9]+]], L..C3(2)
; ASM64-DAG: lxvw4x 37, 0, [[REG4]]
; ASM64-DAG: ld [[REG5:[0-9]+]], L..C4(2)
; ASM64-DAG: lxvw4x 38, 0, [[REG5]]
; ASM64-DAG: ld [[REG6:[0-9]+]], L..C5(2)
; ASM64-DAG: lxvw4x 39, 0, [[REG6]]
; ASM64-DAG: ld [[REG7:[0-9]+]], L..C6(2)
; ASM64-DAG: lxvw4x 40, 0, [[REG7]]
; ASM64-DAG: ld [[REG8:[0-9]+]], L..C7(2)
; ASM64-DAG: lxvw4x 41, 0, [[REG8]]
; ASM64-DAG: ld [[REG9:[0-9]+]], L..C8(2)
; ASM64-DAG: lxvw4x 42, 0, [[REG9]]
; ASM64-DAG: ld [[REG10:[0-9]+]], L..C9(2)
; ASM64-DAG: lxvw4x 43, 0, [[REG10]]
; ASM64-DAG: ld [[REG11:[0-9]+]], L..C10(2)
; ASM64-DAG: lxvw4x 44, 0, [[REG11]]
; ASM64-DAG: ld [[REG12:[0-9]+]], L..C11(2)
; ASM64-DAG: lxvw4x 45, 0, [[REG12]]
; ASM64-DAG: bl .vec_callee
; ASM64-DAG: li 3, 0
; ASM64-DAG: addi 1, 1, 112
; ASM64-DAG: ld 0, 16(1)
; ASM64-DAG: mtlr 0
; ASM64: blr
4 changes: 0 additions & 4 deletions llvm/test/CodeGen/PowerPC/aix-vec-abi.ll
@@ -1,12 +1,8 @@
; RUN: not --crash llc < %s -mtriple powerpc64-ibm-aix-xcoff -mcpu=pwr8 2>&1 | FileCheck %s --check-prefix=DFLTERROR
; RUN: not --crash llc < %s -mtriple powerpc-ibm-aix-xcoff -mcpu=pwr8 2>&1 | FileCheck %s --check-prefix=DFLTERROR

; RUN: not --crash llc < %s -mtriple powerpc64-ibm-aix-xcoff -mcpu=pwr8 -vec-extabi 2>&1 | FileCheck %s --check-prefix=VEXTERROR
; RUN: not --crash llc < %s -mtriple powerpc-ibm-aix-xcoff -mcpu=pwr8 -vec-extabi 2>&1 | FileCheck %s --check-prefix=VEXTERROR

; Minimal function with a single <4 x i32> parameter and an empty body. The
; vector parameter is never read; its mere presence in the signature is what
; this test relies on.
define void @vec_callee(<4 x i32> %vec1) {
ret void
}

; DFLTERROR: LLVM ERROR: the default Altivec AIX ABI is not yet supported
; VEXTERROR: LLVM ERROR: the extended Altivec AIX ABI is not yet supported
12 changes: 0 additions & 12 deletions llvm/test/CodeGen/PowerPC/aix-vector-return.ll

This file was deleted.

17 changes: 17 additions & 0 deletions llvm/test/CodeGen/PowerPC/aix-vector-stack-caller.ll
@@ -0,0 +1,17 @@
; RUN: not --crash llc < %s -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec \
; RUN: -vec-extabi -mtriple powerpc-ibm-aix-xcoff 2>&1 | \
; RUN: FileCheck %s --check-prefix=AIX-ERROR

; RUN: not --crash llc < %s -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec \
; RUN: -vec-extabi -mtriple powerpc64-ibm-aix-xcoff 2>&1 | \
; RUN: FileCheck %s --check-prefix=AIX-ERROR

; Caller that invokes @vec_callee_stack with fourteen constant <4 x i32>
; arguments (values 1 through 56). The callee is declared with a variadic
; prototype, so the call goes through a bitcast to a fixed fourteen-vector
; function type. This test expects llc to abort with the "passing vector
; parameters to the stack" diagnostic checked at the end of the file.
define dso_local i32 @vec_caller() {
entry:
%call = call i32 bitcast (i32 (...)* @vec_callee_stack to i32 (<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)*)(<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> <i32 9, i32 10, i32 11, i32 12>, <4 x i32> <i32 13, i32 14, i32 15, i32 16>, <4 x i32> <i32 17, i32 18, i32 19, i32 20>, <4 x i32> <i32 21, i32 22, i32 23, i32 24>, <4 x i32> <i32 25, i32 26, i32 27, i32 28>, <4 x i32> <i32 29, i32 30, i32 31, i32 32>, <4 x i32> <i32 33, i32 34, i32 35, i32 36>, <4 x i32> <i32 37, i32 38, i32 39, i32 40>, <4 x i32> <i32 41, i32 42, i32 43, i32 44>, <4 x i32> <i32 45, i32 46, i32 47, i32 48>, <4 x i32> <i32 49, i32 50, i32 51, i32 52>, <4 x i32> <i32 53, i32 54, i32 55, i32 56>)
ret i32 0
}

declare i32 @vec_callee_stack(...)

; AIX-ERROR: LLVM ERROR: passing vector parameters to the stack is unimplemented for AIX
27 changes: 27 additions & 0 deletions llvm/test/CodeGen/PowerPC/aix-vector-stack.ll
@@ -0,0 +1,27 @@
; RUN: not --crash llc < %s -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec \
; RUN: -vec-extabi -mtriple powerpc-ibm-aix-xcoff 2>&1 | \
; RUN: FileCheck %s --check-prefix=AIX-ERROR

; RUN: not --crash llc < %s -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec \
; RUN: -vec-extabi -mtriple powerpc64-ibm-aix-xcoff 2>&1 | \
; RUN: FileCheck %s --check-prefix=AIX-ERROR

; Callee taking fourteen <4 x i32> parameters and returning their
; element-wise sum, chained left to right so that every parameter is used.
; This test expects llc to abort with the "passing vector parameters to the
; stack" diagnostic checked at the end of the file rather than emit code.
define dso_local <4 x i32> @vec_callee_stack(<4 x i32> %vec1, <4 x i32> %vec2, <4 x i32> %vec3, <4 x i32> %vec4, <4 x i32> %vec5, <4 x i32> %vec6, <4 x i32> %vec7, <4 x i32> %vec8, <4 x i32> %vec9, <4 x i32> %vec10, <4 x i32> %vec11, <4 x i32> %vec12, <4 x i32> %vec13, <4 x i32> %vec14) {
entry:
%add = add <4 x i32> %vec1, %vec2
%add1 = add <4 x i32> %add, %vec3
%add2 = add <4 x i32> %add1, %vec4
%add3 = add <4 x i32> %add2, %vec5
%add4 = add <4 x i32> %add3, %vec6
%add5 = add <4 x i32> %add4, %vec7
%add6 = add <4 x i32> %add5, %vec8
%add7 = add <4 x i32> %add6, %vec9
%add8 = add <4 x i32> %add7, %vec10
%add9 = add <4 x i32> %add8, %vec11
%add10 = add <4 x i32> %add9, %vec12
; %vec13 and %vec14 are the two parameters beyond the first twelve.
%add11 = add <4 x i32> %add10, %vec13
%add12 = add <4 x i32> %add11, %vec14
ret <4 x i32> %add12
}

; AIX-ERROR: LLVM ERROR: passing vector parameters to the stack is unimplemented for AIX

0 comments on commit 6d648e6

Please sign in to comment.