Skip to content

Commit

Permalink
[flang] Add -fppc-native-vector-element-order option to control the e…
Browse files Browse the repository at this point in the history
…lement order in PowerPC vector types

This patch also adds a LIT test for the vec_cvf intrinsic, whose
lowering can be affected by the option.

Co-authored-by: Mark Danial <Mark.Danial@ibm.com>
Co-authored-by: Daniel Chen <cdchen@ca.ibm.com>

Differential Revision: https://reviews.llvm.org/D155852
  • Loading branch information
kkwli committed Aug 4, 2023
1 parent e21b1dd commit 00769d6
Show file tree
Hide file tree
Showing 16 changed files with 118 additions and 19 deletions.
3 changes: 3 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -5416,6 +5416,9 @@ defm xor_operator : OptInFC1FFlag<"xor-operator", "Enable .XOR. as a synonym of
defm logical_abbreviations : OptInFC1FFlag<"logical-abbreviations", "Enable logical abbreviations">;
defm implicit_none : OptInFC1FFlag<"implicit-none", "No implicit typing allowed unless overridden by IMPLICIT statements">;
defm underscoring : OptInFC1FFlag<"underscoring", "Appends one trailing underscore to external names">;
// Flang option pair -fppc-native-vector-element-order /
// -fno-ppc-native-vector-element-order, selecting the element order used for
// PowerPC vector types. The positive form is described as the default.
defm ppc_native_vec_elem_order: BoolOptionWithoutMarshalling<"f", "ppc-native-vector-element-order",
PosFlag<SetTrue, [], "Specifies PowerPC native vector element order (default)">,
NegFlag<SetFalse, [], "Specifies PowerPC non-native vector element order">>;

def fno_automatic : Flag<["-"], "fno-automatic">, Group<f_Group>,
HelpText<"Implies the SAVE attribute for non-automatic local objects in subprograms unless RECURSIVE">;
Expand Down
4 changes: 3 additions & 1 deletion clang/lib/Driver/ToolChains/Flang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,9 @@ void Flang::addCodegenOptions(const ArgList &Args,
CmdArgs.push_back("-fversion-loops-for-stride");

Args.AddAllArgs(CmdArgs, {options::OPT_flang_experimental_hlfir,
options::OPT_flang_experimental_polymorphism});
options::OPT_flang_experimental_polymorphism,
options::OPT_fno_ppc_native_vec_elem_order,
options::OPT_fppc_native_vec_elem_order});
}

void Flang::addPicOptions(const ArgList &Args, ArgStringList &CmdArgs) const {
Expand Down
11 changes: 6 additions & 5 deletions flang/include/flang/Lower/CustomIntrinsicCall.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,11 +103,12 @@ lowerCustomIntrinsic(fir::FirOpBuilder &builder, mlir::Location loc,
/// Generate the FIR+MLIR operations for the generic intrinsic \p name
/// with argument \p args and expected result type \p resultType.
/// Returned fir::ExtendedValue is the returned Fortran intrinsic value.
fir::ExtendedValue genIntrinsicCall(fir::FirOpBuilder &builder,
mlir::Location loc, llvm::StringRef name,
std::optional<mlir::Type> resultType,
llvm::ArrayRef<fir::ExtendedValue> args,
StatementContext &stmtCtx);
fir::ExtendedValue
genIntrinsicCall(fir::FirOpBuilder &builder, mlir::Location loc,
llvm::StringRef name, std::optional<mlir::Type> resultType,
llvm::ArrayRef<fir::ExtendedValue> args,
StatementContext &stmtCtx,
Fortran::lower::AbstractConverter *converter = nullptr);

} // namespace lower
} // namespace Fortran
Expand Down
3 changes: 3 additions & 0 deletions flang/include/flang/Lower/LoweringOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,8 @@ ENUM_LOWERINGOPT(PolymorphicTypeImpl, unsigned, 1, 0)
/// Off by default until fully ready.
ENUM_LOWERINGOPT(LowerToHighLevelFIR, unsigned, 1, 0)

/// If true, use the non-native (reversed) PowerPC vector element order.
/// Set by -fno-ppc-native-vector-element-order; declared with the same
/// `unsigned, 1, 0` arguments as the off-by-default options above.
ENUM_LOWERINGOPT(NoPPCNativeVecElemOrder, unsigned, 1, 0)

#undef LOWERINGOPT
#undef ENUM_LOWERINGOPT
11 changes: 8 additions & 3 deletions flang/include/flang/Optimizer/Builder/IntrinsicCall.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#ifndef FORTRAN_LOWER_INTRINSICCALL_H
#define FORTRAN_LOWER_INTRINSICCALL_H

#include "flang/Lower/AbstractConverter.h"
#include "flang/Optimizer/Builder/BoxValue.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/Runtime/Character.h"
Expand All @@ -34,7 +35,8 @@ class StatementContext;
std::pair<fir::ExtendedValue, bool>
genIntrinsicCall(fir::FirOpBuilder &, mlir::Location, llvm::StringRef name,
std::optional<mlir::Type> resultType,
llvm::ArrayRef<fir::ExtendedValue> args);
llvm::ArrayRef<fir::ExtendedValue> args,
Fortran::lower::AbstractConverter *converter = nullptr);

/// Enums used to templatize and share lowering of MIN and MAX.
enum class Extremum { Min, Max };
Expand Down Expand Up @@ -124,8 +126,10 @@ struct IntrinsicArgumentLoweringRules;
struct IntrinsicLibrary {

// Constructors.
explicit IntrinsicLibrary(fir::FirOpBuilder &builder, mlir::Location loc)
: builder{builder}, loc{loc} {}
explicit IntrinsicLibrary(
fir::FirOpBuilder &builder, mlir::Location loc,
Fortran::lower::AbstractConverter *converter = nullptr)
: builder{builder}, loc{loc}, converter{converter} {}
IntrinsicLibrary() = delete;
IntrinsicLibrary(const IntrinsicLibrary &) = delete;

Expand Down Expand Up @@ -416,6 +420,7 @@ struct IntrinsicLibrary {
fir::FirOpBuilder &builder;
mlir::Location loc;
bool resultMustBeFreed = false;
Fortran::lower::AbstractConverter *converter = nullptr;
};

struct IntrinsicDummyArgument {
Expand Down
5 changes: 5 additions & 0 deletions flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,11 @@ struct PPCIntrinsicLibrary : IntrinsicLibrary {
PPCIntrinsicLibrary() = delete;
PPCIntrinsicLibrary(const PPCIntrinsicLibrary &) = delete;

// Helper functions for vector element ordering. Each one combines the
// host endianness (Fortran::evaluate::isHostLittleEndian) with the
// NoPPCNativeVecElemOrder lowering option.

// True on a little-endian host when NoPPCNativeVecElemOrder is set.
bool isBEVecElemOrderOnLE();
// True on a little-endian host when NoPPCNativeVecElemOrder is not set.
bool isNativeVecElemOrderOnLE();
// True when the host-little-endian flag and NoPPCNativeVecElemOrder differ;
// genVecConvert swaps vector word pairs only when this returns true.
bool changeVecElemOrder();

// PPC MMA intrinsic generic handler
template <MMAOp IntrId, MMAHandlerOp HandlerOp>
void genMmaIntr(llvm::ArrayRef<fir::ExtendedValue>);
Expand Down
5 changes: 5 additions & 0 deletions flang/lib/Frontend/CompilerInvocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -954,6 +954,11 @@ bool CompilerInvocation::createFromArgs(
res.loweringOpts.setPolymorphicTypeImpl(true);
}

// -f[no-]ppc-native-vector-element-order
// The driver forwards both spellings to the frontend, so the last one on the
// command line must win. Checking only for the presence of the negative form
// would keep the non-native order even when a later positive flag overrides
// it. Native order is the default when neither flag is given.
if (!args.hasFlag(clang::driver::options::OPT_fppc_native_vec_elem_order,
                  clang::driver::options::OPT_fno_ppc_native_vec_elem_order,
                  /*Default=*/true)) {
  res.loweringOpts.setNoPPCNativeVecElemOrder(true);
}

success &= parseFrontendArgs(res.getFrontendOpts(), args, diags);
parseTargetArgs(res.getTargetOpts(), args);
parsePreprocessorArgs(res.getPreprocessorOpts(), args);
Expand Down
2 changes: 1 addition & 1 deletion flang/lib/Lower/ConvertExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1928,7 +1928,7 @@ class ScalarExprLowering {
}
// Let the intrinsic library lower the intrinsic procedure call
return Fortran::lower::genIntrinsicCall(builder, getLoc(), name, resultType,
operands, stmtCtx);
operands, stmtCtx, &converter);
}

/// helper to detect statement functions
Expand Down
5 changes: 3 additions & 2 deletions flang/lib/Lower/CustomIntrinsicCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,10 @@ Fortran::lower::genIntrinsicCall(fir::FirOpBuilder &builder, mlir::Location loc,
llvm::StringRef name,
std::optional<mlir::Type> resultType,
llvm::ArrayRef<fir::ExtendedValue> args,
Fortran::lower::StatementContext &stmtCtx) {
Fortran::lower::StatementContext &stmtCtx,
Fortran::lower::AbstractConverter *converter) {
auto [result, mustBeFreed] =
fir::genIntrinsicCall(builder, loc, name, resultType, args);
fir::genIntrinsicCall(builder, loc, name, resultType, args, converter);
if (mustBeFreed) {
mlir::Value addr = fir::getBase(result);
if (auto *box = result.getBoxOf<fir::BoxValue>())
Expand Down
7 changes: 4 additions & 3 deletions flang/lib/Optimizer/Builder/IntrinsicCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5770,9 +5770,10 @@ lowerIntrinsicArgumentAs(const IntrinsicArgumentLoweringRules &rules,
std::pair<fir::ExtendedValue, bool>
genIntrinsicCall(fir::FirOpBuilder &builder, mlir::Location loc,
llvm::StringRef name, std::optional<mlir::Type> resultType,
llvm::ArrayRef<fir::ExtendedValue> args) {
return IntrinsicLibrary{builder, loc}.genIntrinsicCall(name, resultType,
args);
llvm::ArrayRef<fir::ExtendedValue> args,
Fortran::lower::AbstractConverter *converter) {
return IntrinsicLibrary{builder, loc, converter}.genIntrinsicCall(
name, resultType, args);
}

mlir::Value genMax(fir::FirOpBuilder &builder, mlir::Location loc,
Expand Down
22 changes: 18 additions & 4 deletions flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,20 @@ checkPPCMathOperationsRange(llvm::StringRef name) {
return ppcMathOps.equal_range(name);
}

// Helper functions for vector element ordering.
bool PPCIntrinsicLibrary::isBEVecElemOrderOnLE() {
return (Fortran::evaluate::isHostLittleEndian &&
converter->getLoweringOptions().getNoPPCNativeVecElemOrder());
}
bool PPCIntrinsicLibrary::isNativeVecElemOrderOnLE() {
return (Fortran::evaluate::isHostLittleEndian &&
!converter->getLoweringOptions().getNoPPCNativeVecElemOrder());
}
bool PPCIntrinsicLibrary::changeVecElemOrder() {
return (Fortran::evaluate::isHostLittleEndian !=
converter->getLoweringOptions().getNoPPCNativeVecElemOrder());
}

static mlir::FunctionType genMmaVpFuncType(mlir::MLIRContext *context,
int quadCnt, int pairCnt, int vecCnt,
int intCnt = 0,
Expand Down Expand Up @@ -1014,8 +1028,8 @@ PPCIntrinsicLibrary::genVecConvert(mlir::Type resultType,

mlir::Value newArgs[]{vArg1};
if (vecTyInfo.isFloat32()) {
// TODO: Handle element ordering
newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]);
if (changeVecElemOrder())
newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]);

const llvm::StringRef fname{"llvm.ppc.vsx.xvcvspdp"};
auto ftype{
Expand All @@ -1036,8 +1050,8 @@ PPCIntrinsicLibrary::genVecConvert(mlir::Type resultType,
auto mvf32Ty{mlir::VectorType::get(4, f32type)};
newArgs[0] = builder.createConvert(loc, mvf32Ty, newArgs[0]);

// TODO: Handle element ordering
newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]);
if (changeVecElemOrder())
newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]);

return builder.createConvert(loc, fvf32Ty, newArgs[0]);
}
Expand Down
4 changes: 4 additions & 0 deletions flang/test/Driver/driver-help-hidden.f90
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@
! CHECK-NEXT: -fno-automatic Implies the SAVE attribute for non-automatic local objects in subprograms unless RECURSIVE
! CHECK-NEXT: -fno-color-diagnostics Disable colors in diagnostics
! CHECK-NEXT: -fno-integrated-as Disable the integrated assembler
! CHECK-NEXT: -fno-ppc-native-vector-element-order
! CHECK-NEXT: Specifies PowerPC non-native vector element order
! CHECK-NEXT: -fno-signed-zeros Allow optimizations that ignore the sign of floating point zeros
! CHECK-NEXT: -fno-stack-arrays Allocate array temporaries on the heap (default)
! CHECK-NEXT: -fno-version-loops-for-stride
Expand All @@ -63,6 +65,8 @@
! CHECK-NEXT: -foptimization-record-passes=<regex>
! CHECK-NEXT: Only include passes which match a specified regular expression in the generated optimization record (by default, include all passes)
! CHECK-NEXT: -fpass-plugin=<dsopath> Load pass plugin from a dynamic shared object file (only with new pass manager).
! CHECK-NEXT: -fppc-native-vector-element-order
! CHECK-NEXT: Specifies PowerPC native vector element order
! CHECK-NEXT: -freciprocal-math Allow division operations to be reassociated
! CHECK-NEXT: -fsave-optimization-record=<format>
! CHECK-NEXT: Generate an optimization record file in a specific format
Expand Down
8 changes: 8 additions & 0 deletions flang/test/Driver/driver-help.f90
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@
! HELP-NEXT: -fno-automatic Implies the SAVE attribute for non-automatic local objects in subprograms unless RECURSIVE
! HELP-NEXT: -fno-color-diagnostics Disable colors in diagnostics
! HELP-NEXT: -fno-integrated-as Disable the integrated assembler
! HELP-NEXT: -fno-ppc-native-vector-element-order
! HELP-NEXT: Specifies PowerPC non-native vector element order
! HELP-NEXT: -fno-signed-zeros Allow optimizations that ignore the sign of floating point zeros
! HELP-NEXT: -fno-stack-arrays Allocate array temporaries on the heap (default)
! HELP-NEXT: -fno-version-loops-for-stride
Expand All @@ -59,6 +61,8 @@
! HELP-NEXT: -foptimization-record-passes=<regex>
! HELP-NEXT: Only include passes which match a specified regular expression in the generated optimization record (by default, include all passes)
! HELP-NEXT: -fpass-plugin=<dsopath> Load pass plugin from a dynamic shared object file (only with new pass manager).
! HELP-NEXT: -fppc-native-vector-element-order
! HELP-NEXT: Specifies PowerPC native vector element order
! HELP-NEXT: -freciprocal-math Allow division operations to be reassociated
! HELP-NEXT: -fsave-optimization-record=<format>
! HELP-NEXT: Generate an optimization record file in a specific format
Expand Down Expand Up @@ -158,6 +162,8 @@
! HELP-FC1-NEXT: Do not use the analyzed objects when unparsing
! HELP-FC1-NEXT: -fno-automatic Implies the SAVE attribute for non-automatic local objects in subprograms unless RECURSIVE
! HELP-FC1-NEXT: -fno-debug-pass-manager Disables debug printing for the new pass manager
! HELP-FC1-NEXT: -fno-ppc-native-vector-element-order
! HELP-FC1-NEXT: Specifies PowerPC non-native vector element order
! HELP-FC1-NEXT: -fno-reformat Dump the cooked character stream in -E mode
! HELP-FC1-NEXT: -fno-signed-zeros Allow optimizations that ignore the sign of floating point zeros
! HELP-FC1-NEXT: -fno-stack-arrays Allocate array temporaries on the heap (default)
Expand All @@ -173,6 +179,8 @@
! HELP-FC1-NEXT: Set OpenMP version (e.g. 45 for OpenMP 4.5, 50 for OpenMP 5.0). Default value is 50 for Clang and 11 for Flang
! HELP-FC1-NEXT: -fopenmp Parse OpenMP pragmas and generate parallel code.
! HELP-FC1-NEXT: -fpass-plugin=<dsopath> Load pass plugin from a dynamic shared object file (only with new pass manager).
! HELP-FC1-NEXT: -fppc-native-vector-element-order
! HELP-FC1-NEXT: Specifies PowerPC native vector element order
! HELP-FC1-NEXT: -freciprocal-math Allow division operations to be reassociated
! HELP-FC1-NEXT: -fstack-arrays Attempt to allocate array temporaries on the stack, no matter their size
! HELP-FC1-NEXT: -fsyntax-only Run the preprocessor, parser and semantic analysis stages
Expand Down
4 changes: 4 additions & 0 deletions flang/test/Driver/frontend-forwarding.f90
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
! RUN: -fversion-loops-for-stride \
! RUN: -flang-experimental-polymorphism \
! RUN: -flang-experimental-hlfir \
! RUN: -fno-ppc-native-vector-element-order \
! RUN: -fppc-native-vector-element-order \
! RUN: -mllvm -print-before-all \
! RUN: -save-temps=obj \
! RUN: -P \
Expand All @@ -40,5 +42,7 @@
! CHECK: "-fversion-loops-for-stride"
! CHECK: "-flang-experimental-polymorphism"
! CHECK: "-flang-experimental-hlfir"
! CHECK: "-fno-ppc-native-vector-element-order"
! CHECK: "-fppc-native-vector-element-order"
! CHECK: "-mllvm" "-print-before-all"
! CHECK: "-save-temps=obj"
37 changes: 37 additions & 0 deletions flang/test/Lower/PowerPC/ppc-vec_cvf-elem-order.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
! RUN: bbc -emit-fir %s -fno-ppc-native-vector-element-order=true -o - | FileCheck --check-prefixes="FIR" %s
! RUN: %flang_fc1 -emit-llvm %s -fno-ppc-native-vector-element-order -o - | FileCheck --check-prefixes="LLVMIR" %s
! REQUIRES: target=powerpc{{.*}}

! CHECK-LABEL: vec_cvf_test_r4r8
subroutine vec_cvf_test_r4r8(arg1)
vector(real(8)), intent(in) :: arg1
vector(real(4)) :: r
r = vec_cvf(arg1)

! FIR: %[[arg:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
! FIR: %[[carg:.*]] = fir.convert %[[arg]] : (!fir.vector<2:f64>) -> vector<2xf64>
! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.xvcvdpsp(%[[carg]]) fastmath<contract> : (vector<2xf64>) -> !fir.vector<4:f32>
! FIR: %[[ccall:.*]] = fir.convert %[[call]] : (!fir.vector<4:f32>) -> vector<4xf32>
! FIR: %[[r:.*]] = fir.convert %[[ccall]] : (vector<4xf32>) -> !fir.vector<4:f32>
! FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>

! LLVMIR: %[[arg:.*]] = load <2 x double>, ptr %{{.*}}, align 16
! LLVMIR: %[[call:.*]] = call contract <4 x float> @llvm.ppc.vsx.xvcvdpsp(<2 x double> %[[arg]])
! LLVMIR: store <4 x float> %[[call]], ptr %{{.*}}, align 16
end subroutine vec_cvf_test_r4r8

! CHECK-LABEL: vec_cvf_test_r8r4
subroutine vec_cvf_test_r8r4(arg1)
vector(real(4)), intent(in) :: arg1
vector(real(8)) :: r
r = vec_cvf(arg1)

! FIR: %[[arg:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
! FIR: %[[carg:.*]] = fir.convert %[[arg]] : (!fir.vector<4:f32>) -> vector<4xf32>
! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.xvcvspdp(%[[carg]]) fastmath<contract> : (vector<4xf32>) -> !fir.vector<2:f64>
! FIR: fir.store %[[call]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>

! LLVMIR: %[[arg:.*]] = load <4 x float>, ptr %{{.*}}, align 16
! LLVMIR: %[[r:.*]] = call contract <2 x double> @llvm.ppc.vsx.xvcvspdp(<4 x float> %[[arg]])
! LLVMIR: store <2 x double> %[[r]], ptr %{{.*}}, align 16
end subroutine vec_cvf_test_r8r4
6 changes: 6 additions & 0 deletions flang/tools/bbc/bbc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,11 @@ static llvm::cl::opt<bool> enablePolymorphic(
llvm::cl::desc("enable polymorphic type lowering (experimental)"),
llvm::cl::init(false));

// bbc counterpart of the frontend's -fno-ppc-native-vector-element-order flag.
// Its value is copied into the NoPPCNativeVecElemOrder lowering option;
// it is initialized to false.
static llvm::cl::opt<bool> enableNoPPCNativeVecElemOrder(
"fno-ppc-native-vector-element-order",
llvm::cl::desc("no PowerPC native vector element order."),
llvm::cl::init(false));

static llvm::cl::opt<bool> useHLFIR("hlfir",
llvm::cl::desc("Lower to high level FIR"),
llvm::cl::init(false));
Expand Down Expand Up @@ -289,6 +294,7 @@ static mlir::LogicalResult convertFortranSourceToMLIR(
// Use default lowering options for bbc.
Fortran::lower::LoweringOptions loweringOptions{};
loweringOptions.setPolymorphicTypeImpl(enablePolymorphic);
loweringOptions.setNoPPCNativeVecElemOrder(enableNoPPCNativeVecElemOrder);
loweringOptions.setLowerToHighLevelFIR(useHLFIR || emitHLFIR);
auto burnside = Fortran::lower::LoweringBridge::create(
ctx, semanticsContext, defKinds, semanticsContext.intrinsics(),
Expand Down

0 comments on commit 00769d6

Please sign in to comment.