64 changes: 4 additions & 60 deletions flang/lib/Optimizer/CodeGen/Target.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,63 +59,6 @@ static void typeTodo(const llvm::fltSemantics *sem, mlir::Location loc,
}
}

/// Return the {size, ABI alignment} pair of a builtin or FIR type, as
/// described by the data layout \p dl and the kind mapping \p kindMap.
///
/// Builtin integer, float and complex types are queried directly from \p dl.
/// FIR real, logical and character types are sized like the builtin type
/// selected for their kind by \p kindMap. FIR complex, sequence and record
/// types are computed from their element/component types. Any other type hits
/// a TODO at \p loc.
///
/// TODO: consider moving this to a DataLayoutTypeInterface implementation
/// for FIR types. It should first be ensured that it is OK to open the gate of
/// target dependent type size inquiries in lowering. It would also not be
/// straightforward given the need for a kind map that would need to be
/// converted in terms of mlir::DataLayoutEntryKey.
static std::pair<std::uint64_t, unsigned short>
getSizeAndAlignment(mlir::Location loc, mlir::Type ty,
                    const mlir::DataLayout &dl,
                    const fir::KindMapping &kindMap) {
  // Shorthand used to recurse on an element, component or equivalent type.
  auto sizeAndAlignOf = [&](mlir::Type nested) {
    return getSizeAndAlignment(loc, nested, dl, kindMap);
  };

  if (mlir::isa<mlir::IntegerType, mlir::FloatType, mlir::ComplexType>(ty)) {
    llvm::TypeSize size = dl.getTypeSize(ty);
    unsigned short alignment = dl.getTypeABIAlignment(ty);
    return {size, alignment};
  }
  if (auto firCmplx = mlir::dyn_cast<fir::ComplexType>(ty)) {
    // Two parts of the same float type: the second part starts at the first
    // suitably aligned offset after the first one.
    auto [floatSize, floatAlign] = sizeAndAlignOf(firCmplx.getEleType(kindMap));
    return {llvm::alignTo(floatSize, floatAlign) + floatSize, floatAlign};
  }
  if (auto real = mlir::dyn_cast<fir::RealType>(ty))
    return sizeAndAlignOf(real.getFloatType(kindMap));

  if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(ty)) {
    auto [eleSize, eleAlign] = sizeAndAlignOf(seqTy.getEleTy());
    // Every element occupies its size rounded up to its alignment.
    std::uint64_t size =
        llvm::alignTo(eleSize, eleAlign) * seqTy.getConstantArraySize();
    return {size, eleAlign};
  }
  if (auto recTy = mlir::dyn_cast<fir::RecordType>(ty)) {
    std::uint64_t size = 0;
    unsigned short align = 1;
    // Iterate by const reference: only the component type is needed, so
    // there is no reason to copy each component entry.
    for (const auto &component : recTy.getTypeList()) {
      auto [compSize, compAlign] = sizeAndAlignOf(component.second);
      size =
          llvm::alignTo(size, compAlign) + llvm::alignTo(compSize, compAlign);
      align = std::max(align, compAlign);
    }
    return {size, align};
  }
  if (auto logical = mlir::dyn_cast<fir::LogicalType>(ty)) {
    // Sized like an integer with the bit width of the logical kind.
    mlir::Type intTy = mlir::IntegerType::get(
        logical.getContext(), kindMap.getLogicalBitsize(logical.getFKind()));
    return sizeAndAlignOf(intTy);
  }
  if (auto character = mlir::dyn_cast<fir::CharacterType>(ty)) {
    // Sized like an integer with the bit width of the character kind.
    mlir::Type intTy = mlir::IntegerType::get(
        character.getContext(),
        kindMap.getCharacterBitsize(character.getFKind()));
    return sizeAndAlignOf(intTy);
  }
  TODO(loc, "computing size of a component");
}

namespace {
template <typename S>
struct GenericTarget : public CodeGenSpecifics {
Expand Down Expand Up @@ -489,7 +432,7 @@ struct TargetX86_64 : public GenericTarget<TargetX86_64> {
}
mlir::Type compType = component.second;
auto [compSize, compAlign] =
getSizeAndAlignment(loc, compType, getDataLayout(), kindMap);
fir::getTypeSizeAndAlignment(loc, compType, getDataLayout(), kindMap);
byteOffset = llvm::alignTo(byteOffset, compAlign);
ArgClass LoComp, HiComp;
classify(loc, compType, byteOffset, LoComp, HiComp);
Expand All @@ -510,7 +453,7 @@ struct TargetX86_64 : public GenericTarget<TargetX86_64> {
mlir::Type eleTy = seqTy.getEleTy();
const std::uint64_t arraySize = seqTy.getConstantArraySize();
auto [eleSize, eleAlign] =
getSizeAndAlignment(loc, eleTy, getDataLayout(), kindMap);
fir::getTypeSizeAndAlignment(loc, eleTy, getDataLayout(), kindMap);
std::uint64_t eleStorageSize = llvm::alignTo(eleSize, eleAlign);
for (std::uint64_t i = 0; i < arraySize; ++i) {
byteOffset = llvm::alignTo(byteOffset, eleAlign);
Expand Down Expand Up @@ -697,7 +640,8 @@ struct TargetX86_64 : public GenericTarget<TargetX86_64> {
CodeGenSpecifics::Marshalling passOnTheStack(mlir::Location loc,
mlir::Type ty) const {
CodeGenSpecifics::Marshalling marshal;
auto sizeAndAlign = getSizeAndAlignment(loc, ty, getDataLayout(), kindMap);
auto sizeAndAlign =
fir::getTypeSizeAndAlignment(loc, ty, getDataLayout(), kindMap);
// The stack is always 8 byte aligned (note 14 in 3.2.3).
unsigned short align =
std::max(sizeAndAlign.second, static_cast<unsigned short>(8));
Expand Down
53 changes: 53 additions & 0 deletions flang/lib/Optimizer/Dialect/FIRType.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/ISO_Fortran_binding_wrapper.h"
#include "flang/Optimizer/Builder/Todo.h"
#include "flang/Optimizer/Dialect/FIRDialect.h"
#include "flang/Optimizer/Dialect/Support/KindMapping.h"
#include "flang/Tools/PointerModels.h"
Expand Down Expand Up @@ -1339,3 +1340,55 @@ void FIROpsDialect::registerTypes() {
fir::LLVMPointerType::attachInterface<
OpenACCPointerLikeModel<fir::LLVMPointerType>>(*getContext());
}

/// Return the {size, ABI alignment} pair of the builtin or FIR type \p ty
/// according to the data layout \p dl and the kind mapping \p kindMap.
///
/// Handled types: builtin integer/float/complex, fir::ComplexType,
/// fir::RealType, fir::SequenceType, fir::RecordType, fir::LogicalType and
/// fir::CharacterType. Any other type reaches a TODO reported at \p loc.
std::pair<std::uint64_t, unsigned short>
fir::getTypeSizeAndAlignment(mlir::Location loc, mlir::Type ty,
                             const mlir::DataLayout &dl,
                             const fir::KindMapping &kindMap) {
  // Builtin types are described directly by the data layout.
  if (mlir::isa<mlir::IntegerType, mlir::FloatType, mlir::ComplexType>(ty)) {
    llvm::TypeSize size = dl.getTypeSize(ty);
    unsigned short alignment = dl.getTypeABIAlignment(ty);
    return {size, alignment};
  }
  if (auto firCmplx = mlir::dyn_cast<fir::ComplexType>(ty)) {
    // Two parts of the same float type; the second one is placed at the
    // first aligned offset following the first one.
    auto [floatSize, floatAlign] =
        getTypeSizeAndAlignment(loc, firCmplx.getEleType(kindMap), dl, kindMap);
    return {llvm::alignTo(floatSize, floatAlign) + floatSize, floatAlign};
  }
  if (auto real = mlir::dyn_cast<fir::RealType>(ty))
    return getTypeSizeAndAlignment(loc, real.getFloatType(kindMap), dl,
                                   kindMap);

  if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(ty)) {
    auto [eleSize, eleAlign] =
        getTypeSizeAndAlignment(loc, seqTy.getEleTy(), dl, kindMap);

    // Each element takes its size rounded up to its own alignment.
    std::uint64_t size =
        llvm::alignTo(eleSize, eleAlign) * seqTy.getConstantArraySize();
    return {size, eleAlign};
  }
  if (auto recTy = mlir::dyn_cast<fir::RecordType>(ty)) {
    std::uint64_t size = 0;
    unsigned short align = 1;
    // Bind by const reference so that component entries are not copied on
    // every iteration; only the component type (`second`) is read.
    for (const auto &component : recTy.getTypeList()) {
      auto [compSize, compAlign] =
          getTypeSizeAndAlignment(loc, component.second, dl, kindMap);
      size =
          llvm::alignTo(size, compAlign) + llvm::alignTo(compSize, compAlign);
      align = std::max(align, compAlign);
    }
    return {size, align};
  }
  if (auto logical = mlir::dyn_cast<fir::LogicalType>(ty)) {
    // Same layout as an integer with the bit width of the logical kind.
    mlir::Type intTy = mlir::IntegerType::get(
        logical.getContext(), kindMap.getLogicalBitsize(logical.getFKind()));
    return getTypeSizeAndAlignment(loc, intTy, dl, kindMap);
  }
  if (auto character = mlir::dyn_cast<fir::CharacterType>(ty)) {
    // Same layout as an integer with the bit width of the character kind.
    mlir::Type intTy = mlir::IntegerType::get(
        character.getContext(),
        kindMap.getCharacterBitsize(character.getFKind()));
    return getTypeSizeAndAlignment(loc, intTy, dl, kindMap);
  }
  TODO(loc, "computing size of a component");
}
19 changes: 14 additions & 5 deletions flang/lib/Optimizer/Transforms/LoopVersioning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Optimizer/Dialect/Support/FIRContext.h"
#include "flang/Optimizer/Dialect/Support/KindMapping.h"
#include "flang/Optimizer/Support/DataLayout.h"
#include "flang/Optimizer/Transforms/Passes.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/IR/Dominance.h"
Expand Down Expand Up @@ -241,6 +242,12 @@ void LoopVersioningPass::runOnOperation() {
mlir::ModuleOp module = func->getParentOfType<mlir::ModuleOp>();
fir::KindMapping kindMap = fir::getKindMapping(module);
mlir::SmallVector<ArgInfo, 4> argsOfInterest;
std::optional<mlir::DataLayout> dl =
    fir::support::getOrSetDataLayout(module, /*allowDefaultLayout=*/false);
if (!dl) {
  // The element sizes computed below are read from the data layout, and
  // `*dl` must not be dereferenced when it is absent: report the error,
  // mark the pass as failed and stop here.
  mlir::emitError(module.getLoc(),
                  "data layout attribute is required to perform " DEBUG_TYPE
                  " pass");
  signalPassFailure();
  return;
}
for (auto &arg : args) {
// Optional arguments must be checked for IsPresent before
// looking for the bounds. They are unsupported for the time being.
Expand All @@ -256,11 +263,13 @@ void LoopVersioningPass::runOnOperation() {
seqTy.getShape()[0] == fir::SequenceType::getUnknownExtent()) {
size_t typeSize = 0;
mlir::Type elementType = fir::unwrapSeqOrBoxedSeqType(arg.getType());
if (elementType.isa<mlir::FloatType>() ||
elementType.isa<mlir::IntegerType>())
typeSize = elementType.getIntOrFloatBitWidth() / 8;
else if (auto cty = elementType.dyn_cast<fir::ComplexType>())
typeSize = 2 * cty.getEleType(kindMap).getIntOrFloatBitWidth() / 8;
if (mlir::isa<mlir::FloatType>(elementType) ||
mlir::isa<mlir::IntegerType>(elementType) ||
mlir::isa<fir::ComplexType>(elementType)) {
auto [eleSize, eleAlign] = fir::getTypeSizeAndAlignment(
arg.getLoc(), elementType, *dl, kindMap);
typeSize = llvm::alignTo(eleSize, eleAlign);
}
if (typeSize)
argsOfInterest.push_back({arg, typeSize, rank, {}});
else
Expand Down
2 changes: 1 addition & 1 deletion flang/lib/Semantics/semantics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,7 @@ SemanticsContext::SemanticsContext(
globalScope_{*this}, intrinsicModulesScope_{globalScope_.MakeScope(
Scope::Kind::IntrinsicModules, nullptr)},
foldingContext_{parser::ContextualMessages{&messages_}, defaultKinds_,
intrinsics_, targetCharacteristics_, languageFeatures_} {}
intrinsics_, targetCharacteristics_, languageFeatures_, tempNames_} {}

SemanticsContext::~SemanticsContext() {}

Expand Down
8 changes: 8 additions & 0 deletions flang/test/Evaluate/rewrite07.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
! RUN: %flang_fc1 -fdebug-unparse %s 2>&1 | FileCheck %s

subroutine test_pack_size_rewrite(x, mask)
real :: x(:)
logical, intent(in) :: mask(:)
! CHECK: CALL test(count(mask,kind=8_8))
call test(size(pack(x, mask), dim=1, kind=8))
end subroutine
34 changes: 34 additions & 0 deletions flang/test/Lower/Intrinsics/system-optional.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
! RUN: bbc -emit-hlfir %s -o - | FileCheck %s

! CHECK-LABEL: func.func @_QPall_args(
! CHECK-SAME: %[[commandArg:.*]]: !fir.boxchar<1> {fir.bindc_name = "command", fir.optional},
! CHECK-SAME: %[[exitstatArg:.*]]: !fir.ref<i32> {fir.bindc_name = "exitstat", fir.optional}) {
subroutine all_args(command, exitstat)
CHARACTER(*), OPTIONAL :: command
INTEGER, OPTIONAL :: exitstat
call system(command, exitstat)

! CHECK-NEXT: %[[cmdstatVal:.*]] = fir.alloca i16
! CHECK-NEXT: %[[commandUnbox:.*]]:2 = fir.unboxchar %[[commandArg]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
! CHECK-NEXT: %[[commandDeclare:.*]]:2 = hlfir.declare %[[commandUnbox]]#0 typeparams %[[commandUnbox]]#1 {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QFall_argsEcommand"} : (!fir.ref<!fir.char<1,?>>, index) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
! CHECK-NEXT: %[[exitstatDeclare:.*]]:2 = hlfir.declare %[[exitstatArg]] {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QFall_argsEexitstat"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK-NEXT: %[[exitstatIsPresent:.*]] = fir.is_present %[[exitstatDeclare]]#0 : (!fir.ref<i32>) -> i1
! CHECK-NEXT: %[[commandBox:.*]] = fir.embox %[[commandDeclare]]#1 typeparams %[[commandUnbox]]#1 : (!fir.ref<!fir.char<1,?>>, index) -> !fir.box<!fir.char<1,?>>
! CHECK-NEXT: %[[exitstatBox:.*]] = fir.embox %[[exitstatDeclare]]#1 : (!fir.ref<i32>) -> !fir.box<i32>
! CHECK-NEXT: %[[absentIntBox:.*]] = fir.absent !fir.box<i32>
! CHECK-NEXT: %[[exitstatRealBox:.*]] = arith.select %[[exitstatIsPresent]], %[[exitstatBox]], %[[absentIntBox]] : !fir.box<i32>
! CHECK-NEXT: %[[true:.*]] = arith.constant true
! CHECK-NEXT: %[[c0_i2:.*]] = arith.constant 0 : i2
! CHECK-NEXT: %[[c0_i16:.*]] = fir.convert %[[c0_i2]] : (i2) -> i16
! CHECK-NEXT: fir.store %[[c0_i16]] to %[[cmdstatVal]] : !fir.ref<i16>
! CHECK-NEXT: %[[cmdstatBox:.*]] = fir.embox %[[cmdstatVal]] : (!fir.ref<i16>) -> !fir.box<i16>
! CHECK-NEXT: %[[absentBox:.*]] = fir.absent !fir.box<none>
! CHECK: %[[c9_i32:.*]] = arith.constant 9 : i32
! CHECK-NEXT: %[[command:.*]] = fir.convert %[[commandBox]] : (!fir.box<!fir.char<1,?>>) -> !fir.box<none>
! CHECK-NEXT: %[[exitstat:.*]] = fir.convert %[[exitstatRealBox]] : (!fir.box<i32>) -> !fir.box<none>
! CHECK-NEXT: %[[cmdstat:.*]] = fir.convert %[[cmdstatBox]] : (!fir.box<i16>) -> !fir.box<none>
! CHECK: %[[VAL_16:.*]] = fir.call @_FortranAExecuteCommandLine(%[[command]], %[[true]], %[[exitstat]], %[[cmdstat]], %[[absentBox]], %[[VAL_15:.*]], %[[c9_i32]]) fastmath<contract> : (!fir.box<none>, i1, !fir.box<none>, !fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> none
! CHECK-NEXT: return
! CHECK-NEXT: }

end subroutine all_args
53 changes: 53 additions & 0 deletions flang/test/Lower/Intrinsics/system.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
! RUN: bbc -emit-hlfir %s -o - | FileCheck %s

! CHECK-LABEL: func.func @_QPall_args(
! CHECK-SAME: %[[commandArg:.*]]: !fir.boxchar<1> {fir.bindc_name = "command"},
! CHECK-SAME: %[[exitstatArg:.*]]: !fir.ref<i32> {fir.bindc_name = "exitstat"}) {
subroutine all_args(command, exitstat)
CHARACTER(*) :: command
INTEGER :: exitstat
call system(command, exitstat)
! CHECK-NEXT: %[[cmdstatVal:.*]] = fir.alloca i16
! CHECK-NEXT: %[[commandUnbox:.*]]:2 = fir.unboxchar %[[commandArg]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
! CHECK-NEXT: %[[commandDeclare:.*]]:2 = hlfir.declare %[[commandUnbox]]#0 typeparams %[[commandUnbox]]#1 {uniq_name = "_QFall_argsEcommand"} : (!fir.ref<!fir.char<1,?>>, index) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
! CHECK-NEXT: %[[exitstatDeclare:.*]]:2 = hlfir.declare %[[exitstatArg]] {uniq_name = "_QFall_argsEexitstat"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK-NEXT: %[[commandBox:.*]] = fir.embox %[[commandDeclare]]#1 typeparams %[[commandUnbox]]#1 : (!fir.ref<!fir.char<1,?>>, index) -> !fir.box<!fir.char<1,?>>
! CHECK-NEXT: %[[exitstatBox:.*]] = fir.embox %[[exitstatDeclare]]#1 : (!fir.ref<i32>) -> !fir.box<i32>
! CHECK-NEXT: %[[true:.*]] = arith.constant true
! CHECK-NEXT: %[[c0_i2:.*]] = arith.constant 0 : i2
! CHECK-NEXT: %[[c0_i16:.*]] = fir.convert %[[c0_i2]] : (i2) -> i16
! CHECK-NEXT: fir.store %[[c0_i16]] to %[[cmdstatVal]] : !fir.ref<i16>
! CHECK-NEXT: %[[cmdstatBox:.*]] = fir.embox %[[cmdstatVal]] : (!fir.ref<i16>) -> !fir.box<i16>
! CHECK-NEXT: %[[absentBox:.*]] = fir.absent !fir.box<none>
! CHECK: %[[c9_i32:.*]] = arith.constant 9 : i32
! CHECK-NEXT: %[[command:.*]] = fir.convert %[[commandBox]] : (!fir.box<!fir.char<1,?>>) -> !fir.box<none>
! CHECK-NEXT: %[[exitstat:.*]] = fir.convert %[[exitstatBox]] : (!fir.box<i32>) -> !fir.box<none>
! CHECK-NEXT: %[[cmdstat:.*]] = fir.convert %[[cmdstatBox]] : (!fir.box<i16>) -> !fir.box<none>
! CHECK: %[[VAL_13:.*]] = fir.call @_FortranAExecuteCommandLine(%[[command]], %[[true]], %[[exitstat]], %[[cmdstat]], %[[absentBox]], %[[VAL_12:.*]], %[[c9_i32]]) fastmath<contract> : (!fir.box<none>, i1, !fir.box<none>, !fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> none
! CHECK-NEXT: return
! CHECK-NEXT: }
end subroutine all_args

! CHECK-LABEL: func.func @_QPonly_command(
! CHECK-SAME: %[[commandArg:.*]]: !fir.boxchar<1> {fir.bindc_name = "command"}) {
subroutine only_command(command)
CHARACTER(*) :: command
call system(command)
! CHECK-NEXT: %[[cmdstatVal:.*]] = fir.alloca i16
! CHECK-NEXT: %[[commandUnbox:.*]]:2 = fir.unboxchar %arg0 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
! CHECK-NEXT: %[[commandDeclare:.*]]:2 = hlfir.declare %[[commandUnbox]]#0 typeparams %[[commandUnbox]]#1 {uniq_name = "_QFonly_commandEcommand"} : (!fir.ref<!fir.char<1,?>>, index) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
! CHECK-NEXT: %[[commandBox:.*]] = fir.embox %[[commandDeclare]]#1 typeparams %[[commandUnbox]]#1 : (!fir.ref<!fir.char<1,?>>, index) -> !fir.box<!fir.char<1,?>>
! CHECK-NEXT: %[[true:.*]] = arith.constant true
! CHECK-NEXT: %[[absentBox:.*]] = fir.absent !fir.box<none>
! CHECK-NEXT: %[[c0_i2:.*]] = arith.constant 0 : i2
! CHECK-NEXT: %[[c0_i16:.*]] = fir.convert %[[c0_i2]] : (i2) -> i16
! CHECK-NEXT: fir.store %[[c0_i16]] to %[[cmdstatVal]] : !fir.ref<i16>
! CHECK-NEXT: %[[cmdstatBox:.*]] = fir.embox %[[cmdstatVal]] : (!fir.ref<i16>) -> !fir.box<i16>
! CHECK-NEXT: %[[absentBox2:.*]] = fir.absent !fir.box<none>
! CHECK: %[[c35_i32:.*]] = arith.constant 35 : i32
! CHECK-NEXT: %[[command:.*]] = fir.convert %[[commandBox]] : (!fir.box<!fir.char<1,?>>) -> !fir.box<none>
! CHECK-NEXT: %[[cmdstat:.*]] = fir.convert %[[cmdstatBox]] : (!fir.box<i16>) -> !fir.box<none>
! CHECK: %[[VAL_12:.*]] = fir.call @_FortranAExecuteCommandLine(%[[command]], %[[true]], %[[absentBox]], %[[cmdstat]], %[[absentBox2]], %[[VAL_11:.*]], %[[c35_i32]]) fastmath<contract> : (!fir.box<none>, i1, !fir.box<none>, !fir.box<none>, !fir.box<none>, !fir.ref<i8>, i32) -> none
! CHECK-NEXT: return
! CHECK-NEXT: }
end subroutine only_command
86 changes: 85 additions & 1 deletion flang/test/Transforms/loop-versioning.fir
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
// sum = sum + a(i)
// end do
// end subroutine sum1d
module {
module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<f80, dense<128> : vector<2xi64>>, #dlti.dl_entry<i128, dense<128> : vector<2xi64>>, #dlti.dl_entry<i64, dense<64> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi64>>, #dlti.dl_entry<f128, dense<128> : vector<2xi64>>, #dlti.dl_entry<f64, dense<64> : vector<2xi64>>, #dlti.dl_entry<f16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i32, dense<32> : vector<2xi64>>, #dlti.dl_entry<i16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i8, dense<8> : vector<2xi64>>, #dlti.dl_entry<i1, dense<8> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>} {
func.func @sum1d(%arg0: !fir.box<!fir.array<?xf64>> {fir.bindc_name = "a"}, %arg1: !fir.ref<i32> {fir.bindc_name = "n"}) {
%decl = fir.declare %arg0 {uniq_name = "a"} : (!fir.box<!fir.array<?xf64>>) -> !fir.box<!fir.array<?xf64>>
%rebox = fir.rebox %decl : (!fir.box<!fir.array<?xf64>>) -> !fir.box<!fir.array<?xf64>>
Expand Down Expand Up @@ -1556,5 +1556,89 @@ func.func @minloc(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "x"}, %ar
// CHECK: fir.if %{{.*}} {
// CHECK: {{.*}} = arith.cmpi eq, %[[V17]], %c2147483647_i32

func.func @_QPtest_real10(%arg0: !fir.box<!fir.array<?x?xf80>> {fir.bindc_name = "a"}) -> f80 {
%c10 = arith.constant 10 : index
%c1 = arith.constant 1 : index
%cst = arith.constant 0.000000e+00 : f80
%0 = fir.declare %arg0 {fortran_attrs = #fir.var_attrs<intent_in>, uniq_name = "_QFtest_real10Ea"} : (!fir.box<!fir.array<?x?xf80>>) -> !fir.box<!fir.array<?x?xf80>>
%1 = fir.rebox %0 : (!fir.box<!fir.array<?x?xf80>>) -> !fir.box<!fir.array<?x?xf80>>
%2 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFtest_real10Ei"}
%3 = fir.declare %2 {uniq_name = "_QFtest_real10Ei"} : (!fir.ref<i32>) -> !fir.ref<i32>
%4 = fir.alloca f80 {bindc_name = "res", uniq_name = "_QFtest_real10Eres"}
%5 = fir.declare %4 {uniq_name = "_QFtest_real10Eres"} : (!fir.ref<f80>) -> !fir.ref<f80>
%6 = fir.address_of(@_QFtest_real10ECxdp) : !fir.ref<i32>
%7 = fir.declare %6 {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QFtest_real10ECxdp"} : (!fir.ref<i32>) -> !fir.ref<i32>
fir.store %cst to %5 : !fir.ref<f80>
%8 = fir.convert %c1 : (index) -> i32
%9:2 = fir.do_loop %arg1 = %c1 to %c10 step %c1 iter_args(%arg2 = %8) -> (index, i32) {
fir.store %arg2 to %3 : !fir.ref<i32>
%11 = fir.load %5 : !fir.ref<f80>
%12 = fir.load %3 : !fir.ref<i32>
%13 = fir.convert %12 : (i32) -> i64
%14 = fir.array_coor %1 %13, %13 : (!fir.box<!fir.array<?x?xf80>>, i64, i64) -> !fir.ref<f80>
%15 = fir.load %14 : !fir.ref<f80>
%16 = arith.addf %11, %15 fastmath<contract> : f80
fir.store %16 to %5 : !fir.ref<f80>
%17 = arith.addi %arg1, %c1 : index
%18 = fir.load %3 : !fir.ref<i32>
%19 = arith.addi %18, %8 : i32
fir.result %17, %19 : index, i32
}
fir.store %9#1 to %3 : !fir.ref<i32>
%10 = fir.load %5 : !fir.ref<f80>
return %10 : f80
}
// CHECK-LABEL: func.func @_QPtest_real10(
// CHECK: fir.if
// CHECK: fir.do_loop
// CHECK-DAG: arith.shrsi %{{[^,]*}}, %[[SHIFT:.*]] : index
// CHECK-DAG: %[[SHIFT]] = arith.constant 4 : index
// CHECK: fir.result
// CHECK: } else {
// CHECK: fir.do_loop

func.func @_QPtest_complex10(%arg0: !fir.box<!fir.array<?x?x!fir.complex<10>>> {fir.bindc_name = "a"}) -> !fir.complex<10> {
%c10 = arith.constant 10 : index
%c1 = arith.constant 1 : index
%cst = arith.constant 0.000000e+00 : f80
%0 = fir.declare %arg0 {fortran_attrs = #fir.var_attrs<intent_in>, uniq_name = "_QFtest_complex10Ea"} : (!fir.box<!fir.array<?x?x!fir.complex<10>>>) -> !fir.box<!fir.array<?x?x!fir.complex<10>>>
%1 = fir.rebox %0 : (!fir.box<!fir.array<?x?x!fir.complex<10>>>) -> !fir.box<!fir.array<?x?x!fir.complex<10>>>
%2 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFtest_complex10Ei"}
%3 = fir.declare %2 {uniq_name = "_QFtest_complex10Ei"} : (!fir.ref<i32>) -> !fir.ref<i32>
%4 = fir.alloca !fir.complex<10> {bindc_name = "res", uniq_name = "_QFtest_complex10Eres"}
%5 = fir.declare %4 {uniq_name = "_QFtest_complex10Eres"} : (!fir.ref<!fir.complex<10>>) -> !fir.ref<!fir.complex<10>>
%6 = fir.address_of(@_QFtest_complex10ECxdp) : !fir.ref<i32>
%7 = fir.declare %6 {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QFtest_complex10ECxdp"} : (!fir.ref<i32>) -> !fir.ref<i32>
%8 = fir.undefined !fir.complex<10>
%9 = fir.insert_value %8, %cst, [0 : index] : (!fir.complex<10>, f80) -> !fir.complex<10>
%10 = fir.insert_value %9, %cst, [1 : index] : (!fir.complex<10>, f80) -> !fir.complex<10>
fir.store %10 to %5 : !fir.ref<!fir.complex<10>>
%11 = fir.convert %c1 : (index) -> i32
%12:2 = fir.do_loop %arg1 = %c1 to %c10 step %c1 iter_args(%arg2 = %11) -> (index, i32) {
fir.store %arg2 to %3 : !fir.ref<i32>
%14 = fir.load %5 : !fir.ref<!fir.complex<10>>
%15 = fir.load %3 : !fir.ref<i32>
%16 = fir.convert %15 : (i32) -> i64
%17 = fir.array_coor %1 %16, %16 : (!fir.box<!fir.array<?x?x!fir.complex<10>>>, i64, i64) -> !fir.ref<!fir.complex<10>>
%18 = fir.load %17 : !fir.ref<!fir.complex<10>>
%19 = fir.addc %14, %18 {fastmath = #arith.fastmath<contract>} : !fir.complex<10>
fir.store %19 to %5 : !fir.ref<!fir.complex<10>>
%20 = arith.addi %arg1, %c1 : index
%21 = fir.load %3 : !fir.ref<i32>
%22 = arith.addi %21, %11 : i32
fir.result %20, %22 : index, i32
}
fir.store %12#1 to %3 : !fir.ref<i32>
%13 = fir.load %5 : !fir.ref<!fir.complex<10>>
return %13 : !fir.complex<10>
}
// CHECK-LABEL: func.func @_QPtest_complex10(
// CHECK: fir.if
// CHECK: fir.do_loop
// CHECK-DAG: arith.shrsi %{{[^,]*}}, %[[SHIFT:.*]] : index
// CHECK-DAG: %[[SHIFT]] = arith.constant 5 : index
// CHECK: fir.result
// CHECK: } else {
// CHECK: fir.do_loop

} // End module
3 changes: 2 additions & 1 deletion flang/unittests/Evaluate/expression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,9 @@ int main() {
auto intrinsics{Fortran::evaluate::IntrinsicProcTable::Configure(defaults)};
TargetCharacteristics targetCharacteristics;
Fortran::common::LanguageFeatureControl languageFeatures;
std::set<std::string> tempNames;
FoldingContext context{Fortran::parser::ContextualMessages{nullptr}, defaults,
intrinsics, targetCharacteristics, languageFeatures};
intrinsics, targetCharacteristics, languageFeatures, tempNames};
ex1 = Fold(context, std::move(ex1));
MATCH("-10_4", ex1.AsFortran());
MATCH("1_4/2_4", (DefaultIntegerExpr{1} / DefaultIntegerExpr{2}).AsFortran());
Expand Down
5 changes: 3 additions & 2 deletions flang/unittests/Evaluate/folding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,11 @@ void TestHostRuntimeSubnormalFlushing() {
TargetCharacteristics noFlushingTargetCharacteristics;
noFlushingTargetCharacteristics.set_areSubnormalsFlushedToZero(false);
Fortran::common::LanguageFeatureControl languageFeatures;
std::set<std::string> tempNames;
FoldingContext flushingContext{messages, defaults, intrinsics,
flushingTargetCharacteristics, languageFeatures};
flushingTargetCharacteristics, languageFeatures, tempNames};
FoldingContext noFlushingContext{messages, defaults, intrinsics,
noFlushingTargetCharacteristics, languageFeatures};
noFlushingTargetCharacteristics, languageFeatures, tempNames};

DynamicType r4{R4{}.GetType()};
// Test subnormal argument flushing
Expand Down
5 changes: 3 additions & 2 deletions flang/unittests/Evaluate/intrinsics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,8 @@ struct TestCall {
auto messages{strings.Messages(buffer)};
TargetCharacteristics targetCharacteristics;
common::LanguageFeatureControl languageFeatures;
FoldingContext context{
messages, defaults, table, targetCharacteristics, languageFeatures};
FoldingContext context{messages, defaults, table, targetCharacteristics,
languageFeatures, tempNames};
std::optional<SpecificCall> si{table.Probe(call, args, context)};
if (resultType.has_value()) {
TEST(si.has_value());
Expand Down Expand Up @@ -142,6 +142,7 @@ struct TestCall {
ActualArguments args;
std::string name;
std::vector<std::string> keywords;
std::set<std::string> tempNames;
};

void TestIntrinsics() {
Expand Down
54 changes: 54 additions & 0 deletions flang/unittests/Runtime/CommandTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,60 @@ TEST_F(ZeroArguments, ECLInvalidCommandAsyncDontAffectAsync) {
*command.get(), false, nullptr, nullptr, nullptr));
}

TEST_F(ZeroArguments, SystemValidCommandExitStat) {
  // SYSTEM goes through the EXECUTE_COMMAND_LINE runtime entry point; mimic
  // the arguments it passes: a CMDSTAT descriptor and WAIT set to true.
  OwningPtr<Descriptor> cmdStatus{IntDescriptor(202)};
  bool waitForCommand{true};

  // Arguments that come from the user call itself.
  OwningPtr<Descriptor> commandLine{CharDescriptor("echo hi")};
  OwningPtr<Descriptor> exitStatus{EmptyIntDescriptor()};

  RTNAME(ExecuteCommandLine)
  (*commandLine.get(), waitForCommand, exitStatus.get(), cmdStatus.get(),
      nullptr);

  // A valid command is expected to leave 0 in EXITSTAT.
  CheckDescriptorEqInt<std::int64_t>(exitStatus.get(), 0);
}

TEST_F(ZeroArguments, SystemInvalidCommandExitStat) {
  // SYSTEM goes through the EXECUTE_COMMAND_LINE runtime entry point; mimic
  // the arguments it passes: a CMDSTAT descriptor and WAIT set to true.
  OwningPtr<Descriptor> cmdStatus{IntDescriptor(202)};
  bool waitForCommand{true};

  // Arguments that come from the user call itself.
  OwningPtr<Descriptor> commandLine{CharDescriptor("InvalidCommand")};
  OwningPtr<Descriptor> exitStatus{EmptyIntDescriptor()};

  RTNAME(ExecuteCommandLine)
  (*commandLine.get(), waitForCommand, exitStatus.get(), cmdStatus.get(),
      nullptr);

  // The exit status expected for an unknown command differs between Windows
  // and the other hosts.
#ifdef _WIN32
  const std::int64_t expectedExitStatus{1};
#else
  const std::int64_t expectedExitStatus{127};
#endif
  CheckDescriptorEqInt<std::int64_t>(exitStatus.get(), expectedExitStatus);
}

TEST_F(ZeroArguments, SystemValidCommandOptionalExitStat) {
  // SYSTEM goes through the EXECUTE_COMMAND_LINE runtime entry point; mimic
  // the arguments it passes: a CMDSTAT descriptor and WAIT set to true.
  OwningPtr<Descriptor> cmdStatus{IntDescriptor(202)};
  bool waitForCommand{true};

  OwningPtr<Descriptor> commandLine{CharDescriptor("echo hi")};
  // EXITSTAT is left out (nullptr); the call must not raise a fatal failure.
  EXPECT_NO_FATAL_FAILURE(RTNAME(ExecuteCommandLine)(*commandLine.get(),
      waitForCommand, nullptr, cmdStatus.get(), nullptr));
}

TEST_F(ZeroArguments, SystemInvalidCommandOptionalExitStat) {
  // Environment setup for SYSTEM from the EXECUTE_COMMAND_LINE runtime.
  OwningPtr<Descriptor> cmdStat{IntDescriptor(202)};
  bool wait{true};
  // Setup finished.

  OwningPtr<Descriptor> command{CharDescriptor("InvalidCommand")};
  // EXITSTAT is left out (nullptr); even with an invalid command the call
  // must not raise a fatal failure. The macro argument is a plain call
  // expression, without a trailing semicolon, like in the sibling test.
  EXPECT_NO_FATAL_FAILURE(RTNAME(ExecuteCommandLine)(
      *command.get(), wait, nullptr, cmdStat.get(), nullptr));
}

static const char *oneArgArgv[]{"aProgram", "anArgumentOfLength20"};
class OneArgument : public CommandFixture {
protected:
Expand Down
2 changes: 1 addition & 1 deletion llvm/include/llvm/Support/X86FoldTablesUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ enum {
TB_BCAST_SH = 6 << TB_BCAST_TYPE_SHIFT,
TB_BCAST_MASK = 0x7 << TB_BCAST_TYPE_SHIFT,

// Unused bits 15-16
// Unused bits 14-16
};
} // namespace llvm
#endif // LLVM_SUPPORT_X86FOLDTABLESUTILS_H
3 changes: 2 additions & 1 deletion llvm/include/llvm/TargetParser/AArch64TargetParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -813,7 +813,8 @@ struct CpuAlias {
StringRef Name;
};

inline constexpr CpuAlias CpuAliases[] = {{"grace", "neoverse-v2"}};
inline constexpr CpuAlias CpuAliases[] = {{"cobalt-100", "neoverse-n2"},
{"grace", "neoverse-v2"}};

bool getExtensionFeatures(
const AArch64::ExtensionBitset &Extensions,
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26949,7 +26949,7 @@ bool AArch64TargetLowering::isComplexDeinterleavingOperationSupported(
return false;

// If the vector is scalable, SVE is enabled, implying support for complex
// numbers. Otherwirse, we need to ensure complex number support is avaialble
// numbers. Otherwise, we need to ensure complex number support is available
if (!VTy->isScalableTy() && !Subtarget->hasComplxNum())
return false;

Expand All @@ -26965,7 +26965,7 @@ bool AArch64TargetLowering::isComplexDeinterleavingOperationSupported(
!llvm::isPowerOf2_32(VTyWidth))
return false;

if (ScalarTy->isIntegerTy() && Subtarget->hasSVE2()) {
if (ScalarTy->isIntegerTy() && Subtarget->hasSVE2() && VTy->isScalableTy()) {
unsigned ScalarWidth = ScalarTy->getScalarSizeInBits();
return 8 <= ScalarWidth && ScalarWidth <= 64;
}
Expand Down
72 changes: 24 additions & 48 deletions llvm/lib/Target/X86/X86InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8127,57 +8127,33 @@ static unsigned getBroadcastOpcode(const X86FoldTableEntry *I,
assert((SpillSize == 64 || STI.hasVLX()) &&
"Can't broadcast less than 64 bytes without AVX512VL!");

#define CASE_BCAST_TYPE_OPC(TYPE, OP16, OP32, OP64) \
case TYPE: \
switch (SpillSize) { \
default: \
llvm_unreachable("Unknown spill size"); \
case 16: \
return X86::OP16; \
case 32: \
return X86::OP32; \
case 64: \
return X86::OP64; \
} \
break;

switch (I->Flags & TB_BCAST_MASK) {
default:
llvm_unreachable("Unexpected broadcast type!");
case TB_BCAST_D:
switch (SpillSize) {
default:
llvm_unreachable("Unknown spill size");
case 16:
return X86::VPBROADCASTDZ128rm;
case 32:
return X86::VPBROADCASTDZ256rm;
case 64:
return X86::VPBROADCASTDZrm;
}
break;
case TB_BCAST_Q:
switch (SpillSize) {
default:
llvm_unreachable("Unknown spill size");
case 16:
return X86::VPBROADCASTQZ128rm;
case 32:
return X86::VPBROADCASTQZ256rm;
case 64:
return X86::VPBROADCASTQZrm;
}
break;
case TB_BCAST_SS:
switch (SpillSize) {
default:
llvm_unreachable("Unknown spill size");
case 16:
return X86::VBROADCASTSSZ128rm;
case 32:
return X86::VBROADCASTSSZ256rm;
case 64:
return X86::VBROADCASTSSZrm;
}
break;
case TB_BCAST_SD:
switch (SpillSize) {
default:
llvm_unreachable("Unknown spill size");
case 16:
return X86::VMOVDDUPZ128rm;
case 32:
return X86::VBROADCASTSDZ256rm;
case 64:
return X86::VBROADCASTSDZrm;
}
break;
CASE_BCAST_TYPE_OPC(TB_BCAST_W, VPBROADCASTWZ128rm, VPBROADCASTWZ256rm,
VPBROADCASTWZrm)
CASE_BCAST_TYPE_OPC(TB_BCAST_D, VPBROADCASTDZ128rm, VPBROADCASTDZ256rm,
VPBROADCASTDZrm)
CASE_BCAST_TYPE_OPC(TB_BCAST_Q, VPBROADCASTQZ128rm, VPBROADCASTQZ256rm,
VPBROADCASTQZrm)
CASE_BCAST_TYPE_OPC(TB_BCAST_SS, VBROADCASTSSZ128rm, VBROADCASTSSZ256rm,
VBROADCASTSSZrm)
CASE_BCAST_TYPE_OPC(TB_BCAST_SD, VMOVDDUPZ128rm, VBROADCASTSDZ256rm,
VBROADCASTSDZrm)
}
}

Expand Down
27 changes: 27 additions & 0 deletions llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16 -o - | FileCheck %s
; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16,+sve -o - | FileCheck %s
; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16,+sve2 -o - | FileCheck %s

target triple = "aarch64"

Expand Down Expand Up @@ -158,6 +159,32 @@ entry:
ret <16 x half> %interleaved.vec
}


; Integer variant of the complex-add pattern. The shufflevectors below split %a
; and %b into their even lanes (named *.real) and odd lanes (named *.imag),
; compute (b.real - a.imag, b.imag + a.real), and re-interleave the two results.
; Expected not to transform as it is integer: the checks below only contain
; plain uzp1/uzp2, sub/add and zip1/zip2 instructions.
define <16 x i16> @complex_add_v16i16(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: complex_add_v16i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uzp1 v4.8h, v2.8h, v3.8h
; CHECK-NEXT: uzp1 v5.8h, v0.8h, v1.8h
; CHECK-NEXT: uzp2 v0.8h, v0.8h, v1.8h
; CHECK-NEXT: uzp2 v1.8h, v2.8h, v3.8h
; CHECK-NEXT: sub v2.8h, v4.8h, v0.8h
; CHECK-NEXT: add v1.8h, v1.8h, v5.8h
; CHECK-NEXT: zip1 v0.8h, v2.8h, v1.8h
; CHECK-NEXT: zip2 v1.8h, v2.8h, v1.8h
; CHECK-NEXT: ret
entry:
%a.real = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
%a.imag = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
%b.real = shufflevector <16 x i16> %b, <16 x i16> zeroinitializer, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
%b.imag = shufflevector <16 x i16> %b, <16 x i16> zeroinitializer, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
%0 = sub <8 x i16> %b.real, %a.imag
%1 = add <8 x i16> %b.imag, %a.real
%interleaved.vec = shufflevector <8 x i16> %0, <8 x i16> %1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
ret <16 x i16> %interleaved.vec
}


declare { <2 x half>, <2 x half> } @llvm.experimental.vector.deinterleave2.v4f16(<4 x half>)
declare <4 x half> @llvm.experimental.vector.interleave2.v4f16(<2 x half>, <2 x half>)

Expand Down
1,458 changes: 729 additions & 729 deletions llvm/test/CodeGen/X86/shift-amount-mod.ll

Large diffs are not rendered by default.

196 changes: 98 additions & 98 deletions llvm/test/CodeGen/X86/shift-and.ll
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-unknown-unknown | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=i386-unknown-unknown | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64

define i32 @t1(i32 %t, i32 %val) nounwind {
; X32-LABEL: t1:
; X32: # %bb.0:
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: shll %cl, %eax
; X32-NEXT: retl
; X86-LABEL: t1:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shll %cl, %eax
; X86-NEXT: retl
;
; X64-LABEL: t1:
; X64: # %bb.0:
Expand All @@ -23,12 +23,12 @@ define i32 @t1(i32 %t, i32 %val) nounwind {
}

define i32 @t2(i32 %t, i32 %val) nounwind {
; X32-LABEL: t2:
; X32: # %bb.0:
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: shll %cl, %eax
; X32-NEXT: retl
; X86-LABEL: t2:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shll %cl, %eax
; X86-NEXT: retl
;
; X64-LABEL: t2:
; X64: # %bb.0:
Expand All @@ -45,11 +45,11 @@ define i32 @t2(i32 %t, i32 %val) nounwind {
@X = internal global i16 0

define void @t3(i16 %t) nounwind {
; X32-LABEL: t3:
; X32: # %bb.0:
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: sarw %cl, X
; X32-NEXT: retl
; X86-LABEL: t3:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: sarw %cl, X
; X86-NEXT: retl
;
; X64-LABEL: t3:
; X64: # %bb.0:
Expand All @@ -65,23 +65,23 @@ define void @t3(i16 %t) nounwind {
}

define i64 @t4(i64 %t, i64 %val) nounwind {
; X32-LABEL: t4:
; X32: # %bb.0:
; X32-NEXT: pushl %esi
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl %esi, %edx
; X32-NEXT: shrl %cl, %edx
; X32-NEXT: shrdl %cl, %esi, %eax
; X32-NEXT: testb $32, %cl
; X32-NEXT: je .LBB3_2
; X32-NEXT: # %bb.1:
; X32-NEXT: movl %edx, %eax
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: .LBB3_2:
; X32-NEXT: popl %esi
; X32-NEXT: retl
; X86-LABEL: t4:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, %edx
; X86-NEXT: shrl %cl, %edx
; X86-NEXT: shrdl %cl, %esi, %eax
; X86-NEXT: testb $32, %cl
; X86-NEXT: je .LBB3_2
; X86-NEXT: # %bb.1:
; X86-NEXT: movl %edx, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: .LBB3_2:
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: t4:
; X64: # %bb.0:
Expand All @@ -96,23 +96,23 @@ define i64 @t4(i64 %t, i64 %val) nounwind {
}

define i64 @t5(i64 %t, i64 %val) nounwind {
; X32-LABEL: t5:
; X32: # %bb.0:
; X32-NEXT: pushl %esi
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl %esi, %edx
; X32-NEXT: shrl %cl, %edx
; X32-NEXT: shrdl %cl, %esi, %eax
; X32-NEXT: testb $32, %cl
; X32-NEXT: je .LBB4_2
; X32-NEXT: # %bb.1:
; X32-NEXT: movl %edx, %eax
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: .LBB4_2:
; X32-NEXT: popl %esi
; X32-NEXT: retl
; X86-LABEL: t5:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, %edx
; X86-NEXT: shrl %cl, %edx
; X86-NEXT: shrdl %cl, %esi, %eax
; X86-NEXT: testb $32, %cl
; X86-NEXT: je .LBB4_2
; X86-NEXT: # %bb.1:
; X86-NEXT: movl %edx, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: .LBB4_2:
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: t5:
; X64: # %bb.0:
Expand All @@ -127,28 +127,28 @@ define i64 @t5(i64 %t, i64 %val) nounwind {
}

define void @t5ptr(i64 %t, ptr %ptr) nounwind {
; X32-LABEL: t5ptr:
; X32: # %bb.0:
; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl (%eax), %edx
; X32-NEXT: movl 4(%eax), %edi
; X32-NEXT: movl %edi, %esi
; X32-NEXT: shrl %cl, %esi
; X32-NEXT: shrdl %cl, %edi, %edx
; X32-NEXT: testb $32, %cl
; X32-NEXT: je .LBB5_2
; X32-NEXT: # %bb.1:
; X32-NEXT: movl %esi, %edx
; X32-NEXT: xorl %esi, %esi
; X32-NEXT: .LBB5_2:
; X32-NEXT: movl %edx, (%eax)
; X32-NEXT: movl %esi, 4(%eax)
; X32-NEXT: popl %esi
; X32-NEXT: popl %edi
; X32-NEXT: retl
; X86-LABEL: t5ptr:
; X86: # %bb.0:
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: movl 4(%eax), %edi
; X86-NEXT: movl %edi, %esi
; X86-NEXT: shrl %cl, %esi
; X86-NEXT: shrdl %cl, %edi, %edx
; X86-NEXT: testb $32, %cl
; X86-NEXT: je .LBB5_2
; X86-NEXT: # %bb.1:
; X86-NEXT: movl %esi, %edx
; X86-NEXT: xorl %esi, %esi
; X86-NEXT: .LBB5_2:
; X86-NEXT: movl %edx, (%eax)
; X86-NEXT: movl %esi, 4(%eax)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; X64-LABEL: t5ptr:
; X64: # %bb.0:
Expand All @@ -166,23 +166,23 @@ define void @t5ptr(i64 %t, ptr %ptr) nounwind {

; rdar://11866926
define i64 @t6(i64 %key, ptr nocapture %val) nounwind {
; X32-LABEL: t6:
; X32: # %bb.0:
; X32-NEXT: pushl %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: shrdl $3, %eax, %ecx
; X32-NEXT: movl %eax, %esi
; X32-NEXT: shrl $3, %esi
; X32-NEXT: movl (%edx), %eax
; X32-NEXT: movl 4(%edx), %edx
; X32-NEXT: addl $-1, %eax
; X32-NEXT: adcl $-1, %edx
; X32-NEXT: andl %ecx, %eax
; X32-NEXT: andl %esi, %edx
; X32-NEXT: popl %esi
; X32-NEXT: retl
; X86-LABEL: t6:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shrdl $3, %eax, %ecx
; X86-NEXT: movl %eax, %esi
; X86-NEXT: shrl $3, %esi
; X86-NEXT: movl (%edx), %eax
; X86-NEXT: movl 4(%edx), %edx
; X86-NEXT: addl $-1, %eax
; X86-NEXT: adcl $-1, %edx
; X86-NEXT: andl %ecx, %eax
; X86-NEXT: andl %esi, %edx
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: t6:
; X64: # %bb.0:
Expand All @@ -199,13 +199,13 @@ define i64 @t6(i64 %key, ptr nocapture %val) nounwind {
}

define i64 @big_mask_constant(i64 %x) nounwind {
; X32-LABEL: big_mask_constant:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: andl $4, %eax
; X32-NEXT: shll $25, %eax
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: retl
; X86-LABEL: big_mask_constant:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $4, %eax
; X86-NEXT: shll $25, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: retl
;
; X64-LABEL: big_mask_constant:
; X64: # %bb.0:
Expand Down
646 changes: 323 additions & 323 deletions llvm/test/CodeGen/X86/shift-combine.ll

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion llvm/unittests/TargetParser/TargetParserTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1651,7 +1651,7 @@ INSTANTIATE_TEST_SUITE_P(
"8.2-A")));

// Note: number of CPUs includes aliases.
static constexpr unsigned NumAArch64CPUArchs = 67;
static constexpr unsigned NumAArch64CPUArchs = 68;

TEST(TargetParserTest, testAArch64CPUArchList) {
SmallVector<StringRef, NumAArch64CPUArchs> List;
Expand Down
108 changes: 62 additions & 46 deletions openmp/libomptarget/DeviceRTL/src/Parallelism.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,64 @@ uint32_t determineNumberOfThreads(int32_t NumThreadsClause) {

extern "C" {

/// Run the outlined parallel region \p fn on the calling team; this is the
/// path `__kmpc_parallel_51` takes when `mapping::isSPMDMode()` is true.
///
/// Every thread of the team is expected to call this function together: it
/// contains several `synchronize::threadsAligned` barriers. The team state
/// (`state::ParallelTeamSize`, `icv::ActiveLevel`, `icv::Level`) is set for the
/// duration of the region via `state::ValueRAII` objects and is asserted to be
/// back at (1, 0, 0) before returning.
///
/// \param ident       Source location information, forwarded to the state
///                    helpers and assertions.
/// \param num_threads Value of the num_threads clause, forwarded to
///                    `determineNumberOfThreads`.
/// \param fn          Outlined parallel region, forwarded to `invokeMicrotask`.
/// \param args        Argument array forwarded to `invokeMicrotask`.
/// \param nargs       Number of entries in \p args, forwarded to
///                    `invokeMicrotask`.
[[clang::always_inline]] void __kmpc_parallel_spmd(IdentTy *ident,
int32_t num_threads,
void *fn, void **args,
const int64_t nargs) {
uint32_t TId = mapping::getThreadIdInBlock();
uint32_t NumThreads = determineNumberOfThreads(num_threads);
// A parallel team size of 0 encodes "all threads of the team participate";
// see the `!PTeamSize || TId < PTeamSize` check below.
uint32_t PTeamSize =
NumThreads == mapping::getMaxTeamThreads() ? 0 : NumThreads;
// Avoid the race between the read of the `icv::Level` above and the write
// below by synchronizing all threads here.
// NOTE(review): no read of `icv::Level` is visible in this function itself;
// presumably it happens inside `determineNumberOfThreads` and/or in the
// caller before this function is entered -- confirm.
synchronize::threadsAligned(atomic::seq_cst);
{
// Note that the order here is important. `icv::Level` has to be updated
// last or the other updates will cause a thread specific state to be
// created.
// The `TId == 0` argument is true only for the main thread, which is the
// one that sets up the team state (see the barrier comment below).
state::ValueRAII ParallelTeamSizeRAII(state::ParallelTeamSize, PTeamSize,
1u, TId == 0, ident,
/*ForceTeamState=*/true);
state::ValueRAII ActiveLevelRAII(icv::ActiveLevel, 1u, 0u, TId == 0, ident,
/*ForceTeamState=*/true);
state::ValueRAII LevelRAII(icv::Level, 1u, 0u, TId == 0, ident,
/*ForceTeamState=*/true);

// Synchronize all threads after the main thread (TId == 0) set up the
// team state properly.
synchronize::threadsAligned(atomic::acq_rel);

state::ParallelTeamSize.assert_eq(PTeamSize, ident,
/*ForceTeamState=*/true);
icv::ActiveLevel.assert_eq(1u, ident, /*ForceTeamState=*/true);
icv::Level.assert_eq(1u, ident, /*ForceTeamState=*/true);

// Ensure we synchronize before we run user code to avoid invalidating the
// assumptions above.
synchronize::threadsAligned(atomic::relaxed);

// Only threads that are part of the parallel team run the region; the
// remaining threads skip it but still take part in the barriers.
if (!PTeamSize || TId < PTeamSize)
invokeMicrotask(TId, 0, fn, args, nargs);

// Synchronize all threads at the end of a parallel region.
synchronize::threadsAligned(atomic::seq_cst);
}

// Synchronize all threads to make sure every thread exits the scope above;
// otherwise the following assertions and the assumption in
// __kmpc_target_deinit may not hold.
synchronize::threadsAligned(atomic::acq_rel);

state::ParallelTeamSize.assert_eq(1u, ident, /*ForceTeamState=*/true);
icv::ActiveLevel.assert_eq(0u, ident, /*ForceTeamState=*/true);
icv::Level.assert_eq(0u, ident, /*ForceTeamState=*/true);

// Ensure we synchronize to create an aligned region around the assumptions.
synchronize::threadsAligned(atomic::relaxed);

return;
}

[[clang::always_inline]] void
__kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr,
int32_t num_threads, int proc_bind, void *fn,
Expand Down Expand Up @@ -112,52 +170,10 @@ __kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr,
uint32_t MaxTeamThreads = mapping::getMaxTeamThreads();
uint32_t PTeamSize = NumThreads == MaxTeamThreads ? 0 : NumThreads;
if (mapping::isSPMDMode()) {
// Avoid the race between the read of the `icv::Level` above and the write
// below by synchronizing all threads here.
synchronize::threadsAligned(atomic::seq_cst);
{
// Note that the order here is important. `icv::Level` has to be updated
// last or the other updates will cause a thread specific state to be
// created.
state::ValueRAII ParallelTeamSizeRAII(state::ParallelTeamSize, PTeamSize,
1u, TId == 0, ident,
/*ForceTeamState=*/true);
state::ValueRAII ActiveLevelRAII(icv::ActiveLevel, 1u, 0u, TId == 0,
ident, /*ForceTeamState=*/true);
state::ValueRAII LevelRAII(icv::Level, 1u, 0u, TId == 0, ident,
/*ForceTeamState=*/true);

// Synchronize all threads after the main thread (TId == 0) set up the
// team state properly.
synchronize::threadsAligned(atomic::acq_rel);

state::ParallelTeamSize.assert_eq(PTeamSize, ident,
/*ForceTeamState=*/true);
icv::ActiveLevel.assert_eq(1u, ident, /*ForceTeamState=*/true);
icv::Level.assert_eq(1u, ident, /*ForceTeamState=*/true);

// Ensure we synchronize before we run user code to avoid invalidating the
// assumptions above.
synchronize::threadsAligned(atomic::relaxed);

if (!PTeamSize || TId < PTeamSize)
invokeMicrotask(TId, 0, fn, args, nargs);

// Synchronize all threads at the end of a parallel region.
synchronize::threadsAligned(atomic::seq_cst);
}

// Synchronize all threads to make sure every thread exits the scope above;
// otherwise the following assertions and the assumption in
// __kmpc_target_deinit may not hold.
synchronize::threadsAligned(atomic::acq_rel);

state::ParallelTeamSize.assert_eq(1u, ident, /*ForceTeamState=*/true);
icv::ActiveLevel.assert_eq(0u, ident, /*ForceTeamState=*/true);
icv::Level.assert_eq(0u, ident, /*ForceTeamState=*/true);

// Ensure we synchronize to create an aligned region around the assumptions.
synchronize::threadsAligned(atomic::relaxed);
// This was moved to its own routine so it could be called directly
// in certain situations to avoid resource consumption of unused
// logic in parallel_51.
__kmpc_parallel_spmd(ident, num_threads, fn, args, nargs);

return;
}
Expand Down