635 changes: 41 additions & 594 deletions libc/src/math/amdgpu/CMakeLists.txt

Large diffs are not rendered by default.

601 changes: 0 additions & 601 deletions libc/src/math/nvptx/CMakeLists.txt

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion libc/src/math/nvptx/llrint.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@

namespace LIBC_NAMESPACE {

LLVM_LIBC_FUNCTION(long long, llrint, (double x)) { return __nv_llrint(x); }
// llrint for double: rounds x to an integral double with __builtin_rint and
// then converts that value to long long.
LLVM_LIBC_FUNCTION(long long, llrint, (double x)) {
  const double rounded = __builtin_rint(x);
  return static_cast<long long>(rounded);
}

} // namespace LIBC_NAMESPACE
4 changes: 3 additions & 1 deletion libc/src/math/nvptx/llrintf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@

namespace LIBC_NAMESPACE {

LLVM_LIBC_FUNCTION(long long, llrintf, (float x)) { return __nv_llrintf(x); }
// llrintf for float: rounds x to an integral float with __builtin_rintf and
// then converts that value to long long.
LLVM_LIBC_FUNCTION(long long, llrintf, (float x)) {
  const float rounded = __builtin_rintf(x);
  return static_cast<long long>(rounded);
}

} // namespace LIBC_NAMESPACE
4 changes: 3 additions & 1 deletion libc/src/math/nvptx/lrint.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@

namespace LIBC_NAMESPACE {

LLVM_LIBC_FUNCTION(long, lrint, (double x)) { return __nv_lrint(x); }
// lrint for double: rounds x to an integral double with __builtin_rint and
// then converts that value to long.
LLVM_LIBC_FUNCTION(long, lrint, (double x)) {
  const double rounded = __builtin_rint(x);
  return static_cast<long>(rounded);
}

} // namespace LIBC_NAMESPACE
18 changes: 18 additions & 0 deletions libc/src/stdio/scanf_core/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,26 @@
# Translate the scanf build-configuration switches into compiler definitions.
# Each enabled LIBC_CONF_SCANF_* option appends the matching
# -DLIBC_COPT_SCANF_* flag to scanf_config_copts.
if(LIBC_CONF_SCANF_DISABLE_FLOAT)
list(APPEND scanf_config_copts "-DLIBC_COPT_SCANF_DISABLE_FLOAT")
endif()
if(LIBC_CONF_SCANF_DISABLE_INDEX_MODE)
list(APPEND scanf_config_copts "-DLIBC_COPT_SCANF_DISABLE_INDEX_MODE")
endif()
# Only when at least one flag was collected, put the COMPILE_OPTIONS keyword in
# front of the list so the whole list can be spliced in as a keyword argument.
# When no flag is set the variable stays empty and expands to nothing.
if(scanf_config_copts)
list(PREPEND scanf_config_copts "COMPILE_OPTIONS")
endif()

# Header-only target for scanf_config.h. ${scanf_config_copts} expands either
# to nothing or to "COMPILE_OPTIONS <flags...>", depending on the
# LIBC_CONF_SCANF_* options handled earlier in this file.
add_header_library(
scanf_config
HDRS
scanf_config.h
${scanf_config_copts}
)

add_header_library(
core_structs
HDRS
core_structs.h
DEPENDS
.scanf_config
libc.src.__support.CPP.string_view
libc.src.__support.CPP.bitset
libc.src.__support.FPUtil.fp_bits
Expand Down
36 changes: 36 additions & 0 deletions libcxx/test/std/numerics/complex.number/complex/bit_cast.pass.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// UNSUPPORTED: c++03, c++11, c++14, c++17

// Make sure that std::bit_cast works with std::complex. Test case extracted from
// https://github.com/llvm/llvm-project/issues/94620.

#include <bit>
#include <complex>

// Builds a zero-filled byte buffer with the size of std::complex<T> and
// bit_casts it to std::complex<T>. The function is constexpr, so it can be
// evaluated both at run time and inside a constant expression.
template <class T>
constexpr void test() {
using Complex = std::complex<T>;
// Source object: sizeof(Complex) bytes, all zero.
unsigned char data[sizeof(Complex)] = {0};

// Only the bit_cast itself is under test; the result is deliberately unused.
[[maybe_unused]] Complex c = std::bit_cast<Complex>(data);
}

// Runs test<T>() for float, double and long double. Always returns true so
// that a call to it can be used as the condition of a static_assert.
constexpr bool test_all() {
test<float>();
test<double>();
test<long double>();
return true;
}

// Exercises the checks twice: once as an ordinary run-time call and once as a
// constant expression via static_assert.
int main(int, char**) {
test_all();
static_assert(test_all());
return 0;
}
2 changes: 1 addition & 1 deletion libcxxabi/src/cxa_exception_storage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ extern "C" {
} // extern "C"
} // namespace __cxxabiv1

#elif defined(HAS_THREAD_LOCAL)
#elif __has_feature(cxx_thread_local)

namespace __cxxabiv1 {
namespace {
Expand Down
3 changes: 3 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsAArch64.td
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def int_aarch64_frint64x
: DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0> ],
[ IntrNoMem ]>;


//===----------------------------------------------------------------------===//
// HINT

Expand All @@ -65,6 +66,8 @@ def int_aarch64_hint : DefaultAttrsIntrinsic<[], [llvm_i32_ty]>;
def int_aarch64_break : Intrinsic<[], [llvm_i32_ty],
[IntrNoMem, IntrHasSideEffects, IntrNoReturn, IntrCold, ImmArg<ArgIndex<0>>]>;

// llvm.aarch64.hlt: no results and a single i32 operand that must be an
// immediate (ImmArg on argument 0). It carries the same attribute list as
// int_aarch64_break.
def int_aarch64_hlt : Intrinsic<[], [llvm_i32_ty],
[IntrNoMem, IntrHasSideEffects, IntrNoReturn, IntrCold, ImmArg<ArgIndex<0>>]>;

def int_aarch64_prefetch : Intrinsic<[],
[llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AArch64/AArch64InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -3111,7 +3111,8 @@ def BRK : ExceptionGeneration<0b001, 0b00, "brk",
def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">;
def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">;
def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">, Requires<[HasEL3]>;
def HLT : ExceptionGeneration<0b010, 0b00, "hlt">;
// HLT carries a selection pattern: a call to int_aarch64_hlt whose immediate
// operand matches timm32_0_65535 is selected to this instruction.
def HLT : ExceptionGeneration<0b010, 0b00, "hlt",
[(int_aarch64_hlt timm32_0_65535:$imm)]>;
def HVC : ExceptionGeneration<0b000, 0b10, "hvc">;
def SMC : ExceptionGeneration<0b000, 0b11, "smc">, Requires<[HasEL3]>;
def SVC : ExceptionGeneration<0b000, 0b01, "svc">;
Expand Down
29 changes: 25 additions & 4 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2213,6 +2213,27 @@ class BoUpSLP {
return getNumLanes() == 2 || Cnt > 1;
}

/// Checks if there is at least single compatible operand in lanes other
/// than \p Lane, compatible with the operand \p Op.
/// \param Op the instruction operand being examined.
/// \param OpIdx operand index of \p Op; used here only to read its APO flag.
/// \param Lane the lane \p Op belongs to; this lane is skipped by the search.
/// \returns true as soon as any operand slot in any other lane satisfies the
/// predicate below, false if no slot does.
bool canBeVectorized(Instruction *Op, unsigned OpIdx, unsigned Lane) const {
// APO flag stored for Op's own (OpIdx, Lane) slot.
bool OpAPO = getData(OpIdx, Lane).APO;
for (unsigned Ln = 0, Lns = getNumLanes(); Ln != Lns; ++Ln) {
// Never compare Op against its own lane.
if (Ln == Lane)
continue;
// Scan every operand slot of lane Ln.
if (any_of(seq<unsigned>(getNumOperands()), [&](unsigned OpI) {
const OperandData &Data = getData(OpI, Ln);
// NOTE(review): a slot whose APO differs from Op's, or that is already
// marked used, satisfies the predicate and therefore makes the whole
// function return true. Confirm this is intended rather than meant to
// reject such a slot.
if (Data.APO != OpAPO || Data.IsUsed)
return true;
Value *OpILn = getValue(OpI, Ln);
// Otherwise the slot counts when its value is loop invariant (only
// checked if L is non-null), or when getSameOpcode reports a non-zero
// opcode for the pair {Op, OpILn} and both have the same parent.
return (L && L->isLoopInvariant(OpILn)) ||
(getSameOpcode({Op, OpILn}, TLI).getOpcode() &&
Op->getParent() == cast<Instruction>(OpILn)->getParent());
}))
return true;
}
return false;
}

public:
/// Initialize with all the operands of the instruction vector \p RootVL.
VLOperands(ArrayRef<Value *> RootVL, const BoUpSLP &R)
Expand Down Expand Up @@ -2268,14 +2289,14 @@ class BoUpSLP {
// side.
if (isa<LoadInst>(OpLane0))
ReorderingModes[OpIdx] = ReorderingMode::Load;
else if (isa<Instruction>(OpLane0)) {
else if (auto *OpILane0 = dyn_cast<Instruction>(OpLane0)) {
// Check if OpLane0 should be broadcast.
if (shouldBroadcast(OpLane0, OpIdx, FirstLane))
if (shouldBroadcast(OpLane0, OpIdx, FirstLane) ||
!canBeVectorized(OpILane0, OpIdx, FirstLane))
ReorderingModes[OpIdx] = ReorderingMode::Splat;
else
ReorderingModes[OpIdx] = ReorderingMode::Opcode;
}
else if (isa<Constant>(OpLane0))
} else if (isa<Constant>(OpLane0))
ReorderingModes[OpIdx] = ReorderingMode::Constant;
else if (isa<Argument>(OpLane0))
// Our best hope is a Splat. It may save some cost in some cases.
Expand Down
10 changes: 10 additions & 0 deletions llvm/test/CodeGen/AArch64/arm64-hlt.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s

; Calls llvm.aarch64.hlt with the immediate 2; the FileCheck directives below
; expect the generated code for @foo to contain "hlt #0x2".
define void @foo() nounwind {
; CHECK-LABEL: foo
; CHECK: hlt #0x2
tail call void @llvm.aarch64.hlt(i32 2)
ret void
}

declare void @llvm.aarch64.hlt(i32 immarg) nounwind
10 changes: 5 additions & 5 deletions llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ define void @foo() local_unnamed_addr {
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr getelementptr inbounds ([4 x [4 x i32]], ptr @bar, i64 0, i64 3, i64 0), align 4
; CHECK-NEXT: [[ARRAYIDX372:%.*]] = getelementptr inbounds [4 x [4 x i32]], ptr @dct_luma, i64 0, i64 3, i64 0
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([4 x [4 x i32]], ptr @bar, i64 0, i64 3, i64 2), align 4
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[ADD277]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> undef, [[TMP5]]
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> <i32 poison, i32 undef, i32 poison, i32 poison>, i32 [[TMP0]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> <i32 undef, i32 poison, i32 undef, i32 undef>, i32 [[ADD277]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP4]], [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = ashr <4 x i32> [[TMP6]], <i32 6, i32 6, i32 6, i32 6>
; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr [[ARRAYIDX372]], align 4
; CHECK-NEXT: unreachable
Expand Down
17 changes: 4 additions & 13 deletions llvm/test/Transforms/SLPVectorizer/X86/reordering-single-phi.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,27 +12,18 @@ define void @test() {
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i64 0, i64 [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i64 0, i64 [[TMP3]]
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, ptr [[ARRAYIDX11]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 4
; CHECK-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i64 0, i64 [[TMP5]]
; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX31]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP2]], i32 1
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 poison>
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> [[TMP9]], <4 x i32> <i32 0, i32 1, i32 5, i32 poison>
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP6]], i32 3
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i32 0
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x float> [[TMP12]], <4 x float> [[TMP9]], <4 x i32> <i32 0, i32 4, i32 poison, i32 6>
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x float> [[TMP13]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 3>
; CHECK-NEXT: [[TMP14:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP14]], <4 x float> poison, <4 x i32> <i32 poison, i32 0, i32 1, i32 2>
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP4]], float [[TMP0]], i32 0
; CHECK-NEXT: [[TMP15:%.*]] = fmul fast <4 x float> [[TMP11]], [[TMP14]]
; CHECK-NEXT: store <4 x float> [[TMP15]], ptr [[ARRAYIDX6]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 5
; CHECK-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i64 0, i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT: [[TMP16]] = load float, ptr [[ARRAYIDX41]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP14]], i32 3
; CHECK-NEXT: [[MUL45:%.*]] = fmul fast float [[TMP16]], [[TMP6]]
; CHECK-NEXT: store float [[MUL45]], ptr [[ARRAYIDX31]], align 4
; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i64 [[INDVARS_IV]], 31990
Expand Down
88 changes: 88 additions & 0 deletions mlir/test/Integration/Dialect/Arith/CPU/addition.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
// RUN: mlir-opt %s --convert-scf-to-cf --convert-cf-to-llvm --convert-vector-to-llvm \
// RUN: --convert-func-to-llvm --convert-arith-to-llvm | \
// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
// RUN: --shared-libs=%mlir_c_runner_utils | \
// RUN: FileCheck %s --match-full-lines

// Helper: prints the "@addi_i1" banner line, then the i1 result of
// arith.addi applied to %v1 and %v2.
func.func @addi_i1(%v1 : i1, %v2 : i1) {
  vector.print str "@addi_i1\n"
  %sum = arith.addi %v1, %v2 : i1
  vector.print %sum : i1
  return
}

// Drives @addi_i1 with i1 constants. Each call prints a banner and a result;
// the FileCheck directives placed before each call pin the expected output,
// so the order of the calls must match the order of the directives.
func.func @addi() {
// ------------------------------------------------
// Test i1
// ------------------------------------------------

// addi on i1
// addi(0, 1) : i1 = 1 : i1; addi(0, -1) : i1 = 1
// (the second case spells the operand as -1 instead of 1 and expects the
// same printed result)
%false = arith.constant 0 : i1
%true = arith.constant 1 : i1

// CHECK-LABEL: @addi_i1
// CHECK-NEXT: 1
func.call @addi_i1(%false, %true) : (i1, i1) -> ()

// CHECK-LABEL: @addi_i1
// CHECK-NEXT: 1
%true_based_on_non_zero_val = arith.constant -1 : i1
func.call @addi_i1(%false, %true_based_on_non_zero_val) : (i1, i1) -> ()

// ------------------------------------------------
// TODO: Test i8, i16 etc..
// ------------------------------------------------

return
}

// Helper: prints the "@addui_extended_i1" banner line, then the two i1
// results of arith.addui_extended on %v1 and %v2, in the order the op
// returns them (sum first, overflow flag second).
func.func @addui_extended_i1(%v1 : i1, %v2 : i1) {
  vector.print str "@addui_extended_i1\n"
  %sum, %carry = arith.addui_extended %v1, %v2 : i1, i1
  vector.print %sum : i1
  vector.print %carry : i1
  return
}

// Drives @addui_extended_i1 over all four combinations of i1 operands
// (1,1), (1,0), (0,1) and (0,0). For each call the directives expect the
// banner, then the sum, then the overflow flag.
func.func @addi_extended() {
// ------------------------------------------------
// Test i1
// ------------------------------------------------

// addui_extended on i1
// addui_extended 1 1 : i1 = 0, 1
// (1 + 1 = 2 does not fit in one bit: sum 0, overflow 1)
%true = arith.constant 1 : i1
%false = arith.constant 0 : i1

// CHECK-LABEL: @addui_extended_i1
// CHECK-NEXT: 0
// CHECK-NEXT: 1
func.call @addui_extended_i1(%true, %true) : (i1, i1) -> ()

// CHECK-LABEL: @addui_extended_i1
// CHECK-NEXT: 1
// CHECK-NEXT: 0
func.call @addui_extended_i1(%true, %false) : (i1, i1) -> ()

// CHECK-LABEL: @addui_extended_i1
// CHECK-NEXT: 1
// CHECK-NEXT: 0
func.call @addui_extended_i1(%false, %true) : (i1, i1) -> ()

// CHECK-LABEL: @addui_extended_i1
// CHECK-NEXT: 0
// CHECK-NEXT: 0
func.call @addui_extended_i1(%false, %false) : (i1, i1) -> ()

// ------------------------------------------------
// TODO: Test i8, i16 etc..
// ------------------------------------------------
return
}

// Entry point selected by the runner's "-e entry" flag in the RUN line. It
// runs the addi group first and the addui_extended group second, which is the
// order in which the expected output is written in this file.
func.func @entry() {
func.call @addi() : () -> ()
func.call @addi_extended() : () -> ()
return
}
119 changes: 119 additions & 0 deletions mlir/test/Integration/Dialect/Arith/CPU/multiplication.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
// RUN: mlir-opt %s --convert-scf-to-cf --convert-cf-to-llvm --convert-vector-to-llvm \
// RUN: --convert-func-to-llvm --convert-arith-to-llvm | \
// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
// RUN: --shared-libs=%mlir_c_runner_utils | \
// RUN: FileCheck %s --match-full-lines

// Helper: prints the "@mulsi_extended_i1" banner line, then the low and high
// i1 results of arith.mulsi_extended on %v1 and %v2, in that order.
func.func @mulsi_extended_i1(%v1 : i1, %v2 : i1) {
  vector.print str "@mulsi_extended_i1\n"
  %lo, %hi = arith.mulsi_extended %v1, %v2 : i1
  vector.print %lo : i1
  vector.print %hi : i1
  return
}

// Helper: prints the "@mulsi_extended_i8" banner line, then the low and high
// i8 results of arith.mulsi_extended on %v1 and %v2, in that order.
func.func @mulsi_extended_i8(%v1 : i8, %v2 : i8) {
  vector.print str "@mulsi_extended_i8\n"
  %lo, %hi = arith.mulsi_extended %v1, %v2 : i8
  vector.print %lo : i8
  vector.print %hi : i8
  return
}

// Drives @mulsi_extended_i1 over all four i1 operand combinations and
// @mulsi_extended_i8 with an overflowing i8 product. Each helper prints a
// banner, then the low result, then the high result; the directives placed
// before each call pin that output, so call order must match directive order.
func.func @mulsi_extended() {
  // ------------------------------------------------
  // Test i1
  // ------------------------------------------------

  // mulsi_extended on i1, tests for overflow bit
  // mulsi_extended 1, 1 : i1 = (1, 0)
  // (read as a signed i1 the bit pattern 1 is -1, and -1 * -1 = 1,
  // i.e. low = 1, high = 0)
  %true = arith.constant true
  %false = arith.constant false

  // CHECK-LABEL: @mulsi_extended_i1
  // CHECK-NEXT: 1
  // CHECK-NEXT: 0
  func.call @mulsi_extended_i1(%true, %true) : (i1, i1) -> ()

  // CHECK-LABEL: @mulsi_extended_i1
  // CHECK-NEXT: 0
  // CHECK-NEXT: 0
  func.call @mulsi_extended_i1(%true, %false) : (i1, i1) -> ()

  // CHECK-LABEL: @mulsi_extended_i1
  // CHECK-NEXT: 0
  // CHECK-NEXT: 0
  func.call @mulsi_extended_i1(%false, %true) : (i1, i1) -> ()

  // CHECK-LABEL: @mulsi_extended_i1
  // CHECK-NEXT: 0
  // CHECK-NEXT: 0
  func.call @mulsi_extended_i1(%false, %false) : (i1, i1) -> ()

  // ------------------------------------------------
  // Test i8
  // ------------------------------------------------
  // mulsi extended versions, with overflow
  // The constant is negative 100, hence the "n" in its name.
  %c_n100_i8 = arith.constant -100 : i8

  // mulsi_extended -100, -100 : i8 = (16, 39)
  // (-100 * -100 = 10000 = 0x2710: low byte 0x10 = 16, high byte 0x27 = 39)
  // CHECK-LABEL: @mulsi_extended_i8
  // CHECK-NEXT: 16
  // CHECK-NEXT: 39
  func.call @mulsi_extended_i8(%c_n100_i8, %c_n100_i8) : (i8, i8) -> ()

  // ------------------------------------------------
  // TODO: Test i16, i32 etc..
  // ------------------------------------------------
  return
}

// Helper: prints the "@mului_extended_i8" banner line, then the low and high
// i8 results of arith.mului_extended on %v1 and %v2, in that order.
func.func @mului_extended_i8(%v1 : i8, %v2 : i8) {
  vector.print str "@mului_extended_i8\n"
  %lo, %hi = arith.mului_extended %v1, %v2 : i8
  vector.print %lo : i8
  vector.print %hi : i8
  return
}

// Drives @mului_extended_i8 with every pairing of two i8 constants written as
// -100 and 156. All four calls expect the same printed pair (16, 95).
func.func @mului_extended() {
// ------------------------------------------------
// Test i8
// ------------------------------------------------
%c_n100_i8 = arith.constant -100 : i8
%c_156_i8 = arith.constant 156 : i8

// mului_extended -100, -100 : i8 = (16, 95)
// and on equivalent representations (e.g. 156 === -100 (mod 256))
// (unsigned: 156 * 156 = 24336 = 0x5F10, low byte 0x10 = 16,
// high byte 0x5F = 95)

// CHECK-LABEL: @mului_extended_i8
// CHECK-NEXT: 16
// CHECK-NEXT: 95
func.call @mului_extended_i8(%c_n100_i8, %c_n100_i8) : (i8, i8) -> ()

// CHECK-LABEL: @mului_extended_i8
// CHECK-NEXT: 16
// CHECK-NEXT: 95
func.call @mului_extended_i8(%c_n100_i8, %c_156_i8) : (i8, i8) -> ()

// CHECK-LABEL: @mului_extended_i8
// CHECK-NEXT: 16
// CHECK-NEXT: 95
func.call @mului_extended_i8(%c_156_i8, %c_n100_i8) : (i8, i8) -> ()

// CHECK-LABEL: @mului_extended_i8
// CHECK-NEXT: 16
// CHECK-NEXT: 95
func.call @mului_extended_i8(%c_156_i8, %c_156_i8) : (i8, i8) -> ()

// ------------------------------------------------
// TODO: Test i1, i16, i32 etc..
// ------------------------------------------------
return
}

// Entry point selected by the runner's "-e entry" flag in the RUN line. It
// runs the signed group first and the unsigned group second, which is the
// order in which the expected output is written in this file.
func.func @entry() {
func.call @mulsi_extended() : () -> ()
func.call @mului_extended() : () -> ()
return
}