3 changes: 0 additions & 3 deletions clang/test/Analysis/scan-build/exclude_directories.test
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
// FIXME: Actually, "perl".
REQUIRES: shell

RUN: rm -rf %t.output_dir && mkdir %t.output_dir
RUN: %scan-build -o %t.output_dir %clang -S \
RUN: %S/Inputs/multidirectory_project/directory1/file1.c \
Expand Down
3 changes: 0 additions & 3 deletions clang/test/Analysis/scan-build/help.test
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
// FIXME: Actually, "perl".
REQUIRES: shell

RUN: %scan-build -h | FileCheck %s
RUN: %scan-build --help | FileCheck %s

Expand Down
1 change: 0 additions & 1 deletion clang/test/Analysis/scan-build/html_output.test
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
// FIXME: Actually, "perl".
REQUIRES: shell

RUN: rm -rf %t.output_dir && mkdir %t.output_dir
Expand Down
13 changes: 8 additions & 5 deletions clang/test/Analysis/scan-build/lit.local.cfg
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# -*- Python -*-

import lit.util
import lit.formats
import os
import platform

use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
config.test_format = lit.formats.ShTest(use_lit_shell == "0")
Expand All @@ -12,13 +12,16 @@ clang_path = config.clang if config.have_llvm_driver else os.path.realpath(confi
config.substitutions.append(
(
"%scan-build",
"'%s' --use-analyzer=%s "
"'%s' '%s' --use-analyzer=%s "
% (
lit.util.which(
"scan-build",
os.path.join(config.clang_src_dir, "tools", "scan-build", "bin"),
config.perl_executable,
os.path.join(
config.clang_src_dir, "tools", "scan-build", "bin", "scan-build"
),
clang_path,
),
)
)

if not config.perl_executable or platform.system() == "Windows":
config.unsupported = True
1 change: 0 additions & 1 deletion clang/test/Analysis/scan-build/plist_html_output.test
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
// FIXME: Actually, "perl".
REQUIRES: shell

RUN: rm -rf %t.output_dir && mkdir %t.output_dir
Expand Down
1 change: 0 additions & 1 deletion clang/test/Analysis/scan-build/plist_output.test
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
// FIXME: Actually, "perl".
REQUIRES: shell

RUN: rm -rf %t.output_dir && mkdir %t.output_dir
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
// FIXME: Actually, "perl".
REQUIRES: shell

RUN: rm -rf %t.output_dir && mkdir %t.output_dir
RUN: cp %S/report-1.html %t.output_dir
RUN: cp %S/report-2.html %t.output_dir
Expand Down
3 changes: 0 additions & 3 deletions clang/test/Analysis/scan-build/silence-core-checkers.test
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
// FIXME: Actually, "perl".
REQUIRES: shell

RUN: rm -rf %t.output_dir && mkdir %t.output_dir
RUN: %scan-build -o %t.output_dir \
RUN: %clang -S %S/Inputs/null_dereference_and_division_by_zero.c \
Expand Down
31 changes: 31 additions & 0 deletions clang/test/CodeGen/linking-bitcode-postopt.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// REQUIRES: amdgpu-registered-target

// Test that -mlink-builtin-bitcode-postopt correctly enables LinkInModulesPass

// RUN: %clang_cc1 -triple amdgcn-- -emit-llvm-bc -o /dev/null \
// RUN: -mllvm -print-pipeline-passes \
// RUN: %s 2>&1 | FileCheck --check-prefixes=DEFAULT %s

// DEFAULT-NOT: LinkInModulesPass

// RUN: %clang_cc1 -triple amdgcn-- -emit-llvm-bc -o /dev/null \
// RUN: -mllvm -print-pipeline-passes \
// RUN: -mlink-builtin-bitcode-postopt \
// RUN: %s 2>&1 | FileCheck --check-prefixes=OPTION-POSITIVE %s

// OPTION-POSITIVE: LinkInModulesPass

// RUN: %clang_cc1 -triple amdgcn-- -emit-llvm-bc -o /dev/null \
// RUN: -mllvm -print-pipeline-passes \
// RUN: -mno-link-builtin-bitcode-postopt \
// RUN: %s 2>&1 | FileCheck --check-prefixes=OPTION-NEGATIVE %s

// OPTION-NEGATIVE-NOT: LinkInModulesPass

// RUN: %clang_cc1 -triple amdgcn-- -emit-llvm-bc -o /dev/null \
// RUN: -mllvm -print-pipeline-passes \
// RUN: -mlink-builtin-bitcode-postopt \
// RUN: -mno-link-builtin-bitcode-postopt \
// RUN: %s 2>&1 | FileCheck --check-prefixes=OPTION-POSITIVE-NEGATIVE %s

// OPTION-POSITIVE-NEGATIVE-NOT: LinkInModulesPass
16 changes: 6 additions & 10 deletions clang/test/CodeGenCoroutines/coro-dwarf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,10 @@ void f_coro(int val, MoveOnly moParam, MoveAndCopy mcParam) {
// CHECK: !{{[0-9]+}} = !DILocalVariable(name: "mcParam", arg: 3, scope: ![[SP]], file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
// CHECK: !{{[0-9]+}} = !DILocalVariable(name: "__promise",

// CHECK: !{{[0-9]+}} = distinct !DISubprogram(linkageName: "_Z6f_coroi8MoveOnly11MoveAndCopy.__await_suspend_wrapper__init"
// CHECK-NEXT: !{{[0-9]+}} = !DIFile
// CHECK-NEXT: !{{[0-9]+}} = !DISubroutineType
// CHECK-NEXT: !{{[0-9]+}} = !DILocalVariable(arg: 1,
// CHECK-NEXT: !{{[0-9]+}} = !DILocation
// CHECK-NEXT: !{{[0-9]+}} = !DILocalVariable(arg: 2,
// CHECK: ![[INIT:[0-9]+]] = distinct !DISubprogram(linkageName: "_Z6f_coroi8MoveOnly11MoveAndCopy.__await_suspend_wrapper__init"
// CHECK: !{{[0-9]+}} = !DILocalVariable(arg: 1, scope: ![[INIT]]
// CHECK: !{{[0-9]+}} = !DILocalVariable(arg: 2, scope: ![[INIT]]

// CHECK: !{{[0-9]+}} = distinct !DISubprogram(linkageName: "_Z6f_coroi8MoveOnly11MoveAndCopy.__await_suspend_wrapper__final"
// CHECK-NEXT: !{{[0-9]+}} = !DILocalVariable(arg: 1,
// CHECK-NEXT: !{{[0-9]+}} = !DILocation
// CHECK-NEXT: !{{[0-9]+}} = !DILocalVariable(arg: 2,
// CHECK: ![[FINAL:[0-9]+]] = distinct !DISubprogram(linkageName: "_Z6f_coroi8MoveOnly11MoveAndCopy.__await_suspend_wrapper__final"
// CHECK: !{{[0-9]+}} = !DILocalVariable(arg: 1, scope: ![[FINAL]]
// CHECK: !{{[0-9]+}} = !DILocalVariable(arg: 2, scope: ![[FINAL]]
58 changes: 0 additions & 58 deletions clang/test/OpenMP/target_ast_print.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1201,64 +1201,6 @@ foo();
}
#endif // OMP52

#ifdef OMP60

///==========================================================================///
// RUN: %clang_cc1 -DOMP60 -verify -Wno-vla -fopenmp -fopenmp-version=60 -ast-print %s | FileCheck %s --check-prefix OMP60
// RUN: %clang_cc1 -DOMP60 -fopenmp -fopenmp-version=60 -x c++ -std=c++11 -emit-pch -o %t %s
// RUN: %clang_cc1 -DOMP60 -fopenmp -fopenmp-version=60 -std=c++11 -include-pch %t -fsyntax-only -verify -Wno-vla %s -ast-print | FileCheck %s --check-prefix OMP60

// RUN: %clang_cc1 -DOMP60 -verify -Wno-vla -fopenmp-simd -fopenmp-version=60 -ast-print %s | FileCheck %s --check-prefix OMP60
// RUN: %clang_cc1 -DOMP60 -fopenmp-simd -fopenmp-version=60 -x c++ -std=c++11 -emit-pch -o %t %s
// RUN: %clang_cc1 -DOMP60 -fopenmp-simd -fopenmp-version=60 -std=c++11 -include-pch %t -fsyntax-only -verify -Wno-vla %s -ast-print | FileCheck %s --check-prefix OMP60

void foo() {}
// Test input for the map-clause printer. The three target regions spell the
// map modifiers in different ways: separated by whitespace ('from always'),
// separated by a comma with the map type first ('from, close'), and modifiers
// only with no explicit map type ('always,close').
template <typename T, int C>
T tmain(T argc, T *argv) {
T i;
#pragma omp target map(from always: i)
foo();
#pragma omp target map(from, close: i)
foo();
#pragma omp target map(always,close: i)
foo();
return 0;
}
//OMP60: template <typename T, int C> T tmain(T argc, T *argv) {
//OMP60-NEXT: T i;
//OMP60-NEXT: #pragma omp target map(always,from: i)
//OMP60-NEXT: foo();
//OMP60-NEXT: #pragma omp target map(close,from: i)
//OMP60-NEXT: foo();
//OMP60-NEXT: #pragma omp target map(always,close,tofrom: i)
//OMP60-NEXT: foo();
//OMP60-NEXT: return 0;
//OMP60-NEXT:}
//OMP60: template<> int tmain<int, 5>(int argc, int *argv) {
//OMP60-NEXT: int i;
//OMP60-NEXT: #pragma omp target map(always,from: i)
//OMP60-NEXT: foo();
//OMP60-NEXT: #pragma omp target map(close,from: i)
//OMP60-NEXT: foo();
//OMP60-NEXT: #pragma omp target map(always,close,tofrom: i)
//OMP60-NEXT: foo();
//OMP60-NEXT: return 0;
//OMP60-NEXT:}
//OMP60: template<> char tmain<char, 1>(char argc, char *argv) {
//OMP60-NEXT: char i;
//OMP60-NEXT: #pragma omp target map(always,from: i)
//OMP60-NEXT: foo();
//OMP60-NEXT: #pragma omp target map(close,from: i)
//OMP60-NEXT: foo();
//OMP60-NEXT: #pragma omp target map(always,close,tofrom: i)
//OMP60-NEXT: foo();
//OMP60-NEXT: return 0;
//OMP60-NEXT:}
// Instantiates tmain<int, 5> and tmain<char, 1>, the two specializations whose
// printed form is matched by the check lines above.
int main (int argc, char **argv) {
return tmain<int, 5>(argc, &argc) + tmain<char, 1>(argv[0][0], argv[0]);
}
#endif // OMP60

#ifdef OMPX

// RUN: %clang_cc1 -DOMPX -verify -Wno-vla -fopenmp -fopenmp-extensions -ast-print %s | FileCheck %s --check-prefix=OMPX
Expand Down
105 changes: 46 additions & 59 deletions clang/test/OpenMP/target_map_messages.cpp

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions clang/test/lit.site.cfg.py.in
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ config.enable_backtrace = @ENABLE_BACKTRACES@
config.enable_threads = @LLVM_ENABLE_THREADS@
config.reverse_iteration = @LLVM_ENABLE_REVERSE_ITERATION@
config.host_arch = "@HOST_ARCH@"
config.perl_executable = "@PERL_EXECUTABLE@"
config.python_executable = "@Python3_EXECUTABLE@"
config.use_z3_solver = lit_config.params.get('USE_Z3_SOLVER', "@USE_Z3_SOLVER@")
config.has_plugins = @CLANG_PLUGIN_SUPPORT@
Expand Down
2 changes: 1 addition & 1 deletion clang/unittests/Analysis/FlowSensitive/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,14 @@ add_clang_unittest(ClangAnalysisFlowSensitiveTests
SignAnalysisTest.cpp
SimplifyConstraintsTest.cpp
SingleVarConstantPropagationTest.cpp
SolverTest.cpp
TestingSupport.cpp
TestingSupportTest.cpp
TransferBranchTest.cpp
TransferTest.cpp
TypeErasedDataflowAnalysisTest.cpp
UncheckedOptionalAccessModelTest.cpp
ValueTest.cpp
WatchedLiteralsSolverTest.cpp
)

clang_target_link_libraries(ClangAnalysisFlowSensitiveTests
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
//===- unittests/Analysis/FlowSensitive/WatchedLiteralsSolverTest.cpp -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h"
#include "SolverTest.h"

namespace clang::dataflow::test {

// WatchedLiteralsSolver's implementation of the SolverTest fixture hook that
// hands out a solver configured with a low timeout.
// NOTE(review): the meaning/unit of the constructor argument is defined in
// WatchedLiteralsSolver.h, which is not visible here.
template <>
WatchedLiteralsSolver
SolverTest<WatchedLiteralsSolver>::createSolverWithLowTimeout() {
  constexpr int LowTimeout = 10;
  return WatchedLiteralsSolver(LowTimeout);
}

namespace {

// Instantiate the shared, type-parameterized SolverTest suite for
// WatchedLiteralsSolver.
INSTANTIATE_TYPED_TEST_SUITE_P(WatchedLiteralsSolverTest, SolverTest,
                               WatchedLiteralsSolver);

} // namespace
} // namespace clang::dataflow::test
8 changes: 8 additions & 0 deletions flang/include/flang/Frontend/CompilerInvocation.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,10 @@ class CompilerInvocation : public CompilerInvocationBase {
// Fortran Dialect options
Fortran::common::IntrinsicTypeDefaultKinds defaultKinds;

// Fortran Warning options
bool enableConformanceChecks = false;
bool enableUsageChecks = false;
bool disableWarnings = false;

/// Used in e.g. unparsing to dump the analyzed rather than the original
/// parse-tree objects.
Expand Down Expand Up @@ -197,6 +199,9 @@ class CompilerInvocation : public CompilerInvocationBase {
bool &getEnableUsageChecks() { return enableUsageChecks; }
const bool &getEnableUsageChecks() const { return enableUsageChecks; }

// Accessors for the flag that records whether warnings should be disabled.
// The non-const overload returns a mutable reference to the flag.
bool &getDisableWarnings() { return disableWarnings; }
const bool &getDisableWarnings() const { return disableWarnings; }

Fortran::parser::AnalyzedObjectsAsFortran &getAsFortran() {
return asFortran;
}
Expand Down Expand Up @@ -226,6 +231,9 @@ class CompilerInvocation : public CompilerInvocationBase {
// Enables the usage checks
void setEnableUsageChecks() { enableUsageChecks = true; }

// Disables all warnings. This only ever sets the flag; there is no way to
// clear it again through this setter.
void setDisableWarnings() { disableWarnings = true; }

/// Useful setters
void setArgv0(const char *dir) { argv0 = dir; }

Expand Down
10 changes: 10 additions & 0 deletions flang/lib/Frontend/CompilerInvocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -975,6 +975,11 @@ static bool parseDialectArgs(CompilerInvocation &res, llvm::opt::ArgList &args,
res.setEnableConformanceChecks();
res.setEnableUsageChecks();
}

// -w
if (args.hasArg(clang::driver::options::OPT_w))
res.setDisableWarnings();

// -std=f2018
// TODO: Set proper options when more fortran standards
// are supported.
Expand Down Expand Up @@ -1403,6 +1408,11 @@ void CompilerInvocation::setFortranOpts() {

if (getEnableUsageChecks())
fortranOptions.features.WarnOnAllUsage();

if (getDisableWarnings()) {
fortranOptions.features.DisableAllNonstandardWarnings();
fortranOptions.features.DisableAllUsageWarnings();
}
}

std::unique_ptr<Fortran::semantics::SemanticsContext>
Expand Down
47 changes: 32 additions & 15 deletions flang/lib/Semantics/check-omp-structure.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,52 +84,69 @@ class OmpWorkshareBlockChecker {
parser::CharBlock source_;
};

// Parse-tree visitor that diagnoses CYCLE and EXIT statements which target a
// loop associated with an OpenMP DO construct. It is constructed with a
// starting level; the level is decremented each time a DO construct is
// entered and incremented again when that construct is left, so the sign of
// the level tells whether the current loop is one of the associated loops.
class OmpCycleChecker {
class OmpCycleAndExitChecker {
public:
OmpCycleChecker(SemanticsContext &context, std::int64_t cycleLevel)
: context_{context}, cycleLevel_{cycleLevel} {}
OmpCycleAndExitChecker(SemanticsContext &context, std::int64_t level)
: context_{context}, level_{level} {}

// Default visitors: descend into every node and do nothing on the way out.
template <typename T> bool Pre(const T &) { return true; }
template <typename T> void Post(const T &) {}

// Entering a DO construct consumes one level. If the construct is named, the
// name is recorded together with the level at which it was entered so that
// labelled CYCLE/EXIT statements can look it up later.
bool Pre(const parser::DoConstruct &dc) {
cycleLevel_--;
level_--;
const auto &constructName{std::get<0>(std::get<0>(dc.t).statement.t)};
if (constructName) {
constructNamesAndLevels_.emplace(
constructName.value().ToString(), cycleLevel_);
constructName.value().ToString(), level_);
}
return true;
}

// Leaving the DO construct gives the level back.
void Post(const parser::DoConstruct &dc) { level_++; }

// A CYCLE is an error when the loop it targets has a strictly positive
// level. A name that was not recorded as a DO construct is never flagged.
bool Pre(const parser::CycleStmt &cyclestmt) {
std::map<std::string, std::int64_t>::iterator it;
bool err{false};
if (cyclestmt.v) {
it = constructNamesAndLevels_.find(cyclestmt.v->source.ToString());
err = (it != constructNamesAndLevels_.end() && it->second > 0);
} else {
// If there is no label then the cycle statement is associated with the
// closest enclosing DO. Use its level for the checks.
err = cycleLevel_ > 0;
} else { // If there is no label then use the level of the last enclosing DO
err = level_ > 0;
}
if (err) {
context_.Say(*cycleSource_,
context_.Say(*source_,
"CYCLE statement to non-innermost associated loop of an OpenMP DO "
"construct"_err_en_US);
}
return true;
}

// An EXIT is an error when the loop it targets has a non-negative level
// (note `>=` here versus `>` for CYCLE). A name that was not recorded as a DO
// construct, such as a named IF construct, is never flagged.
bool Pre(const parser::ExitStmt &exitStmt) {
std::map<std::string, std::int64_t>::iterator it;
bool err{false};
if (exitStmt.v) {
it = constructNamesAndLevels_.find(exitStmt.v->source.ToString());
err = (it != constructNamesAndLevels_.end() && it->second >= 0);
} else { // If there is no label then use the level of the last enclosing DO
err = level_ >= 0;
}
if (err) {
context_.Say(*source_,
"EXIT statement terminates associated loop of an OpenMP DO "
"construct"_err_en_US);
}
return true;
}

// Remember the source range of the action statement being visited; the
// diagnostics above are reported at this location.
bool Pre(const parser::Statement<parser::ActionStmt> &actionstmt) {
cycleSource_ = &actionstmt.source;
source_ = &actionstmt.source;
return true;
}

private:
SemanticsContext &context_;
const parser::CharBlock *cycleSource_;
std::int64_t cycleLevel_;
// Source of the most recently visited action statement (set in Pre above).
const parser::CharBlock *source_;
// Remaining level; see Pre/Post for DoConstruct.
std::int64_t level_;
// Maps each named DO construct to the level recorded when it was entered.
std::map<std::string, std::int64_t> constructNamesAndLevels_;
};

Expand Down Expand Up @@ -657,8 +674,8 @@ std::int64_t OmpStructureChecker::GetOrdCollapseLevel(
// Walks the given OpenMP loop construct with a checker visitor that is seeded
// with the level returned by GetOrdCollapseLevel; the visitor reports any
// violations through context_.
void OmpStructureChecker::CheckCycleConstraints(
const parser::OpenMPLoopConstruct &x) {
std::int64_t ordCollapseLevel{GetOrdCollapseLevel(x)};
OmpCycleChecker ompCycleChecker{context_, ordCollapseLevel};
parser::Walk(x, ompCycleChecker);
OmpCycleAndExitChecker checker{context_, ordCollapseLevel};
parser::Walk(x, checker);
}

void OmpStructureChecker::CheckDistLinear(
Expand Down
31 changes: 31 additions & 0 deletions flang/test/Driver/w-option.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
! Test the default setting. Emit warnings only.
! RUN: %flang -c %s 2>&1 | FileCheck %s -check-prefix=DEFAULT

! Test that the warnings are not generated with `-w` option.
! RUN: %flang -c -w %s 2>&1 | FileCheck --allow-empty %s -check-prefix=WARNING

! Test that warnings and portability messages are generated.
! RUN: %flang -c -pedantic %s 2>&1 | FileCheck %s -check-prefixes=DEFAULT,PORTABILITY

! Test that warnings and portability messages are not generated.
! TODO: Support the last flag wins behaviour.
! RUN: %flang -c -pedantic -w %s 2>&1 | FileCheck --allow-empty %s -check-prefixes=WARNING,PORTABILITY-WARNING
! RUN: %flang -c -w -pedantic %s 2>&1 | FileCheck --allow-empty %s -check-prefixes=WARNING,PORTABILITY-WARNING
! DEFAULT: warning: Label '40' is in a construct that should not be used as a branch target here
! DEFAULT: warning: Label '50' is in a construct that should not be used as a branch target here
! WARNING-NOT: warning
! PORTABILITY: portability: Statement function 'sf1' should not contain an array constructor
! PORTABILITY-WARNING-NOT: portability

! Computed GOTO whose targets 40 and 50 sit inside the IF construct below;
! the checks above look for one branch-target warning per label.
subroutine sub01(n)
integer n
GOTO (40,50,60) n
if (n .eq. 1) then
40 print *, "xyz"
50 end if
60 continue
end subroutine sub01

! Statement function whose body contains an array constructor; the -pedantic
! run above looks for the matching portability message about 'sf1'.
subroutine sub02
sf1(n) = sum([(j,j=1,n)])
end subroutine sub02
31 changes: 31 additions & 0 deletions flang/test/Semantics/OpenMP/do08.f90
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

program omp
integer i, j, k
logical cond(10,10,10)
cond = .false.

!ERROR: The value of the parameter in the COLLAPSE or ORDERED clause must not be larger than the number of nested loops following the construct.
!$omp do collapse(3)
Expand Down Expand Up @@ -135,4 +137,33 @@ program omp
end do foo
!$omp end do

!$omp do collapse(3)
loopk: do k=1,10
loopj: do j=1,10
loopi: do i=1,10
ifi : if (.true.) then
!ERROR: EXIT statement terminates associated loop of an OpenMP DO construct
if (cond(i,j,k)) exit
if (cond(i,j,k)) exit ifi
!ERROR: EXIT statement terminates associated loop of an OpenMP DO construct
if (cond(i,j,k)) exit loopi
!ERROR: EXIT statement terminates associated loop of an OpenMP DO construct
if (cond(i,j,k)) exit loopj
end if ifi
end do loopi
end do loopj
end do loopk
!$omp end do

!$omp do collapse(2)
loopk: do k=1,10
loopj: do j=1,10
do i=1,10
end do
!ERROR: EXIT statement terminates associated loop of an OpenMP DO construct
if (cond(i,j,k)) exit
end do loopj
end do loopk
!$omp end do

end program omp
3 changes: 2 additions & 1 deletion libcxx/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ endif()
add_library( cxx-benchmarks-flags-libcxx INTERFACE)
target_link_libraries( cxx-benchmarks-flags-libcxx INTERFACE cxx-benchmarks-flags)
target_compile_options(cxx-benchmarks-flags-libcxx INTERFACE ${SANITIZER_FLAGS} -Wno-user-defined-literals -Wno-suggest-override)
target_link_options( cxx-benchmarks-flags-libcxx INTERFACE -nostdlib++ "-L${BENCHMARK_LIBCXX_INSTALL}/lib" "-L${BENCHMARK_LIBCXX_INSTALL}/lib64" ${SANITIZER_FLAGS})
target_link_options( cxx-benchmarks-flags-libcxx INTERFACE -lm -nostdlib++ "-L${BENCHMARK_LIBCXX_INSTALL}/lib" "-L${BENCHMARK_LIBCXX_INSTALL}/lib64" ${SANITIZER_FLAGS})

set(libcxx_benchmark_targets)

Expand Down Expand Up @@ -220,6 +220,7 @@ set(BENCHMARK_TESTS
lexicographical_compare_three_way.bench.cpp
map.bench.cpp
monotonic_buffer.bench.cpp
numeric/gcd.bench.cpp
ordered_set.bench.cpp
shared_mutex_vs_mutex.bench.cpp
stop_token.bench.cpp
Expand Down
53 changes: 53 additions & 0 deletions libcxx/benchmarks/numeric/gcd.bench.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <algorithm>
#include <array>
#include <benchmark/benchmark.h>
#include <cstring>
#include <limits>
#include <numeric>
#include <random>

// Returns 1000 pseudo-random values of type T drawn from `distribution`.
//
// The engine is default-constructed on every call, so the sequence is the
// same from one call to the next for a given distribution. The default
// distribution covers [numeric_limits<T>::min() + 1, numeric_limits<T>::max()],
// i.e. it leaves out the most negative value.
template <class T>
static std::array<T, 1000> generate(std::uniform_int_distribution<T> distribution = std::uniform_int_distribution<T>{
                                        std::numeric_limits<T>::min() + 1, std::numeric_limits<T>::max()}) {
  std::mt19937 engine;
  std::array<T, 1000> samples;
  auto next_sample = [&engine, &distribution] { return distribution(engine); };
  std::generate(samples.begin(), samples.end(), next_sample);
  return samples;
}

// Benchmarks std::gcd over every ordered pair of the pseudo-random samples.
//
// Each pass through the nested loops performs data.size() * data.size() gcd
// calls, so that product is the batch size handed to KeepRunningBatch. Passing
// only data.size() would make the reported per-iteration time cover
// data.size() calls instead of one.
static void bm_gcd_random(benchmark::State& state) {
  const std::array data = generate<int>();
  while (state.KeepRunningBatch(data.size() * data.size()))
    for (auto v0 : data)
      for (auto v1 : data)
        benchmark::DoNotOptimize(std::gcd(v0, v1));
}
BENCHMARK(bm_gcd_random);

// Benchmarks std::gcd on a fixed pair: an int with every bit set, against 1.
static void bm_gcd_trivial(benchmark::State& state) {
  int all_bits_set = ~static_cast<int>(0);
  int one          = 1;
  for (auto _ : state) {
    // Pass both operands through DoNotOptimize on every iteration so the
    // compiler cannot treat them as known constants.
    benchmark::DoNotOptimize(all_bits_set);
    benchmark::DoNotOptimize(one);
    benchmark::DoNotOptimize(std::gcd(all_bits_set, one));
  }
}
BENCHMARK(bm_gcd_trivial);

// Benchmarks std::gcd on a pair of consecutive Fibonacci numbers, the classic
// slow input for remainder-based gcd algorithms.
//
// The operands are F(46) = 1836311903 and F(45) = 1134903170. F(47) =
// 2971215073 does not fit in a 32-bit int, so initializing an int with it
// would silently yield a different (negative) value; F(46)/F(45) is the
// largest consecutive pair that is representable.
static void bm_gcd_complex(benchmark::State& state) {
  int lhs = 1836311903, rhs = 1134903170;
  for (auto _ : state) {
    // Pass both operands through DoNotOptimize on every iteration so the
    // compiler cannot treat them as known constants.
    benchmark::DoNotOptimize(lhs);
    benchmark::DoNotOptimize(rhs);
    benchmark::DoNotOptimize(std::gcd(lhs, rhs));
  }
}
BENCHMARK(bm_gcd_complex);

BENCHMARK_MAIN();
5 changes: 2 additions & 3 deletions libcxx/include/__functional/is_transparent.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#define _LIBCPP___FUNCTIONAL_IS_TRANSPARENT

#include <__config>
#include <__type_traits/integral_constant.h>
#include <__type_traits/void_t.h>

#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
Expand All @@ -23,10 +22,10 @@ _LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER >= 14

// Detects whether `_Tp` declares a nested type named `is_transparent`.
// The primary definition yields false; the partial specialization below is
// selected through __void_t when `typename _Tp::is_transparent` is well-formed
// and yields true.
// NOTE(review): the second template parameter is not used by either
// definition visible here; presumably it ties the check to the lookup key
// type at the call site -- confirm against callers.
template <class _Tp, class, class = void>
struct __is_transparent : false_type {};
inline const bool __is_transparent_v = false;

template <class _Tp, class _Up>
struct __is_transparent<_Tp, _Up, __void_t<typename _Tp::is_transparent> > : true_type {};
inline const bool __is_transparent_v<_Tp, _Up, __void_t<typename _Tp::is_transparent> > = true;

#endif

Expand Down
44 changes: 42 additions & 2 deletions libcxx/include/__numeric/gcd_lcm.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@
#ifndef _LIBCPP___NUMERIC_GCD_LCM_H
#define _LIBCPP___NUMERIC_GCD_LCM_H

#include <__algorithm/min.h>
#include <__assert>
#include <__bit/countr.h>
#include <__config>
#include <__type_traits/common_type.h>
#include <__type_traits/is_integral.h>
Expand Down Expand Up @@ -50,9 +52,47 @@ struct __ct_abs<_Result, _Source, false> {
};

// Returns the greatest common divisor of two values of an unsigned integer
// type (signed types are rejected by the static_assert below). If one argument
// is zero the other one is returned.
template <class _Tp>
_LIBCPP_CONSTEXPR _LIBCPP_HIDDEN _Tp __gcd(_Tp __m, _Tp __n) {
_LIBCPP_CONSTEXPR _LIBCPP_HIDDEN _Tp __gcd(_Tp __a, _Tp __b) {
static_assert((!is_signed<_Tp>::value), "");
return __n == 0 ? __m : std::__gcd<_Tp>(__n, __m % __n);

// From: https://lemire.me/blog/2013/12/26/fastest-way-to-compute-the-greatest-common-divisor
//
// If a power of two divides both numbers, we can factor it out.
// - gcd( 2^x * a, 2^x * b) = 2^x * gcd(a, b)
//
// If a power of two divides only one of the numbers, we can drop it from that number.
// - if a, b are odd, then gcd(2^x * a, b) = gcd(a, b)
//
// The rest is the standard gcd algorithm with subtraction used instead of modulo.

// Order the operands so that __a >= __b.
if (__a < __b) {
_Tp __tmp = __b;
__b = __a;
__a = __tmp;
}
if (__b == 0)
return __a;
__a %= __b; // Bring both arguments to a comparable magnitude, and return early in the easy case.
if (__a == 0)
return __b;

// Both values are non-zero here. Remember the power of two they share
// (__shift) and strip all trailing zero bits so that both become odd.
int __az = std::__countr_zero(__a);
int __bz = std::__countr_zero(__b);
int __shift = std::min(__az, __bz);
__a >>= __az;
__b >>= __bz;
do {
// __a and __b are both odd at this point, so their difference is even.
// When __a < __b the unsigned subtraction wraps, but __diff is only used
// for its trailing-zero count, which is the same as that of __b - __a.
_Tp __diff = __a - __b;
if (__a > __b) {
__a = __b;
__b = __diff;
} else {
__b = __b - __a;
}
// Make __b odd again; __diff == 0 means __b is now 0 and the loop ends.
if (__diff != 0)
__b >>= std::__countr_zero(__diff);
} while (__b != 0);
// Re-apply the power of two that was common to both inputs.
return __a << __shift;
}

template <class _Tp, class _Up>
Expand Down
40 changes: 20 additions & 20 deletions libcxx/include/map
Original file line number Diff line number Diff line change
Expand Up @@ -1367,27 +1367,27 @@ public:
_LIBCPP_HIDE_FROM_ABI iterator find(const key_type& __k) { return __tree_.find(__k); }
_LIBCPP_HIDE_FROM_ABI const_iterator find(const key_type& __k) const { return __tree_.find(__k); }
#if _LIBCPP_STD_VER >= 14
template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI iterator find(const _K2& __k) {
return __tree_.find(__k);
}
template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI const_iterator find(const _K2& __k) const {
return __tree_.find(__k);
}
#endif

_LIBCPP_HIDE_FROM_ABI size_type count(const key_type& __k) const { return __tree_.__count_unique(__k); }
#if _LIBCPP_STD_VER >= 14
template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI size_type count(const _K2& __k) const {
return __tree_.__count_multi(__k);
}
#endif

#if _LIBCPP_STD_VER >= 20
_LIBCPP_HIDE_FROM_ABI bool contains(const key_type& __k) const { return find(__k) != end(); }
template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI bool contains(const _K2& __k) const {
return find(__k) != end();
}
Expand All @@ -1396,12 +1396,12 @@ public:
_LIBCPP_HIDE_FROM_ABI iterator lower_bound(const key_type& __k) { return __tree_.lower_bound(__k); }
_LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const key_type& __k) const { return __tree_.lower_bound(__k); }
#if _LIBCPP_STD_VER >= 14
template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI iterator lower_bound(const _K2& __k) {
return __tree_.lower_bound(__k);
}

template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const _K2& __k) const {
return __tree_.lower_bound(__k);
}
Expand All @@ -1410,11 +1410,11 @@ public:
_LIBCPP_HIDE_FROM_ABI iterator upper_bound(const key_type& __k) { return __tree_.upper_bound(__k); }
_LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const key_type& __k) const { return __tree_.upper_bound(__k); }
#if _LIBCPP_STD_VER >= 14
template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI iterator upper_bound(const _K2& __k) {
return __tree_.upper_bound(__k);
}
template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const _K2& __k) const {
return __tree_.upper_bound(__k);
}
Expand All @@ -1427,11 +1427,11 @@ public:
return __tree_.__equal_range_unique(__k);
}
#if _LIBCPP_STD_VER >= 14
template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const _K2& __k) {
return __tree_.__equal_range_multi(__k);
}
template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const _K2& __k) const {
return __tree_.__equal_range_multi(__k);
}
Expand Down Expand Up @@ -1959,27 +1959,27 @@ public:
_LIBCPP_HIDE_FROM_ABI iterator find(const key_type& __k) { return __tree_.find(__k); }
_LIBCPP_HIDE_FROM_ABI const_iterator find(const key_type& __k) const { return __tree_.find(__k); }
#if _LIBCPP_STD_VER >= 14
template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI iterator find(const _K2& __k) {
return __tree_.find(__k);
}
template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI const_iterator find(const _K2& __k) const {
return __tree_.find(__k);
}
#endif

_LIBCPP_HIDE_FROM_ABI size_type count(const key_type& __k) const { return __tree_.__count_multi(__k); }
#if _LIBCPP_STD_VER >= 14
template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI size_type count(const _K2& __k) const {
return __tree_.__count_multi(__k);
}
#endif

#if _LIBCPP_STD_VER >= 20
_LIBCPP_HIDE_FROM_ABI bool contains(const key_type& __k) const { return find(__k) != end(); }
template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI bool contains(const _K2& __k) const {
return find(__k) != end();
}
Expand All @@ -1988,12 +1988,12 @@ public:
_LIBCPP_HIDE_FROM_ABI iterator lower_bound(const key_type& __k) { return __tree_.lower_bound(__k); }
_LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const key_type& __k) const { return __tree_.lower_bound(__k); }
#if _LIBCPP_STD_VER >= 14
template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI iterator lower_bound(const _K2& __k) {
return __tree_.lower_bound(__k);
}

template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const _K2& __k) const {
return __tree_.lower_bound(__k);
}
Expand All @@ -2002,11 +2002,11 @@ public:
_LIBCPP_HIDE_FROM_ABI iterator upper_bound(const key_type& __k) { return __tree_.upper_bound(__k); }
_LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const key_type& __k) const { return __tree_.upper_bound(__k); }
#if _LIBCPP_STD_VER >= 14
template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI iterator upper_bound(const _K2& __k) {
return __tree_.upper_bound(__k);
}
template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const _K2& __k) const {
return __tree_.upper_bound(__k);
}
Expand All @@ -2019,11 +2019,11 @@ public:
return __tree_.__equal_range_multi(__k);
}
#if _LIBCPP_STD_VER >= 14
template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const _K2& __k) {
return __tree_.__equal_range_multi(__k);
}
template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const _K2& __k) const {
return __tree_.__equal_range_multi(__k);
}
Expand Down
40 changes: 20 additions & 20 deletions libcxx/include/set
Original file line number Diff line number Diff line change
Expand Up @@ -825,27 +825,27 @@ public:
_LIBCPP_HIDE_FROM_ABI iterator find(const key_type& __k) { return __tree_.find(__k); }
_LIBCPP_HIDE_FROM_ABI const_iterator find(const key_type& __k) const { return __tree_.find(__k); }
#if _LIBCPP_STD_VER >= 14
template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI iterator find(const _K2& __k) {
return __tree_.find(__k);
}
template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI const_iterator find(const _K2& __k) const {
return __tree_.find(__k);
}
#endif

_LIBCPP_HIDE_FROM_ABI size_type count(const key_type& __k) const { return __tree_.__count_unique(__k); }
#if _LIBCPP_STD_VER >= 14
template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI size_type count(const _K2& __k) const {
return __tree_.__count_multi(__k);
}
#endif

#if _LIBCPP_STD_VER >= 20
_LIBCPP_HIDE_FROM_ABI bool contains(const key_type& __k) const { return find(__k) != end(); }
template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI bool contains(const _K2& __k) const {
return find(__k) != end();
}
Expand All @@ -854,12 +854,12 @@ public:
_LIBCPP_HIDE_FROM_ABI iterator lower_bound(const key_type& __k) { return __tree_.lower_bound(__k); }
_LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const key_type& __k) const { return __tree_.lower_bound(__k); }
#if _LIBCPP_STD_VER >= 14
template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI iterator lower_bound(const _K2& __k) {
return __tree_.lower_bound(__k);
}

template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const _K2& __k) const {
return __tree_.lower_bound(__k);
}
Expand All @@ -868,11 +868,11 @@ public:
_LIBCPP_HIDE_FROM_ABI iterator upper_bound(const key_type& __k) { return __tree_.upper_bound(__k); }
_LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const key_type& __k) const { return __tree_.upper_bound(__k); }
#if _LIBCPP_STD_VER >= 14
template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI iterator upper_bound(const _K2& __k) {
return __tree_.upper_bound(__k);
}
template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const _K2& __k) const {
return __tree_.upper_bound(__k);
}
Expand All @@ -885,11 +885,11 @@ public:
return __tree_.__equal_range_unique(__k);
}
#if _LIBCPP_STD_VER >= 14
template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const _K2& __k) {
return __tree_.__equal_range_multi(__k);
}
template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const _K2& __k) const {
return __tree_.__equal_range_multi(__k);
}
Expand Down Expand Up @@ -1283,27 +1283,27 @@ public:
_LIBCPP_HIDE_FROM_ABI iterator find(const key_type& __k) { return __tree_.find(__k); }
_LIBCPP_HIDE_FROM_ABI const_iterator find(const key_type& __k) const { return __tree_.find(__k); }
#if _LIBCPP_STD_VER >= 14
template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI iterator find(const _K2& __k) {
return __tree_.find(__k);
}
template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI const_iterator find(const _K2& __k) const {
return __tree_.find(__k);
}
#endif

_LIBCPP_HIDE_FROM_ABI size_type count(const key_type& __k) const { return __tree_.__count_multi(__k); }
#if _LIBCPP_STD_VER >= 14
template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI size_type count(const _K2& __k) const {
return __tree_.__count_multi(__k);
}
#endif

#if _LIBCPP_STD_VER >= 20
_LIBCPP_HIDE_FROM_ABI bool contains(const key_type& __k) const { return find(__k) != end(); }
template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI bool contains(const _K2& __k) const {
return find(__k) != end();
}
Expand All @@ -1312,12 +1312,12 @@ public:
_LIBCPP_HIDE_FROM_ABI iterator lower_bound(const key_type& __k) { return __tree_.lower_bound(__k); }
_LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const key_type& __k) const { return __tree_.lower_bound(__k); }
#if _LIBCPP_STD_VER >= 14
template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI iterator lower_bound(const _K2& __k) {
return __tree_.lower_bound(__k);
}

template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const _K2& __k) const {
return __tree_.lower_bound(__k);
}
Expand All @@ -1326,11 +1326,11 @@ public:
_LIBCPP_HIDE_FROM_ABI iterator upper_bound(const key_type& __k) { return __tree_.upper_bound(__k); }
_LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const key_type& __k) const { return __tree_.upper_bound(__k); }
#if _LIBCPP_STD_VER >= 14
template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI iterator upper_bound(const _K2& __k) {
return __tree_.upper_bound(__k);
}
template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const _K2& __k) const {
return __tree_.upper_bound(__k);
}
Expand All @@ -1343,11 +1343,11 @@ public:
return __tree_.__equal_range_multi(__k);
}
#if _LIBCPP_STD_VER >= 14
template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const _K2& __k) {
return __tree_.__equal_range_multi(__k);
}
template <typename _K2, enable_if_t<__is_transparent<_Compare, _K2>::value, int> = 0>
template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
_LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const _K2& __k) const {
return __tree_.__equal_range_multi(__k);
}
Expand Down
32 changes: 12 additions & 20 deletions libcxx/include/unordered_map
Original file line number Diff line number Diff line change
Expand Up @@ -1384,22 +1384,19 @@ public:
_LIBCPP_HIDE_FROM_ABI iterator find(const key_type& __k) { return __table_.find(__k); }
_LIBCPP_HIDE_FROM_ABI const_iterator find(const key_type& __k) const { return __table_.find(__k); }
#if _LIBCPP_STD_VER >= 20
template <class _K2,
enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
_LIBCPP_HIDE_FROM_ABI iterator find(const _K2& __k) {
return __table_.find(__k);
}
template <class _K2,
enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
_LIBCPP_HIDE_FROM_ABI const_iterator find(const _K2& __k) const {
return __table_.find(__k);
}
#endif // _LIBCPP_STD_VER >= 20

_LIBCPP_HIDE_FROM_ABI size_type count(const key_type& __k) const { return __table_.__count_unique(__k); }
#if _LIBCPP_STD_VER >= 20
template <class _K2,
enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
_LIBCPP_HIDE_FROM_ABI size_type count(const _K2& __k) const {
return __table_.__count_unique(__k);
}
Expand All @@ -1408,8 +1405,7 @@ public:
#if _LIBCPP_STD_VER >= 20
_LIBCPP_HIDE_FROM_ABI bool contains(const key_type& __k) const { return find(__k) != end(); }

template <class _K2,
enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
_LIBCPP_HIDE_FROM_ABI bool contains(const _K2& __k) const {
return find(__k) != end();
}
Expand All @@ -1423,12 +1419,12 @@ public:
}
#if _LIBCPP_STD_VER >= 20
template <class _K2,
enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
_LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const _K2& __k) {
return __table_.__equal_range_unique(__k);
}
template <class _K2,
enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
_LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const _K2& __k) const {
return __table_.__equal_range_unique(__k);
}
Expand Down Expand Up @@ -2135,22 +2131,19 @@ public:
_LIBCPP_HIDE_FROM_ABI iterator find(const key_type& __k) { return __table_.find(__k); }
_LIBCPP_HIDE_FROM_ABI const_iterator find(const key_type& __k) const { return __table_.find(__k); }
#if _LIBCPP_STD_VER >= 20
template <class _K2,
enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
_LIBCPP_HIDE_FROM_ABI iterator find(const _K2& __k) {
return __table_.find(__k);
}
template <class _K2,
enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
_LIBCPP_HIDE_FROM_ABI const_iterator find(const _K2& __k) const {
return __table_.find(__k);
}
#endif // _LIBCPP_STD_VER >= 20

_LIBCPP_HIDE_FROM_ABI size_type count(const key_type& __k) const { return __table_.__count_multi(__k); }
#if _LIBCPP_STD_VER >= 20
template <class _K2,
enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
_LIBCPP_HIDE_FROM_ABI size_type count(const _K2& __k) const {
return __table_.__count_multi(__k);
}
Expand All @@ -2159,8 +2152,7 @@ public:
#if _LIBCPP_STD_VER >= 20
_LIBCPP_HIDE_FROM_ABI bool contains(const key_type& __k) const { return find(__k) != end(); }

template <class _K2,
enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
_LIBCPP_HIDE_FROM_ABI bool contains(const _K2& __k) const {
return find(__k) != end();
}
Expand All @@ -2174,12 +2166,12 @@ public:
}
#if _LIBCPP_STD_VER >= 20
template <class _K2,
enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
_LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const _K2& __k) {
return __table_.__equal_range_multi(__k);
}
template <class _K2,
enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
_LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const _K2& __k) const {
return __table_.__equal_range_multi(__k);
}
Expand Down
36 changes: 12 additions & 24 deletions libcxx/include/unordered_set
Original file line number Diff line number Diff line change
Expand Up @@ -839,22 +839,19 @@ public:
_LIBCPP_HIDE_FROM_ABI iterator find(const key_type& __k) { return __table_.find(__k); }
_LIBCPP_HIDE_FROM_ABI const_iterator find(const key_type& __k) const { return __table_.find(__k); }
#if _LIBCPP_STD_VER >= 20
template <class _K2,
enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
_LIBCPP_HIDE_FROM_ABI iterator find(const _K2& __k) {
return __table_.find(__k);
}
template <class _K2,
enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
_LIBCPP_HIDE_FROM_ABI const_iterator find(const _K2& __k) const {
return __table_.find(__k);
}
#endif // _LIBCPP_STD_VER >= 20

_LIBCPP_HIDE_FROM_ABI size_type count(const key_type& __k) const { return __table_.__count_unique(__k); }
#if _LIBCPP_STD_VER >= 20
template <class _K2,
enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
_LIBCPP_HIDE_FROM_ABI size_type count(const _K2& __k) const {
return __table_.__count_unique(__k);
}
Expand All @@ -863,8 +860,7 @@ public:
#if _LIBCPP_STD_VER >= 20
_LIBCPP_HIDE_FROM_ABI bool contains(const key_type& __k) const { return find(__k) != end(); }

template <class _K2,
enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
_LIBCPP_HIDE_FROM_ABI bool contains(const _K2& __k) const {
return find(__k) != end();
}
Expand All @@ -877,13 +873,11 @@ public:
return __table_.__equal_range_unique(__k);
}
#if _LIBCPP_STD_VER >= 20
template <class _K2,
enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
_LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const _K2& __k) {
return __table_.__equal_range_unique(__k);
}
template <class _K2,
enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
_LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const _K2& __k) const {
return __table_.__equal_range_unique(__k);
}
Expand Down Expand Up @@ -1442,22 +1436,19 @@ public:
_LIBCPP_HIDE_FROM_ABI iterator find(const key_type& __k) { return __table_.find(__k); }
_LIBCPP_HIDE_FROM_ABI const_iterator find(const key_type& __k) const { return __table_.find(__k); }
#if _LIBCPP_STD_VER >= 20
template <class _K2,
enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
_LIBCPP_HIDE_FROM_ABI iterator find(const _K2& __k) {
return __table_.find(__k);
}
template <class _K2,
enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
_LIBCPP_HIDE_FROM_ABI const_iterator find(const _K2& __k) const {
return __table_.find(__k);
}
#endif // _LIBCPP_STD_VER >= 20

_LIBCPP_HIDE_FROM_ABI size_type count(const key_type& __k) const { return __table_.__count_multi(__k); }
#if _LIBCPP_STD_VER >= 20
template <class _K2,
enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
_LIBCPP_HIDE_FROM_ABI size_type count(const _K2& __k) const {
return __table_.__count_multi(__k);
}
Expand All @@ -1466,8 +1457,7 @@ public:
#if _LIBCPP_STD_VER >= 20
_LIBCPP_HIDE_FROM_ABI bool contains(const key_type& __k) const { return find(__k) != end(); }

template <class _K2,
enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
_LIBCPP_HIDE_FROM_ABI bool contains(const _K2& __k) const {
return find(__k) != end();
}
Expand All @@ -1480,13 +1470,11 @@ public:
return __table_.__equal_range_multi(__k);
}
#if _LIBCPP_STD_VER >= 20
template <class _K2,
enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
_LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const _K2& __k) {
return __table_.__equal_range_multi(__k);
}
template <class _K2,
enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
_LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const _K2& __k) const {
return __table_.__equal_range_multi(__k);
}
Expand Down
1 change: 1 addition & 0 deletions libcxx/test/libcxx/transitive_includes/cxx03.csv
Original file line number Diff line number Diff line change
Expand Up @@ -570,6 +570,7 @@ numeric cstddef
numeric cstdint
numeric execution
numeric functional
numeric initializer_list
numeric iterator
numeric limits
numeric new
Expand Down
1 change: 1 addition & 0 deletions libcxx/test/libcxx/transitive_includes/cxx11.csv
Original file line number Diff line number Diff line change
Expand Up @@ -575,6 +575,7 @@ numeric cstddef
numeric cstdint
numeric execution
numeric functional
numeric initializer_list
numeric iterator
numeric limits
numeric new
Expand Down
1 change: 1 addition & 0 deletions libcxx/test/libcxx/transitive_includes/cxx14.csv
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,7 @@ numeric cstddef
numeric cstdint
numeric execution
numeric functional
numeric initializer_list
numeric iterator
numeric limits
numeric new
Expand Down
1 change: 1 addition & 0 deletions libcxx/test/libcxx/transitive_includes/cxx17.csv
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,7 @@ numeric cstddef
numeric cstdint
numeric execution
numeric functional
numeric initializer_list
numeric iterator
numeric limits
numeric new
Expand Down
1 change: 1 addition & 0 deletions libcxx/test/libcxx/transitive_includes/cxx20.csv
Original file line number Diff line number Diff line change
Expand Up @@ -589,6 +589,7 @@ numeric cstddef
numeric cstdint
numeric execution
numeric functional
numeric initializer_list
numeric iterator
numeric limits
numeric new
Expand Down
23 changes: 23 additions & 0 deletions libcxx/test/libcxx/transitive_includes/cxx26.csv
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,29 @@ experimental/simd limits
experimental/type_traits initializer_list
experimental/type_traits type_traits
experimental/utility utility
experimental/vector experimental/memory_resource
experimental/vector vector
ext/hash_map algorithm
ext/hash_map cmath
ext/hash_map cstddef
ext/hash_map cstdint
ext/hash_map cstring
ext/hash_map functional
ext/hash_map initializer_list
ext/hash_map limits
ext/hash_map new
ext/hash_map stdexcept
ext/hash_map string
ext/hash_set algorithm
ext/hash_set cmath
ext/hash_set cstddef
ext/hash_set cstdint
ext/hash_set cstring
ext/hash_set functional
ext/hash_set initializer_list
ext/hash_set limits
ext/hash_set new
ext/hash_set string
filesystem compare
filesystem cstddef
filesystem cstdint
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <cassert>
#include <climits>
#include <cstdint>
#include <random>
#include <type_traits>

#include "test_macros.h"
Expand Down Expand Up @@ -48,6 +49,74 @@ constexpr bool test0(int in1, int in2, int out)
return true;
}

// Reference Euclidean algorithm used to cross-check std::gcd.
// Repeatedly replaces (m, n) with (n, m % n) until n reaches zero and returns
// the remaining m; in particular basic_gcd_(m, 0) == m.
template <typename T>
T basic_gcd_(T m, T n) {
  while (n != 0) {
    // The remainder is narrowed back to T, since `%` may have promoted it.
    const T remainder = static_cast<T>(m % n);
    m = n;
    n = remainder;
  }
  return m;
}

// Reference gcd for possibly-signed operands.
// Each negative operand is replaced by its negation, except the most negative
// value of T, which is passed through unchanged. Both operands are then
// converted to the unsigned counterpart of T and handed to basic_gcd_; the
// unsigned result is converted back to T on return.
template <typename T>
T basic_gcd(T m, T n) {
  using Unsigned = std::make_unsigned_t<T>;
  constexpr T lowest = std::numeric_limits<T>::min();

  // -lowest is not representable in T, so that one value is never negated.
  const T abs_m = (m < 0 && m != lowest) ? static_cast<T>(-m) : m;
  const T abs_n = (n < 0 && n != lowest) ? static_cast<T>(-n) : n;

  return basic_gcd_(static_cast<Unsigned>(abs_m), static_cast<Unsigned>(abs_n));
}

// Cross-checks std::gcd against the reference basic_gcd on 10000 pairs of
// pseudo-random operands drawn uniformly from [0, numeric_limits<Input>::max()].
// The engine uses a fixed seed, so a given standard library replays the same
// sequence on every run.
template <typename Input>
void do_fuzzy_tests() {
  std::mt19937 gen(1938);

  // [rand.req.genl]/1.5 only allows short, int, long, long long and their
  // unsigned variants as IntType; 8-bit types are not in that list. Draw from
  // `int` for those and narrow afterwards. The range stays [0, max], which is
  // what a default-constructed distribution of Input would have produced.
  using DistIntType         = std::conditional_t<sizeof(Input) == 1, int, Input>;
  constexpr Input max_input = std::numeric_limits<Input>::max();
  std::uniform_int_distribution<DistIntType> distrib(0, max_input);

  constexpr int nb_rounds = 10000;
  for (int i = 0; i < nb_rounds; ++i) {
    const Input n = static_cast<Input>(distrib(gen));
    const Input m = static_cast<Input>(distrib(gen));
    assert(std::gcd(n, m) == basic_gcd(n, m));
  }
}

// Checks std::gcd against the reference basic_gcd for every ordered pair taken
// from a fixed table of boundary values: values next to the minimum and
// maximum of Input, the small values 0..10, and -1..-10 converted to Input.
template <typename Input>
void do_limit_tests() {
  using Limits                   = std::numeric_limits<Input>;
  constexpr bool is_signed_input = std::is_signed<Input>::value;

  const Input inputs[] = {
      // The behavior of std::gcd is undefined if the absolute value of one of
      // its operands is not representable in the result type, so for signed
      // types the first entry is min + 3 rather than min itself.
      Limits::min() + (is_signed_input ? 3 : 0),
      Limits::min() + 1,
      Limits::min() + 2,
      Limits::max(),
      Limits::max() - 1,
      Limits::max() - 2,
      0,
      1,
      2,
      3,
      4,
      5,
      6,
      7,
      8,
      9,
      10,
      static_cast<Input>(-1),
      static_cast<Input>(-2),
      static_cast<Input>(-3),
      static_cast<Input>(-4),
      static_cast<Input>(-5),
      static_cast<Input>(-6),
      static_cast<Input>(-7),
      static_cast<Input>(-8),
      static_cast<Input>(-9),
      static_cast<Input>(-10),
  };

  for (const Input lhs : inputs) {
    for (const Input rhs : inputs) {
      assert(std::gcd(lhs, rhs) == basic_gcd(lhs, rhs));
    }
  }
}

template <typename Input1, typename Input2 = Input1>
constexpr bool do_test(int = 0)
Expand Down Expand Up @@ -143,5 +212,23 @@ int main(int argc, char**)
assert(res == 2);
}

return 0;
do_fuzzy_tests<std::int8_t>();
do_fuzzy_tests<std::int16_t>();
do_fuzzy_tests<std::int32_t>();
do_fuzzy_tests<std::int64_t>();
do_fuzzy_tests<std::uint8_t>();
do_fuzzy_tests<std::uint16_t>();
do_fuzzy_tests<std::uint32_t>();
do_fuzzy_tests<std::uint64_t>();

do_limit_tests<std::int8_t>();
do_limit_tests<std::int16_t>();
do_limit_tests<std::int32_t>();
do_limit_tests<std::int64_t>();
do_limit_tests<std::uint8_t>();
do_limit_tests<std::uint16_t>();
do_limit_tests<std::uint32_t>();
do_limit_tests<std::uint64_t>();

return 0;
}
8 changes: 5 additions & 3 deletions lld/test/ELF/mips-eh_frame-pic.s
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# RUN: llvm-mc -filetype=obj -triple=mips64-unknown-linux --position-independent %s -o %t-pic.o
# RUN: llvm-readobj -r %t-pic.o | FileCheck %s --check-prefixes=RELOCS,PIC64-RELOCS
# RUN: ld.lld -shared %t-pic.o -o %t-pic.so
# RUN: llvm-dwarfdump --eh-frame %t-pic.so | FileCheck %s --check-prefix=PIC-EH-FRAME
# RUN: llvm-dwarfdump --eh-frame %t-pic.so | FileCheck %s --check-prefix=PIC64-EH-FRAME

## Also check MIPS32:
# RUN: llvm-mc -filetype=obj -triple=mips-unknown-linux %s -o %t-nopic32.o
Expand All @@ -31,7 +31,7 @@
# RUN: llvm-mc -filetype=obj -triple=mips-unknown-linux --position-independent %s -o %t-pic32.o
# RUN: llvm-readobj -r %t-pic32.o | FileCheck %s --check-prefixes=RELOCS,PIC32-RELOCS
# RUN: ld.lld -shared %t-pic32.o -o %t-pic32.so
# RUN: llvm-dwarfdump --eh-frame %t-pic32.so | FileCheck %s --check-prefix=PIC-EH-FRAME
# RUN: llvm-dwarfdump --eh-frame %t-pic32.so | FileCheck %s --check-prefix=PIC32-EH-FRAME

# RELOCS: .rel{{a?}}.eh_frame {
# ABS32-RELOCS-NEXT: 0x1C R_MIPS_32 .text
Expand All @@ -44,7 +44,9 @@
## ^^ fde pointer encoding: DW_EH_PE_sdata8
# ABS32-EH-FRAME: Augmentation data: 0B
## ^^ fde pointer encoding: DW_EH_PE_sdata4
# PIC-EH-FRAME: Augmentation data: 1B
# PIC32-EH-FRAME: Augmentation data: 1B
## ^^ fde pointer encoding: DW_EH_PE_pcrel | DW_EH_PE_sdata4
# PIC64-EH-FRAME: Augmentation data: 1B
## ^^ fde pointer encoding: DW_EH_PE_pcrel | DW_EH_PE_sdata4
## Note: ld.bfd converts the R_MIPS_64 relocs to DW_EH_PE_pcrel | DW_EH_PE_sdata8
## for N64 ABI (and DW_EH_PE_pcrel | DW_EH_PE_sdata4 for MIPS32)
Expand Down
13 changes: 9 additions & 4 deletions llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -599,10 +599,6 @@ class CombinerHelper {
/// This variant does not erase \p MI after calling the build function.
void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo);

/// Use a function which takes in a MachineIRBuilder to perform a combine.
/// By default, it erases the instruction \p MI from the function.
void applyBuildFnMO(const MachineOperand &MO, BuildFnTy &MatchInfo);

bool matchOrShiftToFunnelShift(MachineInstr &MI, BuildFnTy &MatchInfo);
bool matchFunnelShiftToRotate(MachineInstr &MI);
void applyFunnelShiftToRotate(MachineInstr &MI);
Expand Down Expand Up @@ -814,6 +810,12 @@ class CombinerHelper {
/// Match constant LHS ops that should be commuted.
bool matchCommuteConstantToRHS(MachineInstr &MI);

/// Combine sext of trunc.
bool matchSextOfTrunc(const MachineOperand &MO, BuildFnTy &MatchInfo);

/// Combine zext of trunc.
bool matchZextOfTrunc(const MachineOperand &MO, BuildFnTy &MatchInfo);

/// Match constant LHS FP ops that should be commuted.
bool matchCommuteFPConstantToRHS(MachineInstr &MI);

Expand Down Expand Up @@ -857,6 +859,9 @@ class CombinerHelper {
/// register and different indices.
bool matchExtractVectorElementWithDifferentIndices(const MachineOperand &MO,
BuildFnTy &MatchInfo);
/// Use a function which takes in a MachineIRBuilder to perform a combine.
/// By default, it erases the instruction def'd on \p MO from the function.
void applyBuildFnMO(const MachineOperand &MO, BuildFnTy &MatchInfo);

/// Combine insert vector element OOB.
bool matchInsertVectorElementOOB(MachineInstr &MI, BuildFnTy &MatchInfo);
Expand Down
53 changes: 53 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
Original file line number Diff line number Diff line change
Expand Up @@ -792,6 +792,59 @@ class GFreeze : public GenericMachineInstr {
}
};

/// Common wrapper for the generic cast instructions.
/// It models the llvm::CastInst concept, with one exception: bitcast is not
/// among the opcodes accepted by classof.
class GCastOp : public GenericMachineInstr {
public:
  /// Returns the register held by operand 1, the source of the cast.
  Register getSrcReg() const { return getOperand(1).getReg(); }

  /// LLVM-style RTTI hook: returns true iff the opcode of \p MI is one of the
  /// cast opcodes listed below.
  static bool classof(const MachineInstr *MI) {
    switch (MI->getOpcode()) {
    default:
      return false;
    case TargetOpcode::G_ADDRSPACE_CAST:
    case TargetOpcode::G_ANYEXT:
    case TargetOpcode::G_FPEXT:
    case TargetOpcode::G_FPTOSI:
    case TargetOpcode::G_FPTOUI:
    case TargetOpcode::G_FPTRUNC:
    case TargetOpcode::G_INTTOPTR:
    case TargetOpcode::G_PTRTOINT:
    case TargetOpcode::G_SEXT:
    case TargetOpcode::G_SITOFP:
    case TargetOpcode::G_TRUNC:
    case TargetOpcode::G_UITOFP:
    case TargetOpcode::G_ZEXT:
      return true;
    }
  }
};

/// Wrapper for a G_SEXT instruction.
class GSext : public GCastOp {
public:
  /// LLVM-style RTTI hook: accepts \p MI only when its opcode is G_SEXT.
  static bool classof(const MachineInstr *MI) {
    const auto Opcode = MI->getOpcode();
    return Opcode == TargetOpcode::G_SEXT;
  }
};

/// Wrapper for a G_ZEXT instruction.
class GZext : public GCastOp {
public:
  /// LLVM-style RTTI hook: accepts \p MI only when its opcode is G_ZEXT.
  static bool classof(const MachineInstr *MI) {
    const auto Opcode = MI->getOpcode();
    return Opcode == TargetOpcode::G_ZEXT;
  }
};

/// Wrapper for a G_TRUNC instruction.
class GTrunc : public GCastOp {
public:
  /// LLVM-style RTTI hook: accepts \p MI only when its opcode is G_TRUNC.
  static bool classof(const MachineInstr *MI) {
    const auto Opcode = MI->getOpcode();
    return Opcode == TargetOpcode::G_TRUNC;
  }
};

} // namespace llvm

#endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
6 changes: 4 additions & 2 deletions llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -746,7 +746,8 @@ class MachineIRBuilder {
/// \pre \p Op must be smaller than \p Res
///
/// \return The newly created instruction.
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op);
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op,
std::optional<unsigned> Flags = std::nullopt);

/// Build and insert \p Res = G_SEXT \p Op, \p Res = G_TRUNC \p Op, or
/// \p Res = COPY \p Op depending on the differing sizes of \p Res and \p Op.
Expand Down Expand Up @@ -1231,7 +1232,8 @@ class MachineIRBuilder {
/// \pre \p Res must be smaller than \p Op
///
/// \return The newly created instruction.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op);
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op,
std::optional<unsigned> Flags = std::nullopt);

/// Build and insert a \p Res = G_ICMP \p Pred, \p Op0, \p Op1
///
Expand Down
5 changes: 5 additions & 0 deletions llvm/include/llvm/IR/InstrTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -2614,6 +2614,11 @@ class CallBase : public Instruction {
op_iterator populateBundleOperandInfos(ArrayRef<OperandBundleDef> Bundles,
const unsigned BeginIndex);

/// Return true if an operand bundle with the LLVMContext::OB_deopt tag
/// (the deopt state bundle) is attached to this call.
bool hasDeoptState() const {
  const auto DeoptBundle = getOperandBundle(LLVMContext::OB_deopt);
  return DeoptBundle.has_value();
}

public:
/// Return the BundleOpInfo for the operand at index OpIdx.
///
Expand Down
18 changes: 17 additions & 1 deletion llvm/include/llvm/Target/GlobalISel/Combine.td
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,8 @@ def FmContract : MIFlagEnum<"FmContract">;
def FmAfn : MIFlagEnum<"FmAfn">;
def FmReassoc : MIFlagEnum<"FmReassoc">;
def IsExact : MIFlagEnum<"IsExact">;
def NoSWrap : MIFlagEnum<"NoSWrap">;
def NoUWrap : MIFlagEnum<"NoUWrap">;

def MIFlags;
// def not; -> Already defined as a SDNode
Expand Down Expand Up @@ -1501,6 +1503,20 @@ def extract_vector_element_freeze : GICombineRule<
[{ return Helper.matchExtractVectorElementWithFreeze(${root}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;

// Combine rule rooted at a G_SEXT whose input ($src) is defined by a G_TRUNC
// carrying the NoSWrap MI flag. Helper.matchSextOfTrunc decides whether the
// rule fires and fills $matchinfo with a build function, which
// Helper.applyBuildFnMO then runs for $root.
def sext_trunc : GICombineRule<
(defs root:$root, build_fn_matchinfo:$matchinfo),
(match (G_TRUNC $src, $x, (MIFlags NoSWrap)),
(G_SEXT $root, $src),
[{ return Helper.matchSextOfTrunc(${root}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;

// Combine rule rooted at a G_ZEXT whose input ($src) is defined by a G_TRUNC
// carrying the NoUWrap MI flag. Helper.matchZextOfTrunc decides whether the
// rule fires and fills $matchinfo with a build function, which
// Helper.applyBuildFnMO then runs for $root.
def zext_trunc : GICombineRule<
(defs root:$root, build_fn_matchinfo:$matchinfo),
(match (G_TRUNC $src, $x, (MIFlags NoUWrap)),
(G_ZEXT $root, $src),
[{ return Helper.matchZextOfTrunc(${root}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;

def extract_vector_element_shuffle_vector : GICombineRule<
(defs root:$root, build_fn_matchinfo:$matchinfo),
(match (G_SHUFFLE_VECTOR $src, $src1, $src2, $mask),
Expand Down Expand Up @@ -1666,7 +1682,7 @@ def all_combines : GICombineGroup<[trivial_combines, vector_ops_combines,
sub_add_reg, select_to_minmax, redundant_binop_in_equality,
fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
combine_concat_vector, double_icmp_zero_and_or_combine, match_addos,
combine_shuffle_concat]>;
sext_trunc, zext_trunc, combine_shuffle_concat]>;

// A combine group used for prelegalizer combiners at -O0. The combines in
// this group have been selected based on experiments to balance code size and
Expand Down
4 changes: 4 additions & 0 deletions llvm/include/llvm/TargetParser/Triple.h
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,10 @@ class Triple {
/// (SubArch). This should only be called with Vulkan SPIR-V triples.
VersionTuple getVulkanVersion() const;

/// Parse the DXIL version number from the DXIL version
/// (SubArch). This should only be called with DXIL triples.
VersionTuple getDXILVersion() const;

/// @}
/// @name Direct Component Access
/// @{
Expand Down
83 changes: 75 additions & 8 deletions llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4137,14 +4137,6 @@ void CombinerHelper::applyBuildFn(
MI.eraseFromParent();
}

void CombinerHelper::applyBuildFnMO(const MachineOperand &MO,
BuildFnTy &MatchInfo) {
MachineInstr *Root = getDefIgnoringCopies(MO.getReg(), MRI);
Builder.setInstrAndDebugLoc(*Root);
MatchInfo(Builder);
Root->eraseFromParent();
}

void CombinerHelper::applyBuildFnNoErase(
MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
MatchInfo(Builder);
Expand Down Expand Up @@ -7252,3 +7244,78 @@ bool CombinerHelper::matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) {

return false;
}

void CombinerHelper::applyBuildFnMO(const MachineOperand &MO,
BuildFnTy &MatchInfo) {
MachineInstr *Root = getDefIgnoringCopies(MO.getReg(), MRI);
MatchInfo(Builder);
Root->eraseFromParent();
}

bool CombinerHelper::matchSextOfTrunc(const MachineOperand &MO,
BuildFnTy &MatchInfo) {
GSext *Sext = cast<GSext>(getDefIgnoringCopies(MO.getReg(), MRI));
GTrunc *Trunc = cast<GTrunc>(getDefIgnoringCopies(Sext->getSrcReg(), MRI));

Register Dst = Sext->getReg(0);
Register Src = Trunc->getSrcReg();

LLT DstTy = MRI.getType(Dst);
LLT SrcTy = MRI.getType(Src);

if (DstTy == SrcTy) {
MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, Src); };
return true;
}

if (DstTy.getScalarSizeInBits() < SrcTy.getScalarSizeInBits() &&
isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {DstTy, SrcTy}})) {
MatchInfo = [=](MachineIRBuilder &B) {
B.buildTrunc(Dst, Src, MachineInstr::MIFlag::NoSWrap);
};
return true;
}

if (DstTy.getScalarSizeInBits() > SrcTy.getScalarSizeInBits() &&
isLegalOrBeforeLegalizer({TargetOpcode::G_SEXT, {DstTy, SrcTy}})) {
MatchInfo = [=](MachineIRBuilder &B) { B.buildSExt(Dst, Src); };
return true;
}

return false;
}

bool CombinerHelper::matchZextOfTrunc(const MachineOperand &MO,
BuildFnTy &MatchInfo) {
GZext *Zext = cast<GZext>(getDefIgnoringCopies(MO.getReg(), MRI));
GTrunc *Trunc = cast<GTrunc>(getDefIgnoringCopies(Zext->getSrcReg(), MRI));

Register Dst = Zext->getReg(0);
Register Src = Trunc->getSrcReg();

LLT DstTy = MRI.getType(Dst);
LLT SrcTy = MRI.getType(Src);

if (DstTy == SrcTy) {
MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, Src); };
return true;
}

if (DstTy.getScalarSizeInBits() < SrcTy.getScalarSizeInBits() &&
isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {DstTy, SrcTy}})) {
MatchInfo = [=](MachineIRBuilder &B) {
B.buildTrunc(Dst, Src, MachineInstr::MIFlag::NoUWrap);
};
return true;
}

if (DstTy.getScalarSizeInBits() > SrcTy.getScalarSizeInBits() &&
isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {DstTy, SrcTy}})) {
MatchInfo = [=](MachineIRBuilder &B) {
B.buildZExt(Dst, Src, MachineInstr::MIFlag::NonNeg);
};
return true;
}

return false;
}
5 changes: 4 additions & 1 deletion llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,11 @@ KnownBits GISelKnownBits::getKnownBits(MachineInstr &MI) {

KnownBits GISelKnownBits::getKnownBits(Register R) {
const LLT Ty = MRI.getType(R);
// Since the number of lanes in a scalable vector is unknown at compile time,
// we track one bit which is implicitly broadcast to all lanes. This means
// that all lanes in a scalable vector are considered demanded.
APInt DemandedElts =
Ty.isVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
return getKnownBits(R, DemandedElts);
}

Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2851,7 +2851,7 @@ bool IRTranslator::translateInvoke(const User &U,
return false;

// FIXME: support whatever these are.
if (I.countOperandBundlesOfType(LLVMContext::OB_deopt))
if (I.hasDeoptState())
return false;

// FIXME: support control flow guard targets.
Expand Down
12 changes: 7 additions & 5 deletions llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -490,8 +490,9 @@ MachineInstrBuilder MachineIRBuilder::buildSExt(const DstOp &Res,
}

MachineInstrBuilder MachineIRBuilder::buildZExt(const DstOp &Res,
const SrcOp &Op) {
return buildInstr(TargetOpcode::G_ZEXT, Res, Op);
const SrcOp &Op,
std::optional<unsigned> Flags) {
return buildInstr(TargetOpcode::G_ZEXT, Res, Op, Flags);
}

unsigned MachineIRBuilder::getBoolExtOp(bool IsVec, bool IsFP) const {
Expand Down Expand Up @@ -869,9 +870,10 @@ MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
return buildIntrinsic(ID, Results, HasSideEffects, isConvergent);
}

MachineInstrBuilder MachineIRBuilder::buildTrunc(const DstOp &Res,
const SrcOp &Op) {
return buildInstr(TargetOpcode::G_TRUNC, Res, Op);
MachineInstrBuilder
MachineIRBuilder::buildTrunc(const DstOp &Res, const SrcOp &Op,
std::optional<unsigned> Flags) {
return buildInstr(TargetOpcode::G_TRUNC, Res, Op, Flags);
}

MachineInstrBuilder
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3357,7 +3357,7 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
break;
}
}
} else if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) {
} else if (I.hasDeoptState()) {
// Currently we do not lower any intrinsic calls with deopt operand bundles.
// Eventually we will support lowering the @llvm.experimental.deoptimize
// intrinsic, and right now there are no plans to support other intrinsics
Expand Down Expand Up @@ -9197,7 +9197,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {

SDValue Callee = getValue(I.getCalledOperand());

if (I.countOperandBundlesOfType(LLVMContext::OB_deopt))
if (I.hasDeoptState())
LowerCallSiteWithDeoptBundle(&I, Callee, nullptr);
else
// Check if we can potentially perform a tail call. More detailed checking
Expand Down
4 changes: 1 addition & 3 deletions llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -212,13 +212,11 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
// identify N64 from just a triple.
TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
dwarf::DW_EH_PE_sdata4;
// We don't support PC-relative LSDA references in GAS so we use the default
// DW_EH_PE_absptr for those.

// FreeBSD must be explicit about the data size and using pcrel since its
// assembler/linker won't do the automatic conversion that the Linux tools
// do.
if (TgtM.getTargetTriple().isOSFreeBSD()) {
if (isPositionIndependent() || TgtM.getTargetTriple().isOSFreeBSD()) {
PersonalityEncoding |= dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
}
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/MC/MCObjectFileInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,9 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) {
case Triple::mips64el:
// We cannot use DW_EH_PE_sdata8 for the large PositionIndependent case
// since there is no R_MIPS_PC64 relocation (only a 32-bit version).
if (PositionIndependent && !Large)
// In fact DW_EH_PE_sdata4 is enough for us now, and GNU ld doesn't
// support pcrel|sdata8 well. Let's use sdata4 for now.
if (PositionIndependent)
FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
else
FDECFIEncoding = Ctx->getAsmInfo()->getCodePointerSize() == 4
Expand Down
12 changes: 12 additions & 0 deletions llvm/lib/Target/DirectX/DXILMetadata.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,18 @@ void dxil::createShaderModelMD(Module &M) {
Entry->addOperand(MDNode::get(Ctx, Vals));
}

// Append an operand to the module's "dx.version" named metadata holding the
// DXIL version taken from the (normalized) target triple, encoded as two i32
// constants: {major, minor}. A missing minor component is emitted as 0.
void dxil::createDXILVersionMD(Module &M) {
  const Triple NormalizedTriple(Triple::normalize(M.getTargetTriple()));
  const VersionTuple DXILVer = NormalizedTriple.getDXILVersion();

  LLVMContext &Context = M.getContext();
  IRBuilder<> Builder(Context);
  NamedMDNode *VersionMD = M.getOrInsertNamedMetadata("dx.version");

  Metadata *VersionOps[] = {
      ConstantAsMetadata::get(Builder.getInt32(DXILVer.getMajor())),
      ConstantAsMetadata::get(
          Builder.getInt32(DXILVer.getMinor().value_or(0)))};
  VersionMD->addOperand(MDNode::get(Context, VersionOps));
}

// Convert an environment enumerator into its offset from Triple::Pixel
// (Triple::Pixel itself yields 0).
static uint32_t getShaderStage(Triple::EnvironmentType Env) {
  const uint32_t EnvValue = static_cast<uint32_t>(Env);
  const uint32_t FirstStage = static_cast<uint32_t>(llvm::Triple::Pixel);
  return EnvValue - FirstStage;
}
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/DirectX/DXILMetadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ class ValidatorVersionMD {
};

void createShaderModelMD(Module &M);
void createDXILVersionMD(Module &M);
void createEntryMD(Module &M, const uint64_t ShaderFlags);

} // namespace dxil
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ bool DXILTranslateMetadata::runOnModule(Module &M) {
if (ValVerMD.isEmpty())
ValVerMD.update(VersionTuple(1, 0));
dxil::createShaderModelMD(M);
dxil::createDXILVersionMD(M);

const dxil::Resources &Res =
getAnalysis<DXILResourceWrapper>().getDXILResource();
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6125,6 +6125,9 @@ NVPTXTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
if (Ty->isHalfTy() && STI.getSmVersion() >= 70 &&
STI.getPTXVersion() >= 63)
return AtomicExpansionKind::None;
if (Ty->isBFloatTy() && STI.getSmVersion() >= 90 &&
STI.getPTXVersion() >= 78)
return AtomicExpansionKind::None;
if (Ty->isFloatTy())
return AtomicExpansionKind::None;
if (Ty->isDoubleTy() && STI.hasAtomAddF64())
Expand Down
11 changes: 10 additions & 1 deletion llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
Original file line number Diff line number Diff line change
Expand Up @@ -1545,7 +1545,7 @@ multiclass F_ATOMIC_2_imp<ValueType ptrT, NVPTXRegClass ptrclass,
def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
!strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;", ""),
[(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), IMM:$b))]>,
Requires<!if(!eq(TypeStr, ".f16"), [Predicate<"false">], Pred)>;
Requires<!if(!or(!eq(TypeStr, ".f16"), !eq(TypeStr, ".bf16")), [Predicate<"false">], Pred)>;
}
multiclass F_ATOMIC_2<ValueType regT, NVPTXRegClass regclass, string SpaceStr, string TypeStr,
string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM,
Expand Down Expand Up @@ -1662,6 +1662,13 @@ defm INT_PTX_ATOM_ADD_S_F16 : F_ATOMIC_2<f16, Int16Regs, ".shared", ".f16", ".ad
defm INT_PTX_ATOM_ADD_GEN_F16 : F_ATOMIC_2<f16, Int16Regs, "", ".f16", ".add.noftz",
atomic_load_add_gen, f16imm, fpimm, [hasSM<70>, hasPTX<63>]>;

// bf16 atomic add (".add.noftz") instantiations of F_ATOMIC_2 for the
// ".global" and ".shared" spaces plus the variant with an empty space string
// (atomic_load_add_gen). All three are gated on hasSM<90> and hasPTX<78>.
defm INT_PTX_ATOM_ADD_G_BF16 : F_ATOMIC_2<bf16, Int16Regs, ".global", ".bf16", ".add.noftz",
atomic_load_add_g, bf16imm, fpimm, [hasSM<90>, hasPTX<78>]>;
defm INT_PTX_ATOM_ADD_S_BF16 : F_ATOMIC_2<bf16, Int16Regs, ".shared", ".bf16", ".add.noftz",
atomic_load_add_s, bf16imm, fpimm, [hasSM<90>, hasPTX<78>]>;
defm INT_PTX_ATOM_ADD_GEN_BF16 : F_ATOMIC_2<bf16, Int16Regs, "", ".bf16", ".add.noftz",
atomic_load_add_gen, bf16imm, fpimm, [hasSM<90>, hasPTX<78>]>;

defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<f32, Float32Regs, ".global", ".f32", ".add",
atomic_load_add_g, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<f32, Float32Regs, ".shared", ".f32", ".add",
Expand Down Expand Up @@ -2174,6 +2181,8 @@ multiclass ATOM2_add_impl<string OpStr> {
defm _s32 : ATOM2S_impl<OpStr, "i", "s32", i32, Int32Regs, i32imm, imm, i32, []>;
defm _u32 : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs, i32imm, imm, i32, []>;
defm _u64 : ATOM2S_impl<OpStr, "i", "u64", i64, Int64Regs, i64imm, imm, i64, []>;
defm _bf16 : ATOM2S_impl<OpStr, "f", "bf16", bf16, Int16Regs, bf16imm, fpimm, bf16,
[hasSM<90>, hasPTX<78>]>;
defm _f16 : ATOM2S_impl<OpStr, "f", "f16", f16, Int16Regs, f16imm, fpimm, f16,
[hasSM<70>, hasPTX<63>]>;
defm _f32 : ATOM2S_impl<OpStr, "f", "f32", f32, Float32Regs, f32imm, fpimm, f32,
Expand Down
45 changes: 23 additions & 22 deletions llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -205,8 +205,8 @@ class PPCAsmPrinter : public AsmPrinter {
void LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI);
void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK);
void EmitAIXTlsCallHelper(const MachineInstr *MI);
const MCExpr *getAdjustedLocalExecExpr(const MachineOperand &MO,
int64_t Offset);
const MCExpr *getAdjustedFasterLocalExpr(const MachineOperand &MO,
int64_t Offset);
bool runOnMachineFunction(MachineFunction &MF) override {
Subtarget = &MF.getSubtarget<PPCSubtarget>();
bool Changed = AsmPrinter::runOnMachineFunction(MF);
Expand Down Expand Up @@ -1598,7 +1598,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
// machine operand (which is a TargetGlobalTLSAddress) is expected to be
// the same operand for both loads and stores.
for (const MachineOperand &TempMO : MI->operands()) {
if (((TempMO.getTargetFlags() == PPCII::MO_TPREL_FLAG)) &&
if (((TempMO.getTargetFlags() == PPCII::MO_TPREL_FLAG ||
TempMO.getTargetFlags() == PPCII::MO_TLSLD_FLAG)) &&
TempMO.getOperandNo() == 1)
OpNum = 1;
}
Expand Down Expand Up @@ -1634,8 +1635,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
case PPC::ADDI8: {
// A faster non-TOC-based local-[exec|dynamic] sequence is represented by
// `addi` or a load/store instruction (that directly loads or stores off of
// the thread pointer) with an immediate operand having the MO_TPREL_FLAG.
// Such instructions do not otherwise arise.
// the thread pointer) with an immediate operand having the
// [MO_TPREL_FLAG|MO_TLSLD_FLAG]. Such instructions do not otherwise arise.
if (!HasAIXSmallLocalTLS)
break;
bool IsMIADDI8 = MI->getOpcode() == PPC::ADDI8;
Expand All @@ -1647,7 +1648,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
Flag == PPCII::MO_TPREL_PCREL_FLAG || Flag == PPCII::MO_TLSLD_FLAG) {
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);

const MCExpr *Expr = getAdjustedLocalExecExpr(MO, MO.getOffset());
const MCExpr *Expr = getAdjustedFasterLocalExpr(MO, MO.getOffset());
if (Expr)
TmpInst.getOperand(OpNum) = MCOperand::createExpr(Expr);

Expand Down Expand Up @@ -1677,28 +1678,25 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
EmitToStreamer(*OutStreamer, TmpInst);
}

// For non-TOC-based local-exec variables that have a non-zero offset,
// For non-TOC-based local-[exec|dynamic] variables that have a non-zero offset,
// we need to create a new MCExpr that adds the non-zero offset to the address
// of the local-exec variable that will be used in either an addi, load or
// store. However, the final displacement for these instructions must be
// of the local-[exec|dynamic] variable that will be used in either an addi,
// load or store. However, the final displacement for these instructions must be
// between [-32768, 32768), so if the TLS address + its non-zero offset is
// greater than 32KB, a new MCExpr is produced to accommodate this situation.
const MCExpr *PPCAsmPrinter::getAdjustedLocalExecExpr(const MachineOperand &MO,
int64_t Offset) {
const MCExpr *
PPCAsmPrinter::getAdjustedFasterLocalExpr(const MachineOperand &MO,
int64_t Offset) {
// Non-zero offsets (for loads, stores or `addi`) require additional handling.
// When the offset is zero, there is no need to create an adjusted MCExpr.
if (!Offset)
return nullptr;

assert(MO.isGlobal() && "Only expecting a global MachineOperand here!");
const GlobalValue *GValue = MO.getGlobal();
// TODO: Handle the aix-small-local-dynamic-tls non-zero offset case.
TLSModel::Model Model = TM.getTLSModel(GValue);
if (Model == TLSModel::LocalDynamic) {
return nullptr;
}
assert(Model == TLSModel::LocalExec &&
"Only local-exec accesses are handled!");
assert((Model == TLSModel::LocalExec || Model == TLSModel::LocalDynamic) &&
"Only local-[exec|dynamic] accesses are handled!");

bool IsGlobalADeclaration = GValue->isDeclarationForLinker();
// Find the GlobalVariable that corresponds to the particular TLS variable
Expand All @@ -1719,7 +1717,10 @@ const MCExpr *PPCAsmPrinter::getAdjustedLocalExecExpr(const MachineOperand &MO,
// For when TLS variables are extern, this is safe to do because we can
// assume that the address of extern TLS variables are zero.
const MCExpr *Expr = MCSymbolRefExpr::create(
getSymbol(GValue), MCSymbolRefExpr::VK_PPC_AIX_TLSLE, OutContext);
getSymbol(GValue),
Model == TLSModel::LocalExec ? MCSymbolRefExpr::VK_PPC_AIX_TLSLE
: MCSymbolRefExpr::VK_PPC_AIX_TLSLD,
OutContext);
Expr = MCBinaryExpr::createAdd(
Expr, MCConstantExpr::create(Offset, OutContext), OutContext);
if (FinalAddress >= 32768) {
Expand All @@ -1732,10 +1733,10 @@ const MCExpr *PPCAsmPrinter::getAdjustedLocalExecExpr(const MachineOperand &MO,
ptrdiff_t Delta = ((FinalAddress + 32768) & ~0xFFFF);
// Check that the total instruction displacement fits within [-32768,32768).
[[maybe_unused]] ptrdiff_t InstDisp = TLSVarAddress + Offset - Delta;
assert(((InstDisp < 32768) &&
(InstDisp >= -32768)) &&
"Expecting the instruction displacement for local-exec TLS "
"variables to be between [-32768, 32768)!");
assert(
((InstDisp < 32768) && (InstDisp >= -32768)) &&
"Expecting the instruction displacement for local-[exec|dynamic] TLS "
"variables to be between [-32768, 32768)!");
Expr = MCBinaryExpr::createAdd(
Expr, MCConstantExpr::create(-Delta, OutContext), OutContext);
}
Expand Down
89 changes: 44 additions & 45 deletions llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7587,29 +7587,23 @@ static bool hasAIXSmallTLSAttr(SDValue Val) {
return false;
}

// Is an ADDI eligible for folding for non-TOC-based local-exec accesses?
static bool isEligibleToFoldADDIForLocalExecAccesses(SelectionDAG *DAG,
SDValue ADDIToFold) {
// Is an ADDI eligible for folding for non-TOC-based local-[exec|dynamic]
// accesses?
static bool isEligibleToFoldADDIForFasterLocalAccesses(SelectionDAG *DAG,
SDValue ADDIToFold) {
// Check if ADDIToFold (the ADDI that we want to fold into local-exec
// accesses), is truly an ADDI.
if (!ADDIToFold.isMachineOpcode() ||
(ADDIToFold.getMachineOpcode() != PPC::ADDI8))
return false;

// Folding is only allowed for the AIX small-local-exec TLS target attribute
// or when the 'aix-small-tls' global variable attribute is present.
// Folding is only allowed for the AIX small-local-[exec|dynamic] TLS target
// attribute or when the 'aix-small-tls' global variable attribute is present.
const PPCSubtarget &Subtarget =
DAG->getMachineFunction().getSubtarget<PPCSubtarget>();
SDValue TLSVarNode = ADDIToFold.getOperand(1);
if (!(Subtarget.hasAIXSmallLocalExecTLS() || hasAIXSmallTLSAttr(TLSVarNode)))
return false;

// The first operand of the ADDIToFold should be the thread pointer.
// This transformation is only performed if the first operand of the
// addi is the thread pointer.
SDValue TPRegNode = ADDIToFold.getOperand(0);
RegisterSDNode *TPReg = dyn_cast<RegisterSDNode>(TPRegNode.getNode());
if (!TPReg || (TPReg->getReg() != Subtarget.getThreadPointerRegister()))
if (!(Subtarget.hasAIXSmallLocalDynamicTLS() ||
Subtarget.hasAIXSmallLocalExecTLS() || hasAIXSmallTLSAttr(TLSVarNode)))
return false;

// The second operand of the ADDIToFold should be the global TLS address
Expand All @@ -7619,52 +7613,54 @@ static bool isEligibleToFoldADDIForLocalExecAccesses(SelectionDAG *DAG,
if (!GA)
return false;

// The local-exec TLS variable should only have the MO_TPREL_FLAG target flag,
// so this optimization is not performed otherwise if the flag is not set.
if (DAG->getTarget().getTLSModel(GA->getGlobal()) == TLSModel::LocalExec) {
// The first operand of the ADDIToFold should be the thread pointer.
// This transformation is only performed if the first operand of the
// addi is the thread pointer.
SDValue TPRegNode = ADDIToFold.getOperand(0);
RegisterSDNode *TPReg = dyn_cast<RegisterSDNode>(TPRegNode.getNode());
if (!TPReg || (TPReg->getReg() != Subtarget.getThreadPointerRegister()))
return false;
}

// The local-[exec|dynamic] TLS variable should only have the
// [MO_TPREL_FLAG|MO_TLSLD_FLAG] target flags, so this optimization is not
// performed otherwise if the flag is not set.
unsigned TargetFlags = GA->getTargetFlags();
if (TargetFlags != PPCII::MO_TPREL_FLAG)
if (!(TargetFlags == PPCII::MO_TPREL_FLAG ||
TargetFlags == PPCII::MO_TLSLD_FLAG))
return false;

// If all conditions are satisfied, the ADDI is valid for folding.
return true;
}

// For non-TOC-based local-exec access where an addi is feeding into another
// addi, fold this sequence into a single addi if possible.
// Before this optimization, the sequence appears as:
// addi rN, r13, sym@le
// For non-TOC-based local-[exec|dynamic] access where an addi is feeding into
// another addi, fold this sequence into a single addi if possible. Before this
// optimization, the sequence appears as:
// addi rN, r13, sym@[le|ld]
// addi rM, rN, imm
// After this optimization, we can fold the two addi into a single one:
// addi rM, r13, sym@le + imm
static void foldADDIForLocalExecAccesses(SDNode *N, SelectionDAG *DAG) {
// addi rM, r13, sym@[le|ld] + imm
static void foldADDIForFasterLocalAccesses(SDNode *N, SelectionDAG *DAG) {
if (N->getMachineOpcode() != PPC::ADDI8)
return;

// InitialADDI is the addi feeding into N (also an addi), and the addi that
// we want optimized out.
SDValue InitialADDI = N->getOperand(0);

if (!isEligibleToFoldADDIForLocalExecAccesses(DAG, InitialADDI))
if (!isEligibleToFoldADDIForFasterLocalAccesses(DAG, InitialADDI))
return;

// At this point, InitialADDI can be folded into a non-TOC-based local-exec
// access. The first operand of InitialADDI should be the thread pointer,
// which has been checked in isEligibleToFoldADDIForLocalExecAccesses().
SDValue TPRegNode = InitialADDI.getOperand(0);
[[maybe_unused]] RegisterSDNode *TPReg = dyn_cast<RegisterSDNode>(TPRegNode.getNode());
[[maybe_unused]] const PPCSubtarget &Subtarget =
DAG->getMachineFunction().getSubtarget<PPCSubtarget>();
assert((TPReg && (TPReg->getReg() == Subtarget.getThreadPointerRegister())) &&
"Expecting the first operand to be a thread pointer for folding addi "
"in local-exec accesses!");

// The second operand of the InitialADDI should be the global TLS address
// (the local-exec TLS variable), with the MO_TPREL_FLAG target flag.
// This has been checked in isEligibleToFoldADDIForLocalExecAccesses().
// (the local-[exec|dynamic] TLS variable), with the
// [MO_TPREL_FLAG|MO_TLSLD_FLAG] target flag. This has been checked in
// isEligibleToFoldADDIForFasterLocalAccesses().
SDValue TLSVarNode = InitialADDI.getOperand(1);
GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(TLSVarNode);
assert(GA && "Expecting a valid GlobalAddressSDNode when folding addi into "
"local-exec accesses!");
"local-[exec|dynamic] accesses!");
unsigned TargetFlags = GA->getTargetFlags();

// The second operand of the addi that we want to preserve will be an
Expand All @@ -7676,7 +7672,7 @@ static void foldADDIForLocalExecAccesses(SDNode *N, SelectionDAG *DAG) {
TLSVarNode = DAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA), MVT::i64,
Offset, TargetFlags);

(void)DAG->UpdateNodeOperands(N, TPRegNode, TLSVarNode);
(void)DAG->UpdateNodeOperands(N, InitialADDI.getOperand(0), TLSVarNode);
if (InitialADDI.getNode()->use_empty())
DAG->RemoveDeadNode(InitialADDI.getNode());
}
Expand All @@ -7693,8 +7689,9 @@ void PPCDAGToDAGISel::PeepholePPC64() {
if (isVSXSwap(SDValue(N, 0)))
reduceVSXSwap(N, CurDAG);

// This optimization is performed for non-TOC-based local-exec accesses.
foldADDIForLocalExecAccesses(N, CurDAG);
// This optimization is performed for non-TOC-based local-[exec|dynamic]
// accesses.
foldADDIForFasterLocalAccesses(N, CurDAG);

unsigned FirstOp;
unsigned StorageOpcode = N->getMachineOpcode();
Expand Down Expand Up @@ -7852,13 +7849,15 @@ void PPCDAGToDAGISel::PeepholePPC64() {
ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd),
ImmOpnd.getValueType());
} else if (Offset != 0) {
// This optimization is performed for non-TOC-based local-exec accesses.
if (isEligibleToFoldADDIForLocalExecAccesses(CurDAG, Base)) {
// This optimization is performed for non-TOC-based local-[exec|dynamic]
// accesses.
if (isEligibleToFoldADDIForFasterLocalAccesses(CurDAG, Base)) {
// Add the non-zero offset information into the load or store
// instruction to be used for non-TOC-based local-exec accesses.
// instruction to be used for non-TOC-based local-[exec|dynamic]
// accesses.
GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd);
assert(GA && "Expecting a valid GlobalAddressSDNode when folding "
"addi into local-exec accesses!");
"addi into local-[exec|dynamic] accesses!");
ImmOpnd = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
MVT::i64, Offset,
GA->getTargetFlags());
Expand Down
19 changes: 12 additions & 7 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40078,10 +40078,10 @@ static SDValue combineCommutableSHUFP(SDValue N, MVT VT, const SDLoc &DL,

// Attempt to fold BLEND(PERMUTE(X),PERMUTE(Y)) -> PERMUTE(BLEND(X,Y))
// iff we don't demand the same element index for both X and Y.
static SDValue combineBlendOfPermutes(MVT VT, SDValue N0, SDValue N1,
ArrayRef<int> BlendMask,
const APInt &DemandedElts,
SelectionDAG &DAG, const SDLoc &DL) {
static SDValue
combineBlendOfPermutes(MVT VT, SDValue N0, SDValue N1, ArrayRef<int> BlendMask,
const APInt &DemandedElts, SelectionDAG &DAG,
const X86Subtarget &Subtarget, const SDLoc &DL) {
assert(isBlendOrUndef(BlendMask) && "Blend shuffle expected");
if (!N0.hasOneUse() || !N1.hasOneUse())
return SDValue();
Expand Down Expand Up @@ -40156,6 +40156,11 @@ static SDValue combineBlendOfPermutes(MVT VT, SDValue N0, SDValue N1,
return SDValue();
}

// Don't introduce lane-crossing permutes without AVX2.
if (VT.is256BitVector() && !Subtarget.hasAVX2() &&
isLaneCrossingShuffleMask(128, VT.getScalarSizeInBits(), NewPermuteMask))
return SDValue();

SDValue NewBlend =
DAG.getVectorShuffle(VT, DL, DAG.getBitcast(VT, Ops0[0]),
DAG.getBitcast(VT, Ops1[0]), NewBlendMask);
Expand Down Expand Up @@ -41918,9 +41923,9 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
case X86ISD::BLENDI: {
SmallVector<int, 16> BlendMask;
DecodeBLENDMask(NumElts, Op.getConstantOperandVal(2), BlendMask);
if (SDValue R = combineBlendOfPermutes(VT.getSimpleVT(), Op.getOperand(0),
Op.getOperand(1), BlendMask,
DemandedElts, TLO.DAG, SDLoc(Op)))
if (SDValue R = combineBlendOfPermutes(
VT.getSimpleVT(), Op.getOperand(0), Op.getOperand(1), BlendMask,
DemandedElts, TLO.DAG, Subtarget, SDLoc(Op)))
return TLO.CombineTo(Op, R);
break;
}
Expand Down
31 changes: 16 additions & 15 deletions llvm/lib/Target/X86/X86TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5828,14 +5828,17 @@ InstructionCost X86TTIImpl::getGSVectorCost(unsigned Opcode,
Alignment, AddressSpace);
}

// If we didn't split, this will be a single gather/scatter instruction.
if (CostKind == TTI::TCK_CodeSize)
return 1;

// The gather / scatter cost is given by Intel architects. It is a rough
// number since we are looking at one instruction in a time.
const int GSOverhead = (Opcode == Instruction::Load)
? getGatherOverhead()
: getScatterOverhead();
const int GSOverhead = (Opcode == Instruction::Load) ? getGatherOverhead()
: getScatterOverhead();
return GSOverhead + VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
MaybeAlign(Alignment), AddressSpace,
TTI::TCK_RecipThroughput);
CostKind);
}

/// Return the cost of full scalarization of gather / scatter operation.
Expand Down Expand Up @@ -5892,19 +5895,17 @@ InstructionCost X86TTIImpl::getGatherScatterOpCost(
unsigned Opcode, Type *SrcVTy, const Value *Ptr, bool VariableMask,
Align Alignment, TTI::TargetCostKind CostKind,
const Instruction *I = nullptr) {
if (CostKind != TTI::TCK_RecipThroughput) {
if ((Opcode == Instruction::Load &&
isLegalMaskedGather(SrcVTy, Align(Alignment)) &&
!forceScalarizeMaskedGather(cast<VectorType>(SrcVTy),
Align(Alignment))) ||
(Opcode == Instruction::Store &&
isLegalMaskedScatter(SrcVTy, Align(Alignment)) &&
!forceScalarizeMaskedScatter(cast<VectorType>(SrcVTy),
Align(Alignment))))
return 1;
if (CostKind != TTI::TCK_RecipThroughput &&
((Opcode == Instruction::Load &&
(!isLegalMaskedGather(SrcVTy, Align(Alignment)) ||
forceScalarizeMaskedGather(cast<VectorType>(SrcVTy),
Align(Alignment)))) ||
(Opcode == Instruction::Store &&
(!isLegalMaskedScatter(SrcVTy, Align(Alignment)) ||
forceScalarizeMaskedScatter(cast<VectorType>(SrcVTy),
Align(Alignment))))))
return BaseT::getGatherScatterOpCost(Opcode, SrcVTy, Ptr, VariableMask,
Alignment, CostKind, I);
}

assert(SrcVTy->isVectorTy() && "Unexpected data type for Gather/Scatter");
PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
Expand Down
11 changes: 11 additions & 0 deletions llvm/lib/TargetParser/Triple.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1420,6 +1420,17 @@ VersionTuple Triple::getVulkanVersion() const {
return VersionTuple(0);
}

// Extract the DXIL version encoded in the architecture component of the
// triple (the text following a leading "dxilv", when that prefix is present).
// Must only be called on triples whose arch is dxil and whose OS is
// ShaderModel; anything else is treated as unreachable.
VersionTuple Triple::getDXILVersion() const {
  if (!(getArch() == dxil && getOS() == ShaderModel))
    llvm_unreachable("invalid DXIL triple");

  StringRef VersionText = getArchName();
  // Drop the "dxilv" prefix if there is one; the result is intentionally
  // ignored so a name without the prefix is parsed as-is.
  VersionText.consume_front("dxilv");

  // FIXME: validate DXIL version against Shader Model version.
  // Tracked by https://github.com/llvm/llvm-project/issues/91388
  return parseVersionFromName(VersionText);
}

// Replace the whole contents of this object with a Triple freshly
// constructed from Str.
void Triple::setTriple(const Twine &Str) {
*this = Triple(Str);
}
Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/Transforms/IPO/SampleProfile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1715,13 +1715,15 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) {
// if needed. Sample counts in profiles are 64-bit unsigned values,
// but internally branch weights are expressed as 32-bit values.
if (Weight > std::numeric_limits<uint32_t>::max()) {
LLVM_DEBUG(dbgs() << " (saturated due to uint32_t overflow)");
LLVM_DEBUG(dbgs() << " (saturated due to uint32_t overflow)\n");
Weight = std::numeric_limits<uint32_t>::max();
}
if (!SampleProfileUseProfi) {
// Weight is added by one to avoid propagation errors introduced by
// 0 weights.
Weights.push_back(static_cast<uint32_t>(Weight + 1));
Weights.push_back(static_cast<uint32_t>(
Weight == std::numeric_limits<uint32_t>::max() ? Weight
: Weight + 1));
} else {
// Profi creates proper weights that do not require "+1" adjustments but
// we evenly split the weight among branches with the same destination.
Expand Down
3 changes: 1 addition & 2 deletions llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3046,8 +3046,7 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
// which doesn't know how to produce a proper deopt state. So if we see a
// non-leaf memcpy/memmove without deopt state just treat it as a leaf
// copy and don't produce a statepoint.
if (!AllowStatepointWithNoDeoptInfo &&
!Call->getOperandBundle(LLVMContext::OB_deopt)) {
if (!AllowStatepointWithNoDeoptInfo && !Call->hasDeoptState()) {
assert((isa<AtomicMemCpyInst>(Call) || isa<AtomicMemMoveInst>(Call)) &&
"Don't expect any other calls here!");
return false;
Expand Down
9 changes: 6 additions & 3 deletions llvm/lib/Transforms/Utils/ValueMapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -538,17 +538,20 @@ Value *Mapper::mapValue(const Value *V) {
}

void Mapper::remapDbgRecord(DbgRecord &DR) {
// Remap DILocations.
auto *MappedDILoc = mapMetadata(DR.getDebugLoc());
DR.setDebugLoc(DebugLoc(cast<DILocation>(MappedDILoc)));

if (DbgLabelRecord *DLR = dyn_cast<DbgLabelRecord>(&DR)) {
// Remap labels.
DLR->setLabel(cast<DILabel>(mapMetadata(DLR->getLabel())));
return;
}

DbgVariableRecord &V = cast<DbgVariableRecord>(DR);
// Remap variables and DILocations.
// Remap variables.
auto *MappedVar = mapMetadata(V.getVariable());
auto *MappedDILoc = mapMetadata(V.getDebugLoc());
V.setVariable(cast<DILocalVariable>(MappedVar));
V.setDebugLoc(DebugLoc(cast<DILocation>(MappedDILoc)));

bool IgnoreMissingLocals = Flags & RF_IgnoreMissingLocals;

Expand Down
6 changes: 5 additions & 1 deletion llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11419,8 +11419,12 @@ Value *BoUpSLP::gather(ArrayRef<Value *> VL, Value *Root, Type *ScalarTy) {
if (Scalar->getType() != Ty) {
assert(Scalar->getType()->isIntegerTy() && Ty->isIntegerTy() &&
"Expected integer types only.");
Value *V = Scalar;
if (auto *CI = dyn_cast<CastInst>(Scalar);
isa_and_nonnull<SExtInst, ZExtInst>(CI))
V = CI->getOperand(0);
Scalar = Builder.CreateIntCast(
Scalar, Ty, !isKnownNonNegative(Scalar, SimplifyQuery(*DL)));
V, Ty, !isKnownNonNegative(Scalar, SimplifyQuery(*DL)));
}

Vec = Builder.CreateInsertElement(Vec, Scalar, Builder.getInt32(Pos));
Expand Down
12 changes: 6 additions & 6 deletions llvm/lib/Transforms/Vectorize/VectorCombine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1961,17 +1961,17 @@ bool VectorCombine::foldTruncFromReductions(Instruction &I) {
if (!match(ReductionSrc, m_OneUse(m_Trunc(m_Value(TruncSrc)))))
return false;

auto *Trunc = cast<CastInst>(ReductionSrc);
auto *TruncSrcTy = cast<VectorType>(TruncSrc->getType());
auto *ReductionSrcTy = cast<VectorType>(ReductionSrc->getType());
Type *ResultTy = I.getType();

TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
InstructionCost OldCost =
TTI.getCastInstrCost(Instruction::Trunc, ReductionSrcTy, TruncSrcTy,
TTI::CastContextHint::None, CostKind, Trunc) +
TTI.getArithmeticReductionCost(ReductionOpc, ReductionSrcTy, std::nullopt,
CostKind);
InstructionCost OldCost = TTI.getArithmeticReductionCost(
ReductionOpc, ReductionSrcTy, std::nullopt, CostKind);
if (auto *Trunc = dyn_cast<CastInst>(ReductionSrc))
OldCost +=
TTI.getCastInstrCost(Instruction::Trunc, ReductionSrcTy, TruncSrcTy,
TTI::CastContextHint::None, CostKind, Trunc);
InstructionCost NewCost =
TTI.getArithmeticReductionCost(ReductionOpc, TruncSrcTy, std::nullopt,
CostKind) +
Expand Down
Loading