34 changes: 34 additions & 0 deletions compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <fcntl.h>
#include <pthread.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/uio.h>

Expand All @@ -47,6 +48,7 @@ const char *const kOpenAtFunctionName = "openat64";
const char *const kOpenFunctionName = "open64";
const char *const kPreadFunctionName = "pread64";
const char *const kPwriteFunctionName = "pwrite64";
const char *const kMmapFunctionName = "mmap64";
#else
const char *const kCreatFunctionName = "creat";
const char *const kFcntlFunctionName = "fcntl";
Expand All @@ -55,6 +57,7 @@ const char *const kOpenAtFunctionName = "openat";
const char *const kOpenFunctionName = "open";
const char *const kPreadFunctionName = "pread";
const char *const kPwriteFunctionName = "pwrite";
const char *const kMmapFunctionName = "mmap";
#endif

using namespace testing;
Expand Down Expand Up @@ -179,6 +182,37 @@ TEST(TestRtsanInterceptors, PvallocDiesWhenRealtime) {
}
#endif

// Calls mmap for a small anonymous private mapping inside the checked lambda.
// ExpectRealtimeDeath must report kMmapFunctionName, and
// ExpectNonRealtimeSurvival must run the same lambda to completion.
TEST(TestRtsanInterceptors, MmapDiesWhenRealtime) {
  auto MapAnonymousPage = []() {
    // The result is intentionally ignored; only the call itself is under test.
    [[maybe_unused]] void *Mapping =
        mmap(/*addr=*/nullptr, /*length=*/8, PROT_READ | PROT_WRITE,
             MAP_PRIVATE | MAP_ANONYMOUS, /*fd=*/-1, /*offset=*/0);
  };
  ExpectRealtimeDeath(MapAnonymousPage, kMmapFunctionName);
  ExpectNonRealtimeSurvival(MapAnonymousPage);
}

// Creates a small anonymous private mapping up front, then unmaps it inside
// the checked lambda. ExpectRealtimeDeath must report "munmap", and
// ExpectNonRealtimeSurvival must run the same lambda to completion.
TEST(TestRtsanInterceptors, MunmapDiesWhenRealtime) {
  void *ptr = mmap(nullptr, 8, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  // mmap signals failure with MAP_FAILED rather than nullptr. Abort the test
  // on failure so munmap is never exercised on an invalid address.
  ASSERT_NE(ptr, MAP_FAILED);
  auto Func = [ptr]() { munmap(ptr, 8); };
  ExpectRealtimeDeath(Func, "munmap");
  ExpectNonRealtimeSurvival(Func);
}

// Calls shm_open on "/rtsan_test_shm" with O_CREAT | O_RDWR inside the checked
// lambda. ExpectRealtimeDeath must report "shm_open", and
// ExpectNonRealtimeSurvival must run the same lambda to completion.
TEST(TestRtsanInterceptors, ShmOpenDiesWhenRealtime) {
  auto OpenSharedMemory = []() {
    shm_open("/rtsan_test_shm", O_CREAT | O_RDWR, 0);
  };
  ExpectRealtimeDeath(OpenSharedMemory, "shm_open");
  ExpectNonRealtimeSurvival(OpenSharedMemory);
}

// Calls shm_unlink on "/rtsan_test_shm" inside the checked lambda.
// ExpectRealtimeDeath must report "shm_unlink", and ExpectNonRealtimeSurvival
// must run the same lambda to completion.
TEST(TestRtsanInterceptors, ShmUnlinkDiesWhenRealtime) {
  auto UnlinkSharedMemory = []() {
    shm_unlink("/rtsan_test_shm");
  };
  ExpectRealtimeDeath(UnlinkSharedMemory, "shm_unlink");
  ExpectNonRealtimeSurvival(UnlinkSharedMemory);
}

/*
Sleeping
*/
Expand Down
630 changes: 325 additions & 305 deletions compiler-rt/lib/tsan/rtl/tsan_interface_atomic.cpp

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions flang/test/Lower/OpenMP/Todo/task_detach.f90
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
! REQUIRES: openmp_runtime
! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s
! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s
! RUN: %not_todo_cmd bbc -emit-fir %openmp_flags -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s
! RUN: %not_todo_cmd %flang_fc1 -emit-fir %openmp_flags -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s

!===============================================================================
! `detach` clause
Expand Down
4 changes: 2 additions & 2 deletions flang/test/Parser/OpenMP/task.f90
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
! REQUIRES: openmp_runtime
! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=50 %s | FileCheck --ignore-case %s
! RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=50 %s | FileCheck --ignore-case --check-prefix="CHECK-UNPARSE" %s
! RUN: %flang_fc1 %openmp_flags -fdebug-dump-parse-tree -fopenmp -fopenmp-version=50 %s | FileCheck --ignore-case %s
! RUN: %flang_fc1 %openmp_flags -fdebug-unparse -fopenmp -fopenmp-version=50 %s | FileCheck --ignore-case --check-prefix="CHECK-UNPARSE" %s

!CHECK: OmpBlockDirective -> llvm::omp::Directive = task
!CHECK: OmpClauseList -> OmpClause -> Detach -> OmpDetachClause -> OmpObject -> Designator -> DataRef -> Name = 'event'
Expand Down
2 changes: 1 addition & 1 deletion libcxx/docs/FeatureTestMacroTable.rst
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ Status
---------------------------------------------------------- -----------------
``__cpp_lib_shift`` ``201806L``
---------------------------------------------------------- -----------------
``__cpp_lib_smart_ptr_for_overwrite`` *unimplemented*
``__cpp_lib_smart_ptr_for_overwrite`` ``202002L``
---------------------------------------------------------- -----------------
``__cpp_lib_source_location`` ``201907L``
---------------------------------------------------------- -----------------
Expand Down
4 changes: 2 additions & 2 deletions libcxx/docs/Status/Cxx20Papers.csv
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@
"`P0972R0 <https://wg21.link/P0972R0>`__","<chrono> ``zero()``\ , ``min()``\ , and ``max()``\ should be noexcept","2018-11 (San Diego)","|Complete|","8",""
"`P1006R1 <https://wg21.link/P1006R1>`__","Constexpr in std::pointer_traits","2018-11 (San Diego)","|Complete|","8",""
"`P1007R3 <https://wg21.link/P1007R3>`__","``std::assume_aligned``\ ","2018-11 (San Diego)","|Complete|","15",""
"`P1020R1 <https://wg21.link/P1020R1>`__","Smart pointer creation with default initialization","2018-11 (San Diego)","|Complete|","16",""
"`P1020R1 <https://wg21.link/P1020R1>`__","Smart pointer creation with default initialization","2018-11 (San Diego)","|Complete|","16","The feature-test macro was not set until LLVM 20."
"`P1032R1 <https://wg21.link/P1032R1>`__","Misc constexpr bits","2018-11 (San Diego)","|Complete|","13",""
"`P1085R2 <https://wg21.link/P1085R2>`__","Should Span be Regular?","2018-11 (San Diego)","|Complete|","8",""
"`P1123R0 <https://wg21.link/P1123R0>`__","Editorial Guidance for merging P0019r8 and P0528r3","2018-11 (San Diego)","","",""
Expand Down Expand Up @@ -177,7 +177,7 @@
"`P1963R0 <https://wg21.link/P1963R0>`__","Fixing US 313","2020-02 (Prague)","","",""
"`P1964R2 <https://wg21.link/P1964R2>`__","Wording for boolean-testable","2020-02 (Prague)","|Complete|","13",""
"`P1970R2 <https://wg21.link/P1970R2>`__","Consistency for size() functions: Add ranges::ssize","2020-02 (Prague)","|Complete|","15",""
"`P1973R1 <https://wg21.link/P1973R1>`__","Rename ""_default_init"" Functions, Rev1","2020-02 (Prague)","|Complete|","16",""
"`P1973R1 <https://wg21.link/P1973R1>`__","Rename ""_default_init"" Functions, Rev1","2020-02 (Prague)","|Complete|","16","The feature-test macro was not set until LLVM 20."
"`P1976R2 <https://wg21.link/P1976R2>`__","Fixed-size span construction from dynamic range","2020-02 (Prague)","|Complete|","11",""
"`P1981R0 <https://wg21.link/P1981R0>`__","Rename leap to leap_second","2020-02 (Prague)","|Complete|","19",""
"`P1982R0 <https://wg21.link/P1982R0>`__","Rename link to time_zone_link","2020-02 (Prague)","|Complete|","19",""
Expand Down
2 changes: 1 addition & 1 deletion libcxx/include/version
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,7 @@ __cpp_lib_void_t 201411L <type_traits>
# undef __cpp_lib_shared_ptr_arrays
# define __cpp_lib_shared_ptr_arrays 201707L
# define __cpp_lib_shift 201806L
// # define __cpp_lib_smart_ptr_for_overwrite 202002L
# define __cpp_lib_smart_ptr_for_overwrite 202002L
# define __cpp_lib_source_location 201907L
# define __cpp_lib_span 202002L
# define __cpp_lib_ssize 201902L
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -390,17 +390,11 @@
# error "__cpp_lib_shared_ptr_weak_type should have the value 201606L in c++20"
# endif

# if !defined(_LIBCPP_VERSION)
# ifndef __cpp_lib_smart_ptr_for_overwrite
# error "__cpp_lib_smart_ptr_for_overwrite should be defined in c++20"
# endif
# if __cpp_lib_smart_ptr_for_overwrite != 202002L
# error "__cpp_lib_smart_ptr_for_overwrite should have the value 202002L in c++20"
# endif
# else // _LIBCPP_VERSION
# ifdef __cpp_lib_smart_ptr_for_overwrite
# error "__cpp_lib_smart_ptr_for_overwrite should not be defined because it is unimplemented in libc++!"
# endif
# ifndef __cpp_lib_smart_ptr_for_overwrite
# error "__cpp_lib_smart_ptr_for_overwrite should be defined in c++20"
# endif
# if __cpp_lib_smart_ptr_for_overwrite != 202002L
# error "__cpp_lib_smart_ptr_for_overwrite should have the value 202002L in c++20"
# endif

# ifdef __cpp_lib_smart_ptr_owner_equality
Expand Down Expand Up @@ -521,17 +515,11 @@
# error "__cpp_lib_shared_ptr_weak_type should have the value 201606L in c++23"
# endif

# if !defined(_LIBCPP_VERSION)
# ifndef __cpp_lib_smart_ptr_for_overwrite
# error "__cpp_lib_smart_ptr_for_overwrite should be defined in c++23"
# endif
# if __cpp_lib_smart_ptr_for_overwrite != 202002L
# error "__cpp_lib_smart_ptr_for_overwrite should have the value 202002L in c++23"
# endif
# else // _LIBCPP_VERSION
# ifdef __cpp_lib_smart_ptr_for_overwrite
# error "__cpp_lib_smart_ptr_for_overwrite should not be defined because it is unimplemented in libc++!"
# endif
# ifndef __cpp_lib_smart_ptr_for_overwrite
# error "__cpp_lib_smart_ptr_for_overwrite should be defined in c++23"
# endif
# if __cpp_lib_smart_ptr_for_overwrite != 202002L
# error "__cpp_lib_smart_ptr_for_overwrite should have the value 202002L in c++23"
# endif

# ifdef __cpp_lib_smart_ptr_owner_equality
Expand Down Expand Up @@ -652,17 +640,11 @@
# error "__cpp_lib_shared_ptr_weak_type should have the value 201606L in c++26"
# endif

# if !defined(_LIBCPP_VERSION)
# ifndef __cpp_lib_smart_ptr_for_overwrite
# error "__cpp_lib_smart_ptr_for_overwrite should be defined in c++26"
# endif
# if __cpp_lib_smart_ptr_for_overwrite != 202002L
# error "__cpp_lib_smart_ptr_for_overwrite should have the value 202002L in c++26"
# endif
# else // _LIBCPP_VERSION
# ifdef __cpp_lib_smart_ptr_for_overwrite
# error "__cpp_lib_smart_ptr_for_overwrite should not be defined because it is unimplemented in libc++!"
# endif
# ifndef __cpp_lib_smart_ptr_for_overwrite
# error "__cpp_lib_smart_ptr_for_overwrite should be defined in c++26"
# endif
# if __cpp_lib_smart_ptr_for_overwrite != 202002L
# error "__cpp_lib_smart_ptr_for_overwrite should have the value 202002L in c++26"
# endif

# if !defined(_LIBCPP_VERSION)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4344,17 +4344,11 @@
# error "__cpp_lib_shift should have the value 201806L in c++20"
# endif

# if !defined(_LIBCPP_VERSION)
# ifndef __cpp_lib_smart_ptr_for_overwrite
# error "__cpp_lib_smart_ptr_for_overwrite should be defined in c++20"
# endif
# if __cpp_lib_smart_ptr_for_overwrite != 202002L
# error "__cpp_lib_smart_ptr_for_overwrite should have the value 202002L in c++20"
# endif
# else // _LIBCPP_VERSION
# ifdef __cpp_lib_smart_ptr_for_overwrite
# error "__cpp_lib_smart_ptr_for_overwrite should not be defined because it is unimplemented in libc++!"
# endif
# ifndef __cpp_lib_smart_ptr_for_overwrite
# error "__cpp_lib_smart_ptr_for_overwrite should be defined in c++20"
# endif
# if __cpp_lib_smart_ptr_for_overwrite != 202002L
# error "__cpp_lib_smart_ptr_for_overwrite should have the value 202002L in c++20"
# endif

# ifdef __cpp_lib_smart_ptr_owner_equality
Expand Down Expand Up @@ -5929,17 +5923,11 @@
# error "__cpp_lib_shift should have the value 201806L in c++23"
# endif

# if !defined(_LIBCPP_VERSION)
# ifndef __cpp_lib_smart_ptr_for_overwrite
# error "__cpp_lib_smart_ptr_for_overwrite should be defined in c++23"
# endif
# if __cpp_lib_smart_ptr_for_overwrite != 202002L
# error "__cpp_lib_smart_ptr_for_overwrite should have the value 202002L in c++23"
# endif
# else // _LIBCPP_VERSION
# ifdef __cpp_lib_smart_ptr_for_overwrite
# error "__cpp_lib_smart_ptr_for_overwrite should not be defined because it is unimplemented in libc++!"
# endif
# ifndef __cpp_lib_smart_ptr_for_overwrite
# error "__cpp_lib_smart_ptr_for_overwrite should be defined in c++23"
# endif
# if __cpp_lib_smart_ptr_for_overwrite != 202002L
# error "__cpp_lib_smart_ptr_for_overwrite should have the value 202002L in c++23"
# endif

# ifdef __cpp_lib_smart_ptr_owner_equality
Expand Down Expand Up @@ -7829,17 +7817,11 @@
# error "__cpp_lib_shift should have the value 201806L in c++26"
# endif

# if !defined(_LIBCPP_VERSION)
# ifndef __cpp_lib_smart_ptr_for_overwrite
# error "__cpp_lib_smart_ptr_for_overwrite should be defined in c++26"
# endif
# if __cpp_lib_smart_ptr_for_overwrite != 202002L
# error "__cpp_lib_smart_ptr_for_overwrite should have the value 202002L in c++26"
# endif
# else // _LIBCPP_VERSION
# ifdef __cpp_lib_smart_ptr_for_overwrite
# error "__cpp_lib_smart_ptr_for_overwrite should not be defined because it is unimplemented in libc++!"
# endif
# ifndef __cpp_lib_smart_ptr_for_overwrite
# error "__cpp_lib_smart_ptr_for_overwrite should be defined in c++26"
# endif
# if __cpp_lib_smart_ptr_for_overwrite != 202002L
# error "__cpp_lib_smart_ptr_for_overwrite should have the value 202002L in c++26"
# endif

# if !defined(_LIBCPP_VERSION)
Expand Down
1 change: 0 additions & 1 deletion libcxx/utils/generate_feature_test_macro_components.py
Original file line number Diff line number Diff line change
Expand Up @@ -1196,7 +1196,6 @@ def add_version_header(tc):
"name": "__cpp_lib_smart_ptr_for_overwrite",
"values": {"c++20": 202002},
"headers": ["memory"],
"unimplemented": True,
},
{
"name": "__cpp_lib_smart_ptr_owner_equality",
Expand Down
6 changes: 3 additions & 3 deletions lld/test/MachO/cgdata-generate.s
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@

# RUN: rm -rf %t; split-file %s %t

# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata.
# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata.
# RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata
# RUN: od -t x1 -j 24 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-1-bytes.txt
# RUN: od -t x1 -j 32 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-1-bytes.txt
# RUN: sed "s/<RAW_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-template.s > %t/merge-1.s
# RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata
# RUN: od -t x1 -j 24 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-2-bytes.txt
# RUN: od -t x1 -j 32 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-2-bytes.txt
# RUN: sed "s/<RAW_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-template.s > %t/merge-2.s

# RUN: llvm-mc -filetype obj -triple arm64-apple-darwin %t/merge-1.s -o %t/merge-1.o
Expand Down
13 changes: 0 additions & 13 deletions lld/test/wasm/Inputs/require-feature-foo.yaml

This file was deleted.

90 changes: 0 additions & 90 deletions lld/test/wasm/target-feature-required.yaml

This file was deleted.

11 changes: 0 additions & 11 deletions lld/test/wasm/target-feature-used.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,6 @@
# RUN: yaml2obj %S/Inputs/use-feature-foo.yaml -o %t.used.o
# RUN: wasm-ld --no-entry -o - %t1.o %t.used.o | obj2yaml | FileCheck %s --check-prefix USED

# RUN: yaml2obj %S/Inputs/require-feature-foo.yaml -o %t.required.o
# RUN: wasm-ld --no-entry -o - %t1.o %t.required.o | obj2yaml | FileCheck %s --check-prefix REQUIRED

# RUN: yaml2obj %S/Inputs/disallow-feature-foo.yaml -o %t.disallowed.o
# RUN: not wasm-ld --no-entry -o /dev/null %t1.o %t.disallowed.o 2>&1 | FileCheck %s --check-prefix DISALLOWED

Expand All @@ -24,7 +21,6 @@
# give the expected results:
#
# USED x USED => USED
# USED x REQUIRED => USED
# USED x DISALLOWED => Error
# USED x NONE => USED

Expand Down Expand Up @@ -73,13 +69,6 @@ Sections:
# USED-NEXT: Name: foo
# USED-NEXT: ...

# REQUIRED: - Type: CUSTOM
# REQUIRED: Name: target_features
# REQUIRED-NEXT: Features:
# REQUIRED-NEXT: - Prefix: USED
# REQUIRED-NEXT: Name: foo
# REQUIRED-NEXT: ...

# DISALLOWED: Target feature 'foo' used in {{.*}}target-feature-used.yaml.tmp1.o is disallowed by {{.*}}target-feature-used.yaml.tmp.disallowed.o. Use --no-check-features to suppress.{{$}}

# DISALLOWED-NOCHECK: - Type: CUSTOM
Expand Down
15 changes: 2 additions & 13 deletions lld/wasm/Writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -572,7 +572,6 @@ void Writer::finalizeSections() {

void Writer::populateTargetFeatures() {
StringMap<std::string> used;
StringMap<std::string> required;
StringMap<std::string> disallowed;
SmallSet<std::string, 8> &allowed = out.targetFeaturesSec->features;
bool tlsUsed = false;
Expand All @@ -599,18 +598,14 @@ void Writer::populateTargetFeatures() {
goto done;
}

// Find the sets of used, required, and disallowed features
// Find the sets of used and disallowed features
for (ObjFile *file : ctx.objectFiles) {
StringRef fileName(file->getName());
for (auto &feature : file->getWasmObj()->getTargetFeatures()) {
switch (feature.Prefix) {
case WASM_FEATURE_PREFIX_USED:
used.insert({feature.Name, std::string(fileName)});
break;
case WASM_FEATURE_PREFIX_REQUIRED:
used.insert({feature.Name, std::string(fileName)});
required.insert({feature.Name, std::string(fileName)});
break;
case WASM_FEATURE_PREFIX_DISALLOWED:
disallowed.insert({feature.Name, std::string(fileName)});
break;
Expand Down Expand Up @@ -662,7 +657,7 @@ void Writer::populateTargetFeatures() {
}
}

// Validate the required and disallowed constraints for each file
// Validate the disallowed constraints for each file
for (ObjFile *file : ctx.objectFiles) {
StringRef fileName(file->getName());
SmallSet<std::string, 8> objectFeatures;
Expand All @@ -675,12 +670,6 @@ void Writer::populateTargetFeatures() {
fileName + " is disallowed by " + disallowed[feature.Name] +
". Use --no-check-features to suppress.");
}
for (const auto &feature : required.keys()) {
if (!objectFeatures.count(std::string(feature)))
error(Twine("Missing target feature '") + feature + "' in " + fileName +
", required by " + required[feature] +
". Use --no-check-features to suppress.");
}
}

done:
Expand Down
16 changes: 7 additions & 9 deletions llvm/docs/CommandGuide/llvm-cgdata.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,13 @@ SYNOPSIS
DESCRIPTION
-----------

The :program:llvm-cgdata utility parses raw codegen data embedded
in compiled binary files and merges them into a single .cgdata file.
It can also inspect and manipulate .cgdata files.
Currently, the tool supports saving and restoring outlined hash trees,
enabling global function outlining across modules, allowing for more
efficient function outlining in subsequent compilations.
The design is extensible, allowing for the incorporation of additional
codegen summaries and optimization techniques, such as global function
merging, in the future.
The :program:`llvm-cgdata` utility parses raw codegen data embedded in compiled
binary files and merges them into a single .cgdata file. It can also inspect
and manipulate .cgdata files. Currently, the tool supports saving and restoring
outlined hash trees and stable function maps, allowing for more efficient
function outlining and function merging across modules in subsequent
compilations. The design is extensible, allowing for the incorporation of
additional codegen summaries and optimization techniques.

COMMANDS
--------
Expand Down
1 change: 0 additions & 1 deletion llvm/include/llvm/BinaryFormat/Wasm.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,6 @@ const unsigned WASM_ELEM_SEGMENT_MASK_HAS_ELEM_KIND = 0x3;
// Feature policy prefixes used in the custom "target_features" section
enum : uint8_t {
WASM_FEATURE_PREFIX_USED = '+',
WASM_FEATURE_PREFIX_REQUIRED = '=',
WASM_FEATURE_PREFIX_DISALLOWED = '-',
};

Expand Down
24 changes: 23 additions & 1 deletion llvm/include/llvm/CGData/CodeGenData.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/CGData/OutlinedHashTree.h"
#include "llvm/CGData/OutlinedHashTreeRecord.h"
#include "llvm/CGData/StableFunctionMapRecord.h"
#include "llvm/IR/Module.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Caching.h"
Expand All @@ -41,7 +42,9 @@ enum class CGDataKind {
Unknown = 0x0,
// A function outlining info.
FunctionOutlinedHashTree = 0x1,
LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/FunctionOutlinedHashTree)
// A function merging info.
StableFunctionMergingMap = 0x2,
LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/StableFunctionMergingMap)
};

const std::error_category &cgdata_category();
Expand Down Expand Up @@ -108,6 +111,8 @@ enum CGDataMode {
class CodeGenData {
/// Global outlined hash tree that has outlined hash sequences across modules.
std::unique_ptr<OutlinedHashTree> PublishedHashTree;
/// Global stable function map that has stable function info across modules.
std::unique_ptr<StableFunctionMap> PublishedStableFunctionMap;

/// This flag is set when -fcodegen-data-generate is passed.
/// Or, it can be mutated with -fcodegen-data-thinlto-two-rounds.
Expand All @@ -131,6 +136,9 @@ class CodeGenData {
bool hasOutlinedHashTree() {
return PublishedHashTree && !PublishedHashTree->empty();
}
/// Return true if a stable function map has been published and it is not
/// empty.
bool hasStableFunctionMap() {
return PublishedStableFunctionMap && !PublishedStableFunctionMap->empty();
}

/// Returns the outlined hash tree. This can be globally used in a read-only
/// manner.
Expand All @@ -147,6 +155,12 @@ class CodeGenData {
// Ensure we disable emitCGData as we do not want to read and write both.
EmitCGData = false;
}
/// Publish the stable function map, taking ownership of \p FunctionMap.
/// Publishing also clears EmitCGData.
void
publishStableFunctionMap(std::unique_ptr<StableFunctionMap> FunctionMap) {
PublishedStableFunctionMap = std::move(FunctionMap);
// Ensure we disable emitCGData as we do not want to read and write both.
EmitCGData = false;
}
};

namespace cgdata {
Expand All @@ -166,6 +180,11 @@ publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) {
CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree));
}

/// Hand \p FunctionMap over to the CodeGenData singleton, which publishes it
/// as the stable function map.
inline void
publishStableFunctionMap(std::unique_ptr<StableFunctionMap> FunctionMap) {
CodeGenData::getInstance().publishStableFunctionMap(std::move(FunctionMap));
}

struct StreamCacheData {
/// Backing buffer for serialized data stream.
SmallVector<SmallString<0>> Outputs;
Expand Down Expand Up @@ -249,6 +268,8 @@ enum CGDataVersion {
// Version 1 is the first version. This version supports the outlined
// hash tree.
Version1 = 1,
// Version 2 supports the stable function merging map.
Version2 = 2,
CurrentVersion = CG_DATA_INDEX_VERSION
};
const uint64_t Version = CGDataVersion::CurrentVersion;
Expand All @@ -258,6 +279,7 @@ struct Header {
uint32_t Version;
uint32_t DataKind;
uint64_t OutlinedHashTreeOffset;
uint64_t StableFunctionMapOffset;

// New fields should only be added at the end to ensure that the size
// computation is correct. The methods below need to be updated to ensure that
Expand Down
12 changes: 9 additions & 3 deletions llvm/include/llvm/CGData/CodeGenData.inc
Original file line number Diff line number Diff line change
Expand Up @@ -20,27 +20,33 @@
#define CG_DATA_DEFINED
CG_DATA_SECT_ENTRY(CG_outline, CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON),
CG_DATA_OUTLINE_COFF, "__DATA,")
CG_DATA_SECT_ENTRY(CG_merge, CG_DATA_QUOTE(CG_DATA_MERGE_COMMON),
CG_DATA_MERGE_COFF, "__DATA,")

#undef CG_DATA_SECT_ENTRY
#endif

/* section name strings common to all targets other
than WIN32 */
#define CG_DATA_OUTLINE_COMMON __llvm_outline
#define CG_DATA_MERGE_COMMON __llvm_merge
/* Since cg data sections are not allocated, we don't need to
* access them at runtime.
*/
#define CG_DATA_OUTLINE_COFF ".loutline"
#define CG_DATA_MERGE_COFF ".lmerge"

#ifdef _WIN32
/* Runtime section names and name strings. */
#define CG_DATA_SECT_NAME CG_DATA_OUTLINE_COFF
#define CG_DATA_OUTLINE_SECT_NAME CG_DATA_OUTLINE_COFF
#define CG_DATA_MERGE_SECT_NAME CG_DATA_MERGE_COFF

#else
/* Runtime section names and name strings. */
#define CG_DATA_SECT_NAME CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON)
#define CG_DATA_OUTLINE_SECT_NAME CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON)
#define CG_DATA_MERGE_SECT_NAME CG_DATA_QUOTE(CG_DATA_MERGE_COMMON)

#endif

/* Indexed codegen data format version (start from 1). */
#define CG_DATA_INDEX_VERSION 1
#define CG_DATA_INDEX_VERSION 2
29 changes: 26 additions & 3 deletions llvm/include/llvm/CGData/CodeGenDataReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

#include "llvm/CGData/CodeGenData.h"
#include "llvm/CGData/OutlinedHashTreeRecord.h"
#include "llvm/CGData/StableFunctionMapRecord.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/VirtualFileSystem.h"

Expand All @@ -36,10 +37,15 @@ class CodeGenDataReader {
virtual CGDataKind getDataKind() const = 0;
/// Return true if the data has an outlined hash tree.
virtual bool hasOutlinedHashTree() const = 0;
/// Return true if the data has a stable function map.
virtual bool hasStableFunctionMap() const = 0;
/// Return the outlined hash tree that is released from the reader.
std::unique_ptr<OutlinedHashTree> releaseOutlinedHashTree() {
return std::move(HashTreeRecord.HashTree);
}
/// Return the stable function map that is released from the reader.
std::unique_ptr<StableFunctionMap> releaseStableFunctionMap() {
return std::move(FunctionMapRecord.FunctionMap);
}

/// Factory method to create an appropriately typed reader for the given
/// codegen data file path and file system.
Expand All @@ -56,15 +62,21 @@ class CodeGenDataReader {
/// is used by `llvm-cgdata --merge` or ThinLTO's two-codegen rounds.
/// Optionally, \p CombinedHash can be used to compuate the combined hash of
/// the merged data.
static Error mergeFromObjectFile(const object::ObjectFile *Obj,
OutlinedHashTreeRecord &GlobalOutlineRecord,
stable_hash *CombinedHash = nullptr);
static Error
mergeFromObjectFile(const object::ObjectFile *Obj,
OutlinedHashTreeRecord &GlobalOutlineRecord,
StableFunctionMapRecord &GlobalFunctionMapRecord,
stable_hash *CombinedHash = nullptr);

protected:
/// The outlined hash tree that has been read. When it's released by
/// releaseOutlinedHashTree(), it's no longer valid.
OutlinedHashTreeRecord HashTreeRecord;

/// The stable function map that has been read. When it's released by
/// releaseStableFunctionMap(), it's no longer valid.
StableFunctionMapRecord FunctionMapRecord;

/// Set the current error and return same.
Error error(cgdata_error Err, const std::string &ErrMsg = "") {
LastError = Err;
Expand Down Expand Up @@ -115,6 +127,11 @@ class IndexedCodeGenDataReader : public CodeGenDataReader {
return Header.DataKind &
static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree);
}
/// Report whether the header's DataKind mask has the stable function map bit
/// set.
bool hasStableFunctionMap() const override {
  const uint32_t MapBit =
      static_cast<uint32_t>(CGDataKind::StableFunctionMergingMap);
  return (Header.DataKind & MapBit) != 0;
}
};

/// This format is a simple text format that's suitable for test data.
Expand Down Expand Up @@ -150,6 +167,12 @@ class TextCodeGenDataReader : public CodeGenDataReader {
return static_cast<uint32_t>(DataKind) &
static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree);
}
/// Report whether DataKind has the stable function map bit set.
/// A set bit does not mean that the data is still available.
bool hasStableFunctionMap() const override {
  const uint32_t KindBits = static_cast<uint32_t>(DataKind);
  const uint32_t MapBit =
      static_cast<uint32_t>(CGDataKind::StableFunctionMergingMap);
  return (KindBits & MapBit) != 0;
}
};

} // end namespace llvm
Expand Down
17 changes: 16 additions & 1 deletion llvm/include/llvm/CGData/CodeGenDataWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

#include "llvm/CGData/CodeGenData.h"
#include "llvm/CGData/OutlinedHashTreeRecord.h"
#include "llvm/CGData/StableFunctionMapRecord.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/Error.h"

Expand Down Expand Up @@ -57,16 +58,22 @@ class CodeGenDataWriter {
/// The outlined hash tree to be written.
OutlinedHashTreeRecord HashTreeRecord;

/// The stable function map to be written.
StableFunctionMapRecord FunctionMapRecord;

/// A bit mask describing the kind of the codegen data.
CGDataKind DataKind = CGDataKind::Unknown;

public:
CodeGenDataWriter() = default;
~CodeGenDataWriter() = default;

/// Add the outlined hash tree record. The input Record is released.
/// Add the outlined hash tree record. The input hash tree is released.
void addRecord(OutlinedHashTreeRecord &Record);

/// Add the stable function map record. The input function map is released.
void addRecord(StableFunctionMapRecord &Record);

/// Write the codegen data to \c OS
Error write(raw_fd_ostream &OS);

Expand All @@ -81,11 +88,19 @@ class CodeGenDataWriter {
return static_cast<uint32_t>(DataKind) &
static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree);
}
/// Report whether DataKind has the stable function map bit set.
bool hasStableFunctionMap() const {
  const uint32_t KindBits = static_cast<uint32_t>(DataKind);
  const uint32_t MapBit =
      static_cast<uint32_t>(CGDataKind::StableFunctionMergingMap);
  return (KindBits & MapBit) != 0;
}

private:
/// The offset of the outlined hash tree in the file.
uint64_t OutlinedHashTreeOffset;

/// The offset of the stable function map in the file.
uint64_t StableFunctionMapOffset;

/// Write the codegen data header to \c COS
Error writeHeader(CGDataOStream &COS);

Expand Down
3 changes: 3 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,9 @@ class CombinerHelper {
bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal);
void applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal);

// Transform a G_SUB with constant on the RHS to G_ADD.
bool matchCombineSubToAdd(MachineInstr &MI, BuildFnTy &MatchInfo);

// Transform a G_SHL with an extended source into a narrower shift if
// possible.
bool matchCombineShlOfExtend(MachineInstr &MI, RegisterImmPair &MatchData);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class JITLinkRedirectableSymbolManager : public RedirectableSymbolManager {
}

void emitRedirectableSymbols(std::unique_ptr<MaterializationResponsibility> R,
const SymbolAddrMap &InitialDests) override;
SymbolAddrMap InitialDests) override;

Error redirect(JITDylib &JD, const SymbolAddrMap &NewDests) override;

Expand Down
9 changes: 4 additions & 5 deletions llvm/include/llvm/ExecutionEngine/Orc/RedirectionManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,7 @@ class RedirectableSymbolManager : public RedirectionManager {
public:
/// Create redirectable symbols with given symbol names and initial
/// destination symbol addresses.
Error createRedirectableSymbols(ResourceTrackerSP RT,
const SymbolMap &InitialDests);
Error createRedirectableSymbols(ResourceTrackerSP RT, SymbolMap InitialDests);

/// Create a single redirectable symbol with given symbol name and initial
/// destination symbol address.
Expand All @@ -60,17 +59,17 @@ class RedirectableSymbolManager : public RedirectionManager {
/// Emit redirectable symbol
virtual void
emitRedirectableSymbols(std::unique_ptr<MaterializationResponsibility> MR,
const SymbolMap &InitialDests) = 0;
SymbolMap InitialDests) = 0;
};

/// RedirectableMaterializationUnit materializes redirectable symbol
/// by invoking RedirectableSymbolManager::emitRedirectableSymbols
class RedirectableMaterializationUnit : public MaterializationUnit {
public:
RedirectableMaterializationUnit(RedirectableSymbolManager &RM,
const SymbolMap &InitialDests)
SymbolMap InitialDests)
: MaterializationUnit(convertToFlags(InitialDests)), RM(RM),
InitialDests(InitialDests) {}
InitialDests(std::move(InitialDests)) {}

StringRef getName() const override {
return "RedirectableSymbolMaterializationUnit";
Expand Down
13 changes: 11 additions & 2 deletions llvm/include/llvm/Target/GlobalISel/Combine.td
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,14 @@ def mul_to_shl : GICombineRule<
[{ return Helper.matchCombineMulToShl(*${mi}, ${matchinfo}); }]),
(apply [{ Helper.applyCombineMulToShl(*${mi}, ${matchinfo}); }])>;

// (sub x, C) -> (add x, -C)
def sub_to_add : GICombineRule<
(defs root:$d, build_fn_matchinfo:$matchinfo),
(match (G_CONSTANT $c, $imm),
(G_SUB $d, $op1, $c):$mi,
[{ return Helper.matchCombineSubToAdd(*${mi}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFnNoErase(*${mi}, ${matchinfo}); }])>;

// shl ([asz]ext x), y => zext (shl x, y), if shift does not overflow int
def reduce_shl_of_extend_matchdata : GIDefMatchData<"RegisterImmPair">;
def reduce_shl_of_extend : GICombineRule<
Expand Down Expand Up @@ -1903,8 +1911,9 @@ def bitreverse_shift : GICombineGroup<[bitreverse_shl, bitreverse_lshr]>;
def select_combines : GICombineGroup<[select_undef_cmp, select_constant_cmp,
select_to_iminmax, match_selects]>;

def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd,
mul_by_neg_one, idempotent_prop]>;
def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, sub_to_add,
add_p2i_to_ptradd, mul_by_neg_one,
idempotent_prop]>;

def fma_combines : GICombineGroup<[combine_fadd_fmul_to_fmad_or_fma,
combine_fadd_fpext_fmul_to_fmad_or_fma, combine_fadd_fma_fmul_to_fmad_or_fma,
Expand Down
30 changes: 18 additions & 12 deletions llvm/lib/CGData/CodeGenData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/CGData/CodeGenDataReader.h"
#include "llvm/CGData/OutlinedHashTreeRecord.h"
#include "llvm/CGData/StableFunctionMapRecord.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Caching.h"
#include "llvm/Support/CommandLine.h"
Expand Down Expand Up @@ -163,6 +164,8 @@ CodeGenData &CodeGenData::getInstance() {
auto Reader = ReaderOrErr->get();
if (Reader->hasOutlinedHashTree())
Instance->publishOutlinedHashTree(Reader->releaseOutlinedHashTree());
if (Reader->hasStableFunctionMap())
Instance->publishStableFunctionMap(Reader->releaseStableFunctionMap());
}
});
return *Instance;
Expand All @@ -185,18 +188,14 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Curr) {
return make_error<CGDataError>(cgdata_error::unsupported_version);
H.DataKind = endian::readNext<uint32_t, endianness::little, unaligned>(Curr);

switch (H.Version) {
// When a new field is added to the header add a case statement here to
// compute the size as offset of the new field + size of the new field. This
// relies on the field being added to the end of the list.
static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version1,
"Please update the size computation below if a new field has "
"been added to the header, if not add a case statement to "
"fall through to the latest version.");
case 1ull:
H.OutlinedHashTreeOffset =
static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version2,
"Please update the offset computation below if a new field has "
"been added to the header.");
H.OutlinedHashTreeOffset =
endian::readNext<uint64_t, endianness::little, unaligned>(Curr);
if (H.Version >= 2)
H.StableFunctionMapOffset =
endian::readNext<uint64_t, endianness::little, unaligned>(Curr);
}

return H;
}
Expand Down Expand Up @@ -257,6 +256,7 @@ std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule,

Expected<stable_hash> mergeCodeGenData(ArrayRef<StringRef> ObjFiles) {
OutlinedHashTreeRecord GlobalOutlineRecord;
StableFunctionMapRecord GlobalStableFunctionMapRecord;
stable_hash CombinedHash = 0;
for (auto File : ObjFiles) {
if (File.empty())
Expand All @@ -270,12 +270,18 @@ Expected<stable_hash> mergeCodeGenData(ArrayRef<StringRef> ObjFiles) {

std::unique_ptr<object::ObjectFile> &Obj = BinOrErr.get();
if (auto E = CodeGenDataReader::mergeFromObjectFile(
Obj.get(), GlobalOutlineRecord, &CombinedHash))
Obj.get(), GlobalOutlineRecord, GlobalStableFunctionMapRecord,
&CombinedHash))
return E;
}

GlobalStableFunctionMapRecord.finalize();

if (!GlobalOutlineRecord.empty())
cgdata::publishOutlinedHashTree(std::move(GlobalOutlineRecord.HashTree));
if (!GlobalStableFunctionMapRecord.empty())
cgdata::publishStableFunctionMap(
std::move(GlobalStableFunctionMapRecord.FunctionMap));

return CombinedHash;
}
Expand Down
65 changes: 44 additions & 21 deletions llvm/lib/CGData/CodeGenDataReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,40 @@ setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) {

Error CodeGenDataReader::mergeFromObjectFile(
const object::ObjectFile *Obj, OutlinedHashTreeRecord &GlobalOutlineRecord,
StableFunctionMapRecord &GlobalFunctionMapRecord,
stable_hash *CombinedHash) {
Triple TT = Obj->makeTriple();
auto CGOutLineName =
auto CGOutlineName =
getCodeGenDataSectionName(CG_outline, TT.getObjectFormat(), false);
auto CGMergeName =
getCodeGenDataSectionName(CG_merge, TT.getObjectFormat(), false);

auto processSectionContents = [&](const StringRef &Name,
const StringRef &Contents) {
if (Name != CGOutlineName && Name != CGMergeName)
return;
if (CombinedHash)
*CombinedHash = stable_hash_combine(*CombinedHash, xxh3_64bits(Contents));
auto *Data = reinterpret_cast<const unsigned char *>(Contents.data());
auto *EndData = Data + Contents.size();
// In case dealing with an executable that has concatenated cgdata,
// we want to merge them into a single cgdata.
// Although it's not a typical workflow, we support this scenario
// by looping over all data in the sections.
if (Name == CGOutlineName) {
while (Data != EndData) {
OutlinedHashTreeRecord LocalOutlineRecord;
LocalOutlineRecord.deserialize(Data);
GlobalOutlineRecord.merge(LocalOutlineRecord);
}
} else if (Name == CGMergeName) {
while (Data != EndData) {
StableFunctionMapRecord LocalFunctionMapRecord;
LocalFunctionMapRecord.deserialize(Data);
GlobalFunctionMapRecord.merge(LocalFunctionMapRecord);
}
}
};

for (auto &Section : Obj->sections()) {
Expected<StringRef> NameOrErr = Section.getName();
Expand All @@ -44,23 +74,7 @@ Error CodeGenDataReader::mergeFromObjectFile(
Expected<StringRef> ContentsOrErr = Section.getContents();
if (!ContentsOrErr)
return ContentsOrErr.takeError();
auto *Data = reinterpret_cast<const unsigned char *>(ContentsOrErr->data());
auto *EndData = Data + ContentsOrErr->size();

if (*NameOrErr == CGOutLineName) {
if (CombinedHash)
*CombinedHash =
stable_hash_combine(*CombinedHash, xxh3_64bits(*ContentsOrErr));
// In case dealing with an executable that has concatenated cgdata,
// we want to merge them into a single cgdata.
// Although it's not a typical workflow, we support this scenario.
while (Data != EndData) {
OutlinedHashTreeRecord LocalOutlineRecord;
LocalOutlineRecord.deserialize(Data);
GlobalOutlineRecord.merge(LocalOutlineRecord);
}
}
// TODO: Add support for other cgdata sections.
processSectionContents(*NameOrErr, *ContentsOrErr);
}

return Error::success();
Expand All @@ -69,7 +83,8 @@ Error CodeGenDataReader::mergeFromObjectFile(
Error IndexedCodeGenDataReader::read() {
using namespace support;

// The smallest header with the version 1 is 24 bytes
// The smallest header with the version 1 is 24 bytes.
// Do not update this value even with the new version of the header.
const unsigned MinHeaderSize = 24;
if (DataBuffer->getBufferSize() < MinHeaderSize)
return error(cgdata_error::bad_header);
Expand All @@ -87,6 +102,12 @@ Error IndexedCodeGenDataReader::read() {
return error(cgdata_error::eof);
HashTreeRecord.deserialize(Ptr);
}
if (hasStableFunctionMap()) {
const unsigned char *Ptr = Start + Header.StableFunctionMapOffset;
if (Ptr >= End)
return error(cgdata_error::eof);
FunctionMapRecord.deserialize(Ptr);
}

return success();
}
Expand Down Expand Up @@ -152,6 +173,8 @@ Error TextCodeGenDataReader::read() {
StringRef Str = Line->drop_front().rtrim();
if (Str.equals_insensitive("outlined_hash_tree"))
DataKind |= CGDataKind::FunctionOutlinedHashTree;
else if (Str.equals_insensitive("stable_function_map"))
DataKind |= CGDataKind::StableFunctionMergingMap;
else
return error(cgdata_error::bad_header);
}
Expand All @@ -170,8 +193,8 @@ Error TextCodeGenDataReader::read() {
yaml::Input YOS(StringRef(Pos, Size));
if (hasOutlinedHashTree())
HashTreeRecord.deserializeYAML(YOS);

// TODO: Add more yaml cgdata in order
if (hasStableFunctionMap())
FunctionMapRecord.deserializeYAML(YOS);

return Error::success();
}
Expand Down
30 changes: 28 additions & 2 deletions llvm/lib/CGData/CodeGenDataWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,13 @@ void CodeGenDataWriter::addRecord(OutlinedHashTreeRecord &Record) {
DataKind |= CGDataKind::FunctionOutlinedHashTree;
}

/// Take over the stable function map held by \p Record and note that this
/// writer now carries stable-function-merging data.
///
/// \param Record source record; its FunctionMap must be non-null and is
///               moved from by this call.
void CodeGenDataWriter::addRecord(StableFunctionMapRecord &Record) {
  assert(Record.FunctionMap && "empty function map in the record");

  // Flag the data kind first, then steal the map from the incoming record.
  DataKind |= CGDataKind::StableFunctionMergingMap;
  FunctionMapRecord.FunctionMap = std::move(Record.FunctionMap);
}

Error CodeGenDataWriter::write(raw_fd_ostream &OS) {
CGDataOStream COS(OS);
return writeImpl(COS);
Expand All @@ -68,8 +75,11 @@ Error CodeGenDataWriter::writeHeader(CGDataOStream &COS) {
if (static_cast<bool>(DataKind & CGDataKind::FunctionOutlinedHashTree))
Header.DataKind |=
static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree);

if (static_cast<bool>(DataKind & CGDataKind::StableFunctionMergingMap))
Header.DataKind |=
static_cast<uint32_t>(CGDataKind::StableFunctionMergingMap);
Header.OutlinedHashTreeOffset = 0;
Header.StableFunctionMapOffset = 0;

// Only write up to the CGDataKind. We need to remember the offset of the
// remaining fields to allow back-patching later.
Expand All @@ -83,6 +93,12 @@ Error CodeGenDataWriter::writeHeader(CGDataOStream &COS) {
// Reserve the space for OutlinedHashTreeOffset field.
COS.write(0);

// Save the location of Header.StableFunctionMapOffset field in \c COS.
StableFunctionMapOffset = COS.tell();

// Reserve the space for StableFunctionMapOffset field.
COS.write(0);

return Error::success();
}

Expand All @@ -93,10 +109,14 @@ Error CodeGenDataWriter::writeImpl(CGDataOStream &COS) {
uint64_t OutlinedHashTreeFieldStart = COS.tell();
if (hasOutlinedHashTree())
HashTreeRecord.serialize(COS.OS);
uint64_t StableFunctionMapFieldStart = COS.tell();
if (hasStableFunctionMap())
FunctionMapRecord.serialize(COS.OS);

// Back patch the offsets.
CGDataPatchItem PatchItems[] = {
{OutlinedHashTreeOffset, &OutlinedHashTreeFieldStart, 1}};
{OutlinedHashTreeOffset, &OutlinedHashTreeFieldStart, 1},
{StableFunctionMapOffset, &StableFunctionMapFieldStart, 1}};
COS.patch(PatchItems);

return Error::success();
Expand All @@ -106,6 +126,9 @@ Error CodeGenDataWriter::writeHeaderText(raw_fd_ostream &OS) {
if (hasOutlinedHashTree())
OS << "# Outlined stable hash tree\n:outlined_hash_tree\n";

if (hasStableFunctionMap())
OS << "# Stable function map\n:stable_function_map\n";

// TODO: Add more data types in this header

return Error::success();
Expand All @@ -119,6 +142,9 @@ Error CodeGenDataWriter::writeText(raw_fd_ostream &OS) {
if (hasOutlinedHashTree())
HashTreeRecord.serializeYAML(YOS);

if (hasStableFunctionMap())
FunctionMapRecord.serializeYAML(YOS);

// TODO: Write more yaml cgdata in order

return Error::success();
Expand Down
25 changes: 25 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2044,6 +2044,31 @@ void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
Observer.changedInstr(MI);
}

/// Match a G_SUB whose right-hand side is an integer constant and prepare an
/// in-place rewrite to G_ADD of the negated constant:
///   (sub x, C) -> (add x, -C)
///
/// \param MI        the G_SUB instruction (cast to GSub below).
/// \param MatchInfo receives the build function that materializes -C and
///                  mutates \p MI into a G_ADD.
/// \returns true when both G_ADD and a constant of the result type are legal
///          (or we are before the legalizer), i.e. when the rewrite may be
///          applied; false otherwise.
bool CombinerHelper::matchCombineSubToAdd(MachineInstr &MI,
                                          BuildFnTy &MatchInfo) {
  GSub &Sub = cast<GSub>(MI);

  LLT Ty = MRI.getType(Sub.getReg(0));

  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {Ty}}))
    return false;

  if (!isConstantLegalOrBeforeLegalizer(Ty))
    return false;

  APInt Imm = getIConstantFromReg(Sub.getRHSReg(), MRI);

  MatchInfo = [=, &MI](MachineIRBuilder &B) {
    auto NegCst = B.buildConstant(Ty, -Imm);
    Observer.changingInstr(MI);
    MI.setDesc(B.getTII().get(TargetOpcode::G_ADD));
    MI.getOperand(2).setReg(NegCst.getReg(0));
    // "x - C" not wrapping unsigned says nothing about "x + (-C)", which
    // generally does wrap unsigned, so nuw can never be carried over.
    MI.clearFlag(MachineInstr::MIFlag::NoUWrap);
    // For the minimum signed value, -C wraps back to C itself. "x - INT_MIN"
    // does not overflow for negative x, but "x + INT_MIN" does, so nsw is not
    // preserved in that one case and must be dropped.
    if (Imm.isMinSignedValue())
      MI.clearFlag(MachineInstr::MIFlag::NoSWrap);
    Observer.changedInstr(MI);
  };
  return true;
}

// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
RegisterImmPair &MatchData) {
Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/CodeGen/SafeStack.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ class SafeStack {
SafeStack(Function &F, const TargetLoweringBase &TL, const DataLayout &DL,
DomTreeUpdater *DTU, ScalarEvolution &SE)
: F(F), TL(TL), DL(DL), DTU(DTU), SE(SE),
StackPtrTy(PointerType::getUnqual(F.getContext())),
StackPtrTy(DL.getAllocaPtrType(F.getContext())),
IntPtrTy(DL.getIntPtrType(F.getContext())),
Int32Ty(Type::getInt32Ty(F.getContext())) {}

Expand Down Expand Up @@ -616,7 +616,8 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
IRBuilder<> IRBUser(InsertBefore);
Value *Off =
IRBUser.CreatePtrAdd(BasePointer, ConstantInt::get(Int32Ty, -Offset));
Value *Replacement = IRBUser.CreateBitCast(Off, AI->getType(), Name);
Value *Replacement =
IRBUser.CreateAddrSpaceCast(Off, AI->getType(), Name);

if (auto *PHI = dyn_cast<PHINode>(User))
// PHI nodes may have multiple incoming edges from the same BB (why??),
Expand Down
5 changes: 4 additions & 1 deletion llvm/lib/CodeGen/TargetLoweringBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1849,7 +1849,8 @@ TargetLoweringBase::getDefaultSafeStackPointerLocation(IRBuilderBase &IRB,
auto UnsafeStackPtr =
dyn_cast_or_null<GlobalVariable>(M->getNamedValue(UnsafeStackPtrVar));

Type *StackPtrTy = PointerType::getUnqual(M->getContext());
const DataLayout &DL = M->getDataLayout();
PointerType *StackPtrTy = DL.getAllocaPtrType(M->getContext());

if (!UnsafeStackPtr) {
auto TLSModel = UseTLS ?
Expand All @@ -1863,6 +1864,8 @@ TargetLoweringBase::getDefaultSafeStackPointerLocation(IRBuilderBase &IRB,
UnsafeStackPtrVar, nullptr, TLSModel);
} else {
// The variable exists, check its type and attributes.
//
// FIXME: Move to IR verifier.
if (UnsafeStackPtr->getValueType() != StackPtrTy)
report_fatal_error(Twine(UnsafeStackPtrVar) + " must have void* type");
if (UseTLS != UnsafeStackPtr->isThreadLocal())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ constexpr StringRef StubSuffix = "$__stub_ptr";

void JITLinkRedirectableSymbolManager::emitRedirectableSymbols(
std::unique_ptr<MaterializationResponsibility> R,
const SymbolAddrMap &InitialDests) {
SymbolAddrMap InitialDests) {

auto &ES = ObjLinkingLayer.getExecutionSession();
Triple TT = ES.getTargetTriple();
Expand Down
8 changes: 4 additions & 4 deletions llvm/lib/ExecutionEngine/Orc/RedirectionManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ using namespace llvm::orc;
void RedirectionManager::anchor() {}

Error RedirectableSymbolManager::createRedirectableSymbols(
ResourceTrackerSP RT, const SymbolMap &InitialDests) {
ResourceTrackerSP RT, SymbolMap InitialDests) {
auto &JD = RT->getJITDylib();
return JD.define(
std::make_unique<RedirectableMaterializationUnit>(*this, InitialDests),
RT);
return JD.define(std::make_unique<RedirectableMaterializationUnit>(
*this, std::move(InitialDests)),
RT);
}
1 change: 0 additions & 1 deletion llvm/lib/Object/WasmObjectFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -999,7 +999,6 @@ Error WasmObjectFile::parseTargetFeaturesSection(ReadContext &Ctx) {
Feature.Prefix = readUint8(Ctx);
switch (Feature.Prefix) {
case wasm::WASM_FEATURE_PREFIX_USED:
case wasm::WASM_FEATURE_PREFIX_REQUIRED:
case wasm::WASM_FEATURE_PREFIX_DISALLOWED:
break;
default:
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/ObjectYAML/WasmYAML.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,6 @@ void ScalarEnumerationTraits<WasmYAML::FeaturePolicyPrefix>::enumeration(
IO &IO, WasmYAML::FeaturePolicyPrefix &Kind) {
#define ECase(X) IO.enumCase(Kind, #X, wasm::WASM_FEATURE_PREFIX_##X);
ECase(USED);
ECase(REQUIRED);
ECase(DISALLOWED);
#undef ECase
}
Expand Down
47 changes: 29 additions & 18 deletions llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5495,6 +5495,13 @@ bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
}
case Intrinsic::amdgcn_mov_dpp8:
return LaneOp.addImm(MI.getOperand(3).getImm()).getReg(0);
case Intrinsic::amdgcn_update_dpp:
return LaneOp.addUse(Src1)
.addImm(MI.getOperand(4).getImm())
.addImm(MI.getOperand(5).getImm())
.addImm(MI.getOperand(6).getImm())
.addImm(MI.getOperand(7).getImm())
.getReg(0);
default:
llvm_unreachable("unhandled lane op");
}
Expand All @@ -5504,7 +5511,7 @@ bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
Register Src0 = MI.getOperand(2).getReg();
Register Src1, Src2;
if (IID == Intrinsic::amdgcn_readlane || IID == Intrinsic::amdgcn_writelane ||
IsSetInactive || IsPermLane16) {
IID == Intrinsic::amdgcn_update_dpp || IsSetInactive || IsPermLane16) {
Src1 = MI.getOperand(3).getReg();
if (IID == Intrinsic::amdgcn_writelane || IsPermLane16) {
Src2 = MI.getOperand(4).getReg();
Expand All @@ -5514,15 +5521,21 @@ bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
LLT Ty = MRI.getType(DstReg);
unsigned Size = Ty.getSizeInBits();

if (Size == 32) {
unsigned SplitSize = 32;
if (IID == Intrinsic::amdgcn_update_dpp && (Size % 64 == 0) &&
ST.hasDPALU_DPP() &&
AMDGPU::isLegalDPALU_DPPControl(MI.getOperand(4).getImm()))
SplitSize = 64;

if (Size == SplitSize) {
// Already legal
return true;
}

if (Size < 32) {
Src0 = B.buildAnyExt(S32, Src0).getReg(0);

if (IsSetInactive || IsPermLane16)
if (IID == Intrinsic::amdgcn_update_dpp || IsSetInactive || IsPermLane16)
Src1 = B.buildAnyExt(LLT::scalar(32), Src1).getReg(0);

if (IID == Intrinsic::amdgcn_writelane)
Expand All @@ -5534,31 +5547,28 @@ bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
return true;
}

if (Size % 32 != 0)
if (Size % SplitSize != 0)
return false;

LLT PartialResTy = S32;
LLT PartialResTy = LLT::scalar(SplitSize);
if (Ty.isVector()) {
LLT EltTy = Ty.getElementType();
switch (EltTy.getSizeInBits()) {
case 16:
PartialResTy = Ty.changeElementCount(ElementCount::getFixed(2));
break;
case 32:
unsigned EltSize = EltTy.getSizeInBits();
if (EltSize == SplitSize) {
PartialResTy = EltTy;
break;
default:
// Handle all other cases via S32 pieces;
break;
} else if (EltSize == 16 || EltSize == 32) {
unsigned NElem = SplitSize / EltSize;
PartialResTy = Ty.changeElementCount(ElementCount::getFixed(NElem));
}
// Handle all other cases via S32/S64 pieces;
}

SmallVector<Register, 2> PartialRes;
unsigned NumParts = Size / 32;
SmallVector<Register, 4> PartialRes;
unsigned NumParts = Size / SplitSize;
MachineInstrBuilder Src0Parts = B.buildUnmerge(PartialResTy, Src0);
MachineInstrBuilder Src1Parts, Src2Parts;

if (IsSetInactive || IsPermLane16)
if (IID == Intrinsic::amdgcn_update_dpp || IsSetInactive || IsPermLane16)
Src1Parts = B.buildUnmerge(PartialResTy, Src1);

if (IID == Intrinsic::amdgcn_writelane)
Expand All @@ -5567,7 +5577,7 @@ bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
for (unsigned i = 0; i < NumParts; ++i) {
Src0 = Src0Parts.getReg(i);

if (IsSetInactive || IsPermLane16)
if (IID == Intrinsic::amdgcn_update_dpp || IsSetInactive || IsPermLane16)
Src1 = Src1Parts.getReg(i);

if (IID == Intrinsic::amdgcn_writelane)
Expand Down Expand Up @@ -7555,6 +7565,7 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::amdgcn_set_inactive:
case Intrinsic::amdgcn_set_inactive_chain_arg:
case Intrinsic::amdgcn_mov_dpp8:
case Intrinsic::amdgcn_update_dpp:
return legalizeLaneOp(Helper, MI, IntrID);
case Intrinsic::amdgcn_s_buffer_prefetch_data:
return legalizeSBufferPrefetch(Helper, MI);
Expand Down
48 changes: 32 additions & 16 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6162,13 +6162,20 @@ static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
IID == Intrinsic::amdgcn_set_inactive_chain_arg;
SDLoc SL(N);
MVT IntVT = MVT::getIntegerVT(ValSize);
const GCNSubtarget *ST = TLI.getSubtarget();
unsigned SplitSize = 32;
if (IID == Intrinsic::amdgcn_update_dpp && (ValSize % 64 == 0) &&
ST->hasDPALU_DPP() &&
AMDGPU::isLegalDPALU_DPPControl(N->getConstantOperandVal(3)))
SplitSize = 64;

auto createLaneOp = [&DAG, &SL, N, IID](SDValue Src0, SDValue Src1,
SDValue Src2, MVT ValT) -> SDValue {
SmallVector<SDValue, 8> Operands;
switch (IID) {
case Intrinsic::amdgcn_permlane16:
case Intrinsic::amdgcn_permlanex16:
case Intrinsic::amdgcn_update_dpp:
Operands.push_back(N->getOperand(6));
Operands.push_back(N->getOperand(5));
Operands.push_back(N->getOperand(4));
Expand Down Expand Up @@ -6206,13 +6213,15 @@ static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
SDValue Src0 = N->getOperand(1);
SDValue Src1, Src2;
if (IID == Intrinsic::amdgcn_readlane || IID == Intrinsic::amdgcn_writelane ||
IID == Intrinsic::amdgcn_mov_dpp8 || IsSetInactive || IsPermLane16) {
IID == Intrinsic::amdgcn_mov_dpp8 ||
IID == Intrinsic::amdgcn_update_dpp || IsSetInactive || IsPermLane16) {
Src1 = N->getOperand(2);
if (IID == Intrinsic::amdgcn_writelane || IsPermLane16)
if (IID == Intrinsic::amdgcn_writelane ||
IID == Intrinsic::amdgcn_update_dpp || IsPermLane16)
Src2 = N->getOperand(3);
}

if (ValSize == 32) {
if (ValSize == SplitSize) {
// Already legal
return SDValue();
}
Expand All @@ -6222,7 +6231,7 @@ static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
Src0 = DAG.getAnyExtOrTrunc(IsFloat ? DAG.getBitcast(IntVT, Src0) : Src0,
SL, MVT::i32);

if (IsSetInactive || IsPermLane16) {
if (IID == Intrinsic::amdgcn_update_dpp || IsSetInactive || IsPermLane16) {
Src1 = DAG.getAnyExtOrTrunc(IsFloat ? DAG.getBitcast(IntVT, Src1) : Src1,
SL, MVT::i32);
}
Expand All @@ -6237,7 +6246,7 @@ static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
return IsFloat ? DAG.getBitcast(VT, Trunc) : Trunc;
}

if (ValSize % 32 != 0)
if (ValSize % SplitSize != 0)
return SDValue();

auto unrollLaneOp = [&DAG, &SL](SDNode *N) -> SDValue {
Expand Down Expand Up @@ -6284,21 +6293,26 @@ static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
switch (MVT::SimpleValueType EltTy =
VT.getVectorElementType().getSimpleVT().SimpleTy) {
case MVT::i32:
case MVT::f32: {
SDValue LaneOp = createLaneOp(Src0, Src1, Src2, VT.getSimpleVT());
return unrollLaneOp(LaneOp.getNode());
}
case MVT::f32:
if (SplitSize == 32) {
SDValue LaneOp = createLaneOp(Src0, Src1, Src2, VT.getSimpleVT());
return unrollLaneOp(LaneOp.getNode());
}
[[fallthrough]];
case MVT::i16:
case MVT::f16:
case MVT::bf16: {
MVT SubVecVT = MVT::getVectorVT(EltTy, 2);
unsigned SubVecNumElt =
SplitSize / VT.getVectorElementType().getSizeInBits();
MVT SubVecVT = MVT::getVectorVT(EltTy, SubVecNumElt);
SmallVector<SDValue, 4> Pieces;
SDValue Src0SubVec, Src1SubVec, Src2SubVec;
for (unsigned i = 0, EltIdx = 0; i < ValSize / 32; i++) {
for (unsigned i = 0, EltIdx = 0; i < ValSize / SplitSize; i++) {
Src0SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SL, SubVecVT, Src0,
DAG.getConstant(EltIdx, SL, MVT::i32));

if (IsSetInactive || IsPermLane16)
if (IID == Intrinsic::amdgcn_update_dpp || IsSetInactive ||
IsPermLane16)
Src1SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SL, SubVecVT, Src1,
DAG.getConstant(EltIdx, SL, MVT::i32));

Expand All @@ -6307,10 +6321,10 @@ static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
DAG.getConstant(EltIdx, SL, MVT::i32));

Pieces.push_back(
IsSetInactive || IsPermLane16
IID == Intrinsic::amdgcn_update_dpp || IsSetInactive || IsPermLane16
? createLaneOp(Src0SubVec, Src1SubVec, Src2, SubVecVT)
: createLaneOp(Src0SubVec, Src1, Src2SubVec, SubVecVT));
EltIdx += 2;
EltIdx += SubVecNumElt;
}
return DAG.getNode(ISD::CONCAT_VECTORS, SL, VT, Pieces);
}
Expand All @@ -6320,10 +6334,11 @@ static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
}
}

MVT VecVT = MVT::getVectorVT(MVT::i32, ValSize / 32);
MVT VecVT =
MVT::getVectorVT(MVT::getIntegerVT(SplitSize), ValSize / SplitSize);
Src0 = DAG.getBitcast(VecVT, Src0);

if (IsSetInactive || IsPermLane16)
if (IID == Intrinsic::amdgcn_update_dpp || IsSetInactive || IsPermLane16)
Src1 = DAG.getBitcast(VecVT, Src1);

if (IID == Intrinsic::amdgcn_writelane)
Expand Down Expand Up @@ -8833,6 +8848,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::amdgcn_set_inactive:
case Intrinsic::amdgcn_set_inactive_chain_arg:
case Intrinsic::amdgcn_mov_dpp8:
case Intrinsic::amdgcn_update_dpp:
return lowerLaneOp(*this, Op.getNode(), DAG);
default:
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
Expand Down
9 changes: 0 additions & 9 deletions llvm/lib/Target/RISCV/RISCVGISel.td
Original file line number Diff line number Diff line change
Expand Up @@ -96,15 +96,6 @@ def gi_sh2add_uw_op : GIComplexOperandMatcher<s32, "selectSHXADD_UWOp<2>">,
def gi_sh3add_uw_op : GIComplexOperandMatcher<s32, "selectSHXADD_UWOp<3>">,
GIComplexPatternEquiv<sh3add_uw_op>;

// FIXME: Canonicalize (sub X, C) -> (add X, -C) earlier.
def : Pat<(XLenVT (sub GPR:$rs1, simm12Plus1:$imm)),
(ADDI GPR:$rs1, (NegImm simm12Plus1:$imm))>;

let Predicates = [IsRV64] in {
def : Pat<(i32 (sub GPR:$rs1, simm12Plus1i32:$imm)),
(ADDIW GPR:$rs1, (i64 (NegImm $imm)))>;
}

// Ptr type used in patterns with GlobalISelEmitter
def PtrVT : PtrValueTypeByHwMode<XLenVT, 0>;

Expand Down
33 changes: 24 additions & 9 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1333,9 +1333,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// expansion to a build_vector of 0s.
setOperationAction(ISD::UNDEF, VT, Custom);

setOperationAction({ISD::CONCAT_VECTORS, ISD::VECTOR_REVERSE,
ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
ISD::VECTOR_COMPRESS},
setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
ISD::EXTRACT_SUBVECTOR, ISD::VECTOR_REVERSE,
ISD::VECTOR_SHUFFLE, ISD::VECTOR_COMPRESS},
VT, Custom);

// FIXME: mload, mstore, mgather, mscatter, vp_gather/scatter can be
Expand All @@ -1359,7 +1360,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
Custom);
setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT,
Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
if (Subtarget.hasStdExtZfhmin()) {
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
} else {
Expand All @@ -1384,7 +1384,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (VT.getVectorElementType() == MVT::bf16) {
setOperationAction(ISD::BITCAST, VT, Custom);
setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
if (Subtarget.hasStdExtZfbfmin()) {
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
} else {
Expand All @@ -1406,10 +1405,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
continue;
}

setOperationAction({ISD::BUILD_VECTOR, ISD::VECTOR_SHUFFLE,
ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
ISD::SCALAR_TO_VECTOR},
VT, Custom);
setOperationAction({ISD::BUILD_VECTOR, ISD::SCALAR_TO_VECTOR}, VT,
Custom);

setOperationAction(
{ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);
Expand Down Expand Up @@ -4819,6 +4816,24 @@ static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,

MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

// zvfhmin and zvfbfmin don't have vfslide1{down,up}.vf so use fmv.x.h +
// vslide1{down,up}.vx instead.
if (VT.getVectorElementType() == MVT::bf16 ||
(VT.getVectorElementType() == MVT::f16 &&
!Subtarget.hasVInstructionsF16())) {
MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
Splat =
DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Splat);
V2 = DAG.getBitcast(
IntVT, convertToScalableVector(ContainerVT, V2, DAG, Subtarget));
SDValue Vec = DAG.getNode(
IsVSlidedown ? RISCVISD::VSLIDE1DOWN_VL : RISCVISD::VSLIDE1UP_VL, DL,
IntVT, DAG.getUNDEF(IntVT), V2, Splat, TrueMask, VL);
Vec = DAG.getBitcast(ContainerVT, Vec);
return convertFromScalableVector(VT, Vec, DAG, Subtarget);
}

auto OpCode = IsVSlidedown ?
(VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
(VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
Original file line number Diff line number Diff line change
Expand Up @@ -2305,7 +2305,8 @@ foreach vti = AllIntegerVectors in {

// 11.16. Vector Integer Move Instructions
foreach vti = AllVectors in {
let Predicates = GetVTypePredicates<vti>.Predicates in {
defvar ivti = GetIntVTypeInfo<vti>.Vti;
let Predicates = GetVTypePredicates<ivti>.Predicates in {
def : Pat<(vti.Vector (riscv_vmv_v_v_vl vti.RegClass:$passthru,
vti.RegClass:$rs2, VLOpFrag)),
(!cast<Instruction>("PseudoVMV_V_V_"#vti.LMul.MX)
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -518,7 +518,6 @@ void WebAssemblyAsmPrinter::EmitTargetFeatures(Module &M) {

// Silently ignore invalid metadata
if (Entry.Prefix != wasm::WASM_FEATURE_PREFIX_USED &&
Entry.Prefix != wasm::WASM_FEATURE_PREFIX_REQUIRED &&
Entry.Prefix != wasm::WASM_FEATURE_PREFIX_DISALLOWED)
return;

Expand Down
44 changes: 23 additions & 21 deletions llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1092,8 +1092,8 @@ static void unstackifyVRegsUsedInSplitBB(MachineBasicBlock &MBB,
}
}

// Wrap the given range of instruction with try-delegate. RangeBegin and
// RangeEnd are inclusive.
// Wrap the given range of instructions with a try-delegate that targets
// 'UnwindDest'. RangeBegin and RangeEnd are inclusive.
void WebAssemblyCFGStackify::addNestedTryDelegate(
MachineInstr *RangeBegin, MachineInstr *RangeEnd,
MachineBasicBlock *UnwindDest) {
Expand Down Expand Up @@ -1141,23 +1141,24 @@ void WebAssemblyCFGStackify::addNestedTryDelegate(
} else {
// When the split pos is in the middle of a BB, we split the BB into two and
// put the 'delegate' BB in between. We normally create a split BB and make
// it a successor of the original BB (PostSplit == true), but in case the BB
// is an EH pad and the split pos is before 'catch', we should preserve the
// BB's property, including that it is an EH pad, in the later part of the
// BB, where 'catch' is. In this case we set PostSplit to false.
bool PostSplit = true;
// it a successor of the original BB (CatchAfterSplit == false), but in case
// the BB is an EH pad and there is a 'catch' after the split pos
// (CatchAfterSplit == true), we should preserve the BB's property,
// including that it is an EH pad, in the later part of the BB, where the
// 'catch' is.
bool CatchAfterSplit = false;
if (EndBB->isEHPad()) {
for (auto I = MachineBasicBlock::iterator(SplitPos), E = EndBB->end();
I != E; ++I) {
if (WebAssembly::isCatch(I->getOpcode())) {
PostSplit = false;
CatchAfterSplit = true;
break;
}
}
}

MachineBasicBlock *PreBB = nullptr, *PostBB = nullptr;
if (PostSplit) {
if (!CatchAfterSplit) {
// If the range's end instruction is in the middle of the BB, we split the
// BB into two and insert the delegate BB in between.
// - Before:
Expand Down Expand Up @@ -1208,7 +1209,7 @@ void WebAssemblyCFGStackify::addNestedTryDelegate(
PreBB->addSuccessor(PostBB);
}

// Add 'delegate' instruction in the delegate BB created above.
// Add a 'delegate' instruction in the delegate BB created above.
MachineInstr *Delegate = BuildMI(DelegateBB, RangeEnd->getDebugLoc(),
TII.get(WebAssembly::DELEGATE))
.addMBB(UnwindDest);
Expand Down Expand Up @@ -1243,7 +1244,7 @@ bool WebAssemblyCFGStackify::fixCallUnwindMismatches(MachineFunction &MF) {
// catch ;; N == 3
// end
// ;; N == 4 (to caller)

//
// 1. When an instruction may throw, but the EH pad it will unwind to can be
// different from the original CFG.
//
Expand Down Expand Up @@ -1272,9 +1273,9 @@ bool WebAssemblyCFGStackify::fixCallUnwindMismatches(MachineFunction &MF) {
// ...
// end_try
//
// Now if bar() throws, it is going to end up ip in bb2, not bb3, where it
// is supposed to end up. We solve this problem by wrapping the mismatching
// call with an inner try-delegate that rethrows the exception to the right
// Now if bar() throws, it is going to end up in bb2, not bb3, where it is
// supposed to end up. We solve this problem by wrapping the mismatching call
// with an inner try-delegate that rethrows the exception to the right
// 'catch'.
//
// try
Expand Down Expand Up @@ -1312,7 +1313,7 @@ bool WebAssemblyCFGStackify::fixCallUnwindMismatches(MachineFunction &MF) {
// ...
// end_try
//
// Now if bar() throws, it is going to end up ip in bb2, when it is supposed
// Now if bar() throws, it is going to end up in bb2, when it is supposed to
// throw up to the caller. We solve this problem in the same way, but in this
// case 'delegate's immediate argument is the number of block depths + 1,
// which means it rethrows to the caller.
Expand All @@ -1336,7 +1337,7 @@ bool WebAssemblyCFGStackify::fixCallUnwindMismatches(MachineFunction &MF) {
// invoke within a BB.)

SmallVector<const MachineBasicBlock *, 8> EHPadStack;
// Range of intructions to be wrapped in a new nested try/catch. A range
// Range of instructions to be wrapped in a new nested try~delegate. A range
// exists in a single BB and does not span multiple BBs.
using TryRange = std::pair<MachineInstr *, MachineInstr *>;
// In original CFG, <unwind destination BB, a vector of try ranges>
Expand Down Expand Up @@ -1522,14 +1523,15 @@ bool WebAssemblyCFGStackify::fixCatchUnwindMismatches(MachineFunction &MF) {
// throws a foreign exception that is not caught by ehpad A, and its next
// destination should be the caller. But after control flow linearization,
// another EH pad can be placed in between (e.g. ehpad B here), making the
// next unwind destination incorrect. In this case, the foreign exception
// will instead go to ehpad B and will be caught there instead. In this
// example the correct next unwind destination is the caller, but it can be
// another outer catch in other cases.
// next unwind destination incorrect. In this case, the foreign exception will
// instead go to ehpad B and will be caught there. In this example the
// correct next unwind destination is the caller, but it can be another outer
// catch in other cases.
//
// There is no specific 'call' or 'throw' instruction to wrap with a
// try-delegate, so we wrap the whole try-catch-end with a try-delegate and
// make it rethrow to the right destination, as in the example below:
// make it rethrow to the right destination, which is the caller in the
// example below:
// try
// try ;; (new)
// try
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4495,7 +4495,7 @@ bool X86AsmParser::matchAndEmitIntelInstruction(
// compatible with gas.
StringRef Mnemonic = (static_cast<X86Operand &>(*Operands[0])).getToken();
if (UnsizedMemOp) {
static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
static const char *const PtrSizedInstrs[] = {"call", "jmp", "push", "pop"};
for (const char *Instr : PtrSizedInstrs) {
if (Mnemonic == Instr) {
UnsizedMemOp->Mem.Size = getPointerWidth();
Expand Down
14 changes: 7 additions & 7 deletions llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2663,15 +2663,15 @@ LSRInstance::OptimizeLoopTermCond() {
// Conservatively avoid trying to use the post-inc value in non-latch
// exits if there may be pre-inc users in intervening blocks.
if (LatchBlock != ExitingBlock)
for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI)
for (const IVStrideUse &UI : IU)
// Test if the use is reachable from the exiting block. This dominator
// query is a conservative approximation of reachability.
if (&*UI != CondUse &&
!DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {
if (&UI != CondUse &&
!DT.properlyDominates(UI.getUser()->getParent(), ExitingBlock)) {
// Conservatively assume there may be reuse if the quotient of their
// strides could be a legal scale.
const SCEV *A = IU.getStride(*CondUse, L);
const SCEV *B = IU.getStride(*UI, L);
const SCEV *B = IU.getStride(UI, L);
if (!A || !B) continue;
if (SE.getTypeSizeInBits(A->getType()) !=
SE.getTypeSizeInBits(B->getType())) {
Expand All @@ -2692,9 +2692,9 @@ LSRInstance::OptimizeLoopTermCond() {
C->getValue().isMinSignedValue())
goto decline_post_inc;
// Check for possible scaled-address reuse.
if (isAddressUse(TTI, UI->getUser(), UI->getOperandValToReplace())) {
MemAccessTy AccessTy = getAccessType(
TTI, UI->getUser(), UI->getOperandValToReplace());
if (isAddressUse(TTI, UI.getUser(), UI.getOperandValToReplace())) {
MemAccessTy AccessTy =
getAccessType(TTI, UI.getUser(), UI.getOperandValToReplace());
int64_t Scale = C->getSExtValue();
if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
/*BaseOffset=*/0,
Expand Down
3 changes: 1 addition & 2 deletions llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
Expand All @@ -27,11 +26,11 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>

Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Transforms/Utils/CloneFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DomTreeUpdater.h"
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Transforms/Utils/Evaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Transforms/Utils/FixIrreducible.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/FixIrreducible.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/Analysis/CycleAnalysis.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/LoopInfo.h"
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Transforms/Utils/FunctionComparator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Attributes.h"
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/InstIterator.h"
Expand Down
3 changes: 0 additions & 3 deletions llvm/lib/Transforms/Utils/LoopUnroll.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/ilist_iterator.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/DomTreeUpdater.h"
Expand All @@ -46,7 +45,6 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
Expand All @@ -56,7 +54,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GenericDomTree.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
#include <algorithm>

using namespace llvm;

Expand Down
2 changes: 0 additions & 2 deletions llvm/lib/Transforms/Utils/LowerSwitch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <vector>

Expand Down
2 changes: 0 additions & 2 deletions llvm/lib/Transforms/Utils/MisExpect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,13 @@
#include "llvm/Transforms/Utils/MisExpect.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormatVariadic.h"
#include <algorithm>
#include <cstdint>
Expand Down
2 changes: 0 additions & 2 deletions llvm/lib/Transforms/Utils/MoveAutoInit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,9 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"

using namespace llvm;
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Transforms/Utils/PredicateInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/FormattedStream.h"
#include <algorithm>
#define DEBUG_TYPE "predicateinfo"
using namespace llvm;
using namespace PatternMatch;
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Transforms/Utils/SSAUpdater.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Transforms/Utils/SplitModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <memory>
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/Transforms/Utils.h"
using namespace llvm;

namespace {
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Transforms/Utils/Utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@

#include "llvm/Transforms/Utils.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"

using namespace llvm;
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Transforms/Utils/VNCoercion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "vncoerce"

Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/GlobalISel/combine-integer.mir
Original file line number Diff line number Diff line change
Expand Up @@ -308,8 +308,8 @@ body: |
; CHECK: liveins: $w0, $w1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %a:_(s64) = COPY $x0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 71
; CHECK-NEXT: %sub:_(s64) = G_SUB %a, [[C]]
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -71
; CHECK-NEXT: %sub:_(s64) = G_ADD %a, [[C]]
; CHECK-NEXT: $x0 = COPY %sub(s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%a:_(s64) = COPY $x0
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AArch64/GlobalISel/combine-narrow-binop.mir
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,8 @@ body: |
; CHECK-LABEL: name: test_combine_trunc_sub_i128
; CHECK: %lhs:_(s128) = COPY $q0
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %lhs(s128)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
; CHECK-NEXT: %small:_(s32) = G_SUB [[TRUNC]], [[C]]
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -5
; CHECK-NEXT: %small:_(s32) = G_ADD [[TRUNC]], [[C]]
; CHECK-NEXT: $w0 = COPY %small(s32)
%lhs:_(s128) = COPY $q0
%rhs:_(s128) = G_CONSTANT i128 5
Expand All @@ -103,8 +103,8 @@ body: |
bb.1:
; CHECK-LABEL: name: test_combine_trunc_sub_i128_multi_use
; CHECK: %lhs:_(s128) = COPY $q0
; CHECK-NEXT: %rhs:_(s128) = G_CONSTANT i128 5
; CHECK-NEXT: %res:_(s128) = G_SUB %lhs, %rhs
; CHECK-NEXT: [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 -5
; CHECK-NEXT: %res:_(s128) = G_ADD %lhs, [[C]]
; CHECK-NEXT: %small:_(s32) = G_TRUNC %res(s128)
; CHECK-NEXT: $q0 = COPY %res(s128)
; CHECK-NEXT: $w0 = COPY %small(s32)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ body: |
%11:_(s8) = G_CONSTANT i8 1
; CHECK: [[T3:%[0-9]+]]:_(s8) = G_TRUNC [[T0]](s32)
%7:_(s8) = G_SUB %2, %11
; CHECK: [[T4:%[0-9]+]]:_(s8) = G_SUB [[T3]], {{.*}}
; CHECK: [[T4:%[0-9]+]]:_(s8) = G_ADD [[T3]], {{.*}}
G_BR %bb.3.exit
bb.3.exit:
; CHECK: bb.3.exit:
Expand Down Expand Up @@ -197,7 +197,7 @@ body: |
%7:_(s8) = G_CONSTANT i8 1
; CHECK: [[T3:%[0-9]+]]:_(s8) = G_TRUNC [[T0]](s32)
%8:_(s8) = G_SUB %2, %7
; CHECK: [[T4:%[0-9]+]]:_(s8) = G_SUB [[T3]], {{.*}}
; CHECK: [[T4:%[0-9]+]]:_(s8) = G_ADD [[T3]], {{.*}}
G_BR %bb.3.exit
bb.3.exit:
; CHECK: bb.3.exit:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -289,8 +289,8 @@ body: |
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %x:_(s32) = COPY $w0
; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: %op:_(s32) = G_SUB %x, %cst
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
; CHECK-NEXT: %op:_(s32) = G_ADD %x, [[C]]
; CHECK-NEXT: $w0 = COPY %op(s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%x:_(s32) = COPY $w0
Expand Down Expand Up @@ -488,3 +488,66 @@ body: |
RET_ReallyLR implicit $w0

...
---
name: sub_to_add
tracksRegLiveness: true
body: |
bb.1.entry:
liveins: $w0
; CHECK-LABEL: name: sub_to_add
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %x:_(s32) = COPY $w0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
; CHECK-NEXT: %op:_(s32) = G_ADD %x, [[C]]
; CHECK-NEXT: $w0 = COPY %op(s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%x:_(s32) = COPY $w0
%cst:_(s32) = G_CONSTANT i32 1
%op:_(s32) = G_SUB %x(s32), %cst
$w0 = COPY %op(s32)
RET_ReallyLR implicit $w0

...
---
name: sub_to_add_nuw
tracksRegLiveness: true
body: |
bb.1.entry:
liveins: $w0
; CHECK-LABEL: name: sub_to_add_nuw
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %x:_(s32) = COPY $w0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
; CHECK-NEXT: %op:_(s32) = G_ADD %x, [[C]]
; CHECK-NEXT: $w0 = COPY %op(s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%x:_(s32) = COPY $w0
%cst:_(s32) = G_CONSTANT i32 1
%op:_(s32) = nuw G_SUB %x(s32), %cst
$w0 = COPY %op(s32)
RET_ReallyLR implicit $w0

...
---
name: sub_to_add_nsw
tracksRegLiveness: true
body: |
bb.1.entry:
liveins: $w0
; CHECK-LABEL: name: sub_to_add_nsw
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %x:_(s32) = COPY $w0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
; CHECK-NEXT: %op:_(s32) = nsw G_ADD %x, [[C]]
; CHECK-NEXT: $w0 = COPY %op(s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%x:_(s32) = COPY $w0
%cst:_(s32) = G_CONSTANT i32 1
%op:_(s32) = nsw G_SUB %x(s32), %cst
$w0 = COPY %op(s32)
RET_ReallyLR implicit $w0

...
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1669,7 +1669,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 64, v3
; GFX6-NEXT: v_lshr_b64 v[6:7], v[0:1], v3
; GFX6-NEXT: v_lshl_b64 v[8:9], v[4:5], v8
; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 64, v3
; GFX6-NEXT: v_add_i32_e32 v2, vcc, 0xffffffc0, v3
; GFX6-NEXT: v_ashr_i64 v[10:11], v[4:5], v3
; GFX6-NEXT: v_or_b32_e32 v6, v6, v8
; GFX6-NEXT: v_ashrrev_i32_e32 v8, 31, v5
Expand All @@ -1692,7 +1692,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
; GFX8-NEXT: v_sub_u32_e32 v8, vcc, 64, v3
; GFX8-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
; GFX8-NEXT: v_lshlrev_b64 v[8:9], v8, v[4:5]
; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 64, v3
; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0xffffffc0, v3
; GFX8-NEXT: v_ashrrev_i64 v[10:11], v3, v[4:5]
; GFX8-NEXT: v_or_b32_e32 v6, v6, v8
; GFX8-NEXT: v_ashrrev_i32_e32 v8, 31, v5
Expand All @@ -1715,7 +1715,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
; GFX9-NEXT: v_sub_u32_e32 v8, 64, v3
; GFX9-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
; GFX9-NEXT: v_lshlrev_b64 v[8:9], v8, v[4:5]
; GFX9-NEXT: v_subrev_u32_e32 v2, 64, v3
; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffc0, v3
; GFX9-NEXT: v_ashrrev_i64 v[10:11], v3, v[4:5]
; GFX9-NEXT: v_or_b32_e32 v6, v6, v8
; GFX9-NEXT: v_ashrrev_i32_e32 v8, 31, v5
Expand All @@ -1735,7 +1735,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_bfe_i32 v4, v2, 0, 1
; GFX10-NEXT: v_sub_nc_u32_e32 v2, 64, v3
; GFX10-NEXT: v_subrev_nc_u32_e32 v10, 64, v3
; GFX10-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v3
; GFX10-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v3
; GFX10-NEXT: v_ashrrev_i32_e32 v5, 31, v4
Expand All @@ -1758,7 +1758,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_bfe_i32 v4, v2, 0, 1
; GFX11-NEXT: v_sub_nc_u32_e32 v2, 64, v3
; GFX11-NEXT: v_subrev_nc_u32_e32 v10, 64, v3
; GFX11-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v3
; GFX11-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v3
; GFX11-NEXT: v_ashrrev_i32_e32 v5, 31, v4
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1438,7 +1438,7 @@ define float @v_test_sitofp_i64_byte_to_f32(i64 %arg0) {
; SI-NEXT: v_ashrrev_i32_e32 v2, 31, v0
; SI-NEXT: v_ffbh_i32_e32 v3, 0
; SI-NEXT: v_add_i32_e32 v2, vcc, 32, v2
; SI-NEXT: v_subrev_i32_e32 v3, vcc, 1, v3
; SI-NEXT: v_add_i32_e32 v3, vcc, -1, v3
; SI-NEXT: v_mov_b32_e32 v1, 0
; SI-NEXT: v_min_u32_e32 v2, v3, v2
; SI-NEXT: v_lshl_b64 v[0:1], v[0:1], v2
Expand All @@ -1456,7 +1456,7 @@ define float @v_test_sitofp_i64_byte_to_f32(i64 %arg0) {
; VI-NEXT: v_ashrrev_i32_e32 v2, 31, v0
; VI-NEXT: v_ffbh_i32_e32 v3, 0
; VI-NEXT: v_add_u32_e32 v2, vcc, 32, v2
; VI-NEXT: v_subrev_u32_e32 v3, vcc, 1, v3
; VI-NEXT: v_add_u32_e32 v3, vcc, -1, v3
; VI-NEXT: v_mov_b32_e32 v1, 0
; VI-NEXT: v_min_u32_e32 v2, v3, v2
; VI-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
Expand Down
7 changes: 3 additions & 4 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4101,7 +4101,7 @@ define float @v_fdiv_f32_constrhs0_dynamic_25ulp(float %x) #0 {
; GFX10-NEXT: v_rcp_f32_e32 v1, 0x3f40e400
; GFX10-NEXT: v_frexp_mant_f32_e32 v2, v0
; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
; GFX10-NEXT: v_subrev_nc_u32_e32 v0, 14, v0
; GFX10-NEXT: v_add_nc_u32_e32 v0, -14, v0
; GFX10-NEXT: v_mul_f32_e32 v1, v2, v1
; GFX10-NEXT: v_ldexp_f32 v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
Expand All @@ -4112,10 +4112,9 @@ define float @v_fdiv_f32_constrhs0_dynamic_25ulp(float %x) #0 {
; GFX11-NEXT: v_rcp_f32_e32 v1, 0x3f40e400
; GFX11-NEXT: v_frexp_mant_f32_e32 v2, v0
; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_subrev_nc_u32_e32 v0, 14, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_mul_f32_e32 v1, v2, v1
; GFX11-NEXT: v_dual_mul_f32 v1, v2, v1 :: v_dual_add_nc_u32 v0, -14, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_ldexp_f32 v0, v1, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
; EG-LABEL: v_fdiv_f32_constrhs0_dynamic_25ulp:
Expand Down
Loading