474 changes: 314 additions & 160 deletions .github/workflows/release-binaries.yml

Large diffs are not rendered by default.

10 changes: 10 additions & 0 deletions .github/workflows/release-tasks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,20 @@ jobs:
needs:
- validate-tag
- release-create
strategy:
fail-fast: false
matrix:
runs-on:
- ubuntu-22.04
- windows-2022
- macos-13
- macos-14

uses: ./.github/workflows/release-binaries.yml
with:
release-version: ${{ needs.validate-tag.outputs.release-version }}
upload: true
runs-on: ${{ matrix.runs-on }}

release-sources:
name: Package Release Sources
Expand Down
6 changes: 5 additions & 1 deletion clang/cmake/caches/Release.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,13 @@ endfunction()
# cache file to CMake via -C. e.g.
#
# cmake -D LLVM_RELEASE_ENABLE_PGO=ON -C Release.cmake
set (DEFAULT_RUNTIMES "compiler-rt;libcxx")
if (NOT WIN32)
list(APPEND DEFAULT_RUNTIMES "libcxxabi" "libunwind")
endif()
set(LLVM_RELEASE_ENABLE_LTO THIN CACHE STRING "")
set(LLVM_RELEASE_ENABLE_PGO ON CACHE BOOL "")
set(LLVM_RELEASE_ENABLE_RUNTIMES "compiler-rt;libcxx;libcxxabi;libunwind" CACHE STRING "")
set(LLVM_RELEASE_ENABLE_RUNTIMES ${DEFAULT_RUNTIMES} CACHE STRING "")
set(LLVM_RELEASE_ENABLE_PROJECTS "clang;lld;lldb;clang-tools-extra;bolt;polly;mlir;flang" CACHE STRING "")
# Note we don't need to add install here, since it is one of the pre-defined
# steps.
Expand Down
2 changes: 1 addition & 1 deletion libc/docs/dev/header_generation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ Instructions
------------

Required Versions:
- Python Version: 3.6
- Python Version: 3.8
- PyYAML Version: 5.1

1. Keep full-build mode on when building, otherwise headers will not be
Expand Down
4 changes: 3 additions & 1 deletion llvm/docs/GettingStarted.rst
Original file line number Diff line number Diff line change
Expand Up @@ -291,10 +291,11 @@ uses the package and provides other details.
=========================================================== ============ ==========================================
Package Version Notes
=========================================================== ============ ==========================================
`CMake <http://cmake.org/>`__ >=3.20.0 Makefile/workspace generator
`CMake <http://cmake.org/>`_ >=3.20.0 Makefile/workspace generator
`python <http://www.python.org/>`_ >=3.8 Automated test suite\ :sup:`1`
`zlib <http://zlib.net>`_ >=1.2.3.4 Compression library\ :sup:`2`
`GNU Make <http://savannah.gnu.org/projects/make>`_ 3.79, 3.79.1 Makefile/build processor\ :sup:`3`
`PyYAML <https://pypi.org/project/PyYAML/>`_ >=5.1 Header generator\ :sup:`4`
=========================================================== ============ ==========================================

.. note::
Expand All @@ -305,6 +306,7 @@ Package Version Notes
#. Optional, adds compression / uncompression capabilities to selected LLVM
tools.
#. Optional, you can use any other build tool supported by CMake.
#. Only needed when building libc with New Headergen. Mainly used by libc.

Additionally, your compilation host is expected to have the usual plethora of
Unix utilities. Specifically:
Expand Down
13 changes: 9 additions & 4 deletions llvm/include/llvm/Analysis/Loads.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,7 @@ bool isDereferenceableAndAlignedPointer(const Value *V, Align Alignment,
/// quick local scan of the basic block containing ScanFrom, to determine if
/// the address is already accessed.
bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size,
const DataLayout &DL,
Instruction *ScanFrom = nullptr,
const DataLayout &DL, Instruction *ScanFrom,
AssumptionCache *AC = nullptr,
const DominatorTree *DT = nullptr,
const TargetLibraryInfo *TLI = nullptr);
Expand Down Expand Up @@ -100,12 +99,18 @@ bool isDereferenceableReadOnlyLoop(Loop *L, ScalarEvolution *SE,
/// quick local scan of the basic block containing ScanFrom, to determine if
/// the address is already accessed.
bool isSafeToLoadUnconditionally(Value *V, Type *Ty, Align Alignment,
const DataLayout &DL,
Instruction *ScanFrom = nullptr,
const DataLayout &DL, Instruction *ScanFrom,
AssumptionCache *AC = nullptr,
const DominatorTree *DT = nullptr,
const TargetLibraryInfo *TLI = nullptr);

/// Return true if speculation of the given load must be suppressed, either
/// because the load is not unordered or because the enclosing function is
/// instrumented by a sanitizer (thread, address or hwaddress) that a
/// speculative load would interfere with. If not suppressed,
/// dereferenceability and alignment must still be proven separately. Note:
/// This is only needed for raw reasoning; if you use
/// isSafeToSpeculativelyExecute (declared in ValueTracking.h), this is
/// handled internally.
bool mustSuppressSpeculation(const LoadInst &LI);

/// The default number of maximum instructions to scan in the block, used by
/// FindAvailableLoadedValue().
extern cl::opt<unsigned> DefMaxInstsToScan;
Expand Down
7 changes: 0 additions & 7 deletions llvm/include/llvm/Analysis/ValueTracking.h
Original file line number Diff line number Diff line change
Expand Up @@ -792,13 +792,6 @@ bool onlyUsedByLifetimeMarkers(const Value *V);
/// droppable instructions.
bool onlyUsedByLifetimeMarkersOrDroppableInsts(const Value *V);

/// Return true if speculation of the given load must be suppressed to avoid
/// ordering or interfering with an active sanitizer. If not suppressed,
/// dereferenceability and alignment must be proven separately. Note: This
/// is only needed for raw reasoning; if you use the interface below
/// (isSafeToSpeculativelyExecute), this is handled internally.
bool mustSuppressSpeculation(const LoadInst &LI);

/// Return true if the instruction does not have any effects besides
/// calculating the result and does not have undefined behavior.
///
Expand Down
8 changes: 8 additions & 0 deletions llvm/include/llvm/Support/MathExtras.h
Original file line number Diff line number Diff line change
Expand Up @@ -770,6 +770,14 @@ std::enable_if_t<std::is_signed_v<T>, T> MulOverflow(T X, T Y, T &Result) {
#endif
}

/// Type to force floating-point values onto the stack, so that x86 doesn't add
/// hidden precision, avoiding rounding differences on various platforms.
/// When targeting 32-bit x86 (`__i386__` or `_M_IX86` is defined) this is
/// `volatile float`; on every other target it is a plain `float`.
#if defined(__i386__) || defined(_M_IX86)
using stack_float_t = volatile float;
#else
using stack_float_t = float;
#endif

} // namespace llvm

#endif
16 changes: 16 additions & 0 deletions llvm/lib/Analysis/Loads.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,19 @@ bool llvm::isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L,
HeaderFirstNonPHI, AC, &DT);
}

static bool suppressSpeculativeLoadForSanitizers(const Instruction &CtxI) {
const Function &F = *CtxI.getFunction();
// Speculative load may create a race that did not exist in the source.
return F.hasFnAttribute(Attribute::SanitizeThread) ||
// Speculative load may load data from dirty regions.
F.hasFnAttribute(Attribute::SanitizeAddress) ||
F.hasFnAttribute(Attribute::SanitizeHWAddress);
}

/// A load must not be speculated when it is not unordered, or when the
/// enclosing function is instrumented by a sanitizer that forbids speculative
/// loads.
bool llvm::mustSuppressSpeculation(const LoadInst &LI) {
  // Loads that are not unordered are never speculated.
  if (!LI.isUnordered())
    return true;
  // Otherwise the decision depends only on the function's sanitizer
  // attributes.
  return suppressSpeculativeLoadForSanitizers(LI);
}

/// Check if executing a load of this pointer value cannot trap.
///
/// If DT and ScanFrom are specified this method performs context-sensitive
Expand All @@ -362,6 +375,9 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &S
AssumptionCache *AC,
const DominatorTree *DT,
const TargetLibraryInfo *TLI) {
if (ScanFrom && suppressSpeculativeLoadForSanitizers(*ScanFrom))
return false;

// If DT is not specified we can't make context-sensitive query
const Instruction* CtxI = DT ? ScanFrom : nullptr;
if (isDereferenceableAndAlignedPointer(V, Alignment, Size, DL, CtxI, AC, DT,
Expand Down
11 changes: 0 additions & 11 deletions llvm/lib/Analysis/ValueTracking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6798,17 +6798,6 @@ bool llvm::onlyUsedByLifetimeMarkersOrDroppableInsts(const Value *V) {
V, /* AllowLifetime */ true, /* AllowDroppable */ true);
}

bool llvm::mustSuppressSpeculation(const LoadInst &LI) {
if (!LI.isUnordered())
return true;
const Function &F = *LI.getFunction();
// Speculative load may create a race that did not exist in the source.
return F.hasFnAttribute(Attribute::SanitizeThread) ||
// Speculative load may load data from dirty regions.
F.hasFnAttribute(Attribute::SanitizeAddress) ||
F.hasFnAttribute(Attribute::SanitizeHWAddress);
}

bool llvm::isSafeToSpeculativelyExecute(const Instruction *Inst,
const Instruction *CtxI,
AssumptionCache *AC,
Expand Down
11 changes: 6 additions & 5 deletions llvm/lib/CodeGen/CalcSpillWeights.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <tuple>
Expand Down Expand Up @@ -257,7 +258,9 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
return -1.0f;
}

float Weight = 1.0f;
// Force Weight onto the stack so that x86 doesn't add hidden precision,
// similar to HWeight below.
stack_float_t Weight = 1.0f;
if (IsSpillable) {
// Get loop info for mi.
if (MI->getParent() != MBB) {
Expand All @@ -284,11 +287,9 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
Register HintReg = copyHint(MI, LI.reg(), TRI, MRI);
if (!HintReg)
continue;
// Force hweight onto the stack so that x86 doesn't add hidden precision,
// Force HWeight onto the stack so that x86 doesn't add hidden precision,
// making the comparison incorrectly pass (i.e., 1 > 1 == true??).
//
// FIXME: we probably shouldn't use floats at all.
volatile float HWeight = Hint[HintReg] += Weight;
stack_float_t HWeight = Hint[HintReg] += Weight;
if (HintReg.isVirtual() || MRI.isAllocatable(HintReg))
CopyHints.insert(CopyHint(HintReg, HWeight));
}
Expand Down
38 changes: 36 additions & 2 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8556,11 +8556,12 @@ static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
}

SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
FPClassTest Test, SDNodeFlags Flags,
const SDLoc &DL,
const FPClassTest OrigTestMask,
SDNodeFlags Flags, const SDLoc &DL,
SelectionDAG &DAG) const {
EVT OperandVT = Op.getValueType();
assert(OperandVT.isFloatingPoint());
FPClassTest Test = OrigTestMask;

// Degenerated cases.
if (Test == fcNone)
Expand Down Expand Up @@ -8594,9 +8595,21 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
// exceptions are ignored.
if (Flags.hasNoFPExcept() &&
isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
FPClassTest FPTestMask = Test;

ISD::CondCode OrderedCmpOpcode = IsInverted ? ISD::SETUNE : ISD::SETOEQ;
ISD::CondCode UnorderedCmpOpcode = IsInverted ? ISD::SETONE : ISD::SETUEQ;

// See if we can fold an | fcNan into an unordered compare.
FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;

// Can't fold the ordered check if we're only testing for snan or qnan
// individually.
if ((FPTestMask & fcNan) != fcNan)
OrderedFPTestMask = FPTestMask;

const bool IsOrdered = FPTestMask == OrderedFPTestMask;

if (std::optional<bool> IsCmp0 =
isFCmpEqualZero(Test, Semantics, DAG.getMachineFunction());
IsCmp0 && (isCondCodeLegalOrCustom(
Expand Down Expand Up @@ -8628,6 +8641,27 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
return DAG.getSetCC(DL, ResultVT, Abs, Inf,
IsInverted ? ISD::SETUNE : ISD::SETOEQ);
}

if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
// TODO: Could handle ordered case, but it produces worse code for
// x86. Maybe handle ordered if fabs is free?

ISD::CondCode OrderedOp = IsInverted ? ISD::SETUGE : ISD::SETOLT;
ISD::CondCode UnorderedOp = IsInverted ? ISD::SETOGE : ISD::SETULT;

if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
OperandVT.getScalarType().getSimpleVT())) {
// (issubnormal(x) || iszero(x)) --> fabs(x) < smallest_normal

// TODO: Maybe only makes sense if fabs is free. Integer test of
// exponent bits seems better for x86.
SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
SDValue SmallestNormal = DAG.getConstantFP(
APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
return DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal,
IsOrdered ? OrderedOp : UnorderedOp);
}
}
}

// In the general case use integer operations.
Expand Down
17 changes: 11 additions & 6 deletions llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1528,11 +1528,7 @@ static void emitRemark(const Function &F, OptimizationRemarkEmitter &ORE,

bool HWAddressSanitizer::selectiveInstrumentationShouldSkip(
Function &F, FunctionAnalysisManager &FAM) const {
bool Skip = [&]() {
if (ClRandomSkipRate.getNumOccurrences()) {
std::bernoulli_distribution D(ClRandomSkipRate);
return !D(*Rng);
}
auto SkipHot = [&]() {
if (!ClHotPercentileCutoff.getNumOccurrences())
return false;
auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
Expand All @@ -1544,7 +1540,16 @@ bool HWAddressSanitizer::selectiveInstrumentationShouldSkip(
}
return PSI->isFunctionHotInCallGraphNthPercentile(
ClHotPercentileCutoff, &F, FAM.getResult<BlockFrequencyAnalysis>(F));
}();
};

auto SkipRandom = [&]() {
if (!ClRandomSkipRate.getNumOccurrences())
return false;
std::bernoulli_distribution D(ClRandomSkipRate);
return !D(*Rng);
};

bool Skip = SkipRandom() || SkipHot();
emitRemark(F, FAM.getResult<OptimizationRemarkEmitterAnalysis>(F), Skip);
return Skip;
}
Expand Down
30 changes: 18 additions & 12 deletions llvm/lib/Transforms/Instrumentation/LowerAllowCheckPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,13 +76,25 @@ static bool removeUbsanTraps(Function &F, const BlockFrequencyInfo &BFI,
SmallVector<std::pair<IntrinsicInst *, bool>, 16> ReplaceWithValue;
std::unique_ptr<RandomNumberGenerator> Rng;

auto ShouldRemove = [&](bool IsHot) {
if (!RandomRate.getNumOccurrences())
return IsHot;
auto GetRng = [&]() -> RandomNumberGenerator & {
if (!Rng)
Rng = F.getParent()->createRNG(F.getName());
std::bernoulli_distribution D(RandomRate);
return !D(*Rng);
return *Rng;
};

auto ShouldRemoveHot = [&](const BasicBlock &BB) {
return HotPercentileCutoff.getNumOccurrences() && PSI &&
PSI->isHotCountNthPercentile(
HotPercentileCutoff, BFI.getBlockProfileCount(&BB).value_or(0));
};

auto ShouldRemoveRandom = [&]() {
return RandomRate.getNumOccurrences() &&
!std::bernoulli_distribution(RandomRate)(GetRng());
};

auto ShouldRemove = [&](const BasicBlock &BB) {
return ShouldRemoveRandom() || ShouldRemoveHot(BB);
};

for (BasicBlock &BB : F) {
Expand All @@ -96,13 +108,7 @@ static bool removeUbsanTraps(Function &F, const BlockFrequencyInfo &BFI,
case Intrinsic::allow_runtime_check: {
++NumChecksTotal;

bool IsHot = false;
if (PSI) {
uint64_t Count = BFI.getBlockProfileCount(&BB).value_or(0);
IsHot = PSI->isHotCountNthPercentile(HotPercentileCutoff, Count);
}

bool ToRemove = ShouldRemove(IsHot);
bool ToRemove = ShouldRemove(BB);
ReplaceWithValue.push_back({
II,
ToRemove,
Expand Down
52 changes: 24 additions & 28 deletions llvm/test/CodeGen/X86/is_fpclass.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2602,24 +2602,22 @@ define i1 @issubnormal_or_nan_f(float %x) {
define i1 @issubnormal_or_zero_or_nan_f(float %x) {
; X86-LABEL: issubnormal_or_zero_or_nan_f:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: testl $2139095040, %eax # imm = 0x7F800000
; X86-NEXT: sete %cl
; X86-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
; X86-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
; X86-NEXT: setge %al
; X86-NEXT: orb %cl, %al
; X86-NEXT: flds {{[0-9]+}}(%esp)
; X86-NEXT: fabs
; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
; X86-NEXT: fxch %st(1)
; X86-NEXT: fucompp
; X86-NEXT: fnstsw %ax
; X86-NEXT: # kill: def $ah killed $ah killed $ax
; X86-NEXT: sahf
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: issubnormal_or_zero_or_nan_f:
; X64: # %bb.0:
; X64-NEXT: movd %xmm0, %eax
; X64-NEXT: testl $2139095040, %eax # imm = 0x7F800000
; X64-NEXT: sete %cl
; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
; X64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
; X64-NEXT: setge %al
; X64-NEXT: orb %cl, %al
; X64-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: setb %al
; X64-NEXT: retq
%class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 243) ; 0xf0|0x3 = "subnormal|zero|nan"
ret i1 %class
Expand Down Expand Up @@ -2773,24 +2771,22 @@ define i1 @not_issubnormal_or_nan_f(float %x) {
define i1 @not_issubnormal_or_zero_or_nan_f(float %x) {
; X86-LABEL: not_issubnormal_or_zero_or_nan_f:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: testl $2139095040, %eax # imm = 0x7F800000
; X86-NEXT: setne %cl
; X86-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
; X86-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
; X86-NEXT: setl %al
; X86-NEXT: andb %cl, %al
; X86-NEXT: flds {{[0-9]+}}(%esp)
; X86-NEXT: fabs
; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
; X86-NEXT: fxch %st(1)
; X86-NEXT: fucompp
; X86-NEXT: fnstsw %ax
; X86-NEXT: # kill: def $ah killed $ah killed $ax
; X86-NEXT: sahf
; X86-NEXT: setae %al
; X86-NEXT: retl
;
; X64-LABEL: not_issubnormal_or_zero_or_nan_f:
; X64: # %bb.0:
; X64-NEXT: movd %xmm0, %eax
; X64-NEXT: testl $2139095040, %eax # imm = 0x7F800000
; X64-NEXT: setne %cl
; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
; X64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
; X64-NEXT: setl %al
; X64-NEXT: andb %cl, %al
; X64-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: setae %al
; X64-NEXT: retq
%class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 780) ; ~(0xf0|0x3) = ~"subnormal|zero|nan"
ret i1 %class
Expand Down
56 changes: 56 additions & 0 deletions llvm/test/CodeGen/X86/pr99396.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
; RUN: llc < %s -mtriple=i386-unknown-freebsd -enable-misched -relocation-model=pic | FileCheck %s

@c = external local_unnamed_addr global ptr

declare i32 @fn2() local_unnamed_addr

declare i32 @fn3() local_unnamed_addr

define noundef i32 @fn4() #0 {
entry:
%tmp0 = load i32, ptr @fn4, align 4
; CHECK: movl fn4@GOT(%ebx), %edi
; CHECK-NEXT: movl (%edi), %edx
%tmp1 = load ptr, ptr @c, align 4
; CHECK: movl c@GOT(%ebx), %eax
; CHECK-NEXT: movl (%eax), %esi
; CHECK-NEXT: testl %esi, %esi
%cmp.g = icmp eq ptr %tmp1, null
br i1 %cmp.g, label %if.then.g, label %if.end3.g

if.then.g: ; preds = %entry
%tmp2 = load i32, ptr inttoptr (i32 1 to ptr), align 4
%cmp1.g = icmp slt i32 %tmp2, 0
br i1 %cmp1.g, label %if.then2.g, label %if.end3.g

if.then2.g: ; preds = %if.then.g
%.g = load volatile i32, ptr null, align 2147483648
br label %f.exit

if.end3.g: ; preds = %if.then.g, %entry
%h.i.g = icmp eq i32 %tmp0, 0
br i1 %h.i.g, label %f.exit, label %while.body.g

while.body.g: ; preds = %if.end3.g, %if.end8.g
%buff.addr.019.g = phi ptr [ %incdec.ptr.g, %if.end8.g ], [ @fn4, %if.end3.g ]
%g.addr.018.g = phi i32 [ %dec.g, %if.end8.g ], [ %tmp0, %if.end3.g ]
%call4.g = tail call i32 @fn3(ptr %tmp1, ptr %buff.addr.019.g, i32 %g.addr.018.g)
%cmp5.g = icmp slt i32 %call4.g, 0
br i1 %cmp5.g, label %if.then6.g, label %if.end8.g

if.then6.g: ; preds = %while.body.g
%call7.g = tail call i32 @fn2(ptr null)
br label %f.exit

if.end8.g: ; preds = %while.body.g
%dec.g = add i32 %g.addr.018.g, 1
%incdec.ptr.g = getelementptr i32, ptr %buff.addr.019.g, i32 1
store i64 0, ptr %tmp1, align 4
%h.not.g = icmp eq i32 %dec.g, 0
br i1 %h.not.g, label %f.exit, label %while.body.g

f.exit: ; preds = %if.end8.g, %if.then6.g, %if.end3.g, %if.then2.g
ret i32 0
}

attributes #0 = { "frame-pointer"="all" "tune-cpu"="generic" }
2 changes: 2 additions & 0 deletions llvm/test/Instrumentation/HWAddressSanitizer/pgo-opt-out.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
; RUN: opt < %s -passes='require<profile-summary>,hwasan' -pass-remarks=hwasan -pass-remarks-missed=hwasan -S -hwasan-percentile-cutoff-hot=990000 2>&1 | FileCheck %s --check-prefix=NONE
; RUN: opt < %s -passes='require<profile-summary>,hwasan' -pass-remarks=hwasan -pass-remarks-missed=hwasan -S -hwasan-random-rate=1.0 2>&1 | FileCheck %s --check-prefix=ALL
; RUN: opt < %s -passes='require<profile-summary>,hwasan' -pass-remarks=hwasan -pass-remarks-missed=hwasan -S -hwasan-random-rate=0.0 2>&1 | FileCheck %s --check-prefix=NONE
; RUN: opt < %s -passes='require<profile-summary>,hwasan' -pass-remarks=hwasan -pass-remarks-missed=hwasan -S -hwasan-random-rate=1.0 -hwasan-percentile-cutoff-hot=990000 2>&1 | FileCheck %s --check-prefix=NONE
; RUN: opt < %s -passes='require<profile-summary>,hwasan' -pass-remarks=hwasan -pass-remarks-missed=hwasan -S -hwasan-random-rate=0.0 -hwasan-percentile-cutoff-hot=700000 2>&1 | FileCheck %s --check-prefix=NONE

; ALL: remark: <unknown>:0:0: Sanitized: F=sanitize
; ALL: @sanitized
Expand Down
101 changes: 101 additions & 0 deletions llvm/test/Transforms/InstCombine/select-load.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=instcombine -S < %s | FileCheck %s

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-grtev4-linux-gnu"

; Baseline without any sanitizer attribute: the assertions below expect the
; load through the selected pointer to be rewritten into one load from each
; alloca followed by a select of the two loaded values.
define i32 @test_plain(i1 %f) {
; CHECK-LABEL: @test_plain(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 8
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 8
; CHECK-NEXT: [[A_VAL:%.*]] = load i32, ptr [[A]], align 8
; CHECK-NEXT: [[B_VAL:%.*]] = load i32, ptr [[B]], align 8
; CHECK-NEXT: [[L:%.*]] = select i1 [[F:%.*]], i32 [[A_VAL]], i32 [[B_VAL]]
; CHECK-NEXT: ret i32 [[L]]
;
entry:
%a = alloca i32, align 8
%b = alloca i32, align 8
%sel = select i1 %f, ptr %a, ptr %b
%l = load i32, ptr %sel, align 8
ret i32 %l
}

; Don't speculate as the condition may control which memory is valid from
; sanitizer perspective.
define i32 @test_asan(i1 %f) sanitize_address {
; CHECK-LABEL: @test_asan(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 8
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 8
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[F:%.*]], ptr [[A]], ptr [[B]]
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[SEL]], align 8
; CHECK-NEXT: ret i32 [[L]]
;
entry:
%a = alloca i32, align 8
%b = alloca i32, align 8
%sel = select i1 %f, ptr %a, ptr %b
%l = load i32, ptr %sel, align 8
ret i32 %l
}


; Don't speculate as the condition may control which memory is valid from
; sanitizer perspective.
define i32 @test_hwasan(i1 %f) sanitize_hwaddress {
; CHECK-LABEL: @test_hwasan(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 8
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 8
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[F:%.*]], ptr [[A]], ptr [[B]]
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[SEL]], align 8
; CHECK-NEXT: ret i32 [[L]]
;
entry:
%a = alloca i32, align 8
%b = alloca i32, align 8
%sel = select i1 %f, ptr %a, ptr %b
%l = load i32, ptr %sel, align 8
ret i32 %l
}

; Don't speculate as the condition may control which memory is valid from
; sanitizer perspective.
define i32 @test_tsan(i1 %f) sanitize_thread {
; CHECK-LABEL: @test_tsan(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 8
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 8
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[F:%.*]], ptr [[A]], ptr [[B]]
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[SEL]], align 8
; CHECK-NEXT: ret i32 [[L]]
;
entry:
%a = alloca i32, align 8
%b = alloca i32, align 8
%sel = select i1 %f, ptr %a, ptr %b
%l = load i32, ptr %sel, align 8
ret i32 %l
}

; Msan just propagates shadow, even if speculated load accesses uninitialized
; value, instrumentation will select shadow of the desired value anyway.
define i32 @test_msan(i1 %f) sanitize_memory {
; CHECK-LABEL: @test_msan(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 8
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 8
; CHECK-NEXT: [[A_VAL:%.*]] = load i32, ptr [[A]], align 8
; CHECK-NEXT: [[B_VAL:%.*]] = load i32, ptr [[B]], align 8
; CHECK-NEXT: [[L:%.*]] = select i1 [[F:%.*]], i32 [[A_VAL]], i32 [[B_VAL]]
; CHECK-NEXT: ret i32 [[L]]
;
entry:
%a = alloca i32, align 8
%b = alloca i32, align 8
%sel = select i1 %f, ptr %a, ptr %b
%l = load i32, ptr %sel, align 8
ret i32 %l
}
2 changes: 1 addition & 1 deletion llvm/test/Transforms/lower-builtin-allow-check.ll
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ define dso_local noundef i32 @veryHot(ptr noundef readonly %0) !prof !39 {
; ALL70-LABEL: define dso_local noundef i32 @veryHot(
; ALL70-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF17:![0-9]+]] {
; ALL70-NEXT: [[CHK:%.*]] = icmp eq ptr [[TMP0]], null
; ALL70-NEXT: [[HOT:%.*]] = xor i1 true, true
; ALL70-NEXT: [[HOT:%.*]] = xor i1 false, true
; ALL70-NEXT: [[TMP2:%.*]] = or i1 [[CHK]], [[HOT]]
; ALL70-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
; ALL70: 3:
Expand Down
2 changes: 1 addition & 1 deletion polly/lib/Analysis/ScopBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2770,7 +2770,7 @@ isl::set ScopBuilder::getNonHoistableCtx(MemoryAccess *Access,

auto &DL = scop->getFunction().getDataLayout();
if (isSafeToLoadUnconditionally(LI->getPointerOperand(), LI->getType(),
LI->getAlign(), DL)) {
LI->getAlign(), DL, nullptr)) {
SafeToLoad = isl::set::universe(AccessRelation.get_space().range());
} else if (BB != LI->getParent()) {
// Skip accesses in non-affine subregions as they might not be executed
Expand Down
3 changes: 2 additions & 1 deletion polly/lib/Analysis/ScopDetection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,8 @@ bool ScopDetection::onlyValidRequiredInvariantLoads(

for (auto NonAffineRegion : Context.NonAffineSubRegionSet) {
if (isSafeToLoadUnconditionally(Load->getPointerOperand(),
Load->getType(), Load->getAlign(), DL))
Load->getType(), Load->getAlign(), DL,
nullptr))
continue;

if (NonAffineRegion->contains(Load) &&
Expand Down