4 changes: 4 additions & 0 deletions clang/lib/Headers/stdatomic.h
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,11 @@ typedef _Atomic(uintmax_t) atomic_uintmax_t;

typedef struct atomic_flag { atomic_bool _Value; } atomic_flag;

/* ATOMIC_FLAG_INIT is the brace initializer for atomic_flag; it sets the
   single atomic_bool member to false/zero. C++ translation units get a bool
   literal while C keeps the integer zero.
   NOTE(review): presumably `{ 0 }` is diagnosed when this header is compiled
   as C++ -- confirm. */
#ifdef __cplusplus
#define ATOMIC_FLAG_INIT {false}
#else
#define ATOMIC_FLAG_INIT { 0 }
#endif

/* These should be provided by the libc implementation. */
#ifdef __cplusplus
Expand Down
9 changes: 9 additions & 0 deletions clang/lib/Sema/CheckExprLifetime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//

#include "CheckExprLifetime.h"
#include "clang/AST/Decl.h"
#include "clang/AST/Expr.h"
#include "clang/Basic/DiagnosticSema.h"
#include "clang/Sema/Initialization.h"
Expand Down Expand Up @@ -548,6 +549,14 @@ static void visitLocalsRetainedByReferenceBinding(IndirectLocalPath &Path,
EnableLifetimeWarnings);
}

if (auto *M = dyn_cast<MemberExpr>(Init)) {
// Lifetime of a non-reference type field is same as base object.
if (auto *F = dyn_cast<FieldDecl>(M->getMemberDecl());
F && !F->getType()->isReferenceType())
visitLocalsRetainedByInitializer(Path, M->getBase(), Visit, true,
EnableLifetimeWarnings);
}

if (isa<CallExpr>(Init)) {
if (EnableLifetimeWarnings)
handleGslAnnotatedTypes(Path, Init, Visit);
Expand Down
10 changes: 10 additions & 0 deletions clang/test/CodeGen/PowerPC/save-reg-params.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// RUN: %clang_cc1 -triple powerpc64-ibm-aix -emit-llvm -o - %s -msave-reg-params | FileCheck -check-prefix=SAVE %s
// RUN: %clang_cc1 -triple powerpc-ibm-aix -emit-llvm -o - %s -msave-reg-params | FileCheck -check-prefix=SAVE %s
// RUN: %clang_cc1 -triple powerpc64-ibm-aix -emit-llvm -o - %s | FileCheck -check-prefix=NOSAVE %s
// RUN: %clang_cc1 -triple powerpc-ibm-aix -emit-llvm -o - %s | FileCheck -check-prefix=NOSAVE %s

void bar(int);
void foo(int x) { bar(x); }

// SAVE: attributes #{{[0-9]+}} = { {{.+}} "save-reg-params" {{.+}} }
// NOSAVE-NOT: "save-reg-params"
7 changes: 7 additions & 0 deletions clang/test/Driver/aix-save-reg-params.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// RUN: %clang -### -target powerpc-ibm-aix-xcoff -msave-reg-params -c %s -o /dev/null 2>&1 | FileCheck %s
// RUN: %clang -### -target powerpc64-ibm-aix-xcoff -msave-reg-params -c %s -o /dev/null 2>&1 | FileCheck %s
// RUN: %clang -### -target powerpc-ibm-aix-xcoff -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=DISABLE
// RUN: %clang -### -target powerpc64-ibm-aix-xcoff -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=DISABLE

// CHECK: "-msave-reg-params"
// DISABLE-NOT: "-msave-reg-params"
4 changes: 4 additions & 0 deletions clang/test/Driver/ppc-unsupported.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,8 @@
// RUN: -c %s 2>&1 | FileCheck %s
// RUN: not %clang -target powerpc-unknown-aix -mabi=quadword-atomics \
// RUN: -c %s 2>&1 | FileCheck %s
// RUN: not %clang -target powerpc64le-unknown-linux-gnu -msave-reg-params \
// RUN: -c %s 2>&1 | FileCheck %s
// RUN: not %clang -target powerpc-unknown-unknown -msave-reg-params \
// RUN: -c %s 2>&1 | FileCheck %s
// CHECK: unsupported option
5 changes: 5 additions & 0 deletions clang/test/Headers/stdatomic.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
// RUN: %clang_cc1 -std=c11 -E %s | FileCheck %s
// RUN: %clang_cc1 -std=c11 -fms-compatibility -E %s | FileCheck %s
// RUN: %clang_cc1 -std=c11 %s -verify
// RUN: %clang_cc1 -x c++ -std=c++11 %s -verify
// expected-no-diagnostics
#include <stdatomic.h>

int bool_lock_free = ATOMIC_BOOL_LOCK_FREE;
Expand Down Expand Up @@ -31,3 +34,5 @@ int llong_lock_free = ATOMIC_LLONG_LOCK_FREE;

int pointer_lock_free = ATOMIC_POINTER_LOCK_FREE;
// CHECK: pointer_lock_free = {{ *[012] *;}}

atomic_flag f = ATOMIC_FLAG_INIT;
26 changes: 26 additions & 0 deletions clang/test/SemaCXX/attr-lifetimebound.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,31 @@ namespace usage_ok {
q = A(); // expected-warning {{object backing the pointer q will be destroyed at the end of the full-expression}}
r = A(1); // expected-warning {{object backing the pointer r will be destroyed at the end of the full-expression}}
}

// Fixture for member-access lifetime checks. Pair deliberately mixes a
// reference field (a), value fields (b and the nested aggregate c) and a
// pointer field (d) so each kind can be reached through an accessor.
struct FieldCheck {
struct Set {
int a;
};
struct Pair {
const int& a;
int b;
Set c;
int * d;
};
Pair p;
FieldCheck(const int& a): p(a){}
// getR/getP hand out the member p by reference / by pointer and mark the
// implicit object parameter as lifetimebound.
Pair& getR() [[clang::lifetimebound]] { return p; }
Pair* getP() [[clang::lifetimebound]] { return &p; }
// Same as getP() but without the lifetimebound annotation.
Pair* getNoLB() { return &p; }
};
// Binds local references/pointers to fields reached through a temporary
// FieldCheck and records which bindings are diagnosed.
void test_field_access() {
int x = 0;
// Pair::a is itself a reference, so it is not treated as living inside the
// temporary: no warning on this line.
const int& a = FieldCheck{x}.getR().a;
// Non-reference fields (directly, or nested via c) reached through the
// lifetimebound getP() share the temporary's lifetime and are diagnosed.
const int& b = FieldCheck{x}.getP()->b; // expected-warning {{temporary bound to local reference 'b' will be destroyed at the end of the full-expression}}
const int& c = FieldCheck{x}.getP()->c.a; // expected-warning {{temporary bound to local reference 'c' will be destroyed at the end of the full-expression}}
// getNoLB() carries no lifetimebound annotation: no warning on this line.
const int& d = FieldCheck{x}.getNoLB()->c.a;
// Initializes a pointer from the pointer field d rather than binding a
// reference to a field: no warning on this line.
const int* e = FieldCheck{x}.getR().d;
}
}

# 1 "<std>" 1 3
Expand Down Expand Up @@ -239,3 +264,4 @@ namespace move_forward_et_al_examples {
S X;
S *AddressOfOk = std::addressof(X);
} // namespace move_forward_et_al_examples

5 changes: 5 additions & 0 deletions compiler-rt/lib/builtins/README.txt
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,11 @@ switch32
switch8
switchu8

// This function generates a custom trampoline function with the specific
// realFunc and localsPtr values.
void __trampoline_setup(uint32_t* trampOnStack, int trampSizeAllocated,
const void* realFunc, void* localsPtr);

// There is no C interface to the *_vfp_d8_d15_regs functions. There are
// called in the prolog and epilog of Thumb1 functions. When the C++ ABI use
// SJLJ for exceptions, each function with a catch clause or destructors needs
Expand Down
28 changes: 0 additions & 28 deletions compiler-rt/lib/builtins/aarch64/sme-abi-vg.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,6 @@ struct FEATURES {

extern struct FEATURES __aarch64_cpu_features;

struct SME_STATE {
long PSTATE;
long TPIDR2_EL0;
};

extern struct SME_STATE __arm_sme_state(void) __arm_streaming_compatible;

extern bool __aarch64_has_sme_and_tpidr2_el0;

#if __GNUC__ >= 9
#pragma GCC diagnostic ignored "-Wprio-ctor-dtor"
#endif
Expand All @@ -28,22 +19,3 @@ __attribute__((constructor(90))) static void get_aarch64_cpu_features(void) {

__init_cpu_features();
}

// Reports the value produced by the `cntd` instruction, or 0 when it must not
// be executed. `cntd` runs only if the SVE feature bit is set, or if
// __aarch64_has_sme_and_tpidr2_el0 is set and bit 0 of the PSTATE word
// returned by __arm_sme_state() is set (presumably PSTATE.SM -- confirm).
__attribute__((target("sve"))) long
__arm_get_current_vg(void) __arm_streaming_compatible {
  // Query the SME state first, then read the cached CPU feature word.
  const struct SME_STATE SmeState = __arm_sme_state();
  const unsigned long long FeatureBits =
      __atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED);

  const bool SveAvailable = (FeatureBits & (1ULL << FEAT_SVE)) != 0;
  const bool StreamingBit = (SmeState.PSTATE & 1) != 0;

  // SVE alone is sufficient; otherwise both SME support and the PSTATE bit
  // are required.
  const bool CanCount =
      SveAvailable || (__aarch64_has_sme_and_tpidr2_el0 && StreamingBit);
  if (!CanCount)
    return 0;

  long Count;
  __asm__ __volatile__("cntd %0" : "=r"(Count));
  return Count;
}
44 changes: 44 additions & 0 deletions compiler-rt/lib/builtins/aarch64/sme-abi.S
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,15 @@
#if !defined(__APPLE__)
#define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)
#define TPIDR2_SYMBOL_OFFSET :lo12:SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)
#define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features)
#define CPU_FEATS_SYMBOL_OFFSET :lo12:SYMBOL_NAME(__aarch64_cpu_features)
#else
// MachO requires @page/@pageoff directives because the global is defined
// in a different file. Otherwise this file may fail to build.
#define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@page
#define TPIDR2_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@pageoff
#define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features)@page
#define CPU_FEATS_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_cpu_features)@pageoff
#endif

.arch armv9-a+sme
Expand Down Expand Up @@ -180,6 +184,46 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_za_disable)
ret
END_COMPILERRT_OUTLINE_FUNCTION(__arm_za_disable)

// __arm_get_current_vg: returns in x0 the result of `cntd` when bit 30 of
// __aarch64_cpu_features is set, or when __aarch64_has_sme_and_tpidr2_el0 is
// non-zero and bit 0 of the x0 value returned by __arm_sme_state is set.
// Returns 0 in x0 otherwise. x1 is saved and restored around the call.
DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_get_current_vg)
.variant_pcs __arm_get_current_vg
BTI_C

// Set up a frame record: x30 must survive the `bl` below.
stp x29, x30, [sp, #-16]!
.cfi_def_cfa_offset 16
mov x29, sp
.cfi_def_cfa w29, 16
.cfi_offset w30, -8
.cfi_offset w29, -16
// w17 = low 32 bits of __aarch64_cpu_features.
adrp x17, CPU_FEATS_SYMBOL
ldr w17, [x17, CPU_FEATS_SYMBOL_OFFSET]
// Bit 30 set (presumably FEAT_SVE -- confirm): skip the SME flag check.
tbnz w17, #30, 0f
// Otherwise require __aarch64_has_sme_and_tpidr2_el0; if it is zero return 0.
adrp x16, TPIDR2_SYMBOL
ldrb w16, [x16, TPIDR2_SYMBOL_OFFSET]
cbz w16, 1f
0:
// Preserve the caller's x1 across the call to __arm_sme_state.
// NOTE(review): x18 is a reserved platform register on some ABIs, and x17 is
// also read after the `bl`; confirm __arm_sme_state (and any veneer used to
// reach it) leaves x17/x18 intact.
mov x18, x1
bl __arm_sme_state
mov x1, x18
// Keep only bit 30 of the feature word, then insert bit 0 of the returned x0
// into bit 0; the result is zero only when neither condition holds.
and x17, x17, #0x40000000
bfxil x17, x0, #0, #1
cbz x17, 1f
cntd x0
// Epilogue for the `cntd` path.
.cfi_def_cfa wsp, 16
ldp x29, x30, [sp], #16
.cfi_def_cfa_offset 0
.cfi_restore w30
.cfi_restore w29
ret
1:
// Fallback path: return 0.
mov x0, xzr
.cfi_def_cfa wsp, 16
ldp x29, x30, [sp], #16
.cfi_def_cfa_offset 0
.cfi_restore w30
.cfi_restore w29
ret
END_COMPILERRT_OUTLINE_FUNCTION(__arm_get_current_vg)

NO_EXEC_STACK_DIRECTIVE

// GNU property note for BTI and PAC
Expand Down
42 changes: 42 additions & 0 deletions compiler-rt/lib/builtins/trampoline_setup.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,45 @@ COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack,
__clear_cache(trampOnStack, &trampOnStack[10]);
}
#endif // __powerpc__ && !defined(__powerpc64__)

// The AArch64 compiler generates calls to __trampoline_setup() when creating
// trampoline functions on the stack for use with nested functions.
// This function creates a custom 36-byte trampoline function on the stack
// which loads x18 with a pointer to the outer function's locals
// and then jumps to the target nested function.
// Note: x18 is a reserved platform register on Windows and macOS.

#if defined(__aarch64__) && defined(__ELF__)
// Builds one 32-bit mov/movk instruction word: `opcode` is the base encoding,
// the 16-bit slice of `value` starting at bit `shift` is placed in bits 5..20,
// and `reg` (the destination register number) occupies the low bits.
static uint32_t encodeMov16(uint32_t opcode, uint64_t value, unsigned shift,
                            uint32_t reg) {
  return opcode | (uint32_t)(((value >> shift) & 0xffffu) << 5) | reg;
}

// Writes a 9-instruction (36-byte) trampoline into trampOnStack:
//   trampOnStack        - buffer that receives the instruction words.
//   trampSizeAllocated  - size in bytes of that buffer; must be at least 36.
//   realFunc            - address the trampoline branches to (via x17).
//   localsPtr           - value loaded into x18 before the branch.
// Aborts if the buffer is too small, and flushes the instruction cache over
// the words written.
COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack,
                                        int trampSizeAllocated,
                                        const void *realFunc, void *localsPtr) {
  // Base encodings for loading a 64-bit immediate 16 bits at a time; entry i
  // targets bits [16*i, 16*i + 15] of the destination register.
  static const uint32_t movOpcodes[4] = {
      0xd2800000u, // mov
      0xf2a00000u, // movk, lsl #16
      0xf2c00000u, // movk, lsl #32
      0xf2e00000u, // movk, lsl #48
  };
  const uint64_t target = (uint64_t)realFunc;
  const uint64_t locals = (uint64_t)localsPtr;
  unsigned i;

  // This should never happen, but if compiler did not allocate
  // enough space on stack for the trampoline, abort.
  if (trampSizeAllocated < 36)
    compilerrt_abort();

  // Words 0-3 load realFunc into x17 (register 0x11); words 4-7 load
  // localsPtr into x18 (register 0x12).
  for (i = 0; i < 4; ++i) {
    trampOnStack[i] = encodeMov16(movOpcodes[i], target, 16 * i, 0x11);
    trampOnStack[4 + i] = encodeMov16(movOpcodes[i], locals, 16 * i, 0x12);
  }
  trampOnStack[8] = 0xd61f0220; // br x17

  // Clear instruction cache.
  __clear_cache(trampOnStack, &trampOnStack[9]);
}
#endif // defined(__aarch64__) && defined(__ELF__)
2 changes: 1 addition & 1 deletion compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2237,6 +2237,7 @@ static const char *RegNumToRegName(int reg) {
case 31:
return "sp";
# endif
# endif // SANITIZER_LINUX
default:
return NULL;
}
Expand Down Expand Up @@ -2302,7 +2303,6 @@ static void DumpSingleReg(ucontext_t *ctx, int RegNum) {
(void)RegName;
# endif
}
# endif

void SignalContext::DumpAllRegisters(void *context) {
ucontext_t *ucontext = (ucontext_t *)context;
Expand Down
2 changes: 1 addition & 1 deletion compiler-rt/test/builtins/Unit/trampoline_setup_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

/*
* Tests nested functions
* The ppc compiler generates a call to __trampoline_setup
* The ppc and aarch64 compilers generate a call to __trampoline_setup
* The i386 and x86_64 compilers generate a call to ___enable_execute_stack
*/

Expand Down
1 change: 1 addition & 0 deletions compiler-rt/test/tsan/debug_alloc_stack.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#ifndef __APPLE__
#include <sys/types.h>
Expand Down
27 changes: 20 additions & 7 deletions flang/lib/Lower/OpenMP/OpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1494,13 +1494,26 @@ genParallelOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
allSymbols.append(dsp.getAllSymbolsToPrivatize().begin(),
dsp.getAllSymbolsToPrivatize().end());

for (auto [arg, prv] : llvm::zip_equal(allSymbols, region.getArguments())) {
converter.bindSymbol(*arg, hlfir::translateToExtendedValue(
loc, firOpBuilder, hlfir::Entity{prv},
/*contiguousHint=*/
evaluate::IsSimplyContiguous(
*arg, converter.getFoldingContext()))
.first);
unsigned argIdx = 0;
for (const semantics::Symbol *arg : allSymbols) {
auto bind = [&](const semantics::Symbol *sym) {
mlir::BlockArgument blockArg = region.getArgument(argIdx);
++argIdx;
converter.bindSymbol(*sym,
hlfir::translateToExtendedValue(
loc, firOpBuilder, hlfir::Entity{blockArg},
/*contiguousHint=*/
evaluate::IsSimplyContiguous(
*sym, converter.getFoldingContext()))
.first);
};

if (const auto *commonDet =
arg->detailsIf<semantics::CommonBlockDetails>()) {
for (const auto &mem : commonDet->objects())
bind(&*mem);
} else
bind(arg);
}

return allSymbols;
Expand Down
12 changes: 4 additions & 8 deletions flang/test/Lower/OpenMP/firstprivate-commonblock.f90
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
! RUN: %flang_fc1 -emit-hlfir -fopenmp \
! RUN: -mmlir --openmp-enable-delayed-privatization=true -o - %s 2>&1 \
! RUN: | FileCheck %s

!CHECK: func.func @_QPfirstprivate_common() {
!CHECK: %[[val_0:.*]] = fir.address_of(@c_) : !fir.ref<!fir.array<8xi8>>
Expand All @@ -12,15 +14,9 @@
!CHECK: %[[val_5:.*]] = fir.coordinate_of %[[val_4]], %[[val_c4]] : (!fir.ref<!fir.array<?xi8>>, index) -> !fir.ref<i8>
!CHECK: %[[val_6:.*]] = fir.convert %[[val_5]] : (!fir.ref<i8>) -> !fir.ref<f32>
!CHECK: %[[VAL_6_DECL:.*]]:2 = hlfir.declare %[[val_6]] {uniq_name = "_QFfirstprivate_commonEy"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
!CHECK: omp.parallel {
!CHECK: %[[val_7:.*]] = fir.alloca f32 {bindc_name = "x", pinned, uniq_name = "_QFfirstprivate_commonEx"}
!CHECK: omp.parallel private(@{{.*}} %{{.*}}#0 -> %[[val_7:.*]] : {{.*}}, @{{.*}} %{{.*}}#0 -> %[[val_9:.*]] : {{.*}}) {
!CHECK: %[[VAL_7_DECL:.*]]:2 = hlfir.declare %[[val_7]] {uniq_name = "_QFfirstprivate_commonEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
!CHECK: %[[val_8:.*]] = fir.load %[[VAL_3_DECL]]#0 : !fir.ref<f32>
!CHECK: hlfir.assign %[[val_8]] to %[[VAL_7_DECL]]#0 temporary_lhs : f32, !fir.ref<f32>
!CHECK: %[[val_9:.*]] = fir.alloca f32 {bindc_name = "y", pinned, uniq_name = "_QFfirstprivate_commonEy"}
!CHECK: %[[VAL_9_DECL:.*]]:2 = hlfir.declare %[[val_9]] {uniq_name = "_QFfirstprivate_commonEy"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
!CHECK: %[[val_10:.*]] = fir.load %[[VAL_6_DECL]]#0 : !fir.ref<f32>
!CHECK: hlfir.assign %[[val_10]] to %[[VAL_9_DECL]]#0 temporary_lhs : f32, !fir.ref<f32>
!CHECK: omp.terminator
!CHECK: }
!CHECK: return
Expand Down
24 changes: 9 additions & 15 deletions flang/test/Lower/OpenMP/private-commonblock.f90
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
! RUN: %flang_fc1 -emit-hlfir -fopenmp \
! RUN: -mmlir --openmp-enable-delayed-privatization=true -o - %s 2>&1 \
! RUN: | FileCheck %s

!CHECK: func.func @_QPprivate_common() {
!CHECK: omp.parallel {
!CHECK: %[[X:.*]] = fir.alloca f32 {bindc_name = "x", pinned, uniq_name = "_QFprivate_commonEx"}
!CHECK: omp.parallel private(@{{.*}} %{{.*}}#0 -> %[[X:.*]] : {{.*}}, @{{.*}} %{{.*}}#0 -> %[[Y:.*]] : {{.*}}) {
!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X]] {uniq_name = "_QFprivate_commonEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
!CHECK: %[[Y:.*]] = fir.alloca f32 {bindc_name = "y", pinned, uniq_name = "_QFprivate_commonEy"}
!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y]] {uniq_name = "_QFprivate_commonEy"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
!CHECK: omp.terminator
!CHECK: }
Expand Down Expand Up @@ -48,17 +48,13 @@ subroutine private_common
!CHECK: %[[D_REF:.*]] = fir.convert %[[D_DECL]]#1 : (!fir.ref<!fir.array<5x!fir.char<1,5>>>) -> !fir.ref<!fir.char<1,5>>
!CHECK: %[[D_BOX:.*]] = fir.emboxchar %[[D_REF]], %[[TP5]] : (!fir.ref<!fir.char<1,5>>, index) -> !fir.boxchar<1>
!CHECK: fir.call @_QPsub1(%[[A_DECL]]#1, %[[B_DECL]]#1, %[[C_BOX]], %[[D_BOX]]) fastmath<contract> : (!fir.ref<i32>, !fir.ref<!fir.array<10xf32>>, !fir.boxchar<1>, !fir.boxchar<1>) -> ()
!CHECK: omp.parallel {
!CHECK: %[[A_PVT_REF:.*]] = fir.alloca i32 {bindc_name = "a", pinned, uniq_name = "_QFprivate_clause_commonblockEa"}
!CHECK: omp.parallel private(@{{.*}} %{{.*}}#0 -> %[[A_PVT_REF:.*]] : {{.*}}, @{{.*}} %{{.*}}#0 -> %[[B_PVT_REF:.*]] : {{.*}}, @{{.*}} %{{.*}}#0 -> %[[C_PVT_REF:.*]] : {{.*}}, @{{.*}} %{{.*}}#0 -> %[[D_PVT_REF:.*]] : {{.*}}) {
!CHECK: %[[A_PVT_DECL:.*]]:2 = hlfir.declare %[[A_PVT_REF]] {uniq_name = "_QFprivate_clause_commonblockEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[B_PVT_REF:.*]] = fir.alloca !fir.array<10xf32> {bindc_name = "b", pinned, uniq_name = "_QFprivate_clause_commonblockEb"}
!CHECK: %[[SH10:.*]] = fir.shape %c10 : (index) -> !fir.shape<1>
!CHECK: %[[SH10:.*]] = fir.shape %c10{{.*}} : (index) -> !fir.shape<1>
!CHECK: %[[B_PVT_DECL:.*]]:2 = hlfir.declare %[[B_PVT_REF]](%[[SH10]]) {uniq_name = "_QFprivate_clause_commonblockEb"} : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>)
!CHECK: %[[C_PVT_REF:.*]] = fir.alloca !fir.char<1,5> {bindc_name = "c", pinned, uniq_name = "_QFprivate_clause_commonblockEc"}
!CHECK: %[[C_PVT_DECL:.*]]:2 = hlfir.declare %[[C_PVT_REF]] typeparams %{{.*}} {uniq_name = "_QFprivate_clause_commonblockEc"} : (!fir.ref<!fir.char<1,5>>, index) -> (!fir.ref<!fir.char<1,5>>, !fir.ref<!fir.char<1,5>>)
!CHECK: %[[D_PVT_REF:.*]] = fir.alloca !fir.array<5x!fir.char<1,5>> {bindc_name = "d", pinned, uniq_name = "_QFprivate_clause_commonblockEd"}
!CHECK: %[[SH5:.*]] = fir.shape %c5_1 : (index) -> !fir.shape<1>
!CHECK: %[[D_PVT_DECL:.*]]:2 = hlfir.declare %[[D_PVT_REF]](%[[SH5]]) typeparams %[[TP5]] {uniq_name = "_QFprivate_clause_commonblockEd"} : (!fir.ref<!fir.array<5x!fir.char<1,5>>>, !fir.shape<1>, index) -> (!fir.ref<!fir.array<5x!fir.char<1,5>>>, !fir.ref<!fir.array<5x!fir.char<1,5>>>)
!CHECK: %[[SH5:.*]] = fir.shape %c5{{.*}} : (index) -> !fir.shape<1>
!CHECK: %[[D_PVT_DECL:.*]]:2 = hlfir.declare %[[D_PVT_REF]](%[[SH5]]) typeparams %c5{{.*}} {uniq_name = "_QFprivate_clause_commonblockEd"} : (!fir.ref<!fir.array<5x!fir.char<1,5>>>, !fir.shape<1>, index) -> (!fir.ref<!fir.array<5x!fir.char<1,5>>>, !fir.ref<!fir.array<5x!fir.char<1,5>>>)
!CHECK: %[[C_PVT_BOX:.*]] = fir.emboxchar %[[C_PVT_DECL]]#1, %{{.*}} : (!fir.ref<!fir.char<1,5>>, index) -> !fir.boxchar<1>
!CHECK: %[[D_PVT_REF:.*]] = fir.convert %[[D_PVT_DECL]]#1 : (!fir.ref<!fir.array<5x!fir.char<1,5>>>) -> !fir.ref<!fir.char<1,5>>
!CHECK: %[[D_PVT_BOX:.*]] = fir.emboxchar %[[D_PVT_REF]], %{{.*}} : (!fir.ref<!fir.char<1,5>>, index) -> !fir.boxchar<1>
Expand Down Expand Up @@ -98,10 +94,8 @@ subroutine private_clause_commonblock()
!CHECK: %[[C_ADDR:.*]] = fir.box_addr %[[C_BOX]] : (!fir.box<!fir.ptr<!fir.complex<4>>>) -> !fir.ptr<!fir.complex<4>>
!CHECK: %[[C_REF:.*]] = fir.convert %[[C_ADDR]] : (!fir.ptr<!fir.complex<4>>) -> !fir.ref<!fir.complex<4>>
!CHECK: fir.call @_QPsub4(%[[C_REF]], %[[A_DECL]]#1) fastmath<contract> : (!fir.ref<!fir.complex<4>>, !fir.ref<i32>) -> ()
!CHECK: omp.parallel {
!CHECK: %[[C_PVT_REF:.*]] = fir.alloca !fir.box<!fir.ptr<!fir.complex<4>>> {bindc_name = "c", pinned, uniq_name = "_QFprivate_clause_commonblock_pointerEc"}
!CHECK: omp.parallel private(@{{.*}} %{{.*}}#0 -> %[[C_PVT_REF:.*]] : {{.*}}, @{{.*}} %{{.*}}#0 -> %[[A_PVT_REF:.*]] : {{.*}}) {
!CHECK: %[[C_PVT_DECL:.*]]:2 = hlfir.declare %[[C_PVT_REF]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFprivate_clause_commonblock_pointerEc"} : (!fir.ref<!fir.box<!fir.ptr<!fir.complex<4>>>>) -> (!fir.ref<!fir.box<!fir.ptr<!fir.complex<4>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.complex<4>>>>)
!CHECK: %[[A_PVT_REF:.*]] = fir.alloca i32 {bindc_name = "a", pinned, uniq_name = "_QFprivate_clause_commonblock_pointerEa"}
!CHECK: %[[A_PVT_DECL:.*]]:2 = hlfir.declare %[[A_PVT_REF]] {uniq_name = "_QFprivate_clause_commonblock_pointerEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[C_PVT_BOX:.*]] = fir.load %[[C_PVT_DECL]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.complex<4>>>>
!CHECK: %[[C_PVT_ADDR:.*]] = fir.box_addr %[[C_PVT_BOX]] : (!fir.box<!fir.ptr<!fir.complex<4>>>) -> !fir.ptr<!fir.complex<4>>
Expand Down
1 change: 1 addition & 0 deletions libc/config/linux/riscv/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -856,6 +856,7 @@ if(LLVM_LIBC_FULL_BUILD)
libc.src.sys.select.select

# sys/socket.h entrypoints
libc.src.sys.socket.bind
libc.src.sys.socket.socket
)
endif()
Expand Down
24 changes: 12 additions & 12 deletions libc/test/src/math/smoke/TotalOrderMagTest.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,24 +104,24 @@ class TotalOrderMagTestTemplate : public LIBC_NAMESPACE::testing::FEnvSafeTest {
}

// Checks how `func` orders NaNs that share sign and quiet/signaling kind but
// differ in payload. Quiet and signaling NaNs are built with explicit
// payloads (0x123 and 0x42) for both signs and compared, in both argument
// orders, against the fixture's aNaN / sNaN / neg_aNaN / neg_sNaN values.
void testNaNPayloads(TotalOrderMagFunc func) {
T qnan_123 = FPBits::quiet_nan(Sign::POS, 0x123).get_val();
T neg_qnan_123 = FPBits::quiet_nan(Sign::NEG, 0x123).get_val();
T snan_123 = FPBits::signaling_nan(Sign::POS, 0x123).get_val();
T neg_snan_123 = FPBits::signaling_nan(Sign::NEG, 0x123).get_val();
T qnan_0x42 = FPBits::quiet_nan(Sign::POS, 0x42).get_val();
T neg_qnan_0x42 = FPBits::quiet_nan(Sign::NEG, 0x42).get_val();
T snan_0x42 = FPBits::signaling_nan(Sign::POS, 0x42).get_val();
T neg_snan_0x42 = FPBits::signaling_nan(Sign::NEG, 0x42).get_val();

// Positive-sign NaNs.
EXPECT_TRUE(funcWrapper(func, aNaN, aNaN));
EXPECT_TRUE(funcWrapper(func, sNaN, sNaN));
EXPECT_TRUE(funcWrapper(func, aNaN, qnan_123));
EXPECT_TRUE(funcWrapper(func, sNaN, snan_123));
EXPECT_FALSE(funcWrapper(func, qnan_123, aNaN));
EXPECT_FALSE(funcWrapper(func, snan_123, sNaN));
EXPECT_TRUE(funcWrapper(func, aNaN, qnan_0x42));
EXPECT_FALSE(funcWrapper(func, sNaN, snan_0x42));
EXPECT_FALSE(funcWrapper(func, qnan_0x42, aNaN));
EXPECT_TRUE(funcWrapper(func, snan_0x42, sNaN));

// Negative-sign NaNs; the expectations mirror the positive cases above.
EXPECT_TRUE(funcWrapper(func, neg_aNaN, neg_aNaN));
EXPECT_TRUE(funcWrapper(func, neg_sNaN, neg_sNaN));
EXPECT_TRUE(funcWrapper(func, neg_aNaN, neg_qnan_123));
EXPECT_TRUE(funcWrapper(func, neg_sNaN, neg_snan_123));
EXPECT_FALSE(funcWrapper(func, neg_qnan_123, neg_aNaN));
EXPECT_FALSE(funcWrapper(func, neg_snan_123, neg_sNaN));
EXPECT_TRUE(funcWrapper(func, neg_aNaN, neg_qnan_0x42));
EXPECT_FALSE(funcWrapper(func, neg_sNaN, neg_snan_0x42));
EXPECT_FALSE(funcWrapper(func, neg_qnan_0x42, neg_aNaN));
EXPECT_TRUE(funcWrapper(func, neg_snan_0x42, neg_sNaN));
}
};

Expand Down
24 changes: 12 additions & 12 deletions libc/test/src/math/smoke/TotalOrderTest.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,24 +102,24 @@ class TotalOrderTestTemplate : public LIBC_NAMESPACE::testing::FEnvSafeTest {
}

// Checks how `func` orders NaNs that share sign and quiet/signaling kind but
// differ in payload. Quiet and signaling NaNs are built with explicit
// payloads (0x123 and 0x42) for both signs and compared, in both argument
// orders, against the fixture's aNaN / sNaN / neg_aNaN / neg_sNaN values.
void testNaNPayloads(TotalOrderFunc func) {
T qnan_123 = FPBits::quiet_nan(Sign::POS, 0x123).get_val();
T neg_qnan_123 = FPBits::quiet_nan(Sign::NEG, 0x123).get_val();
T snan_123 = FPBits::signaling_nan(Sign::POS, 0x123).get_val();
T neg_snan_123 = FPBits::signaling_nan(Sign::NEG, 0x123).get_val();
T qnan_0x42 = FPBits::quiet_nan(Sign::POS, 0x42).get_val();
T neg_qnan_0x42 = FPBits::quiet_nan(Sign::NEG, 0x42).get_val();
T snan_0x42 = FPBits::signaling_nan(Sign::POS, 0x42).get_val();
T neg_snan_0x42 = FPBits::signaling_nan(Sign::NEG, 0x42).get_val();

// Positive-sign NaNs.
EXPECT_TRUE(funcWrapper(func, aNaN, aNaN));
EXPECT_TRUE(funcWrapper(func, sNaN, sNaN));
EXPECT_TRUE(funcWrapper(func, aNaN, qnan_123));
EXPECT_TRUE(funcWrapper(func, sNaN, snan_123));
EXPECT_FALSE(funcWrapper(func, qnan_123, aNaN));
EXPECT_FALSE(funcWrapper(func, snan_123, sNaN));
EXPECT_TRUE(funcWrapper(func, aNaN, qnan_0x42));
EXPECT_FALSE(funcWrapper(func, sNaN, snan_0x42));
EXPECT_FALSE(funcWrapper(func, qnan_0x42, aNaN));
EXPECT_TRUE(funcWrapper(func, snan_0x42, sNaN));

// Negative-sign NaNs; for differing payloads the expectations are the
// reverse of the positive cases above.
EXPECT_TRUE(funcWrapper(func, neg_aNaN, neg_aNaN));
EXPECT_TRUE(funcWrapper(func, neg_sNaN, neg_sNaN));
EXPECT_FALSE(funcWrapper(func, neg_aNaN, neg_qnan_123));
EXPECT_FALSE(funcWrapper(func, neg_sNaN, neg_snan_123));
EXPECT_TRUE(funcWrapper(func, neg_qnan_123, neg_aNaN));
EXPECT_TRUE(funcWrapper(func, neg_snan_123, neg_sNaN));
EXPECT_FALSE(funcWrapper(func, neg_aNaN, neg_qnan_0x42));
EXPECT_TRUE(funcWrapper(func, neg_sNaN, neg_snan_0x42));
EXPECT_TRUE(funcWrapper(func, neg_qnan_0x42, neg_aNaN));
EXPECT_FALSE(funcWrapper(func, neg_snan_0x42, neg_sNaN));
}
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
//
//===----------------------------------------------------------------------===//

// This test appears to hang with picolibc & qemu.
// This test did pass but is very slow when run using qemu. ~7 minutes on a
// Neoverse N1 (AArch64) server core.
// UNSUPPORTED: LIBCXX-PICOLIBC-FIXME

// <algorithm>
Expand Down
6 changes: 6 additions & 0 deletions llvm/include/llvm/Analysis/SimplifyQuery.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,12 @@ struct SimplifyQuery {
Copy.CC = &CC;
return Copy;
}

/// Returns a copy of this query whose condition context (CC) is cleared;
/// every other field is carried over unchanged.
SimplifyQuery getWithoutCondContext() const {
  SimplifyQuery Stripped = *this;
  Stripped.CC = nullptr;
  return Stripped;
}
};

} // end namespace llvm
Expand Down
6 changes: 6 additions & 0 deletions llvm/include/llvm/CodeGen/AsmPrinter.h
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,9 @@ class AsmPrinter : public MachineFunctionPass {
/// split stack prologue.
bool HasNoSplitStack = false;

/// True if debugging information is available in this module.
bool DbgInfoAvailable = false;

protected:
explicit AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer);

Expand Down Expand Up @@ -430,6 +433,9 @@ class AsmPrinter : public MachineFunctionPass {
/// Get the CFISection type for the module.
CFISection getModuleCFISectionType() const { return ModuleCFISection; }

/// Returns true if valid debug info is present.
bool hasDebugInfo() const { return DbgInfoAvailable; }

bool needsSEHMoves();

/// Since emitting CFI unwind information is entangled with supporting the
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/CodeGen/MIRPrinter.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ namespace llvm {

class MachineBasicBlock;
class MachineFunction;
class MachineModuleInfo;
class Module;
template <typename T> class SmallVectorImpl;

Expand Down
19 changes: 8 additions & 11 deletions llvm/include/llvm/CodeGen/MachineFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ class MachineConstantPool;
class MachineFrameInfo;
class MachineFunction;
class MachineJumpTableInfo;
class MachineModuleInfo;
class MachineRegisterInfo;
class MCContext;
class MCInstrDesc;
Expand Down Expand Up @@ -260,7 +259,6 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction {
const LLVMTargetMachine &Target;
const TargetSubtargetInfo *STI;
MCContext &Ctx;
MachineModuleInfo &MMI;

// RegInfo - Information about each register in use in the function.
MachineRegisterInfo *RegInfo;
Expand Down Expand Up @@ -395,15 +393,15 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction {

/// \}

/// Clear all the members of this MachineFunction, but the ones used
/// to initialize again the MachineFunction.
/// More specifically, this deallocates all the dynamically allocated
/// objects and get rid of all the XXXInfo data structure, but keep
/// unchanged the references to Fn, Target, MMI, and FunctionNumber.
/// Clear all the members of this MachineFunction, but the ones used to
/// initialize again the MachineFunction. More specifically, this deallocates
/// all the dynamically allocated objects and get rid of all the XXXInfo data
/// structure, but keep unchanged the references to Fn, Target, and
/// FunctionNumber.
void clear();
/// Allocate and initialize the different members.
/// In particular, the XXXInfo data structure.
/// \pre Fn, Target, MMI, and FunctionNumber are properly set.
/// \pre Fn, Target, and FunctionNumber are properly set.
void init();

public:
Expand Down Expand Up @@ -632,8 +630,8 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction {
const static unsigned int DebugOperandMemNumber;

MachineFunction(Function &F, const LLVMTargetMachine &Target,
const TargetSubtargetInfo &STI, unsigned FunctionNum,
MachineModuleInfo &MMI);
const TargetSubtargetInfo &STI, MCContext &Ctx,
unsigned FunctionNum);
MachineFunction(const MachineFunction &) = delete;
MachineFunction &operator=(const MachineFunction &) = delete;
~MachineFunction();
Expand Down Expand Up @@ -665,7 +663,6 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction {

GISelChangeObserver *getObserver() const { return Observer; }

MachineModuleInfo &getMMI() const { return MMI; }
MCContext &getContext() const { return Ctx; }

/// Returns the Section this function belongs to.
Expand Down
6 changes: 0 additions & 6 deletions llvm/include/llvm/CodeGen/MachineModuleInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,9 +113,6 @@ class MachineModuleInfo {
// -g. At this moment, there's no way to specify that some CFI directives
// go into .eh_frame only, while others go into .debug_frame only.

/// True if debugging information is available in this module.
bool DbgInfoAvailable = false;

/// True if this module is being built for windows/msvc, and uses floating
/// point. This is used to emit an undefined reference to _fltused.
bool UsesMSVCFloatingPoint = false;
Expand Down Expand Up @@ -186,9 +183,6 @@ class MachineModuleInfo {
return const_cast<MachineModuleInfo*>(this)->getObjFileInfo<Ty>();
}

/// Returns true if valid debug info is present.
bool hasDebugInfo() const { return DbgInfoAvailable; }

bool usesMSVCFloatingPoint() const { return UsesMSVCFloatingPoint; }

void setUsesMSVCFloatingPoint(bool b) { UsesMSVCFloatingPoint = b; }
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/CodeGen/SelectionDAG.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ struct KnownBits;
class LLVMContext;
class MachineBasicBlock;
class MachineConstantPoolValue;
class MachineModuleInfo;
class MCSymbol;
class OptimizationRemarkEmitter;
class ProfileSummaryInfo;
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/CodeGen/TargetInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ class LiveIntervals;
class LiveVariables;
class MachineLoop;
class MachineMemOperand;
class MachineModuleInfo;
class MachineRegisterInfo;
class MCAsmInfo;
class MCInst;
Expand Down
22 changes: 11 additions & 11 deletions llvm/lib/Analysis/ValueTracking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1435,7 +1435,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
// inferred hold at original context instruction. TODO: It may be
// correct to use the original context. IF warranted, explore and
// add sufficient tests to cover.
SimplifyQuery RecQ = Q;
SimplifyQuery RecQ = Q.getWithoutCondContext();
RecQ.CxtI = P;
computeKnownBits(R, DemandedElts, Known2, Depth + 1, RecQ);
switch (Opcode) {
Expand Down Expand Up @@ -1468,7 +1468,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
// phi. This is important because that is where the value is actually
// "evaluated" even though it is used later somewhere else. (see also
// D69571).
SimplifyQuery RecQ = Q;
SimplifyQuery RecQ = Q.getWithoutCondContext();

unsigned OpNum = P->getOperand(0) == R ? 0 : 1;
Instruction *RInst = P->getIncomingBlock(OpNum)->getTerminator();
Expand Down Expand Up @@ -1546,7 +1546,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
// phi. This is important because that is where the value is actually
// "evaluated" even though it is used later somewhere else. (see also
// D69571).
SimplifyQuery RecQ = Q;
SimplifyQuery RecQ = Q.getWithoutCondContext();
RecQ.CxtI = P->getIncomingBlock(u)->getTerminator();

Known2 = KnownBits(BitWidth);
Expand Down Expand Up @@ -2329,7 +2329,7 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
// it is an induction variable where in each step its value is a power of
// two.
auto *PN = cast<PHINode>(I);
SimplifyQuery RecQ = Q;
SimplifyQuery RecQ = Q.getWithoutCondContext();

// Check if it is an induction variable and always power of two.
if (isPowerOfTwoRecurrence(PN, OrZero, Depth, RecQ))
Expand Down Expand Up @@ -2943,7 +2943,7 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
return true;

// Check if all incoming values are non-zero using recursion.
SimplifyQuery RecQ = Q;
SimplifyQuery RecQ = Q.getWithoutCondContext();
unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1);
return llvm::all_of(PN->operands(), [&](const Use &U) {
if (U.get() == PN)
Expand Down Expand Up @@ -3509,7 +3509,7 @@ static bool isNonEqualPHIs(const PHINode *PN1, const PHINode *PN2,
if (UsedFullRecursion)
return false;

SimplifyQuery RecQ = Q;
SimplifyQuery RecQ = Q.getWithoutCondContext();
RecQ.CxtI = IncomBB->getTerminator();
if (!isKnownNonEqual(IV1, IV2, DemandedElts, Depth + 1, RecQ))
return false;
Expand Down Expand Up @@ -4001,7 +4001,7 @@ static unsigned ComputeNumSignBitsImpl(const Value *V,

// Take the minimum of all incoming values. This can't infinitely loop
// because of our depth threshold.
SimplifyQuery RecQ = Q;
SimplifyQuery RecQ = Q.getWithoutCondContext();
Tmp = TyBits;
for (unsigned i = 0, e = NumIncomingValues; i != e; ++i) {
if (Tmp == 1) return Tmp;
Expand Down Expand Up @@ -5909,10 +5909,10 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
// Recurse, but cap the recursion to two levels, because we don't want
// to waste time spinning around in loops. We need at least depth 2 to
// detect known sign bits.
computeKnownFPClass(
IncValue, DemandedElts, InterestedClasses, KnownSrc,
PhiRecursionLimit,
Q.getWithInstruction(P->getIncomingBlock(U)->getTerminator()));
computeKnownFPClass(IncValue, DemandedElts, InterestedClasses, KnownSrc,
PhiRecursionLimit,
Q.getWithoutCondContext().getWithInstruction(
P->getIncomingBlock(U)->getTerminator()));

if (First) {
Known = KnownSrc;
Expand Down
17 changes: 8 additions & 9 deletions llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,7 @@ bool AsmPrinter::doInitialization(Module &M) {
MMI = MMIWP ? &MMIWP->getMMI() : nullptr;
HasSplitStack = false;
HasNoSplitStack = false;
DbgInfoAvailable = !M.debug_compile_units().empty();

AddrLabelSymbols = nullptr;

Expand Down Expand Up @@ -541,8 +542,7 @@ bool AsmPrinter::doInitialization(Module &M) {
if (EmitCodeView && TM.getTargetTriple().isOSWindows())
DebugHandlers.push_back(std::make_unique<CodeViewDebug>(this));
if (!EmitCodeView || M.getDwarfVersion()) {
assert(MMI && "MMI could not be nullptr here!");
if (MMI->hasDebugInfo()) {
if (hasDebugInfo()) {
DD = new DwarfDebug(this);
DebugHandlers.push_back(std::unique_ptr<DwarfDebug>(DD));
}
Expand Down Expand Up @@ -1278,7 +1278,7 @@ AsmPrinter::getFunctionCFISectionType(const Function &F) const {
return CFISection::EH;

assert(MMI != nullptr && "Invalid machine module info");
if (MMI->hasDebugInfo() || TM.Options.ForceDwarfFrameSection)
if (hasDebugInfo() || TM.Options.ForceDwarfFrameSection)
return CFISection::Debug;

return CFISection::None;
Expand Down Expand Up @@ -1669,11 +1669,10 @@ void AsmPrinter::emitPCSections(const MachineFunction &MF) {
}

/// Returns true if function begin and end labels should be emitted.
static bool needFuncLabels(const MachineFunction &MF,
const MachineModuleInfo &MMI) {
static bool needFuncLabels(const MachineFunction &MF, const AsmPrinter &Asm) {
if (!MF.getLandingPads().empty() || MF.hasEHFunclets() ||
MMI.hasDebugInfo() ||
MF.getFunction().hasMetadata(LLVMContext::MD_pcsections))
MF.getFunction().hasMetadata(LLVMContext::MD_pcsections) ||
Asm.hasDebugInfo())
return true;

// We might emit an EH table that uses function begin and end labels even if
Expand Down Expand Up @@ -1944,7 +1943,7 @@ void AsmPrinter::emitFunctionBody() {
// are automatically sized.
bool EmitFunctionSize = MAI->hasDotTypeDotSizeDirective() && !TT.isWasm();

if (needFuncLabels(*MF, *MMI) || EmitFunctionSize) {
if (EmitFunctionSize || needFuncLabels(*MF, *this)) {
// Create a symbol for the end of function.
CurrentFnEnd = createTempSymbol("func_end");
OutStreamer->emitLabel(CurrentFnEnd);
Expand Down Expand Up @@ -2588,7 +2587,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
if (F.hasFnAttribute("patchable-function-entry") ||
F.hasFnAttribute("function-instrument") ||
F.hasFnAttribute("xray-instruction-threshold") ||
needFuncLabels(MF, *MMI) || NeedsLocalForSize ||
needFuncLabels(MF, *this) || NeedsLocalForSize ||
MF.getTarget().Options.EmitStackSizeSection ||
MF.getTarget().Options.BBAddrMap || MF.hasBBLabels()) {
CurrentFnBegin = createTempSymbol("func_begin");
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -613,7 +613,7 @@ static SourceLanguage MapDWLangToCVLang(unsigned DWLang) {
void CodeViewDebug::beginModule(Module *M) {
// If module doesn't have named metadata anchors or COFF debug section
// is not available, skip any debug info related stuff.
if (!MMI->hasDebugInfo() ||
if (!Asm->hasDebugInfo() ||
!Asm->getObjFileLowering().getCOFFDebugSymbolsSection()) {
Asm = nullptr;
return;
Expand All @@ -636,7 +636,7 @@ void CodeViewDebug::beginModule(Module *M) {
}

void CodeViewDebug::endModule() {
if (!Asm || !MMI->hasDebugInfo())
if (!Asm || !Asm->hasDebugInfo())
return;

// The COFF .debug$S section consists of several subsections, each starting
Expand Down
13 changes: 5 additions & 8 deletions llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -241,10 +241,7 @@ bool DebugHandlerBase::isUnsignedDIType(const DIType *Ty) {
Ty->getTag() == dwarf::DW_TAG_unspecified_type;
}

static bool hasDebugInfo(const MachineModuleInfo *MMI,
const MachineFunction *MF) {
if (!MMI->hasDebugInfo())
return false;
static bool hasDebugInfo(const MachineFunction *MF) {
auto *SP = MF->getFunction().getSubprogram();
if (!SP)
return false;
Expand All @@ -258,7 +255,7 @@ static bool hasDebugInfo(const MachineModuleInfo *MMI,
void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
PrevInstBB = nullptr;

if (!Asm || !hasDebugInfo(MMI, MF)) {
if (!Asm || !hasDebugInfo(MF)) {
skippedNonDebugFunction();
return;
}
Expand Down Expand Up @@ -354,7 +351,7 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
}

void DebugHandlerBase::beginInstruction(const MachineInstr *MI) {
if (!Asm || !MMI->hasDebugInfo())
if (!Asm || !Asm->hasDebugInfo())
return;

assert(CurMI == nullptr);
Expand All @@ -380,7 +377,7 @@ void DebugHandlerBase::beginInstruction(const MachineInstr *MI) {
}

void DebugHandlerBase::endInstruction() {
if (!Asm || !MMI->hasDebugInfo())
if (!Asm || !Asm->hasDebugInfo())
return;

assert(CurMI != nullptr);
Expand Down Expand Up @@ -415,7 +412,7 @@ void DebugHandlerBase::endInstruction() {
}

void DebugHandlerBase::endFunction(const MachineFunction *MF) {
if (Asm && hasDebugInfo(MMI, MF))
if (Asm && hasDebugInfo(MF))
endFunctionImpl(MF);
DbgValues.clear();
DbgLabels.clear();
Expand Down
9 changes: 5 additions & 4 deletions llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1148,14 +1148,15 @@ sortGlobalExprs(SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &GVEs) {
void DwarfDebug::beginModule(Module *M) {
DebugHandlerBase::beginModule(M);

if (!Asm || !MMI->hasDebugInfo())
if (!Asm)
return;

unsigned NumDebugCUs = std::distance(M->debug_compile_units_begin(),
M->debug_compile_units_end());
if (NumDebugCUs == 0)
return;

assert(NumDebugCUs > 0 && "Asm unexpectedly initialized");
assert(MMI->hasDebugInfo() &&
"DebugInfoAvailabilty unexpectedly not initialized");
SingleCU = NumDebugCUs == 1;
DenseMap<DIGlobalVariable *, SmallVector<DwarfCompileUnit::GlobalExpr, 1>>
GVMap;
Expand Down Expand Up @@ -1433,7 +1434,7 @@ void DwarfDebug::endModule() {

// If we aren't actually generating debug info (check beginModule -
// conditionalized on the presence of the llvm.dbg.cu metadata node)
if (!Asm || !MMI->hasDebugInfo())
if (!Asm || !Asm->hasDebugInfo())
return;

// Finalize the debug info for the module.
Expand Down
12 changes: 6 additions & 6 deletions llvm/lib/CodeGen/MachineFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,9 +162,9 @@ static inline Align getFnStackAlignment(const TargetSubtargetInfo *STI,
}

MachineFunction::MachineFunction(Function &F, const LLVMTargetMachine &Target,
const TargetSubtargetInfo &STI,
unsigned FunctionNum, MachineModuleInfo &mmi)
: F(F), Target(Target), STI(&STI), Ctx(mmi.getContext()), MMI(mmi) {
const TargetSubtargetInfo &STI, MCContext &Ctx,
unsigned FunctionNum)
: F(F), Target(Target), STI(&STI), Ctx(Ctx) {
FunctionNumber = FunctionNum;
init();
}
Expand Down Expand Up @@ -654,9 +654,9 @@ void MachineFunction::print(raw_ostream &OS, const SlotIndexes *Indexes) const {

/// True if this function needs frame moves for debug or exceptions.
bool MachineFunction::needsFrameMoves() const {
return getMMI().hasDebugInfo() ||
getTarget().Options.ForceDwarfFrameSection ||
F.needsUnwindTableEntry();
return getTarget().Options.ForceDwarfFrameSection ||
F.needsUnwindTableEntry() ||
!F.getParent()->debug_compile_units().empty();
}

namespace llvm {
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/CodeGen/MachineFunctionAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ MachineFunctionAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
.getCachedResult<MachineModuleAnalysis>(*F.getParent())
->getMMI();
auto MF = std::make_unique<MachineFunction>(
F, *TM, STI, Context.generateMachineFunctionNum(F), MMI);
F, *TM, STI, MMI.getContext(), Context.generateMachineFunctionNum(F));
MF->initTargetMachineFunctionInfo(STI);

// MRI callback for target specific initializations.
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/CodeGen/MachineLoopInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ bool MachineLoop::isLoopInvariant(MachineInstr &I,
}
}

if (!MO.isUse())
if (!MO.readsReg())
continue;

assert(MRI->getVRegDef(Reg) &&
Expand Down
11 changes: 1 addition & 10 deletions llvm/lib/CodeGen/MachineModuleInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,6 @@
using namespace llvm;
using namespace llvm::dwarf;

static cl::opt<bool>
DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden,
cl::desc("Disable debug info printing"));

// Out of line virtual method.
MachineModuleInfoImpl::~MachineModuleInfoImpl() = default;

Expand All @@ -43,7 +39,6 @@ void MachineModuleInfo::initialize() {
CurCallSite = 0;
NextFnNum = 0;
UsesMSVCFloatingPoint = false;
DbgInfoAvailable = false;
}

void MachineModuleInfo::finalize() {
Expand Down Expand Up @@ -104,7 +99,7 @@ MachineFunction &MachineModuleInfo::getOrCreateMachineFunction(Function &F) {
if (I.second) {
// No pre-existing machine function, create a new one.
const TargetSubtargetInfo &STI = *TM.getSubtargetImpl(F);
MF = new MachineFunction(F, TM, STI, NextFnNum++, *this);
MF = new MachineFunction(F, TM, STI, getContext(), NextFnNum++);
MF->initTargetMachineFunctionInfo(STI);

// MRI callback for target specific initializations.
Expand Down Expand Up @@ -224,8 +219,6 @@ bool MachineModuleInfoWrapperPass::doInitialization(Module &M) {
Ctx.diagnose(
DiagnosticInfoSrcMgr(SMD, M.getName(), IsInlineAsm, LocCookie));
});
MMI.DbgInfoAvailable = !DisableDebugInfoPrinting &&
!M.debug_compile_units().empty();
return false;
}

Expand All @@ -250,7 +243,5 @@ MachineModuleAnalysis::run(Module &M, ModuleAnalysisManager &) {
Ctx.diagnose(
DiagnosticInfoSrcMgr(SMD, M.getName(), IsInlineAsm, LocCookie));
});
MMI.DbgInfoAvailable =
!DisableDebugInfoPrinting && !M.debug_compile_units().empty();
return Result(MMI);
}
2 changes: 1 addition & 1 deletion llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21413,7 +21413,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
}

// Turn 'store undef, Ptr' -> nothing.
if (Value.isUndef() && ST->isUnindexed())
if (Value.isUndef() && ST->isUnindexed() && !ST->isVolatile())
return Chain;

// Try to infer better alignment information than the store already has.
Expand Down
16 changes: 0 additions & 16 deletions llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1203,11 +1203,6 @@ void FastISel::handleDbgInfo(const Instruction *II) {

if (DbgLabelRecord *DLR = dyn_cast<DbgLabelRecord>(&DR)) {
assert(DLR->getLabel() && "Missing label");
if (!FuncInfo.MF->getMMI().hasDebugInfo()) {
LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DLR << "\n");
continue;
}

BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DLR->getDebugLoc(),
TII.get(TargetOpcode::DBG_LABEL))
.addMetadata(DLR->getLabel());
Expand Down Expand Up @@ -1402,12 +1397,6 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
case Intrinsic::dbg_declare: {
const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
assert(DI->getVariable() && "Missing variable");
if (!FuncInfo.MF->getMMI().hasDebugInfo()) {
LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI
<< " (!hasDebugInfo)\n");
return true;
}

if (FuncInfo.PreprocessedDbgDeclares.contains(DI))
return true;

Expand Down Expand Up @@ -1446,11 +1435,6 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
case Intrinsic::dbg_label: {
const DbgLabelInst *DI = cast<DbgLabelInst>(II);
assert(DI->getLabel() && "Missing label");
if (!FuncInfo.MF->getMMI().hasDebugInfo()) {
LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
return true;
}

BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
TII.get(TargetOpcode::DBG_LABEL)).addMetadata(DI->getLabel());
return true;
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8619,21 +8619,21 @@ SDValue SelectionDAGBuilder::lowerStartEH(SDValue Chain,
const BasicBlock *EHPadBB,
MCSymbol *&BeginLabel) {
MachineFunction &MF = DAG.getMachineFunction();
MachineModuleInfo &MMI = MF.getMMI();
MachineModuleInfo *MMI = DAG.getMMI();

// Insert a label before the invoke call to mark the try range. This can be
// used to detect deletion of the invoke via the MachineModuleInfo.
BeginLabel = MF.getContext().createTempSymbol();

// For SjLj, keep track of which landing pads go with which invokes
// so as to maintain the ordering of pads in the LSDA.
unsigned CallSiteIndex = MMI.getCurrentCallSite();
unsigned CallSiteIndex = MMI->getCurrentCallSite();
if (CallSiteIndex) {
MF.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex);

// Now that the call site is handled, stop tracking it.
MMI.setCurrentCallSite(0);
MMI->setCurrentCallSite(0);
}

return DAG.getEHLabel(getCurSDLoc(), Chain, BeginLabel);
Expand Down
58 changes: 58 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1080,6 +1080,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// Try to create BICs for vector ANDs.
setTargetDAGCombine(ISD::AND);

// llvm.init.trampoline and llvm.adjust.trampoline
setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

// Vector add and sub nodes may conceal a high-half opportunity.
// Also, try to fold ADD into CSINC/CSINV.
setTargetDAGCombine({ISD::ADD, ISD::ABS, ISD::SUB, ISD::XOR, ISD::SINT_TO_FP,
Expand Down Expand Up @@ -6688,6 +6692,56 @@ static SDValue LowerFLDEXP(SDValue Op, SelectionDAG &DAG) {
return Final;
}

/// Custom lowering for ISD::ADJUST_TRAMPOLINE (llvm.adjust.trampoline).
///
/// No adjustment is performed: the node's first operand is handed back
/// unchanged. On Darwin and Windows targets the lowering is rejected with a
/// fatal error instead.
///
/// \param Op  The ADJUST_TRAMPOLINE node being lowered.
/// \param DAG The current selection DAG (not used by this lowering).
/// \returns Operand 0 of \p Op.
SDValue AArch64TargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
                                                      SelectionDAG &DAG) const {
  // Note: x18 cannot be used for the Nest parameter on Windows and macOS.
  const bool IsSupportedTarget =
      !Subtarget->isTargetDarwin() && !Subtarget->isTargetWindows();
  if (!IsSupportedTarget)
    report_fatal_error(
        "ADJUST_TRAMPOLINE operation is only supported on Linux.");

  SDValue FirstOperand = Op.getOperand(0);
  return FirstOperand;
}

/// Custom lowering for ISD::INIT_TRAMPOLINE (llvm.init.trampoline).
///
/// Replaces the node with a C-calling-convention library call
///   __trampoline_setup(Trmp, TrampSize, FPtr, Nest)
/// threaded on the node's incoming chain. On Darwin and Windows targets the
/// lowering is rejected with a fatal error instead.
///
/// \param Op  The INIT_TRAMPOLINE node; operands are (chain, trampoline,
///            nested function, 'nest' parameter value).
/// \param DAG The selection DAG in which the call is built.
/// \returns The second element of the pair produced by LowerCallTo
///          (presumably the call's output chain -- confirm against
///          TargetLowering::LowerCallTo).
SDValue AArch64TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
                                                    SelectionDAG &DAG) const {
  // Note: x18 cannot be used for the Nest parameter on Windows and macOS.
  if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
    report_fatal_error("INIT_TRAMPOLINE operation is only supported on Linux.");

  SDValue InChain = Op.getOperand(0);
  SDValue TrampAddr = Op.getOperand(1);  // trampoline
  SDValue NestedFn = Op.getOperand(2);   // nested function
  SDValue NestValue = Op.getOperand(3);  // 'nest' parameter value
  SDLoc Loc(Op);

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());

  // Every argument is described with the same pointer-sized integer type.
  TargetLowering::ArgListTy CallArgs;
  auto AppendArg = [&CallArgs, IntPtrTy](SDValue Node) {
    TargetLowering::ArgListEntry Arg;
    Arg.Ty = IntPtrTy;
    Arg.Node = Node;
    CallArgs.push_back(Arg);
  };

  AppendArg(TrampAddr);
  // NOTE(review): 20 is passed as the trampoline size, while the accompanying
  // trampoline.ll test allocates a 36-byte buffer -- confirm this matches what
  // __trampoline_setup expects on AArch64.
  AppendArg(DAG.getConstant(20, Loc, MVT::i64));
  AppendArg(NestedFn);
  AppendArg(NestValue);

  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(Loc);
  CLI.setChain(InChain);
  CLI.setLibCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
                   DAG.getExternalSymbol("__trampoline_setup", PtrVT),
                   std::move(CallArgs));

  return LowerCallTo(CLI).second;
}

SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
LLVM_DEBUG(dbgs() << "Custom lowering: ");
Expand All @@ -6705,6 +6759,10 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerGlobalTLSAddress(Op, DAG);
case ISD::PtrAuthGlobalAddress:
return LowerPtrAuthGlobalAddress(Op, DAG);
case ISD::ADJUST_TRAMPOLINE:
return LowerADJUST_TRAMPOLINE(Op, DAG);
case ISD::INIT_TRAMPOLINE:
return LowerINIT_TRAMPOLINE(Op, DAG);
case ISD::SETCC:
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS:
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -1143,6 +1143,8 @@ class AArch64TargetLowering : public TargetLowering {
SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
SDValue TVal, SDValue FVal, const SDLoc &dl,
SelectionDAG &DAG) const;
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRIND(SDValue Op, SelectionDAG &DAG) const;
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPU_ASAN_INSTRUMENTATION_H

#include "AMDGPU.h"
#include "AMDGPUBaseInfo.h"
#include "AMDGPUMemoryUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDGPUMemoryUtils.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ add_llvm_target(AMDGPUCodeGen
AMDGPUAnnotateKernelFeatures.cpp
AMDGPUAnnotateUniformValues.cpp
AMDGPUArgumentUsageInfo.cpp
AMDGPUAsanInstrumentation.cpp
AMDGPUAsmPrinter.cpp
AMDGPUAtomicOptimizer.cpp
AMDGPUAttributor.cpp
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
add_llvm_component_library(LLVMAMDGPUUtils
AMDGPUAsanInstrumentation.cpp
AMDGPUAsmUtils.cpp
AMDGPUBaseInfo.cpp
AMDGPUDelayedMCExpr.cpp
Expand Down
8 changes: 3 additions & 5 deletions llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -493,7 +493,7 @@ void NVPTXAsmPrinter::emitFunctionEntryLabel() {
// Emit initial .loc debug directive for correct relocation symbol data.
if (const DISubprogram *SP = MF->getFunction().getSubprogram()) {
assert(SP->getUnit());
if (!SP->getUnit()->isDebugDirectivesOnly() && MMI && MMI->hasDebugInfo())
if (!SP->getUnit()->isDebugDirectivesOnly())
emitInitialRawDwarfLocDirective(*MF);
}
}
Expand Down Expand Up @@ -912,7 +912,7 @@ void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
if (HasFullDebugInfo)
break;
}
if (MMI && MMI->hasDebugInfo() && HasFullDebugInfo)
if (HasFullDebugInfo)
O << ", debug";

O << "\n";
Expand All @@ -928,8 +928,6 @@ void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
}

bool NVPTXAsmPrinter::doFinalization(Module &M) {
bool HasDebugInfo = MMI && MMI->hasDebugInfo();

// If we did not emit any functions, then the global declarations have not
// yet been emitted.
if (!GlobalsEmitted) {
Expand All @@ -945,7 +943,7 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) {
auto *TS =
static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer());
// Close the last emitted section
if (HasDebugInfo) {
if (hasDebugInfo()) {
TS->closeLastSection();
// Emit empty .debug_loc section for better support of the empty files.
OutStreamer->emitRawText("\t.section\t.debug_loc\t{\t}");
Expand Down
12 changes: 6 additions & 6 deletions llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3137,11 +3137,11 @@ void PPCAIXAsmPrinter::emitInstruction(const MachineInstr *MI) {
break;
MCSymbol *TempSym = OutContext.createNamedTempSymbol();
OutStreamer->emitLabel(TempSym);
OutStreamer->emitXCOFFExceptDirective(CurrentFnSym, TempSym,
LangMO.getImm(), ReasonMO.getImm(),
Subtarget->isPPC64() ? MI->getMF()->getInstructionCount() * 8 :
MI->getMF()->getInstructionCount() * 4,
MMI->hasDebugInfo());
OutStreamer->emitXCOFFExceptDirective(
CurrentFnSym, TempSym, LangMO.getImm(), ReasonMO.getImm(),
Subtarget->isPPC64() ? MI->getMF()->getInstructionCount() * 8
: MI->getMF()->getInstructionCount() * 4,
hasDebugInfo());
break;
}
case PPC::GETtlsMOD32AIX:
Expand Down Expand Up @@ -3199,7 +3199,7 @@ void PPCAIXAsmPrinter::emitInstruction(const MachineInstr *MI) {

bool PPCAIXAsmPrinter::doFinalization(Module &M) {
// Do streamer related finalization for DWARF.
if (!MAI->usesDwarfFileAndLocDirectives() && MMI->hasDebugInfo())
if (!MAI->usesDwarfFileAndLocDirectives() && hasDebugInfo())
OutStreamer->doFinalizationAtSectionEnd(
OutStreamer->getContext().getObjectFileInfo()->getTextSection());

Expand Down
23 changes: 23 additions & 0 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7215,6 +7215,8 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
// Reserve space for the linkage area on the stack.
const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
uint64_t SaveStackPos = CCInfo.getStackSize();
bool SaveParams = MF.getFunction().hasFnAttribute("save-reg-params");
CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);

SmallVector<SDValue, 8> MemOps;
Expand All @@ -7233,6 +7235,27 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
continue;

if (SaveParams && VA.isRegLoc() && !Flags.isByVal() && !VA.needsCustom()) {
const TargetRegisterClass *RegClass = getRegClassForSVT(
LocVT.SimpleTy, IsPPC64, Subtarget.hasP8Vector(), Subtarget.hasVSX());
// On PPC64, debugger assumes extended 8-byte values are stored from GPR.
MVT SaveVT = RegClass == &PPC::G8RCRegClass ? MVT::i64 : LocVT;
const Register VReg = MF.addLiveIn(VA.getLocReg(), RegClass);
SDValue Parm = DAG.getCopyFromReg(Chain, dl, VReg, SaveVT);
int FI = MFI.CreateFixedObject(SaveVT.getStoreSize(), SaveStackPos, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue StoreReg = DAG.getStore(Chain, dl, Parm, FIN,
MachinePointerInfo(), Align(PtrByteSize));
SaveStackPos = alignTo(SaveStackPos + SaveVT.getStoreSize(), PtrByteSize);
MemOps.push_back(StoreReg);
}

if (SaveParams && (VA.isMemLoc() || Flags.isByVal()) && !VA.needsCustom()) {
unsigned StoreSize =
Flags.isByVal() ? Flags.getByValSize() : LocVT.getStoreSize();
SaveStackPos = alignTo(SaveStackPos + StoreSize, PtrByteSize);
}

auto HandleMemLoc = [&]() {
const unsigned LocSize = LocVT.getStoreSize();
const unsigned ValSize = ValVT.getStoreSize();
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Transforms/IPO/FunctionAttrs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,9 +118,9 @@ static void addLocAccess(MemoryEffects &ME, const MemoryLocation &Loc,
if (isNoModRef(MR))
return;

const Value *UO = getUnderlyingObject(Loc.Ptr);
assert(!isa<AllocaInst>(UO) &&
"Should have been handled by getModRefInfoMask()");
const Value *UO = getUnderlyingObjectAggressive(Loc.Ptr);
if (isa<AllocaInst>(UO))
return;
if (isa<Argument>(UO)) {
ME |= MemoryEffects::argMemOnly(MR);
return;
Expand Down
30 changes: 10 additions & 20 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -206,11 +206,6 @@ static cl::opt<unsigned> VectorizeMemoryCheckThreshold(
"vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
cl::desc("The maximum allowed number of runtime memory checks"));

static cl::opt<bool> UseLegacyCostModel(
"vectorize-use-legacy-cost-model", cl::init(false), cl::Hidden,
cl::desc("Use the legacy cost model instead of the VPlan-based cost model. "
"This option will be removed in the future."));

// Option prefer-predicate-over-epilogue indicates that an epilogue is undesired,
// that predication is preferred, and this lists all options. I.e., the
// vectorizer will try to fold the tail-loop (epilogue) into the vector body
Expand Down Expand Up @@ -9976,9 +9971,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL,
&CM, BFI, PSI, Checks);

VPlan &BestPlan =
UseLegacyCostModel ? LVP.getBestPlanFor(VF.Width) : LVP.getBestPlan();
assert((UseLegacyCostModel || BestPlan.hasScalarVFOnly()) &&
VPlan &BestPlan = LVP.getBestPlan();
assert(BestPlan.hasScalarVFOnly() &&
"VPlan cost model and legacy cost model disagreed");
LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT, false);

Expand Down Expand Up @@ -10095,18 +10089,14 @@ bool LoopVectorizePass::processLoop(Loop *L) {
if (!MainILV.areSafetyChecksAdded())
DisableRuntimeUnroll = true;
} else {
ElementCount Width = VF.Width;
VPlan &BestPlan =
UseLegacyCostModel ? LVP.getBestPlanFor(Width) : LVP.getBestPlan();
if (!UseLegacyCostModel) {
assert(size(BestPlan.vectorFactors()) == 1 &&
"Plan should have a single VF");
Width = *BestPlan.vectorFactors().begin();
LLVM_DEBUG(dbgs()
<< "VF picked by VPlan cost model: " << Width << "\n");
assert(VF.Width == Width &&
"VPlan cost model and legacy cost model disagreed");
}
VPlan &BestPlan = LVP.getBestPlan();
assert(size(BestPlan.vectorFactors()) == 1 &&
"Plan should have a single VF");
ElementCount Width = *BestPlan.vectorFactors().begin();
LLVM_DEBUG(dbgs() << "VF picked by VPlan cost model: " << Width
<< "\n");
assert(VF.Width == Width &&
"VPlan cost model and legacy cost model disagreed");
InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, Width,
VF.MinProfitableTripCount, IC, &LVL, &CM, BFI,
PSI, Checks);
Expand Down
19 changes: 19 additions & 0 deletions llvm/test/CodeGen/AArch64/trampoline.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
; RUN: llc -mtriple=aarch64-- < %s | FileCheck %s

declare void @llvm.init.trampoline(ptr, ptr, ptr);
declare ptr @llvm.adjust.trampoline(ptr);

; Function whose address is passed to llvm.init.trampoline in @main below.
; Takes a 'nest' pointer argument (not read by the body) and returns %x + %y.
define i64 @f(ptr nest %c, i64 %x, i64 %y) {
%sum = add i64 %x, %y
ret i64 %sum
}

; Driver for the trampoline intrinsics: initialises a trampoline for @f in a
; stack buffer, then requests its adjusted address. The test only verifies
; that a call to __trampoline_setup is emitted.
define i64 @main() {
; Stack slot whose address is used as the 'nest' value.
%val = alloca i64
%nval = bitcast ptr %val to ptr
; 36-byte, 8-byte-aligned stack buffer that receives the trampoline.
%tramp = alloca [36 x i8], align 8
; CHECK: bl __trampoline_setup
call void @llvm.init.trampoline(ptr %tramp, ptr @f, ptr %nval)
; The adjusted pointer is computed but not used afterwards.
%fp = call ptr @llvm.adjust.trampoline(ptr %tramp)
ret i64 0
}
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,6 @@ define amdgpu_kernel void @marked_kernel_use_other_sgpr(ptr addrspace(1) %ptr) #
%queue.load = load volatile i8, ptr addrspace(4) %queue.ptr
%implicitarg.load = load volatile i8, ptr addrspace(4) %implicitarg.ptr
%dispatch.load = load volatile i8, ptr addrspace(4) %dispatch.ptr
store volatile i64 %dispatch.id, ptr addrspace(1) %ptr
ret void
}

Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ define amdgpu_kernel void @store_bitcast_constant_v8i32_to_v16i16(ptr addrspace(
define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source(ptr addrspace(1) %out, i64 %a, i64 %b) #0 {
%undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 999) #1
%bc = bitcast i64 %undef to <2 x i32>
store volatile <2 x i32> %bc, ptr addrspace(1) %out
store <2 x i32> %bc, ptr addrspace(1) %out
ret void
}

Expand All @@ -83,7 +83,7 @@ define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source_extractel
%undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 9999) #1
%bc = bitcast i64 %undef to <2 x i32>
%elt1 = extractelement <2 x i32> %bc, i32 1
store volatile i32 %elt1, ptr addrspace(1) %out
store i32 %elt1, ptr addrspace(1) %out
ret void
}

Expand Down
25 changes: 23 additions & 2 deletions llvm/test/CodeGen/AMDGPU/dpp64_combine.ll
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,DPP64,GFX90A
; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,DPP64,DPPMOV64
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,DPP32,GFX10PLUS
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,DPP32,GFX10PLUS
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,DPP32,GFX10PLUS,GFX10
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,DPP32,GFX10PLUS,GFX11

; GCN-LABEL: {{^}}dpp64_ceil:
; GCN: global_load_{{dwordx2|b64}} [[V:v\[[0-9:]+\]]],
Expand Down Expand Up @@ -69,6 +69,27 @@ define amdgpu_kernel void @dpp64_div(ptr addrspace(1) %arg, i64 %in1) {
ret void
}

; Endless-loop case: bb1 branches back to itself, and every iteration feeds the
; result of llvm.amdgcn.update.dpp.i64 into a 64-bit add with %arg. The
; per-prefix assertions below list the DPP move / add instructions expected for
; each RUN configuration.
; GCN-LABEL: {{^}}dpp64_loop:
; GCN: v_mov_b32_dpp
; DPP64: v_mov_b32_dpp
; GFX90A: v_add_co_u32_e32
; GFX90A: v_addc_co_u32_e32
; DPPMOV64: v_lshl_add_u64
; GFX10: v_mov_b32_dpp
; GFX10: v_add_co_u32
; GFX10: v_add_co_ci_u32_e32
; GFX11: v_add_co_u32_e64_dpp
; GFX11: v_add_co_ci_u32_e32
define amdgpu_cs void @dpp64_loop(i64 %arg) {
bb:
br label %bb1
bb1:
; Every operand of the DPP update is zero / false.
%i = call i64 @llvm.amdgcn.update.dpp.i64(i64 0, i64 0, i32 0, i32 0, i32 0, i1 false)
%i2 = add i64 %i, %arg
; The sum is consumed by a monotonic atomic add through a null addrspace(1)
; pointer; the atomic's result %i3 is not used.
%i3 = atomicrmw add ptr addrspace(1) null, i64 %i2 monotonic, align 8
br label %bb1
}

declare i32 @llvm.amdgcn.workitem.id.x()
declare i64 @llvm.amdgcn.update.dpp.i64(i64, i64, i32, i32, i32, i1) #0
declare double @llvm.ceil.f64(double)
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,8 @@ define void @func_use_lds_global() {
ret void
}

; ERR: warning: <unknown>:0:0: in function func_use_lds_global_constexpr_cast void (): local memory global used by non-kernel function
define void @func_use_lds_global_constexpr_cast() {
; ERR: warning: <unknown>:0:0: in function func_use_lds_global_constexpr_cast void (ptr addrspace(1)): local memory global used by non-kernel function
define void @func_use_lds_global_constexpr_cast(ptr addrspace(1) %out) {
; GFX8-SDAG-LABEL: func_use_lds_global_constexpr_cast:
; GFX8-SDAG: ; %bb.0:
; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
Expand Down Expand Up @@ -153,7 +153,7 @@ define void @func_use_lds_global_constexpr_cast() {
; GISEL-NEXT: s_setpc_b64 s[30:31]
; GISEL-NEXT: .LBB1_2:
; GISEL-NEXT: s_endpgm
store volatile i32 ptrtoint (ptr addrspace(3) @lds to i32), ptr addrspace(1) poison, align 4
store i32 ptrtoint (ptr addrspace(3) @lds to i32), ptr addrspace(1) %out, align 4
ret void
}

Expand Down
23 changes: 17 additions & 6 deletions llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9-SDAG %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9-GISEL %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH-SDAG %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH-GISEL %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
Expand Down Expand Up @@ -126,10 +126,21 @@ define amdgpu_cs void @caller() {
declare amdgpu_gfx void @callee(i32)

define amdgpu_gfx void @workgroup_ids_gfx(ptr addrspace(1) %outx, ptr addrspace(1) %outy, ptr addrspace(1) %outz) {
; GFX9-LABEL: workgroup_ids_gfx:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
; GFX9-SDAG-LABEL: workgroup_ids_gfx:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: global_store_dword v[0:1], v0, off
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: global_store_dword v[2:3], v0, off
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: global_store_dword v[4:5], v0, off
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: workgroup_ids_gfx:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9ARCH-SDAG-LABEL: workgroup_ids_gfx:
; GFX9ARCH-SDAG: ; %bb.0:
Expand Down
79 changes: 79 additions & 0 deletions llvm/test/CodeGen/AMDGPU/machinelicm-undef-use.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=early-machinelicm %s -o - | FileCheck %s

# Issue #100115: test that MachineLICM does not assert on the undef use operand
# of the REG_SEQUENCE instruction.
---
# Input for early-machinelicm: bb.1 holds a REG_SEQUENCE whose two register
# operands are both undef uses, bb.3 branches back to itself, and bb.2 / bb.4
# can branch back to bb.1. The expected output below keeps every instruction in
# the block it starts in.
name: test_undef_use
tracksRegLiveness: true
body: |
  ; CHECK-LABEL: name: test_undef_use
  ; CHECK: bb.0:
  ; CHECK-NEXT: successors: %bb.1(0x80000000)
  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT: successors: %bb.3(0x80000000)
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE undef %3:vgpr_32, %subreg.sub0, undef [[DEF]], %subreg.sub1
  ; CHECK-NEXT: S_BRANCH %bb.3
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT: successors: %bb.5(0x04000000), %bb.1(0x7c000000)
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: $vcc_lo = COPY undef [[DEF1]]
  ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.5, implicit $vcc_lo
  ; CHECK-NEXT: S_BRANCH %bb.1
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.3:
  ; CHECK-NEXT: successors: %bb.4(0x04000000), %bb.3(0x7c000000)
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit undef $scc
  ; CHECK-NEXT: S_BRANCH %bb.4
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.4:
  ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: SI_LOOP undef [[DEF1]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
  ; CHECK-NEXT: S_BRANCH %bb.2
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.5:
  ; CHECK-NEXT: S_ENDPGM 0
  bb.0:
    successors: %bb.1(0x80000000)
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = IMPLICIT_DEF
    %1:sreg_32 = IMPLICIT_DEF

  bb.1:
    successors: %bb.3(0x80000000)

    ; Both register operands are undef uses: %3 has no definition anywhere in
    ; this function, and %0 is only defined by the IMPLICIT_DEF in bb.0.
    %2:vreg_64 = REG_SEQUENCE undef %3:vgpr_32, %subreg.sub0, undef %0, %subreg.sub1
    S_BRANCH %bb.3

  bb.2:
    successors: %bb.5(0x04000000), %bb.1(0x7c000000)

    ; The branch reads $vcc_lo, the register written by the COPY just above, so
    ; the printed output agrees with the expected S_CBRANCH_VCCNZ line.
    $vcc_lo = COPY undef %1
    S_CBRANCH_VCCNZ %bb.5, implicit $vcc_lo
    S_BRANCH %bb.1

  bb.3:
    successors: %bb.4(0x04000000), %bb.3(0x7c000000)

    S_CBRANCH_SCC1 %bb.3, implicit undef $scc
    S_BRANCH %bb.4

  bb.4:
    successors: %bb.2(0x40000000), %bb.1(0x40000000)

    SI_LOOP undef %1, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
    S_BRANCH %bb.2

  bb.5:
    S_ENDPGM 0
...
36 changes: 18 additions & 18 deletions llvm/test/CodeGen/AMDGPU/mem-builtins.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,64 +9,64 @@ declare hidden i32 @strnlen(ptr nocapture, i32) #1
declare hidden i32 @strcmp(ptr nocapture, ptr nocapture) #1


; ERROR: error: <unknown>:0:0: in function test_memcmp void (ptr addrspace(1), ptr addrspace(1), ptr): unsupported call to function memcmp
; ERROR: error: <unknown>:0:0: in function test_memcmp void (ptr addrspace(1), ptr addrspace(1), ptr, ptr addrspace(1)): unsupported call to function memcmp

; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, memcmp@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, memcmp@rel32@hi+12
define amdgpu_kernel void @test_memcmp(ptr addrspace(1) %x, ptr addrspace(1) %y, ptr nocapture %p) #0 {
define amdgpu_kernel void @test_memcmp(ptr addrspace(1) %x, ptr addrspace(1) %y, ptr nocapture %p, ptr addrspace(1) %out) #0 {
entry:
%cmp = tail call i32 @memcmp(ptr addrspace(1) %x, ptr addrspace(1) %y, i64 2)
store volatile i32 %cmp, ptr addrspace(1) undef
store i32 %cmp, ptr addrspace(1) %out
ret void
}

; ERROR: error: <unknown>:0:0: in function test_memchr void (ptr addrspace(1), i32, i64): unsupported call to function memchr
; ERROR: error: <unknown>:0:0: in function test_memchr void (ptr addrspace(1), i32, i64, ptr addrspace(1)): unsupported call to function memchr

; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, memchr@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, memchr@rel32@hi+12
define amdgpu_kernel void @test_memchr(ptr addrspace(1) %src, i32 %char, i64 %len) #0 {
define amdgpu_kernel void @test_memchr(ptr addrspace(1) %src, i32 %char, i64 %len, ptr addrspace(1) %out) #0 {
%res = call ptr addrspace(1) @memchr(ptr addrspace(1) %src, i32 %char, i64 %len)
store volatile ptr addrspace(1) %res, ptr addrspace(1) undef
store ptr addrspace(1) %res, ptr addrspace(1) %out
ret void
}

; ERROR: error: <unknown>:0:0: in function test_strcpy void (ptr, ptr): unsupported call to function strcpy
; ERROR: error: <unknown>:0:0: in function test_strcpy void (ptr, ptr, ptr addrspace(1)): unsupported call to function strcpy

; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcpy@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcpy@rel32@hi+12
define amdgpu_kernel void @test_strcpy(ptr %dst, ptr %src) #0 {
define amdgpu_kernel void @test_strcpy(ptr %dst, ptr %src, ptr addrspace(1) %out) #0 {
%res = call ptr @strcpy(ptr %dst, ptr %src)
store volatile ptr %res, ptr addrspace(1) undef
store ptr %res, ptr addrspace(1) %out
ret void
}

; ERROR: error: <unknown>:0:0: in function test_strcmp void (ptr, ptr): unsupported call to function strcmp
; ERROR: error: <unknown>:0:0: in function test_strcmp void (ptr, ptr, ptr addrspace(1)): unsupported call to function strcmp

; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcmp@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcmp@rel32@hi+12
define amdgpu_kernel void @test_strcmp(ptr %src0, ptr %src1) #0 {
define amdgpu_kernel void @test_strcmp(ptr %src0, ptr %src1, ptr addrspace(1) %out) #0 {
%res = call i32 @strcmp(ptr %src0, ptr %src1)
store volatile i32 %res, ptr addrspace(1) undef
store i32 %res, ptr addrspace(1) %out
ret void
}

; ERROR: error: <unknown>:0:0: in function test_strlen void (ptr): unsupported call to function strlen
; ERROR: error: <unknown>:0:0: in function test_strlen void (ptr, ptr addrspace(1)): unsupported call to function strlen

; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strlen@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strlen@rel32@hi+12
define amdgpu_kernel void @test_strlen(ptr %src) #0 {
define amdgpu_kernel void @test_strlen(ptr %src, ptr addrspace(1) %out) #0 {
%res = call i32 @strlen(ptr %src)
store volatile i32 %res, ptr addrspace(1) undef
store i32 %res, ptr addrspace(1) %out
ret void
}

; ERROR: error: <unknown>:0:0: in function test_strnlen void (ptr, i32): unsupported call to function strnlen
; ERROR: error: <unknown>:0:0: in function test_strnlen void (ptr, i32, ptr addrspace(1)): unsupported call to function strnlen

; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strnlen@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strnlen@rel32@hi+12
define amdgpu_kernel void @test_strnlen(ptr %src, i32 %size) #0 {
define amdgpu_kernel void @test_strnlen(ptr %src, i32 %size, ptr addrspace(1) %out) #0 {
%res = call i32 @strnlen(ptr %src, i32 %size)
store volatile i32 %res, ptr addrspace(1) undef
store i32 %res, ptr addrspace(1) %out
ret void
}

Expand Down
50 changes: 0 additions & 50 deletions llvm/test/CodeGen/Generic/disable-debug-info-print.ll

This file was deleted.

58 changes: 58 additions & 0 deletions llvm/test/CodeGen/NVPTX/store-undef.ll
Original file line number Diff line number Diff line change
Expand Up @@ -90,3 +90,61 @@ define void @test_store_def(i64 %param0, i32 %param1, ptr %out) {
store %struct.T %S2, ptr %out
ret void
}

; Three volatile stores in sequence: an undef %struct.T to %out, %vec through an
; undef pointer, and an undef <8 x i32> to %out. The assertions in the body
; expect st.volatile instructions for each of them.
define void @test_store_volatile_undef(ptr %out, <8 x i32> %vec) {
; CHECK-LABEL: test_store_volatile_undef(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<23>;
; CHECK-NEXT: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [test_store_volatile_undef_param_0];
; CHECK-NEXT: st.volatile.v4.u32 [%rd1+16], {%r1, %r2, %r3, %r4};
; CHECK-NEXT: st.volatile.v2.u32 [%rd1+8], {%r5, %r6};
; CHECK-NEXT: st.volatile.u64 [%rd1], %rd2;
; CHECK-NEXT: ld.param.v4.u32 {%r7, %r8, %r9, %r10}, [test_store_volatile_undef_param_1];
; CHECK-NEXT: ld.param.v4.u32 {%r11, %r12, %r13, %r14}, [test_store_volatile_undef_param_1+16];
; CHECK-NEXT: st.volatile.v4.u32 [%rd3], {%r11, %r12, %r13, %r14};
; CHECK-NEXT: st.volatile.v4.u32 [%rd4], {%r7, %r8, %r9, %r10};
; CHECK-NEXT: st.volatile.v4.u32 [%rd1+16], {%r15, %r16, %r17, %r18};
; CHECK-NEXT: st.volatile.v4.u32 [%rd1], {%r19, %r20, %r21, %r22};
; CHECK-NEXT: ret;
store volatile %struct.T undef, ptr %out
store volatile <8 x i32> %vec, ptr undef
store volatile <8 x i32> undef, ptr %out
ret void
}

; Volatile store of a poison %struct.T to %out. The assertions in the body still
; expect three st.volatile instructions addressed relative to the loaded %out.
define void @test_store_volatile_of_poison(ptr %out) {
; CHECK-LABEL: test_store_volatile_of_poison(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<7>;
; CHECK-NEXT: .reg .b64 %rd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [test_store_volatile_of_poison_param_0];
; CHECK-NEXT: st.volatile.v4.u32 [%rd1+16], {%r1, %r2, %r3, %r4};
; CHECK-NEXT: st.volatile.v2.u32 [%rd1+8], {%r5, %r6};
; CHECK-NEXT: st.volatile.u64 [%rd1], %rd2;
; CHECK-NEXT: ret;
store volatile %struct.T poison, ptr %out
ret void
}

; Volatile store of %param through a poison pointer. The assertions in the body
; expect the parameter to be loaded and three st.volatile instructions to be
; emitted for it.
define void @test_store_volatile_to_poison(%struct.T %param) {
; CHECK-LABEL: test_store_volatile_to_poison(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<7>;
; CHECK-NEXT: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [test_store_volatile_to_poison_param_0];
; CHECK-NEXT: ld.param.v2.u32 {%r1, %r2}, [test_store_volatile_to_poison_param_0+8];
; CHECK-NEXT: ld.param.v4.u32 {%r3, %r4, %r5, %r6}, [test_store_volatile_to_poison_param_0+16];
; CHECK-NEXT: st.volatile.v4.u32 [%rd2], {%r3, %r4, %r5, %r6};
; CHECK-NEXT: st.volatile.v2.u32 [%rd3], {%r1, %r2};
; CHECK-NEXT: st.volatile.u64 [%rd4], %rd1;
; CHECK-NEXT: ret;
store volatile %struct.T %param, ptr poison
ret void
}
867 changes: 867 additions & 0 deletions llvm/test/CodeGen/PowerPC/save-reg-params.ll

Large diffs are not rendered by default.

19 changes: 0 additions & 19 deletions llvm/test/CodeGen/X86/disable-debug-info-print-codeview.ll

This file was deleted.

4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/frame-order.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=x86_64-linux-gnueabi -disable-debug-info-print < %s | FileCheck %s
; RUN: opt -passes=strip -S < %s | llc -mtriple=x86_64-linux-gnueabi -disable-debug-info-print | FileCheck %s
; RUN: llc -mtriple=x86_64-linux-gnueabi < %s | FileCheck %s
; RUN: opt -passes=strip -S < %s | llc -mtriple=x86_64-linux-gnueabi | FileCheck %s

; This test checks if the code is generated correctly with and without debug info.

Expand Down
887 changes: 887 additions & 0 deletions llvm/test/CodeGen/X86/var-permute-128.ll

Large diffs are not rendered by default.

759 changes: 759 additions & 0 deletions llvm/test/CodeGen/X86/var-permute-256.ll

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion llvm/test/MC/AMDGPU/gfx10_err_pos.s
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+WavefrontSize32,-WavefrontSize64 %s 2>&1 | FileCheck %s --implicit-check-not=error: --strict-whitespace
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck %s --implicit-check-not=error: --strict-whitespace

//==============================================================================
// operands are not valid for this GPU or mode
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/MC/AMDGPU/gfx12_asm_vop1.s
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-ASM %s
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-wavefrontsize64 -disassemble -show-encoding | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-DIS %s
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -disassemble -show-encoding | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-DIS %s
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-ASM %s
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -disassemble -show-encoding | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-DIS %s
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -disassemble -show-encoding | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-DIS %s

v_bfrev_b32_e32 v5, v1
// GFX12: v_bfrev_b32_e32 v5, v1 ; encoding: [0x01,0x71,0x0a,0x7e]
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/MC/Disassembler/AMDGPU/decode-err.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx900 -disassemble -show-encoding -filetype=null < %s 2>&1 | FileCheck -check-prefix=GCN-ERR %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s 2>&1 | FileCheck -check-prefixes=W32 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s 2>&1 | FileCheck -check-prefixes=W64 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s 2>&1 | FileCheck -check-prefixes=W64 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding -filetype=null < %s 2>&1 | FileCheck -check-prefix=GFX11-ERR %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -disassemble -show-encoding -filetype=null < %s 2>&1 | FileCheck -check-prefix=GFX12-ERR %s

Expand Down
2 changes: 1 addition & 1 deletion llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_ds.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX11 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX11 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX11 %s

# GFX11: ds_add_f32 v0, v1 ; encoding: [0x00,0x00,0x54,0xd8,0x00,0x01,0x00,0x00]
0x00,0x00,0x54,0xd8,0x00,0x01,0x00,0x00
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_smem.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX11 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX11 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX11 %s

# GFX11: s_atc_probe 7, s[4:5], 0x64 ; encoding: [0xc2,0x01,0x88,0xf4,0x64,0x00,0x00,0xf8]
0xc2,0x01,0x88,0xf4,0x64,0x00,0x00,0xf8
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_sop1.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX11 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX11 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX11 %s

# GFX11: s_abs_i32 exec_hi, s1 ; encoding: [0x01,0x15,0xff,0xbe]
0x01,0x15,0xff,0xbe
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_sop2.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX11 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX11 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX11 %s

# GFX11: s_absdiff_i32 exec_hi, s1, s2 ; encoding: [0x01,0x02,0x7f,0x83]
0x01,0x02,0x7f,0x83
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_sopc.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX11 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX11 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX11 %s

# GFX11: s_bitcmp0_b32 exec_hi, s1 ; encoding: [0x7f,0x01,0x0c,0xbf]
0x7f,0x01,0x0c,0xbf
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_sopk.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX11 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX11 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX11 %s

# GFX11: s_addk_i32 exec_hi, 0x1234 ; encoding: [0x34,0x12,0xff,0xb7]
0x34,0x12,0xff,0xb7
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_sopp.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX11 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX11 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX11 %s

# GFX11: s_barrier ; encoding: [0x00,0x00,0xbd,0xbf]
0x00,0x00,0xbd,0xbf
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefixes=GFX11,GFX11-REAL16 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefixes=GFX11,GFX11-FAKE16 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16,-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefixes=GFX11,GFX11-REAL16 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16,-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefixes=GFX11,GFX11-FAKE16 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16,+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefixes=GFX11,GFX11-REAL16 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16,+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefixes=GFX11,GFX11-FAKE16 %s

# GFX11: v_bfrev_b32_e32 v5, v1 ; encoding: [0x01,0x71,0x0a,0x7e]
0x01,0x71,0x0a,0x7e
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,GFX11-REAL16 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16,-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,GFX11-REAL16 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16,-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16,+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,GFX11-REAL16 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16,+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s

# GFX11: v_bfrev_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x1b,0x00,0xff]
0xfa,0x70,0x0a,0x7e,0x01,0x1b,0x00,0xff
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX11 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16,-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,GFX11-REAL16 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16,-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16,+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,GFX11-REAL16 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16,+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s

# GFX11: v_bfrev_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x70,0x0a,0x7e,0x01,0x77,0x39,0x05]
0xe9,0x70,0x0a,0x7e,0x01,0x77,0x39,0x05
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefixes=GFX11,W32,GFX11-FAKE16 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefixes=GFX11,W32,GFX11-REAL16 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefixes=GFX11,W64 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefixes=GFX11,W64 %s

# W32: v_add_co_ci_u32_e32 v5, vcc_lo, v1, v2, vcc_lo ; encoding: [0x01,0x05,0x0a,0x40]
# W64: v_add_co_ci_u32_e32 v5, vcc, v1, v2, vcc ; encoding: [0x01,0x05,0x0a,0x40]
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2_dpp16.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64 %s

# W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1b,0x00,0xff]
# W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1b,0x00,0xff]
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2_dpp8.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64 %s

# W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x40,0x01,0x77,0x39,0x05]
# W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x40,0x01,0x77,0x39,0x05]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64 %s

# W32: v_cmp_class_f16_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
# W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11 %s

# GFX11: v_cmpx_class_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64 %s

# W32: v_cmp_class_f16_e64_dpp s10, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
# W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11 %s

# GFX11: v_cmpx_class_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xfd,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x7e,0x00,0xfd,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64 %s

# W32: v_cmp_class_f16_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x05,0x02,0x00]
# W64: v_cmp_class_f16_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x05,0x02,0x00]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11 %s

# GFX11: v_cmpx_class_f16_e64 v1, v2 ; encoding: [0x7e,0x00,0xfd,0xd4,0x01,0x05,0x02,0x00]
0x7e,0x00,0xfd,0xd4,0x01,0x05,0x02,0x00
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3p.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX11 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX11 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX11 %s

# GFX11: v_dot2_f32_bf16 v5, v1, v2, v3 ; encoding: [0x05,0x40,0x1a,0xcc,0x01,0x05,0x0e,0x1c]
0x05,0x40,0x1a,0xcc,0x01,0x05,0x0e,0x1c
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX11 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX11 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX11 %s

# GFX11: v_dot2_f32_f16_e64_dpp v0, v1, v2, v3 neg_lo:[1,1,0] neg_hi:[1,0,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xe ; encoding: [0x00,0x45,0x13,0xcc,0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xfe]
0x00,0x45,0x13,0xcc,0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xfe
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3p_dpp8.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX11 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX11 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX11 %s

# GFX11: v_dot2_f32_f16_e64_dpp v0, v1, v2, v3 neg_lo:[0,1,1] neg_hi:[1,0,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x00,0x45,0x13,0xcc,0xe9,0x04,0x0e,0xdc,0x01,0x77,0x39,0x05]
0x00,0x45,0x13,0xcc,0xe9,0x04,0x0e,0xdc,0x01,0x77,0x39,0x05
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop2.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefix=GFX12 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefix=GFX12 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefix=GFX12 %s

# GFX12: s_add_nc_u64 s[0:1], s[2:3], s[4:5] ; encoding: [0x02,0x04,0x80,0xa9]
0x02,0x04,0x80,0xa9
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopk.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefix=GFX12 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefix=GFX12 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefix=GFX12 %s

# GFX12: s_addk_co_i32 exec_hi, 0x1234 ; encoding: [0x34,0x12,0xff,0xb7]
0x34,0x12,0xff,0xb7
Expand Down
Loading