41 changes: 41 additions & 0 deletions clang/test/Analysis/issue-94193.cpp
@@ -0,0 +1,41 @@
// RUN: %clang_analyze_cc1 %s -verify -analyzer-checker=core

#include "Inputs/system-header-simulator-cxx.h"


namespace GH94193 {
template<typename T> class optional {
union {
char x;
T uvalue;
};
bool holds_value = false;
public:
optional() = default;
optional(const optional&) = delete;
optional(optional&&) = delete;
template <typename U = T> explicit optional(U&& value) : holds_value(true) {
new (static_cast<void*>(std::addressof(uvalue))) T(std::forward<U>(value));
}
optional& operator=(const optional&) = delete;
optional& operator=(optional&&) = delete;
explicit operator bool() const {
return holds_value;
}
T& unwrap() & {
return uvalue; // no-warning: returns a valid value
}
};

int top1(int x) {
optional<int> opt{x}; // note: Ctor was inlined.
return opt.unwrap(); // no-warning: returns a valid value
}

std::string *top2() {
std::string a = "123";
// expected-warning@+2 {{address of stack memory associated with local variable 'a' returned}} diagnosed by -Wreturn-stack-address
// expected-warning@+1 {{Address of stack memory associated with local variable 'a' returned to caller [core.StackAddressEscape]}}
return std::addressof(a);
}
} // namespace GH94193
9 changes: 2 additions & 7 deletions clang/test/Analysis/use-after-move.cpp
@@ -570,13 +570,8 @@ void differentBranchesTest(int i) {
{
A a;
a.foo() > 0 ? a.foo() : A(std::move(a)).foo();
#ifdef DFS
// peaceful-note@-2 {{Assuming the condition is false}}
// peaceful-note@-3 {{'?' condition is false}}
#else
// peaceful-note@-5 {{Assuming the condition is true}}
// peaceful-note@-6 {{'?' condition is true}}
#endif
// peaceful-note@-1 {{Assuming the condition is true}}
// peaceful-note@-2 {{'?' condition is true}}
}
// Same thing, but with a switch statement.
{
48 changes: 48 additions & 0 deletions clang/test/CodeGen/attr-counted-by.c
@@ -1906,3 +1906,51 @@ struct test30_struct {
void test30(struct test30_struct *ptr, int idx) {
ptr->pcpu_refcnt.__padding[idx] = __builtin_dynamic_object_size(ptr, 1);
}

struct test31_empty {};

struct test31_struct {
struct test31_empty y;
int s;
int x[] __counted_by(s);
};

// SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test31(
// SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[PTR:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR2]] {
// SANITIZE-WITH-ATTR-NEXT: entry:
// SANITIZE-WITH-ATTR-NEXT: [[DOT_COUNTED_BY_LOAD:%.*]] = load i32, ptr [[PTR]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = sext i32 [[DOT_COUNTED_BY_LOAD]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = shl nsw i64 [[TMP0]], 2
// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP1]], i64 0)
// SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32
// SANITIZE-WITH-ATTR-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 4
// SANITIZE-WITH-ATTR-NEXT: [[DOTINV:%.*]] = icmp slt i32 [[DOT_COUNTED_BY_LOAD]], 0
// SANITIZE-WITH-ATTR-NEXT: [[CONV:%.*]] = select i1 [[DOTINV]], i32 0, i32 [[TMP4]]
// SANITIZE-WITH-ATTR-NEXT: ret i32 [[CONV]]
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test31(
// NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[PTR:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR2]] {
// NO-SANITIZE-WITH-ATTR-NEXT: entry:
// NO-SANITIZE-WITH-ATTR-NEXT: [[DOT_COUNTED_BY_LOAD:%.*]] = load i32, ptr [[PTR]], align 4
// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = sext i32 [[DOT_COUNTED_BY_LOAD]] to i64
// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = shl nsw i64 [[TMP0]], 2
// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP1]], i64 0)
// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32
// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 4
// NO-SANITIZE-WITH-ATTR-NEXT: [[DOTINV:%.*]] = icmp slt i32 [[DOT_COUNTED_BY_LOAD]], 0
// NO-SANITIZE-WITH-ATTR-NEXT: [[CONV:%.*]] = select i1 [[DOTINV]], i32 0, i32 [[TMP4]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret i32 [[CONV]]
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i32 @test31(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR2]] {
// SANITIZE-WITHOUT-ATTR-NEXT: entry:
// SANITIZE-WITHOUT-ATTR-NEXT: ret i32 -1
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i32 @test31(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] {
// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i32 -1
//
int test31(struct test31_struct *ptr, int idx) {
return __builtin_dynamic_object_size(ptr, 0);
}
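// Per the CHECK lines above: with the counted_by attribute the dynamic object
// size folds to (s < 0 ? 0 : 4 * s + 4), i.e. the 4-byte 's' member plus the
// counted flexible array, while without the attribute the whole-object size is
// unknown and the builtin returns -1.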
11 changes: 11 additions & 0 deletions clang/test/Driver/loongarch-march.c
@@ -6,6 +6,8 @@
// RUN: FileCheck %s --check-prefix=CC1-LA64V1P0
// RUN: %clang --target=loongarch64 -march=la64v1.1 -fsyntax-only %s -### 2>&1 | \
// RUN: FileCheck %s --check-prefix=CC1-LA64V1P1
// RUN: %clang --target=loongarch64 -march=la664 -fsyntax-only %s -### 2>&1 | \
// RUN: FileCheck %s --check-prefix=CC1-LA664
// RUN: %clang --target=loongarch64 -march=loongarch64 -S -emit-llvm %s -o - | \
// RUN: FileCheck %s --check-prefix=IR-LOONGARCH64
// RUN: %clang --target=loongarch64 -march=la464 -S -emit-llvm %s -o - | \
@@ -14,6 +16,8 @@
// RUN: FileCheck %s --check-prefix=IR-LA64V1P0
// RUN: %clang --target=loongarch64 -march=la64v1.1 -S -emit-llvm %s -o - | \
// RUN: FileCheck %s --check-prefix=IR-LA64V1P1
// RUN: %clang --target=loongarch64 -march=la664 -S -emit-llvm %s -o - | \
// RUN: FileCheck %s --check-prefix=IR-LA664

// CC1-LOONGARCH64: "-target-cpu" "loongarch64"
// CC1-LOONGARCH64-NOT: "-target-feature"
@@ -39,10 +43,17 @@
// CC1-LA64V1P1-NOT: "-target-feature"
// CC1-LA64V1P1: "-target-abi" "lp64d"

// CC1-LA664: "-target-cpu" "la664"
// CC1-LA664-NOT: "-target-feature"
// CC1-LA664: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+lasx" "-target-feature" "+ual" "-target-feature" "+frecipe"
// CC1-LA664-NOT: "-target-feature"
// CC1-LA664: "-target-abi" "lp64d"

// IR-LOONGARCH64: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+f,+ual"
// IR-LA464: attributes #[[#]] ={{.*}}"target-cpu"="la464" {{.*}}"target-features"="+64bit,+d,+f,+lasx,+lsx,+ual"
// IR-LA64V1P0: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+lsx,+ual"
// IR-LA64V1P1: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+frecipe,+lsx,+ual"
// IR-LA664: attributes #[[#]] ={{.*}}"target-cpu"="la664" {{.*}}"target-features"="+64bit,+d,+f,+frecipe,+lasx,+lsx,+ual"

int foo(void) {
return 3;
5 changes: 5 additions & 0 deletions clang/test/Driver/loongarch-mtune.c
@@ -8,6 +8,11 @@
// RUN: %clang --target=loongarch64 -mtune=la464 -S -emit-llvm %s -o - | \
// RUN: FileCheck %s --check-prefix=IRATTR -DCPU=la464

// RUN: %clang --target=loongarch64 -mtune=la664 -fsyntax-only %s -### 2>&1 | \
// RUN: FileCheck %s --check-prefix=CC1ARG -DCPU=la664
// RUN: %clang --target=loongarch64 -mtune=la664 -S -emit-llvm %s -o - | \
// RUN: FileCheck %s --check-prefix=IRATTR -DCPU=la664

// RUN: %clang --target=loongarch64 -mtune=invalidcpu -fsyntax-only %s -### 2>&1 | \
// RUN: FileCheck %s --check-prefix=CC1ARG -DCPU=invalidcpu
// RUN: not %clang --target=loongarch64 -mtune=invalidcpu -S -emit-llvm %s -o /dev/null 2>&1 | \
2 changes: 1 addition & 1 deletion clang/test/Driver/print-supported-extensions-riscv.c
@@ -35,7 +35,6 @@
// CHECK-NEXT: za64rs 1.0 'Za64rs' (Reservation Set Size of at Most 64 Bytes)
// CHECK-NEXT: zaamo 1.0 'Zaamo' (Atomic Memory Operations)
// CHECK-NEXT: zabha 1.0 'Zabha' (Byte and Halfword Atomic Memory Operations)
// CHECK-NEXT: zacas 1.0 'Zacas' (Atomic Compare-And-Swap Instructions)
// CHECK-NEXT: zalrsc 1.0 'Zalrsc' (Load-Reserved/Store-Conditional)
// CHECK-NEXT: zama16b 1.0 'Zama16b' (Atomic 16-byte misaligned loads, stores and AMOs)
// CHECK-NEXT: zawrs 1.0 'Zawrs' (Wait on Reservation Set)
@@ -171,6 +170,7 @@
// CHECK-NEXT: Experimental extensions
// CHECK-NEXT: zicfilp 1.0 'Zicfilp' (Landing pad)
// CHECK-NEXT: zicfiss 1.0 'Zicfiss' (Shadow stack)
// CHECK-NEXT: zacas 1.0 'Zacas' (Atomic Compare-And-Swap Instructions)
// CHECK-NEXT: zalasr 0.1 'Zalasr' (Load-Acquire and Store-Release Instructions)
// CHECK-NEXT: smmpm 1.0 'Smmpm' (Machine-level Pointer Masking for M-mode)
// CHECK-NEXT: smnpm 1.0 'Smnpm' (Machine-level Pointer Masking for next lower privilege mode)
8 changes: 8 additions & 0 deletions clang/test/Preprocessor/init-loongarch.c
@@ -832,6 +832,14 @@
// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=loongarch64 -DTUNE=loongarch64 %s
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx -Xclang -target-feature -Xclang +frecipe | \
// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=la64v1.1 -DTUNE=loongarch64 %s
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la664 | \
// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=la664 -DTUNE=la664 %s
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -mtune=la664 | \
// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=la664 %s
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -mtune=la664 | \
// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=la664 %s
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la664 -mtune=loongarch64 | \
// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=la664 -DTUNE=loongarch64 %s

// ARCH-TUNE: #define __loongarch_arch "[[ARCH]]"
// FRECIPE: #define __loongarch_frecipe 1
18 changes: 9 additions & 9 deletions clang/test/Preprocessor/riscv-target-features.c
@@ -86,7 +86,6 @@
// CHECK-NOT: __riscv_za64rs {{.*$}}
// CHECK-NOT: __riscv_zaamo {{.*$}}
// CHECK-NOT: __riscv_zabha {{.*$}}
// CHECK-NOT: __riscv_zacas {{.*$}}
// CHECK-NOT: __riscv_zalrsc {{.*$}}
// CHECK-NOT: __riscv_zama16b {{.*$}}
// CHECK-NOT: __riscv_zawrs {{.*$}}
@@ -182,6 +181,7 @@
// CHECK-NOT: __riscv_sspm{{.*$}}
// CHECK-NOT: __riscv_ssqosid{{.*$}}
// CHECK-NOT: __riscv_supm{{.*$}}
// CHECK-NOT: __riscv_zacas {{.*$}}
// CHECK-NOT: __riscv_zalasr {{.*$}}
// CHECK-NOT: __riscv_zfbfmin {{.*$}}
// CHECK-NOT: __riscv_zicfilp {{.*$}}
@@ -747,14 +747,6 @@
// RUN: -o - | FileCheck --check-prefix=CHECK-ZABHA-EXT %s
// CHECK-ZABHA-EXT: __riscv_zabha 1000000{{$}}

// RUN: %clang --target=riscv32 \
// RUN: -march=rv32ia_zacas1p0 -E -dM %s \
// RUN: -o - | FileCheck --check-prefix=CHECK-ZACAS-EXT %s
// RUN: %clang --target=riscv64 \
// RUN: -march=rv64ia_zacas1p0 -E -dM %s \
// RUN: -o - | FileCheck --check-prefix=CHECK-ZACAS-EXT %s
// CHECK-ZACAS-EXT: __riscv_zacas 1000000{{$}}

// RUN: %clang --target=riscv32 \
// RUN: -march=rv32i_zalrsc1p0 -E -dM %s \
// RUN: -o - | FileCheck --check-prefix=CHECK-ZALRSC-EXT %s
@@ -1626,6 +1618,14 @@
// CHECK-ZVKT-EXT: __riscv_zvkt 1000000{{$}}

// Experimental extensions
// RUN: %clang --target=riscv32 -menable-experimental-extensions \
// RUN: -march=rv32ia_zacas1p0 -E -dM %s \
// RUN: -o - | FileCheck --check-prefix=CHECK-ZACAS-EXT %s
// RUN: %clang --target=riscv64 -menable-experimental-extensions \
// RUN: -march=rv64ia_zacas1p0 -E -dM %s \
// RUN: -o - | FileCheck --check-prefix=CHECK-ZACAS-EXT %s
// CHECK-ZACAS-EXT: __riscv_zacas 1000000{{$}}

// RUN: %clang --target=riscv32 -menable-experimental-extensions \
// RUN: -march=rv32i_zalasr0p1 -E -dM %s \
// RUN: -o - | FileCheck --check-prefix=CHECK-ZALASR-EXT %s
95 changes: 82 additions & 13 deletions clang/unittests/AST/ASTImporterTest.cpp
@@ -9681,37 +9681,106 @@ AST_MATCHER_P(EnumDecl, hasEnumConstName, StringRef, ConstName) {
return false;
}

TEST_P(ASTImporterOptionSpecificTestBase, ImportAnonymousEnum) {
TEST_P(ASTImporterOptionSpecificTestBase, ImportAnonymousEnums) {
const char *Code =
R"(
struct A {
enum { E1, E2 } x;
enum { E3, E4 } y;
};
)";
Decl *FromTU = getTuDecl(Code, Lang_CXX11);
auto *FromEnumE1 = FirstDeclMatcher<EnumDecl>().match(
FromTU, enumDecl(hasEnumConstName("E1")));
auto *ImportedEnumE1 = Import(FromEnumE1, Lang_CXX11);
EXPECT_TRUE(ImportedEnumE1);
auto *FromEnumE3 = FirstDeclMatcher<EnumDecl>().match(
FromTU, enumDecl(hasEnumConstName("E3")));
auto *ImportedEnumE3 = Import(FromEnumE3, Lang_CXX11);
EXPECT_TRUE(ImportedEnumE3);
EXPECT_NE(ImportedEnumE1, ImportedEnumE3);
}

TEST_P(ASTImporterOptionSpecificTestBase, ImportFreeStandingAnonymousEnums) {
const char *Code =
R"(
struct A {
enum { E1, E2 };
enum { E3, E4 };
};
)";
Decl *FromTU = getTuDecl(Code, Lang_CXX11);
auto *FromEnumE1 = FirstDeclMatcher<EnumDecl>().match(
FromTU, enumDecl(hasEnumConstName("E1")));
auto *ImportedEnumE1 = Import(FromEnumE1, Lang_CXX11);
EXPECT_TRUE(ImportedEnumE1);
auto *FromEnumE3 = FirstDeclMatcher<EnumDecl>().match(
FromTU, enumDecl(hasEnumConstName("E3")));
auto *ImportedEnumE3 = Import(FromEnumE3, Lang_CXX11);
EXPECT_TRUE(ImportedEnumE3);
EXPECT_NE(ImportedEnumE1, ImportedEnumE3);
}

TEST_P(ASTImporterOptionSpecificTestBase, ImportExistingAnonymousEnums) {
const char *ToCode =
R"(
struct A {
enum { E1, E2} x;
enum { E3, E4} y;
enum { E1, E2 } x;
enum { E3, E4 } y;
};
)";
Decl *ToTU = getToTuDecl(ToCode, Lang_CXX11);
auto *ToE1 = FirstDeclMatcher<EnumDecl>().match(
auto *ToEnumE1 = FirstDeclMatcher<EnumDecl>().match(
ToTU, enumDecl(hasEnumConstName("E1")));
auto *ToE3 = FirstDeclMatcher<EnumDecl>().match(
auto *ToEnumE3 = FirstDeclMatcher<EnumDecl>().match(
ToTU, enumDecl(hasEnumConstName("E3")));
const char *Code =
R"(
struct A {
enum { E1, E2} x;
enum { E3, E4} y;
enum { E1, E2 } x;
enum { E3, E4 } y;
};
)";
Decl *FromTU = getTuDecl(Code, Lang_CXX11);
auto *FromE1 = FirstDeclMatcher<EnumDecl>().match(
auto *FromEnumE1 = FirstDeclMatcher<EnumDecl>().match(
FromTU, enumDecl(hasEnumConstName("E1")));
auto *ImportedEnumE1 = Import(FromEnumE1, Lang_CXX11);
ASSERT_TRUE(ImportedEnumE1);
EXPECT_EQ(ImportedEnumE1, ToEnumE1);
auto *FromEnumE3 = FirstDeclMatcher<EnumDecl>().match(
FromTU, enumDecl(hasEnumConstName("E3")));
auto *ImportedEnumE3 = Import(FromEnumE3, Lang_CXX11);
ASSERT_TRUE(ImportedEnumE3);
EXPECT_EQ(ImportedEnumE3, ToEnumE3);
}

TEST_P(ASTImporterOptionSpecificTestBase, ImportExistingEmptyAnonymousEnums) {
const char *ToCode =
R"(
struct A {
enum {};
};
)";
Decl *ToTU = getToTuDecl(ToCode, Lang_CXX11);
auto *ToE1 = FirstDeclMatcher<EnumDecl>().match(ToTU, enumDecl());
const char *Code =
R"(
struct A {
enum {};
enum {};
};
)";
Decl *FromTU = getTuDecl(Code, Lang_CXX11);
auto *FromE1 = FirstDeclMatcher<EnumDecl>().match(FromTU, enumDecl());
auto *ImportedE1 = Import(FromE1, Lang_CXX11);
ASSERT_TRUE(ImportedE1);
EXPECT_EQ(ImportedE1, ToE1);
auto *FromE3 = FirstDeclMatcher<EnumDecl>().match(
FromTU, enumDecl(hasEnumConstName("E3")));
auto *ImportedE3 = Import(FromE3, Lang_CXX11);
ASSERT_TRUE(ImportedE3);
EXPECT_EQ(ImportedE3, ToE3);
auto *FromE2 = LastDeclMatcher<EnumDecl>().match(FromTU, enumDecl());
ASSERT_NE(FromE1, FromE2);
auto *ImportedE2 = Import(FromE2, Lang_CXX11);
ASSERT_TRUE(ImportedE2);
// FIXME: These should not be equal, or the import should fail.
EXPECT_EQ(ImportedE2, ToE1);
}

INSTANTIATE_TEST_SUITE_P(ParameterizedTests, ASTImporterLookupTableTest,
12 changes: 6 additions & 6 deletions compiler-rt/test/asan/TestCases/Posix/ignore_free_hook.cpp
@@ -1,15 +1,15 @@
// RUN: %clangxx_asan -O2 %s -o %t -DTEST=basic_hook_works && not %run %t \
// RUN: |& FileCheck %s -check-prefix=CHECK-BASIC
// RUN: 2>&1 | FileCheck %s -check-prefix=CHECK-BASIC
// RUN: %clangxx_asan -O2 %s -o %t -DTEST=ignore && %run %t \
// RUN: |& FileCheck %s -check-prefix=CHECK-IGNORE
// RUN: 2>&1 | FileCheck %s -check-prefix=CHECK-IGNORE
// RUN: %clangxx_asan -O2 %s -o %t -DTEST=ignore_twice && not %run %t \
// RUN: |& FileCheck %s -check-prefix=CHECK-IGNORE-2
// RUN: 2>&1 | FileCheck %s -check-prefix=CHECK-IGNORE-2
// RUN: %clangxx_asan -O2 %s -o %t -DTEST=mismatch && %env_asan_opts=alloc_dealloc_mismatch=1 not %run %t \
// RUN: |& FileCheck %s -check-prefix=CHECK-MISMATCH
// RUN: 2>&1 | FileCheck %s -check-prefix=CHECK-MISMATCH
// RUN: %clangxx_asan -O2 %s -o %t -DTEST=ignore_mismatch && %env_asan_opts=alloc_dealloc_mismatch=1 %run %t \
// RUN: |& FileCheck %s -check-prefix=CHECK-IGNORE-MISMATCH
// RUN: 2>&1 | FileCheck %s -check-prefix=CHECK-IGNORE-MISMATCH
// RUN: %clangxx_asan -O2 %s -o %t -DTEST=double_delete && not %run %t \
// RUN: |& FileCheck %s -check-prefix=CHECK-DOUBLE-DELETE
// RUN: 2>&1 | FileCheck %s -check-prefix=CHECK-DOUBLE-DELETE

#include <stdio.h>
#include <stdlib.h>
8 changes: 4 additions & 4 deletions compiler-rt/test/hwasan/TestCases/Posix/ignore_free_hook.cpp
@@ -1,11 +1,11 @@
// RUN: %clangxx_hwasan -O2 %s -o %t -DTEST=basic_hook_works && not %run %t \
// RUN: |& FileCheck %s -check-prefix=CHECK-BASIC
// RUN: 2>&1 | FileCheck %s -check-prefix=CHECK-BASIC
// RUN: %clangxx_hwasan -O2 %s -o %t -DTEST=ignore && %run %t \
// RUN: |& FileCheck %s -check-prefix=CHECK-IGNORE
// RUN: 2>&1 | FileCheck %s -check-prefix=CHECK-IGNORE
// RUN: %clangxx_hwasan -O2 %s -o %t -DTEST=ignore_twice && not %run %t \
// RUN: |& FileCheck %s -check-prefix=CHECK-IGNORE-2
// RUN: 2>&1 | FileCheck %s -check-prefix=CHECK-IGNORE-2
// RUN: %clangxx_hwasan -O2 %s -o %t -DTEST=double_delete && not %run %t \
// RUN: |& FileCheck %s -check-prefix=CHECK-DOUBLE-DELETE
// RUN: 2>&1 | FileCheck %s -check-prefix=CHECK-DOUBLE-DELETE

#include <sanitizer/hwasan_interface.h>
#include <stdio.h>
9 changes: 3 additions & 6 deletions llvm/docs/RISCVUsage.rst
@@ -153,7 +153,6 @@ on support follow.
``Za64rs`` Supported (`See note <#riscv-profiles-extensions-note>`__)
``Zaamo`` Assembly Support
``Zabha`` Supported
``Zacas`` Supported (`See note <#riscv-zacas-note>`__)
``Zalrsc`` Assembly Support
``Zama16b`` Supported (`See note <#riscv-profiles-extensions-note>`__)
``Zawrs`` Assembly Support
@@ -281,11 +280,6 @@ Supported
``Za128rs``, ``Za64rs``, ``Zama16b``, ``Zic64b``, ``Ziccamoa``, ``Ziccif``, ``Zicclsm``, ``Ziccrse``, ``Shcounterenvw``, ``Shgatpa``, ``Shtvala``, ``Shvsatpa``, ``Shvstvala``, ``Shvstvecd``, ``Ssccptr``, ``Sscounterenw``, ``Ssstateen``, ``Ssstrict``, ``Sstvala``, ``Sstvecd``, ``Ssu64xl``, ``Svade``, ``Svbare``
These extensions are defined as part of the `RISC-V Profiles specification <https://github.com/riscv/riscv-profiles/releases/tag/v1.0>`__. They do not introduce any new features themselves, but instead describe existing hardware features.

.. _riscv-zacas-note:

``Zacas``
amocas.w will be used for i32 cmpxchg. amocas.d will be used i64 cmpxchg on RV64. The compiler will not generate amocas.d on RV32 or amocas.q on RV64 due to ABI compatibilty. These can only be used in the assembler.

Experimental Extensions
=======================

@@ -299,6 +293,9 @@ The primary goal of experimental support is to assist in the process of ratifica
``experimental-ssqosid``
LLVM implements assembler support for the `v1.0-rc1 draft specification <https://github.com/riscv/riscv-ssqosid/releases/tag/v1.0-rc1>`_.

``experimental-zacas``
LLVM implements the `1.0 release specification <https://github.com/riscvarchive/riscv-zacas/releases/tag/v1.0>`__. amocas.w will be used for i32 cmpxchg. amocas.d will be used for i64 cmpxchg on RV64. The compiler will not generate amocas.d on RV32 or amocas.q on RV64 due to ABI compatibility. These can only be used in the assembler. The extension will be left as experimental until `an ABI issue <https://github.com/riscv-non-isa/riscv-elf-psabi-doc/issues/444>`__ is resolved.
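A minimal illustration (assuming a build with ``-menable-experimental-extensions`` and ``-march=rv64ia_zacas1p0``); a 32-bit or 64-bit compare-and-swap such as the following is what the ``amocas.w``/``amocas.d`` lowering applies to::

  #include <atomic>
  #include <cstdint>

  bool cas32(std::atomic<int32_t> &V, int32_t Expected, int32_t Desired) {
    // May lower to amocas.w when Zacas is enabled.
    return V.compare_exchange_strong(Expected, Desired);
  }

  bool cas64(std::atomic<int64_t> &V, int64_t Expected, int64_t Desired) {
    // May lower to amocas.d on RV64; RV32 keeps the non-amocas lowering.
    return V.compare_exchange_strong(Expected, Desired);
  }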

``experimental-zalasr``
LLVM implements the `0.0.5 draft specification <https://github.com/mehnadnerd/riscv-zalasr>`__.

8 changes: 7 additions & 1 deletion llvm/docs/ReleaseNotes.rst
@@ -166,6 +166,13 @@ Changes to the LoongArch Backend
* i32 is now a native type in the datalayout string. This enables
LoopStrengthReduce for loops with i32 induction variables, among other
optimizations.
* Codegen support is added for TLS Descriptor.
* Interleaved vectorization and vector shuffle are supported on LoongArch and
the experimental feature ``auto-vec`` is removed.
* Allow ``f16`` codegen with expansion to libcalls (see the sketch after this list).
* Clarify that emulated TLS is not supported.
* A codegen issue for ``bstrins.w`` is fixed on loongarch32.
* Assorted codegen improvements.
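A minimal sketch of the ``f16`` item above (illustrative only; the half-precision arithmetic is expanded to runtime library calls on LoongArch rather than to native instructions)::

  _Float16 scale(_Float16 X) {
    // Expanded through library calls, per the release note above.
    return X * X;
  }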

Changes to the MIPS Backend
---------------------------
@@ -185,7 +192,6 @@ Changes to the RISC-V Backend
* Codegen support was added for the Zimop (May-Be-Operations) extension.
* The experimental Ssnpm, Smnpm, Smmpm, Sspm, and Supm 1.0.0 Pointer Masking extensions are supported.
* The experimental Ssqosid extension is supported.
* Zacas is no longer experimental.
* Added the CSR names from the Resumable Non-Maskable Interrupts (Smrnmi) extension.
* llvm-objdump now prints disassembled opcode bytes in groups of 2 or 4 bytes to
match GNU objdump. The bytes within the groups are in big endian order.
19 changes: 19 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -900,6 +900,25 @@ class GShl : public GenericMachineInstr {
};
};

/// Represents a three-way compare.
class GSUCmp : public GenericMachineInstr {
public:
Register getLHSReg() const { return getOperand(1).getReg(); }
Register getRHSReg() const { return getOperand(2).getReg(); }

bool isSigned() const { return getOpcode() == TargetOpcode::G_SCMP; }

static bool classof(const MachineInstr *MI) {
switch (MI->getOpcode()) {
case TargetOpcode::G_SCMP:
case TargetOpcode::G_UCMP:
return true;
default:
return false;
}
};
};

} // namespace llvm

#endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
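For orientation, a sketch of how the new wrapper is typically consumed; the helper below is hypothetical, not part of the change above, and relies only on the accessors and classof() defined there:

#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"

using namespace llvm;

// Hypothetical helper: recognize a G_SCMP/G_UCMP and extract its operands.
static bool matchThreewayCompare(const MachineInstr &MI, Register &LHS,
                                 Register &RHS, bool &IsSigned) {
  const auto *Cmp = dyn_cast<GSUCmp>(&MI); // dispatches through GSUCmp::classof
  if (!Cmp)
    return false;
  LHS = Cmp->getLHSReg();
  RHS = Cmp->getRHSReg();
  IsSigned = Cmp->isSigned();
  return true;
}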
1 change: 1 addition & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -402,6 +402,7 @@ class LegalizerHelper {

LegalizeResult lowerISFPCLASS(MachineInstr &MI);

LegalizeResult lowerThreewayCompare(MachineInstr &MI);
LegalizeResult lowerMinMax(MachineInstr &MI);
LegalizeResult lowerFCopySign(MachineInstr &MI);
LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI);
7 changes: 0 additions & 7 deletions llvm/include/llvm/MC/MCAssembler.h
@@ -65,7 +65,6 @@ class MCAssembler {

bool HasLayout = false;
bool RelaxAll = false;
bool SubsectionsViaSymbols = false;

SectionListType Sections;

@@ -144,7 +143,6 @@
std::unique_ptr<MCObjectWriter> Writer);
MCAssembler(const MCAssembler &) = delete;
MCAssembler &operator=(const MCAssembler &) = delete;
~MCAssembler();

/// Compute the effective fragment size.
uint64_t computeFragmentSize(const MCFragment &F) const;
@@ -194,7 +192,6 @@
MCObjectWriter &getWriter() const { return *Writer; }

MCDwarfLineTableParams getDWARFLinetableParams() const { return LTParams; }
void setDWARFLinetableParams(MCDwarfLineTableParams P) { LTParams = P; }

/// Finish - Do final processing and write the object to the output stream.
/// \p Writer is used for custom object writer (as the MCJIT does),
@@ -204,10 +201,6 @@
// Layout all section and prepare them for emission.
void layout();

// FIXME: This does not belong here.
bool getSubsectionsViaSymbols() const { return SubsectionsViaSymbols; }
void setSubsectionsViaSymbols(bool Value) { SubsectionsViaSymbols = Value; }

bool hasLayout() const { return HasLayout; }
bool getRelaxAll() const { return RelaxAll; }
void setRelaxAll(bool Value) { RelaxAll = Value; }
34 changes: 12 additions & 22 deletions llvm/include/llvm/MC/MCELFObjectWriter.h
@@ -159,8 +159,12 @@ class ELFObjectWriter : public MCObjectWriter {

public:
std::unique_ptr<MCELFObjectTargetWriter> TargetObjectWriter;
raw_pwrite_stream &OS;
raw_pwrite_stream *DwoOS = nullptr;

DenseMap<const MCSectionELF *, std::vector<ELFRelocationEntry>> Relocations;
DenseMap<const MCSymbolELF *, const MCSymbolELF *> Renames;
bool IsLittleEndian = false;
bool SeenGnuAbi = false;
std::optional<uint8_t> OverrideABIVersion;

@@ -173,8 +177,11 @@
};
SmallVector<Symver, 0> Symvers;

ELFObjectWriter(std::unique_ptr<MCELFObjectTargetWriter> MOTW)
: TargetObjectWriter(std::move(MOTW)) {}
ELFObjectWriter(std::unique_ptr<MCELFObjectTargetWriter> MOTW,
raw_pwrite_stream &OS, bool IsLittleEndian);
ELFObjectWriter(std::unique_ptr<MCELFObjectTargetWriter> MOTW,
raw_pwrite_stream &OS, raw_pwrite_stream &DwoOS,
bool IsLittleEndian);

void reset() override;
void executePostLayoutBinding(MCAssembler &Asm) override;
@@ -185,6 +192,7 @@
const MCSymbol &SymA,
const MCFragment &FB, bool InSet,
bool IsPCRel) const override;
uint64_t writeObject(MCAssembler &Asm) override;

bool hasRelocationAddend() const;
bool usesRela(const MCTargetOptions *TO, const MCSectionELF &Sec) const;
@@ -193,11 +201,8 @@
const MCSymbolELF *Sym, uint64_t C,
unsigned Type) const;

virtual bool checkRelocation(MCContext &Ctx, SMLoc Loc,
const MCSectionELF *From,
const MCSectionELF *To) {
return true;
}
bool checkRelocation(MCContext &Ctx, SMLoc Loc, const MCSectionELF *From,
const MCSectionELF *To);

unsigned getELFHeaderEFlags() const { return ELFHeaderEFlags; }
void setELFHeaderEFlags(unsigned Flags) { ELFHeaderEFlags = Flags; }
@@ -209,21 +214,6 @@
// Override the default e_ident[EI_ABIVERSION] in the ELF header.
void setOverrideABIVersion(uint8_t V) { OverrideABIVersion = V; }
};

/// Construct a new ELF writer instance.
///
/// \param MOTW - The target specific ELF writer subclass.
/// \param OS - The stream to write to.
/// \returns The constructed object writer.
std::unique_ptr<MCObjectWriter>
createELFObjectWriter(std::unique_ptr<MCELFObjectTargetWriter> MOTW,
raw_pwrite_stream &OS, bool IsLittleEndian);

std::unique_ptr<MCObjectWriter>
createELFDwoObjectWriter(std::unique_ptr<MCELFObjectTargetWriter> MOTW,
raw_pwrite_stream &OS, raw_pwrite_stream &DwoOS,
bool IsLittleEndian);

} // end namespace llvm

#endif // LLVM_MC_MCELFOBJECTWRITER_H
5 changes: 5 additions & 0 deletions llvm/include/llvm/MC/MCObjectWriter.h
@@ -38,6 +38,7 @@ class MCObjectWriter {
std::string CompilerVersion;
std::vector<const MCSymbol *> AddrsigSyms;
bool EmitAddrsigSection = false;
bool SubsectionsViaSymbols = false;

struct CGProfileEntry {
const MCSymbolRefExpr *From;
@@ -114,6 +115,10 @@
std::vector<const MCSymbol *> &getAddrsigSyms() { return AddrsigSyms; }
SmallVector<CGProfileEntry, 0> &getCGProfile() { return CGProfile; }

// Mach-O specific: Whether .subsections_via_symbols is enabled.
bool getSubsectionsViaSymbols() const { return SubsectionsViaSymbols; }
void setSubsectionsViaSymbols(bool Value) { SubsectionsViaSymbols = Value; }

/// Write the object file and returns the number of bytes written.
///
/// This routine is called by the assembler after layout and relaxation is
2 changes: 2 additions & 0 deletions llvm/include/llvm/TargetParser/LoongArchTargetParser.def
@@ -10,6 +10,7 @@ LOONGARCH_FEATURE("+lasx", FK_LASX)
LOONGARCH_FEATURE("+lbt", FK_LBT)
LOONGARCH_FEATURE("+lvz", FK_LVZ)
LOONGARCH_FEATURE("+ual", FK_UAL)
LOONGARCH_FEATURE("+frecipe", FK_FRECIPE)

#undef LOONGARCH_FEATURE

@@ -19,5 +20,6 @@ LOONGARCH_FEATURE("+ual", FK_UAL)

LOONGARCH_ARCH("loongarch64", AK_LOONGARCH64, FK_64BIT | FK_FP32 | FK_FP64 | FK_UAL)
LOONGARCH_ARCH("la464", AK_LA464, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX | FK_UAL)
LOONGARCH_ARCH("la664", AK_LA664, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX | FK_UAL | FK_FRECIPE)

#undef LOONGARCH_ARCH
3 changes: 3 additions & 0 deletions llvm/include/llvm/TargetParser/LoongArchTargetParser.h
@@ -46,6 +46,9 @@ enum FeatureKind : uint32_t {

// Allow memory accesses to be unaligned.
FK_UAL = 1 << 8,

// Floating-point approximate reciprocal instructions are available.
FK_FRECIPE = 1 << 9,
};

struct FeatureInfo {
33 changes: 33 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -4006,6 +4006,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
case G_UMIN:
case G_UMAX:
return lowerMinMax(MI);
case G_SCMP:
case G_UCMP:
return lowerThreewayCompare(MI);
case G_FCOPYSIGN:
return lowerFCopySign(MI);
case G_FMINNUM:
@@ -7269,6 +7272,36 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerThreewayCompare(MachineInstr &MI) {
GSUCmp *Cmp = cast<GSUCmp>(&MI);

Register Dst = Cmp->getReg(0);
LLT DstTy = MRI.getType(Dst);
LLT CmpTy = DstTy.changeElementSize(1);

CmpInst::Predicate LTPredicate = Cmp->isSigned()
? CmpInst::Predicate::ICMP_SLT
: CmpInst::Predicate::ICMP_ULT;
CmpInst::Predicate GTPredicate = Cmp->isSigned()
? CmpInst::Predicate::ICMP_SGT
: CmpInst::Predicate::ICMP_UGT;

auto One = MIRBuilder.buildConstant(DstTy, 1);
auto Zero = MIRBuilder.buildConstant(DstTy, 0);
auto IsGT = MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
Cmp->getRHSReg());
auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);

auto MinusOne = MIRBuilder.buildConstant(DstTy, -1);
auto IsLT = MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
Cmp->getRHSReg());
MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);

MI.eraseFromParent();
return Legalized;
}
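// For reference (illustration only): the scalar semantics of the expansion
// above is Dst = (LHS < RHS) ? -1 : ((LHS > RHS) ? 1 : 0), with signed
// comparisons for G_SCMP and unsigned comparisons for G_UCMP; the two
// buildICmp/buildSelect pairs encode exactly that.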

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
100 changes: 35 additions & 65 deletions llvm/lib/MC/ELFObjectWriter.cpp
@@ -198,58 +198,6 @@ struct ELFWriter {
void writeSection(uint32_t GroupSymbolIndex, uint64_t Offset, uint64_t Size,
const MCSectionELF &Section);
};

class ELFSingleObjectWriter : public ELFObjectWriter {
raw_pwrite_stream &OS;
bool IsLittleEndian;

public:
ELFSingleObjectWriter(std::unique_ptr<MCELFObjectTargetWriter> MOTW,
raw_pwrite_stream &OS, bool IsLittleEndian)
: ELFObjectWriter(std::move(MOTW)), OS(OS),
IsLittleEndian(IsLittleEndian) {}

uint64_t writeObject(MCAssembler &Asm) override {
return ELFWriter(*this, OS, IsLittleEndian, ELFWriter::AllSections)
.writeObject(Asm);
}

friend struct ELFWriter;
};

class ELFDwoObjectWriter : public ELFObjectWriter {
raw_pwrite_stream &OS, &DwoOS;
bool IsLittleEndian;

public:
ELFDwoObjectWriter(std::unique_ptr<MCELFObjectTargetWriter> MOTW,
raw_pwrite_stream &OS, raw_pwrite_stream &DwoOS,
bool IsLittleEndian)
: ELFObjectWriter(std::move(MOTW)), OS(OS), DwoOS(DwoOS),
IsLittleEndian(IsLittleEndian) {}

bool checkRelocation(MCContext &Ctx, SMLoc Loc, const MCSectionELF *From,
const MCSectionELF *To) override {
if (isDwoSection(*From)) {
Ctx.reportError(Loc, "A dwo section may not contain relocations");
return false;
}
if (To && isDwoSection(*To)) {
Ctx.reportError(Loc, "A relocation may not refer to a dwo section");
return false;
}
return true;
}

uint64_t writeObject(MCAssembler &Asm) override {
uint64_t Size = ELFWriter(*this, OS, IsLittleEndian, ELFWriter::NonDwoOnly)
.writeObject(Asm);
Size += ELFWriter(*this, DwoOS, IsLittleEndian, ELFWriter::DwoOnly)
.writeObject(Asm);
return Size;
}
};

} // end anonymous namespace

uint64_t ELFWriter::align(Align Alignment) {
@@ -1156,6 +1104,16 @@ uint64_t ELFWriter::writeObject(MCAssembler &Asm) {
return W.OS.tell() - StartOffset;
}

ELFObjectWriter::ELFObjectWriter(std::unique_ptr<MCELFObjectTargetWriter> MOTW,
raw_pwrite_stream &OS, bool IsLittleEndian)
: TargetObjectWriter(std::move(MOTW)), OS(OS),
IsLittleEndian(IsLittleEndian) {}
ELFObjectWriter::ELFObjectWriter(std::unique_ptr<MCELFObjectTargetWriter> MOTW,
raw_pwrite_stream &OS,
raw_pwrite_stream &DwoOS, bool IsLittleEndian)
: TargetObjectWriter(std::move(MOTW)), OS(OS), DwoOS(&DwoOS),
IsLittleEndian(IsLittleEndian) {}

void ELFObjectWriter::reset() {
ELFHeaderEFlags = 0;
SeenGnuAbi = false;
@@ -1357,6 +1315,22 @@ bool ELFObjectWriter::shouldRelocateWithSymbol(const MCAssembler &Asm,
return false;
}

bool ELFObjectWriter::checkRelocation(MCContext &Ctx, SMLoc Loc,
const MCSectionELF *From,
const MCSectionELF *To) {
if (DwoOS) {
if (isDwoSection(*From)) {
Ctx.reportError(Loc, "A dwo section may not contain relocations");
return false;
}
if (To && isDwoSection(*To)) {
Ctx.reportError(Loc, "A relocation may not refer to a dwo section");
return false;
}
}
return true;
}

void ELFObjectWriter::recordRelocation(MCAssembler &Asm,
const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
@@ -1473,17 +1447,13 @@ bool ELFObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
return &SymA.getSection() == FB.getParent();
}

std::unique_ptr<MCObjectWriter>
llvm::createELFObjectWriter(std::unique_ptr<MCELFObjectTargetWriter> MOTW,
raw_pwrite_stream &OS, bool IsLittleEndian) {
return std::make_unique<ELFSingleObjectWriter>(std::move(MOTW), OS,
IsLittleEndian);
}

std::unique_ptr<MCObjectWriter>
llvm::createELFDwoObjectWriter(std::unique_ptr<MCELFObjectTargetWriter> MOTW,
raw_pwrite_stream &OS, raw_pwrite_stream &DwoOS,
bool IsLittleEndian) {
return std::make_unique<ELFDwoObjectWriter>(std::move(MOTW), OS, DwoOS,
IsLittleEndian);
uint64_t ELFObjectWriter::writeObject(MCAssembler &Asm) {
uint64_t Size =
ELFWriter(*this, OS, IsLittleEndian,
DwoOS ? ELFWriter::NonDwoOnly : ELFWriter::AllSections)
.writeObject(Asm);
if (DwoOS)
Size += ELFWriter(*this, *DwoOS, IsLittleEndian, ELFWriter::DwoOnly)
.writeObject(Asm);
return Size;
}
11 changes: 6 additions & 5 deletions llvm/lib/MC/MCAsmBackend.cpp
@@ -32,16 +32,17 @@ MCAsmBackend::~MCAsmBackend() = default;
std::unique_ptr<MCObjectWriter>
MCAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
auto TW = createObjectTargetWriter();
bool IsLE = Endian == llvm::endianness::little;
switch (TW->getFormat()) {
case Triple::ELF:
return createELFObjectWriter(cast<MCELFObjectTargetWriter>(std::move(TW)),
OS, Endian == llvm::endianness::little);
case Triple::MachO:
return createMachObjectWriter(cast<MCMachObjectTargetWriter>(std::move(TW)),
OS, Endian == llvm::endianness::little);
OS, IsLE);
case Triple::COFF:
return createWinCOFFObjectWriter(
cast<MCWinCOFFObjectTargetWriter>(std::move(TW)), OS);
case Triple::ELF:
return std::make_unique<ELFObjectWriter>(
cast<MCELFObjectTargetWriter>(std::move(TW)), OS, IsLE);
case Triple::SPIRV:
return createSPIRVObjectWriter(
cast<MCSPIRVObjectTargetWriter>(std::move(TW)), OS);
@@ -71,7 +72,7 @@ MCAsmBackend::createDwoObjectWriter(raw_pwrite_stream &OS,
return createWinCOFFDwoObjectWriter(
cast<MCWinCOFFObjectTargetWriter>(std::move(TW)), OS, DwoOS);
case Triple::ELF:
return createELFDwoObjectWriter(
return std::make_unique<ELFObjectWriter>(
cast<MCELFObjectTargetWriter>(std::move(TW)), OS, DwoOS,
Endian == llvm::endianness::little);
case Triple::Wasm:
5 changes: 1 addition & 4 deletions llvm/lib/MC/MCAssembler.cpp
@@ -86,11 +86,8 @@ MCAssembler::MCAssembler(MCContext &Context,
: Context(Context), Backend(std::move(Backend)),
Emitter(std::move(Emitter)), Writer(std::move(Writer)) {}

MCAssembler::~MCAssembler() = default;

void MCAssembler::reset() {
RelaxAll = false;
SubsectionsViaSymbols = false;
Sections.clear();
Symbols.clear();
ThumbFuncs.clear();
@@ -1095,7 +1092,7 @@ bool MCAssembler::relaxLEB(MCLEBFragment &LF) {
// Use evaluateKnownAbsolute for Mach-O as a hack: .subsections_via_symbols
// requires that .uleb128 A-B is foldable where A and B reside in different
// fragments. This is used by __gcc_except_table.
bool Abs = getSubsectionsViaSymbols()
bool Abs = getWriter().getSubsectionsViaSymbols()
? LF.getValue().evaluateKnownAbsolute(Value, *this)
: LF.getValue().evaluateAsAbsolute(Value, *this);
if (!Abs) {
12 changes: 0 additions & 12 deletions llvm/lib/MC/MCELFStreamer.cpp
@@ -88,18 +88,6 @@ void MCELFStreamer::emitLabelAtPos(MCSymbol *S, SMLoc Loc, MCDataFragment &F,
void MCELFStreamer::emitAssemblerFlag(MCAssemblerFlag Flag) {
// Let the target do whatever target specific stuff it needs to do.
getAssembler().getBackend().handleAssemblerFlag(Flag);
// Do any generic stuff we need to do.
switch (Flag) {
case MCAF_SyntaxUnified: return; // no-op here.
case MCAF_Code16: return; // Change parsing mode; no-op here.
case MCAF_Code32: return; // Change parsing mode; no-op here.
case MCAF_Code64: return; // Change parsing mode; no-op here.
case MCAF_SubsectionsViaSymbols:
getAssembler().setSubsectionsViaSymbols(true);
return;
}

llvm_unreachable("invalid assembler flag!");
}

// If bundle alignment is used and there are any instructions in the section, it
2 changes: 1 addition & 1 deletion llvm/lib/MC/MCMachOStreamer.cpp
@@ -220,7 +220,7 @@ void MCMachOStreamer::emitAssemblerFlag(MCAssemblerFlag Flag) {
case MCAF_Code32: return; // Change parsing mode; no-op here.
case MCAF_Code64: return; // Change parsing mode; no-op here.
case MCAF_SubsectionsViaSymbols:
getAssembler().setSubsectionsViaSymbols(true);
getWriter().setSubsectionsViaSymbols(true);
return;
}
}
2 changes: 2 additions & 0 deletions llvm/lib/MC/MCObjectWriter.cpp
@@ -22,6 +22,8 @@ MCObjectWriter::~MCObjectWriter() = default;
void MCObjectWriter::reset() {
FileNames.clear();
AddrsigSyms.clear();
EmitAddrsigSection = false;
SubsectionsViaSymbols = false;
CGProfile.clear();
}

4 changes: 2 additions & 2 deletions llvm/lib/MC/MachObjectWriter.cpp
@@ -744,7 +744,7 @@ bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
if (!hasReliableSymbolDifference) {
if (!SA.isInSection() || &SecA != &SecB ||
(!SA.isTemporary() && FB.getAtom() != SA.getFragment()->getAtom() &&
Asm.getSubsectionsViaSymbols()))
SubsectionsViaSymbols))
return false;
return true;
}
@@ -894,7 +894,7 @@ uint64_t MachObjectWriter::writeObject(MCAssembler &Asm) {

// Write the prolog, starting with the header and load command...
writeHeader(MachO::MH_OBJECT, NumLoadCommands, LoadCommandsSize,
Asm.getSubsectionsViaSymbols());
SubsectionsViaSymbols);
uint32_t Prot =
MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE;
writeSegmentLoadCommand("", NumSections, 0, VMSize, SectionDataStart,
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1288,6 +1288,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)

getActionDefinitionsBuilder(G_PREFETCH).custom();

getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();

getLegacyLegalizerInfo().computeTables();
verify(*ST.getInstrInfo());
}
20 changes: 16 additions & 4 deletions llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -947,6 +947,12 @@ def FeatureHasRestrictedSOffset : SubtargetFeature<"restricted-soffset",
"Has restricted SOffset (immediate not supported)."
>;

def FeatureRequiredExportPriority : SubtargetFeature<"required-export-priority",
"HasRequiredExportPriority",
"true",
"Export priority must be explicitly manipulated on GFX11.5"
>;

//===------------------------------------------------------------===//
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//
@@ -1567,7 +1573,8 @@ def FeatureISAVersion11_Generic: FeatureSet<
FeatureUserSGPRInit16Bug,
FeatureMADIntraFwdBug,
FeaturePrivEnabledTrap2NopBug,
FeatureRequiresCOV6])>;
FeatureRequiresCOV6,
FeatureRequiredExportPriority])>;

def FeatureISAVersion11_0_Common : FeatureSet<
!listconcat(FeatureISAVersion11_Common.Features,
@@ -1597,20 +1604,23 @@ def FeatureISAVersion11_5_0 : FeatureSet<
!listconcat(FeatureISAVersion11_Common.Features,
[FeatureSALUFloatInsts,
FeatureDPPSrc1SGPR,
FeatureVGPRSingleUseHintInsts])>;
FeatureVGPRSingleUseHintInsts,
FeatureRequiredExportPriority])>;

def FeatureISAVersion11_5_1 : FeatureSet<
!listconcat(FeatureISAVersion11_Common.Features,
[FeatureSALUFloatInsts,
FeatureDPPSrc1SGPR,
FeatureVGPRSingleUseHintInsts,
Feature1_5xVGPRs])>;
Feature1_5xVGPRs,
FeatureRequiredExportPriority])>;

def FeatureISAVersion11_5_2 : FeatureSet<
!listconcat(FeatureISAVersion11_Common.Features,
[FeatureSALUFloatInsts,
FeatureDPPSrc1SGPR,
FeatureVGPRSingleUseHintInsts])>;
FeatureVGPRSingleUseHintInsts,
FeatureRequiredExportPriority])>;

def FeatureISAVersion12 : FeatureSet<
[FeatureGFX12,
@@ -2016,6 +2026,8 @@ def HasAddNoCarryInsts : Predicate<"Subtarget->hasAddNoCarry()">,

def NotHasAddNoCarryInsts : Predicate<"!Subtarget->hasAddNoCarry()">;

def HasXNACKEnabled : Predicate<"Subtarget->isXNACKEnabled()">;

def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">,
AssemblerPredicate<(all_of Feature16BitInsts)>;

112 changes: 112 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -14,6 +14,7 @@
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/TargetParser/TargetParser.h"
@@ -1104,6 +1105,7 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
fixWMMAHazards(MI);
fixShift64HighRegBug(MI);
fixVALUMaskWriteHazard(MI);
fixRequiredExportPriority(MI);
}

bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
@@ -2895,3 +2897,113 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {

return true;
}

static bool ensureEntrySetPrio(MachineFunction *MF, int Priority,
const SIInstrInfo &TII) {
MachineBasicBlock &EntryMBB = MF->front();
if (EntryMBB.begin() != EntryMBB.end()) {
auto &EntryMI = *EntryMBB.begin();
if (EntryMI.getOpcode() == AMDGPU::S_SETPRIO &&
EntryMI.getOperand(0).getImm() >= Priority)
return false;
}

BuildMI(EntryMBB, EntryMBB.begin(), DebugLoc(), TII.get(AMDGPU::S_SETPRIO))
.addImm(Priority);
return true;
}

bool GCNHazardRecognizer::fixRequiredExportPriority(MachineInstr *MI) {
if (!ST.hasRequiredExportPriority())
return false;

// Assume the following shader types will never have exports,
// and avoid adding or adjusting S_SETPRIO.
MachineBasicBlock *MBB = MI->getParent();
MachineFunction *MF = MBB->getParent();
auto CC = MF->getFunction().getCallingConv();
switch (CC) {
case CallingConv::AMDGPU_CS:
case CallingConv::AMDGPU_CS_Chain:
case CallingConv::AMDGPU_CS_ChainPreserve:
case CallingConv::AMDGPU_KERNEL:
return false;
default:
break;
}

const int MaxPriority = 3;
const int NormalPriority = 2;
const int PostExportPriority = 0;

auto It = MI->getIterator();
switch (MI->getOpcode()) {
case AMDGPU::S_ENDPGM:
case AMDGPU::S_ENDPGM_SAVED:
case AMDGPU::S_ENDPGM_ORDERED_PS_DONE:
case AMDGPU::SI_RETURN_TO_EPILOG:
// Ensure shader with calls raises priority at entry.
// This ensures correct priority if exports exist in callee.
if (MF->getFrameInfo().hasCalls())
return ensureEntrySetPrio(MF, NormalPriority, TII);
return false;
case AMDGPU::S_SETPRIO: {
// Raise minimum priority unless in workaround.
auto &PrioOp = MI->getOperand(0);
int Prio = PrioOp.getImm();
bool InWA = (Prio == PostExportPriority) &&
(It != MBB->begin() && TII.isEXP(*std::prev(It)));
if (InWA || Prio >= NormalPriority)
return false;
PrioOp.setImm(std::min(Prio + NormalPriority, MaxPriority));
return true;
}
default:
if (!TII.isEXP(*MI))
return false;
break;
}

// Check entry priority at each export (as there will only be a few).
// Note: amdgpu_gfx can only be a callee, so defer to caller setprio.
bool Changed = false;
if (CC != CallingConv::AMDGPU_Gfx)
Changed = ensureEntrySetPrio(MF, NormalPriority, TII);

auto NextMI = std::next(It);
bool EndOfShader = false;
if (NextMI != MBB->end()) {
// Only need WA at end of sequence of exports.
if (TII.isEXP(*NextMI))
return Changed;
// Assume appropriate S_SETPRIO after export means WA already applied.
if (NextMI->getOpcode() == AMDGPU::S_SETPRIO &&
NextMI->getOperand(0).getImm() == PostExportPriority)
return Changed;
EndOfShader = NextMI->getOpcode() == AMDGPU::S_ENDPGM;
}

const DebugLoc &DL = MI->getDebugLoc();

// Lower priority.
BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_SETPRIO))
.addImm(PostExportPriority);

if (!EndOfShader) {
// Wait for exports to complete.
BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_WAITCNT_EXPCNT))
.addReg(AMDGPU::SGPR_NULL)
.addImm(0);
}

BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_NOP)).addImm(0);
BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_NOP)).addImm(0);

if (!EndOfShader) {
// Return to normal (higher) priority.
BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_SETPRIO))
.addImm(NormalPriority);
}

return true;
}
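// Shape of the emitted sequence around the final export of a block, taken
// from the BuildMI calls above (the end-of-shader case drops the waitcnt and
// the trailing setprio):
//   <EXP ...>
//   S_SETPRIO 0
//   S_WAITCNT_EXPCNT $sgpr_null, 0
//   S_NOP 0
//   S_NOP 0
//   S_SETPRIO 2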
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -107,6 +107,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
bool fixWMMAHazards(MachineInstr *MI);
bool fixShift64HighRegBug(MachineInstr *MI);
bool fixVALUMaskWriteHazard(MachineInstr *MI);
bool fixRequiredExportPriority(MachineInstr *MI);

int checkMAIHazards(MachineInstr *MI);
int checkMAIHazards908(MachineInstr *MI);
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -238,6 +238,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasVOPDInsts = false;
bool HasVALUTransUseHazard = false;
bool HasForceStoreSC0SC1 = false;
bool HasRequiredExportPriority = false;

bool RequiresCOV6 = false;

@@ -1282,6 +1283,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,

bool hasRestrictedSOffset() const { return HasRestrictedSOffset; }

bool hasRequiredExportPriority() const { return HasRequiredExportPriority; }

/// \returns true if the target uses LOADcnt/SAMPLEcnt/BVHcnt, DScnt/KMcnt
/// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.
bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; }
48 changes: 40 additions & 8 deletions llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -216,7 +216,7 @@ class SILoadStoreOptimizer : public MachineFunctionPass {
CombineInfo &Paired, bool Modify = false);
static bool widthsFit(const GCNSubtarget &STI, const CombineInfo &CI,
const CombineInfo &Paired);
static unsigned getNewOpcode(const CombineInfo &CI, const CombineInfo &Paired);
unsigned getNewOpcode(const CombineInfo &CI, const CombineInfo &Paired);
static std::pair<unsigned, unsigned> getSubRegIdxs(const CombineInfo &CI,
const CombineInfo &Paired);
const TargetRegisterClass *
@@ -353,6 +353,7 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM:
case AMDGPU::S_LOAD_DWORDX2_IMM:
case AMDGPU::S_LOAD_DWORDX2_IMM_ec:
case AMDGPU::GLOBAL_LOAD_DWORDX2:
case AMDGPU::GLOBAL_LOAD_DWORDX2_SADDR:
case AMDGPU::GLOBAL_STORE_DWORDX2:
@@ -363,6 +364,7 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
case AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM:
case AMDGPU::S_LOAD_DWORDX3_IMM:
case AMDGPU::S_LOAD_DWORDX3_IMM_ec:
case AMDGPU::GLOBAL_LOAD_DWORDX3:
case AMDGPU::GLOBAL_LOAD_DWORDX3_SADDR:
case AMDGPU::GLOBAL_STORE_DWORDX3:
@@ -373,6 +375,7 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM:
case AMDGPU::S_LOAD_DWORDX4_IMM:
case AMDGPU::S_LOAD_DWORDX4_IMM_ec:
case AMDGPU::GLOBAL_LOAD_DWORDX4:
case AMDGPU::GLOBAL_LOAD_DWORDX4_SADDR:
case AMDGPU::GLOBAL_STORE_DWORDX4:
@@ -383,6 +386,7 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM:
case AMDGPU::S_LOAD_DWORDX8_IMM:
case AMDGPU::S_LOAD_DWORDX8_IMM_ec:
return 8;
case AMDGPU::DS_READ_B32:
case AMDGPU::DS_READ_B32_gfx9:
@@ -507,6 +511,10 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) {
case AMDGPU::S_LOAD_DWORDX3_IMM:
case AMDGPU::S_LOAD_DWORDX4_IMM:
case AMDGPU::S_LOAD_DWORDX8_IMM:
case AMDGPU::S_LOAD_DWORDX2_IMM_ec:
case AMDGPU::S_LOAD_DWORDX3_IMM_ec:
case AMDGPU::S_LOAD_DWORDX4_IMM_ec:
case AMDGPU::S_LOAD_DWORDX8_IMM_ec:
return S_LOAD_IMM;
case AMDGPU::DS_READ_B32:
case AMDGPU::DS_READ_B32_gfx9:
@@ -591,6 +599,10 @@ static unsigned getInstSubclass(unsigned Opc, const SIInstrInfo &TII) {
case AMDGPU::S_LOAD_DWORDX3_IMM:
case AMDGPU::S_LOAD_DWORDX4_IMM:
case AMDGPU::S_LOAD_DWORDX8_IMM:
case AMDGPU::S_LOAD_DWORDX2_IMM_ec:
case AMDGPU::S_LOAD_DWORDX3_IMM_ec:
case AMDGPU::S_LOAD_DWORDX4_IMM_ec:
case AMDGPU::S_LOAD_DWORDX8_IMM_ec:
return AMDGPU::S_LOAD_DWORD_IMM;
case AMDGPU::GLOBAL_LOAD_DWORD:
case AMDGPU::GLOBAL_LOAD_DWORDX2:
@@ -703,6 +715,10 @@ static AddressRegs getRegs(unsigned Opc, const SIInstrInfo &TII) {
case AMDGPU::S_LOAD_DWORDX3_IMM:
case AMDGPU::S_LOAD_DWORDX4_IMM:
case AMDGPU::S_LOAD_DWORDX8_IMM:
case AMDGPU::S_LOAD_DWORDX2_IMM_ec:
case AMDGPU::S_LOAD_DWORDX3_IMM_ec:
case AMDGPU::S_LOAD_DWORDX4_IMM_ec:
case AMDGPU::S_LOAD_DWORDX8_IMM_ec:
Result.SBase = true;
return Result;
case AMDGPU::DS_READ_B32:
@@ -1212,8 +1228,14 @@ void SILoadStoreOptimizer::copyToDestRegs(

// Copy to the old destination registers.
const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
const auto *Dest0 = TII->getNamedOperand(*CI.I, OpName);
const auto *Dest1 = TII->getNamedOperand(*Paired.I, OpName);
auto *Dest0 = TII->getNamedOperand(*CI.I, OpName);
auto *Dest1 = TII->getNamedOperand(*Paired.I, OpName);

// The constrained sload instructions in the S_LOAD_IMM class have the
// `early-clobber` flag on the dst operand. Remove the flag before using the
// MOs in the copies.
Dest0->setIsEarlyClobber(false);
Dest1->setIsEarlyClobber(false);

BuildMI(*MBB, InsertBefore, DL, CopyDesc)
.add(*Dest0) // Copy to same destination including flags and sub reg.
@@ -1700,19 +1722,29 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI,
case 8:
return AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM;
}
case S_LOAD_IMM:
case S_LOAD_IMM: {
// If XNACK is enabled, use the constrained opcodes when the first load is
// under-aligned.
const MachineMemOperand *MMO = *CI.I->memoperands_begin();
bool NeedsConstrainedOpc =
STM->isXNACKEnabled() && MMO->getAlign().value() < Width * 4;
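// For example: merging two dword loads into one 8-byte access whose leading
// MMO is only 4-byte aligned gives MMO->getAlign() == 4 < Width * 4 == 8, so
// the _ec (early-clobber constrained) opcode is chosen below when XNACK is
// enabled.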
switch (Width) {
default:
return 0;
case 2:
return AMDGPU::S_LOAD_DWORDX2_IMM;
return NeedsConstrainedOpc ? AMDGPU::S_LOAD_DWORDX2_IMM_ec
: AMDGPU::S_LOAD_DWORDX2_IMM;
case 3:
return AMDGPU::S_LOAD_DWORDX3_IMM;
return NeedsConstrainedOpc ? AMDGPU::S_LOAD_DWORDX3_IMM_ec
: AMDGPU::S_LOAD_DWORDX3_IMM;
case 4:
return AMDGPU::S_LOAD_DWORDX4_IMM;
return NeedsConstrainedOpc ? AMDGPU::S_LOAD_DWORDX4_IMM_ec
: AMDGPU::S_LOAD_DWORDX4_IMM;
case 8:
return AMDGPU::S_LOAD_DWORDX8_IMM;
return NeedsConstrainedOpc ? AMDGPU::S_LOAD_DWORDX8_IMM_ec
: AMDGPU::S_LOAD_DWORDX8_IMM;
}
}
case GLOBAL_LOAD:
switch (Width) {
default:
91 changes: 67 additions & 24 deletions llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -161,12 +161,25 @@ class SM_Discard_Pseudo <string opName, OffsetMode offsets>
let has_soffset = offsets.HasSOffset;
}

multiclass SM_Load_Pseudos<string op, RegisterClass baseClass,
RegisterClass dstClass, OffsetMode offsets> {
defvar opName = !tolower(op);
def "" : SM_Load_Pseudo <opName, baseClass, dstClass, offsets>;

// The constrained multi-dword load equivalents, with an early-clobber flag
// on the dst operand. They are needed only for codegen, so no real (encoded)
// opcodes are defined for them.
if !gt(dstClass.RegTypes[0].Size, 32) then
let Constraints = "@earlyclobber $sdst",
PseudoInstr = op # offsets.Variant in
def "" # _ec : SM_Load_Pseudo <opName, baseClass, dstClass, offsets>;
}

multiclass SM_Pseudo_Loads<RegisterClass baseClass,
RegisterClass dstClass> {
defvar opName = !tolower(NAME);
def _IMM : SM_Load_Pseudo <opName, baseClass, dstClass, IMM_Offset>;
def _SGPR : SM_Load_Pseudo <opName, baseClass, dstClass, SGPR_Offset>;
def _SGPR_IMM : SM_Load_Pseudo <opName, baseClass, dstClass, SGPR_IMM_Offset>;
defm _IMM : SM_Load_Pseudos <NAME, baseClass, dstClass, IMM_Offset>;
defm _SGPR : SM_Load_Pseudos <NAME, baseClass, dstClass, SGPR_Offset>;
defm _SGPR_IMM : SM_Load_Pseudos <NAME, baseClass, dstClass, SGPR_IMM_Offset>;
}

multiclass SM_Pseudo_Stores<RegisterClass baseClass,
@@ -853,45 +866,74 @@ def SMRDBufferImm : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm">;
def SMRDBufferImm32 : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm32">;
def SMRDBufferSgprImm : ComplexPattern<iPTR, 2, "SelectSMRDBufferSgprImm">;

multiclass SMRD_Pattern <string Instr, ValueType vt, bit immci = true> {
class SMRDAlignedLoadPat<PatFrag Op> : PatFrag <(ops node:$ptr), (Op node:$ptr), [{
// Returns true if it is a single dword load or naturally aligned multi-dword load.
LoadSDNode *Ld = cast<LoadSDNode>(N);
unsigned Size = Ld->getMemoryVT().getStoreSize();
return Size <= 4 || Ld->getAlign().value() >= Size;
}]> {
let GISelPredicateCode = [{
auto &Ld = cast<GLoad>(MI);
TypeSize Size = Ld.getMMO().getSize().getValue();
return Size <= 4 || Ld.getMMO().getAlign().value() >= Size;
}];
}

def aligned_smrd_load : SMRDAlignedLoadPat<smrd_load>;

multiclass SMRD_Patterns <string Instr, ValueType vt, PatFrag frag,
bit immci = true, string suffix = ""> {
// 1. IMM offset
def : GCNPat <
(smrd_load (SMRDImm i64:$sbase, i32:$offset)),
(vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0))
>;
(frag (SMRDImm i64:$sbase, i32:$offset)),
(vt (!cast<SM_Pseudo>(Instr#"_IMM"#suffix) $sbase, $offset, 0))>;

// 2. 32-bit IMM offset on CI
if immci then def : GCNPat <
(smrd_load (SMRDImm32 i64:$sbase, i32:$offset)),
(vt (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, 0))> {
let OtherPredicates = [isGFX7Only];
(frag (SMRDImm32 i64:$sbase, i32:$offset)),
(vt (!cast<InstSI>(Instr#"_IMM_ci"#suffix) $sbase, $offset, 0))> {
let SubtargetPredicate = isGFX7Only;
}

// 3. SGPR offset
def : GCNPat <
(smrd_load (SMRDSgpr i64:$sbase, i32:$soffset)),
(vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $soffset, 0))> {
let OtherPredicates = [isNotGFX9Plus];
(frag (SMRDSgpr i64:$sbase, i32:$soffset)),
(vt (!cast<SM_Pseudo>(Instr#"_SGPR"#suffix) $sbase, $soffset, 0))> {
let SubtargetPredicate = isNotGFX9Plus;
}
def : GCNPat <
(smrd_load (SMRDSgpr i64:$sbase, i32:$soffset)),
(vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") $sbase, $soffset, 0, 0))> {
let OtherPredicates = [isGFX9Plus];
(frag (SMRDSgpr i64:$sbase, i32:$soffset)),
(vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM"#suffix) $sbase, $soffset, 0, 0))> {
let SubtargetPredicate = isGFX9Plus;
}

// 4. SGPR+IMM offset
def : GCNPat <
(smrd_load (SMRDSgprImm i64:$sbase, i32:$soffset, i32:$offset)),
(vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") $sbase, $soffset, $offset, 0))> {
let OtherPredicates = [isGFX9Plus];
(frag (SMRDSgprImm i64:$sbase, i32:$soffset, i32:$offset)),
(vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM"#suffix) $sbase, $soffset, $offset, 0))> {
let SubtargetPredicate = isGFX9Plus;
}

// 5. No offset
def : GCNPat <
(vt (smrd_load (i64 SReg_64:$sbase))),
(vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0))
>;
(vt (frag (i64 SReg_64:$sbase))),
(vt (!cast<SM_Pseudo>(Instr#"_IMM"#suffix) i64:$sbase, 0, 0))>;
}

multiclass SMRD_Pattern <string Instr, ValueType vt, bit immci = true> {
// High priority when XNACK is enabled and the load was naturally aligned.
let OtherPredicates = [HasXNACKEnabled], AddedComplexity = 102 in
defm: SMRD_Patterns <Instr, vt, aligned_smrd_load, immci>;

  // XNACK is enabled and the load wasn't naturally aligned: use the constrained (_ec) sload variant.
if !gt(vt.Size, 32) then {
let OtherPredicates = [HasXNACKEnabled], AddedComplexity = 101 in
defm: SMRD_Patterns <Instr, vt, smrd_load, /*immci=*/false, /*suffix=*/"_ec">;
}

// XNACK is disabled.
let AddedComplexity = 100 in
defm: SMRD_Patterns <Instr, vt, smrd_load, immci>;
}
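The three AddedComplexity tiers effectively encode a selection order; a hedged standalone model of the outcome (the function and its strings are illustrative — TableGen resolves this through pattern complexity, not code like this):

```cpp
#include <string>

// Illustrative only: TableGen picks patterns by AddedComplexity, it does not
// run a function like this.
std::string pickSLoadPseudo(bool XnackEnabled, bool NaturallyAligned,
                            unsigned WidthInBits) {
  if (XnackEnabled && NaturallyAligned)
    return "plain pseudo";            // tier with AddedComplexity = 102
  if (XnackEnabled && WidthInBits > 32)
    return "constrained _ec pseudo";  // tier with AddedComplexity = 101
  return "plain pseudo";              // fallback tier, AddedComplexity = 100
}
```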

multiclass SMLoad_Pattern <string Instr, ValueType vt, bit immci = true> {
@@ -1005,6 +1047,8 @@ defm : ScalarBufferLoadIntrinsicPat <SIsbuffer_load_ubyte, "S_BUFFER_LOAD_U8">;
defm : ScalarBufferLoadIntrinsicPat <SIsbuffer_load_short, "S_BUFFER_LOAD_I16">;
defm : ScalarBufferLoadIntrinsicPat <SIsbuffer_load_ushort, "S_BUFFER_LOAD_U16">;

} // End let AddedComplexity = 100

foreach vt = Reg32Types.types in {
defm : SMRD_Pattern <"S_LOAD_DWORD", vt>;
}
@@ -1029,7 +1073,6 @@ foreach vt = SReg_512.RegTypes in {
defm : SMRD_Pattern <"S_LOAD_DWORDX16", vt>;
}

} // End let AddedComplexity = 100

defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORD", i32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX2", v2i32>;
12 changes: 7 additions & 5 deletions llvm/lib/Target/LoongArch/LoongArch.td
@@ -106,11 +106,6 @@ def FeatureRelax
: SubtargetFeature<"relax", "HasLinkerRelax", "true",
"Enable Linker relaxation">;

// Experimental auto vectorization
def FeatureAutoVec
: SubtargetFeature<"auto-vec", "HasExpAutoVec", "true",
"Experimental auto vectorization">;

// Floating point approximation operation
def FeatureFrecipe
: SubtargetFeature<"frecipe", "HasFrecipe", "true",
@@ -151,6 +146,13 @@ def : ProcessorModel<"la464", NoSchedModel, [Feature64Bit,
FeatureExtLVZ,
FeatureExtLBT]>;

def : ProcessorModel<"la664", NoSchedModel, [Feature64Bit,
FeatureUAL,
FeatureExtLASX,
FeatureExtLVZ,
FeatureExtLBT,
FeatureFrecipe]>;

//===----------------------------------------------------------------------===//
// Define the LoongArch target.
//===----------------------------------------------------------------------===//
2 changes: 0 additions & 2 deletions llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
@@ -26,8 +26,6 @@ TypeSize LoongArchTTIImpl::getRegisterBitWidth(
case TargetTransformInfo::RGK_Scalar:
return TypeSize::getFixed(ST->is64Bit() ? 64 : 32);
case TargetTransformInfo::RGK_FixedWidthVector:
if (!ST->hasExpAutoVec())
return DefSize;
if (ST->hasExtLASX())
return TypeSize::getFixed(256);
if (ST->hasExtLSX())
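With the experimental auto-vectorization gate removed, the fixed-width vector register size reported by this hook depends only on the available SIMD extension; a standalone sketch of that behaviour, where the zero fallback for targets with neither LSX nor LASX is an assumption, not a quote of the in-tree code:

```cpp
#include <cstdint>

// Standalone model of the post-patch behaviour, not the TTI interface itself.
uint64_t loongArchFixedVectorBits(bool HasLASX, bool HasLSX) {
  if (HasLASX)
    return 256; // 256-bit LASX vector registers
  if (HasLSX)
    return 128; // 128-bit LSX vector registers
  return 0;     // assumed fallback when no SIMD extension is present
}
```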
4 changes: 2 additions & 2 deletions llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -243,8 +243,8 @@ def HasStdExtZabha : Predicate<"Subtarget->hasStdExtZabha()">,
"'Zabha' (Byte and Halfword Atomic Memory Operations)">;

def FeatureStdExtZacas
: RISCVExtension<"zacas", 1, 0,
"'Zacas' (Atomic Compare-And-Swap Instructions)">,
: RISCVExperimentalExtension<"zacas", 1, 0,
"'Zacas' (Atomic Compare-And-Swap Instructions)">,
RISCVExtensionBitmask<0, 26>;
def HasStdExtZacas : Predicate<"Subtarget->hasStdExtZacas()">,
AssemblerPredicate<(all_of FeatureStdExtZacas),
5 changes: 4 additions & 1 deletion llvm/lib/TargetParser/Host.cpp
@@ -1562,6 +1562,8 @@ StringRef sys::getHostCPUName() {
switch (processor_id & 0xf000) {
case 0xc000: // Loongson 64bit, 4-issue
return "la464";
case 0xd000: // Loongson 64bit, 6-issue
return "la664";
// TODO: Others.
default:
break;
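A toy, runnable model of the PRID-class mapping the switch above performs; only the 0xc000 and 0xd000 classes come from the patch, while the function name and the "generic" fallback are hypothetical:

```cpp
#include <cstdint>
#include <cstdio>

const char *loongArchCPUFromPRID(uint32_t ProcessorID) {
  switch (ProcessorID & 0xf000) {
  case 0xc000: return "la464"; // Loongson 64-bit, 4-issue
  case 0xd000: return "la664"; // Loongson 64-bit, 6-issue
  default:     return "generic"; // hypothetical fallback
  }
}

int main() {
  std::printf("%s\n", loongArchCPUFromPRID(0xd000)); // prints "la664"
  return 0;
}
```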
@@ -2067,7 +2069,8 @@ const StringMap<bool> sys::getHostCPUFeatures() {
Features["zvfhmin"] = ExtMask & (1ULL << 31); // RISCV_HWPROBE_EXT_ZVFHMIN
Features["zfa"] = ExtMask & (1ULL << 32); // RISCV_HWPROBE_EXT_ZFA
Features["ztso"] = ExtMask & (1ULL << 33); // RISCV_HWPROBE_EXT_ZTSO
Features["zacas"] = ExtMask & (1ULL << 34); // RISCV_HWPROBE_EXT_ZACAS
// TODO: Re-enable zacas when it is marked non-experimental again.
// Features["zacas"] = ExtMask & (1ULL << 34); // RISCV_HWPROBE_EXT_ZACAS
Features["zicond"] = ExtMask & (1ULL << 35); // RISCV_HWPROBE_EXT_ZICOND
Features["zihintpause"] =
ExtMask & (1ULL << 36); // RISCV_HWPROBE_EXT_ZIHINTPAUSE
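The surrounding code follows a simple hwprobe-bit-to-feature mapping; a hedged sketch of that pattern, showing why the zacas bit (34) is deliberately skipped while the extension is experimental again (the helper and the std::map container are illustrative, not the StringMap-based LLVM code):

```cpp
#include <cstdint>
#include <map>
#include <string>

std::map<std::string, bool> decodeHwprobeExtensions(uint64_t ExtMask) {
  std::map<std::string, bool> Features;
  Features["zfa"]  = (ExtMask & (1ULL << 32)) != 0; // RISCV_HWPROBE_EXT_ZFA
  Features["ztso"] = (ExtMask & (1ULL << 33)) != 0; // RISCV_HWPROBE_EXT_ZTSO
  // Bit 34 (zacas) is intentionally not forwarded: host CPU detection must not
  // turn on an extension that is experimental again.
  Features["zicond"] = (ExtMask & (1ULL << 35)) != 0; // RISCV_HWPROBE_EXT_ZICOND
  return Features;
}
```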
141 changes: 141 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/legalize-threeway-cmp.mir
@@ -0,0 +1,141 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -O0 -mtriple=aarch64 -run-pass=legalizer -global-isel-abort=1 -verify-machineinstrs %s -o - | FileCheck %s
---
name: test_scmp
body: |
bb.0.entry:
; CHECK-LABEL: name: test_scmp
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY]](s64), [[COPY1]]
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C]], [[C1]]
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY]](s64), [[COPY1]]
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[C2]], [[SELECT]]
; CHECK-NEXT: $w0 = COPY [[SELECT1]](s32)
%0:_(s64) = COPY $x0
%1:_(s64) = COPY $x0
%4:_(s2) = G_SCMP %0(s64), %1
%14:_(s32) = G_ANYEXT %4(s2)
$w0 = COPY %14(s32)

...
---
name: test_ucmp
body: |
bb.0.entry:
; CHECK-LABEL: name: test_ucmp
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s64), [[COPY1]]
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C]], [[C1]]
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]]
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[C2]], [[SELECT]]
; CHECK-NEXT: $w0 = COPY [[SELECT1]](s32)
%0:_(s64) = COPY $x0
%1:_(s64) = COPY $x0
%4:_(s2) = G_UCMP %0(s64), %1
%14:_(s32) = G_ANYEXT %4(s2)
$w0 = COPY %14(s32)

...
---
name: test_ucmp_vector
body: |
bb.0.entry:
; CHECK-LABEL: name: test_ucmp_vector
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $w3
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $w1
; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $w2
; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $w3
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(ugt), [[BUILD_VECTOR]](<4 x s32>), [[BUILD_VECTOR1]]
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s8) = G_CONSTANT i8 3
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR2]](<8 x s8>)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>)
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[TRUNC]], [[UV]]
; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR3]](<8 x s8>)
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<4 x s16>), [[UV3:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT1]](<8 x s16>)
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>)
; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[UV2]], [[TRUNC1]]
; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR4]](<8 x s8>)
; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<4 x s16>), [[UV5:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT2]](<8 x s16>)
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[UV4]], [[XOR]]
; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[AND]], [[AND1]]
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s8) = G_CONSTANT i8 3
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(ult), [[BUILD_VECTOR]](<4 x s32>), [[BUILD_VECTOR1]]
; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>)
; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C3]](s8), [[C3]](s8), [[C3]](s8), [[C3]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR5]](<8 x s8>)
; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<4 x s16>), [[UV7:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT3]](<8 x s16>)
; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<4 x s16>) = G_XOR [[TRUNC2]], [[UV6]]
; CHECK-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C3]](s8), [[C3]](s8), [[C3]](s8), [[C3]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR6]](<8 x s8>)
; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s16>), [[UV9:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT4]](<8 x s16>)
; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>)
; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<4 x s16>) = G_AND [[UV8]], [[TRUNC3]]
; CHECK-NEXT: [[AND3:%[0-9]+]]:_(<4 x s16>) = G_AND [[OR]], [[XOR1]]
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<4 x s16>) = G_OR [[AND2]], [[AND3]]
; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[OR1]](<4 x s16>)
; CHECK-NEXT: $q0 = COPY [[ANYEXT5]](<4 x s32>)
%0:_(s32) = COPY $w0
%1:_(s32) = COPY $w1
%2:_(s32) = COPY $w2
%3:_(s32) = COPY $w3
%4:_(<4 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %2(s32), %3(s32)
%5:_(s32) = COPY $w0
%6:_(s32) = COPY $w1
%7:_(s32) = COPY $w2
%8:_(s32) = COPY $w3
%9:_(<4 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %2(s32), %3(s32)
%10:_(<4 x s2>) = G_UCMP %4(<4 x s32>), %9
%11:_(<4 x s32>) = G_ANYEXT %10(<4 x s2>)
$q0 = COPY %11(<4 x s32>)

...
---
name: test_ucmp_i128
body: |
bb.0.entry:
; CHECK-LABEL: name: test_ucmp_i128
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[DEF]](s64), [[DEF]]
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[DEF]](s64), [[DEF]]
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s64), [[COPY1]]
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]]
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[SELECT]](s32), [[C]], [[C1]]
; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[DEF]](s64), [[DEF]]
; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[DEF]](s64), [[DEF]]
; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]]
; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s32), [[ICMP5]], [[ICMP3]]
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[SELECT2]](s32), [[C2]], [[SELECT1]]
; CHECK-NEXT: $w0 = COPY [[SELECT3]](s32)
%0:_(s64) = COPY $x0
%1:_(s64) = COPY $x0
%l:_(s128) = G_ANYEXT %0
%r:_(s128) = G_ANYEXT %1
%4:_(s2) = G_UCMP %l(s128), %r
%14:_(s32) = G_ANYEXT %4(s2)
$w0 = COPY %14(s32)
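For reference, the semantics these G_SCMP/G_UCMP tests legalize is the usual three-way compare, and the 2-bit constant 3 that appears in the generated selects is -1 truncated to s2; a minimal standalone model (not LLVM code):

```cpp
#include <cstdint>

// Result is 1 if x > y, -1 if x < y, 0 otherwise.
int8_t scmp(int64_t X, int64_t Y) { return (X > Y) - (X < Y); }
int8_t ucmp(uint64_t X, uint64_t Y) { return (X > Y) - (X < Y); }
```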
@@ -352,11 +352,12 @@
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_SCMP (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_UCMP (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_SELECT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
87 changes: 63 additions & 24 deletions llvm/test/CodeGen/AArch64/scmp.ll
@@ -1,26 +1,45 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI

define i8 @scmp.8.8(i8 %x, i8 %y) nounwind {
; CHECK-LABEL: scmp.8.8:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtb w8, w0
; CHECK-NEXT: cmp w8, w1, sxtb
; CHECK-NEXT: cset w8, gt
; CHECK-NEXT: csinv w0, w8, wzr, ge
; CHECK-NEXT: ret
; CHECK-SD-LABEL: scmp.8.8:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sxtb w8, w0
; CHECK-SD-NEXT: cmp w8, w1, sxtb
; CHECK-SD-NEXT: cset w8, gt
; CHECK-SD-NEXT: csinv w0, w8, wzr, ge
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: scmp.8.8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: sxtb w8, w0
; CHECK-GI-NEXT: sxtb w9, w1
; CHECK-GI-NEXT: cmp w8, w9
; CHECK-GI-NEXT: cset w8, gt
; CHECK-GI-NEXT: csinv w0, w8, wzr, ge
; CHECK-GI-NEXT: ret
%1 = call i8 @llvm.scmp(i8 %x, i8 %y)
ret i8 %1
}

define i8 @scmp.8.16(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: scmp.8.16:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
; CHECK-NEXT: cmp w8, w1, sxth
; CHECK-NEXT: cset w8, gt
; CHECK-NEXT: csinv w0, w8, wzr, ge
; CHECK-NEXT: ret
; CHECK-SD-LABEL: scmp.8.16:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sxth w8, w0
; CHECK-SD-NEXT: cmp w8, w1, sxth
; CHECK-SD-NEXT: cset w8, gt
; CHECK-SD-NEXT: csinv w0, w8, wzr, ge
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: scmp.8.16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: sxth w8, w0
; CHECK-GI-NEXT: sxth w9, w1
; CHECK-GI-NEXT: cmp w8, w9
; CHECK-GI-NEXT: cset w8, gt
; CHECK-GI-NEXT: csinv w0, w8, wzr, ge
; CHECK-GI-NEXT: ret
%1 = call i8 @llvm.scmp(i16 %x, i16 %y)
ret i8 %1
}
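The SelectionDAG output above leans on the cset/csinv pair; a small standalone model of what that pair computes (the function name is illustrative):

```cpp
#include <cstdint>

int32_t csetCsinvScmp(int64_t X, int64_t Y) {
  int32_t GtBit = (X > Y) ? 1 : 0; // cset w8, gt
  return (X >= Y) ? GtBit : -1;    // csinv w0, w8, wzr, ge  (~wzr == -1)
}
```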
@@ -48,15 +67,35 @@ define i8 @scmp.8.64(i64 %x, i64 %y) nounwind {
}

define i8 @scmp.8.128(i128 %x, i128 %y) nounwind {
; CHECK-LABEL: scmp.8.128:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp x2, x0
; CHECK-NEXT: sbcs xzr, x3, x1
; CHECK-NEXT: cset w8, lt
; CHECK-NEXT: cmp x0, x2
; CHECK-NEXT: sbcs xzr, x1, x3
; CHECK-NEXT: csinv w0, w8, wzr, ge
; CHECK-NEXT: ret
; CHECK-SD-LABEL: scmp.8.128:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: cmp x2, x0
; CHECK-SD-NEXT: sbcs xzr, x3, x1
; CHECK-SD-NEXT: cset w8, lt
; CHECK-SD-NEXT: cmp x0, x2
; CHECK-SD-NEXT: sbcs xzr, x1, x3
; CHECK-SD-NEXT: csinv w0, w8, wzr, ge
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: scmp.8.128:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: cmp x1, x3
; CHECK-GI-NEXT: cset w8, gt
; CHECK-GI-NEXT: cmp x0, x2
; CHECK-GI-NEXT: cset w9, hi
; CHECK-GI-NEXT: cmp x1, x3
; CHECK-GI-NEXT: csel w8, w9, w8, eq
; CHECK-GI-NEXT: tst w8, #0x1
; CHECK-GI-NEXT: cset w8, ne
; CHECK-GI-NEXT: cmp x1, x3
; CHECK-GI-NEXT: cset w9, lt
; CHECK-GI-NEXT: cmp x0, x2
; CHECK-GI-NEXT: cset w10, lo
; CHECK-GI-NEXT: cmp x1, x3
; CHECK-GI-NEXT: csel w9, w10, w9, eq
; CHECK-GI-NEXT: tst w9, #0x1
; CHECK-GI-NEXT: csinv w0, w8, wzr, eq
; CHECK-GI-NEXT: ret
%1 = call i8 @llvm.scmp(i128 %x, i128 %y)
ret i8 %1
}
134 changes: 98 additions & 36 deletions llvm/test/CodeGen/AArch64/ucmp.ll
@@ -1,26 +1,45 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI

define i8 @ucmp.8.8(i8 %x, i8 %y) nounwind {
; CHECK-LABEL: ucmp.8.8:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: cmp w8, w1, uxtb
; CHECK-NEXT: cset w8, hi
; CHECK-NEXT: csinv w0, w8, wzr, hs
; CHECK-NEXT: ret
; CHECK-SD-LABEL: ucmp.8.8:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: and w8, w0, #0xff
; CHECK-SD-NEXT: cmp w8, w1, uxtb
; CHECK-SD-NEXT: cset w8, hi
; CHECK-SD-NEXT: csinv w0, w8, wzr, hs
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ucmp.8.8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: and w8, w0, #0xff
; CHECK-GI-NEXT: and w9, w1, #0xff
; CHECK-GI-NEXT: cmp w8, w9
; CHECK-GI-NEXT: cset w8, hi
; CHECK-GI-NEXT: csinv w0, w8, wzr, hs
; CHECK-GI-NEXT: ret
%1 = call i8 @llvm.ucmp(i8 %x, i8 %y)
ret i8 %1
}

define i8 @ucmp.8.16(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: ucmp.8.16:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
; CHECK-NEXT: cmp w8, w1, uxth
; CHECK-NEXT: cset w8, hi
; CHECK-NEXT: csinv w0, w8, wzr, hs
; CHECK-NEXT: ret
; CHECK-SD-LABEL: ucmp.8.16:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: and w8, w0, #0xffff
; CHECK-SD-NEXT: cmp w8, w1, uxth
; CHECK-SD-NEXT: cset w8, hi
; CHECK-SD-NEXT: csinv w0, w8, wzr, hs
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ucmp.8.16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: and w8, w0, #0xffff
; CHECK-GI-NEXT: and w9, w1, #0xffff
; CHECK-GI-NEXT: cmp w8, w9
; CHECK-GI-NEXT: cset w8, hi
; CHECK-GI-NEXT: csinv w0, w8, wzr, hs
; CHECK-GI-NEXT: ret
%1 = call i8 @llvm.ucmp(i16 %x, i16 %y)
ret i8 %1
}
@@ -48,15 +67,35 @@ define i8 @ucmp.8.64(i64 %x, i64 %y) nounwind {
}

define i8 @ucmp.8.128(i128 %x, i128 %y) nounwind {
; CHECK-LABEL: ucmp.8.128:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp x2, x0
; CHECK-NEXT: sbcs xzr, x3, x1
; CHECK-NEXT: cset w8, lo
; CHECK-NEXT: cmp x0, x2
; CHECK-NEXT: sbcs xzr, x1, x3
; CHECK-NEXT: csinv w0, w8, wzr, hs
; CHECK-NEXT: ret
; CHECK-SD-LABEL: ucmp.8.128:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: cmp x2, x0
; CHECK-SD-NEXT: sbcs xzr, x3, x1
; CHECK-SD-NEXT: cset w8, lo
; CHECK-SD-NEXT: cmp x0, x2
; CHECK-SD-NEXT: sbcs xzr, x1, x3
; CHECK-SD-NEXT: csinv w0, w8, wzr, hs
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ucmp.8.128:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: cmp x1, x3
; CHECK-GI-NEXT: cset w8, hi
; CHECK-GI-NEXT: cmp x0, x2
; CHECK-GI-NEXT: cset w9, hi
; CHECK-GI-NEXT: cmp x1, x3
; CHECK-GI-NEXT: csel w8, w9, w8, eq
; CHECK-GI-NEXT: tst w8, #0x1
; CHECK-GI-NEXT: cset w8, ne
; CHECK-GI-NEXT: cmp x1, x3
; CHECK-GI-NEXT: cset w9, lo
; CHECK-GI-NEXT: cmp x0, x2
; CHECK-GI-NEXT: cset w10, lo
; CHECK-GI-NEXT: cmp x1, x3
; CHECK-GI-NEXT: csel w9, w10, w9, eq
; CHECK-GI-NEXT: tst w9, #0x1
; CHECK-GI-NEXT: csinv w0, w8, wzr, eq
; CHECK-GI-NEXT: ret
%1 = call i8 @llvm.ucmp(i128 %x, i128 %y)
ret i8 %1
}
@@ -95,18 +134,41 @@ define i64 @ucmp.64.64(i64 %x, i64 %y) nounwind {
}

define <1 x i64> @ucmp.1.64.65(<1 x i65> %x, <1 x i65> %y) {
; CHECK-LABEL: ucmp.1.64.65:
; CHECK: // %bb.0:
; CHECK-NEXT: and x8, x1, #0x1
; CHECK-NEXT: and x9, x3, #0x1
; CHECK-NEXT: cmp x2, x0
; CHECK-NEXT: sbcs xzr, x9, x8
; CHECK-NEXT: cset x10, lo
; CHECK-NEXT: cmp x0, x2
; CHECK-NEXT: sbcs xzr, x8, x9
; CHECK-NEXT: csinv x8, x10, xzr, hs
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: ret
; CHECK-SD-LABEL: ucmp.1.64.65:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: and x8, x1, #0x1
; CHECK-SD-NEXT: and x9, x3, #0x1
; CHECK-SD-NEXT: cmp x2, x0
; CHECK-SD-NEXT: sbcs xzr, x9, x8
; CHECK-SD-NEXT: cset x10, lo
; CHECK-SD-NEXT: cmp x0, x2
; CHECK-SD-NEXT: sbcs xzr, x8, x9
; CHECK-SD-NEXT: csinv x8, x10, xzr, hs
; CHECK-SD-NEXT: fmov d0, x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ucmp.1.64.65:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: and x8, x1, #0x1
; CHECK-GI-NEXT: and x9, x3, #0x1
; CHECK-GI-NEXT: cmp x8, x9
; CHECK-GI-NEXT: cset w10, hi
; CHECK-GI-NEXT: cmp x0, x2
; CHECK-GI-NEXT: cset w11, hi
; CHECK-GI-NEXT: cmp x8, x9
; CHECK-GI-NEXT: csel w10, w11, w10, eq
; CHECK-GI-NEXT: tst w10, #0x1
; CHECK-GI-NEXT: cset x10, ne
; CHECK-GI-NEXT: cmp x8, x9
; CHECK-GI-NEXT: cset w11, lo
; CHECK-GI-NEXT: cmp x0, x2
; CHECK-GI-NEXT: cset w12, lo
; CHECK-GI-NEXT: cmp x8, x9
; CHECK-GI-NEXT: csel w8, w12, w11, eq
; CHECK-GI-NEXT: tst w8, #0x1
; CHECK-GI-NEXT: csinv x8, x10, xzr, eq
; CHECK-GI-NEXT: fmov d0, x8
; CHECK-GI-NEXT: ret
%1 = call <1 x i64> @llvm.ucmp(<1 x i65> %x, <1 x i65> %y)
ret <1 x i64> %1
}
100 changes: 50 additions & 50 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll
@@ -1021,20 +1021,20 @@ main_body:
define amdgpu_kernel void @global_atomic_fadd_f64_noret(ptr addrspace(1) %ptr, double %data) {
; GFX90A-LABEL: global_atomic_fadd_f64_noret:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1]
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1]
; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5]
; GFX90A-NEXT: s_endpgm
;
; GFX940-LABEL: global_atomic_fadd_f64_noret:
; GFX940: ; %bb.0: ; %main_body
; GFX940-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX940-NEXT: v_mov_b32_e32 v2, 0
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1]
; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7]
; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5]
; GFX940-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
@@ -1044,20 +1044,20 @@
define amdgpu_kernel void @global_atomic_fmin_f64_noret(ptr addrspace(1) %ptr, double %data) {
; GFX90A-LABEL: global_atomic_fmin_f64_noret:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT: global_atomic_min_f64 v2, v[0:1], s[0:1]
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1]
; GFX90A-NEXT: global_atomic_min_f64 v2, v[0:1], s[4:5]
; GFX90A-NEXT: s_endpgm
;
; GFX940-LABEL: global_atomic_fmin_f64_noret:
; GFX940: ; %bb.0: ; %main_body
; GFX940-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX940-NEXT: v_mov_b32_e32 v2, 0
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
; GFX940-NEXT: global_atomic_min_f64 v2, v[0:1], s[0:1]
; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7]
; GFX940-NEXT: global_atomic_min_f64 v2, v[0:1], s[4:5]
; GFX940-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
@@ -1067,20 +1067,20 @@
define amdgpu_kernel void @global_atomic_fmax_f64_noret(ptr addrspace(1) %ptr, double %data) {
; GFX90A-LABEL: global_atomic_fmax_f64_noret:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT: global_atomic_max_f64 v2, v[0:1], s[0:1]
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1]
; GFX90A-NEXT: global_atomic_max_f64 v2, v[0:1], s[4:5]
; GFX90A-NEXT: s_endpgm
;
; GFX940-LABEL: global_atomic_fmax_f64_noret:
; GFX940: ; %bb.0: ; %main_body
; GFX940-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX940-NEXT: v_mov_b32_e32 v2, 0
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
; GFX940-NEXT: global_atomic_max_f64 v2, v[0:1], s[0:1]
; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7]
; GFX940-NEXT: global_atomic_max_f64 v2, v[0:1], s[4:5]
; GFX940-NEXT: s_endpgm
main_body:
%ret = call double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
@@ -1134,14 +1134,14 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %pt
; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX940-NEXT: s_cbranch_execz .LBB39_2
; GFX940-NEXT: ; %bb.1:
; GFX940-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x24
; GFX940-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24
; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
; GFX940-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
; GFX940-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
; GFX940-NEXT: v_mov_b32_e32 v2, 0
; GFX940-NEXT: buffer_wbl2 sc0 sc1
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3] sc1
; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5] sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: buffer_inv sc0 sc1
; GFX940-NEXT: .LBB39_2:
@@ -1162,13 +1162,13 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace(
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX90A-NEXT: s_cbranch_execz .LBB40_2
; GFX90A-NEXT: ; %bb.1:
; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x24
; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24
; GFX90A-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
; GFX90A-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
; GFX90A-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3]
; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: buffer_wbinvl1_vol
; GFX90A-NEXT: .LBB40_2:
@@ -1184,14 +1184,14 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace(
; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX940-NEXT: s_cbranch_execz .LBB40_2
; GFX940-NEXT: ; %bb.1:
; GFX940-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x24
; GFX940-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24
; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
; GFX940-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
; GFX940-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
; GFX940-NEXT: v_mov_b32_e32 v2, 0
; GFX940-NEXT: buffer_wbl2 sc1
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3]
; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5]
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: buffer_inv sc1
; GFX940-NEXT: .LBB40_2:
@@ -1248,14 +1248,14 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace
; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX940-NEXT: s_cbranch_execz .LBB41_2
; GFX940-NEXT: ; %bb.1:
; GFX940-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x24
; GFX940-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24
; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
; GFX940-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
; GFX940-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
; GFX940-NEXT: v_mov_b32_e32 v2, 0
; GFX940-NEXT: buffer_wbl2 sc0 sc1
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3] sc1
; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5] sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: buffer_inv sc0 sc1
; GFX940-NEXT: .LBB41_2:
@@ -1276,13 +1276,13 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace(
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX90A-NEXT: s_cbranch_execz .LBB42_2
; GFX90A-NEXT: ; %bb.1:
; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x24
; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24
; GFX90A-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
; GFX90A-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
; GFX90A-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3]
; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: buffer_wbinvl1_vol
; GFX90A-NEXT: .LBB42_2:
@@ -1298,14 +1298,14 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace(
; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX940-NEXT: s_cbranch_execz .LBB42_2
; GFX940-NEXT: ; %bb.1:
; GFX940-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x24
; GFX940-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24
; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
; GFX940-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
; GFX940-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
; GFX940-NEXT: v_mov_b32_e32 v2, 0
; GFX940-NEXT: buffer_wbl2 sc1
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3]
; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5]
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: buffer_inv sc1
; GFX940-NEXT: .LBB42_2:
@@ -1522,14 +1522,14 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(ptr addrs
; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX940-NEXT: s_cbranch_execz .LBB49_2
; GFX940-NEXT: ; %bb.1:
; GFX940-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x24
; GFX940-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24
; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
; GFX940-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
; GFX940-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
; GFX940-NEXT: v_mov_b32_e32 v2, 0
; GFX940-NEXT: buffer_wbl2 sc1
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3]
; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5]
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: buffer_inv sc1
; GFX940-NEXT: .LBB49_2:
@@ -1761,19 +1761,19 @@ main_body:
define amdgpu_kernel void @flat_atomic_fadd_f64_noret(ptr %ptr, double %data) {
; GFX90A-LABEL: flat_atomic_fadd_f64_noret:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[6:7], s[6:7] op_sel:[0,1]
; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
; GFX90A-NEXT: s_endpgm
;
; GFX940-LABEL: flat_atomic_fadd_f64_noret:
; GFX940: ; %bb.0: ; %main_body
; GFX940-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[4:5]
; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[6:7]
; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
; GFX940-NEXT: s_endpgm
main_body:
@@ -1842,19 +1842,19 @@ main_body:
define amdgpu_kernel void @flat_atomic_fmin_f64_noret(ptr %ptr, double %data) {
; GFX90A-LABEL: flat_atomic_fmin_f64_noret:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[6:7], s[6:7] op_sel:[0,1]
; GFX90A-NEXT: flat_atomic_min_f64 v[0:1], v[2:3]
; GFX90A-NEXT: s_endpgm
;
; GFX940-LABEL: flat_atomic_fmin_f64_noret:
; GFX940: ; %bb.0: ; %main_body
; GFX940-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[4:5]
; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[6:7]
; GFX940-NEXT: flat_atomic_min_f64 v[0:1], v[2:3]
; GFX940-NEXT: s_endpgm
main_body:
@@ -1884,19 +1884,19 @@ main_body:
define amdgpu_kernel void @flat_atomic_fmax_f64_noret(ptr %ptr, double %data) {
; GFX90A-LABEL: flat_atomic_fmax_f64_noret:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[6:7], s[6:7] op_sel:[0,1]
; GFX90A-NEXT: flat_atomic_max_f64 v[0:1], v[2:3]
; GFX90A-NEXT: s_endpgm
;
; GFX940-LABEL: flat_atomic_fmax_f64_noret:
; GFX940: ; %bb.0: ; %main_body
; GFX940-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[4:5]
; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[6:7]
; GFX940-NEXT: flat_atomic_max_f64 v[0:1], v[2:3]
; GFX940-NEXT: s_endpgm
main_body: