10 changes: 9 additions & 1 deletion clang/test/AST/Interp/vectors.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// RUN: %clang_cc1 -fexperimental-new-constant-interpreter -verify=expected,both %s
// RUN: %clang_cc1 -verify=ref,both %s

// both-no-diagnostics
// ref-no-diagnostics

typedef int __attribute__((vector_size(16))) VI4;
constexpr VI4 A = {1,2,3,4};
Expand All @@ -13,10 +13,18 @@ namespace Vector {
return VI4 { n * 3, n + 4, n - 5, n / 6 };
}
constexpr auto v1 = f(10);
static_assert(__builtin_vectorelements(v1) == (16 / sizeof(int)), "");

typedef double __attribute__((vector_size(32))) VD4;
// Constant-evaluable helper that returns a VD4 (vector of doubles) whose
// elements are each derived from `n` with a different floating-point
// operation (divide, add, subtract, multiply).
constexpr VD4 g(int n) {
return (VD4) { n / 2.0, n + 1.5, n - 5.4, n * 0.9 };
}
constexpr auto v2 = g(4);
static_assert(__builtin_vectorelements(v2) == (32 / sizeof(double)), "");
}

/// FIXME: We need to support BitCasts between vector types.
namespace {
typedef float __attribute__((vector_size(16))) VI42;
constexpr VI42 A2 = A; // expected-error {{must be initialized by a constant expression}}
}
6 changes: 4 additions & 2 deletions clang/test/Analysis/Inputs/system-header-simulator-cxx.h
Original file line number Diff line number Diff line change
Expand Up @@ -1106,6 +1106,7 @@ using ostream = basic_ostream<char>;
extern std::ostream cout;

ostream &operator<<(ostream &, const string &);

#if __cplusplus >= 202002L
template <class T>
ostream &operator<<(ostream &, const std::unique_ptr<T> &);
Expand All @@ -1122,11 +1123,12 @@ istream &getline(istream &, string &, char);
istream &getline(istream &, string &);
} // namespace std

#ifdef TEST_INLINABLE_ALLOCATORS
namespace std {
void *malloc(size_t);
void free(void *);
}
} // namespace std

#ifdef TEST_INLINABLE_ALLOCATORS
void* operator new(std::size_t size, const std::nothrow_t&) throw() { return std::malloc(size); }
void* operator new[](std::size_t size, const std::nothrow_t&) throw() { return std::malloc(size); }
void operator delete(void* ptr, const std::nothrow_t&) throw() { std::free(ptr); }
Expand Down
24 changes: 16 additions & 8 deletions clang/test/Analysis/cxx-uninitialized-object-ptr-ref.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
// RUN: %clang_analyze_cc1 -analyzer-checker=core,optin.cplusplus.UninitializedObject \
// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc,optin.cplusplus.UninitializedObject \
// RUN: -analyzer-config optin.cplusplus.UninitializedObject:Pedantic=true -DPEDANTIC \
// RUN: -analyzer-config optin.cplusplus.UninitializedObject:CheckPointeeInitialization=true \
// RUN: -std=c++11 -verify %s

// RUN: %clang_analyze_cc1 -analyzer-checker=core,optin.cplusplus.UninitializedObject \
// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc,optin.cplusplus.UninitializedObject \
// RUN: -analyzer-config optin.cplusplus.UninitializedObject:CheckPointeeInitialization=true \
// RUN: -std=c++11 -verify %s

Expand Down Expand Up @@ -316,7 +316,10 @@ void fCyclicPointerTest2() {

// Void pointer tests are mainly no-crash tests.

void *malloc(int size);
typedef __typeof(sizeof(int)) size_t;

void *calloc(size_t nmemb, size_t size);
void free(void *p);

class VoidPointerTest1 {
void *vptr;
Expand All @@ -328,8 +331,9 @@ class VoidPointerTest1 {
};

void fVoidPointerTest1() {
void *vptr = malloc(sizeof(int));
void *vptr = calloc(1, sizeof(int));
VoidPointerTest1(vptr, char());
free(vptr);
}

class VoidPointerTest2 {
Expand All @@ -342,8 +346,9 @@ class VoidPointerTest2 {
};

void fVoidPointerTest2() {
void *vptr = malloc(sizeof(int));
void *vptr = calloc(1, sizeof(int));
VoidPointerTest2(&vptr, char());
free(vptr);
}

class VoidPointerRRefTest1 {
Expand All @@ -359,8 +364,9 @@ upon returning to the caller. This will be a dangling reference}}
};

void fVoidPointerRRefTest1() {
void *vptr = malloc(sizeof(int));
void *vptr = calloc(1, sizeof(int));
VoidPointerRRefTest1(vptr, char());
free(vptr);
}

class VoidPointerRRefTest2 {
Expand All @@ -376,8 +382,9 @@ upon returning to the caller. This will be a dangling reference}}
};

void fVoidPointerRRefTest2() {
void *vptr = malloc(sizeof(int));
void *vptr = calloc(1, sizeof(int));
VoidPointerRRefTest2(&vptr, char());
free(vptr);
}

class VoidPointerLRefTest {
Expand All @@ -393,8 +400,9 @@ upon returning to the caller. This will be a dangling reference}}
};

void fVoidPointerLRefTest() {
void *vptr = malloc(sizeof(int));
void *vptr = calloc(1, sizeof(int));
VoidPointerLRefTest(vptr, char());
free(vptr);
}

struct CyclicVoidPointerTest {
Expand Down
2 changes: 1 addition & 1 deletion clang/test/Analysis/exercise-ps.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// RUN: %clang_analyze_cc1 %s -verify -Wno-error=implicit-function-declaration \
// RUN: -analyzer-checker=core \
// RUN: -analyzer-checker=core,unix.Malloc \
// RUN: -analyzer-config core.CallAndMessage:ArgPointeeInitializedness=true
//
// Just exercise the analyzer on code that has at one point caused issues
Expand Down
2 changes: 1 addition & 1 deletion clang/test/Analysis/explain-svals.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// RUN: %clang_analyze_cc1 -triple i386-apple-darwin10 -verify %s \
// RUN: -analyzer-checker=core.builtin \
// RUN: -analyzer-checker=debug.ExprInspection \
// RUN: -analyzer-checker=unix.cstring \
// RUN: -analyzer-checker=unix.Malloc \
// RUN: -analyzer-config display-checker-name=false

typedef unsigned long size_t;
Expand Down
24 changes: 24 additions & 0 deletions clang/test/Analysis/malloc-std-namespace.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc -verify -analyzer-output=text %s

// This file tests that unix.Malloc can handle C++ code where e.g. malloc and
// free are declared within the namespace 'std' by the header <cstdlib>.

#include "Inputs/system-header-simulator-cxx.h"

// Allocates through std::malloc and never releases the pointer, so the
// analyzer should report a leak where `p` goes out of scope.
void leak() {
int *p = static_cast<int*>(std::malloc(sizeof(int))); // expected-note{{Memory is allocated}}
} // expected-warning{{Potential leak of memory pointed to by 'p'}}
// expected-note@-1{{Potential leak of memory pointed to by 'p'}}

// Pairs std::malloc with std::free on the same pointer; the analyzer should
// stay silent here.
void no_leak() {
int *p = static_cast<int*>(std::malloc(sizeof(int)));
std::free(p); // no-warning
}

// Hands the address of a local variable to std::free; the analyzer should
// report that the argument is not memory obtained from malloc().
void invalid_free() {
int i;
int *p = &i;
//expected-note@+2{{Argument to free() is the address of the local variable 'i', which is not memory allocated by malloc()}}
//expected-warning@+1{{Argument to free() is the address of the local variable 'i', which is not memory allocated by malloc()}}
std::free(p);
}
11 changes: 11 additions & 0 deletions clang/test/Analysis/malloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -740,6 +740,17 @@ void allocaFree(void) {
free(p); // expected-warning {{Memory allocated by alloca() should not be deallocated}}
}

// Frees memory obtained from __builtin_alloca; the diagnostic on the free()
// call is the same one used for alloca() memory.
void allocaFreeBuiltin(void) {
int *p = __builtin_alloca(sizeof(int));
free(p); // expected-warning {{Memory allocated by alloca() should not be deallocated}}
}

// Frees memory obtained from __builtin_alloca_with_align (second argument 64);
// the diagnostic on the free() call is the same one used for alloca() memory.
void allocaFreeBuiltinAlign(void) {
int *p = __builtin_alloca_with_align(sizeof(int), 64);
free(p); // expected-warning {{Memory allocated by alloca() should not be deallocated}}
}


int* mallocEscapeRet(void) {
int *p = malloc(12);
return p; // no warning
Expand Down
11 changes: 11 additions & 0 deletions clang/test/Analysis/malloc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -214,3 +214,14 @@ void *realloc(void **ptr, size_t size) { realloc(ptr, size); } // no-crash
namespace pr46253_paramty2{
void *realloc(void *ptr, int size) { realloc(ptr, size); } // no-crash
} // namespace pr46253_paramty2

// Regression test (named after PR81597): a member function that merely shares
// the name `free` is called on a stack object, and no diagnostic should be
// produced for it.
namespace pr81597 {
struct S {};
struct T {
// Member function named like the C deallocator but taking a const S&.
void free(const S& s);
};
void f(T& t) {
S s;
t.free(s); // no-warning: This is not the free you are looking for...
}
} // namespace pr81597
2 changes: 1 addition & 1 deletion clang/test/Analysis/stack-addr-ps.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_analyze_cc1 -analyzer-checker=core -fblocks -verify %s
// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc -fblocks -verify %s

int* f1(void) {
int x = 0;
Expand Down
4 changes: 2 additions & 2 deletions clang/test/Analysis/stackaddrleak.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// RUN: %clang_analyze_cc1 -analyzer-checker=core -verify -std=c99 -Dbool=_Bool -Wno-bool-conversion %s
// RUN: %clang_analyze_cc1 -analyzer-checker=core -verify -x c++ -Wno-bool-conversion %s
// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc -verify -std=c99 -Dbool=_Bool -Wno-bool-conversion %s
// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc -verify -x c++ -Wno-bool-conversion %s

typedef __INTPTR_TYPE__ intptr_t;
char const *p;
Expand Down
4 changes: 2 additions & 2 deletions clang/test/CodeGen/target-data.c
Original file line number Diff line number Diff line change
Expand Up @@ -251,11 +251,11 @@

// RUN: %clang_cc1 -triple spir-unknown -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=SPIR
// SPIR: target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
// SPIR: target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-G1"

// RUN: %clang_cc1 -triple spir64-unknown -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=SPIR64
// SPIR64: target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
// SPIR64: target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-G1"

// RUN: %clang_cc1 -triple bpfel -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=BPFEL
Expand Down
16 changes: 16 additions & 0 deletions clang/test/CodeGenCXX/control-flow-in-stmt-expr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -391,3 +391,19 @@ void ArrayInitWithContinue() {
})};
}
}

// Class carrying [[clang::trivial_abi]] while still declaring its own default
// constructor and destructor.
struct [[clang::trivial_abi]] HasTrivialABI {
HasTrivialABI();
~HasTrivialABI();
};
void AcceptTrivialABI(HasTrivialABI, int);
// Passes a HasTrivialABI temporary together with a statement expression that
// may return early from the function. The directives below verify that the
// early-return branch calls the temporary's destructor before branching to
// %return.
void TrivialABI() {
// CHECK-LABEL: define dso_local void @_Z10TrivialABIv()
AcceptTrivialABI(HasTrivialABI(), ({
if (foo()) return;
// CHECK: if.then:
// CHECK-NEXT: call void @_ZN13HasTrivialABID1Ev
// CHECK-NEXT: br label %return
0;
}));
}
24 changes: 24 additions & 0 deletions clang/test/Driver/windows-seh-async-verify.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// RUN: %clang --target=x86_64-pc-windows -fasync-exceptions -fsyntax-only -### %s 2>&1 | FileCheck %s
// RUN: %clang_cl --target=x86_64-pc-windows /EHa -fsyntax-only -### -- %s 2>&1 | FileCheck %s
// RUN: %clang --target=x86_64-pc-windows-gnu -fasync-exceptions -fsyntax-only -### %s 2>&1 | FileCheck %s --check-prefixes=GNU-ALL,GNU
// RUN: %clang_cl --target=x86_64-pc-windows-gnu /EHa -fsyntax-only -### -- %s 2>&1 | FileCheck %s --check-prefixes=GNU-ALL,CL-GNU

// CHECK-NOT: warning
// GNU: warning: argument unused during compilation: '-fasync-exceptions' [-Wunused-command-line-argument]
// CL-GNU: warning: argument unused during compilation: '/EHa' [-Wunused-command-line-argument]

// CHECK: -fasync-exceptions
// GNU-ALL-NOT: -fasync-exceptions
// Struct whose single member is a nested union that has a user-provided
// (empty) destructor and a 12-byte character buffer.
struct S {
union _Un {
~_Un() {}
char _Buf[12];
};
_Un _un;
};

// Wraps an S as a data member so the union above is reached through one more
// level of nesting.
struct Embed {
S v2;
};

// Value-initializes a local Embed.
// NOTE(review): the name suggests this reproduces PR62449 -- confirm against the bug report.
void PR62449() { Embed v{}; }
12 changes: 12 additions & 0 deletions clang/test/Index/USR/func-type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,15 @@ void Func( void (* (*)(int, int))(int, int) );
// CHECK: {{[0-9]+}}:6 | function/C | Func | c:@F@Func#*F*Fv(#I#I)(#I#I)# |
void Func( void (* (*)(int, int, int))(int) );
// CHECK: {{[0-9]+}}:6 | function/C | Func | c:@F@Func#*F*Fv(#I)(#I#I#I)# |

// Functions with parameter types that only differ in top-level cv-qualification should generate the same USR.

void f( const int );
// CHECK: {{[0-9]+}}:6 | function/C | f | c:@F@f#I# |
void f( int );
// CHECK: {{[0-9]+}}:6 | function/C | f | c:@F@f#I# |

void g( int );
// CHECK: {{[0-9]+}}:6 | function/C | g | c:@F@g#I# |
void g( const int );
// CHECK: {{[0-9]+}}:6 | function/C | g | c:@F@g#I# |
44 changes: 44 additions & 0 deletions clang/test/Modules/hashing-decls-in-exprs-from-gmf-2.cppm
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// RUN: rm -rf %t
// RUN: mkdir -p %t
// RUN: split-file %s %t
//
// RUN: %clang_cc1 -std=c++20 -fskip-odr-check-in-gmf %t/A.cppm -emit-module-interface -o %t/A.pcm
// RUN: %clang_cc1 -std=c++20 -fskip-odr-check-in-gmf %t/test.cpp -fprebuilt-module-path=%t -fsyntax-only -verify

//--- header.h
#pragma once
template <class _Tp>
class Optional {};

// Concept satisfied when a `const _Tp&` can be passed to a generic lambda
// whose parameter is `const Optional<_Up>&`, i.e. when `_Up` can be deduced
// from the argument. The lambda is declared inside the requires-expression.
template <class _Tp>
concept C = requires(const _Tp& __t) {
[]<class _Up>(const Optional<_Up>&) {}(__t);
};

//--- func.h
#include "header.h"
// Function template constrained by concept C; the body is intentionally empty.
template <C T>
void func() {}

//--- test_func.h
#include "func.h"

// Inline function that instantiates func<> with Optional<int>, which forces
// the constraint C<Optional<int>> to be checked.
inline void test_func() {
func<Optional<int>>();
}

//--- A.cppm
module;
#include "header.h"
#include "test_func.h"
export module A;
export using ::test_func;

//--- test.cpp
// expected-no-diagnostics
import A;
#include "test_func.h"

// Calls test_func, which this translation unit sees both through `import A`
// and through the textual include of test_func.h.
void test() {
test_func();
}
20 changes: 20 additions & 0 deletions clang/test/SemaCXX/instantiate-new-placement-size.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// RUN: %clang -S -fno-discard-value-names -emit-llvm -o - %s | FileCheck %s
// Issue no: 41441
#include <new>

// CHECK: call void @llvm.memset.p0.i64(ptr align 1 %x, i8 0, i64 8, i1 false)
// CHECK: call void @llvm.memset.p0.i64(ptr align 16 %x, i8 0, i64 32, i1 false)
// Declares a local array of eight TYPE elements and value-initializes it in
// place with placement new. The array type is a typedef that depends on the
// template parameter, so its size is only known after instantiation.
template <typename TYPE>
void f()
{
typedef TYPE TArray[8];

TArray x;
new(&x) TArray();
}

// Instantiates f for char and for int. The two memset directives near the top
// of the file look for an 8-byte and a 32-byte clear respectively
// (NOTE(review): the 32 assumes a 4-byte int on the host target).
int main()
{
f<char>();
f<int>();
}
8 changes: 8 additions & 0 deletions flang/docs/Intrinsics.md
Original file line number Diff line number Diff line change
Expand Up @@ -657,6 +657,14 @@ CALL CO_REDUCE
CALL CO_SUM
```

### Inquiry Functions
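ACCESS (GNU extension) is not supported on Windows. On other platforms it returns 0 when the file can be accessed in the requested mode and a nonzero value otherwise: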
```
CHARACTER(LEN=*) :: path = 'path/to/file'
IF (ACCESS(path, 'rwx')) &
...
```

## Non-standard intrinsics
### PGI
```
Expand Down
7 changes: 7 additions & 0 deletions flang/include/flang/Runtime/extensions.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,5 +44,12 @@ std::int64_t RTNAME(Signal)(std::int64_t number, void (*handler)(int));
// GNU extension subroutine SLEEP(SECONDS)
void RTNAME(Sleep)(std::int64_t seconds);

// GNU extension function ACCESS(NAME, MODE)
// TODO: not supported on Windows
#ifndef _WIN32
// `name` and `mode` are character buffers with explicit lengths; `mode` may
// contain 'r', 'w', 'x' and blanks. Returns the result of the underlying
// access() call, or -1 when an argument is null/empty or `mode` contains any
// other character.
std::int64_t FORTRAN_PROCEDURE_NAME(access)(const char *name,
std::int64_t nameLength, const char *mode, std::int64_t modeLength);
#endif

} // extern "C"
#endif // FORTRAN_RUNTIME_EXTENSIONS_H_
4 changes: 2 additions & 2 deletions flang/lib/Lower/OpenMP/ClauseProcessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -832,8 +832,8 @@ createMapInfoOp(fir::FirOpBuilder &builder, mlir::Location loc,
}

bool ClauseProcessor::processMap(
mlir::Location currentLocation, const llvm::omp::Directive &directive,
Fortran::lower::StatementContext &stmtCtx, mlir::omp::MapClauseOps &result,
mlir::Location currentLocation, Fortran::lower::StatementContext &stmtCtx,
mlir::omp::MapClauseOps &result,
llvm::SmallVectorImpl<const Fortran::semantics::Symbol *> *mapSyms,
llvm::SmallVectorImpl<mlir::Location> *mapSymLocs,
llvm::SmallVectorImpl<mlir::Type> *mapSymTypes) const {
Expand Down
3 changes: 1 addition & 2 deletions flang/lib/Lower/OpenMP/ClauseProcessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,7 @@ class ClauseProcessor {
// They may be used later on to create the block_arguments for some of the
// target directives that require it.
bool processMap(
mlir::Location currentLocation, const llvm::omp::Directive &directive,
Fortran::lower::StatementContext &stmtCtx,
mlir::Location currentLocation, Fortran::lower::StatementContext &stmtCtx,
mlir::omp::MapClauseOps &result,
llvm::SmallVectorImpl<const Fortran::semantics::Symbol *> *mapSyms =
nullptr,
Expand Down
2,451 changes: 1,237 additions & 1,214 deletions flang/lib/Lower/OpenMP/OpenMP.cpp

Large diffs are not rendered by default.

73 changes: 73 additions & 0 deletions flang/runtime/extensions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "flang/Runtime/entry-names.h"
#include "flang/Runtime/io-api.h"
#include <chrono>
#include <cstring>
#include <ctime>
#include <signal.h>
#include <thread>
Expand Down Expand Up @@ -138,5 +139,77 @@ void RTNAME(Sleep)(std::int64_t seconds) {
std::this_thread::sleep_for(std::chrono::seconds(seconds));
}

// TODO: not supported on Windows
#ifndef _WIN32
// GNU extension function ACCESS(NAME, MODE).
//
// name/nameLength: file name buffer and its length; it does not have to be
//                  NUL-terminated.
// mode/modeLength: any combination of 'r', 'w', 'x' and blanks. If no
//                  permission letter is present (blanks only), only the
//                  existence of the file is tested.
//
// Returns the value of access(2) for the translated mode, or -1 when an
// argument is null/empty, MODE contains any other character, or the temporary
// copy of NAME cannot be allocated.
//
// NOTE(review): trailing blanks in NAME are passed to access() unchanged;
// gfortran documents ignoring them -- confirm whether trimming is wanted here.
std::int64_t FORTRAN_PROCEDURE_NAME(access)(const char *name,
    std::int64_t nameLength, const char *mode, std::int64_t modeLength) {
  constexpr std::int64_t failure{-1};
  if (nameLength <= 0 || modeLength <= 0 || !name || !mode) {
    return failure;
  }

  // Translate MODE before touching the heap so that an invalid mode can
  // return immediately without any cleanup.
  int permissionBits{0};
  bool wantsPermission{false};
  for (std::int64_t i{0}; i < modeLength; ++i) {
    switch (mode[i]) {
    case 'r':
      permissionBits |= R_OK;
      wantsPermission = true;
      break;
    case 'w':
      permissionBits |= W_OK;
      wantsPermission = true;
      break;
    case 'x':
      permissionBits |= X_OK;
      wantsPermission = true;
      break;
    case ' ':
      // A blank only requests an existence check; nothing to record.
      break;
    default:
      // invalid mode
      return failure;
    }
  }
  // modeLength > 0 and every character was accepted, so MODE holds at least
  // one permission letter or at least one blank (existence check).
  const int imode{wantsPermission ? permissionBits : F_OK};

  // access() needs a NUL-terminated path; copy NAME only when it is not
  // already terminated.
  char *newName{nullptr};
  if (name[nameLength - 1] != '\0') {
    newName = static_cast<char *>(std::malloc(nameLength + 1));
    if (!newName) {
      // Allocation failed: report failure instead of writing through null.
      return failure;
    }
    std::memcpy(newName, name, nameLength);
    newName[nameLength] = '\0';
    name = newName;
  }

  const std::int64_t ret{access(name, imode)};
  std::free(newName); // no-op when no copy was made
  return ret;
}
#endif

} // namespace Fortran::runtime
} // extern "C"
2 changes: 1 addition & 1 deletion flang/test/Lower/OpenMP/FIR/target.f90
Original file line number Diff line number Diff line change
Expand Up @@ -411,8 +411,8 @@ end subroutine omp_target_implicit_bounds
!CHECK-LABEL: func.func @_QPomp_target_thread_limit() {
subroutine omp_target_thread_limit
integer :: a
!CHECK: %[[VAL_1:.*]] = arith.constant 64 : i32
!CHECK: %[[MAP:.*]] = omp.map.info var_ptr({{.*}}) map_clauses(tofrom) capture(ByRef) -> !fir.ref<i32> {name = "a"}
!CHECK: %[[VAL_1:.*]] = arith.constant 64 : i32
!CHECK: omp.target thread_limit(%[[VAL_1]] : i32) map_entries(%[[MAP]] -> %[[ARG_0:.*]] : !fir.ref<i32>) {
!CHECK: ^bb0(%[[ARG_0]]: !fir.ref<i32>):
!$omp target map(tofrom: a) thread_limit(64)
Expand Down
2 changes: 1 addition & 1 deletion flang/test/Lower/OpenMP/target.f90
Original file line number Diff line number Diff line change
Expand Up @@ -490,8 +490,8 @@ end subroutine omp_target_implicit_bounds
!CHECK-LABEL: func.func @_QPomp_target_thread_limit() {
subroutine omp_target_thread_limit
integer :: a
!CHECK: %[[VAL_1:.*]] = arith.constant 64 : i32
!CHECK: %[[MAP:.*]] = omp.map.info var_ptr({{.*}}) map_clauses(tofrom) capture(ByRef) -> !fir.ref<i32> {name = "a"}
!CHECK: %[[VAL_1:.*]] = arith.constant 64 : i32
!CHECK: omp.target thread_limit(%[[VAL_1]] : i32) map_entries(%[[MAP]] -> %{{.*}} : !fir.ref<i32>) {
!CHECK: ^bb0(%{{.*}}: !fir.ref<i32>):
!$omp target map(tofrom: a) thread_limit(64)
Expand Down
4 changes: 2 additions & 2 deletions flang/test/Lower/OpenMP/use-device-ptr-to-use-device-addr.f90
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ subroutine only_use_device_ptr

!CHECK: func.func @{{.*}}mix_use_device_ptr_and_addr()
!CHECK: omp.target_data use_device_ptr({{.*}} : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>) use_device_addr(%{{.*}}, %{{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) {
!CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>):
!CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, %{{.*}}: !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>):
subroutine mix_use_device_ptr_and_addr
use iso_c_binding
integer, pointer, dimension(:) :: array
Expand All @@ -47,7 +47,7 @@ subroutine only_use_device_addr

!CHECK: func.func @{{.*}}mix_use_device_ptr_and_addr_and_map()
!CHECK: omp.target_data map_entries(%{{.*}}, %{{.*}} : !fir.ref<i32>, !fir.ref<i32>) use_device_ptr(%{{.*}} : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>) use_device_addr(%{{.*}}, %{{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) {
!CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>):
!CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, %{{.*}}: !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>):
subroutine mix_use_device_ptr_and_addr_and_map
use iso_c_binding
integer :: i, j
Expand Down
422 changes: 422 additions & 0 deletions flang/unittests/Runtime/AccessTest.cpp

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions flang/unittests/Runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
add_flang_unittest(FlangRuntimeTests
AccessTest.cpp
Allocatable.cpp
ArrayConstructor.cpp
BufferTest.cpp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
template <class _Backend, class _Index, class _Brick>
_LIBCPP_HIDE_FROM_ABI optional<bool> __parallel_or(_Index __first, _Index __last, _Brick __f) {
std::atomic<bool> __found(false);
auto __ret = __pstl::__cpu_traits<_Backend>::__parallel_for(__first, __last, [__f, &__found](_Index __i, _Index __j) {
auto __ret = __pstl::__cpu_traits<_Backend>::__for_each(__first, __last, [__f, &__found](_Index __i, _Index __j) {
if (!__found.load(std::memory_order_relaxed) && __f(__i, __j)) {
__found.store(true, std::memory_order_relaxed);
__pstl::__cpu_traits<_Backend>::__cancel_execution();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ _LIBCPP_HIDE_FROM_ABI optional<__empty>
__pstl_fill(__cpu_backend_tag, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) {
if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
return __pstl::__cpu_traits<__cpu_backend_tag>::__parallel_for(
return __pstl::__cpu_traits<__cpu_backend_tag>::__for_each(
__first, __last, [&__value](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
[[maybe_unused]] auto __res = std::__pstl_fill<__remove_parallel_policy_t<_ExecutionPolicy>>(
__cpu_backend_tag{}, __brick_first, __brick_last, __value);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ __parallel_find(_Index __first, _Index __last, _Brick __f, _Compare __comp, bool
_DifferenceType __initial_dist = __b_first ? __n : -1;
std::atomic<_DifferenceType> __extremum(__initial_dist);
// TODO: find out what is better here: parallel_for or parallel_reduce
auto __res = __pstl::__cpu_traits<_Backend>::__parallel_for(
auto __res = __pstl::__cpu_traits<_Backend>::__for_each(
__first, __last, [__comp, __f, __first, &__extremum](_Index __i, _Index __j) {
// See "Reducing Contention Through Priority Updates", PPoPP '13, for discussion of
// why using a shared variable scales fairly well in this situation.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ _LIBCPP_HIDE_FROM_ABI optional<__empty>
__pstl_for_each(__cpu_backend_tag, _ForwardIterator __first, _ForwardIterator __last, _Functor __func) {
if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
return __pstl::__cpu_traits<__cpu_backend_tag>::__parallel_for(
return __pstl::__cpu_traits<__cpu_backend_tag>::__for_each(
__first, __last, [__func](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
[[maybe_unused]] auto __res = std::__pstl_for_each<__remove_parallel_policy_t<_ExecutionPolicy>>(
__cpu_backend_tag{}, __brick_first, __brick_last, __func);
Expand Down
24 changes: 12 additions & 12 deletions libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ template <>
struct __cpu_traits<__libdispatch_backend_tag> {
template <class _RandomAccessIterator, class _Functor>
_LIBCPP_HIDE_FROM_ABI static optional<__empty>
__parallel_for(_RandomAccessIterator __first, _RandomAccessIterator __last, _Functor __func) {
__for_each(_RandomAccessIterator __first, _RandomAccessIterator __last, _Functor __func) {
return __libdispatch::__dispatch_parallel_for(
__libdispatch::__partition_chunks(__last - __first), std::move(__first), std::move(__func));
}
Expand All @@ -105,14 +105,14 @@ struct __cpu_traits<__libdispatch_backend_tag> {
typename _RandomAccessIterator3,
typename _Compare,
typename _LeafMerge>
_LIBCPP_HIDE_FROM_ABI static optional<__empty> __parallel_merge(
_RandomAccessIterator1 __first1,
_RandomAccessIterator1 __last1,
_RandomAccessIterator2 __first2,
_RandomAccessIterator2 __last2,
_RandomAccessIterator3 __result,
_Compare __comp,
_LeafMerge __leaf_merge) noexcept {
_LIBCPP_HIDE_FROM_ABI static optional<__empty>
__merge(_RandomAccessIterator1 __first1,
_RandomAccessIterator1 __last1,
_RandomAccessIterator2 __first2,
_RandomAccessIterator2 __last2,
_RandomAccessIterator3 __result,
_Compare __comp,
_LeafMerge __leaf_merge) noexcept {
__libdispatch::__chunk_partitions __partitions =
__libdispatch::__partition_chunks(std::max<ptrdiff_t>(__last1 - __first1, __last2 - __first2));

Expand Down Expand Up @@ -201,7 +201,7 @@ struct __cpu_traits<__libdispatch_backend_tag> {
}

template <class _RandomAccessIterator, class _Transform, class _Value, class _Combiner, class _Reduction>
_LIBCPP_HIDE_FROM_ABI static optional<_Value> __parallel_transform_reduce(
_LIBCPP_HIDE_FROM_ABI static optional<_Value> __transform_reduce(
_RandomAccessIterator __first,
_RandomAccessIterator __last,
_Transform __transform,
Expand Down Expand Up @@ -248,8 +248,8 @@ struct __cpu_traits<__libdispatch_backend_tag> {
}

template <class _RandomAccessIterator, class _Comp, class _LeafSort>
_LIBCPP_HIDE_FROM_ABI static optional<__empty> __parallel_stable_sort(
_RandomAccessIterator __first, _RandomAccessIterator __last, _Comp __comp, _LeafSort __leaf_sort) {
_LIBCPP_HIDE_FROM_ABI static optional<__empty>
__stable_sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Comp __comp, _LeafSort __leaf_sort) {
const auto __size = __last - __first;
auto __partitions = __libdispatch::__partition_chunks(__size);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ _LIBCPP_HIDE_FROM_ABI optional<_ForwardOutIterator> __pstl_merge(
__has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
__has_random_access_iterator_category_or_concept<_ForwardIterator2>::value &&
__has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) {
auto __res = __pstl::__cpu_traits<__cpu_backend_tag>::__parallel_merge(
auto __res = __pstl::__cpu_traits<__cpu_backend_tag>::__merge(
__first1,
__last1,
__first2,
Expand Down
24 changes: 12 additions & 12 deletions libcxx/include/__algorithm/pstl_backends/cpu_backends/serial.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,20 +35,20 @@ template <>
struct __cpu_traits<__serial_backend_tag> {
template <class _RandomAccessIterator, class _Fp>
_LIBCPP_HIDE_FROM_ABI static optional<__empty>
__parallel_for(_RandomAccessIterator __first, _RandomAccessIterator __last, _Fp __f) {
__for_each(_RandomAccessIterator __first, _RandomAccessIterator __last, _Fp __f) {
__f(__first, __last);
return __empty{};
}

template <class _Index, class _UnaryOp, class _Tp, class _BinaryOp, class _Reduce>
_LIBCPP_HIDE_FROM_ABI static optional<_Tp>
__parallel_transform_reduce(_Index __first, _Index __last, _UnaryOp, _Tp __init, _BinaryOp, _Reduce __reduce) {
__transform_reduce(_Index __first, _Index __last, _UnaryOp, _Tp __init, _BinaryOp, _Reduce __reduce) {
return __reduce(std::move(__first), std::move(__last), std::move(__init));
}

template <class _RandomAccessIterator, class _Compare, class _LeafSort>
_LIBCPP_HIDE_FROM_ABI static optional<__empty> __parallel_stable_sort(
_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, _LeafSort __leaf_sort) {
_LIBCPP_HIDE_FROM_ABI static optional<__empty>
__stable_sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, _LeafSort __leaf_sort) {
__leaf_sort(__first, __last, __comp);
return __empty{};
}
Expand All @@ -60,14 +60,14 @@ struct __cpu_traits<__serial_backend_tag> {
class _RandomAccessIterator3,
class _Compare,
class _LeafMerge>
_LIBCPP_HIDE_FROM_ABI static optional<__empty> __parallel_merge(
_RandomAccessIterator1 __first1,
_RandomAccessIterator1 __last1,
_RandomAccessIterator2 __first2,
_RandomAccessIterator2 __last2,
_RandomAccessIterator3 __outit,
_Compare __comp,
_LeafMerge __leaf_merge) {
_LIBCPP_HIDE_FROM_ABI static optional<__empty>
__merge(_RandomAccessIterator1 __first1,
_RandomAccessIterator1 __last1,
_RandomAccessIterator2 __first2,
_RandomAccessIterator2 __last2,
_RandomAccessIterator3 __outit,
_Compare __comp,
_LeafMerge __leaf_merge) {
__leaf_merge(__first1, __last1, __first2, __last2, __outit, __comp);
return __empty{};
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ template <class _ExecutionPolicy, class _RandomAccessIterator, class _Comp>
_LIBCPP_HIDE_FROM_ABI optional<__empty>
__pstl_stable_sort(__cpu_backend_tag, _RandomAccessIterator __first, _RandomAccessIterator __last, _Comp __comp) {
if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy>) {
return __pstl::__cpu_traits<__cpu_backend_tag>::__parallel_stable_sort(
return __pstl::__cpu_traits<__cpu_backend_tag>::__stable_sort(
__first, __last, __comp, [](_RandomAccessIterator __g_first, _RandomAccessIterator __g_last, _Comp __g_comp) {
std::stable_sort(__g_first, __g_last, __g_comp);
});
Expand Down
24 changes: 12 additions & 12 deletions libcxx/include/__algorithm/pstl_backends/cpu_backends/thread.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,20 +38,20 @@ template <>
struct __cpu_traits<__std_thread_backend_tag> {
template <class _RandomAccessIterator, class _Fp>
_LIBCPP_HIDE_FROM_ABI static optional<__empty>
__parallel_for(_RandomAccessIterator __first, _RandomAccessIterator __last, _Fp __f) {
__for_each(_RandomAccessIterator __first, _RandomAccessIterator __last, _Fp __f) {
__f(__first, __last);
return __empty{};
}

template <class _Index, class _UnaryOp, class _Tp, class _BinaryOp, class _Reduce>
_LIBCPP_HIDE_FROM_ABI static optional<_Tp>
__parallel_transform_reduce(_Index __first, _Index __last, _UnaryOp, _Tp __init, _BinaryOp, _Reduce __reduce) {
__transform_reduce(_Index __first, _Index __last, _UnaryOp, _Tp __init, _BinaryOp, _Reduce __reduce) {
return __reduce(std::move(__first), std::move(__last), std::move(__init));
}

template <class _RandomAccessIterator, class _Compare, class _LeafSort>
_LIBCPP_HIDE_FROM_ABI static optional<__empty> __parallel_stable_sort(
_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, _LeafSort __leaf_sort) {
_LIBCPP_HIDE_FROM_ABI static optional<__empty>
__stable_sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, _LeafSort __leaf_sort) {
__leaf_sort(__first, __last, __comp);
return __empty{};
}
Expand All @@ -63,14 +63,14 @@ struct __cpu_traits<__std_thread_backend_tag> {
class _RandomAccessIterator3,
class _Compare,
class _LeafMerge>
_LIBCPP_HIDE_FROM_ABI static optional<__empty> __parallel_merge(
_RandomAccessIterator1 __first1,
_RandomAccessIterator1 __last1,
_RandomAccessIterator2 __first2,
_RandomAccessIterator2 __last2,
_RandomAccessIterator3 __outit,
_Compare __comp,
_LeafMerge __leaf_merge) {
_LIBCPP_HIDE_FROM_ABI static optional<__empty>
__merge(_RandomAccessIterator1 __first1,
_RandomAccessIterator1 __last1,
_RandomAccessIterator2 __first2,
_RandomAccessIterator2 __last2,
_RandomAccessIterator3 __outit,
_Compare __comp,
_LeafMerge __leaf_merge) {
__leaf_merge(__first1, __last1, __first2, __last2, __outit, __comp);
return __empty{};
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ _LIBCPP_HIDE_FROM_ABI optional<_ForwardOutIterator> __pstl_transform(
if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value &&
__has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) {
__pstl::__cpu_traits<__cpu_backend_tag>::__parallel_for(
__pstl::__cpu_traits<__cpu_backend_tag>::__for_each(
__first, __last, [__op, __first, __result](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
auto __res = std::__pstl_transform<__remove_parallel_policy_t<_ExecutionPolicy>>(
__cpu_backend_tag{}, __brick_first, __brick_last, __result + (__brick_first - __first), __op);
Expand Down Expand Up @@ -98,7 +98,7 @@ _LIBCPP_HIDE_FROM_ABI optional<_ForwardOutIterator> __pstl_transform(
__has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
__has_random_access_iterator_category_or_concept<_ForwardIterator2>::value &&
__has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) {
auto __res = __pstl::__cpu_traits<__cpu_backend_tag>::__parallel_for(
auto __res = __pstl::__cpu_traits<__cpu_backend_tag>::__for_each(
__first1,
__last1,
[__op, __first1, __first2, __result](_ForwardIterator1 __brick_first, _ForwardIterator1 __brick_last) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ _LIBCPP_HIDE_FROM_ABI optional<_Tp> __pstl_transform_reduce(
if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
__has_random_access_iterator_category_or_concept<_ForwardIterator2>::value) {
return __pstl::__cpu_traits<__cpu_backend_tag>::__parallel_transform_reduce(
return __pstl::__cpu_traits<__cpu_backend_tag>::__transform_reduce(
__first1,
std::move(__last1),
[__first1, __first2, __transform](_ForwardIterator1 __iter) {
Expand Down Expand Up @@ -167,7 +167,7 @@ _LIBCPP_HIDE_FROM_ABI optional<_Tp> __pstl_transform_reduce(
_UnaryOperation __transform) {
if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
return __pstl::__cpu_traits<__cpu_backend_tag>::__parallel_transform_reduce(
return __pstl::__cpu_traits<__cpu_backend_tag>::__transform_reduce(
std::move(__first),
std::move(__last),
[__transform](_ForwardIterator __iter) { return __transform(*__iter); },
Expand Down
27 changes: 13 additions & 14 deletions libcxx/include/__pstl/cpu_algos/cpu_traits.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,31 +32,30 @@ namespace __pstl {
// ================
//
// template <class _RandomAccessIterator, class _Functor>
// optional<__empty> __parallel_for(_RandomAccessIterator __first, _RandomAccessIterator __last, _Functor __func);
// optional<__empty> __for_each(_RandomAccessIterator __first, _RandomAccessIterator __last, _Functor __func);
// - __func must take a subrange of [__first, __last) that should be executed in serial
//
// template <class _Iterator, class _UnaryOp, class _Tp, class _BinaryOp, class _Reduction>
// optional<_Tp> __parallel_transform_reduce(_Iterator __first, _Iterator __last, _UnaryOp, _Tp __init, _BinaryOp,
// _Reduction);
// optional<_Tp> __transform_reduce(_Iterator __first, _Iterator __last, _UnaryOp, _Tp __init, _BinaryOp, _Reduction);
//
// template <class _RandomAccessIterator1,
// class _RandomAccessIterator2,
// class _RandomAccessIterator3,
// class _Compare,
// class _LeafMerge>
// optional<_RandomAccessIterator3> __parallel_merge(_RandomAccessIterator1 __first1,
// _RandomAccessIterator1 __last1,
// _RandomAccessIterator2 __first2,
// _RandomAccessIterator2 __last2,
// _RandomAccessIterator3 __outit,
// _Compare __comp,
// _LeafMerge __leaf_merge);
// optional<_RandomAccessIterator3> __merge(_RandomAccessIterator1 __first1,
// _RandomAccessIterator1 __last1,
// _RandomAccessIterator2 __first2,
// _RandomAccessIterator2 __last2,
// _RandomAccessIterator3 __outit,
// _Compare __comp,
// _LeafMerge __leaf_merge);
//
// template <class _RandomAccessIterator, class _Comp, class _LeafSort>
// optional<__empty> __parallel_stable_sort(_RandomAccessIterator __first,
// _RandomAccessIterator __last,
// _Comp __comp,
// _LeafSort __leaf_sort);
// optional<__empty> __stable_sort(_RandomAccessIterator __first,
// _RandomAccessIterator __last,
// _Comp __comp,
// _LeafSort __leaf_sort);
//
// void __cancel_execution();
// Cancel the execution of other jobs - they aren't needed anymore. This is not a binding request,
Expand Down
10 changes: 9 additions & 1 deletion lld/COFF/Writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2072,8 +2072,16 @@ void Writer::createRuntimePseudoRelocs() {
return;
}

if (!rels.empty())
if (!rels.empty()) {
log("Writing " + Twine(rels.size()) + " runtime pseudo relocations");
const char *symbolName = "_pei386_runtime_relocator";
Symbol *relocator = ctx.symtab.findUnderscore(symbolName);
if (!relocator)
error("output image has runtime pseudo relocations, but the function " +
Twine(symbolName) +
" is missing; it is needed for fixing the relocations at runtime");
}

PseudoRelocTableChunk *table = make<PseudoRelocTableChunk>(rels);
rdataSec->addChunk(table);
EmptyChunk *endOfList = make<EmptyChunk>();
Expand Down
6 changes: 5 additions & 1 deletion lld/ELF/InputSection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,11 @@ void InputSection::copyRelocations(uint8_t *buf,
addend += sec->getFile<ELFT>()->mipsGp0;
}

if (RelTy::IsRela)
if (config->emachine == EM_LOONGARCH && type == R_LARCH_ALIGN)
// As of LoongArch psABI v2.30, R_LARCH_ALIGN requires a symbol index.
// If it uses the section symbol, the addend must not be changed.
p->r_addend = addend;
else if (RelTy::IsRela)
p->r_addend = sym.getVA(addend) - section->getOutputSection()->addr;
// For SHF_ALLOC sections relocated by REL, append a relocation to
// sec->relocations so that relocateAlloc transitively called by
Expand Down
3 changes: 3 additions & 0 deletions lld/test/COFF/autoimport-arm-data.s
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@
.text
.thumb
main:
bx lr
.global _pei386_runtime_relocator
_pei386_runtime_relocator:
bx lr
.data
ptr:
Expand Down
3 changes: 3 additions & 0 deletions lld/test/COFF/autoimport-arm64-data.s
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@
.global main
.text
main:
ret
.global _pei386_runtime_relocator
_pei386_runtime_relocator:
ret
.data
ptr:
Expand Down
3 changes: 3 additions & 0 deletions lld/test/COFF/autoimport-gnu-implib.s
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,8 @@
.text
main:
movl data(%rip), %eax
ret
.global _pei386_runtime_relocator
_pei386_runtime_relocator:
ret
.data
36 changes: 36 additions & 0 deletions lld/test/COFF/autoimport-handler-func.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# REQUIRES: x86
# RUN: split-file %s %t.dir

# RUN: llvm-dlltool -m i386:x86-64 -d %t.dir/lib.def -D lib.dll -l %t.dir/lib.lib

# RUN: llvm-mc -triple=x86_64-windows-gnu %t.dir/main.s -filetype=obj -o %t.dir/main.obj
# RUN: llvm-mc -triple=x86_64-windows-gnu %t.dir/func.s -filetype=obj -o %t.dir/func.obj
# RUN: env LLD_IN_TEST=1 not lld-link -lldmingw -out:%t.dir/main.exe -entry:main %t.dir/main.obj %t.dir/lib.lib 2>&1 | FileCheck %s --check-prefix=ERR

# RUN: lld-link -lldmingw -out:%t.dir/main.exe -entry:main %t.dir/main.obj %t.dir/func.obj %t.dir/lib.lib 2>&1 | FileCheck %s --check-prefix=NOERR --allow-empty

# ERR: error: output image has runtime pseudo relocations, but the function _pei386_runtime_relocator is missing; it is needed for fixing the relocations at runtime

# NOERR-NOT: error

#--- main.s
.global main
.text
main:
ret

.data
.long 1
.quad variable
.long 2

#--- func.s
.global _pei386_runtime_relocator
.text
_pei386_runtime_relocator:
ret

#--- lib.def
EXPORTS
variable DATA

3 changes: 3 additions & 0 deletions lld/test/COFF/autoimport-warn.s
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ main:
movl variable2(%rip), %ecx
addl %ecx, %eax
ret
.global _pei386_runtime_relocator
_pei386_runtime_relocator:
ret

.section .rdata$.refptr.variable1,"dr",discard,.refptr.variable1
.global .refptr.variable1
Expand Down
3 changes: 3 additions & 0 deletions lld/test/COFF/autoimport-x86.s
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@
.text
main:
movl variable(%rip), %eax
ret
.global _pei386_runtime_relocator
_pei386_runtime_relocator:
ret
.data
ptr:
Expand Down
28 changes: 28 additions & 0 deletions lld/test/ELF/loongarch-relax-align-ldr.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# REQUIRES: loongarch
## Test that `ld -r` does not change the addend of R_LARCH_ALIGN.

# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o %t.64.o
# RUN: ld.lld -r %t.64.o %t.64.o -o %t.64.r
# RUN: llvm-objdump -dr --no-show-raw-insn %t.64.r | FileCheck %s

# CHECK: <.text>:
# CHECK-NEXT: break 1
# CHECK-NEXT: nop
# CHECK-NEXT: {{0*}}04: R_LARCH_ALIGN .text+0x804
# CHECK-NEXT: nop
# CHECK-NEXT: nop
# CHECK-NEXT: break 2
# CHECK-NEXT: break 0
# CHECK-NEXT: break 0
# CHECK-NEXT: break 0
# CHECK-NEXT: break 1
# CHECK-NEXT: nop
# CHECK-NEXT: {{0*}}24: R_LARCH_ALIGN .text+0x804
# CHECK-NEXT: nop
# CHECK-NEXT: nop
# CHECK-NEXT: break 2

.text
break 1
.p2align 4, , 8
break 2
5 changes: 3 additions & 2 deletions lld/test/ELF/loongarch-relax-emit-relocs.s
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
# CHECK-NEXT: R_LARCH_PCALA_LO12 _start
# CHECK-NEXT: R_LARCH_RELAX *ABS*
# CHECK-NEXT: nop
# CHECK-NEXT: R_LARCH_ALIGN .Lla-relax-align0+0x4
# CHECK-NEXT: R_LARCH_ALIGN .text+0x4
# CHECK-NEXT: nop
# CHECK-NEXT: ret

Expand All @@ -37,11 +37,12 @@
# CHECKR-NEXT: R_LARCH_PCALA_LO12 _start
# CHECKR-NEXT: R_LARCH_RELAX *ABS*
# CHECKR-NEXT: nop
# CHECKR-NEXT: R_LARCH_ALIGN .Lla-relax-align0+0x4
# CHECKR-NEXT: R_LARCH_ALIGN .text+0x4
# CHECKR-NEXT: nop
# CHECKR-NEXT: nop
# CHECKR-NEXT: ret

.text
.global _start
_start:
la.pcrel $a0, _start
Expand Down
4 changes: 4 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/Utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -555,5 +555,9 @@ void eraseInstr(MachineInstr &MI, MachineRegisterInfo &MRI,
/// debug users of \p MI by writing the effect of \p MI in a DIExpression.
void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI);

/// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode,
/// having only floating-point operands.
bool isPreISelGenericFloatingPointOpcode(unsigned Opc);

} // End namespace llvm.
#endif
5 changes: 1 addition & 4 deletions llvm/lib/Analysis/Lint.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -350,10 +350,7 @@ void Lint::visitCallBase(CallBase &I) {
}

case Intrinsic::vastart:
Check(I.getParent()->getParent()->isVarArg(),
"Undefined behavior: va_start called in a non-varargs function",
&I);

// vastart in non-varargs function is rejected by the verifier
visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI),
std::nullopt, nullptr, MemRef::Read | MemRef::Write);
break;
Expand Down
44 changes: 44 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/Utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1665,3 +1665,47 @@ void llvm::salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI) {
}
}
}

// Returns true when \p Opc is one of the generic floating-point opcodes listed
// below, and false for every other opcode value.
bool llvm::isPreISelGenericFloatingPointOpcode(unsigned Opc) {
  switch (Opc) {
  // Constants.
  case TargetOpcode::G_FCONSTANT:
  // Arithmetic.
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FMAD:
  case TargetOpcode::G_FPOW:
  // Sign manipulation and canonicalization.
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FCOPYSIGN:
  case TargetOpcode::G_FCANONICALIZE:
  // Minimum / maximum.
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  // Trigonometric, exponential, logarithmic and square root.
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FSQRT:
  // Rounding.
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_INTRINSIC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  // Floating-point width conversions.
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTRUNC:
    return true;
  }
  // Anything not listed above is not treated as a pure floating-point opcode.
  return false;
}
16 changes: 16 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24467,6 +24467,22 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT))
return DAG.getSplatVector(NVT, DL, V.getOperand(0));

// extract_subvector(insert_subvector(x,y,c1),c2)
// --> extract_subvector(y,c2-c1)
// iff we're just extracting from the inserted subvector.
if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
SDValue InsSub = V.getOperand(1);
EVT InsSubVT = InsSub.getValueType();
unsigned NumInsElts = InsSubVT.getVectorMinNumElements();
unsigned InsIdx = V.getConstantOperandVal(2);
unsigned NumSubElts = NVT.getVectorMinNumElements();
if (InsIdx <= ExtIdx && (ExtIdx + NumSubElts) <= (InsIdx + NumInsElts) &&
TLI.isExtractSubvectorCheap(NVT, InsSubVT, ExtIdx - InsIdx) &&
InsSubVT.isFixedLengthVector() && NVT.isFixedLengthVector())
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, InsSub,
DAG.getVectorIdxConstant(ExtIdx - InsIdx, DL));
}

// Try to move vector bitcast after extract_subv by scaling extraction index:
// extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
if (V.getOpcode() == ISD::BITCAST &&
Expand Down
10 changes: 8 additions & 2 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1159,8 +1159,14 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
}

SDValue Unrolled = DAG.UnrollVectorOp(Node);
for (unsigned I = 0, E = Unrolled->getNumValues(); I != E; ++I)
Results.push_back(Unrolled.getValue(I));
if (Node->getNumValues() == 1) {
Results.push_back(Unrolled);
} else {
assert(Node->getNumValues() == Unrolled->getNumValues() &&
"VectorLegalizer Expand returned wrong number of results!");
for (unsigned I = 0, E = Unrolled->getNumValues(); I != E; ++I)
Results.push_back(Unrolled.getValue(I));
}
}

SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
Expand Down
12 changes: 10 additions & 2 deletions llvm/lib/CodeGen/TargetLoweringBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1809,8 +1809,16 @@ void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType,
else if (attr.hasRetAttr(Attribute::ZExt))
Flags.setZExt();

for (unsigned i = 0; i < NumParts; ++i)
Outs.push_back(ISD::OutputArg(Flags, PartVT, VT, /*isfixed=*/true, 0, 0));
for (unsigned i = 0; i < NumParts; ++i) {
ISD::ArgFlagsTy OutFlags = Flags;
if (NumParts > 1 && i == 0)
OutFlags.setSplit();
else if (i == NumParts - 1 && i != 0)
OutFlags.setSplitEnd();

Outs.push_back(
ISD::OutputArg(OutFlags, PartVT, VT, /*isfixed=*/true, 0, 0));
}
}
}

Expand Down
9 changes: 5 additions & 4 deletions llvm/lib/IR/AutoUpgrade.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5341,10 +5341,11 @@ MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {

std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
Triple T(TT);
// The only data layout upgrades needed for pre-GCN are setting the address
// space of globals to 1.
if (T.isAMDGPU() && !T.isAMDGCN() && !DL.contains("-G") &&
!DL.starts_with("G")) {
// The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
// the address space of globals to 1. This does not apply to SPIRV Logical.
if (((T.isAMDGPU() && !T.isAMDGCN()) ||
(T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical()))) &&
!DL.contains("-G") && !DL.starts_with("G")) {
return DL.empty() ? std::string("G1") : (DL + "-G1").str();
}

Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/IR/Verifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5798,6 +5798,11 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {

break;
}
case Intrinsic::vastart: {
Check(Call.getFunction()->isVarArg(),
"va_start called in a non-varargs function");
break;
}
case Intrinsic::vector_reduce_and:
case Intrinsic::vector_reduce_or:
case Intrinsic::vector_reduce_xor:
Expand Down
37 changes: 0 additions & 37 deletions llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -424,43 +424,6 @@ void AArch64RegisterBankInfo::applyMappingImpl(
}
}

/// Tells whether \p Opc is a pre-isel generic floating-point opcode whose
/// operands are all floating-point.
static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
  // The accepted opcodes are listed alphabetically; everything else is
  // rejected by the default label.
  switch (Opc) {
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FCONSTANT:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTRUNC:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_INTRINSIC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
  case TargetOpcode::G_INTRINSIC_TRUNC:
    return true;
  default:
    return false;
  }
}

const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
const MachineInstr &MI) const {
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ class GCNCreateVOPD : public MachineFunctionPass {
}
}

SII->fixImplicitOperands(*VOPDInst);
for (auto CompIdx : VOPD::COMPONENTS)
VOPDInst.copyImplicitOps(*MI[CompIdx]);

Expand Down
7 changes: 0 additions & 7 deletions llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -612,13 +612,6 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// Reserve null register - it shall never be allocated
reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL64);

// Disallow vcc_hi allocation in wave32. It may be allocated but most likely
// will result in bugs.
if (isWave32) {
Reserved.set(AMDGPU::VCC);
Reserved.set(AMDGPU::VCC_HI);
}

// Reserve SGPRs.
//
unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -226,11 +226,8 @@ bool LoongArchAsmBackend::shouldInsertFixupForCodeAlign(
MCFixup::create(0, Dummy, MCFixupKind(LoongArch::fixup_loongarch_align));
const MCSymbolRefExpr *MCSym = getSecToAlignSym()[Sec];
if (MCSym == nullptr) {
// Create a symbol and make the value of symbol is zero.
MCSymbol *Sym = Ctx.createNamedTempSymbol("la-relax-align");
Sym->setFragment(&*Sec->getBeginSymbol()->getFragment());
Asm.registerSymbol(*Sym);
MCSym = MCSymbolRefExpr::create(Sym, Ctx);
// Use section symbol directly.
MCSym = MCSymbolRefExpr::create(Sec->getBeginSymbol(), Ctx);
getSecToAlignSym()[Sec] = MCSym;
}

Expand Down
24 changes: 2 additions & 22 deletions llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,26 +104,6 @@ MipsRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
}
}

// Returns true for the opcodes whose register operands are all floating
// point; returns false for any other opcode.
static bool isFloatingPointOpcode(unsigned Opc) {
  switch (Opc) {
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FCONSTANT:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTRUNC:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FSUB:
    return true;
  }
  // Not one of the opcodes listed above.
  return false;
}

// Instructions where use operands are floating point registers.
// Def operands are general purpose.
static bool isFloatingPointOpcodeUse(unsigned Opc) {
Expand All @@ -133,7 +113,7 @@ static bool isFloatingPointOpcodeUse(unsigned Opc) {
case TargetOpcode::G_FCMP:
return true;
default:
return isFloatingPointOpcode(Opc);
return isPreISelGenericFloatingPointOpcode(Opc);
}
}

Expand All @@ -145,7 +125,7 @@ static bool isFloatingPointOpcodeDef(unsigned Opc) {
case TargetOpcode::G_UITOFP:
return true;
default:
return isFloatingPointOpcode(Opc);
return isPreISelGenericFloatingPointOpcode(Opc);
}
}

Expand Down
39 changes: 1 addition & 38 deletions llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "PPCRegisterBankInfo.h"
#include "PPCRegisterInfo.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
Expand Down Expand Up @@ -239,44 +240,6 @@ PPCRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
return getInstructionMapping(MappingID, Cost, OperandsMapping, NumOperands);
}

/// Tells whether \p Opc is a pre-isel generic floating-point opcode whose
/// operands are all floating-point.
/// FIXME: this is a copy of the AArch64 helper. It should be refactored into a
/// shared function in GlobalISel/Utils.cpp.
static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
  switch (Opc) {
  // Constants.
  case TargetOpcode::G_FCONSTANT:
  // Arithmetic.
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMA:
  // Sign manipulation.
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FNEG:
  // Minimum / maximum.
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMINNUM:
  // Trigonometric, exponential, logarithmic and square root.
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FSQRT:
  // Rounding.
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_INTRINSIC_ROUND:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  // Floating-point width conversions.
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTRUNC:
    return true;
  default:
    return false;
  }
}

/// \returns true if a given intrinsic \p ID only uses and defines FPRs.
static bool isFPIntrinsic(unsigned ID) {
// TODO: Add more intrinsics.
Expand Down
57 changes: 29 additions & 28 deletions llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,15 @@ struct RISCVOutgoingValueAssigner : public CallLowering::OutgoingValueAssigner {
// Whether this is assigning args for a return.
bool IsRet;

// true if assignArg has been called for a mask argument, false otherwise.
bool AssignedFirstMaskArg = false;
RVVArgDispatcher &RVVDispatcher;

public:
RISCVOutgoingValueAssigner(
RISCVTargetLowering::RISCVCCAssignFn *RISCVAssignFn_, bool IsRet)
RISCVTargetLowering::RISCVCCAssignFn *RISCVAssignFn_, bool IsRet,
RVVArgDispatcher &RVVDispatcher)
: CallLowering::OutgoingValueAssigner(nullptr),
RISCVAssignFn(RISCVAssignFn_), IsRet(IsRet) {}
RISCVAssignFn(RISCVAssignFn_), IsRet(IsRet),
RVVDispatcher(RVVDispatcher) {}

bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
Expand All @@ -51,16 +52,9 @@ struct RISCVOutgoingValueAssigner : public CallLowering::OutgoingValueAssigner {
const DataLayout &DL = MF.getDataLayout();
const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();

std::optional<unsigned> FirstMaskArgument;
if (Subtarget.hasVInstructions() && !AssignedFirstMaskArg &&
ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1) {
FirstMaskArgument = ValNo;
AssignedFirstMaskArg = true;
}

if (RISCVAssignFn(DL, Subtarget.getTargetABI(), ValNo, ValVT, LocVT,
LocInfo, Flags, State, Info.IsFixed, IsRet, Info.Ty,
*Subtarget.getTargetLowering(), FirstMaskArgument))
*Subtarget.getTargetLowering(), RVVDispatcher))
return true;

StackSize = State.getStackSize();
Expand Down Expand Up @@ -181,14 +175,15 @@ struct RISCVIncomingValueAssigner : public CallLowering::IncomingValueAssigner {
// Whether this is assigning args from a return.
bool IsRet;

// true if assignArg has been called for a mask argument, false otherwise.
bool AssignedFirstMaskArg = false;
RVVArgDispatcher &RVVDispatcher;

public:
RISCVIncomingValueAssigner(
RISCVTargetLowering::RISCVCCAssignFn *RISCVAssignFn_, bool IsRet)
RISCVTargetLowering::RISCVCCAssignFn *RISCVAssignFn_, bool IsRet,
RVVArgDispatcher &RVVDispatcher)
: CallLowering::IncomingValueAssigner(nullptr),
RISCVAssignFn(RISCVAssignFn_), IsRet(IsRet) {}
RISCVAssignFn(RISCVAssignFn_), IsRet(IsRet),
RVVDispatcher(RVVDispatcher) {}

bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
Expand All @@ -201,16 +196,9 @@ struct RISCVIncomingValueAssigner : public CallLowering::IncomingValueAssigner {
if (LocVT.isScalableVector())
MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();

std::optional<unsigned> FirstMaskArgument;
if (Subtarget.hasVInstructions() && !AssignedFirstMaskArg &&
ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1) {
FirstMaskArgument = ValNo;
AssignedFirstMaskArg = true;
}

if (RISCVAssignFn(DL, Subtarget.getTargetABI(), ValNo, ValVT, LocVT,
LocInfo, Flags, State, /*IsFixed=*/true, IsRet, Info.Ty,
*Subtarget.getTargetLowering(), FirstMaskArgument))
*Subtarget.getTargetLowering(), RVVDispatcher))
return true;

StackSize = State.getStackSize();
Expand Down Expand Up @@ -420,9 +408,11 @@ bool RISCVCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
SmallVector<ArgInfo, 4> SplitRetInfos;
splitToValueTypes(OrigRetInfo, SplitRetInfos, DL, CC);

RVVArgDispatcher Dispatcher{&MF, getTLI<RISCVTargetLowering>(),
ArrayRef(F.getReturnType())};
RISCVOutgoingValueAssigner Assigner(
CC == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV,
/*IsRet=*/true);
/*IsRet=*/true, Dispatcher);
RISCVOutgoingValueHandler Handler(MIRBuilder, MF.getRegInfo(), Ret);
return determineAndHandleAssignments(Handler, Assigner, SplitRetInfos,
MIRBuilder, CC, F.isVarArg());
Expand Down Expand Up @@ -531,6 +521,7 @@ bool RISCVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
CallingConv::ID CC = F.getCallingConv();

SmallVector<ArgInfo, 32> SplitArgInfos;
SmallVector<Type *, 4> TypeList;
unsigned Index = 0;
for (auto &Arg : F.args()) {
// Construct the ArgInfo object from destination register and argument type.
Expand All @@ -542,12 +533,16 @@ bool RISCVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
// correspondingly and appended to SplitArgInfos.
splitToValueTypes(AInfo, SplitArgInfos, DL, CC);

TypeList.push_back(Arg.getType());

++Index;
}

RVVArgDispatcher Dispatcher{&MF, getTLI<RISCVTargetLowering>(),
ArrayRef(TypeList)};
RISCVIncomingValueAssigner Assigner(
CC == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV,
/*IsRet=*/false);
/*IsRet=*/false, Dispatcher);
RISCVFormalArgHandler Handler(MIRBuilder, MF.getRegInfo());

SmallVector<CCValAssign, 16> ArgLocs;
Expand Down Expand Up @@ -585,11 +580,13 @@ bool RISCVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,

SmallVector<ArgInfo, 32> SplitArgInfos;
SmallVector<ISD::OutputArg, 8> Outs;
SmallVector<Type *, 4> TypeList;
for (auto &AInfo : Info.OrigArgs) {
// Handle any required unmerging of split value types from a given VReg into
// physical registers. ArgInfo objects are constructed correspondingly and
// appended to SplitArgInfos.
splitToValueTypes(AInfo, SplitArgInfos, DL, CC);
TypeList.push_back(AInfo.Ty);
}

// TODO: Support tail calls.
Expand All @@ -607,9 +604,11 @@ bool RISCVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
Call.addRegMask(TRI->getCallPreservedMask(MF, Info.CallConv));

RVVArgDispatcher ArgDispatcher{&MF, getTLI<RISCVTargetLowering>(),
ArrayRef(TypeList)};
RISCVOutgoingValueAssigner ArgAssigner(
CC == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV,
/*IsRet=*/false);
/*IsRet=*/false, ArgDispatcher);
RISCVOutgoingValueHandler ArgHandler(MIRBuilder, MF.getRegInfo(), Call);
if (!determineAndHandleAssignments(ArgHandler, ArgAssigner, SplitArgInfos,
MIRBuilder, CC, Info.IsVarArg))
Expand Down Expand Up @@ -637,9 +636,11 @@ bool RISCVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
SmallVector<ArgInfo, 4> SplitRetInfos;
splitToValueTypes(Info.OrigRet, SplitRetInfos, DL, CC);

RVVArgDispatcher RetDispatcher{&MF, getTLI<RISCVTargetLowering>(),
ArrayRef(F.getReturnType())};
RISCVIncomingValueAssigner RetAssigner(
CC == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV,
/*IsRet=*/true);
/*IsRet=*/true, RetDispatcher);
RISCVCallReturnHandler RetHandler(MIRBuilder, MF.getRegInfo(), Call);
if (!determineAndHandleAssignments(RetHandler, RetAssigner, SplitRetInfos,
MIRBuilder, CC, Info.IsVarArg))
Expand Down
40 changes: 0 additions & 40 deletions llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,46 +142,6 @@ static const RegisterBankInfo::ValueMapping *getFPValueMapping(unsigned Size) {
return &RISCV::ValueMappings[Idx];
}

/// Tells whether \p Opc is a pre-isel generic floating-point opcode whose
/// operands are all floating-point.
/// FIXME: this is a copy of the AArch64 helper. It should be refactored into a
/// shared function in GlobalISel/Utils.cpp.
static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
  // The accepted opcodes are listed alphabetically; everything else is
  // rejected by the default label.
  switch (Opc) {
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FCONSTANT:
  case TargetOpcode::G_FCOPYSIGN:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTRUNC:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_INTRINSIC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
  case TargetOpcode::G_INTRINSIC_TRUNC:
    return true;
  default:
    return false;
  }
}

// TODO: Make this more like AArch64?
bool RISCVRegisterBankInfo::hasFPConstraints(
const MachineInstr &MI, const MachineRegisterInfo &MRI,
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Target/RISCV/RISCVFeatures.td
Original file line number Diff line number Diff line change
Expand Up @@ -1226,9 +1226,9 @@ def TuneNoSinkSplatOperands
"false", "Disable sink splat operands to enable .vx, .vf,"
".wx, and .wf instructions">;

def TuneNoStripWSuffix
: SubtargetFeature<"no-strip-w-suffix", "EnableStripWSuffix", "false",
"Disable strip W suffix">;
def TunePreferWInst
: SubtargetFeature<"prefer-w-inst", "PreferWInst", "true",
"Prefer instructions with W suffix">;

def TuneConditionalCompressedMoveFusion
: SubtargetFeature<"conditional-cmv-fusion", "HasConditionalCompressedMoveFusion",
Expand Down
244 changes: 204 additions & 40 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
Expand Down Expand Up @@ -18223,33 +18224,12 @@ static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
return false;
}

// Picks a vector argument register for a value of type ValVT, based on the
// register class TLI reports for that type. Returns whatever
// CCState::AllocateReg returns for the matching candidate list.
static unsigned allocateRVVReg(MVT ValVT, unsigned ValNo,
                               std::optional<unsigned> FirstMaskArgument,
                               CCState &State, const RISCVTargetLowering &TLI) {
  const TargetRegisterClass *const RegClass = TLI.getRegClassFor(ValVT);

  // Grouped register classes each have their own candidate list.
  if (RegClass == &RISCV::VRM2RegClass)
    return State.AllocateReg(ArgVRM2s);
  if (RegClass == &RISCV::VRM4RegClass)
    return State.AllocateReg(ArgVRM4s);
  if (RegClass == &RISCV::VRM8RegClass)
    return State.AllocateReg(ArgVRM8s);

  if (RegClass != &RISCV::VRRegClass)
    llvm_unreachable("Unhandled register class for ValueType");

  // Assign the first mask argument to V0.
  // This is an interim calling convention and it may be changed in the
  // future.
  const bool IsFirstMask =
      FirstMaskArgument.has_value() && *FirstMaskArgument == ValNo;
  if (IsFirstMask)
    return State.AllocateReg(RISCV::V0);
  return State.AllocateReg(ArgVRs);
}

// Implements the RISC-V calling convention. Returns true upon failure.
bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
std::optional<unsigned> FirstMaskArgument) {
RVVArgDispatcher &RVVDispatcher) {
unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
assert(XLen == 32 || XLen == 64);
MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
Expand Down Expand Up @@ -18418,7 +18398,7 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
else if (ValVT == MVT::f64 && !UseGPRForF64)
Reg = State.AllocateReg(ArgFPR64s);
else if (ValVT.isVector()) {
Reg = allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI);
Reg = RVVDispatcher.getNextPhysReg();
if (!Reg) {
// For return values, the vector must be passed fully via registers or
// via the stack.
Expand Down Expand Up @@ -18504,9 +18484,15 @@ void RISCVTargetLowering::analyzeInputArgs(
unsigned NumArgs = Ins.size();
FunctionType *FType = MF.getFunction().getFunctionType();

std::optional<unsigned> FirstMaskArgument;
if (Subtarget.hasVInstructions())
FirstMaskArgument = preAssignMask(Ins);
RVVArgDispatcher Dispatcher;
if (IsRet) {
Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(Ins)};
} else {
SmallVector<Type *, 4> TypeList;
for (const Argument &Arg : MF.getFunction().args())
TypeList.push_back(Arg.getType());
Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(TypeList)};
}

for (unsigned i = 0; i != NumArgs; ++i) {
MVT ArgVT = Ins[i].VT;
Expand All @@ -18521,7 +18507,7 @@ void RISCVTargetLowering::analyzeInputArgs(
RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
FirstMaskArgument)) {
Dispatcher)) {
LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
<< ArgVT << '\n');
llvm_unreachable(nullptr);
Expand All @@ -18535,9 +18521,13 @@ void RISCVTargetLowering::analyzeOutputArgs(
CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
unsigned NumArgs = Outs.size();

std::optional<unsigned> FirstMaskArgument;
if (Subtarget.hasVInstructions())
FirstMaskArgument = preAssignMask(Outs);
SmallVector<Type *, 4> TypeList;
if (IsRet)
TypeList.push_back(MF.getFunction().getReturnType());
else if (CLI)
for (const TargetLowering::ArgListEntry &Arg : CLI->getArgs())
TypeList.push_back(Arg.Ty);
RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(TypeList)};

for (unsigned i = 0; i != NumArgs; i++) {
MVT ArgVT = Outs[i].VT;
Expand All @@ -18547,7 +18537,7 @@ void RISCVTargetLowering::analyzeOutputArgs(
RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
FirstMaskArgument)) {
Dispatcher)) {
LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
<< ArgVT << "\n");
llvm_unreachable(nullptr);
Expand Down Expand Up @@ -18728,7 +18718,7 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
ISD::ArgFlagsTy ArgFlags, CCState &State,
bool IsFixed, bool IsRet, Type *OrigTy,
const RISCVTargetLowering &TLI,
std::optional<unsigned> FirstMaskArgument) {
RVVArgDispatcher &RVVDispatcher) {
if (LocVT == MVT::i32 || LocVT == MVT::i64) {
if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
Expand Down Expand Up @@ -18806,13 +18796,14 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
}

if (LocVT.isVector()) {
if (unsigned Reg =
allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI)) {
MCPhysReg AllocatedVReg = RVVDispatcher.getNextPhysReg();
if (AllocatedVReg) {
// Fixed-length vectors are located in the corresponding scalable-vector
// container types.
if (ValVT.isFixedLengthVector())
LocVT = TLI.getContainerForFixedLengthVector(LocVT);
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
State.addLoc(
CCValAssign::getReg(ValNo, ValVT, AllocatedVReg, LocVT, LocInfo));
} else {
// Try and pass the address via a "fast" GPR.
if (unsigned GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
Expand Down Expand Up @@ -19440,17 +19431,15 @@ bool RISCVTargetLowering::CanLowerReturn(
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);

std::optional<unsigned> FirstMaskArgument;
if (Subtarget.hasVInstructions())
FirstMaskArgument = preAssignMask(Outs);
RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(Outs)};

for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
MVT VT = Outs[i].VT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
if (RISCV::CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
*this, FirstMaskArgument))
ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
nullptr, *this, Dispatcher))
return false;
}
return true;
Expand Down Expand Up @@ -21247,6 +21236,181 @@ unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
return Subtarget.getMinimumJumpTableEntries();
}

// Handle single arg such as return value.
//
// Populates RVVArgInfos from a flat list of argument records; the code relies
// only on each record exposing `VT` and `Flags`. Two shapes are recognised:
//   * two or more unsplit records that all share one scalable vector type are
//     recorded as a single tuple entry whose NF is the record count;
//   * otherwise every vector-typed record becomes its own NF == 1 entry, and
//     the first vector with i1 elements is flagged as the first mask argument.
// Records whose type is not a vector produce no entry. An empty list produces
// no entries at all.
template <typename Arg>
void RVVArgDispatcher::constructArgInfos(ArrayRef<Arg> ArgList) {
  // Determines whether the list is made up of homogeneous scalable vector
  // types, i.e. whether it should be treated as one vector tuple.
  auto isHomogeneousScalableVectorType = [](ArrayRef<Arg> Args) {
    // An empty list (e.g. nothing is returned) is never a tuple. This must be
    // checked before the first element is inspected: reading through begin()
    // of an empty ArrayRef is undefined behaviour.
    if (Args.empty())
      return false;

    const Arg &First = Args.front();

    // A value that needs to be split is not a tuple element.
    if (First.Flags.isSplit())
      return false;

    // A single element, or a non-scalable-vector type, is not a tuple either.
    if (Args.size() == 1 || !First.VT.isScalableVector())
      return false;

    // Every remaining element must be unsplit and of the same type as the
    // first one.
    for (const Arg &Elt : Args.drop_front())
      if (Elt.Flags.isSplit() || Elt.VT != First.VT)
        return false;

    return true;
  };

  if (isHomogeneousScalableVectorType(ArgList)) {
    // Handle as tuple type
    RVVArgInfos.push_back(
        {static_cast<unsigned>(ArgList.size()), ArgList[0].VT, false});
    return;
  }

  // Handle as normal vector type
  bool FirstVMaskAssigned = false;
  for (const auto &OutArg : ArgList) {
    MVT RegisterVT = OutArg.VT;

    // Skip non-RVV register type
    if (!RegisterVT.isVector())
      continue;

    // Fixed-length vectors are tracked via their scalable container type.
    if (RegisterVT.isFixedLengthVector())
      RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);

    // Only the first mask (i1-element) vector is flagged; later masks are
    // recorded like ordinary vector arguments.
    if (!FirstVMaskAssigned && RegisterVT.getVectorElementType() == MVT::i1) {
      RVVArgInfos.push_back({1, RegisterVT, true});
      FirstVMaskAssigned = true;
      continue;
    }

    RVVArgInfos.push_back({1, RegisterVT, false});
  }
}

// Handle multiple args.
//
// Populates RVVArgInfos from a list of IR types. A struct type for which
// containsHomogeneousScalableVectorTypes() holds yields one tuple entry; any
// other type is expanded with ComputeValueVTs and contributes one NF == 1
// entry per register of each vector-typed value. Register type and register
// count are queried from TLI using the calling convention of MF's function.
template <>
void RVVArgDispatcher::constructArgInfos<Type *>(ArrayRef<Type *> TypeList) {
  const Function &F = MF->getFunction();
  const DataLayout &DL = MF->getDataLayout();
  LLVMContext &Context = F.getContext();

  bool FirstVMaskAssigned = false;
  for (Type *Ty : TypeList) {
    if (StructType *STy = dyn_cast<StructType>(Ty);
        STy && STy->containsHomogeneousScalableVectorTypes()) {
      // Tuple: all fields share the type of field 0, so one entry covers the
      // whole struct.
      EVT FieldVT = TLI->getValueType(DL, STy->getTypeAtIndex(0U));
      MVT RegisterVT = TLI->getRegisterTypeForCallingConv(
          Context, F.getCallingConv(), FieldVT);
      unsigned RegsPerField = TLI->getNumRegistersForCallingConv(
          Context, F.getCallingConv(), FieldVT);

      RVVArgInfos.push_back(
          {RegsPerField * STy->getNumElements(), RegisterVT, false});
      continue;
    }

    SmallVector<EVT, 4> ValueVTs;
    ComputeValueVTs(*TLI, DL, Ty, ValueVTs);

    for (EVT VT : ValueVTs) {
      MVT RegisterVT =
          TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT);
      unsigned NumRegs =
          TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT);

      // Skip non-RVV register type
      if (!RegisterVT.isVector())
        continue;

      // Fixed-length vectors are tracked via their scalable container type.
      if (RegisterVT.isFixedLengthVector())
        RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);

      // The very first mask register seen is recorded with FirstVMask set;
      // it consumes one of this value's registers.
      const bool IsMaskVT = RegisterVT.getVectorElementType() == MVT::i1;
      if (IsMaskVT && !FirstVMaskAssigned) {
        RVVArgInfos.push_back({1, RegisterVT, true});
        FirstVMaskAssigned = true;
        --NumRegs;
      }

      // Whatever registers remain are ordinary single-register entries.
      const RVVArgInfo PlainInfo{1, RegisterVT, false};
      RVVArgInfos.insert(RVVArgInfos.end(), NumRegs, PlainInfo);
    }
  }
}

void RVVArgDispatcher::allocatePhysReg(unsigned NF, unsigned LMul,
unsigned StartReg) {
assert((StartReg % LMul) == 0 &&
"Start register number should be multiple of lmul");
const MCPhysReg *VRArrays;
switch (LMul) {
default:
report_fatal_error("Invalid lmul");
case 1:
VRArrays = ArgVRs;
break;
case 2:
VRArrays = ArgVRM2s;
break;
case 4:
VRArrays = ArgVRM4s;
break;
case 8:
VRArrays = ArgVRM8s;
break;
}

for (unsigned i = 0; i < NF; ++i)
if (StartReg)
AllocatedPhysRegs.push_back(VRArrays[(StartReg - 8) / LMul + i]);
else
AllocatedPhysRegs.push_back(MCPhysReg());
}

/// This function determines if each RVV argument is passed by register, if the
/// argument can be assigned to a VR, then give it a specific register.
/// Otherwise, assign the argument to 0 which is a invalid MCPhysReg.
void RVVArgDispatcher::compute() {
uint32_t AssignedMap = 0;
auto allocate = [&](const RVVArgInfo &ArgInfo) {
// Allocate first vector mask argument to V0.
if (ArgInfo.FirstVMask) {
AllocatedPhysRegs.push_back(RISCV::V0);
return;
}

unsigned RegsNeeded = divideCeil(
ArgInfo.VT.getSizeInBits().getKnownMinValue(), RISCV::RVVBitsPerBlock);
unsigned TotalRegsNeeded = ArgInfo.NF * RegsNeeded;
for (unsigned StartReg = 0; StartReg + TotalRegsNeeded <= NumArgVRs;
StartReg += RegsNeeded) {
uint32_t Map = ((1 << TotalRegsNeeded) - 1) << StartReg;
if ((AssignedMap & Map) == 0) {
allocatePhysReg(ArgInfo.NF, RegsNeeded, StartReg + 8);
AssignedMap |= Map;
return;
}
}

allocatePhysReg(ArgInfo.NF, RegsNeeded, 0);
};

for (unsigned i = 0; i < RVVArgInfos.size(); ++i)
allocate(RVVArgInfos[i]);
}

// Returns the next entry of AllocatedPhysRegs and advances the cursor, so
// successive calls walk the entries in the order they were computed.
MCPhysReg RVVArgDispatcher::getNextPhysReg() {
  assert(CurIdx < AllocatedPhysRegs.size() && "Index out of range");
  const MCPhysReg NextReg = AllocatedPhysRegs[CurIdx];
  ++CurIdx;
  return NextReg;
}

namespace llvm::RISCVVIntrinsicsTable {

#define GET_RISCVVIntrinsicsTable_IMPL
Expand Down
59 changes: 56 additions & 3 deletions llvm/lib/Target/RISCV/RISCVISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ namespace llvm {
class InstructionCost;
class RISCVSubtarget;
struct RISCVRegisterInfo;
class RVVArgDispatcher;

namespace RISCVISD {
// clang-format off
Expand Down Expand Up @@ -875,7 +876,7 @@ class RISCVTargetLowering : public TargetLowering {
ISD::ArgFlagsTy ArgFlags, CCState &State,
bool IsFixed, bool IsRet, Type *OrigTy,
const RISCVTargetLowering &TLI,
std::optional<unsigned> FirstMaskArgument);
RVVArgDispatcher &RVVDispatcher);

private:
void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo,
Expand Down Expand Up @@ -1017,19 +1018,71 @@ class RISCVTargetLowering : public TargetLowering {
unsigned getMinimumJumpTableEntries() const override;
};

/// As per the spec, the rules for passing vector arguments are as follows:
///
/// 1. The first vector mask argument is passed in v0.
/// 2. For vector data arguments and the remaining vector mask arguments,
/// starting from the v8 register, if a vector register group between v8-v23
/// that has not been allocated can be found and the first register number is a
/// multiple of LMUL, then this vector register group is allocated to the
/// argument and these registers are marked as allocated. Otherwise, the
/// argument is passed by reference and is replaced in the argument list with
/// the address.
/// 3. For tuple vector data arguments, starting from the v8 register, if
/// NFIELDS consecutive vector register groups between v8-v23 that have not been
/// allocated can be found and the first register number is a multiple of LMUL,
/// then these vector register groups are allocated to the argument and these
/// registers are marked as allocated. Otherwise, the argument is passed by
/// reference and is replaced in the argument list with the address.
///
/// All assignments are computed once, in the constructor; getNextPhysReg()
/// then hands them out one at a time in argument order.
class RVVArgDispatcher {
public:
/// Number of vector registers available for argument passing (v8-v23).
static constexpr unsigned NumArgVRs = 16;

/// Describes one vector argument (or one register of a split argument).
struct RVVArgInfo {
/// Number of fields: 1 for a plain vector, the tuple size for a tuple.
unsigned NF;
/// Register type of the argument (of a single field for tuples).
MVT VT;
/// True only for the first vector mask argument, which is assigned to v0.
bool FirstVMask = false;
};

/// Collects the vector arguments found in \p ArgList and immediately computes
/// their register assignment.
template <typename Arg>
RVVArgDispatcher(const MachineFunction *MF, const RISCVTargetLowering *TLI,
ArrayRef<Arg> ArgList)
: MF(MF), TLI(TLI) {
constructArgInfos(ArgList);
compute();
}

/// Creates a dispatcher with no computed assignments; getNextPhysReg() asserts
/// if called on it.
RVVArgDispatcher() = default;

/// Returns the assignment for the next vector argument and advances the
/// internal cursor. A value of 0 means no register could be assigned.
MCPhysReg getNextPhysReg();

private:
// Vector arguments in the order they were discovered.
SmallVector<RVVArgInfo, 4> RVVArgInfos;
// Computed assignments, consumed in order by getNextPhysReg().
SmallVector<MCPhysReg, 4> AllocatedPhysRegs;

const MachineFunction *MF = nullptr;
const RISCVTargetLowering *TLI = nullptr;

// Index into AllocatedPhysRegs of the next assignment to hand out.
unsigned CurIdx = 0;

// Fills RVVArgInfos from the given list.
template <typename Arg> void constructArgInfos(ArrayRef<Arg> Ret);
// Fills AllocatedPhysRegs from RVVArgInfos.
void compute();
// Appends NF assignments to AllocatedPhysRegs; StartReg == 0 records that no
// register is available.
void allocatePhysReg(unsigned NF = 1, unsigned LMul = 1,
unsigned StartReg = 0);
};

namespace RISCV {

bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
std::optional<unsigned> FirstMaskArgument);
RVVArgDispatcher &RVVDispatcher);

bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
std::optional<unsigned> FirstMaskArgument);
RVVArgDispatcher &RVVDispatcher);

bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
Expand Down
66 changes: 66 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2718,6 +2718,50 @@ std::string RISCVInstrInfo::createMIROperandComment(
return Comment;
}

// Helper macros for writing `case` labels that cover a whole family of RVV
// pseudo opcodes. Each macro expands to one or more labels joined by
// `: case`, with no leading `case` and no trailing `:`; use them as
// `case CASE_RVV_OPCODE(VADD_VV):`.
// clang-format off
// The unmasked pseudo for OP at a single LMUL: RISCV::Pseudo<OP>_<LMUL>.
#define CASE_RVV_OPCODE_UNMASK_LMUL(OP, LMUL) \
RISCV::Pseudo##OP##_##LMUL

// The masked pseudo for OP at a single LMUL: RISCV::Pseudo<OP>_<LMUL>_MASK.
#define CASE_RVV_OPCODE_MASK_LMUL(OP, LMUL) \
RISCV::Pseudo##OP##_##LMUL##_MASK

// Both the unmasked and the masked pseudo for OP at a single LMUL.
#define CASE_RVV_OPCODE_LMUL(OP, LMUL) \
CASE_RVV_OPCODE_UNMASK_LMUL(OP, LMUL): \
case CASE_RVV_OPCODE_MASK_LMUL(OP, LMUL)

// Unmasked pseudos for OP at LMULs MF8 through M4 (M8 is not included).
#define CASE_RVV_OPCODE_UNMASK_WIDEN(OP) \
CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF8): \
case CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF4): \
case CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF2): \
case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M1): \
case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M2): \
case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M4)

// Unmasked pseudos for OP at LMULs MF8 through M8.
#define CASE_RVV_OPCODE_UNMASK(OP) \
CASE_RVV_OPCODE_UNMASK_WIDEN(OP): \
case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M8)

// Masked pseudos for OP at LMULs MF8 through M4 (M8 is not included).
#define CASE_RVV_OPCODE_MASK_WIDEN(OP) \
CASE_RVV_OPCODE_MASK_LMUL(OP, MF8): \
case CASE_RVV_OPCODE_MASK_LMUL(OP, MF4): \
case CASE_RVV_OPCODE_MASK_LMUL(OP, MF2): \
case CASE_RVV_OPCODE_MASK_LMUL(OP, M1): \
case CASE_RVV_OPCODE_MASK_LMUL(OP, M2): \
case CASE_RVV_OPCODE_MASK_LMUL(OP, M4)

// Masked pseudos for OP at LMULs MF8 through M8.
#define CASE_RVV_OPCODE_MASK(OP) \
CASE_RVV_OPCODE_MASK_WIDEN(OP): \
case CASE_RVV_OPCODE_MASK_LMUL(OP, M8)

// Unmasked and masked pseudos for OP at LMULs MF8 through M4.
#define CASE_RVV_OPCODE_WIDEN(OP) \
CASE_RVV_OPCODE_UNMASK_WIDEN(OP): \
case CASE_RVV_OPCODE_MASK_WIDEN(OP)

// Unmasked and masked pseudos for OP at LMULs MF8 through M8.
#define CASE_RVV_OPCODE(OP) \
CASE_RVV_OPCODE_UNMASK(OP): \
case CASE_RVV_OPCODE_MASK(OP)
// clang-format on

// clang-format off
#define CASE_VMA_OPCODE_COMMON(OP, TYPE, LMUL) \
RISCV::PseudoV##OP##_##TYPE##_##LMUL
Expand Down Expand Up @@ -2798,6 +2842,28 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
case RISCV::PseudoCCMOVGPR:
// Operands 4 and 5 are commutable.
return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 4, 5);
case CASE_RVV_OPCODE(VADD_VV):
case CASE_RVV_OPCODE(VAND_VV):
case CASE_RVV_OPCODE(VOR_VV):
case CASE_RVV_OPCODE(VXOR_VV):
case CASE_RVV_OPCODE_MASK(VMSEQ_VV):
case CASE_RVV_OPCODE_MASK(VMSNE_VV):
case CASE_RVV_OPCODE(VMIN_VV):
case CASE_RVV_OPCODE(VMINU_VV):
case CASE_RVV_OPCODE(VMAX_VV):
case CASE_RVV_OPCODE(VMAXU_VV):
case CASE_RVV_OPCODE(VMUL_VV):
case CASE_RVV_OPCODE(VMULH_VV):
case CASE_RVV_OPCODE(VMULHU_VV):
case CASE_RVV_OPCODE_WIDEN(VWADD_VV):
case CASE_RVV_OPCODE_WIDEN(VWADDU_VV):
case CASE_RVV_OPCODE_WIDEN(VWMUL_VV):
case CASE_RVV_OPCODE_WIDEN(VWMULU_VV):
case CASE_RVV_OPCODE_WIDEN(VWMACC_VV):
case CASE_RVV_OPCODE_WIDEN(VWMACCU_VV):
case CASE_RVV_OPCODE_UNMASK(VADC_VVM):
// Operands 2 and 3 are commutable.
return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
case CASE_VFMA_SPLATS(FMADD):
case CASE_VFMA_SPLATS(FMSUB):
case CASE_VFMA_SPLATS(FMACC):
Expand Down
92 changes: 50 additions & 42 deletions llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
Original file line number Diff line number Diff line change
Expand Up @@ -2129,8 +2129,9 @@ multiclass VPseudoBinary<VReg RetClass,
LMULInfo MInfo,
string Constraint = "",
int sew = 0,
int TargetConstraintType = 1> {
let VLMul = MInfo.value, SEW=sew in {
int TargetConstraintType = 1,
bit Commutable = 0> {
let VLMul = MInfo.value, SEW=sew, isCommutable = Commutable in {
defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX);
def suffix : VPseudoBinaryNoMaskTU<RetClass, Op1Class, Op2Class,
Constraint, TargetConstraintType>;
Expand Down Expand Up @@ -2169,8 +2170,9 @@ multiclass VPseudoBinaryM<VReg RetClass,
DAGOperand Op2Class,
LMULInfo MInfo,
string Constraint = "",
int TargetConstraintType = 1> {
let VLMul = MInfo.value in {
int TargetConstraintType = 1,
bit Commutable = 0> {
let VLMul = MInfo.value, isCommutable = Commutable in {
def "_" # MInfo.MX : VPseudoBinaryMOutNoMask<RetClass, Op1Class, Op2Class,
Constraint, TargetConstraintType>;
let ForceTailAgnostic = true in
Expand Down Expand Up @@ -2228,8 +2230,8 @@ multiclass VPseudoTiedBinaryRoundingMode<VReg RetClass,
}


multiclass VPseudoBinaryV_VV<LMULInfo m, string Constraint = "", int sew = 0> {
defm _VV : VPseudoBinary<m.vrclass, m.vrclass, m.vrclass, m, Constraint, sew>;
multiclass VPseudoBinaryV_VV<LMULInfo m, string Constraint = "", int sew = 0, bit Commutable = 0> {
defm _VV : VPseudoBinary<m.vrclass, m.vrclass, m.vrclass, m, Constraint, sew, Commutable=Commutable>;
}

multiclass VPseudoBinaryV_VV_RM<LMULInfo m, string Constraint = ""> {
Expand Down Expand Up @@ -2333,9 +2335,10 @@ multiclass VPseudoVALU_MM<bit Commutable = 0> {
// * The destination EEW is greater than the source EEW, the source EMUL is
// at least 1, and the overlap is in the highest-numbered part of the
// destination register group is legal. Otherwise, it is illegal.
multiclass VPseudoBinaryW_VV<LMULInfo m> {
multiclass VPseudoBinaryW_VV<LMULInfo m, bit Commutable = 0> {
defm _VV : VPseudoBinary<m.wvrclass, m.vrclass, m.vrclass, m,
"@earlyclobber $rd", TargetConstraintType=3>;
"@earlyclobber $rd", TargetConstraintType=3,
Commutable=Commutable>;
}

multiclass VPseudoBinaryW_VV_RM<LMULInfo m, int sew = 0> {
Expand Down Expand Up @@ -2455,7 +2458,9 @@ multiclass VPseudoBinaryV_VM<LMULInfo m, bit CarryOut = 0, bit CarryIn = 1,
m.vrclass, m.vrclass, m, CarryIn, Constraint, TargetConstraintType>;
}

multiclass VPseudoTiedBinaryV_VM<LMULInfo m, int TargetConstraintType = 1> {
multiclass VPseudoTiedBinaryV_VM<LMULInfo m, int TargetConstraintType = 1,
bit Commutable = 0> {
let isCommutable = Commutable in
def "_VVM" # "_" # m.MX:
VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
m.vrclass, m.vrclass, m, 1, "",
Expand Down Expand Up @@ -2669,8 +2674,10 @@ multiclass PseudoVEXT_VF8 {
// lowest-numbered part of the source register group".
// With LMUL<=1 the source and dest occupy a single register so any overlap
// is in the lowest-numbered part.
multiclass VPseudoBinaryM_VV<LMULInfo m, int TargetConstraintType = 1> {
defm _VV : VPseudoBinaryM<m.moutclass, m.vrclass, m.vrclass, m, "", TargetConstraintType>;
multiclass VPseudoBinaryM_VV<LMULInfo m, int TargetConstraintType = 1,
bit Commutable = 0> {
defm _VV : VPseudoBinaryM<m.moutclass, m.vrclass, m.vrclass, m, "",
TargetConstraintType, Commutable=Commutable>;
}

multiclass VPseudoBinaryM_VX<LMULInfo m, int TargetConstraintType = 1> {
Expand Down Expand Up @@ -2749,10 +2756,11 @@ multiclass VPseudoVSSHT_VV_VX_VI_RM<Operand ImmType = simm5, string Constraint =
}
}

multiclass VPseudoVALU_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""> {
multiclass VPseudoVALU_VV_VX_VI<Operand ImmType = simm5, string Constraint = "",
bit Commutable = 0> {
foreach m = MxList in {
defvar mx = m.MX;
defm "" : VPseudoBinaryV_VV<m, Constraint>,
defm "" : VPseudoBinaryV_VV<m, Constraint, Commutable=Commutable>,
SchedBinary<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV", mx,
forceMergeOpRead=true>;
defm "" : VPseudoBinaryV_VX<m, Constraint>,
Expand Down Expand Up @@ -2802,17 +2810,17 @@ multiclass VPseudoVAALU_VV_VX_RM {
multiclass VPseudoVMINMAX_VV_VX {
foreach m = MxList in {
defvar mx = m.MX;
defm "" : VPseudoBinaryV_VV<m>,
defm "" : VPseudoBinaryV_VV<m, Commutable=1>,
SchedBinary<"WriteVIMinMaxV", "ReadVIMinMaxV", "ReadVIMinMaxV", mx>;
defm "" : VPseudoBinaryV_VX<m>,
SchedBinary<"WriteVIMinMaxX", "ReadVIMinMaxV", "ReadVIMinMaxX", mx>;
}
}

multiclass VPseudoVMUL_VV_VX {
multiclass VPseudoVMUL_VV_VX<bit Commutable = 0> {
foreach m = MxList in {
defvar mx = m.MX;
defm "" : VPseudoBinaryV_VV<m>,
defm "" : VPseudoBinaryV_VV<m, Commutable=Commutable>,
SchedBinary<"WriteVIMulV", "ReadVIMulV", "ReadVIMulV", mx>;
defm "" : VPseudoBinaryV_VX<m>,
SchedBinary<"WriteVIMulX", "ReadVIMulV", "ReadVIMulX", mx>;
Expand Down Expand Up @@ -2962,10 +2970,10 @@ multiclass VPseudoVALU_VX_VI<Operand ImmType = simm5> {
}
}

multiclass VPseudoVWALU_VV_VX {
multiclass VPseudoVWALU_VV_VX<bit Commutable = 0> {
foreach m = MxListW in {
defvar mx = m.MX;
defm "" : VPseudoBinaryW_VV<m>,
defm "" : VPseudoBinaryW_VV<m, Commutable=Commutable>,
SchedBinary<"WriteVIWALUV", "ReadVIWALUV", "ReadVIWALUV", mx,
forceMergeOpRead=true>;
defm "" : VPseudoBinaryW_VX<m>,
Expand All @@ -2974,10 +2982,10 @@ multiclass VPseudoVWALU_VV_VX {
}
}

multiclass VPseudoVWMUL_VV_VX {
multiclass VPseudoVWMUL_VV_VX<bit Commutable = 0> {
foreach m = MxListW in {
defvar mx = m.MX;
defm "" : VPseudoBinaryW_VV<m>,
defm "" : VPseudoBinaryW_VV<m, Commutable=Commutable>,
SchedBinary<"WriteVIWMulV", "ReadVIWMulV", "ReadVIWMulV", mx,
forceMergeOpRead=true>;
defm "" : VPseudoBinaryW_VX<m>,
Expand Down Expand Up @@ -3072,7 +3080,7 @@ multiclass VPseudoVMRG_VM_XM_IM {
multiclass VPseudoVCALU_VM_XM_IM {
foreach m = MxList in {
defvar mx = m.MX;
defm "" : VPseudoTiedBinaryV_VM<m>,
defm "" : VPseudoTiedBinaryV_VM<m, Commutable=1>,
SchedBinary<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV", mx,
forceMergeOpRead=true>;
defm "" : VPseudoTiedBinaryV_XM<m>,
Expand Down Expand Up @@ -3285,10 +3293,10 @@ multiclass VPseudoTernaryV_VF_AAXA_RM<LMULInfo m, FPR_Info f,
sew, Commutable=1>;
}

multiclass VPseudoTernaryW_VV<LMULInfo m> {
multiclass VPseudoTernaryW_VV<LMULInfo m, bit Commutable = 0> {
defvar constraint = "@earlyclobber $rd";
defm _VV : VPseudoTernaryWithPolicy<m.wvrclass, m.vrclass, m.vrclass, m,
constraint, /*Commutable*/ 0, TargetConstraintType=3>;
constraint, Commutable=Commutable, TargetConstraintType=3>;
}

multiclass VPseudoTernaryW_VV_RM<LMULInfo m, int sew = 0> {
Expand Down Expand Up @@ -3378,10 +3386,10 @@ multiclass VPseudoVSLD_VX_VI<Operand ImmType = simm5, string Constraint = ""> {
}
}

multiclass VPseudoVWMAC_VV_VX {
multiclass VPseudoVWMAC_VV_VX<bit Commutable = 0> {
foreach m = MxListW in {
defvar mx = m.MX;
defm "" : VPseudoTernaryW_VV<m>,
defm "" : VPseudoTernaryW_VV<m, Commutable=Commutable>,
SchedTernary<"WriteVIWMulAddV", "ReadVIWMulAddV", "ReadVIWMulAddV",
"ReadVIWMulAddV", mx>;
defm "" : VPseudoTernaryW_VX<m>,
Expand Down Expand Up @@ -3434,10 +3442,10 @@ multiclass VPseudoVWMAC_VV_VF_BF_RM {
}
}

multiclass VPseudoVCMPM_VV_VX_VI {
multiclass VPseudoVCMPM_VV_VX_VI<bit Commutable = 0> {
foreach m = MxList in {
defvar mx = m.MX;
defm "" : VPseudoBinaryM_VV<m, TargetConstraintType=2>,
defm "" : VPseudoBinaryM_VV<m, TargetConstraintType=2, Commutable=Commutable>,
SchedBinary<"WriteVICmpV", "ReadVICmpV", "ReadVICmpV", mx>;
defm "" : VPseudoBinaryM_VX<m, TargetConstraintType=2>,
SchedBinary<"WriteVICmpX", "ReadVICmpV", "ReadVICmpX", mx>;
Expand Down Expand Up @@ -6246,7 +6254,7 @@ defm PseudoVLSEG : VPseudoUSSegLoadFF;
//===----------------------------------------------------------------------===//
// 11.1. Vector Single-Width Integer Add and Subtract
//===----------------------------------------------------------------------===//
defm PseudoVADD : VPseudoVALU_VV_VX_VI;
defm PseudoVADD : VPseudoVALU_VV_VX_VI<Commutable=1>;
defm PseudoVSUB : VPseudoVALU_VV_VX;
defm PseudoVRSUB : VPseudoVALU_VX_VI;

Expand Down Expand Up @@ -6311,9 +6319,9 @@ foreach vti = AllIntegerVectors in {
//===----------------------------------------------------------------------===//
// 11.2. Vector Widening Integer Add/Subtract
//===----------------------------------------------------------------------===//
defm PseudoVWADDU : VPseudoVWALU_VV_VX;
defm PseudoVWADDU : VPseudoVWALU_VV_VX<Commutable=1>;
defm PseudoVWSUBU : VPseudoVWALU_VV_VX;
defm PseudoVWADD : VPseudoVWALU_VV_VX;
defm PseudoVWADD : VPseudoVWALU_VV_VX<Commutable=1>;
defm PseudoVWSUB : VPseudoVWALU_VV_VX;
defm PseudoVWADDU : VPseudoVWALU_WV_WX;
defm PseudoVWSUBU : VPseudoVWALU_WV_WX;
Expand Down Expand Up @@ -6344,9 +6352,9 @@ defm PseudoVMSBC : VPseudoVCALUM_V_X<"@earlyclobber $rd">;
//===----------------------------------------------------------------------===//
// 11.5. Vector Bitwise Logical Instructions
//===----------------------------------------------------------------------===//
defm PseudoVAND : VPseudoVALU_VV_VX_VI;
defm PseudoVOR : VPseudoVALU_VV_VX_VI;
defm PseudoVXOR : VPseudoVALU_VV_VX_VI;
defm PseudoVAND : VPseudoVALU_VV_VX_VI<Commutable=1>;
defm PseudoVOR : VPseudoVALU_VV_VX_VI<Commutable=1>;
defm PseudoVXOR : VPseudoVALU_VV_VX_VI<Commutable=1>;

//===----------------------------------------------------------------------===//
// 11.6. Vector Single-Width Bit Shift Instructions
Expand All @@ -6364,8 +6372,8 @@ defm PseudoVNSRA : VPseudoVNSHT_WV_WX_WI;
//===----------------------------------------------------------------------===//
// 11.8. Vector Integer Comparison Instructions
//===----------------------------------------------------------------------===//
defm PseudoVMSEQ : VPseudoVCMPM_VV_VX_VI;
defm PseudoVMSNE : VPseudoVCMPM_VV_VX_VI;
defm PseudoVMSEQ : VPseudoVCMPM_VV_VX_VI<Commutable=1>;
defm PseudoVMSNE : VPseudoVCMPM_VV_VX_VI<Commutable=1>;
defm PseudoVMSLTU : VPseudoVCMPM_VV_VX;
defm PseudoVMSLT : VPseudoVCMPM_VV_VX;
defm PseudoVMSLEU : VPseudoVCMPM_VV_VX_VI;
Expand All @@ -6384,9 +6392,9 @@ defm PseudoVMAX : VPseudoVMINMAX_VV_VX;
//===----------------------------------------------------------------------===//
// 11.10. Vector Single-Width Integer Multiply Instructions
//===----------------------------------------------------------------------===//
defm PseudoVMUL : VPseudoVMUL_VV_VX;
defm PseudoVMULH : VPseudoVMUL_VV_VX;
defm PseudoVMULHU : VPseudoVMUL_VV_VX;
defm PseudoVMUL : VPseudoVMUL_VV_VX<Commutable=1>;
defm PseudoVMULH : VPseudoVMUL_VV_VX<Commutable=1>;
defm PseudoVMULHU : VPseudoVMUL_VV_VX<Commutable=1>;
defm PseudoVMULHSU : VPseudoVMUL_VV_VX;

//===----------------------------------------------------------------------===//
Expand All @@ -6400,8 +6408,8 @@ defm PseudoVREM : VPseudoVDIV_VV_VX;
//===----------------------------------------------------------------------===//
// 11.12. Vector Widening Integer Multiply Instructions
//===----------------------------------------------------------------------===//
defm PseudoVWMUL : VPseudoVWMUL_VV_VX;
defm PseudoVWMULU : VPseudoVWMUL_VV_VX;
defm PseudoVWMUL : VPseudoVWMUL_VV_VX<Commutable=1>;
defm PseudoVWMULU : VPseudoVWMUL_VV_VX<Commutable=1>;
defm PseudoVWMULSU : VPseudoVWMUL_VV_VX;

//===----------------------------------------------------------------------===//
Expand All @@ -6415,8 +6423,8 @@ defm PseudoVNMSUB : VPseudoVMAC_VV_VX_AAXA;
//===----------------------------------------------------------------------===//
// 11.14. Vector Widening Integer Multiply-Add Instructions
//===----------------------------------------------------------------------===//
defm PseudoVWMACCU : VPseudoVWMAC_VV_VX;
defm PseudoVWMACC : VPseudoVWMAC_VV_VX;
defm PseudoVWMACCU : VPseudoVWMAC_VV_VX<Commutable=1>;
defm PseudoVWMACC : VPseudoVWMAC_VV_VX<Commutable=1>;
defm PseudoVWMACCSU : VPseudoVWMAC_VV_VX;
defm PseudoVWMACCUS : VPseudoVWMAC_VX;

Expand Down
89 changes: 77 additions & 12 deletions llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,24 @@
// extended bits aren't consumed or because the input was already sign extended
// by an earlier instruction.
//
// Then it removes the -w suffix from opw instructions whenever all users are
// dependent only on the lower word of the result of the instruction.
// The cases handled are:
// * addw because c.add has a larger register encoding than c.addw.
// * addiw because it helps reduce test differences between RV32 and RV64
// w/o being a pessimization.
// * mulw because c.mulw doesn't exist but c.mul does (w/ zcb)
// * slliw because c.slliw doesn't exist and c.slli does
// Then:
// 1. Unless explicitly disabled or the target prefers instructions with W
// suffix, it removes the -w suffix from opw instructions whenever all users
// are dependent only on the lower word of the result of the instruction.
// The cases handled are:
// * addw because c.add has a larger register encoding than c.addw.
// * addiw because it helps reduce test differences between RV32 and RV64
// w/o being a pessimization.
// * mulw because c.mulw doesn't exist but c.mul does (w/ zcb)
// * slliw because c.slliw doesn't exist and c.slli does
//
// 2. Or, if explicitly enabled or the target prefers instructions with W
// suffix, it adds the W suffix to the instruction whenever all users are
// dependent only on the lower word of the result of the instruction.
// The cases handled are:
// * add/addi/sub/mul.
// * slli with imm < 32.
// * ld/lwu.
//===---------------------------------------------------------------------===//

#include "RISCV.h"
Expand Down Expand Up @@ -60,6 +69,8 @@ class RISCVOptWInstrs : public MachineFunctionPass {
const RISCVSubtarget &ST, MachineRegisterInfo &MRI);
/// Removes the -w suffix from eligible opw instructions in \p MF (the cases
/// handled are listed in the file header comment).
/// \returns true if any instruction in \p MF was changed.
bool stripWSuffixes(MachineFunction &MF, const RISCVInstrInfo &TII,
const RISCVSubtarget &ST, MachineRegisterInfo &MRI);
/// Rewrites ADD/ADDI/SUB/MUL, SLLI (immediate below 32) and LD/LWU in \p MF
/// to their W-form opcodes when hasAllWUsers() holds for the instruction.
/// \returns true if any instruction in \p MF was changed.
bool appendWSuffixes(MachineFunction &MF, const RISCVInstrInfo &TII,
const RISCVSubtarget &ST, MachineRegisterInfo &MRI);

void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
Expand Down Expand Up @@ -672,9 +683,6 @@ bool RISCVOptWInstrs::stripWSuffixes(MachineFunction &MF,
const RISCVInstrInfo &TII,
const RISCVSubtarget &ST,
MachineRegisterInfo &MRI) {
if (DisableStripWSuffix || !ST.enableStripWSuffix())
return false;

bool MadeChange = false;
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : MBB) {
Expand All @@ -698,6 +706,58 @@ bool RISCVOptWInstrs::stripWSuffixes(MachineFunction &MF,
return MadeChange;
}

/// Rewrites eligible instructions to their W-suffixed equivalents.
///
/// Visits every instruction in \p MF. ADD, ADDI, SUB and MUL map to ADDW,
/// ADDIW, SUBW and MULW; SLLI maps to SLLIW when its immediate is below 32;
/// LD and LWU map to LW. The opcode is only swapped when hasAllWUsers()
/// returns true for the instruction, and in that case the NoSWrap, NoUWrap
/// and IsExact flags are cleared as well.
///
/// \returns true if at least one instruction was rewritten.
bool RISCVOptWInstrs::appendWSuffixes(MachineFunction &MF,
                                      const RISCVInstrInfo &TII,
                                      const RISCVSubtarget &ST,
                                      MachineRegisterInfo &MRI) {
  bool Changed = false;

  for (MachineBasicBlock &Block : MF) {
    for (MachineInstr &Inst : Block) {
      // Select the W-form opcode; instructions without one are left alone.
      // TODO: Add more?
      unsigned NewOpc;
      switch (Inst.getOpcode()) {
      case RISCV::ADD:
        NewOpc = RISCV::ADDW;
        break;
      case RISCV::ADDI:
        NewOpc = RISCV::ADDIW;
        break;
      case RISCV::SUB:
        NewOpc = RISCV::SUBW;
        break;
      case RISCV::MUL:
        NewOpc = RISCV::MULW;
        break;
      case RISCV::SLLI:
        // SLLIW reads only the lowest 5 bits of the shift amount while SLLI
        // reads the lowest 6, so shift amounts of 32 and above are skipped.
        if (Inst.getOperand(2).getImm() > 31)
          continue;
        NewOpc = RISCV::SLLIW;
        break;
      case RISCV::LWU:
      case RISCV::LD:
        NewOpc = RISCV::LW;
        break;
      default:
        continue;
      }

      // Only rewrite when hasAllWUsers() accepts the instruction.
      if (!hasAllWUsers(Inst, ST, MRI))
        continue;

      LLVM_DEBUG(dbgs() << "Replacing " << Inst);
      Inst.setDesc(TII.get(NewOpc));
      // Drop the wrap/exact flags carried over from the original opcode.
      Inst.clearFlag(MachineInstr::MIFlag::NoSWrap);
      Inst.clearFlag(MachineInstr::MIFlag::NoUWrap);
      Inst.clearFlag(MachineInstr::MIFlag::IsExact);
      LLVM_DEBUG(dbgs() << " with " << Inst);

      ++NumTransformedToWInstrs;
      Changed = true;
    }
  }

  return Changed;
}

bool RISCVOptWInstrs::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
Expand All @@ -711,7 +771,12 @@ bool RISCVOptWInstrs::runOnMachineFunction(MachineFunction &MF) {

bool MadeChange = false;
MadeChange |= removeSExtWInstrs(MF, TII, ST, MRI);
MadeChange |= stripWSuffixes(MF, TII, ST, MRI);

if (!(DisableStripWSuffix || ST.preferWInst()))
MadeChange |= stripWSuffixes(MF, TII, ST, MRI);

if (ST.preferWInst())
MadeChange |= appendWSuffixes(MF, TII, ST, MRI);

return MadeChange;
}
4 changes: 2 additions & 2 deletions llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,9 @@ static std::string computeDataLayout(const Triple &TT) {
// mean anything.
if (Arch == Triple::spirv32)
return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-"
"v96:128-v192:256-v256:256-v512:512-v1024:1024";
"v96:128-v192:256-v256:256-v512:512-v1024:1024-G1";
return "e-i64:64-v16:16-v24:32-v32:32-v48:64-"
"v96:128-v192:256-v256:256-v512:512-v1024:1024";
"v96:128-v192:256-v256:256-v512:512-v1024:1024-G1";
}

static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) {
Expand Down
Loading