29 changes: 16 additions & 13 deletions flang/test/Fir/CUDA/cuda-data-transfer.fir
Expand Up @@ -199,12 +199,12 @@ func.func @_QPsub8() attributes {fir.bindc_name = "t"} {
// CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.array<5xi32>
// CHECK: %[[LOCAL:.*]] = fir.declare %[[ALLOCA]]
// CHECK: %[[GBL:.*]] = fir.address_of(@_QMmtestsEn) : !fir.ref<!fir.array<5xi32>>
// CHECK: %[[DECL:.*]] = fir.declare %[[GBL]]
// CHECK: %[[HOST:.*]] = fir.convert %[[DECL]] : (!fir.ref<!fir.array<5xi32>>) -> !fir.llvm_ptr<i8>
// CHECK: %[[SRC:.*]] = fir.call @_FortranACUFGetDeviceAddress(%[[HOST]], %{{.*}}, %{{.*}}) : (!fir.llvm_ptr<i8>, !fir.ref<i8>, i32) -> !fir.llvm_ptr<i8>
// CHECK: %[[SRC_CONV:.*]] = fir.convert %[[SRC]] : (!fir.llvm_ptr<i8>) -> !fir.ref<!fir.array<5xi32>>
// CHECK: %[[GBL_CONV:.*]] = fir.convert %[[GBL]] : (!fir.ref<!fir.array<5xi32>>) -> !fir.llvm_ptr<i8>
// CHECK: %[[ADDR:.*]] = fir.call @_FortranACUFGetDeviceAddress(%[[GBL_CONV]], %{{.*}}, %{{.*}}) : (!fir.llvm_ptr<i8>, !fir.ref<i8>, i32) -> !fir.llvm_ptr<i8>
// CHECK: %[[ADDR_CONV:.*]] = fir.convert %[[ADDR]] : (!fir.llvm_ptr<i8>) -> !fir.ref<!fir.array<5xi32>>
// CHECK: %[[DECL:.*]] = fir.declare %[[ADDR_CONV]]
// CHECK: %[[DST:.*]] = fir.convert %[[LOCAL]] : (!fir.ref<!fir.array<5xi32>>) -> !fir.llvm_ptr<i8>
// CHECK: %[[SRC:.*]] = fir.convert %[[SRC_CONV]] : (!fir.ref<!fir.array<5xi32>>) -> !fir.llvm_ptr<i8>
// CHECK: %[[SRC:.*]] = fir.convert %[[DECL]] : (!fir.ref<!fir.array<5xi32>>) -> !fir.llvm_ptr<i8>
// CHECK: fir.call @_FortranACUFDataTransferPtrPtr(%[[DST]], %[[SRC]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.llvm_ptr<i8>, !fir.llvm_ptr<i8>, i64, i32, !fir.ref<i8>, i32) -> none


Expand All @@ -223,11 +223,11 @@ func.func @_QPsub9() {
// CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.array<5xi32>
// CHECK: %[[LOCAL:.*]] = fir.declare %[[ALLOCA]]
// CHECK: %[[GBL:.*]] = fir.address_of(@_QMmtestsEn) : !fir.ref<!fir.array<5xi32>>
// CHECK: %[[DECL:.*]] = fir.declare %[[GBL]]
// CHECK: %[[HOST:.*]] = fir.convert %[[DECL]] : (!fir.ref<!fir.array<5xi32>>) -> !fir.llvm_ptr<i8>
// CHECK: %[[DST:.*]] = fir.call @_FortranACUFGetDeviceAddress(%[[HOST]], %{{.*}}, %{{.*}}) : (!fir.llvm_ptr<i8>, !fir.ref<i8>, i32) -> !fir.llvm_ptr<i8>
// CHECK: %[[DST_CONV:.*]] = fir.convert %[[DST]] : (!fir.llvm_ptr<i8>) -> !fir.ref<!fir.array<5xi32>>
// CHECK: %[[DST:.*]] = fir.convert %[[DST_CONV]] : (!fir.ref<!fir.array<5xi32>>) -> !fir.llvm_ptr<i8>
// CHECK: %[[GBL_CONV:.*]] = fir.convert %[[GBL]] : (!fir.ref<!fir.array<5xi32>>) -> !fir.llvm_ptr<i8>
// CHECK: %[[ADDR:.*]] = fir.call @_FortranACUFGetDeviceAddress(%[[GBL_CONV]], %{{.*}}, %{{.*}}) : (!fir.llvm_ptr<i8>, !fir.ref<i8>, i32) -> !fir.llvm_ptr<i8>
// CHECK: %[[ADDR_CONV:.*]] = fir.convert %[[ADDR]] : (!fir.llvm_ptr<i8>) -> !fir.ref<!fir.array<5xi32>>
// CHECK: %[[DECL:.*]] = fir.declare %[[ADDR_CONV]]
// CHECK: %[[DST:.*]] = fir.convert %[[DECL]] : (!fir.ref<!fir.array<5xi32>>) -> !fir.llvm_ptr<i8>
// CHECK: %[[SRC:.*]] = fir.convert %[[LOCAL]] : (!fir.ref<!fir.array<5xi32>>) -> !fir.llvm_ptr<i8>
// CHECK: fir.call @_FortranACUFDataTransferPtrPtr(%[[DST]], %[[SRC]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.llvm_ptr<i8>, !fir.llvm_ptr<i8>, i64, i32, !fir.ref<i8>, i32) -> none

Expand Down Expand Up @@ -380,9 +380,12 @@ func.func @_QPdevice_addr_conv() {
}

// CHECK-LABEL: func.func @_QPdevice_addr_conv()
// CHECK: %[[DEV_ADDR:.*]] = fir.call @_FortranACUFGetDeviceAddress(%{{.*}}, %{{.*}}, %{{.*}}) : (!fir.llvm_ptr<i8>, !fir.ref<i8>, i32) -> !fir.llvm_ptr<i8>
// CHECK: %[[DEV_ADDR_CONV:.*]] = fir.convert %[[DEV_ADDR]] : (!fir.llvm_ptr<i8>) -> !fir.ref<!fir.array<4xf32>>
// CHECK: fir.embox %[[DEV_ADDR_CONV]](%{{.*}}) : (!fir.ref<!fir.array<4xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<4xf32>>
// CHECK: %[[GBL:.*]] = fir.address_of(@_QMmod1Ea_dev) : !fir.ref<!fir.array<4xf32>>
// CHECK: %[[GBL_CONV:.*]] = fir.convert %[[GBL]] : (!fir.ref<!fir.array<4xf32>>) -> !fir.llvm_ptr<i8>
// CHECK: %[[ADDR:.*]] = fir.call @_FortranACUFGetDeviceAddress(%[[GBL_CONV]], %{{.*}}, %{{.*}}) : (!fir.llvm_ptr<i8>, !fir.ref<i8>, i32) -> !fir.llvm_ptr<i8>
// CHECK: %[[ADDR_CONV:.*]] = fir.convert %[[ADDR]] : (!fir.llvm_ptr<i8>) -> !fir.ref<!fir.array<4xf32>>
// CHECK: %[[DECL:.*]] = fir.declare %[[ADDR_CONV]](%{{.*}}) {data_attr = #cuf.cuda<device>, uniq_name = "_QMmod1Ea_dev"} : (!fir.ref<!fir.array<4xf32>>, !fir.shape<1>) -> !fir.ref<!fir.array<4xf32>>
// CHECK: fir.embox %[[DECL]](%{{.*}}) : (!fir.ref<!fir.array<4xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<4xf32>>
// CHECK: fir.call @_FortranACUFDataTransferCstDesc

func.func @_QQchar_transfer() attributes {fir.bindc_name = "char_transfer"} {
Expand Down
36 changes: 36 additions & 0 deletions flang/test/Fir/CUDA/cuda-global-addr.mlir
@@ -0,0 +1,36 @@
// RUN: fir-opt --cuf-convert %s | FileCheck %s

module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<f80, dense<128> : vector<2xi64>>, #dlti.dl_entry<i128, dense<128> : vector<2xi64>>, #dlti.dl_entry<i64, dense<64> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi64>>, #dlti.dl_entry<f128, dense<128> : vector<2xi64>>, #dlti.dl_entry<f64, dense<64> : vector<2xi64>>, #dlti.dl_entry<f16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i32, dense<32> : vector<2xi64>>, #dlti.dl_entry<i16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i8, dense<8> : vector<2xi64>>, #dlti.dl_entry<i1, dense<8> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>} {
fir.global @_QMmod1Eadev {data_attr = #cuf.cuda<device>} : !fir.array<10xi32> {
%0 = fir.zero_bits !fir.array<10xi32>
fir.has_value %0 : !fir.array<10xi32>
}
func.func @_QQmain() attributes {fir.bindc_name = "test"} {
%c14_i32 = arith.constant 14 : i32
%c6_i32 = arith.constant 6 : i32
%c4 = arith.constant 4 : index
%c1_i32 = arith.constant 1 : i32
%c0_i32 = arith.constant 0 : i32
%c10 = arith.constant 10 : index
%1 = fir.shape %c10 : (index) -> !fir.shape<1>
%3 = fir.address_of(@_QMmod1Eadev) : !fir.ref<!fir.array<10xi32>>
%4 = fir.declare %3(%1) {data_attr = #cuf.cuda<device>, uniq_name = "_QMmod1Eadev"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> !fir.ref<!fir.array<10xi32>>
%5 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFEi"}
%6 = fir.declare %5 {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
fir.store %c0_i32 to %6 : !fir.ref<i32>
%7 = fir.array_coor %4(%1) %c4 : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>, index) -> !fir.ref<i32>
cuf.data_transfer %c1_i32 to %7 {transfer_kind = #cuf.cuda_transfer<host_device>} : i32, !fir.ref<i32>
return
}

}

// CHECK-LABEL: func.func @_QQmain()
// CHECK: %[[ADDR:.*]] = fir.address_of(@_QMmod1Eadev) : !fir.ref<!fir.array<10xi32>>
// CHECK: %[[ADDRPTR:.*]] = fir.convert %[[ADDR]] : (!fir.ref<!fir.array<10xi32>>) -> !fir.llvm_ptr<i8>
// CHECK: %[[DEVICE_ADDR:.*]] = fir.call @_FortranACUFGetDeviceAddress(%[[ADDRPTR]], %{{.*}}, %{{.*}}) : (!fir.llvm_ptr<i8>, !fir.ref<i8>, i32) -> !fir.llvm_ptr<i8>
// CHECK: %[[DEVICE_ADDR_CONV:.*]] = fir.convert %[[DEVICE_ADDR]] : (!fir.llvm_ptr<i8>) -> !fir.ref<!fir.array<10xi32>>
// CHECK: %[[DECL:.*]] = fir.declare %[[DEVICE_ADDR_CONV]](%{{.*}}) {data_attr = #cuf.cuda<device>, uniq_name = "_QMmod1Eadev"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> !fir.ref<!fir.array<10xi32>>
// CHECK: %[[ARRAY_COOR:.*]] = fir.array_coor %[[DECL]](%{{.*}}) %c4{{.*}} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>, index) -> !fir.ref<i32>
// CHECK: %[[ARRAY_COOR_PTR:.*]] = fir.convert %[[ARRAY_COOR]] : (!fir.ref<i32>) -> !fir.llvm_ptr<i8>
// CHECK: fir.call @_FortranACUFDataTransferPtrPtr(%[[ARRAY_COOR_PTR]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.llvm_ptr<i8>, !fir.llvm_ptr<i8>, i64, i32, !fir.ref<i8>, i32) -> none
6 changes: 3 additions & 3 deletions flang/test/HLFIR/elemental-codegen.fir
Expand Up @@ -192,7 +192,7 @@ func.func @test_polymorphic(%arg0: !fir.class<!fir.type<_QMtypesTt>> {fir.bindc_
// CHECK: %[[VAL_35:.*]] = fir.absent !fir.box<none>
// CHECK: %[[VAL_36:.*]] = fir.convert %[[VAL_4]] : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>) -> !fir.ref<!fir.box<none>>
// CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_31]] : (!fir.ref<!fir.char<1,{{.*}}>>) -> !fir.ref<i8>
// CHECK: %[[VAL_38:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_36]], %[[VAL_34]], %[[VAL_35]], %[[VAL_37]], %[[VAL_33]]) : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
// CHECK: %[[VAL_38:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_36]], %{{.*}}, %[[VAL_34]], %[[VAL_35]], %[[VAL_37]], %[[VAL_33]]) : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
// CHECK: %[[VAL_39:.*]] = fir.load %[[VAL_13]]#0 : !fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>
// CHECK: %[[VAL_40:.*]] = arith.constant 1 : index
// CHECK: fir.do_loop %[[VAL_41:.*]] = %[[VAL_40]] to %[[EX1]] step %[[VAL_40]] unordered {
Expand Down Expand Up @@ -276,7 +276,7 @@ func.func @test_polymorphic_expr(%arg0: !fir.class<!fir.type<_QMtypesTt>> {fir.b
// CHECK: %[[VAL_36:.*]] = fir.absent !fir.box<none>
// CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_5]] : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>) -> !fir.ref<!fir.box<none>>
// CHECK: %[[VAL_38:.*]] = fir.convert %[[VAL_32]] : (!fir.ref<!fir.char<1,{{.*}}>>) -> !fir.ref<i8>
// CHECK: %[[VAL_39:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_37]], %[[VAL_35]], %[[VAL_36]], %[[VAL_38]], %[[VAL_34]]) : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
// CHECK: %[[VAL_39:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_37]], %{{.*}}, %[[VAL_35]], %[[VAL_36]], %[[VAL_38]], %[[VAL_34]]) : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
// CHECK: %[[VAL_40:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>
// CHECK: %[[VAL_41:.*]] = arith.constant 1 : index
// CHECK: fir.do_loop %[[VAL_42:.*]] = %[[VAL_41]] to %[[VAL_3]] step %[[VAL_41]] unordered {
Expand Down Expand Up @@ -329,7 +329,7 @@ func.func @test_polymorphic_expr(%arg0: !fir.class<!fir.type<_QMtypesTt>> {fir.b
// CHECK: %[[VAL_85:.*]] = fir.absent !fir.box<none>
// CHECK: %[[VAL_86:.*]] = fir.convert %[[VAL_4]] : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>) -> !fir.ref<!fir.box<none>>
// CHECK: %[[VAL_87:.*]] = fir.convert %[[VAL_81]] : (!fir.ref<!fir.char<1,{{.*}}>>) -> !fir.ref<i8>
// CHECK: %[[VAL_88:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_86]], %[[VAL_84]], %[[VAL_85]], %[[VAL_87]], %[[VAL_83]]) : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
// CHECK: %[[VAL_88:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_86]], %{{.*}}, %[[VAL_84]], %[[VAL_85]], %[[VAL_87]], %[[VAL_83]]) : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
// CHECK: %[[VAL_89:.*]] = fir.load %[[VAL_63]]#0 : !fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>
// CHECK: %[[VAL_90:.*]] = arith.constant 1 : index
// CHECK: fir.do_loop %[[VAL_91:.*]] = %[[VAL_90]] to %[[VAL_3]] step %[[VAL_90]] unordered {
Expand Down
4 changes: 2 additions & 2 deletions flang/test/Lower/OpenACC/acc-declare.f90
Expand Up @@ -469,6 +469,6 @@ subroutine init()
end module

! CHECK-LABEL: func.func @_QMacc_declare_post_action_statPinit()
! CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) fastmath<contract> {acc.declare_action = #acc.declare_action<postAlloc = @_QMacc_declare_post_action_statEx_acc_declare_update_desc_post_alloc>} : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
! CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) fastmath<contract> {acc.declare_action = #acc.declare_action<postAlloc = @_QMacc_declare_post_action_statEx_acc_declare_update_desc_post_alloc>} : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
! CHECK: fir.if
! CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) fastmath<contract> {acc.declare_action = #acc.declare_action<postAlloc = @_QMacc_declare_post_action_statEy_acc_declare_update_desc_post_alloc>} : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
! CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) fastmath<contract> {acc.declare_action = #acc.declare_action<postAlloc = @_QMacc_declare_post_action_statEy_acc_declare_update_desc_post_alloc>} : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
26 changes: 13 additions & 13 deletions flang/test/Lower/allocatable-polymorphic.f90
Expand Up @@ -267,7 +267,7 @@ subroutine test_allocatable()
! CHECK: %[[C0:.*]] = arith.constant 0 : i32
! CHECK: fir.call @_FortranAAllocatableInitDerivedForAllocate(%[[P_CAST]], %[[TYPE_DESC_P1_CAST]], %[[RANK]], %[[C0]]) {{.*}}: (!fir.ref<!fir.box<none>>, !fir.ref<none>, i32, i32) -> none
! CHECK: %[[P_CAST:.*]] = fir.convert %[[P_DECL]]#1 : (!fir.ref<!fir.class<!fir.heap<!fir.type<_QMpolyTp1{a:i32,b:i32}>>>>) -> !fir.ref<!fir.box<none>>
! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[P_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[P_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32

! CHECK: %[[TYPE_DESC_P1:.*]] = fir.type_desc !fir.type<_QMpolyTp1{a:i32,b:i32}>
! CHECK: %[[C1_CAST:.*]] = fir.convert %[[C1_DECL]]#1 : (!fir.ref<!fir.class<!fir.heap<!fir.type<_QMpolyTp1{a:i32,b:i32}>>>>) -> !fir.ref<!fir.box<none>>
Expand All @@ -276,7 +276,7 @@ subroutine test_allocatable()
! CHECK: %[[C0:.*]] = arith.constant 0 : i32
! CHECK: fir.call @_FortranAAllocatableInitDerivedForAllocate(%[[C1_CAST]], %[[TYPE_DESC_P1_CAST]], %[[RANK]], %[[C0]]) {{.*}}: (!fir.ref<!fir.box<none>>, !fir.ref<none>, i32, i32) -> none
! CHECK: %[[C1_CAST:.*]] = fir.convert %[[C1_DECL]]#1 : (!fir.ref<!fir.class<!fir.heap<!fir.type<_QMpolyTp1{a:i32,b:i32}>>>>) -> !fir.ref<!fir.box<none>>
! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C1_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C1_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32

! CHECK: %[[TYPE_DESC_P2:.*]] = fir.type_desc !fir.type<_QMpolyTp2{p1:!fir.type<_QMpolyTp1{a:i32,b:i32}>,c:i32}>
! CHECK: %[[C2_CAST:.*]] = fir.convert %[[C2_DECL]]#1 : (!fir.ref<!fir.class<!fir.heap<!fir.type<_QMpolyTp1{a:i32,b:i32}>>>>) -> !fir.ref<!fir.box<none>>
Expand All @@ -285,7 +285,7 @@ subroutine test_allocatable()
! CHECK: %[[C0:.*]] = arith.constant 0 : i32
! CHECK: fir.call @_FortranAAllocatableInitDerivedForAllocate(%[[C2_CAST]], %[[TYPE_DESC_P2_CAST]], %[[RANK]], %[[C0]]) {{.*}}: (!fir.ref<!fir.box<none>>, !fir.ref<none>, i32, i32) -> none
! CHECK: %[[C2_CAST:.*]] = fir.convert %[[C2_DECL]]#1 : (!fir.ref<!fir.class<!fir.heap<!fir.type<_QMpolyTp1{a:i32,b:i32}>>>>) -> !fir.ref<!fir.box<none>>
! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C2_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C2_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32

! CHECK: %[[TYPE_DESC_P1:.*]] = fir.type_desc !fir.type<_QMpolyTp1{a:i32,b:i32}>
! CHECK: %[[C3_CAST:.*]] = fir.convert %[[C3_DECL]]#1 : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x!fir.type<_QMpolyTp1{a:i32,b:i32}>>>>>) -> !fir.ref<!fir.box<none>>
Expand All @@ -300,7 +300,7 @@ subroutine test_allocatable()
! CHECK: %[[C10_I64:.*]] = fir.convert %[[C10]] : (i32) -> i64
! CHECK: %{{.*}} = fir.call @_FortranAAllocatableSetBounds(%[[C3_CAST]], %[[C0]], %[[C1_I64]], %[[C10_I64]]) {{.*}}: (!fir.ref<!fir.box<none>>, i32, i64, i64) -> none
! CHECK: %[[C3_CAST:.*]] = fir.convert %[[C3_DECL]]#1 : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x!fir.type<_QMpolyTp1{a:i32,b:i32}>>>>>) -> !fir.ref<!fir.box<none>>
! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C3_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C3_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32

! CHECK: %[[TYPE_DESC_P2:.*]] = fir.type_desc !fir.type<_QMpolyTp2{p1:!fir.type<_QMpolyTp1{a:i32,b:i32}>,c:i32}>
! CHECK: %[[C4_CAST:.*]] = fir.convert %[[C4_DECL]]#1 : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x!fir.type<_QMpolyTp1{a:i32,b:i32}>>>>>) -> !fir.ref<!fir.box<none>>
Expand All @@ -316,7 +316,7 @@ subroutine test_allocatable()
! CHECK: %[[C20_I64:.*]] = fir.convert %[[C20]] : (i32) -> i64
! CHECK: %{{.*}} = fir.call @_FortranAAllocatableSetBounds(%[[C4_CAST]], %[[C0]], %[[C1_I64]], %[[C20_I64]]) {{.*}}: (!fir.ref<!fir.box<none>>, i32, i64, i64) -> none
! CHECK: %[[C4_CAST:.*]] = fir.convert %[[C4_DECL]]#1 : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x!fir.type<_QMpolyTp1{a:i32,b:i32}>>>>>) -> !fir.ref<!fir.box<none>>
! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C4_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C4_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32

! CHECK: %[[C1_LOAD1:.*]] = fir.load %[[C1_DECL]]#0 : !fir.ref<!fir.class<!fir.heap<!fir.type<_QMpolyTp1{a:i32,b:i32}>>>>
! CHECK: fir.dispatch "proc1"(%[[C1_LOAD1]] : !fir.class<!fir.heap<!fir.type<_QMpolyTp1{a:i32,b:i32}>>>)
Expand Down Expand Up @@ -390,7 +390,7 @@ subroutine test_unlimited_polymorphic_with_intrinsic_type_spec()
! CHECK: %[[CORANK:.*]] = arith.constant 0 : i32
! CHECK: %{{.*}} = fir.call @_FortranAAllocatableInitIntrinsicForAllocate(%[[BOX_NONE]], %[[CAT]], %[[KIND]], %[[RANK]], %[[CORANK]]) {{.*}} : (!fir.ref<!fir.box<none>>, i32, i32, i32, i32) -> none
! CHECK: %[[BOX_NONE:.*]] = fir.convert %[[P_DECL]]#1 : (!fir.ref<!fir.class<!fir.heap<none>>>) -> !fir.ref<!fir.box<none>>
! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32

! CHECK: %[[BOX_NONE:.*]] = fir.convert %[[PTR_DECL]]#1 : (!fir.ref<!fir.class<!fir.ptr<none>>>) -> !fir.ref<!fir.box<none>>
! CHECK: %[[CAT:.*]] = arith.constant 1 : i32
Expand Down Expand Up @@ -573,7 +573,7 @@ subroutine test_allocatable_up_character()
! CHECK: %[[CORANK:.*]] = arith.constant 0 : i32
! CHECK: %{{.*}} = fir.call @_FortranAAllocatableInitCharacterForAllocate(%[[A_NONE]], %[[LEN]], %[[KIND]], %[[RANK]], %[[CORANK]]) {{.*}} : (!fir.ref<!fir.box<none>>, i64, i32, i32, i32) -> none
! CHECK: %[[A_NONE:.*]] = fir.convert %[[A_DECL]]#1 : (!fir.ref<!fir.class<!fir.heap<none>>>) -> !fir.ref<!fir.box<none>>
! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[A_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[A_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32

end module

Expand All @@ -592,17 +592,17 @@ program test_alloc
! LLVM-LABEL: define void @_QMpolyPtest_allocatable()

! LLVM: %{{.*}} = call {} @_FortranAAllocatableInitDerivedForAllocate(ptr %{{.*}}, ptr @_QMpolyEXdtXp1, i32 0, i32 0)
! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}})
! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i64 -1, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}})
! LLVM: %{{.*}} = call {} @_FortranAAllocatableInitDerivedForAllocate(ptr %{{.*}}, ptr @_QMpolyEXdtXp1, i32 0, i32 0)
! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}})
! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i64 -1, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}})
! LLVM: %{{.*}} = call {} @_FortranAAllocatableInitDerivedForAllocate(ptr %{{.*}}, ptr @_QMpolyEXdtXp2, i32 0, i32 0)
! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}})
! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i64 -1, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}})
! LLVM: %{{.*}} = call {} @_FortranAAllocatableInitDerivedForAllocate(ptr %{{.*}}, ptr @_QMpolyEXdtXp1, i32 1, i32 0)
! LLVM: %{{.*}} = call {} @_FortranAAllocatableSetBounds(ptr %{{.*}}, i32 0, i64 1, i64 10)
! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}})
! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i64 -1, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}})
! LLVM: %{{.*}} = call {} @_FortranAAllocatableInitDerivedForAllocate(ptr %{{.*}}, ptr @_QMpolyEXdtXp2, i32 1, i32 0)
! LLVM: %{{.*}} = call {} @_FortranAAllocatableSetBounds(ptr %{{.*}}, i32 0, i64 1, i64 20)
! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}})
! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i64 -1, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}})
! LLVM-COUNT-2: call void %{{[0-9]*}}()

! LLVM: call void @llvm.memcpy.p0.p0.i32
Expand Down Expand Up @@ -683,5 +683,5 @@ program test_alloc
! LLVM: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } { ptr null, i64 ptrtoint (ptr getelementptr (%_QMpolyTp1, ptr null, i32 1) to i64), i32 20240719, i8 0, i8 42, i8 2, i8 1, ptr @_QMpolyEXdtXp1, [1 x i64] zeroinitializer }, ptr %[[ALLOCA1:[0-9]*]]
! LLVM: call void @llvm.memcpy.p0.p0.i32(ptr %[[ALLOCA2:[0-9]+]], ptr %[[ALLOCA1]], i32 40, i1 false)
! LLVM: %{{.*}} = call {} @_FortranAAllocatableInitDerivedForAllocate(ptr %[[ALLOCA2]], ptr @_QMpolyEXdtXp1, i32 0, i32 0)
! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %[[ALLOCA2]], i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}})
! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %[[ALLOCA2]], i64 -1, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}})
! LLVM: %{{.*}} = call i32 @_FortranAAllocatableDeallocatePolymorphic(ptr %[[ALLOCA2]], ptr {{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}})
4 changes: 2 additions & 2 deletions flang/test/Lower/allocatable-runtime.f90
Expand Up @@ -31,7 +31,7 @@ subroutine foo()
! CHECK: fir.call @{{.*}}AllocatableSetBounds(%[[xBoxCast2]], %c0{{.*}}, %[[xlbCast]], %[[xubCast]]) {{.*}}: (!fir.ref<!fir.box<none>>, i32, i64, i64) -> none
! CHECK-DAG: %[[xBoxCast3:.*]] = fir.convert %[[xBoxAddr]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
! CHECK-DAG: %[[sourceFile:.*]] = fir.convert %{{.*}} -> !fir.ref<i8>
! CHECK: fir.call @{{.*}}AllocatableAllocate(%[[xBoxCast3]], %false{{.*}}, %[[errMsg]], %[[sourceFile]], %{{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
! CHECK: fir.call @{{.*}}AllocatableAllocate(%[[xBoxCast3]], %c-1{{.*}}, %false{{.*}}, %[[errMsg]], %[[sourceFile]], %{{.*}}) {{.*}}: (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32

! Simply check that we are emitting the right number of set bounds for y and z. Otherwise, this is just like x.
! CHECK: fir.convert %[[yBoxAddr]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>) -> !fir.ref<!fir.box<none>>
Expand Down Expand Up @@ -180,4 +180,4 @@ subroutine mold_allocation()
! CHECK: %[[M_BOX_NONE:.*]] = fir.convert %[[EMBOX_M]] : (!fir.box<!fir.array<10xi32>>) -> !fir.box<none>
! CHECK: %{{.*}} = fir.call @_FortranAAllocatableApplyMold(%[[A_BOX_NONE]], %[[M_BOX_NONE]], %[[RANK]]) {{.*}} : (!fir.ref<!fir.box<none>>, !fir.box<none>, i32) -> none
! CHECK: %[[A_BOX_NONE:.*]] = fir.convert %[[A]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<none>>
! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[A_BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[A_BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
4 changes: 2 additions & 2 deletions flang/test/Lower/allocate-mold.f90
Expand Up @@ -16,7 +16,7 @@ subroutine scalar_mold_allocation()
! CHECK: %[[A_REF_BOX_NONE1:.*]] = fir.convert %[[A]] : (!fir.ref<!fir.box<!fir.heap<i32>>>) -> !fir.ref<!fir.box<none>>
! CHECK: %{{.*}} = fir.call @_FortranAAllocatableApplyMold(%[[A_REF_BOX_NONE1]], %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref<!fir.box<none>>, !fir.box<none>, i32) -> none
! CHECK: %[[A_REF_BOX_NONE2:.*]] = fir.convert %[[A]] : (!fir.ref<!fir.box<!fir.heap<i32>>>) -> !fir.ref<!fir.box<none>>
! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[A_REF_BOX_NONE2]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[A_REF_BOX_NONE2]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32

subroutine array_scalar_mold_allocation()
real, allocatable :: a(:)
Expand All @@ -40,4 +40,4 @@ end subroutine array_scalar_mold_allocation
! CHECK: %[[REF_BOX_A1:.*]] = fir.convert %1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
! CHECK: %{{.*}} = fir.call @_FortranAAllocatableSetBounds(%[[REF_BOX_A1]], {{.*}},{{.*}}, {{.*}}) fastmath<contract> : (!fir.ref<!fir.box<none>>, i32, i64, i64) -> none
! CHECK: %[[REF_BOX_A2:.*]] = fir.convert %[[A]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[REF_BOX_A2]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[REF_BOX_A2]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
6 changes: 3 additions & 3 deletions flang/test/Lower/polymorphic.f90
Expand Up @@ -1154,11 +1154,11 @@ program test
! CHECK-LABEL: func.func @_QQmain() attributes {fir.bindc_name = "test"} {
! CHECK: %[[ADDR_O:.*]] = fir.address_of(@_QFEo) : !fir.ref<!fir.box<!fir.heap<!fir.type<_QMpolymorphic_testTouter{inner:!fir.type<_QMpolymorphic_testTp1{a:i32,b:i32}>}>>>>
! CHECK: %[[BOX_NONE:.*]] = fir.convert %[[ADDR_O]] : (!fir.ref<!fir.box<!fir.heap<!fir.type<_QMpolymorphic_testTouter{inner:!fir.type<_QMpolymorphic_testTp1{a:i32,b:i32}>}>>>>) -> !fir.ref<!fir.box<none>>
! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
! CHECK: %[[O:.*]] = fir.load %[[ADDR_O]] : !fir.ref<!fir.box<!fir.heap<!fir.type<_QMpolymorphic_testTouter{inner:!fir.type<_QMpolymorphic_testTp1{a:i32,b:i32}>}>>>>
! CHECK: %[[FIELD_INNER:.*]] = fir.field_index inner, !fir.type<_QMpolymorphic_testTouter{inner:!fir.type<_QMpolymorphic_testTp1{a:i32,b:i32}>}>
! CHECK: %[[COORD_INNER:.*]] = fir.coordinate_of %[[O]], %[[FIELD_INNER]] : (!fir.box<!fir.heap<!fir.type<_QMpolymorphic_testTouter{inner:!fir.type<_QMpolymorphic_testTp1{a:i32,b:i32}>}>>>, !fir.field) -> !fir.ref<!fir.type<_QMpolymorphic_testTp1{a:i32,b:i32}>>
! CHECK: %{{.*}} = fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered iter_args(%arg1 = %9) -> (!fir.array<5x!fir.logical<4>>) {
! CHECK: %{{.*}} = fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered iter_args(%arg1 = %{{.*}}) -> (!fir.array<5x!fir.logical<4>>) {
! CHECK: %[[EMBOXED:.*]] = fir.embox %[[COORD_INNER]] : (!fir.ref<!fir.type<_QMpolymorphic_testTp1{a:i32,b:i32}>>) -> !fir.class<!fir.type<_QMpolymorphic_testTp1{a:i32,b:i32}>>
! CHECK: %{{.*}} = fir.call @_QMpolymorphic_testPlt(%17, %[[EMBOXED]]) {{.*}} : (!fir.ref<i32>, !fir.class<!fir.type<_QMpolymorphic_testTp1{a:i32,b:i32}>>) -> !fir.logical<4>
! CHECK: %{{.*}} = fir.call @_QMpolymorphic_testPlt(%{{.*}}, %[[EMBOXED]]) {{.*}} : (!fir.ref<i32>, !fir.class<!fir.type<_QMpolymorphic_testTp1{a:i32,b:i32}>>) -> !fir.logical<4>
! CHECK: }
2 changes: 1 addition & 1 deletion flang/test/Semantics/test_symbols.py
Expand Up @@ -29,7 +29,7 @@
# Strips out blank lines and all comments except for "!DEF:", "!REF:", "!$acc" and "!$omp"
with open(src, "r") as text_in:
for line in text_in:
text = re.sub(r"!(?![DR]EF:|\$omp|\$acc).*", "", line)
text = re.sub(r"!(?![DR]EF:|\$omp|\$acc).*", "", line, flags=re.I)
text = re.sub(r"^\s*$", "", text)
diff1 += text

Expand Down
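As a quick, standalone illustration of the comment-stripping regex above (not part of the patch; the sample Fortran line is invented), note that re.I has to go through the flags keyword — a bare fourth positional argument to re.sub is interpreted as count, not flags:

import re

# Hypothetical input: a lowercase "!def:" marker plus an ordinary trailing comment.
line = 'integer :: x  !def: x is a counter  ! scratch note'

# With flags=re.I the negative lookahead recognizes "!def:" case-insensitively,
# so only the ordinary comment at the end of the line is removed.
text = re.sub(r"!(?![DR]EF:|\$omp|\$acc).*", "", line, flags=re.I)
print(text)  # prints: integer :: x  !def: x is a counter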
2 changes: 1 addition & 1 deletion flang/unittests/Evaluate/fp-testing.cpp
Expand Up @@ -7,8 +7,8 @@
#include <xmmintrin.h>
#endif

using Fortran::common::RealFlag;
using Fortran::common::RoundingMode;
using Fortran::evaluate::RealFlag;

ScopedHostFloatingPointEnvironment::ScopedHostFloatingPointEnvironment(
#if __x86_64__
Expand Down
6 changes: 3 additions & 3 deletions flang/unittests/Evaluate/fp-testing.h
@@ -1,12 +1,12 @@
#ifndef FORTRAN_TEST_EVALUATE_FP_TESTING_H_
#define FORTRAN_TEST_EVALUATE_FP_TESTING_H_

#include "flang/Evaluate/target.h"
#include "flang/Common/target-rounding.h"
#include <fenv.h>

using Fortran::common::RealFlags;
using Fortran::common::Rounding;
using Fortran::common::RoundingMode;
using Fortran::evaluate::RealFlags;
using Fortran::evaluate::Rounding;

class ScopedHostFloatingPointEnvironment {
public:
Expand Down
3 changes: 2 additions & 1 deletion flang/unittests/Runtime/CUDA/Allocatable.cpp
Expand Up @@ -42,7 +42,8 @@ TEST(AllocatableCUFTest, SimpleDeviceAllocatable) {
CUDA_REPORT_IF_ERROR(cudaMalloc(&device_desc, a->SizeInBytes()));

RTNAME(AllocatableAllocate)
(*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
(*a, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__,
__LINE__);
EXPECT_TRUE(a->IsAllocated());
RTNAME(CUFDescriptorSync)(device_desc, a.get(), __FILE__, __LINE__);
cudaDeviceSynchronize();
Expand Down
6 changes: 4 additions & 2 deletions flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
Expand Up @@ -35,7 +35,8 @@ TEST(AllocatableCUFTest, SimpleDeviceAllocate) {
EXPECT_FALSE(a->HasAddendum());
RTNAME(AllocatableSetBounds)(*a, 0, 1, 10);
RTNAME(AllocatableAllocate)
(*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
(*a, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__,
__LINE__);
EXPECT_TRUE(a->IsAllocated());
RTNAME(AllocatableDeallocate)
(*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
Expand All @@ -53,7 +54,8 @@ TEST(AllocatableCUFTest, SimplePinnedAllocate) {
EXPECT_FALSE(a->HasAddendum());
RTNAME(AllocatableSetBounds)(*a, 0, 1, 10);
RTNAME(AllocatableAllocate)
(*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
(*a, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__,
__LINE__);
EXPECT_TRUE(a->IsAllocated());
RTNAME(AllocatableDeallocate)
(*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
Expand Down
3 changes: 2 additions & 1 deletion flang/unittests/Runtime/CUDA/Memory.cpp
Expand Up @@ -51,7 +51,8 @@ TEST(MemoryCUFTest, CUFDataTransferDescDesc) {
EXPECT_EQ((int)kDeviceAllocatorPos, dev->GetAllocIdx());
RTNAME(AllocatableSetBounds)(*dev, 0, 1, 10);
RTNAME(AllocatableAllocate)
(*dev, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
(*dev, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__,
__LINE__);
EXPECT_TRUE(dev->IsAllocated());

// Create temp array to transfer to device.
Expand Down
2 changes: 1 addition & 1 deletion flang/unittests/Runtime/Complex.cpp
Expand Up @@ -13,7 +13,7 @@
#pragma clang diagnostic ignored "-Wc99-extensions"
#endif

#include "flang/Common/Fortran.h"
#include "flang/Common/Fortran-consts.h"
#include "flang/Runtime/cpp-type.h"
#include "flang/Runtime/entry-names.h"

Expand Down
9 changes: 9 additions & 0 deletions libc/config/windows/headers.txt
@@ -0,0 +1,9 @@
set(TARGET_PUBLIC_HEADERS
libc.include.ctype
libc.include.string
libc.include.inttypes
libc.include.stdlib
libc.include.errno
libc.include.fenv
libc.include.math
)
2 changes: 1 addition & 1 deletion libc/docs/fenv.rst
Expand Up @@ -12,7 +12,7 @@ Macros
:align: center
:header-rows: 1

* - Function
* - Macro
- Implemented
- C23 Standard Section
- POSIX.1-2017 Standard Section
Expand Down
33 changes: 30 additions & 3 deletions libc/docs/setjmp.rst
@@ -1,7 +1,28 @@
.. include:: check.rst

setjmp.h Functions
==================
========
setjmp.h
========

Macros
======

.. list-table::
:widths: auto
:align: center
:header-rows: 1

* - Macro
- Implemented
- C23 Standard Section
- POSIX.1-2017 Standard Section
* - __STDC_VERSION_SETJMP_H__
-
- 7.13.2
-

Functions
=========

.. list-table::
:widths: auto
Expand All @@ -10,7 +31,13 @@ setjmp.h Functions

* - Function
- Implemented
- Standard
- C23 Standard Section
- POSIX.1-2017 Standard Section
* - longjmp
- |check|
- 7.13.2.1
-
* - setjmp
- |check|
- 7.13.1.1
-
2 changes: 1 addition & 1 deletion libc/docs/signal.rst
Expand Up @@ -12,7 +12,7 @@ Macros
:align: center
:header-rows: 1

* - Function
* - Macro
- Implemented
- C23 Standard Section
- POSIX.1-2017 Standard Section
Expand Down
2 changes: 1 addition & 1 deletion libc/docs/stdbit.rst
Expand Up @@ -12,7 +12,7 @@ Macros
:align: center
:header-rows: 1

* - Function
* - Macro
- Implemented
- C23 Standard Section
- POSIX.1-2017 Standard Section
Expand Down
2 changes: 1 addition & 1 deletion libc/docs/threads.rst
Expand Up @@ -12,7 +12,7 @@ Macros
:align: center
:header-rows: 1

* - Function
* - Macro
- Implemented
- C23 Standard Section
- POSIX.1-2017 Standard Section
Expand Down
4 changes: 1 addition & 3 deletions libc/src/__support/macros/optimization.h
Expand Up @@ -48,7 +48,7 @@ LIBC_INLINE constexpr bool expects_bool_condition(T value, T expected) {

#ifndef LIBC_MATH
#define LIBC_MATH 0
#else
#endif // LIBC_MATH

#if (LIBC_MATH & LIBC_MATH_SKIP_ACCURATE_PASS)
#define LIBC_MATH_HAS_SKIP_ACCURATE_PASS
Expand All @@ -58,6 +58,4 @@ LIBC_INLINE constexpr bool expects_bool_condition(T value, T expected) {
#define LIBC_MATH_HAS_SMALL_TABLES
#endif

#endif // LIBC_MATH

#endif // LLVM_LIBC_SRC___SUPPORT_MACROS_OPTIMIZATION_H
21 changes: 18 additions & 3 deletions libc/src/math/generic/atan2f.cpp
Expand Up @@ -21,6 +21,8 @@ namespace LIBC_NAMESPACE_DECL {

namespace {

#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS

// Look up tables for accurate pass:

// atan(i/16) with i = 0..16, generated by Sollya with:
Expand Down Expand Up @@ -163,6 +165,8 @@ float atan2f_double_double(double num_d, double den_d, double q_d, int idx,
return static_cast<float>(cpp::bit_cast<double>(rr_bits));
}

#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS

} // anonymous namespace

// There are several range reduction steps we can take for atan2(y, x) as
Expand Down Expand Up @@ -283,14 +287,24 @@ LLVM_LIBC_FUNCTION(float, atan2f, (float y, float x)) {
fputil::DoubleDouble const_term = CONST_ADJ[x_sign][y_sign][recip];
double q_d = num_d / den_d;

double k_d = fputil::nearest_integer(q_d * 0x1.0p4f);
double k_d = fputil::nearest_integer(q_d * 0x1.0p4);
int idx = static_cast<int>(k_d);
double r;

#ifdef LIBC_MATH_HAS_SMALL_TABLES
double p = atan_eval_no_table(num_d, den_d, k_d * 0x1.0p-4);
r = final_sign * (p + (const_term.hi + ATAN_K_OVER_16[idx]));
#else
q_d = fputil::multiply_add(k_d, -0x1.0p-4, q_d);

double p = atan_eval(q_d, idx);
double r = final_sign *
fputil::multiply_add(q_d, p, const_term.hi + ATAN_COEFFS[idx][0]);
r = final_sign *
fputil::multiply_add(q_d, p, const_term.hi + ATAN_COEFFS[idx][0]);
#endif // LIBC_MATH_HAS_SMALL_TABLES

#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
return static_cast<float>(r);
#else
constexpr uint32_t LOWER_ERR = 4;
// Mask sticky bits in double precision before rounding to single precision.
constexpr uint32_t MASK =
Expand All @@ -306,6 +320,7 @@ LLVM_LIBC_FUNCTION(float, atan2f, (float y, float x)) {

return atan2f_double_double(num_d, den_d, q_d, idx, k_d, final_sign,
const_term);
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
}

} // namespace LIBC_NAMESPACE_DECL
66 changes: 57 additions & 9 deletions libc/src/math/generic/inv_trigf_utils.h
Expand Up @@ -17,14 +17,35 @@
namespace LIBC_NAMESPACE_DECL {

// PI and PI / 2
constexpr double M_MATH_PI = 0x1.921fb54442d18p+1;
constexpr double M_MATH_PI_2 = 0x1.921fb54442d18p+0;
static constexpr double M_MATH_PI = 0x1.921fb54442d18p+1;
static constexpr double M_MATH_PI_2 = 0x1.921fb54442d18p+0;

extern double ATAN_COEFFS[17][9];

// Look-up table for atan(k/16) with k = 0..16.
static constexpr double ATAN_K_OVER_16[17] = {
0.0,
0x1.ff55bb72cfdeap-5,
0x1.fd5ba9aac2f6ep-4,
0x1.7b97b4bce5b02p-3,
0x1.f5b75f92c80ddp-3,
0x1.362773707ebccp-2,
0x1.6f61941e4def1p-2,
0x1.a64eec3cc23fdp-2,
0x1.dac670561bb4fp-2,
0x1.0657e94db30dp-1,
0x1.1e00babdefeb4p-1,
0x1.345f01cce37bbp-1,
0x1.4978fa3269ee1p-1,
0x1.5d58987169b18p-1,
0x1.700a7c5784634p-1,
0x1.819d0b7158a4dp-1,
0x1.921fb54442d18p-1,
};

// For |x| <= 1/32 and 0 <= i <= 16, return Q(x) such that:
// Q(x) ~ (atan(x + i/16) - atan(i/16)) / x.
LIBC_INLINE double atan_eval(double x, int i) {
LIBC_INLINE static double atan_eval(double x, unsigned i) {
double x2 = x * x;

double c0 = fputil::multiply_add(x, ATAN_COEFFS[i][2], ATAN_COEFFS[i][1]);
Expand All @@ -39,16 +60,43 @@ LIBC_INLINE double atan_eval(double x, int i) {
return p;
}

// Evaluate atan without big lookup table.
// atan(n/d) - atan(k/16) = atan((n/d - k/16) / (1 + (n/d) * (k/16)))
// = atan((n - d * k/16)) / (d + n * k/16))
// So we let q = (n - d * k/16) / (d + n * k/16),
// and approximate with Taylor polynomial:
// atan(q) ~ q - q^3/3 + q^5/5 - q^7/7 + q^9/9
LIBC_INLINE static double atan_eval_no_table(double num, double den,
double k_over_16) {
double num_r = fputil::multiply_add(den, -k_over_16, num);
double den_r = fputil::multiply_add(num, k_over_16, den);
double q = num_r / den_r;

constexpr double ATAN_TAYLOR[] = {
-0x1.5555555555555p-2,
0x1.999999999999ap-3,
-0x1.2492492492492p-3,
0x1.c71c71c71c71cp-4,
};
double q2 = q * q;
double q3 = q2 * q;
double q4 = q2 * q2;
double c0 = fputil::multiply_add(q2, ATAN_TAYLOR[1], ATAN_TAYLOR[0]);
double c1 = fputil::multiply_add(q2, ATAN_TAYLOR[3], ATAN_TAYLOR[2]);
double d = fputil::multiply_add(q4, c1, c0);
return fputil::multiply_add(q3, d, q);
}

// > Q = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20|],
// [|1, D...|], [0, 0.5]);
constexpr double ASIN_COEFFS[10] = {0x1.5555555540fa1p-3, 0x1.333333512edc2p-4,
0x1.6db6cc1541b31p-5, 0x1.f1caff324770ep-6,
0x1.6e43899f5f4f4p-6, 0x1.1f847cf652577p-6,
0x1.9b60f47f87146p-7, 0x1.259e2634c494fp-6,
-0x1.df946fa875ddp-8, 0x1.02311ecf99c28p-5};
static constexpr double ASIN_COEFFS[10] = {
0x1.5555555540fa1p-3, 0x1.333333512edc2p-4, 0x1.6db6cc1541b31p-5,
0x1.f1caff324770ep-6, 0x1.6e43899f5f4f4p-6, 0x1.1f847cf652577p-6,
0x1.9b60f47f87146p-7, 0x1.259e2634c494fp-6, -0x1.df946fa875ddp-8,
0x1.02311ecf99c28p-5};

// Evaluate P(x^2) - 1, where P(x^2) ~ asin(x)/x
LIBC_INLINE double asin_eval(double xsq) {
LIBC_INLINE static double asin_eval(double xsq) {
double x4 = xsq * xsq;
double r1 = fputil::polyeval(x4, ASIN_COEFFS[0], ASIN_COEFFS[2],
ASIN_COEFFS[4], ASIN_COEFFS[6], ASIN_COEFFS[8]);
Expand Down
8 changes: 4 additions & 4 deletions libc/utils/docgen/docgen.py
Expand Up @@ -92,15 +92,15 @@ def load_api(header: Header) -> Dict:
return json.loads(api)


def print_tbl_dir():
def print_tbl_dir(name):
print(
f"""
.. list-table::
:widths: auto
:align: center
:header-rows: 1
* - Function
* - {name}
- Implemented
- C23 Standard Section
- POSIX.1-2017 Standard Section"""
Expand All @@ -112,7 +112,7 @@ def print_functions_rst(header: Header, functions: Dict):
print(tbl_hdr)
print("=" * len(tbl_hdr))

print_tbl_dir()
print_tbl_dir("Function")

for name in sorted(functions.keys()):
print(f" * - {name}")
Expand All @@ -138,7 +138,7 @@ def print_macros_rst(header: Header, macros: Dict):
print(tbl_hdr)
print("=" * len(tbl_hdr))

print_tbl_dir()
print_tbl_dir("Macro")

for name in sorted(macros.keys()):
print(f" * - {name}")
Expand Down
10 changes: 5 additions & 5 deletions libc/utils/docgen/setjmp.json
@@ -1,15 +1,15 @@
{
"macros": {
"__STDC_VERSION_SETJMP_H__": {
"defined": "7.13.2"
},
"setjmp": {
"defined": "7.13.1.1"
"c-definition": "7.13.2"
}
},
"functions": {
"setjmp": {
"c-definition": "7.13.1.1"
},
"longjmp": {
"defined": "7.13.2.1"
"c-definition": "7.13.2.1"
}
}
}
10 changes: 0 additions & 10 deletions libc/utils/gpu/loader/amdgpu/amdhsa-loader.cpp
Expand Up @@ -15,18 +15,8 @@

#include "Loader.h"

#if defined(__has_include)
#if __has_include("hsa/hsa.h")
#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"
#elif __has_include("hsa.h")
#include "hsa.h"
#include "hsa_ext_amd.h"
#endif
#else
#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"
#endif

#include "llvm/Frontend/Offloading/Utility.h"

Expand Down
2 changes: 2 additions & 0 deletions lld/COFF/COFFLinkerContext.h
Expand Up @@ -88,6 +88,8 @@ class COFFLinkerContext : public CommonLinkerContext {
Timer diskCommitTimer;

Configuration config;

DynamicRelocsChunk *dynamicRelocs = nullptr;
};

} // namespace lld::coff
Expand Down
78 changes: 78 additions & 0 deletions lld/COFF/Chunks.cpp
Expand Up @@ -25,6 +25,7 @@

using namespace llvm;
using namespace llvm::object;
using namespace llvm::support;
using namespace llvm::support::endian;
using namespace llvm::COFF;
using llvm::support::ulittle32_t;
Expand Down Expand Up @@ -1147,4 +1148,81 @@ uint32_t ImportThunkChunkARM64EC::extendRanges() {
return sizeof(arm64Thunk) - sizeof(uint32_t);
}

size_t Arm64XDynamicRelocEntry::getSize() const {
switch (type) {
case IMAGE_DVRT_ARM64X_FIXUP_TYPE_VALUE:
return sizeof(uint16_t) + size; // A header and a payload.
case IMAGE_DVRT_ARM64X_FIXUP_TYPE_DELTA:
case IMAGE_DVRT_ARM64X_FIXUP_TYPE_ZEROFILL:
llvm_unreachable("unsupported type");
}
}

void Arm64XDynamicRelocEntry::writeTo(uint8_t *buf) const {
auto out = reinterpret_cast<ulittle16_t *>(buf);
*out = (offset & 0xfff) | (type << 12);

switch (type) {
case IMAGE_DVRT_ARM64X_FIXUP_TYPE_VALUE:
*out |= ((bit_width(size) - 1) << 14); // Encode the size.
switch (size) {
case 2:
out[1] = value;
break;
case 4:
*reinterpret_cast<ulittle32_t *>(out + 1) = value;
break;
case 8:
*reinterpret_cast<ulittle64_t *>(out + 1) = value;
break;
default:
llvm_unreachable("invalid size");
}
break;
case IMAGE_DVRT_ARM64X_FIXUP_TYPE_DELTA:
case IMAGE_DVRT_ARM64X_FIXUP_TYPE_ZEROFILL:
llvm_unreachable("unsupported type");
}
}

void DynamicRelocsChunk::finalize() {
llvm::stable_sort(arm64xRelocs, [=](const Arm64XDynamicRelocEntry &a,
const Arm64XDynamicRelocEntry &b) {
return a.offset < b.offset;
});

size = sizeof(coff_dynamic_reloc_table) + sizeof(coff_dynamic_relocation64) +
sizeof(coff_base_reloc_block_header);

for (const Arm64XDynamicRelocEntry &entry : arm64xRelocs) {
assert(!(entry.offset & ~0xfff)); // Not yet supported.
size += entry.getSize();
}

size = alignTo(size, sizeof(uint32_t));
}

void DynamicRelocsChunk::writeTo(uint8_t *buf) const {
auto table = reinterpret_cast<coff_dynamic_reloc_table *>(buf);
table->Version = 1;
table->Size = sizeof(coff_dynamic_relocation64);
buf += sizeof(*table);

auto header = reinterpret_cast<coff_dynamic_relocation64 *>(buf);
header->Symbol = IMAGE_DYNAMIC_RELOCATION_ARM64X;
buf += sizeof(*header);

auto pageHeader = reinterpret_cast<coff_base_reloc_block_header *>(buf);
pageHeader->BlockSize = sizeof(*pageHeader);
for (const Arm64XDynamicRelocEntry &entry : arm64xRelocs) {
entry.writeTo(buf + pageHeader->BlockSize);
pageHeader->BlockSize += entry.getSize();
}
pageHeader->BlockSize = alignTo(pageHeader->BlockSize, sizeof(uint32_t));

header->BaseRelocSize = pageHeader->BlockSize;
table->Size += header->BaseRelocSize;
assert(size == sizeof(*table) + sizeof(*header) + header->BaseRelocSize);
}

} // namespace lld::coff
36 changes: 36 additions & 0 deletions lld/COFF/Chunks.h
Expand Up @@ -835,6 +835,42 @@ class ECExportThunkChunk : public NonSectionCodeChunk {
Defined *target;
};

// ARM64X entry for dynamic relocations.
class Arm64XDynamicRelocEntry {
public:
Arm64XDynamicRelocEntry(llvm::COFF::Arm64XFixupType type, uint8_t size,
uint32_t offset, uint64_t value)
: offset(offset), value(value), type(type), size(size) {}

size_t getSize() const;
void writeTo(uint8_t *buf) const;

uint32_t offset;
uint64_t value;

private:
llvm::COFF::Arm64XFixupType type;
uint8_t size;
};

// Dynamic relocation chunk containing ARM64X relocations for the hybrid image.
class DynamicRelocsChunk : public NonSectionChunk {
public:
DynamicRelocsChunk() {}
size_t getSize() const override { return size; }
void writeTo(uint8_t *buf) const override;
void finalize();

void add(llvm::COFF::Arm64XFixupType type, uint8_t size, uint32_t offset,
uint64_t value) {
arm64xRelocs.emplace_back(type, size, offset, value);
}

private:
std::vector<Arm64XDynamicRelocEntry> arm64xRelocs;
size_t size;
};

// MinGW specific, for the "automatic import of variables from DLLs" feature.
// This provides the table of runtime pseudo relocations, for variable
// references that turned out to need to be imported from a DLL even though
Expand Down
64 changes: 60 additions & 4 deletions lld/COFF/Writer.cpp
Expand Up @@ -79,6 +79,11 @@ static_assert(sizeof(dosProgram) % 8 == 0,

static const int dosStubSize = sizeof(dos_header) + sizeof(dosProgram);
static_assert(dosStubSize % 8 == 0, "DOSStub size must be multiple of 8");
static const uint32_t coffHeaderOffset = dosStubSize + sizeof(PEMagic);
static const uint32_t peHeaderOffset =
coffHeaderOffset + sizeof(coff_file_header);
static const uint32_t dataDirOffset64 =
peHeaderOffset + sizeof(pe32plus_header);

static const int numberOfDataDirectory = 16;

Expand Down Expand Up @@ -272,6 +277,7 @@ class Writer {
OutputSection *findSection(StringRef name);
void addBaserels();
void addBaserelBlocks(std::vector<Baserel> &v);
void createDynamicRelocs();

uint32_t getSizeOfInitializedData();

Expand Down Expand Up @@ -754,6 +760,8 @@ void Writer::run() {
llvm::TimeTraceScope timeScope("Write PE");
ScopedTimer t1(ctx.codeLayoutTimer);

if (ctx.config.machine == ARM64X)
ctx.dynamicRelocs = make<DynamicRelocsChunk>();
createImportTables();
createSections();
appendImportThunks();
Expand All @@ -764,6 +772,7 @@ void Writer::run() {
mergeSections();
sortECChunks();
appendECImportTables();
createDynamicRelocs();
removeUnusedSections();
finalizeAddresses();
removeEmptySections();
Expand Down Expand Up @@ -1597,8 +1606,14 @@ void Writer::assignAddresses() {

for (OutputSection *sec : ctx.outputSections) {
llvm::TimeTraceScope timeScope("Section: ", sec->name);
if (sec == relocSec)
if (sec == relocSec) {
sec->chunks.clear();
addBaserels();
if (ctx.dynamicRelocs) {
ctx.dynamicRelocs->finalize();
relocSec->addChunk(ctx.dynamicRelocs);
}
}
uint64_t rawSize = 0, virtualSize = 0;
sec->header.VirtualAddress = rva;

Expand Down Expand Up @@ -1673,6 +1688,7 @@ template <typename PEHeaderTy> void Writer::writeHeader() {
buf += sizeof(PEMagic);

// Write COFF header
assert(coffHeaderOffset == buf - buffer->getBufferStart());
auto *coff = reinterpret_cast<coff_file_header *>(buf);
buf += sizeof(*coff);
switch (config->machine) {
Expand Down Expand Up @@ -1705,6 +1721,7 @@ template <typename PEHeaderTy> void Writer::writeHeader() {
sizeof(PEHeaderTy) + sizeof(data_directory) * numberOfDataDirectory;

// Write PE header
assert(peHeaderOffset == buf - buffer->getBufferStart());
auto *pe = reinterpret_cast<PEHeaderTy *>(buf);
buf += sizeof(*pe);
pe->Magic = config->is64() ? PE32Header::PE32_PLUS : PE32Header::PE32;
Expand Down Expand Up @@ -1770,6 +1787,8 @@ template <typename PEHeaderTy> void Writer::writeHeader() {
pe->SizeOfInitializedData = getSizeOfInitializedData();

// Write data directory
assert(!ctx.config.is64() ||
dataDirOffset64 == buf - buffer->getBufferStart());
auto *dir = reinterpret_cast<data_directory *>(buf);
buf += sizeof(*dir) * numberOfDataDirectory;
if (edataStart) {
Expand Down Expand Up @@ -1799,9 +1818,12 @@ template <typename PEHeaderTy> void Writer::writeHeader() {
exceptionTable.last->getSize() -
exceptionTable.first->getRVA();
}
if (relocSec->getVirtualSize()) {
size_t relocSize = relocSec->getVirtualSize();
if (ctx.dynamicRelocs)
relocSize -= ctx.dynamicRelocs->getSize();
if (relocSize) {
dir[BASE_RELOCATION_TABLE].RelativeVirtualAddress = relocSec->getRVA();
dir[BASE_RELOCATION_TABLE].Size = relocSec->getVirtualSize();
dir[BASE_RELOCATION_TABLE].Size = relocSize;
}
if (Symbol *sym = ctx.symtab.findUnderscore("_tls_used")) {
if (Defined *b = dyn_cast<Defined>(sym)) {
Expand Down Expand Up @@ -2523,7 +2545,6 @@ uint32_t Writer::getSizeOfInitializedData() {
void Writer::addBaserels() {
if (!ctx.config.relocatable)
return;
relocSec->chunks.clear();
std::vector<Baserel> v;
for (OutputSection *sec : ctx.outputSections) {
if (sec->header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE)
Expand Down Expand Up @@ -2557,6 +2578,29 @@ void Writer::addBaserelBlocks(std::vector<Baserel> &v) {
relocSec->addChunk(make<BaserelChunk>(page, &v[i], &v[0] + j));
}

void Writer::createDynamicRelocs() {
if (!ctx.dynamicRelocs)
return;

// Adjust the Machine field in the COFF header to AMD64.
ctx.dynamicRelocs->add(IMAGE_DVRT_ARM64X_FIXUP_TYPE_VALUE, sizeof(uint16_t),
coffHeaderOffset + offsetof(coff_file_header, Machine),
AMD64);

// Clear the load config directory.
// FIXME: Use the hybrid load config value instead.
ctx.dynamicRelocs->add(IMAGE_DVRT_ARM64X_FIXUP_TYPE_VALUE, sizeof(uint32_t),
dataDirOffset64 +
LOAD_CONFIG_TABLE * sizeof(data_directory) +
offsetof(data_directory, RelativeVirtualAddress),
0);
ctx.dynamicRelocs->add(IMAGE_DVRT_ARM64X_FIXUP_TYPE_VALUE, sizeof(uint32_t),
dataDirOffset64 +
LOAD_CONFIG_TABLE * sizeof(data_directory) +
offsetof(data_directory, Size),
0);
}

PartialSection *Writer::createPartialSection(StringRef name,
uint32_t outChars) {
PartialSection *&pSec = partialSections[{name, outChars}];
Expand Down Expand Up @@ -2660,6 +2704,18 @@ template <typename T> void Writer::prepareLoadConfig(T *loadConfig) {
loadConfig->DependentLoadFlags = ctx.config.dependentLoadFlags;
}

if (ctx.dynamicRelocs) {
IF_CONTAINS(DynamicValueRelocTableSection) {
loadConfig->DynamicValueRelocTableSection = relocSec->sectionIndex;
loadConfig->DynamicValueRelocTableOffset =
ctx.dynamicRelocs->getRVA() - relocSec->getRVA();
}
else {
warn("'_load_config_used' structure too small to include dynamic "
"relocations");
}
}

if (ctx.config.guardCF == GuardCFLevel::Off)
return;
RETURN_IF_NOT_CONTAINS(GuardFlags)
Expand Down
14 changes: 0 additions & 14 deletions lld/ELF/Arch/RISCV.cpp
Expand Up @@ -267,7 +267,6 @@ RelExpr RISCV::getRelExpr(const RelType type, const Symbol &s,
case R_RISCV_HI20:
case R_RISCV_LO12_I:
case R_RISCV_LO12_S:
case R_RISCV_RVC_LUI:
return R_ABS;
case R_RISCV_ADD8:
case R_RISCV_ADD16:
Expand Down Expand Up @@ -373,19 +372,6 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
return;
}

case R_RISCV_RVC_LUI: {
int64_t imm = SignExtend64(val + 0x800, bits) >> 12;
checkInt(ctx, loc, imm, 6, rel);
if (imm == 0) { // `c.lui rd, 0` is illegal, convert to `c.li rd, 0`
write16le(loc, (read16le(loc) & 0x0F83) | 0x4000);
} else {
uint16_t imm17 = extractBits(val + 0x800, 17, 17) << 12;
uint16_t imm16_12 = extractBits(val + 0x800, 16, 12) << 2;
write16le(loc, (read16le(loc) & 0xEF83) | imm17 | imm16_12);
}
return;
}

case R_RISCV_JAL: {
checkInt(ctx, loc, val, 21, rel);
checkAlignment(ctx, loc, val, 2, rel);
Expand Down
45 changes: 42 additions & 3 deletions lld/ELF/Arch/X86_64.cpp
Expand Up @@ -401,6 +401,7 @@ RelExpr X86_64::getRelExpr(RelType type, const Symbol &s,
case R_X86_64_CODE_4_GOTPCRELX:
case R_X86_64_GOTTPOFF:
case R_X86_64_CODE_4_GOTTPOFF:
case R_X86_64_CODE_6_GOTTPOFF:
return R_GOT_PC;
case R_X86_64_GOTOFF64:
return R_GOTPLTREL;
Expand Down Expand Up @@ -562,8 +563,9 @@ void X86_64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
}
}

// In some conditions, R_X86_64_GOTTPOFF/R_X86_64_CODE_4_GOTTPOFF relocation can
// be optimized to R_X86_64_TPOFF32 so that it does not use GOT.
// In some conditions,
// R_X86_64_GOTTPOFF/R_X86_64_CODE_4_GOTTPOFF/R_X86_64_CODE_6_GOTTPOFF
// relocation can be optimized to R_X86_64_TPOFF32 so that it does not use GOT.
void X86_64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
uint64_t val) const {
uint8_t *inst = loc - 3;
Expand Down Expand Up @@ -605,7 +607,7 @@ void X86_64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
} else if (rel.type == R_X86_64_CODE_4_GOTTPOFF) {
if (loc[-4] != 0xd5) {
Err(ctx) << getErrorLoc(ctx, loc - 4)
<< "Invalid prefix with R_X86_64_CODE_4_GOTTPOFF!";
<< "invalid prefix with R_X86_64_CODE_4_GOTTPOFF!";
return;
}
const uint8_t rex = loc[-3];
Expand All @@ -623,6 +625,41 @@ void X86_64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
<< "R_X86_64_CODE_4_GOTTPOFF must be used in MOVQ or ADDQ "
"instructions only";
}
} else if (rel.type == R_X86_64_CODE_6_GOTTPOFF) {
if (loc[-6] != 0x62) {
Err(ctx) << getErrorLoc(ctx, loc - 6)
<< "invalid prefix with R_X86_64_CODE_6_GOTTPOFF!";
return;
}
// Check bits are satisfied:
// loc[-5]: X==1 (inverted polarity), (loc[-5] & 0x7) == 0x4
// loc[-4]: W==1, X2==1 (inverted polarity), pp==0b00(NP)
// loc[-3]: NF==1 or ND==1
// loc[-2]: opcode==0x1 or opcode==0x3
// loc[-1]: Mod==0b00, RM==0b101
if (((loc[-5] & 0x47) == 0x44) && ((loc[-4] & 0x87) == 0x84) &&
((loc[-3] & 0x14) != 0) && (loc[-2] == 0x1 || loc[-2] == 0x3) &&
((loc[-1] & 0xc7) == 0x5)) {
// "addq %reg1, foo@GOTTPOFF(%rip), %reg2" -> "addq $foo, %reg1, %reg2"
// "addq foo@GOTTPOFF(%rip), %reg1, %reg2" -> "addq $foo, %reg1, %reg2"
// "{nf} addq %reg1, foo@GOTTPOFF(%rip), %reg2"
// -> "{nf} addq $foo, %reg1, %reg2"
// "{nf} addq name@GOTTPOFF(%rip), %reg1, %reg2"
// -> "{nf} addq $foo, %reg1, %reg2"
// "{nf} addq name@GOTTPOFF(%rip), %reg" -> "{nf} addq $foo, %reg"
loc[-2] = 0x81;
// Move R bits to B bits in EVEX payloads and ModRM byte.
const uint8_t evexPayload0 = loc[-5];
if ((evexPayload0 & (1 << 7)) == 0)
loc[-5] = (evexPayload0 | (1 << 7)) & ~(1 << 5);
if ((evexPayload0 & (1 << 4)) == 0)
loc[-5] = evexPayload0 | (1 << 4) | (1 << 3);
*regSlot = 0xc0 | reg;
} else {
Err(ctx) << getErrorLoc(ctx, loc - 6)
<< "R_X86_64_CODE_6_GOTTPOFF must be used in ADDQ instructions "
"with NDD/NF/NDD+NF only";
}
} else {
llvm_unreachable("Unsupported relocation type!");
}
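
The mask tests above are easier to follow against concrete bytes. Below is a minimal standalone sketch of the same arithmetic; the instruction bytes are invented purely to satisfy the checks and are not claimed to be a real APX encoding:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Hypothetical 10-byte instruction image; the relocation points at the
      // 4-byte displacement, so loc is byte 6.
      const uint8_t insn[10] = {0x62, 0x44, 0x84, 0x14, 0x01, 0x05, 0, 0, 0, 0};
      const uint8_t *loc = insn + 6;
      bool relaxable =
          loc[-6] == 0x62 &&                      // EVEX escape byte
          (loc[-5] & 0x47) == 0x44 &&             // X==1 (inverted), low bits 0b100
          (loc[-4] & 0x87) == 0x84 &&             // W==1, X2==1 (inverted), pp==NP
          (loc[-3] & 0x14) != 0 &&                // NF==1 or ND==1
          (loc[-2] == 0x01 || loc[-2] == 0x03) && // add opcode forms
          (loc[-1] & 0xc7) == 0x05;               // ModRM: Mod==0b00, RM==0b101
      std::printf("relaxable: %d\n", relaxable);  // prints 1 for these bytes
      return 0;
    }

When all of the tests hold, the code above rewrites the opcode byte at loc[-2] to 0x81 and converts the RIP-relative GOT load into an immediate form.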
Expand Down Expand Up @@ -782,6 +819,7 @@ int64_t X86_64::getImplicitAddend(const uint8_t *buf, RelType type) const {
case R_X86_64_PC32:
case R_X86_64_GOTTPOFF:
case R_X86_64_CODE_4_GOTTPOFF:
case R_X86_64_CODE_6_GOTTPOFF:
case R_X86_64_PLT32:
case R_X86_64_TLSGD:
case R_X86_64_TLSLD:
Expand Down Expand Up @@ -893,6 +931,7 @@ void X86_64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
break;
case R_X86_64_GOTTPOFF:
case R_X86_64_CODE_4_GOTTPOFF:
case R_X86_64_CODE_6_GOTTPOFF:
if (rel.expr == R_RELAX_TLS_IE_TO_LE) {
relaxTlsIeToLe(loc, rel, val);
} else {
21 changes: 12 additions & 9 deletions lld/ELF/Relocations.cpp
@@ -1687,6 +1687,17 @@ template <class ELFT> void elf::scanRelocations(Ctx &ctx) {
outerFn();
}

RelocationBaseSection &elf::getIRelativeSection(Ctx &ctx) {
// Prior to Android V, there was a bug that caused RELR relocations to be
// applied after packed relocations. This meant that resolvers referenced by
// IRELATIVE relocations in the packed relocation section would read
// unrelocated globals with RELR relocations when
// --pack-relative-relocs=android+relr is enabled. Work around this by placing
// IRELATIVE in .rela.plt.
return ctx.arg.androidPackDynRelocs ? *ctx.in.relaPlt
: *ctx.mainPart->relaDyn;
}

static bool handleNonPreemptibleIfunc(Ctx &ctx, Symbol &sym, uint16_t flags) {
// Handle a reference to a non-preemptible ifunc. These are special in a
// few ways:
Expand Down Expand Up @@ -1736,17 +1747,9 @@ static bool handleNonPreemptibleIfunc(Ctx &ctx, Symbol &sym, uint16_t flags) {
// original section/value pairs. For non-GOT non-PLT relocation case below, we
// may alter section/value, so create a copy of the symbol to make
// section/value fixed.
//
// Prior to Android V, there was a bug that caused RELR relocations to be
// applied after packed relocations. This meant that resolvers referenced by
// IRELATIVE relocations in the packed relocation section would read
// unrelocated globals with RELR relocations when
// --pack-relative-relocs=android+relr is enabled. Work around this by placing
// IRELATIVE in .rela.plt.
auto *directSym = makeDefined(cast<Defined>(sym));
directSym->allocateAux(ctx);
auto &dyn =
ctx.arg.androidPackDynRelocs ? *ctx.in.relaPlt : *ctx.mainPart->relaDyn;
auto &dyn = getIRelativeSection(ctx);
addPltEntry(ctx, *ctx.in.iplt, *ctx.in.igotPlt, dyn, ctx.target->iRelativeRel,
*directSym);
sym.allocateAux(ctx);
3 changes: 3 additions & 0 deletions lld/ELF/Relocations.h
@@ -22,6 +22,7 @@ class Symbol;
class InputSection;
class InputSectionBase;
class OutputSection;
class RelocationBaseSection;
class SectionBase;

// Represents a relocation type, such as R_X86_64_PC32 or R_ARM_THM_CALL.
Expand Down Expand Up @@ -356,6 +357,8 @@ sortRels(Relocs<llvm::object::Elf_Crel_Impl<is64>> rels,
return {};
}

RelocationBaseSection &getIRelativeSection(Ctx &ctx);

// Returns true if Expr refers a GOT entry. Note that this function returns
// false for TLS variables even though they need GOT, because TLS variables uses
// GOT differently than the regular variables.
14 changes: 9 additions & 5 deletions lld/ELF/Writer.cpp
@@ -844,11 +844,15 @@ template <class ELFT> void Writer<ELFT>::setReservedSymbolSections() {
ctx.sym.globalOffsetTable->section = sec;
}

// .rela_iplt_{start,end} mark the start and the end of .rel[a].dyn.
if (ctx.sym.relaIpltStart && ctx.mainPart->relaDyn->isNeeded()) {
ctx.sym.relaIpltStart->section = ctx.mainPart->relaDyn.get();
ctx.sym.relaIpltEnd->section = ctx.mainPart->relaDyn.get();
ctx.sym.relaIpltEnd->value = ctx.mainPart->relaDyn->getSize();
// .rela_iplt_{start,end} mark the start and the end of the section containing
// IRELATIVE relocations.
if (ctx.sym.relaIpltStart) {
auto &dyn = getIRelativeSection(ctx);
if (dyn.isNeeded()) {
ctx.sym.relaIpltStart->section = &dyn;
ctx.sym.relaIpltEnd->section = &dyn;
ctx.sym.relaIpltEnd->value = dyn.getSize();
}
}

PhdrEntry *last = nullptr;
4 changes: 4 additions & 0 deletions lld/docs/ReleaseNotes.rst
@@ -39,6 +39,10 @@ ELF Improvements
Breaking changes
----------------

* Removed support for the (deprecated) `R_RISCV_RVC_LUI` relocation. This
was a binutils-internal relocation used during relaxation, and was not
emitted by compilers/assemblers.

COFF Improvements
-----------------
* ``/includeglob`` has been implemented to match the behavior of ``--undefined-glob`` available for ELF.
68 changes: 68 additions & 0 deletions lld/test/COFF/arm64x-loadconfig.s
@@ -0,0 +1,68 @@
// REQUIRES: aarch64
// RUN: split-file %s %t.dir && cd %t.dir

// RUN: llvm-mc -filetype=obj -triple=aarch64-windows test.s -o test.obj
// RUN: llvm-mc -filetype=obj -triple=aarch64-windows loadconfig.s -o loadconfig.obj
// RUN: llvm-mc -filetype=obj -triple=aarch64-windows loadconfig-short.s -o loadconfig-short.obj

// RUN: lld-link -machine:arm64x -out:out.dll -dll -noentry loadconfig.obj test.obj

// RUN: llvm-readobj --coff-load-config out.dll | FileCheck -check-prefix=DYNRELOCS %s
// DYNRELOCS: DynamicValueRelocTableOffset: 0xC
// DYNRELOCS-NEXT: DynamicValueRelocTableSection: 4
// DYNRELOCS: DynamicRelocations [
// DYNRELOCS-NEXT: Version: 0x1
// DYNRELOCS-NEXT: Arm64X [
// DYNRELOCS-NEXT: Entry [
// DYNRELOCS-NEXT: RVA: 0x7C
// DYNRELOCS-NEXT: Type: VALUE
// DYNRELOCS-NEXT: Size: 0x2
// DYNRELOCS-NEXT: Value: 0x8664
// DYNRELOCS-NEXT: ]
// DYNRELOCS-NEXT: Entry [
// DYNRELOCS-NEXT: RVA: 0x150
// DYNRELOCS-NEXT: Type: VALUE
// DYNRELOCS-NEXT: Size: 0x4
// DYNRELOCS-NEXT: Value: 0x0
// DYNRELOCS-NEXT: ]
// DYNRELOCS-NEXT: Entry [
// DYNRELOCS-NEXT: RVA: 0x154
// DYNRELOCS-NEXT: Type: VALUE
// DYNRELOCS-NEXT: Size: 0x4
// DYNRELOCS-NEXT: Value: 0x0
// DYNRELOCS-NEXT: ]
// DYNRELOCS-NEXT: ]
// DYNRELOCS-NEXT: ]

// RUN: llvm-readobj --headers out.dll | FileCheck -check-prefix=HEADERS %s
// HEADERS: BaseRelocationTableRVA: 0x4000
// HEADERS-NEXT: BaseRelocationTableSize: 0xC
// HEADERS: LoadConfigTableRVA: 0x1000
// HEADERS-NEXT: LoadConfigTableSize: 0x140
// HEADERS: Name: .reloc (2E 72 65 6C 6F 63 00 00)
// HEADERS-NEXT: VirtualSize: 0x38

// RUN: lld-link -machine:arm64x -out:out-short.dll -dll -noentry loadconfig-short.obj 2>&1 | FileCheck --check-prefix=WARN-RELOC-SIZE %s
// WARN-RELOC-SIZE: lld-link: warning: '_load_config_used' structure too small to include dynamic relocations

#--- test.s
.data
sym:
// Emit a basereloc to make the loadconfig test more meaningful.
.xword sym

#--- loadconfig.s
.section .rdata,"dr"
.globl _load_config_used
.p2align 3, 0
_load_config_used:
.word 0x140
.fill 0x13c,1,0

#--- loadconfig-short.s
.section .rdata,"dr"
.globl _load_config_used
.p2align 3, 0
_load_config_used:
.word 0xe4
.fill 0xe0,1,0
139 changes: 133 additions & 6 deletions lld/test/ELF/invalid/broken-relaxation-x64.test
@@ -1,15 +1,15 @@
# REQUIRES: x86

# RUN: yaml2obj %s -o %t.o
# RUN: not ld.lld %t.o -o /dev/null 2>&1 | FileCheck --check-prefix=ERR %s
# ERR: R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only
# ERR: R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only
# RUN: yaml2obj --docnum=1 %s -o %t1.o
# RUN: not ld.lld %t1.o -o /dev/null 2>&1 | FileCheck --check-prefix=ERR %s
# ERR: error: {{.*}}: R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only
# ERR: error: {{.*}}: R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only

## YAML below contains 2 relocations of type R_X86_64_GOTTPOFF, and a .text
## with fake content filled by 0xFF. That means instructions for relaxation are
## "broken", so they do not match any known valid relaxations. We also generate
## .tls section because we need it for correct processing of STT_TLS symbol.
!ELF
--- !ELF
FileHeader:
Class: ELFCLASS64
Data: ELFDATA2LSB
Expand Down Expand Up @@ -44,4 +44,131 @@ Symbols:
Value: 0x12345
Size: 4
Binding: STB_GLOBAL



# RUN: yaml2obj --docnum=2 %s -o %t2.o
# RUN: not ld.lld %t2.o -o /dev/null 2>&1 | FileCheck --check-prefix=ERR2 %s
# ERR2: error: {{.*}}: invalid prefix with R_X86_64_CODE_4_GOTTPOFF!
# ERR2: error: {{.*}}: invalid prefix with R_X86_64_CODE_6_GOTTPOFF!

## YAML below contains 2 relocations of
## R_X86_64_CODE_4_GOTTPOFF/R_X86_64_CODE_6_GOTTPOFF type, and a .text with
## fake content filled by 0xFF. It's expected to get "invalid prefix" error
## message as above.
--- !ELF
FileHeader:
Class: ELFCLASS64
Data: ELFDATA2LSB
OSABI: ELFOSABI_FREEBSD
Type: ET_REL
Machine: EM_X86_64
Sections:
- Type: SHT_PROGBITS
Name: .text
Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
AddressAlign: 0x04
Content: "FFFFFFFFFFFFFFFFFFFF"
- Type: SHT_PROGBITS
Name: .tls
Flags: [ SHF_ALLOC, SHF_TLS ]
- Type: SHT_REL
Name: .rel.text
Link: .symtab
Info: .text
AddressAlign: 0x04
Relocations:
- Offset: 4
Symbol: foo
Type: R_X86_64_CODE_4_GOTTPOFF
- Offset: 6
Symbol: foo
Type: R_X86_64_CODE_6_GOTTPOFF
Symbols:
- Name: foo
Type: STT_TLS
Section: .text
Value: 0x12345
Size: 4
Binding: STB_GLOBAL


# RUN: yaml2obj --docnum=3 %s -o %t3.o
# RUN: not ld.lld %t3.o -o /dev/null 2>&1 | FileCheck --check-prefix=ERR3 %s
# ERR3: error: {{.*}}: R_X86_64_CODE_4_GOTTPOFF must be used in MOVQ or ADDQ instructions only

## YAML below contains R_X86_64_CODE_4_GOTTPOFF relocation type, and a .text
## with fake content filled by 0xd5, 0xFF, ... and 0xFF. It's expected to get
## the error message as above.
--- !ELF
FileHeader:
Class: ELFCLASS64
Data: ELFDATA2LSB
OSABI: ELFOSABI_FREEBSD
Type: ET_REL
Machine: EM_X86_64
Sections:
- Type: SHT_PROGBITS
Name: .text
Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
AddressAlign: 0x04
Content: "d5FFFFFFFFFFFFFFFFFF"
- Type: SHT_PROGBITS
Name: .tls
Flags: [ SHF_ALLOC, SHF_TLS ]
- Type: SHT_REL
Name: .rel.text
Link: .symtab
Info: .text
AddressAlign: 0x04
Relocations:
- Offset: 4
Symbol: foo
Type: R_X86_64_CODE_4_GOTTPOFF
Symbols:
- Name: foo
Type: STT_TLS
Section: .text
Value: 0x12345
Size: 4
Binding: STB_GLOBAL


# RUN: yaml2obj --docnum=4 %s -o %t4.o
# RUN: not ld.lld %t4.o -o /dev/null 2>&1 | FileCheck --check-prefix=ERR4 %s
# ERR4: error: {{.*}}: R_X86_64_CODE_6_GOTTPOFF must be used in ADDQ instructions with NDD/NF/NDD+NF only

## YAML below contains R_X86_64_CODE_6_GOTTPOFF relocation type, and a .text
## with fake content filled by 0x62, 0xFF, ... and 0xFF. It's expected to get
## the error message as above.
--- !ELF
FileHeader:
Class: ELFCLASS64
Data: ELFDATA2LSB
OSABI: ELFOSABI_FREEBSD
Type: ET_REL
Machine: EM_X86_64
Sections:
- Type: SHT_PROGBITS
Name: .text
Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
AddressAlign: 0x04
Content: "62FFFFFFFFFFFFFFFFFF"
- Type: SHT_PROGBITS
Name: .tls
Flags: [ SHF_ALLOC, SHF_TLS ]
- Type: SHT_REL
Name: .rel.text
Link: .symtab
Info: .text
AddressAlign: 0x04
Relocations:
- Offset: 6
Symbol: foo
Type: R_X86_64_CODE_6_GOTTPOFF
Symbols:
- Name: foo
Type: STT_TLS
Section: .text
Value: 0x12345
Size: 4
Binding: STB_GLOBAL
25 changes: 25 additions & 0 deletions lld/test/ELF/pack-dyn-relocs-ifunc.s
@@ -47,3 +47,28 @@ _start:
.globl bar
bar:
ret

#--- c.s

# RUN: llvm-mc -filetype=obj -triple=aarch64-linux-android c.s -o c.o
# RUN: ld.lld --pack-dyn-relocs=android c.o -o c
# RUN: llvm-readelf -sS c | FileCheck --check-prefix=STATIC %s

# STATIC: .rela.plt RELA 0000000000200158 000158 000018 18 AI 0 5 8
# STATIC: 0000000000200158 0 NOTYPE LOCAL HIDDEN 1 __rela_iplt_start
# STATIC: 0000000000200170 0 NOTYPE LOCAL HIDDEN 1 __rela_iplt_end

.text
.type foo, %gnu_indirect_function
.globl foo
foo:
ret

.globl _start
_start:
bl foo

.data
.balign 8
.quad __rela_iplt_start
.quad __rela_iplt_end
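
For context on why the bracketing symbols must point at whichever section really holds the IRELATIVE entries: startup code in a statically linked binary walks this range itself before main. A rough C++ sketch of that walk, assuming a 64-bit RELA layout (the type and function names are illustrative; real libc startup code does more validation):

    #include <cstdint>

    // Simplified 64-bit RELA entry; the real definition lives in <elf.h>.
    struct Elf64Rela {
      uint64_t r_offset; // where to store the resolved address
      uint64_t r_info;   // relocation type, R_*_IRELATIVE here
      int64_t r_addend;  // address of the ifunc resolver
    };

    extern const Elf64Rela __rela_iplt_start[], __rela_iplt_end[];

    void applyIrelativeRelocs() {
      for (const Elf64Rela *r = __rela_iplt_start; r != __rela_iplt_end; ++r) {
        auto resolver = reinterpret_cast<uintptr_t (*)()>(r->r_addend);
        *reinterpret_cast<uintptr_t *>(r->r_offset) = resolver();
      }
    }

If the symbols were still defined against .rela.dyn while --pack-dyn-relocs=android moved the IRELATIVE entries into .rela.plt, this loop would walk the wrong section, which is what the Writer.cpp change earlier in this patch addresses.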
5 changes: 5 additions & 0 deletions lld/test/ELF/pack-dyn-relocs-tls-x86-64.s
@@ -13,6 +13,7 @@
foo:
movq tlsvar@GOTTPOFF(%rip), %rcx
movq tlsvar2@GOTTPOFF(%rip), %r31
addq tlsvar3@GOTTPOFF(%rip), %rcx, %r16


.section .tdata,"awT",@progbits
@@ -21,7 +22,11 @@ tlsvar:
.word 42
tlsvar2:
.word 42
tlsvar3:
.word 42

// CHECK: Section ({{.+}}) .rela.dyn {
// CHECK-NEXT: R_X86_64_TPOFF64 - 0x1234
// CHECK-NEXT: R_X86_64_TPOFF64 - 0x1236
// CHECK-NEXT: R_X86_64_TPOFF64 - 0x1238
// CHECK-NEXT: }
25 changes: 25 additions & 0 deletions lld/test/ELF/tls-opt.s
@@ -20,12 +20,25 @@
// DISASM-NEXT: leaq -4(%r15), %r15
// DISASM-NEXT: addq $-4, %rsp
// DISASM-NEXT: addq $-4, %r12
# EGPR
// DISASM-NEXT: movq $-8, %r16
// DISASM-NEXT: movq $-8, %r20
// DISASM-NEXT: movq $-4, %r16
// DISASM-NEXT: addq $-8, %r16
// DISASM-NEXT: addq $-8, %r28
// DISASM-NEXT: addq $-4, %r16
# NDD
// DISASM-NEXT: addq $-10, %r16, %r16
// DISASM-NEXT: addq $-10, %r16, %r20
// DISASM-NEXT: addq $-10, %r16, %rax
// DISASM-NEXT: addq $-10, %rax, %r16
// DISASM-NEXT: addq $-10, %r8, %r16
// DISASM-NEXT: addq $-10, %rax, %r12
# NDD + NF
// DISASM-NEXT: {nf} addq $-10, %r8, %r16
// DISASM-NEXT: {nf} addq $-10, %rax, %r12
# NF
// DISASM-NEXT: {nf} addq $-10, %r12

// LD to LE:
// DISASM-NEXT: movq %fs:0, %rax
Expand Down Expand Up @@ -82,6 +95,18 @@ _start:
addq tls0@GOTTPOFF(%rip), %r16
addq tls0@GOTTPOFF(%rip), %r28
addq tls1@GOTTPOFF(%rip), %r16
# NDD
addq tls0@GOTTPOFF(%rip), %r16, %r16
addq tls0@GOTTPOFF(%rip), %r16, %r20
addq tls0@GOTTPOFF(%rip), %r16, %rax
addq tls0@GOTTPOFF(%rip), %rax, %r16
addq %r8, tls0@GOTTPOFF(%rip), %r16
addq tls0@GOTTPOFF(%rip), %rax, %r12
# NDD + NF
{nf} addq %r8, tls0@GOTTPOFF(%rip), %r16
{nf} addq tls0@GOTTPOFF(%rip), %rax, %r12
# NF
{nf} addq tls0@GOTTPOFF(%rip), %r12

// LD to LE
leaq tls0@tlsld(%rip), %rdi
35 changes: 35 additions & 0 deletions lld/test/ELF/x86-64-tls-ie-err.s
@@ -0,0 +1,35 @@
# REQUIRES: x86
# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o
# RUN: not ld.lld %t.o -o /dev/null 2>&1 | FileCheck -DFILE=%t.o %s

# CHECK: error: [[FILE]]:(.text+0x2): invalid prefix with R_X86_64_CODE_4_GOTTPOFF!
# CHECK-NEXT: error: [[FILE]]:(.text+0x8): invalid prefix with R_X86_64_CODE_6_GOTTPOFF!
# CHECK-NEXT: error: [[FILE]]:(.text+0x12): R_X86_64_CODE_4_GOTTPOFF must be used in MOVQ or ADDQ instructions only
# CHECK-NEXT: error: [[FILE]]:(.text+0x1a): R_X86_64_CODE_6_GOTTPOFF must be used in ADDQ instructions with NDD/NF/NDD+NF only

## These negative tests check that invalid prefixes and unsupported
## instructions for the APX-related TLS relocation types are diagnosed as
## errors.

.type tls0,@object
.section .tbss,"awT",@nobits
.globl tls0
.align 4
tls0:
.long 0
.size tls0, 4

.text
.globl _start
_start:
addq 0(%rip), %rax, %r16
.reloc .-4, R_X86_64_CODE_4_GOTTPOFF, tls0-4

movq 0(%rip), %r16
.reloc .-4, R_X86_64_CODE_6_GOTTPOFF, tls0-4

andq 0(%rip), %r16
.reloc .-4, R_X86_64_CODE_4_GOTTPOFF, tls0-4

andq 0(%rip), %rax, %r16
.reloc .-4, R_X86_64_CODE_6_GOTTPOFF, tls0-4
42 changes: 30 additions & 12 deletions lld/test/ELF/x86-64-tls-ie-local.s
@@ -5,29 +5,47 @@
# RUN: llvm-readobj -r %t.so | FileCheck --check-prefix=REL %s
# RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn %t.so | FileCheck %s

# SEC: .got PROGBITS 0000000000002348 000348 000010 00 WA 0 0 8
# SEC: .got PROGBITS 0000000000002378 000378 000010 00 WA 0 0 8

## Dynamic relocations for non-preemptable symbols in a shared object have section index 0.
# REL: .rela.dyn {
# REL-NEXT: 0x2348 R_X86_64_TPOFF64 - 0x0
# REL-NEXT: 0x2350 R_X86_64_TPOFF64 - 0x4
# REL-NEXT: 0x2378 R_X86_64_TPOFF64 - 0x0
# REL-NEXT: 0x2380 R_X86_64_TPOFF64 - 0x4
# REL-NEXT: }

## &.got[0] - 0x127f = 0x2348 - 0x127f = 4297
## &.got[1] - 0x1286 = 0x2350 - 0x1286 = 4298
## &.got[2] - 0x128e = 0x2348 - 0x128e = 4282
## &.got[3] - 0x1296 = 0x2350 - 0x1296 = 4282
## &.got[0] - 0x127f = 0x2378 - 0x127f = 4345
## &.got[1] - 0x1286 = 0x2380 - 0x1286 = 4346
## &.got[2] - 0x128e = 0x2378 - 0x128e = 4330
## &.got[3] - 0x1296 = 0x2380 - 0x1296 = 4330
## &.got[0] - 0x12a0 = 0x2376 - 0x12a0 = 4310
## &.got[1] - 0x12aa = 0x237e - 0x12aa = 4308
## &.got[0] - 0x12b4 = 0x2376 - 0x12b4 = 4290
## &.got[1] - 0x12be = 0x237e - 0x12be = 4288
## &.got[0] - 0x12c8 = 0x2376 - 0x12c8 = 4270

# CHECK: 1278: addq 4297(%rip), %rax
# CHECK-NEXT: 127f: addq 4298(%rip), %rax
# CHECK-NEXT: 1286: addq 4282(%rip), %r16
# CHECK-NEXT: 128e: addq 4282(%rip), %r16
# CHECK: 1278: addq 4345(%rip), %rax
# CHECK-NEXT: 127f: addq 4346(%rip), %rax
# CHECK-NEXT: 1286: addq 4330(%rip), %r16
# CHECK-NEXT: 128e: addq 4330(%rip), %r16
# CHECK-NEXT: 1296: addq %r8, 4310(%rip), %r16
# CHECK-NEXT: 12a0: addq 4308(%rip), %rax, %r12
# CHECK-NEXT: 12aa: {nf} addq %r8, 4290(%rip), %r16
# CHECK-NEXT: 12b4: {nf} addq 4288(%rip), %rax, %r12
# CHECK-NEXT: 12be: {nf} addq 4270(%rip), %r12

addq foo@GOTTPOFF(%rip), %rax
addq bar@GOTTPOFF(%rip), %rax
# EGPR
addq foo@GOTTPOFF(%rip), %r16
addq bar@GOTTPOFF(%rip), %r16

# NDD
addq %r8, foo@GOTTPOFF(%rip), %r16
addq bar@GOTTPOFF(%rip), %rax, %r12
# NDD + NF
{nf} addq %r8, foo@GOTTPOFF(%rip), %r16
{nf} addq bar@GOTTPOFF(%rip), %rax, %r12
# NF
{nf} addq foo@GOTTPOFF(%rip), %r12

.section .tbss,"awT",@nobits
foo:
21 changes: 14 additions & 7 deletions lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
@@ -45,6 +45,7 @@
#include "clang/AST/Type.h"
#include "clang/Basic/Specifiers.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/DebugInfo/DWARF/DWARFTypePrinter.h"
#include "llvm/Demangle/Demangle.h"

#include <map>
@@ -826,11 +827,11 @@ std::string DWARFASTParserClang::GetDIEClassTemplateParams(DWARFDIE die) {
if (llvm::StringRef(die.GetName()).contains("<"))
return {};

TypeSystemClang::TemplateParameterInfos template_param_infos;
if (ParseTemplateParameterInfos(die, template_param_infos))
return m_ast.PrintTemplateParams(template_param_infos);

return {};
std::string name;
llvm::raw_string_ostream os(name);
llvm::DWARFTypePrinter<DWARFDIE> type_printer(os);
type_printer.appendAndTerminateTemplateParameters(die);
return name;
}

void DWARFASTParserClang::MapDeclDIEToDefDIE(
@@ -1618,9 +1619,9 @@ void DWARFASTParserClang::GetUniqueTypeNameAndDeclaration(
case DW_TAG_structure_type:
case DW_TAG_union_type: {
if (const char *class_union_struct_name = parent_decl_ctx_die.GetName()) {
qualified_name.insert(
0, GetDIEClassTemplateParams(parent_decl_ctx_die));
qualified_name.insert(0, "::");
qualified_name.insert(0,
GetDIEClassTemplateParams(parent_decl_ctx_die));
qualified_name.insert(0, class_union_struct_name);
}
parent_decl_ctx_die = parent_decl_ctx_die.GetParentDeclContextDIE();
@@ -1673,6 +1674,12 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc,
if (attrs.name) {
GetUniqueTypeNameAndDeclaration(die, cu_language, unique_typename,
unique_decl);
if (log) {
dwarf->GetObjectFile()->GetModule()->LogMessage(
log, "SymbolFileDWARF({0:p}) - {1:x16}: {2} has unique name: {3} ",
static_cast<void *>(this), die.GetID(), DW_TAG_value_to_name(tag),
unique_typename.AsCString());
}
if (UniqueDWARFASTType *unique_ast_entry_type =
dwarf->GetUniqueDWARFASTTypeMap().Find(
unique_typename, die, unique_decl, byte_size,
8 changes: 8 additions & 0 deletions lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.h
@@ -24,9 +24,11 @@ class DWARFUnit;
class DWARFDebugInfoEntry;
class DWARFDeclContext;
class SymbolFileDWARF;
class DWARFFormValue;

class DWARFBaseDIE {
public:
using DWARFFormValue = dwarf::DWARFFormValue;
DWARFBaseDIE() = default;

DWARFBaseDIE(DWARFUnit *cu, DWARFDebugInfoEntry *die)
@@ -117,6 +119,12 @@ class DWARFBaseDIE {
enum class Recurse : bool { no, yes };
DWARFAttributes GetAttributes(Recurse recurse = Recurse::yes) const;

// The following methods use the LLVM naming convention so that they can be
// used by LLVM libraries.
dw_tag_t getTag() const { return Tag(); }

const char *getShortName() const { return GetName(); }

protected:
DWARFUnit *m_cu = nullptr;
DWARFDebugInfoEntry *m_die = nullptr;
37 changes: 37 additions & 0 deletions lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp
@@ -572,6 +572,43 @@ bool DWARFDIE::GetDIENamesAndRanges(
return false;
}

// The following methods use the LLVM naming convention so that they can be
// used by LLVM libraries.
llvm::iterator_range<DWARFDIE::child_iterator> DWARFDIE::children() const {
return llvm::make_range(child_iterator(*this), child_iterator());
}

DWARFDIE::child_iterator DWARFDIE::begin() const {
return child_iterator(*this);
}

DWARFDIE::child_iterator DWARFDIE::end() const { return child_iterator(); }

std::optional<DWARFFormValue> DWARFDIE::find(const dw_attr_t attr) const {
DWARFFormValue form_value;
if (m_die->GetAttributeValue(m_cu, attr, form_value, nullptr, false))
return form_value;
return std::nullopt;
}

std::optional<uint64_t> DWARFDIE::getLanguage() const {
if (IsValid())
return m_cu->GetDWARFLanguageType();
return std::nullopt;
}

DWARFDIE DWARFDIE::resolveReferencedType(dw_attr_t attr) const {
return GetReferencedDIE(attr);
}

DWARFDIE DWARFDIE::resolveReferencedType(DWARFFormValue v) const {
if (IsValid())
return v.Reference();
return {};
}

DWARFDIE DWARFDIE::resolveTypeUnitReference() const {
if (DWARFDIE reference = GetReferencedDIE(DW_AT_signature))
return reference;
return *this;
}
17 changes: 17 additions & 0 deletions lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.h
@@ -103,8 +103,25 @@ class DWARFDIE : public DWARFBaseDIE {
std::optional<int> &call_line, std::optional<int> &call_column,
DWARFExpressionList *frame_base) const;

// The following methods use the LLVM naming convention so that they can be
// used by LLVM libraries.
std::optional<uint64_t> getLanguage() const;

DWARFDIE getParent() const { return GetParent(); }

DWARFDIE resolveReferencedType(dw_attr_t attr) const;

DWARFDIE resolveReferencedType(DWARFFormValue v) const;

DWARFDIE resolveTypeUnitReference() const;

std::optional<DWARFFormValue> find(const dw_attr_t attr) const;

/// The range of all the children of this DIE.
llvm::iterator_range<child_iterator> children() const;

child_iterator begin() const;
child_iterator end() const;
};

class DWARFDIE::child_iterator
25 changes: 25 additions & 0 deletions lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp
@@ -574,6 +574,31 @@ uint64_t DWARFFormValue::Reference(dw_offset_t base_offset) const {
}
}

std::optional<uint64_t> DWARFFormValue::getAsUnsignedConstant() const {
if ((!IsDataForm(m_form)) || m_form == lldb_private::dwarf::DW_FORM_sdata)
return std::nullopt;
return m_value.uval;
}

std::optional<int64_t> DWARFFormValue::getAsSignedConstant() const {
if ((!IsDataForm(m_form)) ||
(m_form == lldb_private::dwarf::DW_FORM_udata &&
uint64_t(std::numeric_limits<int64_t>::max()) < m_value.uval))
return std::nullopt;
switch (m_form) {
case lldb_private::dwarf::DW_FORM_data4:
return int32_t(m_value.uval);
case lldb_private::dwarf::DW_FORM_data2:
return int16_t(m_value.uval);
case lldb_private::dwarf::DW_FORM_data1:
return int8_t(m_value.uval);
case lldb_private::dwarf::DW_FORM_sdata:
case lldb_private::dwarf::DW_FORM_data8:
default:
return m_value.sval;
}
}
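
As a quick illustration of the switch above, a one-byte DW_FORM_data1 payload of 0xFF is returned as -1 by getAsSignedConstant but as 255 by getAsUnsignedConstant. The underlying casts in isolation (standalone sketch, not lldb code):

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t uval = 0xFF; // as stored for a DW_FORM_data1 attribute
      std::printf("%lld\n", (long long)(int8_t)uval);  // -1
      std::printf("%llu\n", (unsigned long long)uval); // 255
      return 0;
    }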

const uint8_t *DWARFFormValue::BlockData() const { return m_value.data; }

bool DWARFFormValue::IsBlockForm(const dw_form_t form) {
6 changes: 6 additions & 0 deletions lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.h
@@ -76,6 +76,12 @@ class DWARFFormValue {
void Clear();
static bool FormIsSupported(dw_form_t form);

// The following methods use the LLVM naming convention so that they can be
// used by LLVM libraries.
std::optional<uint64_t> getAsUnsignedConstant() const;
std::optional<int64_t> getAsSignedConstant() const;
const char *getAsCString() const { return AsCString(); }

protected:
// Compile unit where m_value was located.
// It may be different from compile unit where m_value refers to.
74 changes: 42 additions & 32 deletions lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp
@@ -23,6 +23,7 @@
#include "lldb/Utility/Timer.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/ThreadPool.h"
#include <atomic>
#include <optional>

using namespace lldb_private;
@@ -81,44 +82,53 @@ void ManualDWARFIndex::Index() {
Progress progress("Manually indexing DWARF", module_desc.GetData(),
total_progress);

std::vector<IndexSet> sets(units_to_index.size());

// Keep memory down by clearing DIEs for any units if indexing
// caused us to load the unit's DIEs.
std::vector<std::optional<DWARFUnit::ScopedExtractDIEs>> clear_cu_dies(
units_to_index.size());
auto parser_fn = [&](size_t cu_idx) {
IndexUnit(*units_to_index[cu_idx], dwp_dwarf, sets[cu_idx]);
progress.Increment();
};

auto extract_fn = [&](size_t cu_idx) {
clear_cu_dies[cu_idx] = units_to_index[cu_idx]->ExtractDIEsScoped();
progress.Increment();
};

// Share one thread pool across operations to avoid the overhead of
// recreating the threads.
llvm::ThreadPoolTaskGroup task_group(Debugger::GetThreadPool());
const size_t num_threads = Debugger::GetThreadPool().getMaxConcurrency();

// Run a function for each compile unit in parallel using as many threads as
// are available. This is significantly faster than submitting a new task for
// each unit.
auto for_each_unit = [&](auto &&fn) {
std::atomic<size_t> next_cu_idx = 0;
auto wrapper = [&fn, &next_cu_idx, &units_to_index,
&progress](size_t worker_id) {
size_t cu_idx;
while ((cu_idx = next_cu_idx.fetch_add(1, std::memory_order_relaxed)) <
units_to_index.size()) {
fn(worker_id, cu_idx, units_to_index[cu_idx]);
progress.Increment();
}
};

// Create a task runner that extracts dies for each DWARF unit in a
// separate thread.
// First figure out which units didn't have their DIEs already
// parsed and remember this. If no DIEs were parsed prior to this index
// function call, we are going to want to clear the CU dies after we are
// done indexing to make sure we don't pull in all DWARF dies, but we need
// to wait until all units have been indexed in case a DIE in one
// unit refers to another and the indexes accesses those DIEs.
for (size_t i = 0; i < units_to_index.size(); ++i)
task_group.async(extract_fn, i);
task_group.wait();
for (size_t i = 0; i < num_threads; ++i)
task_group.async(wrapper, i);

// Now create a task runner that can index each DWARF unit in a
// separate thread so we can index quickly.
for (size_t i = 0; i < units_to_index.size(); ++i)
task_group.async(parser_fn, i);
task_group.wait();
task_group.wait();
};

// Extract DIEs for all DWARF units in parallel. Figure out which units
// didn't have their DIEs already parsed and remember this. If no DIEs were
// parsed prior to this index function call, we are going to want to clear the
// CU dies after we are done indexing to make sure we don't pull in all DWARF
// dies, but we need to wait until all units have been indexed in case a DIE
// in one unit refers to another and the indexes access those DIEs.
std::vector<std::optional<DWARFUnit::ScopedExtractDIEs>> clear_cu_dies(
units_to_index.size());
for_each_unit([&clear_cu_dies](size_t, size_t idx, DWARFUnit *unit) {
clear_cu_dies[idx] = unit->ExtractDIEsScoped();
});

// Now index all DWARF units in parallel.
std::vector<IndexSet> sets(num_threads);
for_each_unit(
[this, dwp_dwarf, &sets](size_t worker_id, size_t, DWARFUnit *unit) {
IndexUnit(*unit, dwp_dwarf, sets[worker_id]);
});

// Merge partial indexes into a single index. Process each index in a set in
// parallel.
auto finalize_fn = [this, &sets, &progress](NameToDIE(IndexSet::*index)) {
NameToDIE &result = m_set.*index;
for (auto &set : sets)
29 changes: 14 additions & 15 deletions lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
@@ -9,6 +9,7 @@
#include "SymbolFileDWARF.h"

#include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
#include "llvm/DebugInfo/DWARF/DWARFTypePrinter.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/Format.h"
@@ -2726,22 +2727,20 @@ void SymbolFileDWARF::FindTypes(const TypeQuery &query, TypeResults &results) {
// Copy our match's context and update the basename we are looking for
// so we can use this only to compare the context correctly.
m_index->GetTypesWithQuery(query_simple, [&](DWARFDIE die) {
if (Type *matching_type = ResolveType(die, true, true)) {
ConstString name = matching_type->GetQualifiedName();
// We have found a type that still might not match due to template
// parameters. If we create a new TypeQuery that uses the new type's
// fully qualified name, we can find out if this type matches at all
// context levels. We can't use just the "match_simple" context
// because all template parameters were stripped off. The fully
// qualified name of the type will have the template parameters and
// will allow us to make sure it matches correctly.
TypeQuery die_query(name.GetStringRef(),
TypeQueryOptions::e_exact_match);
if (!query.ContextMatches(die_query.GetContextRef()))
return true; // Keep iterating over index types, context mismatch.

results.InsertUnique(matching_type->shared_from_this());
// Check the language, but only if we have a language filter.
if (query.HasLanguage()) {
if (!query.LanguageMatches(GetLanguageFamily(*die.GetCU())))
return true; // Keep iterating over index types, language mismatch.
}

std::string qualified_name;
llvm::raw_string_ostream os(qualified_name);
llvm::DWARFTypePrinter<DWARFDIE> type_printer(os);
type_printer.appendQualifiedName(die);
TypeQuery die_query(qualified_name, e_exact_match);
if (query.ContextMatches(die_query.GetContextRef()))
if (Type *matching_type = ResolveType(die, true, true))
results.InsertUnique(matching_type->shared_from_this());
return !results.Done(query); // Keep iterating if we aren't done.
});
if (results.Done(query)) {
20 changes: 0 additions & 20 deletions lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
@@ -1403,26 +1403,6 @@ static TemplateParameterList *CreateTemplateParameterList(
return template_param_list;
}

std::string TypeSystemClang::PrintTemplateParams(
const TemplateParameterInfos &template_param_infos) {
llvm::SmallVector<NamedDecl *, 8> ignore;
clang::TemplateParameterList *template_param_list =
CreateTemplateParameterList(getASTContext(), template_param_infos,
ignore);
llvm::SmallVector<clang::TemplateArgument, 2> args(
template_param_infos.GetArgs());
if (template_param_infos.hasParameterPack()) {
llvm::ArrayRef<TemplateArgument> pack_args =
template_param_infos.GetParameterPackArgs();
args.append(pack_args.begin(), pack_args.end());
}
std::string str;
llvm::raw_string_ostream os(str);
clang::printTemplateArgumentList(os, args, GetTypePrintingPolicy(),
template_param_list);
return str;
}

clang::FunctionTemplateDecl *TypeSystemClang::CreateFunctionTemplateDecl(
clang::DeclContext *decl_ctx, OptionalClangModuleID owning_module,
clang::FunctionDecl *func_decl,
4 changes: 0 additions & 4 deletions lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h
@@ -1148,10 +1148,6 @@ class TypeSystemClang : public TypeSystem {

bool SetDeclIsForcefullyCompleted(const clang::TagDecl *td);

/// Return the template parameters (including surrounding <>) in string form.
std::string
PrintTemplateParams(const TemplateParameterInfos &template_param_infos);

private:
/// Returns the PrintingPolicy used when generating the internal type names.
/// These type names are mostly used for the formatter selection.
36 changes: 36 additions & 0 deletions lldb/test/Shell/SymbolFile/DWARF/x86/simplified-template-names.cpp
@@ -0,0 +1,36 @@
// Test that lldb is able to compute the fully qualified names of templates
// compiled with -gsimple-template-names and -fdebug-types-section.

// REQUIRES: lld

// Test against logging to see if we print the fully qualified names correctly.
// RUN: %clangxx --target=x86_64-pc-linux -g -gsimple-template-names %s -c -o %t1.o
// RUN: ld.lld %t1.o -o %t1
// RUN: %lldb %t1 -o "log enable dwarf comp" -o "target variable v3" -o exit | FileCheck %s --check-prefix=LOG

// Test that we follow DW_AT_signature correctly. If not, lldb might confuse the types of v1 and v2.
// RUN: %clangxx --target=x86_64-pc-linux -g -gsimple-template-names -fdebug-types-section %s -c -o %t2.o
// RUN: ld.lld %t2.o -o %t2
// RUN: %lldb %t2 -o "target variable v1 v2" -o exit | FileCheck %s --check-prefix=TYPE

// LOG: unique name: t3<t2<int> >::t4

// TYPE: (t2<outer_struct1::t1<int> >) v1 = {}
// TYPE-NEXT: (t2<outer_struct2::t1<int> >) v2 = {}

struct outer_struct1 {
template <typename> struct t1 {};
};

struct outer_struct2 {
template <typename> struct t1 {};
};

template <typename> struct t2 {};
t2<outer_struct1::t1<int>> v1;
t2<outer_struct2::t1<int>> v2;

template <typename> struct t3 {
struct t4 {};
};
t3<t2<int>>::t4 v3;
11 changes: 1 addition & 10 deletions llvm/cmake/modules/CrossCompile.cmake
@@ -69,17 +69,8 @@ function(llvm_create_cross_target project_name target_name toolchain buildtype)
"-DLLVM_EXTERNAL_${name}_SOURCE_DIR=${LLVM_EXTERNAL_${name}_SOURCE_DIR}")
endforeach()

if("libc" IN_LIST LLVM_ENABLE_PROJECTS AND NOT LIBC_HDRGEN_EXE)
set(libc_flags -DLLVM_LIBC_FULL_BUILD=ON -DLIBC_HDRGEN_ONLY=ON)
if(MSVC)
# Due to some issues mentioned in llvm/projects/CMakeLists.txt, libc build is disabled by
# default in the cross target when building with MSVC compatible compilers on Windows. Add
# LLVM_FORCE_BUILD_RUNTIME to bypass this issue and force its building on Windows.
list(APPEND libc_flags -DLLVM_FORCE_BUILD_RUNTIME=ON)
endif()
endif()
if(LLVM_LIBC_GPU_BUILD)
list(APPEND libc_flags -DLLVM_LIBC_GPU_BUILD=ON)
set(libc_flags -DLLVM_LIBC_GPU_BUILD=ON)
endif()

add_custom_command(OUTPUT ${${project_name}_${target_name}_BUILD}/CMakeCache.txt
41 changes: 38 additions & 3 deletions llvm/include/llvm/ADT/APFloat.h
@@ -155,7 +155,41 @@ struct APFloatBase {
S_IEEEsingle,
S_IEEEdouble,
S_IEEEquad,
// The IBM double-double semantics. Such a number consists of a pair of
// IEEE 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
// (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
// Therefore it has two 53-bit mantissa parts that aren't necessarily
// adjacent to each other, and two 11-bit exponents.
//
// Note: we need to make the value different from semBogus as otherwise
// an unsafe optimization may collapse both values to a single address,
// and we heavily rely on them having distinct addresses.
S_PPCDoubleDouble,
// These are legacy semantics for the fallback, inaccurate implementation
// of IBM double-double, if the accurate semPPCDoubleDouble doesn't handle
// the operation. It's equivalent to having an IEEE number with consecutive
// 106 bits of mantissa and 11 bits of exponent.
//
// It's not equivalent to IBM double-double. For example, a legit IBM
// double-double, 1 + epsilon:
//
// 1 + epsilon = 1 + (1 >> 1076)
//
// is not representable by a consecutive 106 bits of mantissa.
//
// Currently, these semantics are used in the following way:
//
// semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
// (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
// semPPCDoubleDoubleLegacy -> IEEE operations
//
// We use bitcastToAPInt() to get the bit representation (in APInt) of the
// underlying IEEEdouble, then use the APInt constructor to construct the
// legacy IEEE float.
//
// TODO: Implement all operations in semPPCDoubleDouble, and delete these
// semantics.
S_PPCDoubleDoubleLegacy,
// 8-bit floating point number following IEEE-754 conventions with bit
// layout S1E5M2 as described in https://arxiv.org/abs/2209.05433.
S_Float8E5M2,
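
The 1 + epsilon remark in the IBM double-double comments above can be checked numerically: a pair whose low part sits far below the high part's precision is a legal double-double yet has no counterpart in a single contiguous 106-bit significand. A small standalone check (not part of the patch; assumes IEEE-754 binary64 doubles):

    #include <cfloat>
    #include <cstdio>

    int main() {
      // A valid pair: |Hi| > |Lo| and (double)(Hi + Lo) == Hi.
      double hi = 1.0;
      double lo = DBL_TRUE_MIN; // 2^-1074
      std::printf("%d\n", (double)(hi + lo) == hi); // 1 under round-to-nearest
      // A 106-bit significand anchored at Hi's exponent only reaches down to
      // 2^-105, so Hi + Lo cannot be represented in the legacy semantics.
      return 0;
    }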
@@ -214,7 +248,7 @@ struct APFloatBase {
// types, there are no infinity or NaN values. The format is detailed in
// https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf
S_Float4E2M1FN,

// TODO: Documentation is missing.
S_x87DoubleExtended,
S_MaxSemantics = S_x87DoubleExtended,
};
@@ -228,6 +262,7 @@ struct APFloatBase {
static const fltSemantics &IEEEdouble() LLVM_READNONE;
static const fltSemantics &IEEEquad() LLVM_READNONE;
static const fltSemantics &PPCDoubleDouble() LLVM_READNONE;
static const fltSemantics &PPCDoubleDoubleLegacy() LLVM_READNONE;
static const fltSemantics &Float8E5M2() LLVM_READNONE;
static const fltSemantics &Float8E5M2FNUZ() LLVM_READNONE;
static const fltSemantics &Float8E4M3() LLVM_READNONE;
@@ -688,7 +723,7 @@ class IEEEFloat final {
APInt convertDoubleAPFloatToAPInt() const;
APInt convertQuadrupleAPFloatToAPInt() const;
APInt convertF80LongDoubleAPFloatToAPInt() const;
APInt convertPPCDoubleDoubleAPFloatToAPInt() const;
APInt convertPPCDoubleDoubleLegacyAPFloatToAPInt() const;
APInt convertFloat8E5M2APFloatToAPInt() const;
APInt convertFloat8E5M2FNUZAPFloatToAPInt() const;
APInt convertFloat8E4M3APFloatToAPInt() const;
@@ -709,7 +744,7 @@ class IEEEFloat final {
void initFromDoubleAPInt(const APInt &api);
void initFromQuadrupleAPInt(const APInt &api);
void initFromF80LongDoubleAPInt(const APInt &api);
void initFromPPCDoubleDoubleAPInt(const APInt &api);
void initFromPPCDoubleDoubleLegacyAPInt(const APInt &api);
void initFromFloat8E5M2APInt(const APInt &api);
void initFromFloat8E5M2FNUZAPInt(const APInt &api);
void initFromFloat8E4M3APInt(const APInt &api);
1 change: 0 additions & 1 deletion llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def
@@ -45,7 +45,6 @@ ELF_RELOC(R_RISCV_GOT32_PCREL, 41)
ELF_RELOC(R_RISCV_ALIGN, 43)
ELF_RELOC(R_RISCV_RVC_BRANCH, 44)
ELF_RELOC(R_RISCV_RVC_JUMP, 45)
ELF_RELOC(R_RISCV_RVC_LUI, 46)
ELF_RELOC(R_RISCV_RELAX, 51)
ELF_RELOC(R_RISCV_SUB6, 52)
ELF_RELOC(R_RISCV_SET6, 53)
5 changes: 5 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
@@ -778,6 +778,11 @@ class LegalizeRuleSet {
LegalizeRuleSet &libcallFor(std::initializer_list<LLT> Types) {
return actionFor(LegalizeAction::Libcall, Types);
}
LegalizeRuleSet &libcallFor(bool Pred, std::initializer_list<LLT> Types) {
if (!Pred)
return *this;
return actionFor(LegalizeAction::Libcall, Types);
}
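
A hypothetical call site for the new predicated overload, inside some target's legalizer-info constructor; the subtarget query, opcode choice, and variable names are illustrative rather than taken from an in-tree target, and this is a fragment, not a complete translation unit:

    // Request a libcall for 32/64-bit G_FREM only when the subtarget lacks
    // hardware floating point (ST and hasHardFloat() are assumed names).
    const LLT s32 = LLT::scalar(32);
    const LLT s64 = LLT::scalar(64);
    getActionDefinitionsBuilder(G_FREM)
        .libcallFor(!ST.hasHardFloat(), {s32, s64});

When the predicate is false the call is a no-op and the builder falls through to whatever other rules the target registers for that opcode.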
LegalizeRuleSet &
libcallFor(std::initializer_list<std::pair<LLT, LLT>> Types) {
return actionFor(LegalizeAction::Libcall, Types);
2 changes: 2 additions & 0 deletions llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h
@@ -226,6 +226,8 @@ class DWARFDie {

bool addressRangeContainsAddress(const uint64_t Address) const;

std::optional<uint64_t> getLanguage() const;

Expected<DWARFLocationExpressionsVector>
getLocations(dwarf::Attribute Attr) const;

33 changes: 21 additions & 12 deletions llvm/include/llvm/DebugInfo/DWARF/DWARFTypePrinter.h
@@ -11,6 +11,7 @@

#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/Support/Error.h"

#include <string>

@@ -108,13 +109,11 @@ void DWARFTypePrinter<DieType>::appendArrayType(const DieType &D) {
if (std::optional<typename DieType::DWARFFormValue> UpperV =
C.find(dwarf::DW_AT_upper_bound))
UB = UpperV->getAsUnsignedConstant();
if (std::optional<typename DieType::DWARFFormValue> LV =
D.getDwarfUnit()->getUnitDIE().find(dwarf::DW_AT_language))
if (std::optional<uint64_t> LC = LV->getAsUnsignedConstant())
if ((DefaultLB =
LanguageLowerBound(static_cast<dwarf::SourceLanguage>(*LC))))
if (LB && *LB == *DefaultLB)
LB = std::nullopt;
if (std::optional<uint64_t> LV = D.getLanguage())
if ((DefaultLB =
LanguageLowerBound(static_cast<dwarf::SourceLanguage>(*LV))))
if (LB && *LB == *DefaultLB)
LB = std::nullopt;
if (!LB && !Count && !UB)
OS << "[]";
else if (!LB && (Count || UB) && DefaultLB)
@@ -151,6 +150,16 @@ template <typename DieType>
DieType resolveReferencedType(DieType D, typename DieType::DWARFFormValue F) {
return D.resolveReferencedType(F);
}
template <typename DWARFFormValueType>
const char *toString(std::optional<DWARFFormValueType> F) {
if (F) {
llvm::Expected<const char *> E = F->getAsCString();
if (E)
return *E;
llvm::consumeError(E.takeError());
}
return nullptr;
}
} // namespace detail

template <typename DieType>
@@ -240,7 +249,7 @@ DieType DWARFTypePrinter<DieType>::appendUnqualifiedNameBefore(
appendConstVolatileQualifierBefore(D);
break;
case dwarf::DW_TAG_namespace: {
if (const char *Name = dwarf::toString(D.find(dwarf::DW_AT_name), nullptr))
if (const char *Name = detail::toString(D.find(dwarf::DW_AT_name)))
OS << Name;
else
OS << "(anonymous namespace)";
@@ -262,7 +271,7 @@ DieType DWARFTypePrinter<DieType>::appendUnqualifiedNameBefore(
case DW_TAG_base_type:
*/
default: {
const char *NamePtr = dwarf::toString(D.find(dwarf::DW_AT_name), nullptr);
const char *NamePtr = detail::toString(D.find(dwarf::DW_AT_name));
if (!NamePtr) {
appendTypeTagName(D.getTag());
return DieType();
@@ -441,7 +450,7 @@ bool DWARFTypePrinter<DieType>::appendTemplateParameters(DieType D,
if (T.getTag() == dwarf::DW_TAG_pointer_type ||
T.getTag() == dwarf::DW_TAG_reference_type)
continue;
const char *RawName = dwarf::toString(T.find(dwarf::DW_AT_name), nullptr);
const char *RawName = detail::toString(T.find(dwarf::DW_AT_name));
assert(RawName);
StringRef Name = RawName;
auto V = C.find(dwarf::DW_AT_const_value);
@@ -534,7 +543,7 @@ bool DWARFTypePrinter<DieType>::appendTemplateParameters(DieType D,
}
if (C.getTag() == dwarf::DW_TAG_GNU_template_template_param) {
const char *RawName =
dwarf::toString(C.find(dwarf::DW_AT_GNU_template_name), nullptr);
detail::toString(C.find(dwarf::DW_AT_GNU_template_name));
assert(RawName);
StringRef Name = RawName;
Sep();
@@ -594,7 +603,7 @@ void DWARFTypePrinter<DieType>::appendConstVolatileQualifierAfter(DieType N) {
decomposeConstVolatile(N, T, C, V);
if (T && T.getTag() == dwarf::DW_TAG_subroutine_type)
appendSubroutineNameAfter(T, detail::resolveReferencedType(T), false,
C.isValid(), V.isValid());
static_cast<bool>(C), static_cast<bool>(V));
else
appendUnqualifiedNameAfter(T, detail::resolveReferencedType(T));
}
2 changes: 1 addition & 1 deletion llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -774,7 +774,7 @@ def int_amdgcn_prng_b32 : DefaultAttrsIntrinsic<

def int_amdgcn_bitop3 :
DefaultAttrsIntrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i8_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>]>;

} // TargetPrefix = "amdgcn"
5 changes: 5 additions & 0 deletions llvm/include/llvm/MC/MCRegisterInfo.h
@@ -404,6 +404,11 @@ class MCRegisterInfo {
/// be modelled, such as the top 16-bits of a 32-bit GPR.
bool isArtificial(MCRegister RegNo) const { return get(RegNo).IsArtificial; }

/// Returns true when the given register unit is considered artificial.
/// Register units are considered artificial when at least one of the
/// root registers is artificial.
bool isArtificialRegUnit(MCRegUnit Unit) const;

/// Return the number of registers this target has (useful for
/// sizing arrays holding per register information)
unsigned getNumRegs() const {