112 changes: 56 additions & 56 deletions llvm/test/Transforms/OpenMP/custom_state_machines.ll
Original file line number Diff line number Diff line change
Expand Up @@ -912,14 +912,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU: is_worker_check:
; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; AMDGPU: is_worker_check:
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; AMDGPU: worker_state_machine.begin:
; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8**
Expand Down Expand Up @@ -1037,14 +1037,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU: is_worker_check:
; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; AMDGPU: is_worker_check:
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; AMDGPU: worker_state_machine.begin:
; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8**
Expand Down Expand Up @@ -1188,14 +1188,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU: is_worker_check:
; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; AMDGPU: is_worker_check:
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; AMDGPU: worker_state_machine.begin:
; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8**
Expand Down Expand Up @@ -1314,14 +1314,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU: is_worker_check:
; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; AMDGPU: is_worker_check:
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; AMDGPU: worker_state_machine.begin:
; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8**
Expand Down Expand Up @@ -1438,14 +1438,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU: is_worker_check:
; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; AMDGPU: is_worker_check:
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; AMDGPU: worker_state_machine.begin:
; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8**
Expand Down Expand Up @@ -1562,14 +1562,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU: is_worker_check:
; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; AMDGPU: is_worker_check:
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; AMDGPU: worker_state_machine.begin:
; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8**
Expand Down Expand Up @@ -1668,14 +1668,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU: is_worker_check:
; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; AMDGPU: is_worker_check:
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; AMDGPU: worker_state_machine.begin:
; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8**
Expand Down Expand Up @@ -1888,14 +1888,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX: is_worker_check:
; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX: is_worker_check:
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX: worker_state_machine.begin:
; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
Expand Down Expand Up @@ -2012,14 +2012,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX: is_worker_check:
; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX: is_worker_check:
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX: worker_state_machine.begin:
; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
Expand Down Expand Up @@ -2162,14 +2162,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX: is_worker_check:
; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX: is_worker_check:
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX: worker_state_machine.begin:
; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
Expand Down Expand Up @@ -2287,14 +2287,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX: is_worker_check:
; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX: is_worker_check:
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX: worker_state_machine.begin:
; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
Expand Down Expand Up @@ -2410,14 +2410,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX: is_worker_check:
; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX: is_worker_check:
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX: worker_state_machine.begin:
; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
Expand Down Expand Up @@ -2533,14 +2533,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX: is_worker_check:
; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX: is_worker_check:
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX: worker_state_machine.begin:
; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
Expand Down Expand Up @@ -2638,14 +2638,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX: is_worker_check:
; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX: is_worker_check:
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX: worker_state_machine.begin:
; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
Expand Down
128 changes: 64 additions & 64 deletions llvm/test/Transforms/OpenMP/spmdization.ll

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions llvm/test/Transforms/OpenMP/spmdization_guarding.ll
Original file line number Diff line number Diff line change
Expand Up @@ -178,14 +178,14 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(i32* %x, i64 %N)
; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
; CHECK-DISABLED-NEXT: [[N_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[N]] to i32
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1]], i8 1, i1 false, i1 true) #[[ATTR4:[0-9]+]]
; CHECK-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; CHECK-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; CHECK-DISABLED: is_worker_check:
; CHECK-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; CHECK-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; CHECK-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; CHECK-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; CHECK-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; CHECK-DISABLED: is_worker_check:
; CHECK-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; CHECK-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; CHECK-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; CHECK-DISABLED: worker_state_machine.begin:
; CHECK-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
; CHECK-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,14 +55,14 @@ define weak void @__omp_offloading_2b_10393b5_spmd_l12() #0 {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
; CHECK-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; CHECK: is_worker_check:
; CHECK-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; CHECK-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; CHECK-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; CHECK-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; CHECK-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; CHECK: is_worker_check:
; CHECK-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; CHECK-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; CHECK: worker_state_machine.begin:
; CHECK-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
Expand Down